A83: Optimized Movax's sprite routine
[Prev][Next][Index][Thread]
A83: Optimized Movax's sprite routine
It confounds me why the ever-so-popular sprite routine is not optimized,
even by simple methods (and a instead of cp 0, etc.).
Here is the optimized version, 6 bytes smaller:
 
; Modified SPRXOR
; XOR an 8x8 sprite into graphbuf.
;   In:       a  = x (pixel column), e = y (pixel row), bc = sprite address
;   Out:      nothing; 8 rows are XORed into the buffer based at 8e29h
;   Clobbers: af, b, de, hl; the unaligned path also clobbers c and ix
;   Stack:    one word (pushed on entry, popped on whichever path is taken)
;   Note:     no clipping is performed -- x and y are used as given.
; Savings versus the original routine: 6 bytes;
; 13 clocks if the sprite is aligned (3 + 7 + 3),
; 23 clocks if unaligned (the same 13 plus 10 for the removed jp).
SPRXOR:

    push    bc              ; keep the sprite address until a blit path is chosen

;====   Calculate the address in graphbuf   ====

    ld      hl,0
    ld      d,l             ; was ld d,0 (saves 3 clocks and 1 byte)
    add     hl,de
    add     hl,de
    add     hl,de           ; hl = 3*y
    add     hl,hl
    add     hl,hl           ; hl = 12*y (row stride is 12 bytes)

    ;ld     d,0             ; already at zero! (saves 7 clocks and 2 bytes:
                            ;  ld r,n costs 7 T-states, not 4)
    ld      e,a
    srl     e
    srl     e
    srl     e               ; e = x / 8 = byte column within the row
    add     hl,de
    ld      de,8e29h        ; base address of the buffer being drawn to
    add     hl,de           ; hl -> first destination byte
    ld      b,00000111b
    and     b               ; a = x mod 8 = bit offset inside the byte

    and     a               ; was cp 0 (saves 3 clocks and 1 byte)
                            ; NOTE(review): Z is already valid from "and b"
                            ; above, so this test is redundant; it is kept
                            ; so the routine stays exactly 6 bytes smaller
                            ; than the original, as advertised.
    jp      z,ALIGN

;====   Non aligned sprite blit starts here   ====
    pop     ix              ; ix = sprite address
    ld      d,a             ; d = shift count (1..7, never 0 on this path)
    ld      e,8             ; e = rows remaining

LILOP:
    ld      b,(ix+0)        ; b = sprite row; c collects bits shifted out
    ld      c,0
    push    de              ; the shift loop below consumes d

SHLOP:
    srl     b               ; shift the 16-bit pair b:c right by one pixel
    rr      c
    dec     d
    jr      nz,SHLOP
    pop     de              ; restore shift count and row counter
    ld      a,b
    xor     (hl)
    ld      (hl),a          ; left-hand byte
    inc     hl
    ld      a,c
    xor     (hl)
    ld      (hl),a          ; right-hand byte (spill-over bits)
    ld      bc,11
    add     hl,bc           ; 1 (inc hl) + 11 = 12 -> same column, next row
    inc     ix
    dec     e
    jr      nz,LILOP
    ret                     ; was jp done1 (saves 10 clocks and (3-1)=2 bytes)

;====   Aligned sprite blit starts here   ====
ALIGN:
    pop     de              ; de = sprite address
    ld      b,8             ; b = rows remaining

ALOP1:
    ld      a,(de)
    xor     (hl)
    ld      (hl),a
    inc     de
    push    bc              ; bc is borrowed for the row stride
    ld      bc,12
    add     hl,bc           ; advance to the next row
    pop     bc
    djnz    ALOP1
    ret
; Modified SPRXOR
 
I do admire the beauty of the routine, but I think such 
widely used code should be optimized as much as 
possible.
Follow-Ups: