A83: Optimized Movax's sprite routine
[Prev][Next][Index][Thread]
A83: Optimized Movax's sprite routine
It confounds me why the ever-so-popular sprite routine is
not optimized, even by simple methods (and a instead of cp 0, etc.).
Here is the optimized version, 6 bytes smaller:
; Modified SPRXOR
; XOR an 8x8 sprite into graphbuf.
;   In:       a  = x
;             e  = y
;             bc = sprite address (8 bytes, one per sprite row)
;   Clobbers: af, bc, de, hl; ix as well on the unaligned path.
;   No clipping is performed: x and y are used as given.
;
; Savings over the routine this was derived from (per the "was ..." notes
; below): 21 clocks when the sprite is byte-aligned, 31 clocks when it is
; not, and 8 bytes of code.
SPRXOR:
        push    bc              ; park the sprite address until a path is chosen

;==== Calculate the address in graphbuf ====
        ld      hl,0
        ld      d,l             ; d = 0; was ld d,0 (saves 3 clocks and 1 byte)
        add     hl,de           ; hl = y
        add     hl,de           ; hl = 2*y
        add     hl,de           ; hl = 3*y
        add     hl,hl           ; hl = 6*y
        add     hl,hl           ; hl = 12*y (12 bytes per buffer row)
        ; A second "ld d,0" used to sit here; d is already zero
        ; (saves 7 clocks and 2 bytes).
        ld      e,a
        srl     e
        srl     e
        srl     e               ; de = x / 8 = byte column within the row
        add     hl,de
        ld      de,8e29h        ; base address of graphbuf
        add     hl,de           ; hl -> first buffer byte touched by the sprite

        ; a = x mod 8 = bit offset inside that byte. AND sets Z by itself, so
        ; no separate zero test is needed, and B is reloaded on both paths
        ; before it is read, so the mask does not have to go through B.
        ; Was ld b,00000111b / and b / cp 0 (saves 11 clocks and 3 bytes).
        and     00000111b
        jp      z,ALIGN

;==== Non aligned sprite blit starts here ====
        pop     ix              ; ix -> sprite data
        ld      d,a             ; d = number of bits to shift right (1..7)
        ld      e,8             ; e = rows remaining
LILOP:
        ld      b,(ix+0)        ; b = sprite row, becomes the left-hand byte
        ld      c,0             ; c collects the bits shifted out of b
        push    de              ; keep shift count and row counter intact
SHLOP:
        srl     b
        rr      c               ; carry out of b enters the top of c
        dec     d
        jr      nz,SHLOP
        pop     de
        ld      a,b
        xor     (hl)
        ld      (hl),a          ; left-hand byte
        inc     hl
        ld      a,c
        xor     (hl)
        ld      (hl),a          ; right-hand byte
        ld      bc,11
        add     hl,bc           ; 1 + 11 = 12: same column, next buffer row
        inc     ix
        dec     e
        jr      nz,LILOP
        ret                     ; was jp done1 (saves 10 clocks and (3-1)=2 bytes)

;==== Aligned sprite blit starts here ====
ALIGN:
        pop     de              ; de -> sprite data
        ld      b,8             ; b = rows remaining
ALOP1:
        ld      a,(de)
        xor     (hl)
        ld      (hl),a
        inc     de
        push    bc              ; bc is borrowed for the row stride
        ld      bc,12
        add     hl,bc           ; next buffer row
        pop     bc
        djnz    ALOP1
        ret
; Modified SPRXOR
I do admire the beauty of the routine, but I think such
widely used code should be optimized as much as
possible.
Follow-Ups: