# suggestion on optimization

페이지 3/5
1 | 2 | | 4 | 5

Oh, another thing:

```
	ld	de,128+16
	add	hl,de			; hl = enemy.y - ymap + 16
	jr	nc,.next		; no carry: !(-16 <= enemy.y - ymap < 128)
```

This makes sure that hl is in the range 0 to 128+16-1 whenever the jump is not taken.

This means that

```
	ld	a,l			; a = low byte of hl
	add	a,64-16			; a = enemy.y - ymap + 64
```

always clears the carry.

So the "and a" a bit further down isn't strictly necessary.

I've taken 2 out of 3 of your suggestions.
Thanks

```
; one sprite entry of the SAT copy kept in ram, as filled in by _plot_enemy
struct sat
y		db	0	; Y coordinate
x		db	0	; X coordinate
f		db	0	; shape (pattern) number
c		db	0	; colour byte, bit 7 is the EC flag
ends

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
;	plot enemies and bullets if visible in the current SAT in ram
;
;	depends on xmap,ymap
;
;	In:	(alt_ram_sat)  = address of the first SAT entry to fill
;		enemies        = first enemy_data record to scan
;		(xmap),(ymap)  = 16-bit map coordinates used as screen origin
;	Out:	(alt_visible_sprts) = number of SAT entries written
;		enemy_data.status bit 7 = 1 for every plotted record, else 0
;		enemy_data.plane        = first SAT plane of a plotted record
;	Uses:	af,bc,de,hl,ix,iy and the scratch words (tempx),(tempy)
;
;	A record is plotted when status bit 0 is set and
;		-16 <= enemy.y - ymap < 128   and   -32 <= enemy.x - xmap < 256
;	Frames >= 16*4 take two consecutive SAT entries (second one black).

_plot_enemy:

	ld	iy,(alt_ram_sat)	; iy -> SAT entry being filled
	ld	ix,enemies		; ix -> record being examined
	ld	bc,(max_enem + max_plyr_bullets + max_enem_bullets)*256+0
					; b = records left, c = SAT entries used

	ld	hl,-128
	ld	de,(ymap)
	and	a
	sbc	hl,de
	ld	(tempy),hl		; tempy = -(ymap + 128)

	ld	hl,(xmap)
	ld	de,-32
	add	hl,de			; apply the -32 bias (it was loaded but never added)
	ld	(tempx),hl		; tempx = xmap - 32

.npc_loop1:
	bit	0,(ix+enemy_data.status)
	jp	z,.invisible		; status bit 0 clear: record is not plotted

	ld	l,(ix+enemy_data.y+0)
	ld	h,(ix+enemy_data.y+1)
	ld	de,(tempy)

	add	hl,de			; hl = enemy.y - (ymap + 128)
	ld	de,128+16
	add	hl,de			; hl = enemy.y - ymap + 16, carry iff it is in 0..143
	jr	nc,.invisible		; !(-16 <= enemy.y - ymap < 128)

	ld	a,l
	add	a,64-16			; a = enemy.y - ymap + 64 (<= 191, so CF = 0)
	ld	(iy+sat.y+0),a		; stored before the X test; the entry is reused if X fails
	ld	(iy+sat.y+4),a		; not needed if single layer but in this way it is overall faster

	ld	l,(ix+enemy_data.x+0)
	ld	h,(ix+enemy_data.x+1)
	ld	de,(tempx)
	; CF is reset by previous add
	sbc	hl,de			; hl = enemy.x - xmap + 32
	jp	m,.invisible		; hl < 0  <==> dx = enemy.x - xmap < -32

	ld	de,32
	and	a			; tempx is negative while xmap < 32, and then the
					; sbc above borrows even though its result is >= 0
	sbc	hl,de			; hl = dx, carry iff dx < 0

	ld	a,(ix+enemy_data.color)	; ld does not touch the flags
	jp	nc,.noec		; dx >= 0: plain X
	or	128			; -32 <= dx < 0: set EC
	add	hl,de			; hl = dx + 32, so h = 0 again and l is the X to store
.noec:
	ld	e,a			; e = colour byte (d stays 0)
	ld	a,h
	and	a
	jp	nz,.invisible		; dx > 255

	ld	a,(ix+enemy_data.frame)
	ld	(iy+sat.x),l			; write X
	ld	(iy+sat.f),a			; write shape
	ld	(iy+sat.c),e			; write colour
	ld	(ix+enemy_data.plane),c		; save SAT plane
	inc	c
	set	7,(ix+enemy_data.status)	; set it as visible
	cp	16*4				; hard coded in the SPT
	jp	nc,.two_layers

.one_layer:
	ld	e,sat			; d = 0: de was last loaded with 32, only e changed
					; (presumably `sat` evaluates to the struct size)
	add	iy,de			; iy -> next free SAT entry
	jp	.next

.invisible:
	res	7,(ix+enemy_data.status)	; set it as invisible

.next:
	ld	de,enemy_data		; presumably the size of one enemy_data record
	add	ix,de			; ix -> next record
	djnz	.npc_loop1

	ld	a,c
	ld	(alt_visible_sprts),a
	ret

.two_layers:
	ld	(iy+sat.x+4),l			; second layer X
	ld	(iy+sat.f+4),a			; second layer shape
					; NOTE(review): same pattern number as the first
					; layer - confirm no offset is expected here
	ld	a,e
	and	0xF0				; keep the upper nibble (EC included)
	inc	a				; second layer is always black
	ld	(iy+sat.c+4),a
	inc	c
	ld	e,2*sat				; d is still 0 here
	add	iy,de				; skip both SAT entries
	jp	.next

```

OK.
I would probably have placed .invisible after the ret and made a jump back to .next.
It will cost a few cycles more in the case when many sprites are invisible, but in that case you save a lot of cycles by not writing it to the sat. (Same reasoning as why to use relative jumps to .invisible instead of absolute whenever possible.)

While it might be more costly when there are few sprites on the screen, the time when you want to save as many cycles as possible is when there are many active sprites.

This may or may not work but could be worth trying.

```
	ld	hl,32
	ld	de,(xmap)
	and	a			; clear carry for the sbc
	sbc	hl,de
	ld	(tempx),hl		; tempx = 32 - xmap
```
```
	ld	l,(ix+enemy_data.x+0)
	ld	h,(ix+enemy_data.x+1)
	ld	de,(tempx)		; de = 32 - xmap

	add	hl,de			; hl = enemy.x - xmap + 32
	ld	de,-256-32
	add	hl,de			; hl = enemy.x - xmap - 256
	jr	c,.invisible		; carry: !(-32 <= enemy.x - xmap < 256)

	ld	a,(ix+enemy_data.color)
	inc	h			; z if 0 <= enemy.x - xmap < 256
	jp	z,.noec			; on screen, no early clock needed
	or	128			; -32 <= dx < 0: set EC (and clear carry)
	sbc	hl,de			; add 32 (e=-32)
.noec:
	ld	h,(ix+enemy_data.frame)
	ld	(iy+sat.x),l		; write X
	ld	(iy+sat.f),h		; write shape
	ld	(iy+sat.c),a		; write colour
	ld	(ix+enemy_data.plane),c	; save SAT plane
```

Range check is inverted compared to y and sets carry if out of range.

Hmm, I've tested your last modification and it does not work. I will try to investigate it.
About jumps I expect to have more objects off screen than on screen, so .invisible should be the preferred branch
Moreover I expect more two colors items on screen than single layer items (bullets)
Anyway I'm very happy about the current level of optimization, thanks a lot!

I shuffled around the registers used for writing to sat at the end, so the .two_layers writes have to be updated too.
I also didn't see the "cp 16*4" so you might need to change the last part to get back to the old register usage

```
.noec:
	ld	e,a			; e = colour byte
	ld	a,(ix+enemy_data.frame)	; a = frame, as the later cp 16*4 expects
	ld	(iy+sat.x),l		; write X
	ld	(iy+sat.f),a		; write shape
	ld	(iy+sat.c),e		; write colour
	ld	(ix+enemy_data.plane),c	; save SAT plane
```

If you expect to mostly have two layers you could switch place of .two_layers and .one_layer and use relative jump again since it will fall through faster.

OK, it works, thanks! I had to revert ld e,sat to ld de,sat, but overall it is much faster now.
This is the last version

```
; one sprite entry of the SAT copy kept in ram, as filled in by _plot_enemy
struct sat
y		db	0	; Y coordinate
x		db	0	; X coordinate
f		db	0	; shape (pattern) number
c		db	0	; colour byte, bit 7 is the EC flag
ends

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
;	plot enemies and bullets if visible in the current SAT in ram
;
;	depends on xmap,ymap
;
;	In:	(alt_ram_sat)  = address of the first SAT entry to fill
;		enemies        = first enemy_data record to scan
;		(xmap),(ymap)  = 16-bit map coordinates used as screen origin
;	Out:	(alt_visible_sprts) = number of SAT entries written
;		enemy_data.status bit 7 = 1 for every plotted record, else 0
;		enemy_data.plane        = first SAT plane of a plotted record
;	Uses:	af,bc,de,hl,ix,iy and the scratch words (tempx),(tempy)
;
;	A record is plotted when status bit 0 is set and
;		-16 <= enemy.y - ymap < 128   and   -32 <= enemy.x - xmap < 256
;	Frames >= 16*4 take two consecutive SAT entries (second one black).

_plot_enemy:

	ld	iy,(alt_ram_sat)	; iy -> SAT entry being filled
	ld	ix,enemies		; ix -> record being examined
	ld	bc,(max_enem + max_plyr_bullets + max_enem_bullets)*256+0
					; b = records left, c = SAT entries used

	ld	hl,-128
	ld	de,(ymap)
	and	a
	sbc	hl,de
	ld	(tempy),hl		; tempy = -(ymap + 128)

	ld	hl,32
	ld	de,(xmap)
	sbc	hl,de			; cf is reset by previous sbc
					; (assumes ymap <= 0xFF80, so it did not borrow)
	ld	(tempx),hl		; tempx = 32 - xmap

.npc_loop1:
	bit	0,(ix+enemy_data.status)
	jp	z,.invisible		; status bit 0 clear: record is not plotted

	ld	l,(ix+enemy_data.y+0)
	ld	h,(ix+enemy_data.y+1)
	ld	de,(tempy)

	add	hl,de			; hl = enemy.y - (ymap + 128)
	ld	de,128+16
	add	hl,de			; hl = enemy.y - ymap + 16, carry iff it is in 0..143
	jr	nc,.invisible		; !(-16 <= enemy.y - ymap < 128)

	ld	a,l
	add	a,64-16			; a = enemy.y - ymap + 64
	ld	(iy+sat.y+0),a		; stored before the X test; the entry is reused if X fails
	ld	(iy+sat.y+4),a		; not needed if single layer but in this way it is overall faster

	ld	l,(ix+enemy_data.x+0)
	ld	h,(ix+enemy_data.x+1)
	ld	de,(tempx)

	add	hl,de			; hl = enemy.x - xmap + 32
	ld	de,-256-32
	add	hl,de			; hl = enemy.x - xmap - 256
	jr	c,.invisible		; carry: !(-32 <= enemy.x - xmap < 256)

	ld	a,(ix+enemy_data.color)
	inc	h			; z if 0 <= enemy.x - xmap < 256
	jp	z,.noec			; on screen, no early clock needed
	or	128			; -32 <= dx < 0: set EC (and clear carry)
	sbc	hl,de			; add 32 (e=-32)
.noec:
	ld	e,a			; e = colour byte; d is not 0 on this path
	ld	a,(ix+enemy_data.frame)
	ld	(iy+sat.x),l			; write X
	ld	(iy+sat.f),a			; write shape
	ld	(iy+sat.c),e			; write colour
	ld	(ix+enemy_data.plane),c		; save SAT plane
	inc	c
	set	7,(ix+enemy_data.status)	; set it as visible
	cp	16*4				; hard coded in the SPT
	jp	nc,.two_layers

.one_layer:
	ld	de,sat			; full 16-bit load, d still holds part of -256-32
					; (presumably `sat` evaluates to the struct size)
	add	iy,de			; iy -> next free SAT entry
	jp	.next

.invisible:
	res	7,(ix+enemy_data.status)	; set it as invisible

.next:
	ld	de,enemy_data		; presumably the size of one enemy_data record
	add	ix,de			; ix -> next record
	djnz	.npc_loop1

	ld	a,c
	ld	(alt_visible_sprts),a
	ret

.two_layers:
	ld	(iy+sat.x+4),l			; second layer X
	ld	(iy+sat.f+4),a			; second layer shape
					; NOTE(review): same pattern number as the first
					; layer - confirm no offset is expected here
	ld	a,e
	and	0xF0				; keep the upper nibble (EC included)
	inc	a				; second layer is always black
	ld	(iy+sat.c+4),a
	inc	c
	ld	de,2*sat
	add	iy,de				; skip both SAT entries
	jp	.next

```

I was able to optimize it a bit.

Here's the original version:

```
; Clock counts per case for the X test of the original routine.
; On entry: hl = enemy.x, de = tempx, carry clear.
;
; x - xmap < -32      ; 28 clocks
; -32 <= x - xmap < 0 ; 134 clocks
; 0 <= x - xmap < 256 ; 114 clocks
; x - xmap >= 256     ; 114 clocks

original:
        sbc  hl, de                    ; 17   hl = x - xmap + 32
        jp   m, invisible              ; 11

        ld   de, 32                    ; 11
        sbc  hl, de                    ; 17   hl = x - xmap, carry if negative

        ld   a, (ix+enemy_data_color)  ; 21
        jp   nc, noec                  ; 11
        or   128                       ; 8    set EC
        add  hl, de                    ; 12   re-bias X by 32 (134 = 114 + 8 + 12)
noec:
        ld   e, a                      ; 5
        ld   a, h                      ; 5
        and  a                         ; 5
        jp   nz, invisible             ; 11   x - xmap > 255
```

Here's my proposal:

```
; Clock counts per case for the proposed X test.
; On entry: hl = enemy.x, de = tempx, carry clear.
; On exit:  l = X to store, e = colour byte with EC merged in.
;
; x - xmap < -32      ; 28 clocks
; -32 <= x - xmap < 0 ; 106 clocks
; 0 <= x - xmap < 256 ; 114 clocks
; x - xmap >= 256     ; 80 clocks
;
; NOTE(review): jp c is an unsigned test where the original used jp m;
; the two presumably agree only while tempx has not wrapped below zero.

proposal:
        sbc  hl, de                    ; 17     hl = x - xmap + 32
        jp   c, invisible              ; 11

        ld   a, l                      ; 5
        sub  32                        ; 8
        ld   e, a                      ; 5      e = low byte of x - xmap
        ld   a, h                      ; 5
        sbc  a, 0                      ; 8      a = high byte of x - xmap
        jr   c, has_ec                 ; 13/8   negative: keep l = x - xmap + 32
        jr   nz, invisible             ; 13/8   x - xmap > 255

        ld   l, e                      ; 5      l = x - xmap

has_ec:
        and  128                       ; 8      a = 0xFF on the EC path, else 0
        or   (ix+enemy_data_color)     ; 21
        ld   e, a                      ; 5
```

Hi Ricbit, thanks for the suggestions
I will try to include them tonight!

This was a mix of superoptimization and coding by hand (superopt currently can't deal with the zero flag).

페이지 3/5
1 | 2 | | 4 | 5