suggestion on optimization

페이지 5/5
1 | 2 | 3 | 4 |

By ARTRAG

Enlighted (6243)

ARTRAG의 아바타

11-04-2019, 22:31

hit9918 wrote:

but for RAM operations the accumulator (A) is better than HL!

21	ld	l,(ix+enemy_data.y+0)
21	ld	h,(ix+enemy_data.y+1)
22	ld	de,(tempy)
	
12	add	hl,de			; hl = enemy.y - (ymap + 128)
11	ld	de,128+16		; hl = enemy.y - (ymap + 128) + 128 + 16 >=0 
12	add	hl,de			; hl = enemy.y - ymap + 16 >=0
8	jr	nc,.invisible	; !(-16 <= enemy.y - ymap < 128)
--
107
14      ld a,(tempy+0)
21      add (ix+enemy_data.y+0)
5       ld e,a
14      ld a,(tempy+1)
21      adc (ix+enemy_data.y+1)
8       jr nz,.invisible                 ;high byte not 0 => outside 8bit window
5       ld a,e
8       cp 128+16
8       jr nc,.invisible
--
104

Even better, HL is still free to use! That can save a dozen cycles somewhere else.
The 16-bit version looks like it is faster and was easier to develop, but the opposite is the case.

I've tested your patch, but I get a mess...


	; layout of one 4-byte sprite entry in the RAM copy of the SAT
	struct sat
y		db	0		; vertical position
x		db	0		; horizontal position
f		db	0		; shape (pattern) number
c		db	0		; colour; bit 7 is the EC flag (see "or 128 ; set EC")
	ends


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
;	plot enemies and bullets if visible in the current SAT in ram
;
;	depends on xmap,ymap
;
;	in:   (alt_ram_sat) -> SAT buffer in RAM that gets filled
;	      enemies       =  array of enemy_data records
;	                       (max_enem + max_plyr_bullets + max_enem_bullets entries)
;	out:  (alt_visible_sprts) = number of SAT planes written
;	      per object: status bit 7 set/reset = visible/invisible,
;	                  .plane = first SAT plane used (visible objects only)
;	uses: af, bc, de, hl, ix, iy, tempx, tempy
;
;	The Y window test is done in 8 bits on A (HL stays free there);
;	the X window test is done in 16 bits on HL.
;	NOTE(review): tempy now holds 16-ymap instead of -128-ymap; confirm
;	no other routine reads tempy expecting the old bias.

_plot_enemy:

	ld	iy,(alt_ram_sat)
	ld	ix,enemies
	ld	bc,(max_enem + max_plyr_bullets + max_enem_bullets)*256+0	; b = objects to scan, c = next SAT plane

	ld	hl,16
	ld	de,(ymap)
	and	a
	sbc	hl,de
	ld	(tempy),hl		; tempy = 16 - ymap

	ld	hl,32
	ld	de,(xmap)
	and	a				; previous sbc borrows as soon as ymap > 16, so cf must be cleared
	sbc	hl,de
	ld	(tempx),hl		; tempx = 32 - xmap

.npc_loop1:
	bit	0,(ix+enemy_data.status)
	jp	z,.invisible	; status bit 0 clear => object is not plotted

	; --- Y test: visible when 0 <= enemy.y - ymap + 16 < 128+16 ---
	ld	a,(tempy+0)
	add	a,(ix+enemy_data.y+0)
	ld	e,a				; e = low byte of enemy.y - ymap + 16
	ld	a,(tempy+1)
	adc	a,(ix+enemy_data.y+1)
	jr	nz,.invisible	; high byte not 0 => outside the 8 bit window
	ld	a,e
	cp	128+16
	jr	nc,.invisible	; !(-16 <= enemy.y - ymap < 128)

	; cp does not modify A, so A still holds enemy.y - ymap + 16
	add	a,64-16			; a = enemy.y - ymap + 64
	ld	(iy+sat.y+0),a
	ld	(iy+sat.y+4),a	; not needed if single layer but in this way it is overall faster

	; --- X test: visible when 0 <= enemy.x - xmap + 32 < 256+32 ---
	ld	l,(ix+enemy_data.x+0)
	ld	h,(ix+enemy_data.x+1)
	ld	de,(tempx)

	add	hl,de			; hl = enemy.x - xmap + 32
	ld	de,-256-32
	add	hl,de			; hl = enemy.x - xmap - 256
	jr	c,.invisible	; !(-32 <= enemy.x - xmap < 256)

	ld	a,(ix+enemy_data.color)
	inc	h				; h was 0xFF (now 0, z) when 0 <= enemy.x - xmap < 256
	jp	z,.noec			; on screen without early clock, l = enemy.x - xmap
	or	128				; -32 <= dx < 0: set EC (or also clears cf for the sbc)
	sbc	hl,de			; add 32 (e=-32): l = enemy.x - xmap + 32
.noec:
	ld	e,a				; e = colour (+EC)
	ld	a,(ix+enemy_data.frame)
	ld	(iy+sat.x),l				; write X
	ld	(iy+sat.f),a				; write shape
	ld	(iy+sat.c),e				; write colour
	ld	(ix+enemy_data.plane),c		; save SAT plane
	inc	c
	set	7,(ix+enemy_data.status)	; set it as visible
	cp	16*4						; hard coded in the SPT: shapes >= 64 use two layers
	jp	nc,.two_layers

.one_layer:
	ld	de,sat			; advance iy by one SAT entry
	add	iy,de
	jp	.next

.invisible:
	res	7,(ix+enemy_data.status)	; set it as invisible

.next:
	ld	de,enemy_data	; advance ix to the next object record
	add	ix,de
	djnz	.npc_loop1

	ld	a,c
	ld	(alt_visible_sprts),a		; number of SAT planes in use
	ret

.two_layers:
	; in: a = shape, e = colour (+EC), l = X; first layer already fully written
	ld	(iy+sat.x+4),l				; second layer X
	add	a,4
	ld	(iy+sat.f+4),a				; second layer shape
	ld	a,e
	and	0xF0						; keep EC (and the upper colour bits)
	inc	a							; second layer is always black
	ld	(iy+sat.c+4),a
	inc	c							; the second layer takes one more plane
	ld	de,2*sat
	add	iy,de
	jp	.next

By ricbit

Champion (437)

ricbit의 아바타

12-04-2019, 18:28

ARTRAG wrote:

I've tested your patch with xmap-32 in tempx and with jp c.
It seems to work when X is in 128-256, but not outside that interval.
If I use jp m, it seems to work for x<256 but not for larger values.

I had written a unit test before posting the code here, so maybe my code can help you debug:

Unit test on github

The test is passing for all 65536 cases of HL, perhaps some assumption is now invalid? Maybe the carry flag is not always clear anymore?

페이지 5/5
1 | 2 | 3 | 4 |