Assember: 16bit bubble sort

Страница 4/5
1 | 2 | 3 | | 5

By retrocanada76

Champion (480)

Аватар пользователя retrocanada76

05-09-2011, 19:35

at work we use only heapsort. The best for games. Of course we program in C++ only Cool, but is farly simple to implement in assembly

By ARTRAG

Enlighted (6690)

Аватар пользователя ARTRAG

22-10-2011, 10:13

In the end I finished to adopt combsort11 ( http://en.wikipedia.org/wiki/Comb_sort )

numSprites is a costant for the number of elements to sort
dist holds the values with respect to do comparisons
ord holds the order of the element

here follows the C :


//arrays used to sort the sprites
uint spriteDistance[numSprites];

//arrays used to sort the sprites
int spriteOrder[numSprites];

static uchar i,j,it;
uint	dt;
uchar gap ;
char swapped;

uchar amount=numSprites;

//sort algorithm
void combSort(int* order, uint* dist)
{
  gap = amount;
  swapped = 0;
  while(gap > 1 || swapped)
  {
    //shrink factor 1.3
    gap = (gap * 10) / 13;
    if(gap == 9 || gap == 10) gap = 11;
    if (gap < 1) gap = 1;
    swapped = 0;
    for (i = 0; i < amount - gap; i++)
    {
      j = i + gap;
      if (dist[i] < dist[j])
      {
        dt=dist[i];
        dist[i]=dist[j];
        dist[j]=dt;
		
        it=order[i];
        order[i]=order[j];
        order[j]=it;
        swapped = 1;
      }
    }
  }
}

and here its ASM version


	global	_gap
	global	_amount
	global	_swapped
	global	_dt
	global	adiv
	global	wrelop
	file	"SPRITES.C"
	line	55
_combSort:
	push	ix
	ld	ix,0
	add	ix,sp
	push	bc
	push	iy
;_order stored from de
	ld	(ix+-2),e
	ld	(ix+-1),d
; _dist loaded to iy
	line	56
	push	bc
	pop	iy
;SPRITES.C: 56: gap = amount;
	ld	a,(_amount)
	ld	(_gap),a
;SPRITES.C: 57: swapped = 0;
	line	57
	xor	a
	ld	(_swapped),a
;SPRITES.C: 58: while(gap > 1 || swapped)
	line	58
	jp	l6
	line	66
l11:
;SPRITES.C: 66: {
;SPRITES.C: 67: j = i + gap;
	line	67
	ld	a,(_gap)
	ld	c,a
	ld	a,(_i)
	add	a,c
	ld	(_j),a
;SPRITES.C: 68: if (dist[i] < dist[j])
	line	68
	push	iy
	pop	de
	ld	l,a
	ld	h,0
	add	hl,hl
	add	hl,de
	ld	c,(hl)
	inc	hl
	ld	b,(hl)
	push	iy
	pop	de
	ld	a,(_i)
	ld	l,a
	ld	h,0
	add	hl,hl
	add	hl,de
	ld	a,(hl)
	inc	hl
	ld	h,(hl)
	ld	l,a
	or	a
	sbc	hl,bc
	jp	nc,l15
;SPRITES.C: 69: {
	line	70
	push	iy
	pop	de
	ld	a,(_i)
	ld	l,a
	ld	h,0
	add	hl,hl
	add	hl,de
	ld	c,(hl)
	inc	hl
	ld	b,(hl)
	ld	(_dt),bc
;SPRITES.C: 71: dist[i]=dist[j];
	line	71
	push	iy
	pop	de
	ld	a,(_j)
	ld	l,a
	ld	h,0
	add	hl,hl
	add	hl,de
	ld	c,(hl)
	inc	hl
	ld	b,(hl)
	push	iy
	pop	de
	ld	a,(_i)
	ld	l,a
	ld	h,0
	add	hl,hl
	add	hl,de
	ld	(hl),c
	inc	hl
	ld	(hl),b
;SPRITES.C: 72: dist[j]=dt;
	line	72
	ld	bc,(_dt)
	push	iy
	pop	de
	ld	a,(_j)
	ld	l,a
	ld	h,0
	add	hl,hl
	add	hl,de
	ld	(hl),c
	inc	hl
	ld	(hl),b
;SPRITES.C: 74: it=order[i];
	line	74
	ld	e,(ix+-2)
	ld	d,(ix+-1)
	ld	a,(_i)
	ld	l,a
	ld	h,0
	add	hl,hl
	add	hl,de
	ld	a,(hl)
	ld	(_it),a
;SPRITES.C: 75: order[i]=order[j];
; _order loaded to de
	line	75
	ld	a,(_j)
	ld	l,a
	ld	h,0
	add	hl,hl
	add	hl,de
	ld	c,(hl)
	inc	hl
	ld	b,(hl)
	ld	a,(_i)
	ld	l,a
	ld	h,0
	add	hl,hl
	add	hl,de
	ld	(hl),c
	inc	hl
	ld	(hl),b
;SPRITES.C: 76: order[j]=it;
	line	76
	ld	a,(_it)
	ld	c,a
	ld	b,0
	ld	a,(_j)
	ld	l,a
	ld	h,b
	add	hl,hl
	add	hl,de
	ld	(hl),c
	inc	hl
	ld	(hl),b
;SPRITES.C: 77: swapped = 1;
	line	77
	ld	a,01h
	ld	(_swapped),a
	line	78
l15:
;SPRITES.C: 78: }
	line	65
	ld	a,(_i)
	inc	a
L1:
	ld	(_i),a
	ld	a,(_gap)
	ld	e,a
	ld	d,0
	ld	a,(_amount)
	ld	l,a
	ld	h,d
	or	a
	sbc	hl,de
	ex	de,hl
	ld	a,(_i)
	ld	l,a
	call	wrelop
	jp	m,l11
	line	80
l6:
;SPRITES.C: 80: }
	line	58
	ld	a,(_gap)
	cp	02h
	jp	nc,l7
	ld	a,(_swapped)
	or	a
	jp	nz,l7
;SPRITES.C: 81: }
	line	81
	pop	iy
	ld	sp,ix
	pop	ix
	ret	
l7:
;SPRITES.C: 59: {
	line	61
	ld	de,0Dh
	ld	a,(_gap)
	ld	l,a
	ld	h,0
	add	hl,hl
	ld	b,h
	ld	c,l
	add	hl,hl
	add	hl,hl
	add	hl,bc
	call	adiv
	ld	a,l
	ld	(_gap),a
;SPRITES.C: 62: if(gap == 9 || gap == 10) gap = 11;
	line	62
	cp	09h
	jp	z,u30
	cp	0Ah
	jp	nz,l9
u30:
	ld	a,0Bh
	ld	(_gap),a
l9:
;SPRITES.C: 63: if (gap < 1) gap = 1;
	line	63
	ld	a,(_gap)
	cp	01h
	jp	nc,l10
	ld	a,01h
	ld	(_gap),a
l10:
;SPRITES.C: 64: swapped = 0;
	line	64
	xor	a
	ld	(_swapped),a
;SPRITES.C: 65: for (i = 0; i < amount - gap; i++)
	line	65
	jp	L1

By hit9918

Prophet (2911)

Аватар пользователя hit9918

23-10-2011, 04:29

@ARTRAG, I wonder, how about a bordercolor rasterdisplay showing how much time the parts of the engine need? With the irq as a breakpoint, one maybe can step thru in the debugger and then see which part shows longest bars.

About the sort, you can save the swap of dist[]!

     if (dist[order[i]] < dist[order[j]])
      {
        it=order[i];
        order[i]=order[j];
        order[j]=it;
        swapped = 1;
      }

With this version, in other parts of the app you got to acess dist[order[i]] instead dist[i].
This is more than paid off by sort accessing stuff very often and saving the swap of dist[].
Also other things of sprites like x y go like sprx[order[i]].

Within the loop, i and j just increment. This is asking for having an ipointer and jpointer instead of order[i] and order[j], much faster.

By ARTRAG

Enlighted (6690)

Аватар пользователя ARTRAG

23-10-2011, 08:57

Thanks for the tip
I was also thinking to change drastically the way I sort the sprites:

I could use transformY instead of spriteDistance !!!

transformY is the "Z" distance of the sprite from the screen
spriteDistance is the distance from the player

Using transformY for sorting sprites, I can avoid to compute spriteDistance at all, even if I need to rewrite the code and split the sections where I compute the screen projection by the section where I draw sprites.

This has priority on textures
WIP

By ARTRAG

Enlighted (6690)

Аватар пользователя ARTRAG

23-10-2011, 10:07

https://sites.google.com/site/testmsx/msx2-doom/mz3d_dos_2011_10_23.rar?attredirects=0&d=1

here the link to the last evolution of the raycaster:

Further speed improvements:

Sprite distance from the player is not used anymore: now, for sorting, I use the distance of sprites from the screen, that I have to compute in any case for scaling them.

The pros are :
- no more distances from the player to be computed
- I removed a bug introduced in the last optimization that allowed sprites behind the player to be traced under some conditions

The contra are:
- I need to sort an array of long (distance of sprites from the screen are computed as long)
- I had to split the sprite loop in two (look at it)

New controls:

Press space and walk against a wall to drill the maze
(It reminds Minecraft)
Shift + arrows for running
Graph + arrows for strafe

WIP on all the rest

@hit9918
Thanks, look at the new code and how I use the order[] array
Is there any speed trick I can use?

By ARTRAG

Enlighted (6690)

Аватар пользователя ARTRAG

23-10-2011, 10:14

BTW
what is the format for symbols to be used by openmsx debugger?

By PingPong

Prophet (3836)

Аватар пользователя PingPong

23-10-2011, 10:45


@hit9918
Thanks, look at the new code and how I use the order[] array
Is there any speed trick I can use?

about using pointers to access , instead of the subscribing form

, remember to declare those pointers as "static".
otherwise the compiler will allocate those vars into the stack, and will use the ix,iy registers to manage increments of those pointers, slowing down execution.

example:
static int* more_fast;
more_fast++;

asm: (somethink like)
ld hl,(_more_fast)
inc hl
ld (_more_fast),hl

example1:
int* not_so_fast;
not_so_fast++;

asm: (somethink like)
ld l,(ix + someoffset) ; reach the ptr low byte
ld h,(ix + someoffset+1); reach the ptr high byte
inc hl
ld (ix + someoffset),l ; reach the ptr low byte
ld (ix + someoffset+1),h; reach the ptr high byte

By ARTRAG

Enlighted (6690)

Аватар пользователя ARTRAG

23-10-2011, 11:58

moved to pointers... not sure if it is a gain here


//arrays used to sort the sprites
long spriteDistance[numSprites];

//arrays used to sort the sprites
int spriteOrder[numSprites];

static int i;
static long *idp,*jdp;
static int *iop,*jop;

long    dt;
int     it;

//sort algorithm
void combSort(int* order, long* dist, uint amount)
{
  int gap = amount;
  char swapped = 0;
  while(gap > 1 || swapped)
  {

	//shrink factor 1.3
	gap = (gap * 10) / 13;
	if(gap == 9 || gap == 10) gap = 11;
	if (gap < 1) gap = 1;
	swapped = 0;
	idp = dist;
	iop = order;
	for (i = 0; i < amount - gap; i++,idp++,iop++)
	{
		jdp = idp + gap;
		jop = iop + gap;
		if (*idp < *jdp)
		{

			dt=*idp;
			*idp=*jdp;
			*jdp=dt;
					
			it=*iop;
			*iop=*jop;
			*jop=it;
			swapped = 1;
		}
	}
  }
}


_combSort:
	push	ix
	ld	ix,0
	add	ix,sp
	push	bc
	push	bc
	dec	sp
	push	iy
;_dist stored from bc
	ld	(ix+-3),c
	ld	(ix+-2),b
;_order stored from de
	line	37
	ld	(ix+-5),e
	ld	(ix+-4),d
;SPRITES.C: 37: int gap = amount;
; _gap allocated to iy
	ld	l,(ix+4)
	ld	h,(ix+5)
	push	hl
	pop	iy
;SPRITES.C: 38: char swapped = 0;
	line	38
	ld	(ix+-1),0
;SPRITES.C: 39: while(gap > 1 || swapped)
	line	39
	jp	l10
	line	50
l15:
;SPRITES.C: 50: {
;SPRITES.C: 51: jdp = idp + gap;
	line	51
	push	iy
	pop	hl
	add	hl,hl
	add	hl,hl
	ld	de,(_idp)
	add	hl,de
	ld	(_jdp),hl
;SPRITES.C: 52: jop = iop + gap;
	line	52
	push	iy
	pop	hl
	add	hl,hl
	ld	de,(_iop)
	add	hl,de
	ld	(_jop),hl
;SPRITES.C: 53: if (*idp < *jdp)
	line	53
	ld	hl,(_jdp)
	ld	e,(hl)
	inc	hl
	ld	d,(hl)
	inc	hl
	ld	a,(hl)
	inc	hl
	ld	h,(hl)
	ld	l,a
	push	hl
	push	de
	ld	hl,(_idp)
	ld	e,(hl)
	inc	hl
	ld	d,(hl)
	inc	hl
	ld	a,(hl)
	inc	hl
	ld	h,(hl)
	ld	l,a
	call	arelop
	jp	p,l19
;SPRITES.C: 54: {
	line	56
	ld	hl,(_idp)
	ld	e,(hl)
	inc	hl
	ld	d,(hl)
	inc	hl
	ld	a,(hl)
	inc	hl
	ld	h,(hl)
	ld	l,a
	ld	(_dt),de
	ld	(_dt+02h),hl
;SPRITES.C: 57: *idp=*jdp;
	line	57
	ld	hl,(_jdp)
	ld	e,(hl)
	inc	hl
	ld	d,(hl)
	inc	hl
	ld	a,(hl)
	inc	hl
	ld	h,(hl)
	ld	l,a
	push	hl
	push	de
	ld	hl,(_idp)
	pop	de
	ld	(hl),e
	inc	hl
	ld	(hl),d
	inc	hl
	pop	bc
	ld	(hl),c
	inc	hl
	ld	(hl),b
;SPRITES.C: 58: *jdp=dt;
	line	58
	ld	de,(_dt)
	ld	bc,(_dt+02h)
	ld	hl,(_jdp)
	ld	(hl),e
	inc	hl
	ld	(hl),d
	inc	hl
	ld	(hl),c
	inc	hl
	ld	(hl),b
;SPRITES.C: 60: it=*iop;
	line	60
	ld	hl,(_iop)
	ld	c,(hl)
	inc	hl
	ld	b,(hl)
	ld	(_it),bc
;SPRITES.C: 61: *iop=*jop;
	line	61
	ld	hl,(_jop)
	ld	c,(hl)
	inc	hl
	ld	b,(hl)
	ld	hl,(_iop)
	ld	(hl),c
	inc	hl
	ld	(hl),b
;SPRITES.C: 62: *jop=it;
	line	62
	ld	bc,(_it)
	ld	hl,(_jop)
	ld	(hl),c
	inc	hl
	ld	(hl),b
;SPRITES.C: 63: swapped = 1;
	line	63
	ld	(ix+-1),01h
	line	64
l19:
;SPRITES.C: 64: }
	line	49
	ld	hl,(_i)
	inc	hl
	ld	(_i),hl
	ld	hl,(_idp)
	inc	hl
	inc	hl
	inc	hl
	inc	hl
	ld	(_idp),hl
	ld	hl,(_iop)
	inc	hl
	inc	hl
	ld	(_iop),hl
l18:
	push	iy
	pop	de
	ld	l,(ix+4)
	ld	h,(ix+5)
	or	a
	sbc	hl,de
	ex	de,hl
	ld	hl,(_i)
	or	a
	sbc	hl,de
	jp	c,l15
	line	66
l10:
;SPRITES.C: 66: }
	line	39
	ld	de,02h
	push	iy
	pop	hl
	call	wrelop
	jp	p,l11
	ld	a,(ix+-1)
	or	a
	jp	nz,l11
;SPRITES.C: 67: }
	line	67
	pop	iy
	ld	sp,ix
	pop	ix
	pop	hl
	pop	af
	jp	(hl)
l11:
;SPRITES.C: 40: {
	line	43
	ld	de,0Dh
	push	iy
	pop	hl
	add	hl,hl
	ld	b,h
	ld	c,l
	add	hl,hl
	add	hl,hl
	add	hl,bc
	call	adiv
	push	hl
	pop	iy
;SPRITES.C: 44: if(gap == 9 || gap == 10) gap = 11;
	line	44
	ld	a,l
	xor	09h
	or	h
	jp	z,u30
	push	iy
	pop	hl
	ld	a,l
	xor	0Ah
	or	h
	jp	nz,l13
u30:
	ld	iy,0Bh
l13:
;SPRITES.C: 45: if (gap < 1) gap = 1;
	line	45
	ld	de,01h
	push	iy
	pop	hl
	call	wrelop
	jp	p,l14
	ld	iy,01h
l14:
;SPRITES.C: 46: swapped = 0;
	line	46
	ld	(ix+-1),0
;SPRITES.C: 47: idp = dist;
	line	47
	ld	l,(ix+-3)
	ld	h,(ix+-2)
	ld	(_idp),hl
;SPRITES.C: 48: iop = order;
	line	48
	ld	l,(ix+-5)
	ld	h,(ix+-4)
	ld	(_iop),hl
;SPRITES.C: 49: for (i = 0; i < amount - gap; i++,idp++,iop++)
	line	49
	ld	hl,0
	ld	(_i),hl
	jp	l18

By Manuel

Ascended (18392)

Аватар пользователя Manuel

23-10-2011, 16:03

BTW
what is the format for symbols to be used by openmsx debugger?

As the open dialog reveals:
"tniASM 0.x symbol files (*.sym)"
"asMSX 0.x symbol files (*.sym)"
"HiTech link map files (*.map)"

By ARTRAG

Enlighted (6690)

Аватар пользователя ARTRAG

23-10-2011, 16:06

Prolly it is hitech for cpm. I have tried my files without resultd (I use the cross compiler)

Страница 4/5
1 | 2 | 3 | | 5