# Assember: 16bit bubble sort

Страница 4/5
1 | 2 | 3 | | 5

at work we use only heapsort. The best for games. Of course we program in C++ only , but is farly simple to implement in assembly

In the end I finished to adopt combsort11 ( http://en.wikipedia.org/wiki/Comb_sort )

numSprites is a costant for the number of elements to sort
dist holds the values with respect to do comparisons
ord holds the order of the element

here follows the C :

//arrays used to sort the sprites
uint spriteDistance[numSprites];

//arrays used to sort the sprites
int spriteOrder[numSprites];

static uchar i,j,it;
uint	dt;
uchar gap ;
char swapped;

uchar amount=numSprites;

//sort algorithm
void combSort(int* order, uint* dist)
{
gap = amount;
swapped = 0;
while(gap > 1 || swapped)
{
//shrink factor 1.3
gap = (gap * 10) / 13;
if(gap == 9 || gap == 10) gap = 11;
if (gap < 1) gap = 1;
swapped = 0;
for (i = 0; i < amount - gap; i++)
{
j = i + gap;
if (dist[i] < dist[j])
{
dt=dist[i];
dist[i]=dist[j];
dist[j]=dt;

it=order[i];
order[i]=order[j];
order[j]=it;
swapped = 1;
}
}
}
}

and here its ASM version

global	_gap
global	_amount
global	_swapped
global	_dt
global	wrelop
file	"SPRITES.C"
line	55
_combSort:
push	ix
ld	ix,0
push	bc
push	iy
;_order stored from de
ld	(ix+-2),e
ld	(ix+-1),d
line	56
push	bc
pop	iy
;SPRITES.C: 56: gap = amount;
ld	a,(_amount)
ld	(_gap),a
;SPRITES.C: 57: swapped = 0;
line	57
xor	a
ld	(_swapped),a
;SPRITES.C: 58: while(gap > 1 || swapped)
line	58
jp	l6
line	66
l11:
;SPRITES.C: 66: {
;SPRITES.C: 67: j = i + gap;
line	67
ld	a,(_gap)
ld	c,a
ld	a,(_i)
ld	(_j),a
;SPRITES.C: 68: if (dist[i] < dist[j])
line	68
push	iy
pop	de
ld	l,a
ld	h,0
ld	c,(hl)
inc	hl
ld	b,(hl)
push	iy
pop	de
ld	a,(_i)
ld	l,a
ld	h,0
ld	a,(hl)
inc	hl
ld	h,(hl)
ld	l,a
or	a
sbc	hl,bc
jp	nc,l15
;SPRITES.C: 69: {
line	70
push	iy
pop	de
ld	a,(_i)
ld	l,a
ld	h,0
ld	c,(hl)
inc	hl
ld	b,(hl)
ld	(_dt),bc
;SPRITES.C: 71: dist[i]=dist[j];
line	71
push	iy
pop	de
ld	a,(_j)
ld	l,a
ld	h,0
ld	c,(hl)
inc	hl
ld	b,(hl)
push	iy
pop	de
ld	a,(_i)
ld	l,a
ld	h,0
ld	(hl),c
inc	hl
ld	(hl),b
;SPRITES.C: 72: dist[j]=dt;
line	72
ld	bc,(_dt)
push	iy
pop	de
ld	a,(_j)
ld	l,a
ld	h,0
ld	(hl),c
inc	hl
ld	(hl),b
;SPRITES.C: 74: it=order[i];
line	74
ld	e,(ix+-2)
ld	d,(ix+-1)
ld	a,(_i)
ld	l,a
ld	h,0
ld	a,(hl)
ld	(_it),a
;SPRITES.C: 75: order[i]=order[j];
line	75
ld	a,(_j)
ld	l,a
ld	h,0
ld	c,(hl)
inc	hl
ld	b,(hl)
ld	a,(_i)
ld	l,a
ld	h,0
ld	(hl),c
inc	hl
ld	(hl),b
;SPRITES.C: 76: order[j]=it;
line	76
ld	a,(_it)
ld	c,a
ld	b,0
ld	a,(_j)
ld	l,a
ld	h,b
ld	(hl),c
inc	hl
ld	(hl),b
;SPRITES.C: 77: swapped = 1;
line	77
ld	a,01h
ld	(_swapped),a
line	78
l15:
;SPRITES.C: 78: }
line	65
ld	a,(_i)
inc	a
L1:
ld	(_i),a
ld	a,(_gap)
ld	e,a
ld	d,0
ld	a,(_amount)
ld	l,a
ld	h,d
or	a
sbc	hl,de
ex	de,hl
ld	a,(_i)
ld	l,a
call	wrelop
jp	m,l11
line	80
l6:
;SPRITES.C: 80: }
line	58
ld	a,(_gap)
cp	02h
jp	nc,l7
ld	a,(_swapped)
or	a
jp	nz,l7
;SPRITES.C: 81: }
line	81
pop	iy
ld	sp,ix
pop	ix
ret
l7:
;SPRITES.C: 59: {
line	61
ld	de,0Dh
ld	a,(_gap)
ld	l,a
ld	h,0
ld	b,h
ld	c,l
ld	a,l
ld	(_gap),a
;SPRITES.C: 62: if(gap == 9 || gap == 10) gap = 11;
line	62
cp	09h
jp	z,u30
cp	0Ah
jp	nz,l9
u30:
ld	a,0Bh
ld	(_gap),a
l9:
;SPRITES.C: 63: if (gap < 1) gap = 1;
line	63
ld	a,(_gap)
cp	01h
jp	nc,l10
ld	a,01h
ld	(_gap),a
l10:
;SPRITES.C: 64: swapped = 0;
line	64
xor	a
ld	(_swapped),a
;SPRITES.C: 65: for (i = 0; i < amount - gap; i++)
line	65
jp	L1

@ARTRAG, I wonder, how about a bordercolor rasterdisplay showing how much time the parts of the engine need? With the irq as a breakpoint, one maybe can step thru in the debugger and then see which part shows longest bars.

About the sort, you can save the swap of dist[]!

if (dist[order[i]] < dist[order[j]])
{
it=order[i];
order[i]=order[j];
order[j]=it;
swapped = 1;
}

With this version, in other parts of the app you got to acess dist[order[i]] instead dist[i].
This is more than paid off by sort accessing stuff very often and saving the swap of dist[].
Also other things of sprites like x y go like sprx[order[i]].

Within the loop, i and j just increment. This is asking for having an ipointer and jpointer instead of order[i] and order[j], much faster.

Thanks for the tip
I was also thinking to change drastically the way I sort the sprites:

I could use transformY instead of spriteDistance !!!

transformY is the "Z" distance of the sprite from the screen
spriteDistance is the distance from the player

Using transformY for sorting sprites, I can avoid to compute spriteDistance at all, even if I need to rewrite the code and split the sections where I compute the screen projection by the section where I draw sprites.

This has priority on textures
WIP

here the link to the last evolution of the raycaster:

Further speed improvements:

Sprite distance from the player is not used anymore: now, for sorting, I use the distance of sprites from the screen, that I have to compute in any case for scaling them.

The pros are :
- no more distances from the player to be computed
- I removed a bug introduced in the last optimization that allowed sprites behind the player to be traced under some conditions

The contra are:
- I need to sort an array of long (distance of sprites from the screen are computed as long)
- I had to split the sprite loop in two (look at it)

New controls:

Press space and walk against a wall to drill the maze
(It reminds Minecraft)
Shift + arrows for running
Graph + arrows for strafe

WIP on all the rest

@hit9918
Thanks, look at the new code and how I use the order[] array
Is there any speed trick I can use?

BTW
what is the format for symbols to be used by openmsx debugger?

@hit9918
Thanks, look at the new code and how I use the order[] array
Is there any speed trick I can use?

, remember to declare those pointers as "static".
otherwise the compiler will allocate those vars into the stack, and will use the ix,iy registers to manage increments of those pointers, slowing down execution.

example:
static int* more_fast;
more_fast++;

asm: (somethink like)
ld hl,(_more_fast)
inc hl
ld (_more_fast),hl

example1:
int* not_so_fast;
not_so_fast++;

asm: (somethink like)
ld l,(ix + someoffset) ; reach the ptr low byte
ld h,(ix + someoffset+1); reach the ptr high byte
inc hl
ld (ix + someoffset),l ; reach the ptr low byte
ld (ix + someoffset+1),h; reach the ptr high byte

moved to pointers... not sure if it is a gain here

//arrays used to sort the sprites
long spriteDistance[numSprites];

//arrays used to sort the sprites
int spriteOrder[numSprites];

static int i;
static long *idp,*jdp;
static int *iop,*jop;

long    dt;
int     it;

//sort algorithm
void combSort(int* order, long* dist, uint amount)
{
int gap = amount;
char swapped = 0;
while(gap > 1 || swapped)
{

//shrink factor 1.3
gap = (gap * 10) / 13;
if(gap == 9 || gap == 10) gap = 11;
if (gap < 1) gap = 1;
swapped = 0;
idp = dist;
iop = order;
for (i = 0; i < amount - gap; i++,idp++,iop++)
{
jdp = idp + gap;
jop = iop + gap;
if (*idp < *jdp)
{

dt=*idp;
*idp=*jdp;
*jdp=dt;

it=*iop;
*iop=*jop;
*jop=it;
swapped = 1;
}
}
}
}

_combSort:
push	ix
ld	ix,0
push	bc
push	bc
dec	sp
push	iy
;_dist stored from bc
ld	(ix+-3),c
ld	(ix+-2),b
;_order stored from de
line	37
ld	(ix+-5),e
ld	(ix+-4),d
;SPRITES.C: 37: int gap = amount;
; _gap allocated to iy
ld	l,(ix+4)
ld	h,(ix+5)
push	hl
pop	iy
;SPRITES.C: 38: char swapped = 0;
line	38
ld	(ix+-1),0
;SPRITES.C: 39: while(gap > 1 || swapped)
line	39
jp	l10
line	50
l15:
;SPRITES.C: 50: {
;SPRITES.C: 51: jdp = idp + gap;
line	51
push	iy
pop	hl
ld	de,(_idp)
ld	(_jdp),hl
;SPRITES.C: 52: jop = iop + gap;
line	52
push	iy
pop	hl
ld	de,(_iop)
ld	(_jop),hl
;SPRITES.C: 53: if (*idp < *jdp)
line	53
ld	hl,(_jdp)
ld	e,(hl)
inc	hl
ld	d,(hl)
inc	hl
ld	a,(hl)
inc	hl
ld	h,(hl)
ld	l,a
push	hl
push	de
ld	hl,(_idp)
ld	e,(hl)
inc	hl
ld	d,(hl)
inc	hl
ld	a,(hl)
inc	hl
ld	h,(hl)
ld	l,a
call	arelop
jp	p,l19
;SPRITES.C: 54: {
line	56
ld	hl,(_idp)
ld	e,(hl)
inc	hl
ld	d,(hl)
inc	hl
ld	a,(hl)
inc	hl
ld	h,(hl)
ld	l,a
ld	(_dt),de
ld	(_dt+02h),hl
;SPRITES.C: 57: *idp=*jdp;
line	57
ld	hl,(_jdp)
ld	e,(hl)
inc	hl
ld	d,(hl)
inc	hl
ld	a,(hl)
inc	hl
ld	h,(hl)
ld	l,a
push	hl
push	de
ld	hl,(_idp)
pop	de
ld	(hl),e
inc	hl
ld	(hl),d
inc	hl
pop	bc
ld	(hl),c
inc	hl
ld	(hl),b
;SPRITES.C: 58: *jdp=dt;
line	58
ld	de,(_dt)
ld	bc,(_dt+02h)
ld	hl,(_jdp)
ld	(hl),e
inc	hl
ld	(hl),d
inc	hl
ld	(hl),c
inc	hl
ld	(hl),b
;SPRITES.C: 60: it=*iop;
line	60
ld	hl,(_iop)
ld	c,(hl)
inc	hl
ld	b,(hl)
ld	(_it),bc
;SPRITES.C: 61: *iop=*jop;
line	61
ld	hl,(_jop)
ld	c,(hl)
inc	hl
ld	b,(hl)
ld	hl,(_iop)
ld	(hl),c
inc	hl
ld	(hl),b
;SPRITES.C: 62: *jop=it;
line	62
ld	bc,(_it)
ld	hl,(_jop)
ld	(hl),c
inc	hl
ld	(hl),b
;SPRITES.C: 63: swapped = 1;
line	63
ld	(ix+-1),01h
line	64
l19:
;SPRITES.C: 64: }
line	49
ld	hl,(_i)
inc	hl
ld	(_i),hl
ld	hl,(_idp)
inc	hl
inc	hl
inc	hl
inc	hl
ld	(_idp),hl
ld	hl,(_iop)
inc	hl
inc	hl
ld	(_iop),hl
l18:
push	iy
pop	de
ld	l,(ix+4)
ld	h,(ix+5)
or	a
sbc	hl,de
ex	de,hl
ld	hl,(_i)
or	a
sbc	hl,de
jp	c,l15
line	66
l10:
;SPRITES.C: 66: }
line	39
ld	de,02h
push	iy
pop	hl
call	wrelop
jp	p,l11
ld	a,(ix+-1)
or	a
jp	nz,l11
;SPRITES.C: 67: }
line	67
pop	iy
ld	sp,ix
pop	ix
pop	hl
pop	af
jp	(hl)
l11:
;SPRITES.C: 40: {
line	43
ld	de,0Dh
push	iy
pop	hl
ld	b,h
ld	c,l
push	hl
pop	iy
;SPRITES.C: 44: if(gap == 9 || gap == 10) gap = 11;
line	44
ld	a,l
xor	09h
or	h
jp	z,u30
push	iy
pop	hl
ld	a,l
xor	0Ah
or	h
jp	nz,l13
u30:
ld	iy,0Bh
l13:
;SPRITES.C: 45: if (gap < 1) gap = 1;
line	45
ld	de,01h
push	iy
pop	hl
call	wrelop
jp	p,l14
ld	iy,01h
l14:
;SPRITES.C: 46: swapped = 0;
line	46
ld	(ix+-1),0
;SPRITES.C: 47: idp = dist;
line	47
ld	l,(ix+-3)
ld	h,(ix+-2)
ld	(_idp),hl
;SPRITES.C: 48: iop = order;
line	48
ld	l,(ix+-5)
ld	h,(ix+-4)
ld	(_iop),hl
;SPRITES.C: 49: for (i = 0; i < amount - gap; i++,idp++,iop++)
line	49
ld	hl,0
ld	(_i),hl
jp	l18

BTW
what is the format for symbols to be used by openmsx debugger?

As the open dialog reveals:
"tniASM 0.x symbol files (*.sym)"
"asMSX 0.x symbol files (*.sym)"