Assembler Optimizer

Page 8/18
1 | 2 | 3 | 4 | 5 | 6 | 7 | | 9 | 10 | 11 | 12 | 13

By Metalion

Paragon (1149)

Metalion's picture

02-07-2020, 20:44

I do not understand those optimizations :

PatternBasedOptimizer substitution in threed/lib/neonlib/src/VDP.asm, line 21: 2 bytes saved
    cp 1
    jr c, MSX1
    jr z, MSX2
Replaced by:
    cp 1 + 1
    jr c, MSX2

How does it work the same way ?
Where is the jump to MSX1 ?

PatternBasedOptimizer substitution in threed/src/Application.asm, line 236: 4 bytes saved
    ld ix, Application_points
Replaced by:

PatternBasedOptimizer substitution in threed/src/Application.asm, line 243: 4 bytes saved
    ld ix, Application_edges
Replaced by:

Replaced by what ??
They seem to have been just erased ("4 bytes saved").

By santiontanon

Paladin (1014)

santiontanon's picture

02-07-2020, 20:50

@Grauw: oh, good catch! it was missing indeed, added Smile
And oh, that error is interesting, I thought I had that case covered. Let me investigate!

@Metalion: yeah, I need to improve the output haha, that means that it was removed, as it was unnecessary code (those values were never used)

By santiontanon

Paladin (1014)

santiontanon's picture

02-07-2020, 20:51

@Metalion: oh!!!! good catch on the MSX1/MSX2 jump, I didn't realize it when I checked for output correctness! there must be a bug in my pattern matching code. Added to the to-do list, thanks for spotting it! Smile

By hit9918

Prophet (2893)

hit9918's picture

02-07-2020, 21:01

it would be cool if it could deal with SDCC code

;app.c:12: while(1) {
00102$:
;app.c:13: p->x += p->dx;
	push	bc
	pop	iy
	inc	iy
	inc	iy
	ld	e, 0 (iy)
	ld	d, 1 (iy)
	ld	l, c
	ld	h, b
	ld	a, (hl)
	inc	hl
	ld	h, (hl)
	ld	l, a
	add	hl, de
	ld	0 (iy), l
	ld	1 (iy), h
	jr	00102$
;app.c:15: }
--
199 cycles

By santiontanon

Paladin (1014)

santiontanon's picture

02-07-2020, 22:55

Oh, good idea, I'll add it to my to-do list, is there any example SDCC-generated assembler codebase publicly available that I could use to test?

By hit9918

Prophet (2893)

hit9918's picture

03-07-2020, 01:03

I find pointers and addresses to be the top problem in SDCC.

some examples

#include "string.h"
#include "math.h"


int main() {
        return 0;
}

struct obj { int dx; int x; int dy; int y; };
void test(struct obj **arr, int n) {
	struct obj *p = *arr++;
	while(1) {
                p->x += p->dx;
        }
}


struct node {  
        struct node *next; 
        int dx; int x; int dy; int y; int dz; int z;
        struct node *prev;
};
typedef struct node node;


void test1(node *p) {
        while(p) {
                p->x += p->dx;
                p->y += p->dy;
                p->z += p->dz;
                p = p->next;
        }
}

void test2(node *p0) {
        node *p = p0;
        int *q = 0; int w = 0; int w2 = 0;
        while(p) {
                q = &(p->dx);
                w = *q; q++; w2 = *q; w2 += w; *q = w2; q++;
                w = *q; q++; w2 = *q; w2 += w; *q = w2; q++;
                w = *q; q++; w2 = *q; w2 += w; *q = w2; 
                p = p->next;
        }
}
;--------------------------------------------------------
; File Created by SDCC : free open source ANSI-C Compiler
; Version 3.8.0 #10557 (MINGW64)
;--------------------------------------------------------
	.module app
	.optsdcc -mz80
	
;--------------------------------------------------------
; Public variables in this module
;--------------------------------------------------------
	.globl _test2
	.globl _test1
	.globl _test
	.globl _main
;--------------------------------------------------------
; special function registers
;--------------------------------------------------------
;--------------------------------------------------------
; ram data
;--------------------------------------------------------
	.area _DATA
;--------------------------------------------------------
; ram data
;--------------------------------------------------------
	.area _INITIALIZED
;--------------------------------------------------------
; absolute external ram data
;--------------------------------------------------------
	.area _DABS (ABS)
;--------------------------------------------------------
; global & static initialisations
;--------------------------------------------------------
	.area _HOME
	.area _GSINIT
	.area _GSFINAL
	.area _GSINIT
;--------------------------------------------------------
; Home
;--------------------------------------------------------
	.area _HOME
	.area _HOME
;--------------------------------------------------------
; code
;--------------------------------------------------------
	.area _CODE
;app.c:5: int main() {
;	---------------------------------
; Function main
; ---------------------------------
_main::
;app.c:6: return 0;
	ld	hl, #0x0000
;app.c:7: }
	ret
;app.c:10: void test(struct obj **arr, int n) {
;	---------------------------------
; Function test
; ---------------------------------
_test::
	push	ix
	ld	ix,#0
	add	ix,sp
;app.c:11: struct obj *p = *arr++;
	ld	l, 4 (ix)
	ld	h, 5 (ix)
	ld	c, (hl)
	inc	hl
	ld	b, (hl)
;app.c:12: while(1) {
00102$:
;app.c:13: p->x += p->dx;
	push	bc
	pop	iy
	inc	iy
	inc	iy
	ld	e, 0 (iy)
	ld	d, 1 (iy)
	ld	l, c
	ld	h, b
	ld	a, (hl)
	inc	hl
	ld	h, (hl)
	ld	l, a
	add	hl, de
	ld	0 (iy), l
	ld	1 (iy), h
	jr	00102$
;app.c:15: }
	pop	ix
	ret
;app.c:26: void test1(node *p) {
;	---------------------------------
; Function test1
; ---------------------------------
_test1::
	push	ix
	ld	ix,#0
	add	ix,sp
	push	af
;app.c:27: while(p) {
00101$:
	ld	a, 5 (ix)
	or	a, 4 (ix)
	jr	Z,00104$
;app.c:28: p->x += p->dx;
	ld	c, 4 (ix)
	ld	b, 5 (ix)
	ld	hl, #0x0004
	add	hl, bc
	ex	(sp), hl
	pop	hl
	push	hl
	ld	e, (hl)
	inc	hl
	ld	d, (hl)
	ld	l, c
	ld	h, b
	inc	hl
	inc	hl
	ld	a, (hl)
	inc	hl
	ld	h, (hl)
	ld	l, a
	add	hl, de
	ex	de, hl
	pop	hl
	push	hl
	ld	(hl), e
	inc	hl
	ld	(hl), d
;app.c:29: p->y += p->dy;
	ld	hl, #0x0008
	add	hl, bc
	ex	(sp), hl
	pop	hl
	push	hl
	ld	e, (hl)
	inc	hl
	ld	d, (hl)
	push	bc
	pop	iy
	ld	l, 6 (iy)
	ld	h, 7 (iy)
	add	hl, de
	ex	de, hl
	pop	hl
	push	hl
	ld	(hl), e
	inc	hl
	ld	(hl), d
;app.c:30: p->z += p->dz;
	ld	hl, #0x000c
	add	hl, bc
	ex	(sp), hl
	pop	hl
	push	hl
	ld	e, (hl)
	inc	hl
	ld	d, (hl)
	push	bc
	pop	iy
	ld	l, 10 (iy)
	ld	h, 11 (iy)
	add	hl, de
	ex	de, hl
	pop	hl
	push	hl
	ld	(hl), e
	inc	hl
	ld	(hl), d
;app.c:31: p = p->next;
	ld	a, (bc)
	ld	4 (ix), a
	inc	bc
	ld	a, (bc)
	ld	5 (ix), a
	jr	00101$
00104$:
;app.c:33: }
	ld	sp, ix
	pop	ix
	ret
;app.c:35: void test2(node *p0) {
;	---------------------------------
; Function test2
; ---------------------------------
_test2::
	push	ix
	ld	ix,#0
	add	ix,sp
;app.c:36: node *p = p0;
	ld	c, 4 (ix)
	ld	b, 5 (ix)
;app.c:38: while(p) {
00101$:
	ld	a, b
	or	a, c
	jr	Z,00104$
;app.c:39: q = &(p->dx);
	push	bc
	pop	iy
	inc	iy
	inc	iy
;app.c:40: w = *q; q++; w2 = *q; w2 += w; *q = w2; q++;
	ld	e, 0 (iy)
	ld	d, 1 (iy)
	inc	iy
	inc	iy
	ld	l, 0 (iy)
	ld	h, 1 (iy)
	add	hl, de
	ld	0 (iy), l
	ld	1 (iy), h
	inc	iy
	inc	iy
;app.c:41: w = *q; q++; w2 = *q; w2 += w; *q = w2; q++;
	ld	e, 0 (iy)
	ld	d, 1 (iy)
	inc	iy
	inc	iy
	ld	l, 0 (iy)
	ld	h, 1 (iy)
	add	hl, de
	ld	0 (iy), l
	ld	1 (iy), h
	inc	iy
	inc	iy
;app.c:42: w = *q; q++; w2 = *q; w2 += w; *q = w2; 
	ld	e, 0 (iy)
	ld	d, 1 (iy)
	inc	iy
	inc	iy
	ld	l, 0 (iy)
	ld	h, 1 (iy)
	add	hl, de
	ld	0 (iy), l
	ld	1 (iy), h
;app.c:43: p = p->next;
	ld	l, c
	ld	h, b
	ld	c, (hl)
	inc	hl
	ld	b, (hl)
	jr	00101$
00104$:
;app.c:45: }
	pop	ix
	ret
	.area _CODE
	.area _INITIALIZER
	.area _CABS (ABS)

By santiontanon

Paladin (1014)

santiontanon's picture

03-07-2020, 06:46

Thanks @hit9918, I'll save this file as an example and will add supporting SDCC output to my to do list!

Also, I just made a new release (MDL alpha v3): https://github.com/santiontanon/mdlz80optimizer/releases/tag...

The main update is fixing the issues you guys identified above:
- that incorrect optimization does not happen any more
- @grauw: I have tried it now on your "vgmplay-msx" project, and I got it to run there. It is a large project, so it reports saving about 40 bytes. I don't think I have the "section" keyword handled well though (so parsing the source ends in one of the "ERROR" statements in your source code). But at least it reads it now, resolves all the macros and gets to the point of running the optimizer! Might be worth comparing the symbol table generated by Glass with that generated by MDL to see if there are disagreements...

Also, TheNestruo brought a little bit of sanity to the code-base by refactoring a lot of the underlying Java infrastructure. So, a few things should be more robust now. Thanks a lot!

There is still an outstanding issue when optimization happens in code generated by a macro defined in a different file, as it is hard to report the filename/line number where the optimization happened. So, it might be hard to map the output of the optimizer to the original source code in projects that heavily rely on macros. I need to figure out what's the best way to report that...

By santiontanon

Paladin (1014)

santiontanon's picture

03-07-2020, 08:37

Ah, I forgot, in the new release, there is a new "-a" option that writes an output file like this.

For example, with this call:

java -jar mdl.jar xspelunker/src/spelunk-main.asm -po -a annotations.asm

I get a file called annotations.asm with this content:

xspelunker/src/spelunk-gfx.asm  45      warning Label defined without a colon.
xspelunker/src/spelunk-player.asm       569     warning Label defined without a colon.
xspelunker/src/spelunk-player.asm       926     warning Label defined without a colon.
xspelunker/src/spelunk-player.asm       1281    warning Label defined without a colon.
xspelunker/src/spelunk-player.asm       2034    warning Label defined without a colon.
xspelunker/src/spelunk-player.asm       2050    warning Label defined without a colon.
xspelunker/src/spelunk-player.asm       2072    warning Label defined without a colon.
xspelunker/src/spelunk-player.asm       2084    warning Label defined without a colon.
xspelunker/src/spelunk-player.asm       2103    warning Label defined without a colon.
xspelunker/src/spelunk-player.asm       2115    warning Label defined without a colon.
xspelunker/src/spelunk-player-bullets.asm       340     warning Label defined without a colon.
xspelunker/src/spelunk-player-bullets.asm       346     warning Label defined without a colon.
xspelunker/src/spelunk-player-bullets.asm       354     warning Label defined without a colon.
xspelunker/src/spelunk-player-bullets.asm       368     warning Label defined without a colon.
xspelunker/src/spelunk-player-bullets.asm       376     warning Label defined without a colon.
xspelunker/src/spelunk-pcg.asm  1420    warning Label defined without a colon.
xspelunker/src/spelunk-pcg.asm  2009    warning Use of confusing z80 'jp (reg)' syntax, rather than the more accurate 'jp reg'.
xspelunker/src/spelunk-player-bullets.asm       561     optimization    cp 0 -> or a
xspelunker/src/spelunk-player-bullets.asm       165     optimization    cp 1 -> dec a
xspelunker/src/spelunk-gui.asm  84      optimization    cp 1 -> dec a
xspelunker/src/spelunk-pcg.asm  186     optimization    cp 1 -> dec a
xspelunker/src/spelunk-config.asm       42      optimization    cp 1 -> dec a
xspelunker/src/spelunk-player.asm       136     optimization    unused ld reg,?
xspelunker/src/spelunk-player.asm       751     optimization    unused ld reg,?
xspelunker/src/spelunk-pcg.asm  141     optimization    unused ld reg,?
xspelunker/src/spelunk-pcg.asm  1754    optimization    ld a,n; ld (hl),a -> ld (hl),n
xspelunker/src/spelunk-player.asm       604     optimization    dec b; jr nz,label -> djnz label
xspelunker/src/spelunk-pcg.asm  667     optimization    dec b; jr nz,label -> djnz label
xspelunker/src/spelunk-enemies.asm      743     optimization    ld a,reg; neg -> xor a; sub reg
xspelunker/src/spelunk-enemies.asm      750     optimization    ld a,reg; neg -> xor a; sub reg

This is my first attempt at generating some output that could be parsed by a plugin in some editor like Sublime/VSCode, and show in-editor optimization annotations. So, each line is of the form "filename tab line-number tab tag tab message" to be easily parseable, one message per line. Of course, this is just a first attempt, and I'm not sure if this is the right or most useful format. But again, it's a start Smile

Note: also, if you really like defining labels without colons in assembler and do not want those warnings, you can deactivate them with a flag, of course Smile (and the warning about jp (hl) only shows up if you have selected a dialect that supports jp hl instead)

By theNestruo

Master (155)

theNestruo's picture

03-07-2020, 11:50

I was trying to test the optimizer with the sources I have here... but I ran into several problems. No cake for me Sad

I'll open the proper issues as soon as possible, but here's a quick summary:

  • My own code (tniASM 0.45) failed because it uses ZX7, and dzx7_standard.asm uses undocumented instruction "SLL E" aka "SLS E"
  • "org $4000, $bfff" also caused the parser to fail (or maybe it was "ds $4010 - $, $00" or a similar construct; can't remember)
  • My particular usage of relative include paths was a little bit troublesome (kinda fixed it but...)
  • MetalGear disassembly failed because of STRUCT keyword
  • Other asMSX projects I had in my HDD failed because "the macro .rom was not expanded"
  • Previously, those asMSX projects failed because of the lack of "skip" support in ".incbin" (fixed) and alternative syntax for several instructions, such as "EX HL,DE" (also fixed)
  • I ran into a stack overflow problem in some classic game disassemblies, between SourceCode.getAddress and SourceCode.getAddressAfter (this seems to be fixed now)

I'll try to provide a more precise description of the problems (as well as source code examples) as issues (or even better, pull requests!)

By ARTRAG

Enlighted (6396)

ARTRAG's picture

03-07-2020, 13:31

hit9918 wrote:

it would be cool if it could deal with SDCC code

;app.c:12: while(1) {
00102$:
;app.c:13: p->x += p->dx;
	push	bc
	pop	iy
	inc	iy
	inc	iy
	ld	e, 0 (iy)
	ld	d, 1 (iy)
	ld	l, c
	ld	h, b
	ld	a, (hl)
	inc	hl
	ld	h, (hl)
	ld	l, a
	add	hl, de
	ld	0 (iy), l
	ld	1 (iy), h
	jr	00102$
;app.c:15: }
--
199 cycles

This could be replaced by

	ld l,c
	ld h,b
	ld e,(hl)
	inc hl
	ld d,(hl)
	inc hl
	ld c,(hl)
	inc hl
	ld b,(hl)
	ex de,hl
	add hl,bc
	ex de,hl
	ld (hl),d
	dec hl
	ld (hl),e
Page 8/18
1 | 2 | 3 | 4 | 5 | 6 | 7 | | 9 | 10 | 11 | 12 | 13