Author
| C compiler comparison - part two
| PingPong msx master Posts: 1025 | Posted: April 12 2007, 18:54   | Anyone know where to find tech info about z88dk?
| | arnold_m msx lover Posts: 85 | Posted: April 12 2007, 21:19   | | | PingPong msx master Posts: 1025 | Posted: April 14 2007, 11:40   | Seen it, but I cannot find anything about .com generation... Does anyone know about this in z88dk?
| | zogo msx friend Posts: 2 | Posted: May 26 2007, 10:15   | Hi,
Where could I find the Hi-Tech compiler?
Regards.
| | Yukio msx professional Posts: 824 | Posted: May 26 2007, 16:41   | | | ARTRAG msx master Posts: 1751 | Posted: October 03 2008, 19:10   | I studied a bit the IAR compiler, and now i can get COM files!!
Bruno, I compiled your circle example with IAR workbench 4.06A for windows
Using optimization for speed at level 9 (max), enabling the use of undocumented instructions and of the alternate registers, and allocating auto variables as static in RAM.
The COM file, on a blueMSX MSX2 machine at 50Hz, takes 10.26 secs (0x201 frames at 50Hz).
Here follows the ASM generated from your C:
; Module header: names the module, declares the segments it uses, exports
; the routines defined below and imports the runtime helpers they call.
NAME circles(16)
RSEG CODE(0)
RSEG IDATA0(0)
RSEG CDATA0(0)
; Routines defined in this module and exported under these names.
PUBLIC circle
PUBLIC di
PUBLIC ei
PUBLIC main
PUBLIC plotpoints
PUBLIC sc2
PUBLIC setpixel
PUBLIC vdpsetreg
PUBLIC vraminit
PUBLIC vrampeek
PUBLIC vrampoke
; Symbols used below but defined elsewhere (presumably the IAR Z80 runtime
; library: frame entry/leave, byte shift and 16-bit compare -- confirm).
EXTERN ?CLZ80L_4_06_L00
EXTERN ?UC_RSH_L01
EXTERN ?SS_CMP_L02
EXTERN ?ENT_PARM_DIRECT_L09
EXTERN ?ENT_AUTO_DIRECT_L09
EXTERN ?LEAVE_DIRECT_L09
; Everything from here up to the next RSEG is placed in the CODE segment.
RSEG CODE
; di: disable maskable interrupts and return to the caller.
di:
DI
RET
; ei: enable maskable interrupts and return to the caller.
ei:
EI
RET
; vdpsetreg: with interrupts disabled, write C and then (E OR 128) to
; I/O port 153 (0x99).
; In:  C = byte written first; E = byte written second, with bit 7 forced on
;      (sc2 passes the table value in C and the table index in E).
; The entry/leave helpers are external; presumably they build and release
; the IX-based stack frame -- confirm against the IAR runtime.
vdpsetreg:
CALL ?ENT_PARM_DIRECT_L09
CALL di
; First byte: the value held in C.
LD A,C
OUT (153),A
; Second byte: E with bit 7 set.
LD A,E
OR 128
OUT (153),A
CALL ei
JP ?LEAVE_DIRECT_L09
; vrampeek: send a 14-bit address to port 153 (0x99) as two bytes, then
; read one byte from port 152 (0x98).
; In:  E = low address byte. The high byte is taken from (IX+3), which
;      presumably holds the saved first argument after the entry helper
;      has run -- confirm against the IAR runtime.
; Out: A = byte read from port 152.
vrampeek:
CALL ?ENT_PARM_DIRECT_L09
CALL di
; Low address byte. AND 255 leaves A unchanged.
LD A,E
AND 255
OUT (153),A
; Load the 16-bit value at (IX+2)/(IX+3), then move H into L and clear H:
; the net effect is HL = value >> 8, so L is the high address byte.
LD L,(IX+2)
LD H,(IX+3)
LD L,H
LD H,0
; Keep only the low six bits of the high byte before sending it.
LD A,L
AND 63
OUT (153),A
; Interrupts are re-enabled before the data port is read.
CALL ei
IN A,(152)
JP ?LEAVE_DIRECT_L09
; vrampoke: send a 14-bit address with bit 6 of the high byte set to
; port 153 (0x99), then write one data byte to port 152 (0x98).
; In:  E = low address byte; C = data byte. The high address byte is taken
;      from (IX+3), which presumably holds the saved first argument after
;      the entry helper has run -- confirm against the IAR runtime.
vrampoke:
CALL ?ENT_PARM_DIRECT_L09
CALL di
; Low address byte. AND 255 leaves A unchanged.
LD A,E
AND 255
OUT (153),A
; HL = 16-bit value at (IX+2)/(IX+3), then L = H and H = 0, i.e. the
; value shifted right by eight: L is the high address byte.
LD L,(IX+2)
LD H,(IX+3)
LD L,H
LD H,0
; Low six bits of the high byte, with bit 6 (value 64) forced on.
LD A,L
AND 63
OR 64
OUT (153),A
; Interrupts are re-enabled before the data byte is written.
CALL ei
LD A,C
OUT (152),A
JP ?LEAVE_DIRECT_L09
; setpixel: read-modify-write one VRAM byte so that one extra bit is set.
; In:  E = first argument, C = second argument. From the way they are
;      used, the first is the horizontal and the second the vertical
;      coordinate (as in the C listing's setpixel(x, y) -- confirm).
; (IX+2) and (IX+4) are read as the saved first and second arguments;
; presumably the entry helper stores them there -- confirm.
; The word after the CALL is 65534 = 0xFFFE, i.e. -2 as a 16-bit value; it
; matches the two local bytes at (IX-2)/(IX-1) used below and is presumably
; the frame size consumed by the entry helper.
; Address computed: (second >> 3) * 256 + (second AND 7) + (first AND 248).
setpixel:
CALL ?ENT_AUTO_DIRECT_L09
DEFW 65534
; C = second >> 3.
SRL C
SRL C
SRL C
; BC = (second >> 3) * 256. The first LD B,0 is overwritten immediately.
LD B,0
LD B,C
LD C,0
; DE = second AND 7.
LD A,(IX+4)
AND 7
LD E,A
LD D,0
; HL = BC + DE.
LD L,C
LD H,B
ADD HL,DE
; HL += first AND 248.
LD A,(IX+2)
AND 248
LD C,A
LD B,0
ADD HL,BC
; Keep the address in the local at (IX-2)/(IX-1) for the write-back.
LD (IX-2),L
LD (IX-1),H
; Read the current byte at that address; vrampeek returns it in A.
EX DE,HL
CALL vrampeek
PUSH AF
; Build the bit mask: A = 128 and B = first AND 7 are handed to
; ?UC_RSH_L01, presumably an unsigned byte right shift of A by B -- confirm.
LD A,(IX+2)
AND 7
LD B,A
LD A,128
CALL ?UC_RSH_L01
; OR the mask into the byte that was read.
LD B,A
POP AF
OR B
; Write the combined byte back to the same address.
LD C,A
LD E,(IX-2)
LD D,(IX-1)
CALL vrampoke
JP ?LEAVE_DIRECT_L09
; sc2: call vdpsetreg once for each of the eight bytes of table ?0010,
; passing the table byte in C and its index in E.
; Local: (IX-2) = 8-bit loop index. The DEFW 65534 (0xFFFE = -2) after the
; CALL is presumably the frame size consumed by the entry helper -- confirm.
sc2:
CALL ?ENT_AUTO_DIRECT_L09
DEFW 65534
; index = 0
LD (IX-2),0
?0012:
; Leave the loop once index >= 8 (no carry from CP 8).
LD A,(IX-2)
CP 8
JR NC,?0011
?0013:
; C = ?0010[index]
LD C,(IX-2)
LD B,0
LD HL,?0010
ADD HL,BC
LD C,(HL)
; E = index
LD E,(IX-2)
CALL vdpsetreg
; index++
INC (IX-2)
JR ?0012
?0011:
JP ?LEAVE_DIRECT_L09
; vraminit: fill three VRAM areas through vrampoke (address in DE, byte in C).
;   1. for counter = 0..255: write the low byte of counter to
;      6144+counter, 6400+counter and 6656+counter;
;   2. for counter = 8192..14335: write 241 (0xF1);
;   3. for counter = 0..6143: write 0.
; Local: (IX-2)/(IX-1) = 16-bit counter. The DEFW 65534 (0xFFFE = -2) after
; the CALL is presumably the frame size consumed by the entry helper.
vraminit:
CALL ?ENT_AUTO_DIRECT_L09
DEFW 65534
; counter = 0
XOR A
LD (IX-2),A
LD (IX-1),A
?0016:
; Loop 1 test: AND A clears carry, then HL = counter - 256; no carry means
; counter >= 256, so the loop ends.
LD BC,256
LD L,(IX-2)
LD H,(IX-1)
AND A
SBC HL,BC
JR NC,?0015
?0017:
; Write low(counter) to counter + 24*256 (= counter + 6144).
LD C,(IX-2)
LD L,(IX-2)
LD H,(IX-1)
LD A,24
ADD A,H
LD H,A
EX DE,HL
CALL vrampoke
; Write low(counter) to counter + 25*256 (= counter + 6400).
LD C,(IX-2)
LD L,(IX-2)
LD H,(IX-1)
LD A,25
ADD A,H
LD H,A
EX DE,HL
CALL vrampoke
; Write low(counter) to counter + 26*256 (= counter + 6656).
LD C,(IX-2)
LD L,(IX-2)
LD H,(IX-1)
LD A,26
ADD A,H
LD H,A
EX DE,HL
CALL vrampoke
; 16-bit counter++: bump the low byte, and the high byte only when the
; low byte wrapped to zero.
INC (IX-2)
JR NZ,?0039
INC (IX-1)
?0039:
JR ?0016
?0015:
; counter = 32*256 = 8192
LD (IX-2),0
LD (IX-1),32
?0020:
; Loop 2 test: ends when counter >= 14336 (= 8192 + 6144).
LD BC,14336
LD L,(IX-2)
LD H,(IX-1)
AND A
SBC HL,BC
JR NC,?0019
?0021:
; Write 241 (0xF1) at address counter.
LD C,241
LD E,(IX-2)
LD D,(IX-1)
CALL vrampoke
; 16-bit counter++
INC (IX-2)
JR NZ,?0040
INC (IX-1)
?0040:
JR ?0020
?0019:
; counter = 0
XOR A
LD (IX-2),A
LD (IX-1),A
?0024:
; Loop 3 test: ends when counter >= 6144.
LD BC,6144
LD L,(IX-2)
LD H,(IX-1)
AND A
SBC HL,BC
JR NC,?0023
?0025:
; Write 0 at address counter.
LD C,0
LD E,(IX-2)
LD D,(IX-1)
CALL vrampoke
; 16-bit counter++
INC (IX-2)
JR NZ,?0041
INC (IX-1)
?0041:
JR ?0024
?0023:
JP ?LEAVE_DIRECT_L09
; plotpoints: call setpixel eight times, once for every sign/swap
; combination of two offsets around a centre point.
; In:  E = first offset (x), C = second offset (y); (IX+8) and (IX+10) are
;      the low bytes of the two words the caller pushed (circle pushes its
;      second argument first, so (IX+8) is circle's first argument, cx, and
;      (IX+10) its second, cy).
; (IX+2) and (IX+4) are read as the saved x and y; presumably the entry
; helper stores the register arguments there -- confirm.
; Each setpixel call receives its first argument in E and its second in C.
; All arithmetic is 8-bit and wraps modulo 256.
plotpoints:
CALL ?ENT_PARM_DIRECT_L09
; setpixel(cx + x, cy + y) -- uses the register copies of x and y.
LD A,(IX+10)
ADD A,C
LD C,A
LD A,(IX+8)
ADD A,E
LD E,A
CALL setpixel
; setpixel(cx - x, cy + y)
LD A,(IX+10)
ADD A,(IX+4)
LD C,A
LD A,(IX+8)
SUB (IX+2)
LD E,A
CALL setpixel
; setpixel(cx - x, cy - y)
LD A,(IX+10)
SUB (IX+4)
LD C,A
LD A,(IX+8)
SUB (IX+2)
LD E,A
CALL setpixel
; setpixel(cx + x, cy - y)
LD A,(IX+10)
SUB (IX+4)
LD C,A
LD A,(IX+8)
ADD A,(IX+2)
LD E,A
CALL setpixel
; setpixel(cx + y, cy + x)
LD A,(IX+10)
ADD A,(IX+2)
LD C,A
LD A,(IX+8)
ADD A,(IX+4)
LD E,A
CALL setpixel
; setpixel(cx - y, cy + x)
LD A,(IX+10)
ADD A,(IX+2)
LD C,A
LD A,(IX+8)
SUB (IX+4)
LD E,A
CALL setpixel
; setpixel(cx - y, cy - x)
LD A,(IX+10)
SUB (IX+2)
LD C,A
LD A,(IX+8)
SUB (IX+4)
LD E,A
CALL setpixel
; setpixel(cx + y, cy - x)
LD A,(IX+10)
SUB (IX+2)
LD C,A
LD A,(IX+8)
ADD A,(IX+4)
LD E,A
CALL setpixel
JP ?LEAVE_DIRECT_L09
; circle: step x upward from 0 and y downward from the radius, calling
; plotpoints for every (x, y) pair while x < y, and once more if the loop
; ends with x == y. An integer decision variable d selects whether y is
; decremented on each step.
; In (as read below): (IX+2)/(IX+3) = first argument, (IX+4)/(IX+5) =
;      second argument, (IX+8)/(IX+9) = third argument (the radius; main
;      pushes it on the stack). Presumably the entry helper stores the
;      register arguments at IX+2 and IX+4 -- confirm.
; Locals: (IX-6)/(IX-5) = x, (IX-4)/(IX-3) = y, (IX-2)/(IX-1) = d.
; The DEFW 65530 after the CALL is 0xFFFA = -6, matching the six local
; bytes; presumably the frame size consumed by the entry helper.
circle:
CALL ?ENT_AUTO_DIRECT_L09
DEFW 65530
; x = 0
XOR A
LD (IX-6),A
LD (IX-5),A
; y = radius
LD L,(IX+8)
LD H,(IX+9)
LD (IX-4),L
LD (IX-3),H
; d = 3 - 2 * radius
LD L,(IX+8)
LD H,(IX+9)
ADD HL,HL
EX DE,HL
LD HL,3
AND A
SBC HL,DE
LD (IX-2),L
LD (IX-1),H
?0028:
; Loop test: HL = x, BC = y. ?SS_CMP_L02 is presumably a signed 16-bit
; compare that leaves carry set when HL < BC -- confirm. No carry exits.
LD C,(IX-4)
LD B,(IX-3)
LD L,(IX-6)
LD H,(IX-5)
CALL ?SS_CMP_L02
JR NC,?0027
?0029:
; Push the second and then the first argument, load BC = y and DE = x,
; and call plotpoints.
LD L,(IX+4)
LD H,(IX+5)
PUSH HL
LD L,(IX+2)
LD H,(IX+3)
PUSH HL
LD C,(IX-4)
LD B,(IX-3)
LD E,(IX-6)
LD D,(IX-5)
CALL plotpoints
; Discard the two pushed words.
POP HL
POP HL
; Test the sign bit of d: bit clear (Z set) means d >= 0.
BIT 7,(IX-1)
JR Z,?0031
?0030:
; d < 0: d = 4 * x + d + 6
LD L,(IX-6)
LD H,(IX-5)
ADD HL,HL
ADD HL,HL
LD C,(IX-2)
LD B,(IX-1)
ADD HL,BC
LD DE,6
ADD HL,DE
LD (IX-2),L
LD (IX-1),H
JR ?0032
?0031:
; d >= 0: d = 4 * (x - y) + d + 10
LD C,(IX-4)
LD B,(IX-3)
LD L,(IX-6)
LD H,(IX-5)
AND A
SBC HL,BC
ADD HL,HL
ADD HL,HL
LD E,(IX-2)
LD D,(IX-1)
ADD HL,DE
LD BC,10
ADD HL,BC
LD (IX-2),L
LD (IX-1),H
; y--
LD L,(IX-4)
LD H,(IX-3)
DEC HL
LD (IX-4),L
LD (IX-3),H
?0032:
; 16-bit x++: bump the low byte, and the high byte only when the low
; byte wrapped to zero.
INC (IX-6)
JR NZ,?0042
INC (IX-5)
?0042:
JP ?0028
?0027:
; After the loop: HL = y - x; a non-zero result skips the final call.
LD L,(IX-4)
LD H,(IX-3)
LD C,(IX-6)
LD B,(IX-5)
AND A
SBC HL,BC
JR NZ,?0034
?0033:
; x == y: one last plotpoints call with the same argument set-up as above.
LD L,(IX+4)
LD H,(IX+5)
PUSH HL
LD L,(IX+2)
LD H,(IX+3)
PUSH HL
LD C,(IX-4)
LD B,(IX-3)
LD E,(IX-6)
LD D,(IX-5)
CALL plotpoints
POP HL
POP HL
?0034:
JP ?LEAVE_DIRECT_L09
; main: call sc2 and vraminit, then call circle 90 times with DE = 100,
; BC = 100 and the loop index (0..89) pushed on the stack as the third
; argument.
; Local: (IX-2) = 8-bit loop index. The DEFW 65534 (0xFFFE = -2) after the
; CALL is presumably the frame size consumed by the entry helper.
; NOTE(review): the constants (100, 100) differ from the C listing later
; in this thread (128, 96), so this output presumably comes from a
; different version of the source -- confirm.
main:
CALL ?ENT_AUTO_DIRECT_L09
DEFW 65534
CALL sc2
CALL vraminit
; index = 0
LD (IX-2),0
?0036:
; Leave the loop once index >= 90 (no carry from CP 90).
LD A,(IX-2)
CP 90
JR NC,?0035
?0037:
; Push the index, zero-extended to 16 bits, as the stack argument.
LD C,(IX-2)
LD B,0
PUSH BC
LD BC,100
LD DE,100
CALL circle
; Discard the pushed argument.
POP HL
; index++
INC (IX-2)
JR ?0036
?0035:
JP ?LEAVE_DIRECT_L09
; ?0010: the 8-byte table indexed by sc2. Eight bytes are reserved under
; the label in IDATA0, and eight initial values are emitted in CDATA0;
; presumably start-up code copies CDATA0 into IDATA0 before main runs --
; confirm against the IAR runtime.
RSEG IDATA0
?0010:
DEFS 8
RSEG CDATA0
; Values in order: 2, 0x60 ('`'), 6, 255, 3, 0x36 ('6'), 7, 4.
DEFB 2
DEFB '`'
DEFB 6
DEFB 255
DEFB 3
DEFB '6'
DEFB 7
DEFB 4
END
| | ARTRAG msx master Posts: 1751 | Posted: October 03 2008, 19:20   | FYI
when disabling the static allocation of autos
the execution time rises to 0x2ef frames, i.e. 15 secs
| | PingPong msx master Posts: 1025 | Posted: October 04 2008, 10:38   | That's an improvement, for sure. But I think HT for PC is far away from IAR. The latter appears to perform almost like SDCC.
| | PingPong msx master Posts: 1025 | Posted: October 04 2008, 15:36   | I see also some true gems:
vrampeek:
LD A,E
AND 255 <-- A AND 255 always equal to A!
OUT (153),A
LD L,(IX+2) <-- Load L, ok 
LD H,(IX+3) <-- Load H,ok 
LD L,H <- LD l from H? 
LD H,0 <- LD H with zero? And the previous ld ? 
LD A,L <- LD A,L that is loaded with H that is loaded with (IX+3)     
AND 63
OUT (153),A
| | Yukio msx professional Posts: 824 | Posted: October 05 2008, 12:56   | Z80.
Now there is a MSX specific information:
Z88DK
They updated the information ...
| | ARTRAG msx master Posts: 1751 | Posted: October 05 2008, 17:18   | Quote:
| I see also some true gems:
vrampeek:
LD A,E
AND 255 <-- A AND 255 always equal to A!
OUT (153),A
LD L,(IX+2) <-- Load L, ok 
LD H,(IX+3) <-- Load H,ok 
LD L,H <- LD l from H? 
LD H,0 <- LD H with zero? And the previous ld ? 
LD A,L <- LD A,L that is loaded with H that is loaded with (IX+3)     
AND 63
OUT (153),A
|
Well, HTC too has some problems, but i'd say IAR has chosen the worst place to put inefficiency
| | PingPong msx master Posts: 1025 | Posted: October 05 2008, 18:05   | Hey ARTRAG, I've done some comparative tests of HTC and z88dk. The test is similar; the times are:
HTC: 8/9 secs
z88dk: 21 secs.
Here is the listing:
#include <stdio.h>
#include <sys.h>
#define EI asm("ei");
#define DI asm("di");
/*
 * vdpsetreg: write `value` and then (regno | 0x80) to I/O port 0x99,
 * with interrupts disabled around the two writes.
 *   regno - register index; bit 7 is set before it is written.
 *   value - byte written first.
 */
void vdpsetreg (unsigned char regno, unsigned char value)
{
DI
outp(0x99, value);
outp(0x99, regno | 0x80);
EI
}
/*
 * vrampeek: send `addr` to port 0x99 as two bytes (low byte first, then
 * bits 8..13), and return the byte read from port 0x98.
 * Interrupts are disabled only while the address is sent; the read
 * happens after they are re-enabled.
 *   addr - address to read; only the low 14 bits are used.
 */
unsigned char vrampeek(unsigned int addr)
{
DI
outp(0x99, addr & 255);
outp(0x99, (addr >> 8) & 0x3f);
EI
return inp(0x98);
}
/*
 * vrampoke: send `addr` to port 0x99 as two bytes (low byte first, then
 * bits 8..13 with 0x40 set), and write `byte` to port 0x98.
 * Interrupts are disabled only while the address is sent; the data write
 * happens after they are re-enabled.
 *   addr - address to write; only the low 14 bits are used.
 *   byte - value written.
 */
void vrampoke(unsigned int addr, unsigned char byte)
{
DI
outp(0x99, addr & 255);
outp(0x99, 0x40 | ((addr >> 8) & 0x3f));
EI
outp(0x98, byte);
}
/* Values passed to vdpsetreg for registers 0..7, in register order. */
static unsigned char vdpregs[] = {0x02,0x60,0x06,0xFF, 0x03, 0x36, 0x07,0x04};
/*
 * sc2: call vdpsetreg(r, vdpregs[r]) for every index r of vdpregs.
 */
void sc2()
{
unsigned char r = 0;
/* sizeof(vdpregs) equals the element count because elements are one byte. */
while (r<sizeof(vdpregs))
{
vdpsetreg(r,vdpregs[r]);
r++;
}
}
/*
 * vraminit: fill three VRAM areas through vrampoke.
 *   - addresses 8192 .. 8192+6143 receive 0xF9;
 *   - addresses 0 .. 6143 receive 0;
 *   - addresses 6144+i, 6144+256+i and 6144+512+i receive i, for i = 0..255.
 */
void vraminit()
{
unsigned int counter;
for (counter=8192;counter<8192+6144;counter++)
{
vrampoke (counter, 0xF9);
}
for (counter=0;counter<6144;counter++)
{
vrampoke (counter, 0);
}
/* Three consecutive 256-byte runs, each filled with 0, 1, ..., 255. */
for (counter=0;counter<256;counter++)
{
vrampoke (6144+counter, counter);
vrampoke (6144+counter+256, counter);
vrampoke (6144+counter+512, counter);
}
}
/* Single-bit masks indexed by (x & 7): index 0 selects the top bit. */
unsigned char mask[] = {128,64,32,16,8,4,2,1};
/*
 * setpixel: OR one bit into the VRAM byte selected by (x, y), using a
 * vrampeek followed by a vrampoke at the same address.
 *   x - horizontal coordinate; x >> 3 selects the 8-byte cell in the row
 *       and x & 7 selects the bit.
 *   y - vertical coordinate; y & 7 selects the byte within the cell and
 *       (y & 248) << 5 adds 256 bytes per group of eight rows.
 */
void setpixel (unsigned char x, unsigned char y)
{
unsigned int addr = (x >> 3) * 8 + (y & 7) + (((unsigned int)(y & 248)) << 5);
vrampoke (addr, vrampeek(addr) | mask [(x & 7)] );
}
/*
 * plotpoints: call setpixel for the eight points obtained by adding or
 * subtracting the offsets (x, y) and (y, x) to/from the centre (cx, cy).
 *   x, y   - offsets from the centre.
 *   cx, cy - centre coordinates.
 * Each cast applies to cx or cy only; the sum or difference is then
 * converted to unsigned char when passed to setpixel, so results wrap
 * modulo 256.
 */
void plotpoints(unsigned int x, unsigned int y, unsigned int cx, unsigned int cy)
{
setpixel((unsigned char)cx+x,(unsigned char)cy+y);
setpixel((unsigned char)cx-x,(unsigned char)cy+y);
setpixel((unsigned char)cx-x,(unsigned char)cy-y);
setpixel((unsigned char)cx+x,(unsigned char)cy-y);
setpixel((unsigned char)cx+y,(unsigned char)cy+x);
setpixel((unsigned char)cx-y,(unsigned char)cy+x);
setpixel((unsigned char)cx-y,(unsigned char)cy-x);
setpixel((unsigned char)cx+y,(unsigned char)cy-x);
}
/*
 * circle: plot a circle outline using integer arithmetic only.
 * x runs upward from 0 and y downward from `radius`; plotpoints is called
 * for each (x, y) while x < y, and once more if the loop ends with x == y.
 * The decision variable d chooses, per step, whether y is decremented.
 *   cx, cy - centre coordinates, forwarded to plotpoints.
 *   radius - starting value of y.
 */
void circle(unsigned int cx, unsigned int cy, unsigned int radius)
{
int x, y, d;
x = 0; y = radius;
d = 3 - 2*radius;
while (x<y)
{
plotpoints(x, y, cx, cy);
/* d < 0: keep y; otherwise step y down by one. */
if ( d<0 )
d = d + 4*x + 6;
else
{
d = d + 4*(x-y) + 10;
y--;
}
x++;
}
/* Plot the final point set only when x and y meet exactly. */
if (x==y)
plotpoints(x, y, cx, cy);
}
/*
 * main: benchmark driver. Calls sc2, sets register 7 to 0, runs vraminit,
 * sets register 7 to 15, then draws 90 circles centred at (128, 96) with
 * radius 0..89.
 */
void main(void)
{
/* NOTE(review): px is declared but never used in this function. */
unsigned char px;
unsigned char py;
sc2();
vdpsetreg(7,0);
vraminit();
/* The thread says to start timing when the border turns white;
   presumably that is the effect of this call -- confirm. */
vdpsetreg(7,15);
for (py=0;py<90;py++)
{
circle(128,96,py);
}
//while(1);
}
The time should be measured from the moment the border color becomes white.
For z88dk, replace the #include <sys.h> with #include <stdlib.h>.
| | PingPong msx master Posts: 1025 | Posted: October 05 2008, 18:07   | Practically, a similar test in pure MSX BASIC gives roughly the same results as z88dk. However, I do not know how to optimize in z88dk.
| | PingPong msx master Posts: 1025 | Posted: October 05 2008, 18:14   | It would be a good idea to convert this program to pure Pascal. The compiler was created by Borland, and at the time theirs were very good optimizing compilers. Maybe it will give us a surprise. Does anyone want to try?
| |
| |
| |