PS
my solution was
; -----------------------------------------------------------------------
; _setpixel - set one pixel in the MSX1 SCREEN 2 pattern data (Z80).
;
; In:    E = x
;        C = y; when y > 191 the routine returns without drawing
; Out:   nothing
; Clobb: AF, BC, DE, HL
; Ints:  disabled around the VDP access and enabled again afterwards
;        (the clipped early-return path leaves the interrupt state alone)
;
; Typical call (Z80 has no "ld e,(nn)", so the values go through A):
;       ld      a,(_x)
;       ld      e,a
;       ld      a,(_y)
;       ld      c,a
;       call    _setpixel
;
; VRAM byte address used:
;       vddr = (x & 0xF8) + (y & 7) + 256 * (y >> 3)
; The value is sent to the VDP as-is, so this assumes the pattern data
; starts at VRAM address 0 - TODO confirm against the screen set-up code.
;
; Timing fix: the bit-mask pointer is now computed between the read
; address set-up and the data read.  The original had only a NOP there
; (roughly 17 T-states between the two port accesses); the TMS9918A can
; need about 8 us (~29 T-states at 3.58 MHz) in this mode before the
; requested byte is available.  LD HL,nn + ADD HL,BC + IN give about 35.
; -----------------------------------------------------------------------
        global  _setpixel
_setpixel:
        ld      a,191
        cp      c
        ret     c                       ; carry => y > 191: off screen

        ld      a,c
        rrca
        rrca
        rrca
        and     0x1F                    ; drop the 3 bits rotated in at the top
        ld      h,a                     ; h = y >> 3, i.e. 256 * (y >> 3)
        ld      a,e
        and     0xF8
        ld      l,a                     ; l = x & 0xF8
        ld      a,c
        and     07h
        ld      c,a
        ld      b,0
        add     hl,bc                   ; hl = vddr (l + (y & 7) never carries)
        ld      a,e
        and     07h
        ld      c,a                     ; bc = x & 7 (b is still 0)
        ex      de,hl                   ; de = vddr

        di                              ; keep the two-byte address writes atomic
        ld      a,e
        out     (099h),a                ; address low byte
        ld      a,d                     ; d <= 17h here, so bits 7,6 = 0,0 (read)
        out     (099h),a                ; address high byte, read mode
        ld      hl,BitTab               ; useful work while the VDP fetches
        add     hl,bc                   ; hl -> mask for column x & 7
        in      a,(098h)                ; current pattern byte
        or      (hl)                    ; set the pixel bit
        ld      c,a                     ; keep the new byte
        ld      a,e
        out     (099h),a                ; address low byte again
        ld      a,d
        or      040h                    ; bit 6 = write mode
        out     (099h),a
        ld      a,c
        out     (098h),a                ; store the updated pattern byte
        ei
        ret

; One-bit masks, leftmost pixel of a byte first.
BitTab:
        db      128,64,32,16,8,4,2,1
More than 266 T states (but that includes the clipping and the DI/EI).
Could your solution improve even more by exploiting a 6K RAM buffer that mirrors the VRAM?
Sure, it always makes things faster...
; PSET - plot one pixel: OR a mask into a RAM copy of the pattern data and
;        send the updated byte to the VDP.
; In:  HL = Y*256+X (H = Y, L = X); no range check is done on Y here.
;           With Y > 191 the computed address falls outside #4000-#57FF.
;      DE = address of one scratch byte of RAM (it is overwritten)
; Changes: AF, BC, DE, HL, the scratch byte, one byte of the RAM copy
; Address bytes, as built below:
;      low  byte = X7..X4,Y3..Y0
;      high byte = 0,1,0,Y7..Y4,X3   (bit 6 set = VRAM write)
;   The same two bytes end up in DE, so the RAM copy is at #4000 + VRAM address.
; NOTE(review): this is not the usual (x & #F8)+(y & 7)+256*(y>>3) layout;
;   presumably NameTableInit writes a matching name table - confirm.
; NOTE(review): the 8-byte table read at #C000+(X & 7) is not shown here;
;   presumably it holds the single-bit masks 128,64,...,1 - confirm.
; NOTE(review): there is no DI/EI; presumably the caller makes sure no
;   interrupt handler touches port #99 between the two address writes.
PSET:
EX DE,HL ; HL = scratch byte address, D = Y, E = X
LD (HL),D ; scratch = Y
LD A,E
AND 7
LD B,A ; B = X & 7, kept as the table index
LD C,#99 ; C = VDP address port
XOR E ; (X & 7) XOR X = X & #F8
OR 2 ; low nibble = X3,0,1,0 (turns into the top of the high byte below)
RRD ; A = X7..X4,Y3..Y0 ; scratch = X3,0,1,0,Y7..Y4
OUT (C),A ; VRAM address, low byte
LD E,A
LD A,(HL)
RLCA ; A = 0,1,0,Y7..Y4,X3
OUT (C),A ; VRAM address, high byte with the write bit
LD D,A ; DE = #4000 + VRAM address: the byte in the RAM copy
LD H,#C0
LD A,(DE) ; current pattern byte, taken from RAM instead of VRAM
LD L,B ; HL = #C000 + (X & 7)
OR (HL) ; merge the table entry for this column
LD (DE),A ; update the RAM copy
OUT (#98),A ; and the VRAM byte
RET
I'm not sure how I did this for MSX2 - maybe also screen2 but with tighter timings.
On MSX2/SC5 there is absolutely no problem! After doing the VDP register setup, the only thing left is to fill in the changing data (x, y, color) and issue the command. Theoretically one can reach a speed of 200000 pixels/sec.
Problem is only on msx1/sc2: because of
- calculations needed to map x,y to vram address
- the need to read / calculate mask / mask / write data
- the associated msx1 vdp delay
; call with parameters in E and C
Rule 3 said that you need to use H and L
Why use OUT (C),A and not a direct port address?
Could your solution improve even more by exploiting a 6K RAM buffer that mirrors the VRAM?
Sure, it always makes things faster...
; PSET - plot one pixel using a RAM copy of the pattern data at #4000-#57FF
;        (version that addresses the VDP through register C).
; In:  HL = Y*256+X (H = Y, L = X); Y is not range-checked here
;      DE = address of one scratch byte of RAM (it is overwritten)
; Changes: AF, BC, DE, HL, the scratch byte, one byte of the RAM copy
; Quoted in the discussion as 177 T-states (figure not re-measured here).
; NOTE(review): the address layout built below (low = X7..X4,Y3..Y0,
;   high = Y7..Y4,X3) presumably relies on a custom name table - confirm.
; NOTE(review): the table at #C000..#C007 is not shown; presumably the
;   masks 128,64,...,1 - confirm.
PSET:
EX DE,HL ; HL = scratch byte address, D = Y, E = X
LD (HL),D ; scratch = Y
LD A,E
AND 7
LD B,A ; B = X & 7
LD C,#99 ; port used by both OUT (C),A below
XOR E ; A = X & #F8
OR 2 ; low nibble = X3,0,1,0
RRD ; A = X7..X4,Y3..Y0 ; scratch = X3,0,1,0,Y7..Y4
OUT (C),A ; first address byte (low)
LD E,A
LD A,(HL)
RLCA ; A = 0,1,0,Y7..Y4,X3 (bit 6 set = VRAM write)
OUT (C),A ; second address byte (high)
LD D,A ; DE = #4000 + VRAM address
LD H,#C0
LD A,(DE) ; read the pattern byte from the RAM copy
LD L,B ; HL = #C000 + (X & 7)
OR (HL)
LD (DE),A ; write back to the RAM copy
OUT (#98),A ; write the same byte to VRAM
RET
It gives 177 Tstates, but with the change above, we get
; PSET - same routine with the port number given directly in the OUT
;        instructions, so register C is neither loaded nor changed.
; In:  HL = Y*256+X (H = Y, L = X); Y is not range-checked here
;      DE = address of one scratch byte of RAM (it is overwritten)
; Changes: AF, B, DE, HL, the scratch byte, one byte of the RAM copy
; Quoted in the discussion as 165 T-states (figure not re-measured here).
; The RAM copy is addressed at #4000 + VRAM address; the table lookup is
; at #C000 + (X & 7).
; NOTE(review): the table contents and the name table layout this
;   addressing relies on are not visible here - confirm both.
PSET:
EX DE,HL ; HL = scratch byte address, D = Y, E = X
LD (HL),D ; scratch = Y
LD A,E
AND 7
LD B,A ; B = X & 7
XOR E ; A = X & #F8
OR 2 ; low nibble = X3,0,1,0
RRD ; A = X7..X4,Y3..Y0 ; scratch = X3,0,1,0,Y7..Y4
OUT (#99),A ; VRAM address, low byte
LD E,A
LD A,(HL)
RLCA ; A = 0,1,0,Y7..Y4,X3 (bit 6 set = VRAM write)
OUT (#99),A ; VRAM address, high byte
LD D,A ; DE = #4000 + VRAM address
LD H,#C0
LD A,(DE) ; pattern byte from the RAM copy
LD L,B ; HL = #C000 + (X & 7)
OR (HL)
LD (DE),A ; update the RAM copy
OUT (#98),A ; update VRAM
RET
that gives 165 T states and should be equivalent
My sole doubt: how the hell is it supposed to work?
@NYYRIKKI
I'm trying to figure out how this code should work:
call NameTableInit ; initialize the PNT in VRAM
ld de,VBuffer ; I set in DE the address of a 6Kb area
ld hl,Y*256+X ; I set in H,L the X,Y of the point
call PSET ; set a pixel in RAM and in VRAM
; [...]
VBuffer:
ds 6*1024
Something like this?
No...
DE = 1 byte of free RAM
HL = Y*256+X
(you can optimize the routine by swapping these and removing the EX DE,HL)
VBuffer is fixed at address #4000-#57FF
So, something like:
; Set-up for the PSET variant whose RAM copy sits at #4000-#57FF, followed
; by one pixel plot.  PSET is entered with JP, so its RET returns to
; whoever called this sequence.
RAMAD1: equ #F342 ; byte loaded into A before the slot switch
ENASLT: equ #0024 ; BIOS call used here to switch RAM in
CHGMOD: equ #005F ; BIOS call used here to set the screen mode
PAGE_4000: equ #40 ; high byte of the address whose page is switched
SCREEN_2: equ 2 ; screen mode number

ld a,(RAMAD1)
ld h,PAGE_4000
call ENASLT ; Switch RAM
ld a,SCREEN_2
call CHGMOD ; Set screenmode
call ColorInit
call NameTableInit
call Clear4000
ld hl,Y*256+X ; H,L = Y,X of the point
ld de,temp ; DE = the one scratch byte PSET needs
jp PSET
temp: db 0
humm
My code is in a rom (with scc mapper) I cannot work with ram in 4000h (nor i can swap ram there at each pset).
The more reasonable place where i can allocate a 6k buffer is 0c000h
this means that i have set/reset bit 7 in the pset code when accessing to vram, but...
what if I leave the highest address bit set in the VRAM I/O?
Does the vdp go mad?
to write registers I have to send as second byte to port 99h the values 1,0,r,r,r,r,r,r
to set vram addresses I have to send as second byte to port 99h the values 0,X,a,a,a,a,a,a
where X is 1 for writing vram and 0 for reading vram
so, as i have to set the vram addresses for writing
what if I send as second byte to port 99h the values 1,1,a,a,a,a,a,a ?
I'm pretty sure it will not work... Try something like:
; PSET - variant for a RAM copy of the pattern data at #C000-#D7FF and a
;        lookup table at #D800.
; In:  HL = Y*256+X (H = Y, L = X); Y is not range-checked here.
;           With Y > 191 the computed address falls outside #C000-#D7FF.
;      DE = address of one scratch byte of RAM (it is overwritten)
; Changes: AF, B, DE, HL, the scratch byte, one byte of the RAM copy
; The byte that becomes D carries bits 7,6 = 1,1 so that DE points into
; #C000-#D7FF; bit 7 is cleared (AND #7F) only in the copy sent to port
; #99, leaving the 0,1 prefix that selects a VRAM write.
; NOTE(review): the table at #D800..#D807 is not shown; presumably the
;   single-bit masks 128,64,...,1 - confirm.
; NOTE(review): the address layout (low = X7..X4,Y3..Y0, high = Y7..Y4,X3)
;   presumably relies on a custom name table - confirm.
PSET:
EX DE,HL ; HL = scratch byte address, D = Y, E = X
LD (HL),D ; scratch = Y
LD A,E
AND 7
LD B,A ; B = X & 7
XOR 6
XOR E ; A = (X & #F8) OR 6: low nibble = X3,1,1,0
RRD ; A = X7..X4,Y3..Y0 ; scratch = X3,1,1,0,Y7..Y4
OUT (#99),A ; VRAM address, low byte
LD E,A
LD A,(HL)
RLCA ; A = 1,1,0,Y7..Y4,X3
LD D,A ; DE = #C000 + VRAM address: the byte in the RAM copy
AND #7F ; clear bit 7: 0,1,... = VRAM write, not a register write
OUT (#99),A ; VRAM address, high byte
LD H,#D8
LD A,(DE) ; pattern byte from the RAM copy
LD L,B ; HL = #D800 + (X & 7)
OR (HL)
LD (DE),A ; update the RAM copy
OUT (#98),A ; update VRAM
RET
(Note internal table address changed to #D800)
