Skip to content

Instantly share code, notes, and snippets.

@ped7g
Last active February 14, 2022 14:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ped7g/cc65958c69f08564d140ffd14a9ef715 to your computer and use it in GitHub Desktop.
Save ped7g/cc65958c69f08564d140ffd14a9ef715 to your computer and use it in GitHub Desktop.
ZX Spectrum Next example of optimisations using the Z80N extended instructions
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Author: Ped7g ; (C) 2022 ; license: https://opensource.org/licenses/MIT
; Z80N (ZX Next) assembly, sjasmplus syntax: https://github.com/z00m128/sjasmplus
;
; code-size optimisation based on facebook post with small example showcasing the usage of routines
;
; default config of example is doing the counter-clockwise rotation of buffer to screen,
; flip comment to get clock-wise variant:
; Build-time selector: the example code calls `computeULAFromVRamAddress`, this alias decides
; which rotation routine that name resolves to. Only one of the two lines may be active at a time.
computeULAFromVRamAddress EQU computeULAFromVRamAddressCCW
; computeULAFromVRamAddress EQU computeULAFromVRamAddressCW
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Original facebook post sparking the interest:
/*
I have a 1k video buffer which represents a 32x32 character matrix starting at 0x9000. Characters are 8x8 pixels.
The buffer is rotated 90 degrees clockwise so the character at 0x9000 is the top of the rightmost column, 0x9020 is the top of the second column from the left etc….
This routine should compute the corresponding ULA address on the ZX Spectrum Next, including the 90 degree rotation anti-clockwise so it can be displayed normally… so 0x9000 should be the top left corner of the ULA, 0x9020 the first character on the second ULA character line. Can it be optimised?
Using a table of pre-calculated addresses is one option…. Are there others?
; Entry: DE = VRAM Address
; Return:-
;Carry set, HL = Valid ULA Address
;Carry Clear: VRAM Address out of range
computeULAFromVRamAddress
EX DE,HL
LD A,L
AND 0x1F
RLCA
RLCA
RLCA ; * 8 as needs to be pixel coordinate
LD E,A ; E=Y (ZXN X)
LD A,H
SUB 0x90 ; subtract base to get offset
SRL A
RR L
SRL A
RR L
SRL A
RR L
SRL A
RR L
SRL A
RR L ; divide by 32
LD A,L
CP 0x18 ; ZXN only has 24 character lines
RET NC ; out of range
RLCA
RLCA
RLCA ; convert to pixel coordinates
LD D,A ;D=X (ZXN Y)
PIXELAD
EX DE,HL
SCF
RET
*/
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; ZX Spectrum Next example, showing the 32x32 map counter-clock-wise rotated (cut to 32x24)
    DEVICE ZXSPECTRUMNEXT
    ORG $9000 ; the VRAM buffer below starts here (the ASSERT after it relies on this base)
; `stack_top` is handed to SAVENEX OPEN as the stack address; it shares $9000 with the VRAM
; label, which is fine because the stack grows away from the buffer.
stack_top: ; stack will go downward from $9000
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; 32x32 ASCII map which will be displayed rotated counter-clockwise
; Layout: one DB line per map row (L00..L31), rows stored back to back, 32 bytes each, so the
; cell at column x / row y lives at VRAM + 32*y + x.
; NOTE(review): several rows below appear shorter than 32 characters in this copy (runs of
; spaces look collapsed); the ASSERT at the end requires exactly 32*32 bytes, so the padding
; presumably has to be restored from the original listing - confirm before assembling.
VRAM:
    ;01234567890123456789012345678901; 32 chars
    DB "- - - - - - - - - - - - - - - - " ; L00
    DB "C-C-C-C-C-C-C-C-C-C-C-C-C-C-C-C-" ; L01
    DB "0C0C0C0C0C1C1C1C1C1C2C2C2C2C2C3C" ; L02
    DB "00204060800121416181022242628203" ; L03
    DB " 1 3 5 7 9 1 3 5 7 9 1 3 5 7 9 1" ; L04
    DB " " ; L05
    DB " S L " ; L06
    DB " N A " ; L07
    DB " L SMM S iT +|!|!|!|+ L " ; L08
    DB "|0 AUO T nr - - 0|" ; L09
    DB " 9 HLT a = / = 9 " ; L10
    DB " OT V tn - // - " ; L11
    DB " NCO I es = // = " ; L12
    DB " L O B S xp - /'_ - L " ; L13
    DB "|1 IY I to = /-_ = 1|" ; L14
    DB " 4 TLT B s - /-_ - 4 " ; L15
    DB " ARA L ei = /;_ = " ; L16
    DB " TA E dn - m\\ )/ - " ; L17
    DB " L OEE ig = _ '_= L " ; L18
    DB "|1 R D R t - m/ ({_- 1|" ; L19
    DB " 9 TI O oA = \\;_> = 9 " ; L20
    DB " EAS W rS - \\-_ - " ; L21
    DB " S C = \\-_ = " ; L22
    DB " L IST ( iI - \\'_ - L " ; L23
    DB "|2 WWF C sI = \\\\ = 2|" ; L24
    DB " 4 -OE O - \\\\ - 4 " ; L25
    DB " KRL L fa = \\ = " ; L26
    DB " C = ur - - " ; L27
    DB " L OPD 0 nt +!|!|!|!+ L " ; L28
    DB "|2 LON 8 2|" ; L29
    DB " 9 CTA ) 9 " ; L30
    DB " " ; L31
    ; build-time guard: the map must occupy exactly 32*32 bytes starting at $9000
    ASSERT $9000+32*32 == $
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; rotate buffer CCW 90degree, using transformation of coordinates:
; ULA.X = 8 * VRAM.y
; ULA.Y = 8 * (0x1F - VRAM.x)
;-----------------------------------------------------------------------------------------
; Entry: DE = VRAM address %????'??yy'yyyx'xxxx (the top six bits are shifted out or overwritten
;        before use, so the buffer base address does not affect the coordinate values)
; Modifies: A, B, DE (callers that still need the VRAM address must save DE themselves)
; Return:
; - carry set, HL = ULA address (72T)
; - carry clear - out of range (44T), HL is not written on this path
computeULAFromVRamAddressCCW:
    ld b,3 ; shift count, reused by both `bsla` below
    bsla de,b ; DE = %???y'yyyy'xxxx'x000
    ld a,8*0x1F
    sub e ; A = ULA.Y = 8 * (0x1F - VRAM.x)
    cp 8*24
    ret nc ; cf=0 => VRAM address out of range (ULA.Y >= 192)
    ld e,d ; E = %???y'yyyy, the next shift pushes the unknown bits out of E
    bsla de,b ; E = ULA.X = 8 * VRAM.y
    ld d,a ; D = ULA.Y
    pixelad ; HL = ULA address, cf=1 still from `cp 8*24`
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; rotate buffer CW 90degree, using transformation of coordinates:
; ULA.X = 8 * (0x1F - VRAM.y)
; ULA.Y = 8 * VRAM.x
;-----------------------------------------------------------------------------------------
; Entry: DE = VRAM address %????'??yy'yyyx'xxxx (the top six bits are shifted out before use,
;        so the buffer base address does not affect the coordinate values)
; Modifies: A, B, DE (callers that still need the VRAM address must save DE themselves)
; Return (HL is overwritten by `pixelad` in both cases):
; - carry set, HL = ULA address (72T)
; - carry clear - out of range (72T)
computeULAFromVRamAddressCW:
    ld b,3
    bsla de,b ; DE = %???y'yyyy'xxxx'x000, E = ULA.Y
    ld a,d ; A = %???y'yyyy
    ld d,e ; D = ULA.Y = 8 * VRAM.x
    cpl ; 0x1F - (v & 0x1F) == (~v) & 0x1F (and upper bits are noise)
    add a,a ; three doublings = *8, which also shifts the three noise bits out of A
    add a,a
    add a,a
    ld e,a ; E = ULA.X = 8 * (0x1F - VRAM.y)
    pixelad ; no range check was done before this point, it happens on the result below
    ld a,h ; check validity of resulting address, HL >= 0x5800 had Y >= 192
    cp $58 ; cf=1 when address is valid
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Convert a ULA pixel address into the address of the attribute byte of the same character cell.
; In:   HL = ULA pixel address
; Out:  HL = ULA attribute address (L unchanged, H = $58 | bits 4..3 of the incoming H)
; Uses: A, flags
computeATTRFromULA:
    ld a,h
    .3 rrca ; bring H bits 4..3 down to bits 1..0
    and %00000011 ; keep only those two bits
    or $58 ; attribute area starts at $5800
    ld h,a
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; print the ASCII char stored at (DE) to ULA address HL with default attribute (using ROM font at $3D00)
; Entry: DE = address of the char to print, HL = ULA address
; Modifies: A, HL, flags (DE is restored before returning)
printCharFromDeAtHl:
    ; set attribute byte first
    push hl ; keep the pixel address, computeATTRFromULA replaces HL
    call computeATTRFromULA
    ld (hl),$44 ; bright 1, paper 0, ink 4
    pop hl
    ; print the char itself
    ld a,(de) ; char from VRAM to print
    push de
    ex de,hl ; DE = ULA target address
    ld hl,$3C00/8 ; font base for char code 0, pre-divided by 8 (code 32 lands on $3D00)
    add hl,a ; HL = char_font_address/8
    .3 add hl,hl ; HL *= 8 ; ROM font address of char
    .8 ldws ; 8x LDWS will draw the char (each copies (HL) to (DE), then steps L and D)
    pop de
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Block until the raster reaches scanline 192, so that following screen writes happen
; in the vertical-retrace period.
; Only the LSB of the raster line (NextReg $1F) is polled; the MSB is always zero for
; line 192, so it is ignored.
; Uses: A, BC, flags
WaitForScanline192:
    ld a,$1F ; VIDEO_LINE_LSB_NR_1F
    ld bc,$243B ; TBBLUE_REGISTER_SELECT_P_243B
    out (c),a ; NextReg $1F is now the selected register
    inc b ; BC = TBBLUE_REGISTER_ACCESS_P_253B
    ; phase 1: when called while line 192 is already being drawn, let it pass first,
    ; which makes a super-fast game loop wait one extra whole frame
.leaveLine192:
    in a,(c) ; raster line LSB
    cp 192
    jr z,.leaveLine192
    ; phase 2: poll until line 192 comes up
.reachLine192:
    in a,(c) ; raster line LSB
    cp 192
    jr nz,.reachLine192
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; generate 16bit pseudo random value
; From http://map.grauw.nl/sources/external/z80bits.html#4.2 (Milos "baze" Bazelides Z80 bits)
Rand16:
; Out: HL = pseudo-random number, period 65536
; modifies: A, DE
; The seed is kept inside the code: `.s` names the 16-bit immediate operand of the `ld de,0`
; below, and the closing `ld (.s),hl` overwrites it, so each call continues from the last result.
.s+1: ld de,0 ; seed
    ld a,d
    ld h,e
    ld l,253
    or a ; clear carry ahead of the first `sbc`
    sbc hl,de
    sbc a,0
    sbc hl,de
    ld d,0
    sbc a,d
    ld e,a
    sbc hl,de
    jr nc,.storeSeed
    inc hl
.storeSeed:
    ld (.s),hl ; the returned value doubles as the next seed
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Example entry point: draw the rotated 32x32 map once, then animate attributes forever.
start:
    ; the NEX file definition gives a black border, the NEX loader reset should leave ULA mode on
    nextreg 7,3 ; 28MHz mode
; Initial pass over all 32x32 cells; cells that map outside the ULA screen are skipped.
full_print:
    ld de,VRAM
.nextCell:
    push de ; address computation destroys DE
    call computeULAFromVRamAddress ; cf=1 and HL=ULA address when the cell is on screen
    pop de
    call c,printCharFromDeAtHl ; draw only cells with a valid ULA address
    inc de
    bit 2,d ; set once DE reaches VRAM+0x400 (end of VRAM buffer)
    jr z,.nextCell
; Once per frame: pick a random non-space cell and step its ink colour.
main_loop:
    call WaitForScanline192
.pickCell:
    call Rand16
    ex de,hl ; DE = random 16-bit value
    ld a,d
    and $03
    ld d,a ; DE = random 0x000..0x3FF value (random 32x32 coordinates)
    add de,VRAM ; DE = random address into VRAM buffer
    ld a,(de)
    cp ' '
    jr z,.pickCell ; spaces are skipped, draw another random cell within the same frame
    call computeULAFromVRamAddress ; HL = ULA address of the random char at DE
    jr nc,main_loop ; outside of ULA range, leave attributes alone
    call computeATTRFromULA
    inc (hl) ; next ink colour
    res 3,(hl) ; undo the carry into the paper bits => paper stays black
    jr main_loop
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; create NEX file with the example
; (the output name stays "cw32x32.nex" whichever rotation variant is selected at the top)
    SAVENEX OPEN "cw32x32.nex", start, stack_top ; output file, entry point `start`, stack address `stack_top`
    SAVENEX AUTO ; presumably lets sjasmplus choose which banks to store - confirm in sjasmplus docs
    SAVENEX CLOSE
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment