Skip to content

Instantly share code, notes, and snippets.

Created March 1, 2022 13:04
Show Gist options
  • Save ped7g/0c4e94796b474994ed88d0bdd1bf2f25 to your computer and use it in GitHub Desktop.
Save ped7g/0c4e94796b474994ed88d0bdd1bf2f25 to your computer and use it in GitHub Desktop.
Z80 asm quicksort of uint16_t array
; Author: Ped7g ; (C) 2022 ; license: MIT / GPL / Public Domain (pick whichever fits best for you)
; assembled with:
; based on project, written as comparison
; Notes for myself about debugging/verification: in CSpect debugger `SAVE "data.bin",from,length`
; Then `hexdump -v -e '/2 "%u\n"' DATA.BIN > data.txt`. Then compare with `sort -g data.txt`
DEFINE ORG_ADR $8080 ; using only uncontended faster memory, but smaller buffers
OPT --syntax=abf : CSPECTMAP ""
/* // C routine from z80_babel project, used for algorithm in asm code
void quicksort_c(uint16_t *A, uint16_t len) {
if (len < 2) return;
uint16_t pivot = A[len / 2];
uint16_t i, j;
for (i = 0, j = len - 1; ; i++, j--) {
while (A[i] < pivot) i++;
while (A[j] > pivot) j--;
if (i >= j) break;
uint16_t temp = A[i];
A[i] = A[j];
A[j] = temp;
quicksort_c(A, i);
quicksort_c(A + i, len - i);
; Quicksort, inputs (__sdcccall(1) calling convention):
; HL = uint16_t* A (pointer to beginning of array)
; DE = uint16_t len (number of word elements in array)
; modifies: AF, A'F', BC, DE, HL
; WARNING: array can't be aligned to start/end of 64ki address space, like HL == 0x0000, or having last value at 0xFFFE
; WARNING: stack space required is on average about 6*log(len) (depending on the data, in extreme case it may be more)
; convert arguments to HL=A.begin(), DE=A.end() and continue with quicksort_a_impl
ex de,hl
add hl,hl
add hl,de
ex de,hl
; |
; fallthrough into implementation
; |
; v
; Quicksort implementation, inputs:
; HL = uint16_t* A.begin() (pointer to beginning of array)
; DE = uint16_t* A.end() (pointer beyond array)
; modifies: AF, A'F', BC, HL (DE is preserved)
; array must be located within 0x0002..0xFFFD
ld c,l
ld b,h ; BC = A.begin()
; if (len < 2) return; -> if (end <= begin+2) return;
inc hl
inc hl
or a
sbc hl,de ; HL = -(2*len-2), len = (2-HL)/2
ret nc ; case: begin+2 >= end <=> (len < 2)
push de ; preserve A.end() for recursion
push bc ; preserve A.begin() for recursion
; uint16_t pivot = A[len / 2];
rr h
rr l
dec hl
res 0,l
add hl,de
ld a,(hl)
inc hl
ld l,(hl)
ld h,b
ld b,l
ld l,c
ld c,a ; HL = A.begin(), DE = A.end(), BC = pivot
; flip HL/DE meaning, it makes simpler the recursive tail and (A[j] > pivot) test
ex de,hl ; DE = A.begin(), HL = A.end(), BC = pivot
dec de ; but keep "from" address (related to A[i]) at -1 as "default" state
; for (i = 0, j = len - 1; ; i++, j--) { ; DE = (A+i-1).hi, HL = A+j+1
; while (A[j] > pivot) j--;
dec hl
ld a,b
sub (hl)
dec hl ; HL = A+j (finally)
jr c,.find_j ; if cf=1, A[j].hi > pivot.hi
jr nz,.j_found ; if zf=0, A[j].hi < pivot.hi
ld a,c ; if (A[j].hi == pivot.hi) then A[j].lo vs pivot.lo is checked
sub (hl)
jr c,.find_j
; while (A[i] < pivot) i++;
inc de
ld a,(de)
inc de ; DE = (A+i).hi (ahead +0.5 for swap)
sub c
ld a,(de)
sbc a,b
jr c,.find_i ; cf=1 -> A[i] < pivot
; if (i >= j) break; // DE = (A+i).hi, HL = A+j, BC=pivot
sbc hl,de ; cf=0 since `jr c,.find_i`
jr c,.swaps_done
add hl,de ; DE = (A+i).hi, HL = A+j
; swap(A[i], A[j]);
inc hl
ld a,(de)
ex af,af'
ld a,(de)
ex af,af'
ld (hl),a ; Swap(A[i].hi, A[j].hi) done
dec hl
ex af,af'
ld (hl),a ; Swap(A[i].lo, A[j].lo) done
inc bc
inc bc ; pivot value restored (was -=2 by ldd+ldi)
; --j; HL = A+j is A+j+1 for next loop (ready)
; ++i; DE = (A+i).hi is (A+i-1).hi for next loop (ready)
jp .find_next_swap
; i >= j, all elements were already swapped WRT pivot, call recursively for the two sub-parts
dec de ; DE = A+i
; quicksort_c(A, i);
pop hl ; HL = A
call quicksort_a_impl
; quicksort_c(A + i, len - i);
ex de,hl ; HL = A+i
pop de ; DE = end() (and return it preserved)
jp quicksort_a_impl
DISPLAY "quicksort_a code size: ",/A,$-quicksort_a
DISPLAY "quicksort_a_impl code size: ",/A,$-quicksort_a_impl
; test harness to debug and tune the routine
; check empty array and len=1 array (shouldn't touch buffer, just exit)
ld hl,test_array
ld de,0
call quicksort_a
ld hl,test_array
ld de,1
call quicksort_a
; check simple 1-9 array
ld hl,test_array
ld de,test_array.count
call quicksort_a
; sort chunk of ROM and auto-test the result by code
ld a,3
out ($FE),a
ld hl,time_test_array
ld de,test_array
ld bc,2048<<1
; sort it
ld hl,test_array
ld de,2048
call quicksort_a
; check it a bit (just order of elements, and xor checksum)
ld a,2
out ($FE),a ; also xor-CRC seed
ld (.oldsp+1),sp
ld bc,$FF00 | high 2048 ; B = 255, C = 8x256 -> 2047x sbc
ld sp,test_array
pop de
xor e
xor d
ex de,hl
pop de
xor e
xor d
sbc hl,de
jr nc,$ ; bad order, get stuck
djnz .check_order
dec c
jr nz,.check_order
; check xor-CRC against ROM
ld sp,time_test_array
ld bc,high 2048 ; B = 0, C = 8x256
pop de
xor e
xor d
djnz .check_crc
dec c
jr nz,.check_crc
cp 2
jr nz,$ ; bad xor-CRC, get stuck
ld sp,0
ld a,4
out ($FE),a
; sync with interrupt
ld a,1
out ($FE),a
; copy shuffled original data - RED border
ld a,3
out ($FE),a
ld hl,time_test_array
ld de,test_array
ld bc,time_test_array.count<<1
; run the sort
ld a,5
out ($FE),a
ld hl,test_array
ld de,time_test_array.count
call quicksort_a
jr timing_check
; dw 1,2,3,4,5,6,7,8,9
dw 8,6,4,2,1,3,5,7,9
.count: EQU ($-test_array)/2
; time_test_array: EQU $0000
; .count: EQU 2048
time_test_array: EQU $0000
.count: EQU 74
; ^ max array length sortable within single frame (20ms) in CSpect emulator (with IM1)
SAVESNA "qsort.sna", test_start ;; DEBUG snapshot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment