Skip to content

Instantly share code, notes, and snippets.

@ped7g
Created February 23, 2022 10:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ped7g/c55bfa0d55ca13ce029549636cdd1de5 to your computer and use it in GitHub Desktop.
Save ped7g/c55bfa0d55ca13ce029549636cdd1de5 to your computer and use it in GitHub Desktop.
Z80 asm Erastothenes sieve to calculate prime numbers
; Author: Ped7g ; (C) 2022 ; license: MIT / GPL / Public Domain (pick whichever fits best for you)
; assembled with: https://github.com/z00m128/sjasmplus/
; based on https://github.com/MartinezTorres/z80_babel project, written as comparison
; DEFINE ORG_ADR $5D01 ; using maximum memory in zx48 snapshot -> prime numbers: 4339, last prime number 41491
; DEFINE DO_MAX_BUFFER_CASE ; (4339 primes was with shorter test-code, not last version)
DEFINE ORG_ADR $8080 ; using only uncontended faster memory, but smaller buffers
OPT --syntax=abf
DEVICE ZXSPECTRUM48,ORG_ADR-1
ORG ORG_ADR
test_start:
IFDEF DO_MAX_BUFFER_CASE
case0: ; pushing the limits sieving maximum primes possible, overlapping primes and work buffer up to $FFFF
ld ix,primes
ld de,primes+2
ld bc,0x10000-(primes+2)
call sieve_a_1
nop
ENDIF
case1:
; test with very low work_size == 3 (early exit with single prime "2")
ld ix,primes
ld de,work
ld bc,3
call sieve_a_1
nop ; expected result HL=1, primes[0] = 2
case2:
ld ix,primes
ld de,work
ld bc,2048 ; workload like the original z80_babel test
call sieve_a_1
nop ; expected result HL=0x0135 (309), primes[0] = ..., 0x7F7 (2039)
case3:
ld ix,primes
ld de,work
ld bc,work_size ; 26000 to produce 2860 primes
call sieve_a_1
nop ; expected result HL=0x0B2C (2860), primes[0] = ..., 0x658F (25999)
; check result if it's as expected
ld a,6
out (254),a
push hl
add hl,hl
ld bc,hl ; fake
ld de,0 ; crc add:xor
ld hl,primes
.crc
ld a,e
xor (hl)
ld e,a
xor d
add a,(hl)
ld d,a
inc hl
dec bc
ld a,b
or c
jr nz,.crc
ld a,4
ld hl,0x0DEB
or a
sbc hl,de
jr z,.ok0
ld a,2
.ok0:
pop hl
ld de,0x0B2C
or a
sbc hl,de
jr z,.ok1
ld a,2
.ok1:
ld hl,25999
ld de,(ix-2) ; fake
or a
sbc hl,de
jr z,.ok2
ld a,2
.ok2:
out (254),a
cp 4
jr nz,$
.timeL:
ei
halt
ld a,5
out (254),a
ld ix,primes
ld de,work
ld bc,500 ;DEBUG
call sieve_a_1
xor a
out (254),a
jr .timeL
jr $
/*
uint16_t sieve_c_1(uint16_t *primes, uint8_t *work, uint16_t work_size ) {
for (uint16_t i = 0; i < work_size; i++) {
work[i] = 0x00;
}
uint16_t *p = primes;
*p++ = 2;
for (uint16_t i = 3; i < work_size; i+=2) {
if (work[i]) continue;
*p++ = i;
for (uint16_t j = i + i + i; j < work_size; j += i + i)
work[j] = 1;
}
return p-primes;
}
*/
sieve_a_1:
; IX = primes
; DE = work (can overlap primes starting from primes+2 address)
; BC = work_size
; 108 bytes
; primes[0] = 2;
xor a
ld (ix+0),2
ld (ix+1),a
; if work_size is < 4, return only one prime number 2
ld hl,-4
add hl,bc
jr nc,.small_size
; 4 <= work_size asserted
push ix ; store "primes" into stack for return code, IX becomes "p"
inc ix
inc ix ; p++; (2 is already written)
; clear the work buffer (from 0, like C code, even if starting from 3 is enough)
; for (uint16_t i = 0; i < work_size; i++) work[i] = 0x00;
ld l,e ; HL = work
ld h,d
push hl ; preserve original "work" on stack
inc de
ld (hl),a
dec bc
ldir
; HL = work + work_size - 1 => inverted equals to -(work+work_size) -> useful constant for tests
ld a,l
cpl
ld e,a
ld a,h
cpl
ld d,a ; DE = -(work+work_size)
; outer loop checking numbers
; for (uint16_t i = 3; i < work_size; i+=2) {
ld a,1 ; useful constant to check/fill work buffer
ld c,a ; BC = i-2 = 1 (BC was zero from `ldir`)
.outer_loop:
pop hl
push hl
inc bc ; i += 2
inc bc
add hl,bc ; HL = work+i
jr c,.outer_loop_done
; if (work[i]) continue;
cp (hl) ; set ZF (the HL may be invalid here)
; test (i < work_size) <=> work+i < work+work_size <=> -(work+work_size)+work+i < 0
add hl,de ; preserves ZF
jr c,.outer_loop_done
jr z,.outer_loop ; "continue" part of `if (work[i])`
sbc hl,de ; restore HL back to work+i
; is prime ; HL = work+i, BC = i, DE = -(work+work_size), IX = p
; *p++ = i
ld (ix+0),c
ld (ix+1),b
inc ix
inc ix
; inner loop setting the sieve
; for (uint16_t j = i + i + i; j < work_size; j += i + i) work[j] = 1;
push bc
push de
ex de,hl
add hl,bc
jr c,.inner_setup_ov
add hl,bc
jr c,.inner_setup_ov
ld c,l
ld b,h
ex de,hl ; BC = i+i-(work+work_size), HL = work+i
pop de ; DE = -(work+work_size)
jr .inner_loop_entry
.inner_setup_ov:
pop de
pop bc
jr .outer_loop
.inner_loop:
sbc hl,de ; restore hl to hl+j
ld (hl),a ; work[j] = 1
.inner_loop_entry:
add hl,bc ; hl += i+i + test for work_size, none of this overflow
jr nc,.inner_loop
pop bc
jr .outer_loop
.outer_loop_done:
; return p-primes;
pop de ; throw away "work" pointer
pop hl ; primes
ld a,ixl
sub l
ld l,a
ld a,ixh
sbc a,h ; cf:A:L = (p - primes) * 2; // technically 17b result
rra
rr l
ld h,a ; HL = number of primes
ret
.small_size:
ld hl,1
ret ; return value 1 (number of primes in `primes`)
DISPLAY "sieve_a_1 code size: ",/A,$-sieve_a_1
primes: ds 2*3100,$AA
work: ds 26000,$BB ; enough for 2860 primes, ending with 25999
work_end:
work_size EQU work_end-work
SAVESNA "sieve.sna", test_start ;; DEBUG snapshot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment