Skip to content

Instantly share code, notes, and snippets.

@brion
Last active May 9, 2022
Embed
What would you like to do?
Quick sketch of a minimal sweep function for a 6502 retro computing GC implementation with fixed-size LISP-style nodes
; Note fixed addresses are illustrative, not real
.org $4000
; Zero page variables
scratch1 = $80
scratch2 = $82
freelist = $84 ; ref to next free node
gc_color = $86 ; current representation of "white" for tricolor marking
heap_start = $8000
heap_end = $c000
gc_tag_mask = $c0 ; top 2 bits are for GC coloring + free marker
gc_tag_free = $00 ; this node is free. validity of freelist node marker is in a lower tag bit
gc_tag_check = $40 ; has been seen from a root or modified during incremental GC
gc_tag_zero = $80 ; represents either 'white' (reapable) or 'black' (live) sets
gc_tag_one = $c0 ; the opposite state
.struct Node
tag .byte
left .word
right .word
.endstruct
; Do a fast, minimal sweep after marking is complete.
; This cannot be interrupted, so must be as fast as
; possible.
;
; Current estimate is:
; - 12 cycles per node (+8 if freed)
; - 14 fixed cycles per unroll of 5 nodes (10x per page)
; - 2 + 41 = 43 fixed cycles per page
; - 61 + 6 = 67 fixed cycles
; - 64 * (12 * 51 + 14 * 10 + 41) + 67
; - 64 * (612 + 140 + 41) + 67
; - 64 * 793 + 40
; - 50,792 total cycles
;
; for a 16 KiB heap (3,264 nodes over 64 pages)
; That's about 28.3 ms on an Atari at 1.79 MHz.
;
; Invariants:
; * there are no remaining nodes in 'check' set
; * all live, reachable nodes are in the 'black' set
; * all unreachable nodes are in the 'white' set
; * gc_color is set to current value for the 'white' set
;
sweep:
; use scratch1 as index
lda #heap_start.lobyte ; 2 cycles
sta scratch1 ; 3 cycles
lda #heap_start.hibyte ; 2 cycles
sta scratch1 + 1 ; 3 cycles
sta sweep_unroll1 + 2 ; 4 cycles
sta sweep_unroll2 + 2 ; 4 cycles
sta sweep_unroll3 + 2 ; 4 cycles
sta sweep_unroll4 + 2 ; 4 cycles
sta sweep_unroll5 + 2 ; 4 cycles
sta sweep_unroll255 + 2 ; 4 cycles
; save some cycles per node with self-modifying code
; (sickos.jpg)
lda gc_color ; 3 cycles
sta sweep_load_opcode1 + 2 ; 4 cycles
sta sweep_load_opcode2 + 2 ; 4 cycles
sta sweep_load_opcode3 + 2 ; 4 cycles
sta sweep_load_opcode4 + 2 ; 4 cycles
sta sweep_load_opcode5 + 2 ; 4 cycles
sta sweep_load_opcode255 + 2 ; 4 cycles
sweep_page:
; Each 256-byte pages contains 51 nodes (5 bytes each)
; One byte per page is wasted, not currently used
; But this makes it much easier to iterate :D
;
; y = 0
ldy #00 ; 2 cycles
; check if the GC tag matches the reaping color
sweep_node:
; Unroll the loop 5x to reduce total cycles
sweep_unroll1:
ldy #00 ; 2 cycles
sweep_load_opcode1:
lda heap_start,y ; 4 cycles - self-modified
and #gc_tag_mask ; 2 cycles
sweep_cmp_opcode1:
cmp #ff ; 2 cycles - self-modified
bne sweep_unroll2 ; 2 cycles
; mark it as free with null free type. it'll be added to the freelist later
lda #0 ; 2 cycles
sta (scratch1),y ; 6 cycles
sweep_unroll2:
ldy #05 ; 2 cycles
sweep_load_opcode2:
lda heap_start,y ; 4 cycles - self-modified
and #gc_tag_mask ; 2 cycles
sweep_cmp_opcode2:
cmp #ff ; 2 cycles - self-modified
bne sweep_unroll3 ; 2 cycles
lda #0 ; 2 cycles
sta (scratch1),y ; 6 cycles
sweep_unroll3:
ldy #10 ; 2 cycles
sweep_load_opcode3:
lda heap_start,y ; 4 cycles - self-modified
and #gc_tag_mask ; 2 cycles
sweep_cmp_opcode3:
cmp #ff ; 2 cycles - self-modified
bne sweep_unroll4 ; 2 cycles
lda #0 ; 2 cycles
sta (scratch1),y ; 6 cycles
sweep_unroll4:
ldy #15 ; 2 cycles
sweep_load_opcode4:
lda heap_start,y ; 4 cycles - self-modified
and #gc_tag_mask ; 2 cycles
sweep_cmp_opcode4:
cmp #ff ; 2 cycles - self-modified
bne sweep_unroll5 ; 2 cycles
lda #0 ; 2 cycles
sta (scratch1),y ; 6 cycles
sweep_unroll5:
ldy #20 ; 2 cycles
sweep_load_opcode5:
lda heap_start,y ; 4 cycles - self-modified
and #gc_tag_mask ; 2 cycles
sweep_cmp_opcode5:
cmp #ff ; 2 cycles - self-modified
bne sweep_next5 ; 2 cycles
lda #0 ; 2 cycles
sta (scratch1),y ; 6 cycles
sweep_next:
; scratch1 += 25
lda scratch1 ; 3 cycles
clc ; 2 cycles
adc #25 ; 2 cycles
sta scratch1 ; 3 cycles
; loop unroll end condition at y == 250
cmp #250 ; 2 cycles
bne sweep_node ; 2 cycles
; one last node :D
sweep_unroll255:
ldy #0 ; 2 cycles
sweep_load_opcode255:
lda heap_start,y ; 4 cycles - self-modified
and #gc_tag_mask ; 2 cycles
sweep_cmp_opcode255:
cmp #ff ; 2 cycles - self-modified
bne sweep_next_page ; 2 cycles
lda #0 ; 2 cycles
sta (scratch1),y ; 6 cycles
sweep_next_page:
; scratch1 += 256
lda #00 ; 2 cycles
sta scratch1 ; 3 cycles
inc scratch1 + 1 ; 5 cycles
lda scratch1 + 1 ; 3 cycles
sta sweep_load_opcode1 + 2 ; 4 cycles
sta sweep_load_opcode2 + 2 ; 4 cycles
sta sweep_load_opcode3 + 2 ; 4 cycles
sta sweep_load_opcode4 + 2 ; 4 cycles
sta sweep_load_opcode5 + 2 ; 4 cycles
sta sweep_load_opcode255 + 2 ; 4 cycles
; end condition; scratch1 != heap_end
cmp #heap_end.hibyte ; 2 cycles
bne sweep_page ; 2 cycles
rts ; 6 cycles
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment