Skip to content

Instantly share code, notes, and snippets.

@pitust
Created March 30, 2022 06:02
Show Gist options
  • Save pitust/cda287b2df5ba3c991774460ff849013 to your computer and use it in GitHub Desktop.
Save pitust/cda287b2df5ba3c991774460ff849013 to your computer and use it in GitHub Desktop.
// it's a simple mark-and-sweep algorithm
// binary layout:
// text, data and bss must be after `loadbase`, then comes the .heap section, then `stacksize` which must come after .heap
#define heapsize 0x10000
#define loadbase 0x200000
#define stacksize 0x4000
#define kMarkFree -1
#define kMarking 1
#define kReleasing 99
static int phase = 1;
typedef unsigned long size_t;
extern char the_stack[];
void todo(const char* str);
static __attribute__((always_inline)) int get_unmarked() { return phase == 1 ? 0 : 2; }
static __attribute__((always_inline)) int get_marked() { return phase == 1 ? 2 : 0; }
static void flip() { phase = phase == 1 ? 2 : 1; }
typedef struct gc_object_header {
struct gc_object_header* next;
int mark, flags;
size_t size;
void(*fini)();
char data[0];
} gc_object_header_t;
__attribute__((aligned(16), section(".heap"))) unsigned char heap_data[heapsize] = {0};
static gc_object_header_t* list;
static gc_object_header_t* freelist;
static void trace(const char* str, void* p) {
while (*str) asm("out %%al,$0xe9"::"a"(*str++));
size_t pp = (size_t)p;
for (int i = 0;i < 16;i++) {
asm("out %%al,$0xe9"::"a"((pp >> 60)["0123456789abcdef"]));
pp <<= 4;
}
asm("out %%al,$0xe9"::"a"('\n'));
}
static void mark_range(void* start, void* stop) {
while (start < stop) {
void* word = *(void**)start;
gc_object_header_t* iter = list;
while (iter) {
if (iter->data == word) {
if (iter->mark == get_unmarked()) {
iter->mark = get_marked();
mark_range(word, word + iter->size - sizeof(gc_object_header_t));
}
}
iter = iter->next;
}
start = (void*)&((void**)start)[1];
}
}
void do_collect() {
flip();
gc_object_header_t* newlist = (void*)0;
void* top = __builtin_frame_address(0);
mark_range(top, &the_stack[stacksize]);
mark_range((void*)loadbase, heap_data);
while (list) {
gc_object_header_t* iter = list;
list = list->next;
if (iter->mark == get_unmarked()) {
if (iter->fini) iter->fini(iter->data);
iter->next = freelist;
iter->fini = (void*)0;
freelist = iter;
} else {
iter->next = newlist;
newlist = iter;
}
}
list = newlist;
}
void wrapped_collect();
asm(".global wrapped_collect" "\n"
"wrapped_collect:" "\n"
"popq %rax" "\n"
"pushq %rbx" "\n"
"pushq %rcx" "\n"
"pushq %rdx" "\n"
"pushq %rsi" "\n"
"pushq %rdi" "\n"
"pushq %rbp" "\n"
"pushq %r8" "\n"
"pushq %r9" "\n"
"pushq %r10" "\n"
"pushq %r11" "\n"
"pushq %r12" "\n"
"pushq %r13" "\n"
"pushq %r14" "\n"
"pushq %r15" "\n"
"pushq %rax" "\n"
"call do_collect" "\n"
"popq %rax" "\n"
"popq %r15" "\n"
"popq %r14" "\n"
"popq %r13" "\n"
"popq %r12" "\n"
"popq %r11" "\n"
"popq %r10" "\n"
"popq %r9" "\n"
"popq %r8" "\n"
"popq %rbp" "\n"
"popq %rdi" "\n"
"popq %rsi" "\n"
"popq %rdx" "\n"
"popq %rcx" "\n"
"popq %rbx" "\n"
"pushq %rax" "\n"
"retq");
void gc_initialize() {
list = (void*)0;
freelist = (void*)heap_data;
freelist->next = (void*)0;
freelist->size = heapsize;
freelist->mark = get_marked();
}
void gc_collect() {
wrapped_collect();
}
void* gc_allocate(size_t size) {
if (!freelist) {
gc_collect();
if (!freelist) todo("error: kernel heap OOM");
}
if (size & 0xf) size += sizeof(gc_object_header_t) - (size & 0xf);
size += sizeof(gc_object_header_t);
int phase = 0;
redo:;
gc_object_header_t* iter = freelist;
gc_object_header_t** iterctl = &freelist;
gc_object_header_t* optimal = (void*)0;
gc_object_header_t** optctl = (void*)0;
while (iter) {
if (iter->size >= size && (!optimal || iter->size < optimal->size)) {
optimal = iter;
optctl = iterctl;
// optimization: once we have a perfect fit, stop looking for a better fit
if (iter->size == size) break;
}
iterctl = &iter->next;
iter = iter->next;
}
if (!optimal || optimal->size < size) {
if (phase) todo("err: heap frag/oom");
gc_collect();
phase = 1;
goto redo;
}
if (optimal->size > size + sizeof(gc_object_header_t)) {
// split the chunk
optimal->size -= size;
gc_object_header_t* newchunk = (void*)(optimal) + optimal->size;
newchunk->next = list;
list = newchunk;
newchunk->mark = get_marked();
newchunk->size = size;
newchunk->flags = 0;
newchunk->fini = (void*)0;
return newchunk->data;
} else {
// entirely move chunk as marked
optimal->mark = get_marked();
*optctl = optimal->next;
optimal->next = list;
list = optimal;
optimal->fini = (void*)0;
optimal->flags = 0;
return optimal->data;
}
}
void gc_set_fini(void* ptr, void(*func)()) {
gc_object_header_t* iter = list;
while (iter) {
if (iter->data == ptr) {
iter->fini = func;
return;
}
iter = iter->next;
}
todo("no fini");
}
void* memcpy(void* dst, const void* src, unsigned long len);
void* gc_reallocate(void* old, int newsz) {
void* np = gc_allocate(newsz);
gc_object_header_t* iter = list;
while (iter) {
if (iter->data == old) memcpy(np, old, newsz < iter->size ? newsz : iter->size);
iter = iter->next;
}
return np;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment