Virtual Memory Linear Arena optimizations and tests

vmla.c

#include <stdint.h>
typedef signed char S8;
typedef signed short S16;
typedef signed int S32;
typedef signed long long S64;
typedef unsigned char U8;
typedef unsigned short U16;
typedef unsigned int U32;
typedef unsigned long long U64;
#define cast(type) (type)
#define ASSERT(expression) \
    do { \
        if (!(expression)) { \
            *(volatile S32 *)0 = 0; \
        } \
    } while (0)
#define SIZE_B(b) (b)
#define SIZE_KB(kb) SIZE_B((kb)*1024ull)
#define SIZE_MB(mb) SIZE_KB((mb)*1024ull)
#define SIZE_GB(gb) SIZE_MB((gb)*1024ull)
#define SIZE_TB(tb) SIZE_GB((tb)*1024ull)
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
// NOTE: default OS page size
#define PAGE_SIZE SIZE_KB(4)
#define UNLOCKED 0
#define LOCKED 1
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
// NOTE: returns previously held value
#define atomic_cas_u32(dest_ptr, cmp, value) \
    cast(U32) _InterlockedCompareExchange(cast(long *)(dest_ptr), cast(long)(value), cast(long)(cmp))
// NOTE: returns previously held value
#define atomic_cas_u64(dest_ptr, cmp, value) \
    cast(U64) _InterlockedCompareExchange64(cast(__int64 *)(dest_ptr), cast(__int64)(value), cast(__int64)(cmp))
// NOTE: returns previously held value
#define atomic_write_u32(dest_ptr, value) cast(U32) _InterlockedExchange(cast(long *)(dest_ptr), cast(long)(value))
// NOTE: returns previously held value
#define atomic_add_u64(dest_ptr, value) \
    cast(U64) _InterlockedExchangeAdd64(cast(__int64 *)(dest_ptr), cast(__int64)(value))
// NOTE: Virtual Memory Linear Arena
typedef struct {
    U8 *mem;                  // NOTE: base of the reserved address range
    volatile U64 bytes_used;  // NOTE: allocation cursor, advanced atomically
    volatile U64 page_count;  // NOTE: number of committed pages
    volatile U32 lock;        // NOTE: spinlock guarding page commits
} Virtual_Memory_Linear_Arena;
void vmla_create(Virtual_Memory_Linear_Arena *arena, U64 max_size) {
    *arena = (Virtual_Memory_Linear_Arena){0};
    // NOTE: reserve address space only; pages are committed on demand by vmla_push_size
    arena->mem = VirtualAlloc(0, max_size, MEM_RESERVE, PAGE_NOACCESS);
    ASSERT(arena->mem);
}
void vmla_reset(Virtual_Memory_Linear_Arena *arena) {
    // NOTE: decommit the pages but keep the address range reserved
    VirtualFree(arena->mem, arena->page_count * PAGE_SIZE, MEM_DECOMMIT);
    arena->bytes_used = 0;
    arena->page_count = 0;
}
void vmla_destroy(Virtual_Memory_Linear_Arena *arena) {
    VirtualFree(arena->mem, 0, MEM_RELEASE);
    *arena = (Virtual_Memory_Linear_Arena){0};
}
#define vmla_push_struct(arena, type) cast(type *) vmla_push_size(arena, sizeof(type))
#define vmla_push_array(arena, type, count) cast(type *) vmla_push_size(arena, sizeof(type) * (count))
void *vmla_push_size_v1(Virtual_Memory_Linear_Arena *arena, U64 size) {
    void *result = 0;
    U8 *memory = arena->mem;
    do {
        U64 bytes_used = arena->bytes_used;
        U64 page_count = arena->page_count;
        U64 available = page_count * PAGE_SIZE;
        U64 new_bytes_used = bytes_used + size;
        if (new_bytes_used <= available) {
            // NOTE: the allocation fits in committed pages; claim it with a CAS,
            // retrying from the top if another thread won the race
            if (atomic_cas_u64(&arena->bytes_used, bytes_used, new_bytes_used) == bytes_used) {
                result = memory + bytes_used;
            }
        } else {
            // NOTE: not enough committed memory; one thread grows, the rest spin
            if (atomic_cas_u32(&arena->lock, UNLOCKED, LOCKED) == UNLOCKED) {
                // NOTE: re-read under the lock; another thread may have grown already
                bytes_used = arena->bytes_used;
                page_count = arena->page_count;
                available = page_count * PAGE_SIZE;
                new_bytes_used = bytes_used + size;
                if (new_bytes_used > available) {
                    // NOTE: round up to whole pages and allocate at least 2
                    U64 pages_needed = MAX((size + PAGE_SIZE - 1) / PAGE_SIZE, 2);
                    U64 new_pages = MAX((page_count * 3 / 2) - page_count,
                                        pages_needed); // NOTE: 1.5 growth factor
                    VirtualAlloc(memory + available, new_pages * PAGE_SIZE, MEM_COMMIT, PAGE_READWRITE);
                    atomic_add_u64(&arena->page_count, new_pages);
                }
                atomic_write_u32(&arena->lock, UNLOCKED);
            }
        }
    } while (!result);
    return result;
}
// NOTE: v2 replaces v1's CAS retry loop with a single fetch-and-add: each
// thread reserves its byte range up front and only spins while the committed
// region does not yet cover that range
void *vmla_push_size_v2(Virtual_Memory_Linear_Arena *arena, U64 size) {
    U8 *memory = arena->mem;
    // NOTE: fetch-and-add returns the previous value: this thread now owns
    // [offset, offset + size) without ever retrying
    U64 offset = atomic_add_u64(&arena->bytes_used, size);
    void *result = memory + offset;
    // NOTE: spin until the committed region covers the reserved range;
    // returning earlier would hand out pages another thread is still committing
    while (offset + size > arena->page_count * PAGE_SIZE) {
        if (atomic_cas_u32(&arena->lock, UNLOCKED, LOCKED) == UNLOCKED) {
            // NOTE: bytes_used already includes every reservation made so far
            // (this thread's add happened above), so commit enough to cover all of them
            U64 bytes_used = arena->bytes_used;
            U64 page_count = arena->page_count;
            U64 available = page_count * PAGE_SIZE;
            if (bytes_used > available) {
                // NOTE: round up to whole pages and allocate at least 2
                U64 pages_needed = MAX((bytes_used - available + PAGE_SIZE - 1) / PAGE_SIZE, 2);
                U64 new_pages = MAX((page_count * 3 / 2) - page_count,
                                    pages_needed); // NOTE: 1.5 growth factor
                VirtualAlloc(memory + available, new_pages * PAGE_SIZE, MEM_COMMIT, PAGE_READWRITE);
                atomic_add_u64(&arena->page_count, new_pages);
            }
            atomic_write_u32(&arena->lock, UNLOCKED);
        }
    }
    return result;
}
void *vmla_push_size(Virtual_Memory_Linear_Arena *arena, U64 size) {
    return vmla_push_size_v2(arena, size);
}
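
For reference, a minimal usage sketch of the API above (not part of the original gist; the Entity struct and the reserve size are hypothetical stand-ins):

typedef struct {
    U32 id;
    U32 flags;
} Entity;

void usage_sketch(void) {
    Virtual_Memory_Linear_Arena arena;
    vmla_create(&arena, SIZE_GB(64));                      // NOTE: reserves address space, commits nothing yet
    Entity *one  = vmla_push_struct(&arena, Entity);       // NOTE: single struct
    Entity *many = vmla_push_array(&arena, Entity, 1024);  // NOTE: contiguous array
    one->id = 1;
    many[0].id = 2;
    vmla_reset(&arena);                                    // NOTE: decommit pages, keep the reservation
    vmla_destroy(&arena);                                  // NOTE: release the address range
}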

build.bat

@echo off
rem NOTE: make sure ProgramFiles(x86) is set before calling vcvarsall.bat
set ProgramFiles(x86)=%ProgramFiles% (x86)
rem Visual Studio 2017
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Auxiliary\Build\vcvarsall.bat" x64
rem Visual Studio 2015
rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
set warnings=-Wall -WX -wd4100 -wd4101 -wd4127 -wd4189 -wd4201 -wd4204 -wd4255 -wd4505 -wd4701 -wd4702 -wd4710 -wd4711 -wd4820 -wd4514 -wd4626 -wd5027
set common_compiler_flags=-O2 -nologo -Gm- -GR- -EHsc- -Oi -Z7 -Fabuild/ -FAs -Fdbuild/ -Febuild/ -Fobuild/
set common_linker_flags=-opt:ref -incremental:no
if not exist "build" mkdir build
cl %warnings% %common_compiler_flags% vmla_test.c -link %common_linker_flags%
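
To build and run (assuming the Visual Studio path above matches the local install; the -Febuild/ flag puts the executable in build\):

build.bat
build\vmla_test.exe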
#include "vmla.c"
#include <stdio.h>
#define QUEUE_MAX_SIZE SIZE_TB(1)
#define ITER_COUNT 10000000
#define ITER_SIZE 4
#define THREAD_COUNT 4
U32 g_multithreaded_test = 1;
volatile U64 g_done_count = 0;
U64 g_cycles[THREAD_COUNT];
// NOTE: returns QueryPerformanceCounter ticks, not true CPU cycles
U64 cpu_cycles(void) {
    LARGE_INTEGER qpc;
    QueryPerformanceCounter(&qpc);
    U64 result = cast(U64)qpc.QuadPart;
    return result;
}
void test_vmla_push_data(Virtual_Memory_Linear_Arena *arena) {
    for (U32 i = 0; i < ITER_COUNT; ++i) {
        vmla_push_size(arena, ITER_SIZE);
    }
    atomic_add_u64(&g_done_count, 1);
}
typedef struct {
    Virtual_Memory_Linear_Arena *arena;
    U32 thread_id;
} Test_Data;
// NOTE: WINAPI matches the calling convention CreateThread expects
DWORD WINAPI test_vmla_thread_fn(void *param) {
    Test_Data *test_data = cast(Test_Data *)param;
    Virtual_Memory_Linear_Arena *arena = test_data->arena;
    U64 cycles_begin = cpu_cycles();
    test_vmla_push_data(arena);
    U64 cycles_end = cpu_cycles();
    g_cycles[test_data->thread_id] = cycles_end - cycles_begin;
    return 0;
}
void test_virtual_memory_linear_arena(void) {
    Virtual_Memory_Linear_Arena arena;
    vmla_create(&arena, QUEUE_MAX_SIZE);
    if (!g_multithreaded_test) {
        Test_Data data = { &arena, 0 };
        test_vmla_thread_fn(&data);
        U64 bytes_used = arena.bytes_used;
        U64 expected = ITER_COUNT * ITER_SIZE;
        ASSERT(bytes_used == expected);
    } else {
        Test_Data data[THREAD_COUNT];
        for (U32 i = 0; i < THREAD_COUNT; ++i) {
            Test_Data *thread_data = data + i;
            thread_data->arena = &arena;
            thread_data->thread_id = i;
            CreateThread(0, 0, test_vmla_thread_fn, thread_data, 0, 0);
        }
        // NOTE: crude join: poll until every worker has bumped g_done_count
        U64 done = g_done_count;
        while (done < THREAD_COUNT) {
            Sleep(5);
            done = g_done_count;
        }
        U64 bytes_used = arena.bytes_used;
        U64 expected = ITER_COUNT * ITER_SIZE * THREAD_COUNT;
        ASSERT(bytes_used == expected);
        U64 cycles_total = 0;
        for (U32 i = 0; i < THREAD_COUNT; ++i) {
            cycles_total += g_cycles[i];
        }
        cycles_total /= THREAD_COUNT;
        printf("Avg QPC ticks / thread = %llu\n", cycles_total);
    }
    vmla_destroy(&arena);
}
S32 main(S32 argc, char **argv) {
    test_virtual_memory_linear_arena();
    return 0;
}