Last active
February 26, 2023 10:28
-
-
Save marccgk/f44a4008bfad6dec18079f3b9b50038c to your computer and use it in GitHub Desktop.
Virtual Memory Linear Arena optimizations and tests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
typedef signed char S8; | |
typedef signed short S16; | |
typedef signed int S32; | |
typedef signed long long S64; | |
typedef unsigned char U8; | |
typedef unsigned short U16; | |
typedef unsigned int U32; | |
typedef unsigned long long U64; | |
#define cast(type) (type) | |
#define ASSERT(expression) \ | |
do { \ | |
if (!(expression)) { \ | |
*(volatile S32 *)0 = 0; \ | |
} \ | |
} while (0) | |
#define SIZE_B(b) (b) | |
#define SIZE_KB(kb) SIZE_B((kb)*1024ull) | |
#define SIZE_MB(mb) SIZE_KB((mb)*1024ull) | |
#define SIZE_GB(gb) SIZE_MB((gb)*1024ull) | |
#define SIZE_TB(tb) SIZE_GB((tb)*1024ull) | |
#define MIN(a, b) (((a) < (b)) ? (a) : (b)) | |
#define MAX(a, b) (((a) > (b)) ? (a) : (b)) | |
// NOTE: default OS page size | |
#define PAGE_SIZE SIZE_KB(4) | |
#define UNLOCKED 0 | |
#define LOCKED 1 | |
#define WIN32_LEAN_AND_MEAN | |
#include <windows.h> | |
// NOTE: returns previously held value | |
#define atomic_cas_u32(dest_ptr, cmp, value) \ | |
cast(U32) _InterlockedCompareExchange(cast(long *)(dest_ptr), cast(long)(value), cast(long)(cmp)) | |
// NOTE: returns previously held value | |
#define atomic_cas_u64(dest_ptr, cmp, value) \ | |
cast(U64) _InterlockedCompareExchange64(cast(__int64 *)(dest_ptr), cast(__int64)(value), cast(__int64)(cmp)) | |
// NOTE: returns previously held value | |
#define atomic_write_u32(dest_ptr, value) cast(U32) _InterlockedExchange(cast(long *)(dest_ptr), cast(long)(value)) | |
// NOTE: returns previously held value | |
#define atomic_add_u64(dest_ptr, value) \ | |
cast(U64) _InterlockedExchangeAdd64(cast(__int64 *)(dest_ptr), cast(__int64)(value)) | |
// NOTE: Virtual Memory Linear Arena | |
typedef struct { | |
U8 *mem; | |
volatile U64 bytes_used; | |
volatile U64 page_count; | |
volatile U32 lock; | |
} Virtual_Memory_Linear_Arena; | |
void vmla_create(Virtual_Memory_Linear_Arena *arena, U64 max_size) { | |
*arena = (Virtual_Memory_Linear_Arena){0}; | |
arena->mem = VirtualAlloc(0, max_size, MEM_RESERVE, PAGE_NOACCESS); | |
} | |
void vmla_reset(Virtual_Memory_Linear_Arena *arena) { | |
VirtualFree(arena->mem, arena->page_count * PAGE_SIZE, MEM_DECOMMIT); | |
arena->bytes_used = 0; | |
arena->page_count = 0; | |
} | |
void vmla_destroy(Virtual_Memory_Linear_Arena *arena) { | |
VirtualFree(arena->mem, 0, MEM_RELEASE); | |
*arena = (Virtual_Memory_Linear_Arena){0}; | |
} | |
#define vmla_push_struct(arena, type) cast(type *) vmla_push_size(arena, sizeof(type)) | |
#define vmla_push_array(arena, type, count) cast(type *) vmla_push_size(arena, sizeof(type) * (count)) | |
void *vmla_push_size_v1(Virtual_Memory_Linear_Arena *arena, U64 size) { | |
void *result = 0; | |
U8 *memory = arena->mem; | |
do { | |
U64 bytes_used = arena->bytes_used; | |
U64 page_count = arena->page_count; | |
U64 available = page_count * PAGE_SIZE; | |
U64 new_bytes_used = bytes_used + size; | |
if (new_bytes_used <= available) { | |
if (atomic_cas_u64(&arena->bytes_used, bytes_used, new_bytes_used) == bytes_used) { | |
result = memory + bytes_used; | |
} | |
} else { | |
if (atomic_cas_u32(&arena->lock, UNLOCKED, LOCKED) == UNLOCKED) { | |
bytes_used = arena->bytes_used; | |
page_count = arena->page_count; | |
available = page_count * PAGE_SIZE; | |
new_bytes_used = bytes_used + size; | |
if (new_bytes_used > available) { | |
U64 pages_needed = MAX(size / PAGE_SIZE, 2); // NOTE: allocate at least 2 pages | |
U64 new_pages = MAX((page_count * 3 / 2) - page_count, | |
pages_needed); // NOTE: 1.5 growth factor | |
VirtualAlloc(memory + available, new_pages * PAGE_SIZE, MEM_COMMIT, PAGE_READWRITE); | |
atomic_add_u64(&arena->page_count, new_pages); | |
} | |
atomic_write_u32(&arena->lock, UNLOCKED); | |
} | |
} | |
} while (!result); | |
return result; | |
} | |
void *vmla_push_size_v2(Virtual_Memory_Linear_Arena *arena, U64 size) { | |
void *result = 0; | |
U8 *memory = arena->mem; | |
U64 bytes_used = atomic_add_u64(&arena->bytes_used, size); | |
result = memory + bytes_used; | |
do { | |
U64 page_count = arena->page_count; | |
U64 available = page_count * PAGE_SIZE; | |
U64 new_bytes_used = bytes_used + size; | |
if (new_bytes_used > available) { | |
if (atomic_cas_u32(&arena->lock, UNLOCKED, LOCKED) == UNLOCKED) { | |
bytes_used = arena->bytes_used; | |
page_count = arena->page_count; | |
available = page_count * PAGE_SIZE; | |
new_bytes_used = bytes_used + size; | |
if (new_bytes_used > available) { | |
U64 pages_needed = MAX(size / PAGE_SIZE, 2); // NOTE: allocate at least 2 pages | |
U64 new_pages = MAX((page_count * 3 / 2) - page_count, | |
pages_needed); // NOTE: 1.5 growth factor | |
VirtualAlloc(memory + available, new_pages * PAGE_SIZE, MEM_COMMIT, PAGE_READWRITE); | |
atomic_add_u64(&arena->page_count, new_pages); | |
} | |
atomic_write_u32(&arena->lock, UNLOCKED); | |
} | |
} | |
} while (!result); | |
return result; | |
} | |
void *vmla_push_size(Virtual_Memory_Linear_Arena *arena, U64 size) { | |
return vmla_push_size_v2(arena, size); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@echo off | |
rem Visual Studio 2017 | |
set ProgramFiles(x86)=%ProgramFiles% (x86) | |
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Auxiliary\Build\vcvarsall.bat" x64 | |
rem Visual Studio 2015 | |
rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 | |
set warnings=-Wall -WX -wd4100 -wd4101 -wd4127 -wd4189 -wd4201 -wd4204 -wd4255 -wd4505 -wd4701 -wd4702 -wd4710 -wd4711 -wd4820 -wd4514 -wd4626 -wd5027 | |
set common_compiler_flags=-O2 -nologo -Gm- -GR- -EHsc- -Oi -Z7 -Fabuild/ -FAs -Fdbuild/ -Febuild/ -Fobuild/ | |
set common_linker_flags=-opt:ref -incremental:no | |
if not exist "build" mkdir build | |
cl %warnings% %common_compiler_flags% vmla_test.c -link %common_linker_flags% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "vmla.c" | |
#include <stdio.h> | |
#define QUEUE_MAX_SIZE SIZE_TB(1) | |
#define ITER_COUNT 10000000 | |
#define ITER_SIZE 4 | |
#define THREAD_COUNT 4 | |
U32 g_multithreaded_test = 1; | |
volatile U64 g_done_count = 0; | |
U64 g_cycles[THREAD_COUNT]; | |
U64 cpu_cycles(void) { | |
LARGE_INTEGER qpc; | |
QueryPerformanceCounter(&qpc); | |
U64 result = cast(U64)qpc.QuadPart; | |
return result; | |
} | |
void test_vmla_push_data(Virtual_Memory_Linear_Arena *arena) { | |
for (U32 i = 0; i < ITER_COUNT; ++i) { | |
vmla_push_size(arena, ITER_SIZE); | |
} | |
atomic_add_u64(&g_done_count, 1); | |
}; | |
typedef struct { | |
Virtual_Memory_Linear_Arena *arena; | |
U32 thread_id; | |
} Test_Data; | |
DWORD test_vmla_thread_fn(void *param) { | |
Test_Data *test_data = cast(Test_Data *)param; | |
Virtual_Memory_Linear_Arena *arena = test_data->arena; | |
U64 cycles_begin = cpu_cycles(); | |
test_vmla_push_data(arena); | |
U64 cycles_end = cpu_cycles(); | |
g_cycles[test_data->thread_id] = cycles_end - cycles_begin; | |
return 0; | |
} | |
void test_virtual_memory_linear_arena(void) { | |
Virtual_Memory_Linear_Arena arena; | |
vmla_create(&arena, QUEUE_MAX_SIZE); | |
if (!g_multithreaded_test) { | |
Test_Data data = { 0, 0 }; | |
data.arena = &arena; | |
test_vmla_thread_fn(&data); | |
U64 bytes_used = arena.bytes_used; | |
U64 expected = ITER_COUNT * ITER_SIZE; | |
ASSERT(bytes_used == expected); | |
} else { | |
Test_Data data[THREAD_COUNT]; | |
for (U32 i = 0; i < THREAD_COUNT; ++i) { | |
Test_Data *thread_data = data + i; | |
thread_data->arena = &arena; | |
thread_data->thread_id = i; | |
CreateThread(0, 0, test_vmla_thread_fn, thread_data, 0, 0); | |
} | |
U64 done = g_done_count; | |
while (done < THREAD_COUNT) { | |
Sleep(5); | |
done = g_done_count; | |
} | |
U64 bytes_used = arena.bytes_used; | |
U64 expected = ITER_COUNT * ITER_SIZE * THREAD_COUNT; | |
ASSERT(bytes_used == expected); | |
U64 cycles_total = 0; | |
for (U32 i = 0; i < THREAD_COUNT; ++i) { | |
cycles_total += g_cycles[i]; | |
} | |
cycles_total /= THREAD_COUNT; | |
printf("Avg cycles / thread = %zu\n", cycles_total); | |
} | |
vmla_destroy(&arena); | |
} | |
S32 main(S32 argc, S8 **argv) { | |
test_virtual_memory_linear_arena(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment