Created
November 24, 2023 17:46
-
-
Save skeeto/2850112a6ffab92cfda22ee5e1c58be5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE | |
#include <assert.h> | |
#include <fcntl.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/mman.h> | |
#include <unistd.h> | |
typedef struct mem_record mem_record_t; | |
struct mem_record { | |
mem_record_t *child[2]; | |
mem_record_t *next; | |
uint64_t in_use_space, in_use_objects, alloc_space, alloc_objects; | |
uint64_t *call_stack; | |
uint64_t call_stack_len; | |
}; | |
typedef struct mem_profile mem_profile_t; | |
typedef struct { | |
uint8_t *start; | |
uint8_t *end; | |
mem_profile_t *profile; | |
} arena_t; | |
struct mem_profile { | |
mem_record_t *records; | |
mem_record_t *records_head; | |
mem_record_t **records_tail; | |
uint64_t in_use_space, in_use_objects, alloc_space, alloc_objects; | |
arena_t arena; | |
}; | |
static void *arena_alloc(arena_t *a, size_t size, size_t align, size_t count); | |
static uint8_t record_call_stack(uint64_t *dst, uint64_t cap) { | |
uintptr_t *rbp = __builtin_frame_address(0); | |
uint64_t len = 0; | |
while (rbp != 0 && ((uint64_t)rbp & 7) == 0 && *rbp != 0) { | |
const uintptr_t rip = *(rbp + 1); | |
rbp = (uintptr_t *)*rbp; | |
// `rip` points to the return instruction in the caller, once this call is | |
// done. But: We want the location of the call i.e. the `call xxx` | |
// instruction, so we subtract one byte to point inside it, which is not | |
// quite 'at' it, but good enough. | |
dst[len++] = rip - 1; | |
if (len >= cap) | |
return len; | |
} | |
return len; | |
} | |
static uint64_t hash_call_stack(uint64_t *call_stack, uint64_t call_stack_len) { | |
uint64_t hash = 0; | |
for (uint64_t i = 0; i < call_stack_len; i++) { | |
hash ^= call_stack[i]; | |
hash *= 1111111111111111111; | |
} | |
return hash; | |
} | |
static mem_record_t *upsert(mem_record_t **r, arena_t *perm, uint64_t *call_stack, uint64_t call_stack_len) { | |
for (uint64_t h = hash_call_stack(call_stack, call_stack_len); *r; h <<= 1) { | |
if ((*r)->call_stack_len == call_stack_len && | |
memcmp((*r)->call_stack, call_stack, call_stack_len * sizeof(uint64_t)) == | |
0) { | |
return *r; | |
} | |
r = &(*r)->child[h>>63]; | |
} | |
*r = arena_alloc(perm, sizeof(mem_record_t), _Alignof(mem_record_t), 1); | |
memset(*r, 0, sizeof(mem_record_t)); | |
return *r; | |
} | |
static void mem_profile_record_alloc(mem_profile_t *profile, | |
uint64_t objects_count, | |
uint64_t bytes_count) { | |
// Record the call stack by stack walking. | |
uint64_t call_stack[64] = {0}; | |
uint64_t call_stack_len = | |
record_call_stack(call_stack, sizeof(call_stack) / sizeof(call_stack[0])); | |
// Update the sums. | |
profile->alloc_objects += objects_count; | |
profile->alloc_space += bytes_count; | |
profile->in_use_objects += objects_count; | |
profile->in_use_space += bytes_count; | |
// Upsert the record. | |
mem_record_t *r = upsert(&profile->records, &profile->arena, call_stack, call_stack_len); | |
if (r->call_stack != NULL) { | |
// Found an existing record, update it. | |
r->alloc_objects += objects_count; | |
r->alloc_space += bytes_count; | |
r->in_use_objects += objects_count; | |
r->in_use_space += bytes_count; | |
return; | |
} | |
// Not found, insert a new record. | |
r->alloc_objects = objects_count; | |
r->alloc_space = bytes_count; | |
r->in_use_objects = objects_count; | |
r->in_use_space = bytes_count; | |
r->call_stack = arena_alloc(&profile->arena, sizeof(uint64_t), | |
_Alignof(uint64_t), call_stack_len); | |
memcpy(r->call_stack, call_stack, call_stack_len * sizeof(uint64_t)); | |
r->call_stack_len = call_stack_len; | |
*profile->records_tail = r; | |
profile->records_tail = &r->next; | |
} | |
static void mem_profile_write(mem_profile_t *profile, FILE *out) { | |
fprintf(out, "heap profile: %lu: %lu [ %lu: %lu] @ heapprofile\n", | |
profile->in_use_objects, profile->in_use_space, | |
profile->alloc_objects, profile->alloc_space); | |
for (mem_record_t *r = profile->records_head; r; r = r->next) { | |
fprintf(out, "%lu: %lu [%lu: %lu] @ ", r->in_use_objects, r->in_use_space, | |
r->alloc_objects, r->alloc_space); | |
for (uint64_t j = 0; j < r->call_stack_len; j++) { | |
fprintf(out, "%#lx ", r->call_stack[j]); | |
} | |
fputc('\n', out); | |
} | |
fputs("\nMAPPED_LIBRARIES:\n", out); | |
static uint8_t mem[4096] = {0}; | |
int fd = open("/proc/self/maps", O_RDONLY); | |
assert(fd != -1); | |
ssize_t read_bytes = read(fd, mem, sizeof(mem)); | |
assert(read_bytes != -1); | |
close(fd); | |
fwrite(mem, 1, read_bytes, out); | |
fflush(out); | |
} | |
static void *arena_alloc(arena_t *a, size_t size, size_t align, size_t count) { | |
size_t available = a->end - a->start; | |
size_t padding = -(size_t)a->start & (align - 1); | |
size_t offset = padding + size * count; | |
if (available < offset) { | |
fprintf(stderr, | |
"Out of memory: available=%lu " | |
"allocation_size=%lu\n", | |
available, offset); | |
abort(); | |
} | |
uint8_t *res = a->start + padding; | |
a->start += offset; | |
if (a->profile) { | |
mem_profile_record_alloc(a->profile, count, offset); | |
} | |
return (void *)res; | |
} | |
static arena_t arena_new(uint64_t cap, mem_profile_t *profile) { | |
uint8_t *mem = mmap(NULL, cap, PROT_READ | PROT_WRITE, | |
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); | |
arena_t arena = { | |
.profile = profile, | |
.start = mem, | |
.end = mem + cap, | |
}; | |
return arena; | |
} | |
void b(int n, arena_t *arena) { | |
arena_alloc(arena, sizeof(int), _Alignof(int), n); | |
} | |
void a(int n, arena_t *arena) { | |
arena_alloc(arena, sizeof(int), _Alignof(int), n); | |
b(n, arena); | |
} | |
int main() { | |
arena_t mem_profile_arena = arena_new(1 << 16, NULL); | |
mem_profile_t mem_profile = { | |
.arena = mem_profile_arena, | |
.records_tail = &mem_profile.records_head, | |
}; | |
arena_t arena = arena_new(1 << 28, &mem_profile); | |
for (int i = 0; i < 2; i++) | |
a(2 * 1024 * 1024, &arena); | |
b(3 * 1024 * 1024, &arena); | |
mem_profile_write(&mem_profile, stderr); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment