Skip to content

Instantly share code, notes, and snippets.

@skeeto
Created November 24, 2023 17:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skeeto/2850112a6ffab92cfda22ee5e1c58be5 to your computer and use it in GitHub Desktop.
Save skeeto/2850112a6ffab92cfda22ee5e1c58be5 to your computer and use it in GitHub Desktop.
#define _GNU_SOURCE
#include <assert.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
typedef struct mem_record mem_record_t;
struct mem_record {
mem_record_t *child[2];
mem_record_t *next;
uint64_t in_use_space, in_use_objects, alloc_space, alloc_objects;
uint64_t *call_stack;
uint64_t call_stack_len;
};
typedef struct mem_profile mem_profile_t;
typedef struct {
uint8_t *start;
uint8_t *end;
mem_profile_t *profile;
} arena_t;
struct mem_profile {
mem_record_t *records;
mem_record_t *records_head;
mem_record_t **records_tail;
uint64_t in_use_space, in_use_objects, alloc_space, alloc_objects;
arena_t arena;
};
static void *arena_alloc(arena_t *a, size_t size, size_t align, size_t count);
static uint8_t record_call_stack(uint64_t *dst, uint64_t cap) {
uintptr_t *rbp = __builtin_frame_address(0);
uint64_t len = 0;
while (rbp != 0 && ((uint64_t)rbp & 7) == 0 && *rbp != 0) {
const uintptr_t rip = *(rbp + 1);
rbp = (uintptr_t *)*rbp;
// `rip` points to the return instruction in the caller, once this call is
// done. But: We want the location of the call i.e. the `call xxx`
// instruction, so we subtract one byte to point inside it, which is not
// quite 'at' it, but good enough.
dst[len++] = rip - 1;
if (len >= cap)
return len;
}
return len;
}
static uint64_t hash_call_stack(uint64_t *call_stack, uint64_t call_stack_len) {
uint64_t hash = 0;
for (uint64_t i = 0; i < call_stack_len; i++) {
hash ^= call_stack[i];
hash *= 1111111111111111111;
}
return hash;
}
static mem_record_t *upsert(mem_record_t **r, arena_t *perm, uint64_t *call_stack, uint64_t call_stack_len) {
for (uint64_t h = hash_call_stack(call_stack, call_stack_len); *r; h <<= 1) {
if ((*r)->call_stack_len == call_stack_len &&
memcmp((*r)->call_stack, call_stack, call_stack_len * sizeof(uint64_t)) ==
0) {
return *r;
}
r = &(*r)->child[h>>63];
}
*r = arena_alloc(perm, sizeof(mem_record_t), _Alignof(mem_record_t), 1);
memset(*r, 0, sizeof(mem_record_t));
return *r;
}
static void mem_profile_record_alloc(mem_profile_t *profile,
uint64_t objects_count,
uint64_t bytes_count) {
// Record the call stack by stack walking.
uint64_t call_stack[64] = {0};
uint64_t call_stack_len =
record_call_stack(call_stack, sizeof(call_stack) / sizeof(call_stack[0]));
// Update the sums.
profile->alloc_objects += objects_count;
profile->alloc_space += bytes_count;
profile->in_use_objects += objects_count;
profile->in_use_space += bytes_count;
// Upsert the record.
mem_record_t *r = upsert(&profile->records, &profile->arena, call_stack, call_stack_len);
if (r->call_stack != NULL) {
// Found an existing record, update it.
r->alloc_objects += objects_count;
r->alloc_space += bytes_count;
r->in_use_objects += objects_count;
r->in_use_space += bytes_count;
return;
}
// Not found, insert a new record.
r->alloc_objects = objects_count;
r->alloc_space = bytes_count;
r->in_use_objects = objects_count;
r->in_use_space = bytes_count;
r->call_stack = arena_alloc(&profile->arena, sizeof(uint64_t),
_Alignof(uint64_t), call_stack_len);
memcpy(r->call_stack, call_stack, call_stack_len * sizeof(uint64_t));
r->call_stack_len = call_stack_len;
*profile->records_tail = r;
profile->records_tail = &r->next;
}
static void mem_profile_write(mem_profile_t *profile, FILE *out) {
fprintf(out, "heap profile: %lu: %lu [ %lu: %lu] @ heapprofile\n",
profile->in_use_objects, profile->in_use_space,
profile->alloc_objects, profile->alloc_space);
for (mem_record_t *r = profile->records_head; r; r = r->next) {
fprintf(out, "%lu: %lu [%lu: %lu] @ ", r->in_use_objects, r->in_use_space,
r->alloc_objects, r->alloc_space);
for (uint64_t j = 0; j < r->call_stack_len; j++) {
fprintf(out, "%#lx ", r->call_stack[j]);
}
fputc('\n', out);
}
fputs("\nMAPPED_LIBRARIES:\n", out);
static uint8_t mem[4096] = {0};
int fd = open("/proc/self/maps", O_RDONLY);
assert(fd != -1);
ssize_t read_bytes = read(fd, mem, sizeof(mem));
assert(read_bytes != -1);
close(fd);
fwrite(mem, 1, read_bytes, out);
fflush(out);
}
static void *arena_alloc(arena_t *a, size_t size, size_t align, size_t count) {
size_t available = a->end - a->start;
size_t padding = -(size_t)a->start & (align - 1);
size_t offset = padding + size * count;
if (available < offset) {
fprintf(stderr,
"Out of memory: available=%lu "
"allocation_size=%lu\n",
available, offset);
abort();
}
uint8_t *res = a->start + padding;
a->start += offset;
if (a->profile) {
mem_profile_record_alloc(a->profile, count, offset);
}
return (void *)res;
}
static arena_t arena_new(uint64_t cap, mem_profile_t *profile) {
uint8_t *mem = mmap(NULL, cap, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
arena_t arena = {
.profile = profile,
.start = mem,
.end = mem + cap,
};
return arena;
}
void b(int n, arena_t *arena) {
arena_alloc(arena, sizeof(int), _Alignof(int), n);
}
void a(int n, arena_t *arena) {
arena_alloc(arena, sizeof(int), _Alignof(int), n);
b(n, arena);
}
int main() {
arena_t mem_profile_arena = arena_new(1 << 16, NULL);
mem_profile_t mem_profile = {
.arena = mem_profile_arena,
.records_tail = &mem_profile.records_head,
};
arena_t arena = arena_new(1 << 28, &mem_profile);
for (int i = 0; i < 2; i++)
a(2 * 1024 * 1024, &arena);
b(3 * 1024 * 1024, &arena);
mem_profile_write(&mem_profile, stderr);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment