@AshishBhattarai
Last active November 22, 2024 18:21
Two-Level Segregated Fit memory allocator
const std = @import("std");
const Allocator = std.mem.Allocator;
const MemoryPool = std.heap.MemoryPool;

pub fn bit_scan_msb(mask: usize) u8 {
    return 63 - @clz(mask);
}

pub fn bit_scan_lsb(mask: usize) u8 {
    return @ctz(mask);
}

pub fn is_pow_two(num: usize) bool {
    return (num & (num - 1)) == 0 and num > 1;
}

/// Two-Level Segregated Fit memory allocator
/// https://ricefields.me/2024/04/20/tlsf-allocator.html
///
/// Ideally you would store the free-list nodes as a header at the start of each memory block.
/// This implementation doesn't do so and instead allocates the linked-list nodes separately,
/// because it's meant to manage GPU device memory, which can't conveniently hold the
/// allocator's CPU-side bookkeeping.
pub const TSFLAllocHandle = *opaque {};
const TSFLAllocator = struct {
    const linear: u8 = 7; // log2(min_alloc_size)
    const sub_bin: u8 = 5; // log2(sub_bin_cnt)
    const bin_count: u32 = 64 - linear; // 57 bins for 64-bit sizes
    const sub_bin_count: u32 = 1 << sub_bin;
    const min_alloc_size: u32 = 1 << linear;
    const block_count: u32 = ((bin_count - 1) * sub_bin_count) + 1; // 0th bin is used for linear allocation (< min_alloc_size)
    const min_alignment = linear - sub_bin; // log2 of the linear sub-bin block size (0th class)
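    // Derived values for the defaults above: min_alloc_size = 128 bytes,
    // sub_bin_count = 32 sub-bins per bin, bin_count = 57 size classes,
    // block_count = 1793 free-list heads, and the 0th-class sub-bin
    // granularity is 1 << min_alignment = 4 bytes.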
    const BlockMemoryPool = MemoryPool(Block);

    allocator: BlockMemoryPool,
    blocks: [block_count]?*Block,
    bin_bitmap: u32,
    sub_bin_bitmap: [bin_count]u32,
    num_allocation: usize,
    num_freeblock: usize,
    pub fn init(allocator: Allocator) !TSFLAllocator {
        std.debug.assert(linear > sub_bin);
        const pool_allocator = try BlockMemoryPool.initPreheated(allocator, 100);
        return .{
            .allocator = pool_allocator,
            .blocks = [_]?*Block{null} ** block_count,
            .bin_bitmap = 0,
            .sub_bin_bitmap = [_]u32{0} ** bin_count,
            .num_allocation = 0,
            .num_freeblock = 0,
        };
    }
    pub fn deinit(self: *TSFLAllocator) void {
        const freed = self.allocator.reset(.free_all);
        std.debug.assert(freed);
    }
    pub fn alloc(self: *TSFLAllocator, size: usize, alignment: usize) !TSFLAllocHandle {
        std.debug.assert(is_pow_two(alignment) and size >= min_alignment);
        const block_map = try self.findFreeBlock(size);
        const block = self.blocks[block_map.idx].?;
        self.removeFreeBlock(block, block_map);
        const maybe_split_block = try useFreeBlock(&self.allocator, block, size, alignment);
        if (maybe_split_block) |split_block| self.insertFreeBlock(split_block);
        self.num_allocation += 1;
        return @ptrCast(block);
    }
    pub fn free(self: *TSFLAllocator, handle: TSFLAllocHandle) void {
        const block: *Block = @ptrCast(@alignCast(handle));
        freeUsedBlock(block);
        self.mergeFreeBlock(block);
        self.insertFreeBlock(block);
        self.num_allocation -= 1;
    }
    inline fn freeUsedBlock(block: *Block) void {
        std.debug.assert(!block.isFree());
        // restore the span consumed by the alignment adjustment in useFreeBlock,
        // otherwise those bytes would be lost from the pool on every aligned allocation
        const adjustment = block.next_free.adjustment;
        block.offset -= adjustment;
        block.size += adjustment;
        block.markFree();
    }
    fn mergeFreeBlock(self: *TSFLAllocator, block: *Block) void {
        std.debug.assert(block.isFree());
        if (block.prev_physical != null and block.prev_physical.?.isFree()) {
            const prev_physical = block.prev_physical.?;
            self.removeFreeBlockWSize(prev_physical);
            // absorb prev
            block.offset = prev_physical.offset;
            block.size += prev_physical.size;
            block.prev_physical = prev_physical.prev_physical;
            if (prev_physical.prev_physical) |pre_prev| {
                pre_prev.next_physical = block;
            }
            self.allocator.destroy(prev_physical);
        }
        // merge next
        if (block.next_physical != null and block.next_physical.?.isFree()) {
            const next_physical = block.next_physical.?;
            self.removeFreeBlockWSize(next_physical);
            // absorb next
            block.size += next_physical.size;
            block.next_physical = next_physical.next_physical;
            if (next_physical.next_physical) |next_next| {
                next_next.prev_physical = block;
            }
            self.allocator.destroy(next_physical);
        }
    }
    fn useFreeBlock(allocator: *BlockMemoryPool, block: *Block, size: usize, alignment: usize) !?*Block {
        std.debug.assert(block.isFree());
        // adjust alignment and size
        const aligned_offset = std.mem.alignForward(usize, block.offset, alignment);
        std.debug.assert(aligned_offset - block.offset < std.math.maxInt(u8));
        const adjustment: u8 = @intCast(aligned_offset - block.offset);
        const size_with_adjustment = size + adjustment;
        std.debug.assert(size_with_adjustment <= block.size);
        var maybe_new_block: ?*Block = null;
        // trimmable
        if (block.size >= size_with_adjustment + min_alignment) {
            const new_block = try allocator.create();
            new_block.* = Block{
                .size = block.size - size_with_adjustment,
                .offset = block.offset + size_with_adjustment,
            };
            maybe_new_block = new_block;
            // physical links
            if (block.next_physical) |next_physical| {
                next_physical.prev_physical = new_block;
                new_block.next_physical = next_physical;
            }
            new_block.prev_physical = block;
            block.next_physical = new_block;
        }
        block.offset = aligned_offset;
        // when the block was not trimmed it keeps its full remaining span, so that
        // freeUsedBlock can hand the whole range back to the free lists later
        block.size = if (maybe_new_block != null) size else block.size - adjustment;
        block.markUsed(adjustment);
        return maybe_new_block;
    }
    fn insertFreeBlock(self: *TSFLAllocator, block: *Block) void {
        const map = binmap_down(block.size);
        const idx = map.bin_idx * sub_bin_count + map.sub_bin_idx;
        const current = self.blocks[idx];
        block.prev_free = .{ .ptr = null };
        block.next_free = .{ .ptr = current };
        if (current) |curr| {
            curr.prev_free = .{ .ptr = block };
        }
        self.blocks[idx] = block;
        self.bin_bitmap |= @as(u32, 1) << @intCast(map.bin_idx);
        self.sub_bin_bitmap[map.bin_idx] |= @as(u32, 1) << @intCast(map.sub_bin_idx);
        self.num_freeblock += 1;
    }
    fn removeFreeBlock(self: *TSFLAllocator, block: *Block, block_map: BlockMap) void {
        std.debug.assert(block.isFree());
        const next = block.next_free;
        const prev = block.prev_free;
        if (next.ptr) |n| n.prev_free = prev;
        if (prev.ptr) |p| p.next_free = next;
        if (self.blocks[block_map.idx] == block) {
            // block is the head of the free-list, advance the head;
            // a non-head block must leave the head untouched
            self.blocks[block_map.idx] = next.ptr;
            if (next.ptr == null) {
                // the free-list is now empty, unset the sub_bin bitmap
                self.sub_bin_bitmap[block_map.bin_idx] &= ~(@as(u32, 1) << @intCast(block_map.sub_bin_idx));
                // if the whole sub_bin bitmap is empty, unset the bin bitmap
                if (self.sub_bin_bitmap[block_map.bin_idx] == 0) self.bin_bitmap &= ~(@as(u32, 1) << @intCast(block_map.bin_idx));
            }
        }
        self.num_freeblock -= 1;
    }
    fn removeFreeBlockWSize(self: *TSFLAllocator, block: *Block) void {
        const map = binmap_down(block.size);
        const idx = map.bin_idx * sub_bin_count + map.sub_bin_idx;
        const block_map = BlockMap{
            .bin_idx = map.bin_idx,
            .sub_bin_idx = map.sub_bin_idx,
            .rounded_size = map.rounded_size,
            .idx = idx,
        };
        self.removeFreeBlock(block, block_map);
    }
    fn findFreeBlock(self: TSFLAllocator, size: usize) !BlockMap {
        var map = binmap_up(size);
        // good-fit lookup with the mapped bin and sub_bin
        var sub_bin_bitmap = self.sub_bin_bitmap[map.bin_idx] & (~@as(u32, 0) << @intCast(map.sub_bin_idx));
        // not found
        if (sub_bin_bitmap == 0) {
            // search the next larger bins
            const bin_bitmap = self.bin_bitmap & (~@as(u32, 0) << @intCast(map.bin_idx + 1));
            if (bin_bitmap == 0) return error.OutOfFreeBlock;
            map.bin_idx = @ctz(bin_bitmap);
            // any sub_bin will suffice
            sub_bin_bitmap = self.sub_bin_bitmap[map.bin_idx];
        }
        std.debug.assert(sub_bin_bitmap != 0);
        // get index of the free block
        map.sub_bin_idx = @ctz(sub_bin_bitmap);
        const idx = map.bin_idx * sub_bin_count + map.sub_bin_idx;
        return BlockMap{
            .bin_idx = map.bin_idx,
            .sub_bin_idx = map.sub_bin_idx,
            .rounded_size = map.rounded_size,
            .idx = idx,
        };
    }
    // rounds down the size and maps, for inserts
    fn binmap_down(size: usize) struct { bin_idx: u32, sub_bin_idx: u32, rounded_size: usize } {
        const bin_idx: u32 = bit_scan_msb(size | min_alloc_size);
        const log2_subbin_size: u6 = @intCast(bin_idx - sub_bin);
        const sub_bin_idx = size >> log2_subbin_size; // size / block_size
        const adjusted_bin_idx: u32 = @intCast((bin_idx - linear) + (sub_bin_idx >> sub_bin));
        const adjusted_sub_bin_idx: u32 = @intCast(sub_bin_idx & (sub_bin_count - 1));
        const rounded_size = size;
        return .{
            .bin_idx = adjusted_bin_idx,
            .sub_bin_idx = adjusted_sub_bin_idx,
            .rounded_size = rounded_size,
        };
    }
    // rounds up the size and maps, for find query
    fn binmap_up(size: usize) struct { bin_idx: u32, sub_bin_idx: u32, rounded_size: usize } {
        const bin_idx: u32 = bit_scan_msb(size | min_alloc_size);
        const log2_subbin_size: u6 = @intCast(bin_idx - sub_bin);
        const next_subbin_offset = (@as(u64, 1) << log2_subbin_size) - 1; // block_size - 1
        const rounded = size +% next_subbin_offset;
        const sub_bin_idx = rounded >> log2_subbin_size; // rounded_size / block_size
        const adjusted_bin_idx: u32 = @intCast((bin_idx - linear) + (sub_bin_idx >> sub_bin));
        const adjusted_sub_bin_idx: u32 = @intCast(sub_bin_idx & (sub_bin_count - 1));
        const rounded_size = rounded & ~next_subbin_offset;
        return .{
            .bin_idx = adjusted_bin_idx,
            .sub_bin_idx = adjusted_sub_bin_idx,
            .rounded_size = rounded_size,
        };
    }
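    // Illustrative worked example (not part of the original gist), with linear = 7 and
    // sub_bin = 5: for size = 1000, bit_scan_msb(1000 | 128) = 9, so the sub-bin block
    // size is 1 << (9 - 5) = 16 bytes.
    //   binmap_down(1000): sub_bin_idx = 1000 >> 4 = 62 -> bin 3 (= (9 - 7) + 1), sub_bin 30.
    //   binmap_up(1000):   rounds up to 1008 first      -> bin 3, sub_bin 31.
    // Inserts use binmap_down so a block is never filed in a class above its size, while
    // lookups use binmap_up so any block found in the mapped class is big enough.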
    const BlockMap = struct {
        bin_idx: u32,
        sub_bin_idx: u32,
        rounded_size: usize,
        idx: u32,
    };
    const Block = struct {
        offset: usize,
        size: usize,
        next_free: union { ptr: ?*Block, adjustment: u8 } = .{ .ptr = null }, // holds the alignment adjustment while the block is in use
        prev_free: union { ptr: ?*Block } = .{ .ptr = null }, // ptr == self when the block is in-use
        next_physical: ?*Block = null,
        prev_physical: ?*Block = null,
        inline fn isFree(self: *const Block) bool {
            return self != self.prev_free.ptr;
        }
        inline fn markUsed(self: *Block, adjustment: u8) void {
            self.prev_free = .{ .ptr = self };
            self.next_free = .{ .adjustment = adjustment };
        }
        inline fn markFree(self: *Block) void {
            self.prev_free = .{ .ptr = null };
            self.next_free = .{ .ptr = null };
        }
    };
};
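
// Illustrative usage sketch, not part of the original gist. It seeds the allocator with a
// single free block spanning a hypothetical 1 MiB device heap by calling the internal
// insertFreeBlock helper directly (the gist exposes no public API for registering the
// managed range), then allocates and frees one aligned block.
test "TSFLAllocator alloc/free round trip" {
    var tlsf = try TSFLAllocator.init(std.testing.allocator);
    defer tlsf.deinit();

    // register the managed range [0, 1 MiB) as one big free block
    const root = try tlsf.allocator.create();
    root.* = .{ .offset = 0, .size = 1 << 20 };
    tlsf.insertFreeBlock(root);

    // allocate a 4 KiB block with 256-byte alignment, then return it
    const handle = try tlsf.alloc(4096, 256);
    tlsf.free(handle);
    try std.testing.expectEqual(@as(usize, 0), tlsf.num_allocation);
}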