@AshishBhattarai
Last active April 25, 2024 17:00
Two-Level Segregated Fit memory allocator
const std = @import("std");
const Allocator = std.mem.Allocator;
const MemoryPool = std.heap.MemoryPool;
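/// Index of the most significant set bit (mask must be non-zero; assumes a 64-bit usize).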
pub fn bit_scan_msb(mask: usize) u8 {
return 63 - @clz(mask);
}
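/// Index of the least significant set bit.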
pub fn bit_scan_lsb(mask: usize) u8 {
return @ctz(mask);
}
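/// True when num is a power of two (zero is not).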
pub fn is_pow_two(num: usize) bool {
return num != 0 and (num & (num - 1)) == 0;
}
/// Two-Level Segregated Fit memory allocator
/// https://ricefields.me/2024/04/20/tlsf-allocator.html
///
/// Ideally you would store the free-list nodes as a header at the start of each memory block.
/// This implementation doesn't do so; it performs a separate allocation for each linked-list node,
/// because it's meant to manage GPU device memory.
pub const TSFLAllocHandle = *opaque {};
const TSFLAllocator = struct {
const linear: u8 = 7; // log2(min_alloc)
const sub_bin: u8 = 5; // log2(sub_bin_cnt)
const bin_count: u32 = 64 - linear; // 57 bins: bin 0 for the linear region (< min_alloc_size), one per power-of-two class above it
const sub_bin_count: u32 = 1 << sub_bin;
const min_alloc_size: u32 = 1 << linear;
const block_count: u32 = ((bin_count - 1) * sub_bin_count) + 1; // 0th bin is used for linear allocation (< min_alloc)
const min_alignment = 1 << (linear - sub_bin); // linear sub-bin block size in bytes (0th class)
const BlockMemoryPool = MemoryPool(Block);
allocator: BlockMemoryPool,
blocks: [block_count]?*Block,
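// note: u32 bitmaps can only track the first 32 bins, i.e. block sizes below 2^(linear + 31) bytes (256 GiB with linear = 7)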
bin_bitmap: u32,
sub_bin_bitmap: [bin_count]u32,
num_allocation: usize,
num_freeblock: usize,
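/// The passed allocator only backs the CPU-side pool of Block nodes; the managed memory itself is never touched.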
pub fn init(allocator: Allocator) !TSFLAllocator {
std.debug.assert(linear > sub_bin);
const pool_allocator = try BlockMemoryPool.initPreheated(allocator, 100);
return .{
.allocator = pool_allocator,
.blocks = [_]?*Block{null} ** block_count,
.bin_bitmap = 0,
.sub_bin_bitmap = [_]u32{0} ** bin_count,
.num_allocation = 0,
.num_freeblock = 0,
};
}
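/// Releases every CPU-side Block node at once via the pool; does not free the managed memory range.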
pub fn deinit(self: *TSFLAllocator) void {
const freed = self.allocator.reset(.free_all);
std.debug.assert(freed);
}
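/// Finds a free block that can hold `size` bytes at `alignment`, splits off any unused tail as a new free block
/// and returns the used block as an opaque handle.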
pub fn alloc(self: *TSFLAllocator, size: usize, alignment: usize) !TSFLAllocHandle {
std.debug.assert(is_pow_two(alignment) and size >= min_alignment);
const block_map = try self.findFreeBlock(size);
const block = self.blocks[block_map.idx].?;
self.removeFreeBlock(block, block_map);
const maybe_split_block = try useFreeBlock(&self.allocator, block, size, alignment);
if (maybe_split_block) |split_block| self.insertFreeBlock(split_block);
self.num_allocation += 1;
return @ptrCast(block);
}
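/// Marks the block behind `handle` as free again, coalescing it with free physical neighbours
/// before re-inserting it into the free lists.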
pub fn free(self: *TSFLAllocator, handle: TSFLAllocHandle) void {
const block: *Block = @ptrCast(@alignCast(handle));
freeUsedBlock(block);
self.mergeFreeBlock(block);
self.insertFreeBlock(block);
self.num_allocation -= 1;
}
inline fn freeUsedBlock(block: *Block) void {
std.debug.assert(!block.isFree());
// undo the alignment adjustment applied in useFreeBlock so the block's
// offset and size describe its full physical extent again
const adjustment = block.next_free.adjustment;
block.offset -= adjustment;
block.size += adjustment;
block.markFree();
}
fn mergeFreeBlock(self: *TSFLAllocator, block: *Block) void {
std.debug.assert(block.isFree());
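// merge prev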
if (block.prev_physical != null and block.prev_physical.?.isFree()) {
const prev_physical = block.prev_physical.?;
self.removeFreeBlockWSize(prev_physical);
// absorb prev
block.offset = prev_physical.offset;
block.size += prev_physical.size;
block.prev_physical = prev_physical.prev_physical;
if (prev_physical.prev_physical) |pre_prev| {
pre_prev.next_physical = block;
}
self.allocator.destroy(prev_physical);
}
// merge next
if (block.next_physical != null and block.next_physical.?.isFree()) {
const next_physical = block.next_physical.?;
self.removeFreeBlockWSize(next_physical);
// absorb next
block.size += next_physical.size;
block.next_physical = next_physical.next_physical;
if (next_physical.next_physical) |next_next| {
next_next.prev_physical = block;
}
self.allocator.destroy(next_physical);
}
}
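// Claims `block` for an allocation of `size` bytes at `alignment`. Returns the trimmed-off remainder
// as a new free block when the block is large enough to split, null otherwise.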
fn useFreeBlock(allocator: *BlockMemoryPool, block: *Block, size: usize, alignment: usize) !?*Block {
std.debug.assert(block.isFree());
// adjust alignment and size
const aligned_offset = std.mem.alignForward(usize, block.offset, alignment);
std.debug.assert(aligned_offset - block.offset < std.math.maxInt(u8));
const adjustment: u8 = @intCast(aligned_offset - block.offset);
const size_with_adjustment = size + adjustment;
std.debug.assert(size_with_adjustment <= block.size);
var maybe_new_block: ?*Block = null;
// trimmable
if (block.size >= size_with_adjustment + min_alignment) {
const new_block = try allocator.create();
new_block.* = Block{
.size = block.size - size_with_adjustment,
.offset = block.offset + size_with_adjustment,
};
maybe_new_block = new_block;
// physical links
if (block.next_physical) |next_physical| {
next_physical.prev_physical = new_block;
new_block.next_physical = next_physical;
}
new_block.prev_physical = block;
block.next_physical = new_block;
}
block.offset = aligned_offset;
block.size = size;
block.markUsed(adjustment);
return maybe_new_block;
}
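// Pushes `block` onto the head of the free list for its size class and marks the bin/sub-bin as non-empty in the bitmaps.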
fn insertFreeBlock(self: *TSFLAllocator, block: *Block) void {
const map = binmap_down(block.size);
const idx = map.bin_idx * sub_bin_count + map.sub_bin_idx;
const current = self.blocks[idx];
block.prev_free = .{ .ptr = null };
block.next_free = .{ .ptr = current };
if (current) |curr| {
curr.prev_free = .{ .ptr = block };
}
self.blocks[idx] = block;
self.bin_bitmap |= @as(u32, 1) << @intCast(map.bin_idx);
self.sub_bin_bitmap[map.bin_idx] |= @as(u32, 1) << @intCast(map.sub_bin_idx);
self.num_freeblock += 1;
}
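// Unlinks `block` from the free list described by `block_map`; when that list becomes empty the matching bitmap bits are cleared.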
fn removeFreeBlock(self: *TSFLAllocator, block: *Block, block_map: BlockMap) void {
std.debug.assert(block.isFree()); // assert(is_free)
const next = block.next_free;
const prev = block.prev_free;
if (next.ptr) |n| n.prev_free = prev;
if (prev.ptr) |p| p.next_free = next;
if (self.blocks[block_map.idx] == block) {
// block was the head of its free-list: the next free block becomes the new head
self.blocks[block_map.idx] = next.ptr;
if (next.ptr == null) {
// the free-list is now empty, unset the sub_bin bitmap bit
self.sub_bin_bitmap[block_map.bin_idx] &= ~(@as(u32, 1) << @intCast(block_map.sub_bin_idx));
// if the whole sub_bin bitmap is empty, unset the bin bitmap bit
if (self.sub_bin_bitmap[block_map.bin_idx] == 0) self.bin_bitmap &= ~(@as(u32, 1) << @intCast(block_map.bin_idx));
}
}
self.num_freeblock -= 1;
}
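// Same as removeFreeBlock but recomputes the block map from the block's size; used when merging physical neighbours.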
fn removeFreeBlockWSize(self: *TSFLAllocator, block: *Block) void {
const map = binmap_down(block.size);
const idx = map.bin_idx * sub_bin_count + map.sub_bin_idx;
const block_map = BlockMap{
.bin_idx = map.bin_idx,
.sub_bin_idx = map.sub_bin_idx,
.rounded_size = map.rounded_size,
.idx = idx,
};
self.removeFreeBlock(block, block_map);
}
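// Two-level lookup: first the mapped sub-bin (or any larger sub-bin) of the mapped bin, then the smallest
// sub-bin of the next non-empty larger bin. Fails with error.OutOfFreeBlock when no candidate exists.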
fn findFreeBlock(self: TSFLAllocator, size: usize) !BlockMap {
var map = binmap_up(size);
// good fit look up with mapped bin and sub_bin
var sub_bin_bitmap = self.sub_bin_bitmap[map.bin_idx] & (~@as(u32, 0) << @intCast(map.sub_bin_idx));
// not found
if (sub_bin_bitmap == 0) {
// search for next bin
const bin_bitmap = self.bin_bitmap & (~@as(u32, 0) << @intCast(map.bin_idx + 1));
if (bin_bitmap == 0) return error.OutOfFreeBlock;
map.bin_idx = @ctz(bin_bitmap);
// any sub_bin will suffice
sub_bin_bitmap = self.sub_bin_bitmap[map.bin_idx];
}
std.debug.assert(sub_bin_bitmap != 0);
// get index of free block
map.sub_bin_idx = @ctz(sub_bin_bitmap);
const idx = map.bin_idx * sub_bin_count + map.sub_bin_idx;
return BlockMap{
.bin_idx = map.bin_idx,
.sub_bin_idx = map.sub_bin_idx,
.rounded_size = map.rounded_size,
.idx = idx,
};
}
// maps a size to the bin/sub-bin that contains it (rounding down to the sub-bin boundary), used for inserts
fn binmap_down(size: usize) struct { bin_idx: u32, sub_bin_idx: u32, rounded_size: usize } {
const bin_idx: u32 = bit_scan_msb(size | min_alloc_size);
const log2_subbin_size: u6 = @intCast(bin_idx - sub_bin);
const sub_bin_idx = size >> log2_subbin_size; // size / block_size
const adjusted_bin_idx: u32 = @intCast((bin_idx - linear) + (sub_bin_idx >> sub_bin));
const adjusted_sub_bin_idx: u32 = @intCast(sub_bin_idx & (sub_bin_count - 1));
const rounded_size = size;
return .{
.bin_idx = adjusted_bin_idx,
.sub_bin_idx = adjusted_sub_bin_idx,
.rounded_size = rounded_size,
};
}
// rounds up the size and maps, for find query
fn binmap_up(size: usize) struct { bin_idx: u32, sub_bin_idx: u32, rounded_size: usize } {
const bin_idx: u32 = bit_scan_msb(size | min_alloc_size);
const log2_subbin_size: u6 = @intCast(bin_idx - sub_bin);
const next_subbin_offset = (@as(u64, 1) << (log2_subbin_size)) - 1; // block_size - 1
const rounded = size +% next_subbin_offset;
const sub_bin_idx = rounded >> log2_subbin_size; // rounded_size / block_size
const adjusted_bin_idx: u32 = @intCast((bin_idx - linear) + (sub_bin_idx >> sub_bin));
const adjusted_sub_bin_idx: u32 = @intCast(sub_bin_idx & (sub_bin_count - 1));
const rounded_size = (rounded) & ~next_subbin_offset;
return .{
.bin_idx = adjusted_bin_idx,
.sub_bin_idx = adjusted_sub_bin_idx,
.rounded_size = rounded_size,
};
}
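// Result of mapping a size onto the two-level bins: bin/sub-bin indices, the size rounded to the
// sub-bin boundary, and the flattened index into `blocks`.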
const BlockMap = struct {
bin_idx: u32,
sub_bin_idx: u32,
rounded_size: usize,
idx: u32,
};
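// CPU-side block header: tracks the block's range within the managed memory plus its free-list and physical-neighbour links.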
const Block = struct {
offset: usize,
size: usize,
next_free: union { ptr: ?*Block, adjustment: u8 } = .{ .ptr = null },
prev_free: union { ptr: ?*Block } = .{ .ptr = null }, // ptr == self when the block is in-use
next_physical: ?*Block = null,
prev_physical: ?*Block = null,
inline fn isFree(self: *const Block) bool {
return self != self.prev_free.ptr;
}
inline fn markUsed(self: *Block, adjustment: u8) void {
self.prev_free = .{ .ptr = self };
self.next_free = .{ .adjustment = adjustment };
}
inline fn markFree(self: *Block) void {
self.prev_free = .{ .ptr = null };
self.next_free = .{ .ptr = null };
}
};
};
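// Usage sketch (not part of the original gist): exercises the allocator with std.testing.allocator.
// The gist does not show how the managed device-memory range is registered, so this test seeds a
// single free block by hand through the internal block pool; the 1 MiB range and the sizes and
// alignments below are arbitrary example values, not part of the original API.
test "TSFLAllocator alloc/free round trip" {
var tsfl = try TSFLAllocator.init(std.testing.allocator);
defer tsfl.deinit();
// seed one free block covering the whole managed range [0, 1 MiB)
const root = try tsfl.allocator.create();
root.* = .{ .offset = 0, .size = 1 << 20 };
tsfl.insertFreeBlock(root);
// two aligned allocations followed by frees; freeing coalesces the range back into one block
const a = try tsfl.alloc(256, 16);
const b = try tsfl.alloc(512, 64);
tsfl.free(b);
tsfl.free(a);
try std.testing.expectEqual(@as(usize, 0), tsfl.num_allocation);
}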