Created
June 26, 2022 09:26
-
-
Save lithdew/c6e96c3819747a85b9a7284afeb67cf4 to your computer and use it in GitHub Desktop.
zig: generational paged sparse set
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const sparse = @This(); | |
/// This is an implementation of a paged sparse set that stores the payload in | |
/// the sparse array. | |
// | |
/// A sparse set has a dense and a sparse array. The sparse array is directly | |
/// indexed by a 64 bit identifier. The sparse element is linked with a dense | |
/// element, which allows for liveliness checking. The liveliness check itself | |
/// can be performed by doing (psuedo code): | |
/// dense[sparse[sparse_id].dense] == sparse_id | |
// | |
/// To ensure that the sparse array doesn't have to grow to a large size when | |
/// using large sparse_id's, the sparse set uses paging. This cuts up the array | |
/// into several pages of 4096 elements. When an element is set, the sparse set | |
/// ensures that the corresponding page is created. The page associated with an | |
/// id is determined by shifting a bit 12 bits to the right. | |
// | |
/// The sparse set keeps track of a generation count per id, which is increased | |
/// each time an id is deleted. The generation is encoded in the returned id. | |
// | |
/// This sparse set implementation stores payload in the sparse array, which is | |
/// not typical. The reason for this is to guarantee that (in combination with | |
/// paging) the returned payload pointers are stable. This allows for various | |
/// optimizations in the parts of the framework that uses the sparse set. | |
// | |
/// The sparse set has been designed so that new ids can be generated in bulk, in | |
/// an O(1) operation. The way this works is that once a dense-sparse pair is | |
/// created, it is never unpaired. Instead it is moved to the end of the dense | |
/// array. | |
pub fn Set(comptime T: type) type { | |
return struct { | |
/// A sparse page comprised of 4096 elements. Indices into the dense array | |
/// of handles within the set are 1-indexed, as the 0 index indicates a | |
/// null index. | |
pub const Page = struct { | |
indices: [4096]u32 = [_]u32{0} ** 4096, | |
items: [4096]T = undefined, | |
}; | |
const Self = @This(); | |
/// Dense array of handles. | |
handles: [*]Handle = undefined, | |
num_handles: u32 = 0, | |
num_handles_available: u32 = 0, | |
/// Sparse array of pages. | |
sparse_pages: [*]?*Page = undefined, | |
num_sparse_pages: u32 = 0, | |
/// Number of elements stored in the set. | |
len: u32 = 0, | |
/// Deinitialize and free all memory of the set. | |
pub fn deinit(self: *Self, gpa: std.mem.Allocator) void { | |
gpa.free(self.handles[0..self.num_handles_available]); | |
for (self.sparse_pages[0..self.num_sparse_pages]) |page| { | |
gpa.destroy(page orelse continue); | |
} | |
gpa.free(self.sparse_pages[0..self.num_sparse_pages]); | |
} | |
/// Invalidates all handles in the set while retaining all allocated | |
/// memory used to store handles and sparse pages in the set. All | |
/// invalidated handles can be recycled through recycle(). | |
pub fn clearRetainingCapacity(self: *Self) void { | |
self.len = 0; | |
} | |
/// Check whether or not the given handle is alive and associated with | |
/// an item T. | |
pub fn contains(self: *Self, handle: Handle) bool { | |
return self.getPtr(handle) != null; | |
} | |
/// Gets an item T associated with the given handle if the handle is still | |
/// considered to be alive. Returns null otherwise. | |
pub fn get(self: *Self, handle: Handle) ?T { | |
const ptr = self.getPtr(handle) orelse return null; | |
return ptr.*; | |
} | |
/// Gets a pointer to the item T associated with the given handle if the handle is still | |
/// considered to be alive. Returns null otherwise. | |
pub fn getPtr(self: *Self, handle: Handle) ?*const T { | |
const page_index = handle.getPageIndex(); | |
if (page_index >= self.num_sparse_pages) { | |
return null; | |
} | |
const page = self.sparse_pages[page_index] orelse return null; | |
if (page.indices[handle.getPageOffset()] == 0 or page.indices[handle.getPageOffset()] - 1 >= self.len) { | |
return null; | |
} | |
if (self.handles[page.indices[handle.getPageOffset()] - 1].data.managed.generation != handle.data.managed.generation) { | |
return null; | |
} | |
return &page.items[handle.getPageOffset()]; | |
} | |
/// Provides handles that may be reused for the insertion of new items into the set. A | |
/// handle is considered to be reusable if it was previously deleted from the set. | |
pub fn recycle(self: *Self) ?Handle { | |
if (self.len >= self.num_handles) { | |
return null; | |
} | |
return self.handles[self.len]; | |
} | |
/// Associates an item T to the given handle in the set. Panics if the handle is | |
/// no longer alive, which is determined based on the generation count encoded in | |
/// the handle, and the generation count that is tracked for the handle in the | |
/// set. | |
pub fn put(self: *Self, gpa: std.mem.Allocator, handle: Handle, item: T) !void { | |
const page_index = handle.getPageIndex(); | |
const page_offset = handle.getPageOffset(); | |
try self.ensureSparsePageIndexAvailable(gpa, page_index); | |
const page = self.sparse_pages[page_index] orelse page: { | |
const page = try gpa.create(Page); | |
page.* = .{}; | |
self.sparse_pages[page_index] = page; | |
break :page page; | |
}; | |
if (page.indices[page_offset] != 0) { | |
if (page.indices[page_offset] - 1 == self.len) { | |
self.len += 1; | |
} else if (page.indices[page_offset] - 1 > self.len) { | |
const unused_handle = self.handles[self.len]; | |
const unused_page = self.sparse_pages[unused_handle.getPageIndex()] orelse unreachable; | |
self.handles[self.len] = self.handles[page.indices[page_offset] - 1]; | |
self.handles[page.indices[page_offset] - 1] = unused_handle; | |
unused_page.indices[unused_handle.getPageOffset()] = page.indices[page_offset]; | |
page.indices[page_offset] = self.len + 1; | |
self.len += 1; | |
} | |
if (self.handles[page.indices[page_offset] - 1].data.managed.generation != handle.data.managed.generation) { | |
std.debug.panic("BUG: Expected generation {d} when inserting ID {d} with item {any}, but got generation {d}.", .{ | |
self.handles[page.indices[page_offset] - 1].data.managed.generation, | |
handle.id, | |
item, | |
handle.data.managed.generation, | |
}); | |
} | |
} else { | |
try self.ensureNumHandlesAvailable(gpa, self.num_handles + 1); | |
if (self.len < self.num_handles) { | |
const unused_handle = self.handles[self.len]; | |
const unused_page = self.sparse_pages[unused_handle.getPageIndex()] orelse unreachable; | |
self.handles[self.num_handles] = unused_handle; | |
unused_page.indices[unused_handle.getPageOffset()] = self.num_handles + 1; | |
} | |
self.handles[self.len] = handle; | |
page.indices[page_offset] = self.len + 1; | |
self.num_handles += 1; | |
self.len += 1; | |
} | |
page.items[page_offset] = item; | |
} | |
/// Ensures that enough memory is allocated to store the given number of handles. | |
fn ensureNumHandlesAvailable(self: *Self, gpa: std.mem.Allocator, num_handles: u32) !void { | |
var better_capacity = self.num_handles_available; | |
if (better_capacity >= num_handles) { | |
return; | |
} | |
while (true) { | |
better_capacity += better_capacity / 2 + 8; | |
if (better_capacity >= num_handles) { | |
break; | |
} | |
} | |
const handles = try gpa.reallocAtLeast(self.handles[0..self.num_handles_available], better_capacity); | |
self.handles = handles.ptr; | |
self.num_handles_available = @intCast(u32, handles.len); | |
} | |
/// Ensures that the sparse page at the given index is available. | |
fn ensureSparsePageIndexAvailable(self: *Self, gpa: std.mem.Allocator, page_index: u32) !void { | |
if (page_index + 1 < self.num_sparse_pages) { | |
return; | |
} | |
const sparse_pages = try gpa.reallocAtLeast(self.sparse_pages[0..self.num_sparse_pages], page_index + 1); | |
for (sparse_pages[self.num_sparse_pages..]) |*page| { | |
page.* = null; | |
} | |
self.sparse_pages = sparse_pages.ptr; | |
self.num_sparse_pages = @intCast(u32, sparse_pages.len); | |
} | |
/// Remove an item associated with the given handle from the set. Returns true | |
/// if the item was successfully removed. | |
pub fn remove(self: *Self, handle: Handle) bool { | |
return self.fetchRemove(handle) != null; | |
} | |
/// Fetch an item T by its handle and remove it from the set. Returns | |
/// null if the handle is no longer alive. | |
pub fn fetchRemove(self: *Self, handle: Handle) ?T { | |
if (self.len == 0) { | |
return null; | |
} | |
const page_index = handle.getPageIndex(); | |
const page_offset = handle.getPageOffset(); | |
const page = self.sparse_pages[page_index] orelse return null; | |
if (page.indices[page_offset] == 0) { | |
return null; | |
} | |
if (self.handles[page.indices[page_offset] - 1].data.managed.generation != handle.data.managed.generation) { | |
return null; | |
} | |
self.handles[page.indices[page_offset] - 1].data.managed.generation +%= 1; | |
if (page.indices[page_offset] - 1 == self.len - 1) { | |
self.len -= 1; | |
} else if (page.indices[page_offset] - 1 < self.len - 1) { | |
const used_handle = self.handles[self.len - 1]; | |
const used_page = self.sparse_pages[used_handle.getPageIndex()] orelse unreachable; | |
self.handles[self.len - 1] = self.handles[page.indices[page_offset] - 1]; | |
self.handles[page.indices[page_offset] - 1] = used_handle; | |
used_page.indices[used_handle.getPageOffset()] = page.indices[page_offset]; | |
page.indices[page_offset] = (self.len - 1) + 1; | |
self.len -= 1; | |
} else { | |
return null; | |
} | |
return page.items[page_offset]; | |
} | |
/// Manually override the generation count of a handle stored in the set. | |
pub fn update(self: *Self, handle: Handle) void { | |
const page_index = handle.getPageIndex(); | |
const page_offset = handle.getPageOffset(); | |
const page = self.sparse_pages[page_index] orelse return; | |
if (page.indices[page_offset] == 0) { | |
return; | |
} | |
self.handles[page.indices[page_offset] - 1].data.managed.generation = handle.data.managed.generation; | |
} | |
}; | |
} | |
/// A 64-bit handle which may be associated with data in a sparse set. The | |
/// lower 32 bits of a handle may be used to store miscellaneous metadata | |
/// such as the generation count of the handle itself. | |
pub const Handle = packed struct { | |
id: u32, | |
data: packed union { | |
unmanaged: u32, | |
managed: packed struct { | |
generation: u16 = 0, | |
unmanaged: u16 = 0, | |
}, | |
}, | |
pub inline fn init(id: u32) Handle { | |
return .{ .id = id, .data = .{ .managed = .{} } }; | |
} | |
pub inline fn initWithData(id: u32, data: u32) Handle { | |
return .{ .id = id, .data = .{ .unmanaged = data } }; | |
} | |
pub inline fn initWithGeneration(id: u32, generation: u16) Handle { | |
return .{ .id = id, .data = .{ .managed = .{ .generation = generation } } }; | |
} | |
pub inline fn getPageIndex(self: Handle) u20 { | |
return @intCast(u20, self.id >> 12); | |
} | |
pub inline fn getPageOffset(self: Handle) u12 { | |
return @intCast(u12, self.id & 0xfff); | |
} | |
pub inline fn getGeneration(self: Handle) u16 { | |
return self.data.managed.generation; | |
} | |
pub inline fn equals(self: Handle, other: Handle) bool { | |
return self.id == other.id and self.data.unmanaged == other.data.unmanaged; | |
} | |
comptime { | |
if (@bitSizeOf(Handle) != @bitSizeOf(u64)) { | |
@compileError("Expected handles to be 64 bits."); | |
} | |
} | |
}; | |
test "sparse.Set: strings example" { | |
var set: sparse.Set([]const u8) = .{}; | |
defer set.deinit(std.testing.allocator); | |
const Strings = struct { | |
set: *sparse.Set([]const u8), | |
id: u32 = 0, | |
pub fn create(self: *@This()) sparse.Handle { | |
if (self.set.recycle()) |handle| { | |
return handle; | |
} | |
const handle = Handle.init(self.id); | |
self.id +%= 1; | |
return handle; | |
} | |
pub fn append(self: *@This(), gpa: std.mem.Allocator, item: []const u8) !Handle { | |
const handle = self.create(); | |
try self.set.put(gpa, handle, item); | |
return handle; | |
} | |
}; | |
var strings: Strings = .{ .set = &set }; | |
var handle_a = strings.create(); | |
try std.testing.expect(sparse.Handle.init(0).equals(handle_a)); | |
var handle_b = strings.create(); | |
try std.testing.expect(sparse.Handle.init(1).equals(handle_b)); | |
try set.put(std.testing.allocator, handle_a, "xxx"); | |
try set.put(std.testing.allocator, handle_b, "yyy"); | |
try std.testing.expect(set.contains(handle_a)); | |
try std.testing.expect(set.contains(handle_b)); | |
try std.testing.expectEqual(@as(u32, 2), set.len); | |
try std.testing.expectEqualStrings("xxx", set.get(handle_a) orelse unreachable); | |
try std.testing.expectEqualStrings("yyy", set.get(handle_b) orelse unreachable); | |
try std.testing.expectEqualStrings("yyy", set.fetchRemove(handle_b) orelse unreachable); | |
try std.testing.expectEqualStrings("xxx", set.fetchRemove(handle_a) orelse unreachable); | |
try std.testing.expect(!set.contains(handle_a)); | |
try std.testing.expect(!set.contains(handle_b)); | |
try std.testing.expectEqual(@as(u32, 0), set.len); | |
try std.testing.expectEqual(@as(?[]const u8, null), set.get(handle_a)); | |
try std.testing.expectEqual(@as(?[]const u8, null), set.get(handle_b)); | |
try std.testing.expectEqual(@as(?[]const u8, null), set.fetchRemove(handle_a)); | |
try std.testing.expectEqual(@as(?[]const u8, null), set.fetchRemove(handle_b)); | |
try std.testing.expect(!set.contains(handle_a)); | |
try std.testing.expect(!set.contains(handle_b)); | |
try std.testing.expectEqual(@as(u32, 0), set.len); | |
set.clearRetainingCapacity(); | |
handle_a = strings.create(); | |
try std.testing.expect(sparse.Handle.initWithGeneration(0, 1).equals(handle_a)); | |
try set.put(std.testing.allocator, handle_a, "xxx"); | |
try std.testing.expect(set.contains(handle_a)); | |
try std.testing.expect(!set.contains(handle_b)); | |
set.update(sparse.Handle.initWithGeneration(1, 999)); | |
handle_b = strings.create(); | |
try std.testing.expect(sparse.Handle.initWithGeneration(1, 999).equals(handle_b)); | |
try set.put(std.testing.allocator, handle_b, "yyy"); | |
try std.testing.expect(set.contains(handle_a)); | |
try std.testing.expect(set.contains(handle_b)); | |
try std.testing.expect(set.recycle() == null); | |
try std.testing.expectEqual(@as(u32, 2), set.len); | |
try std.testing.expectEqualStrings("xxx", set.get(handle_a) orelse unreachable); | |
try std.testing.expectEqualStrings("yyy", set.get(handle_b) orelse unreachable); | |
try std.testing.expectEqualStrings("xxx", set.fetchRemove(handle_a) orelse unreachable); | |
try std.testing.expectEqualStrings("yyy", set.fetchRemove(handle_b) orelse unreachable); | |
try std.testing.expect(!set.contains(handle_a)); | |
try std.testing.expect(!set.contains(handle_b)); | |
try std.testing.expectEqual(@as(u32, 0), set.len); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment