Skip to content

Instantly share code, notes, and snippets.

@lithdew
Created June 26, 2022 09:26
Show Gist options
  • Save lithdew/c6e96c3819747a85b9a7284afeb67cf4 to your computer and use it in GitHub Desktop.
Save lithdew/c6e96c3819747a85b9a7284afeb67cf4 to your computer and use it in GitHub Desktop.
zig: generational paged sparse set
const std = @import("std");
const sparse = @This();
/// This is an implementation of a paged sparse set that stores the payload in
/// the sparse array.
//
/// A sparse set has a dense and a sparse array. The sparse array is directly
/// indexed by a 64 bit identifier. The sparse element is linked with a dense
/// element, which allows for liveliness checking. The liveliness check itself
/// can be performed by doing (psuedo code):
/// dense[sparse[sparse_id].dense] == sparse_id
//
/// To ensure that the sparse array doesn't have to grow to a large size when
/// using large sparse_id's, the sparse set uses paging. This cuts up the array
/// into several pages of 4096 elements. When an element is set, the sparse set
/// ensures that the corresponding page is created. The page associated with an
/// id is determined by shifting a bit 12 bits to the right.
//
/// The sparse set keeps track of a generation count per id, which is increased
/// each time an id is deleted. The generation is encoded in the returned id.
//
/// This sparse set implementation stores payload in the sparse array, which is
/// not typical. The reason for this is to guarantee that (in combination with
/// paging) the returned payload pointers are stable. This allows for various
/// optimizations in the parts of the framework that uses the sparse set.
//
/// The sparse set has been designed so that new ids can be generated in bulk, in
/// an O(1) operation. The way this works is that once a dense-sparse pair is
/// created, it is never unpaired. Instead it is moved to the end of the dense
/// array.
pub fn Set(comptime T: type) type {
return struct {
/// A sparse page comprised of 4096 elements. Indices into the dense array
/// of handles within the set are 1-indexed, as the 0 index indicates a
/// null index.
pub const Page = struct {
indices: [4096]u32 = [_]u32{0} ** 4096,
items: [4096]T = undefined,
};
const Self = @This();
/// Dense array of handles.
handles: [*]Handle = undefined,
num_handles: u32 = 0,
num_handles_available: u32 = 0,
/// Sparse array of pages.
sparse_pages: [*]?*Page = undefined,
num_sparse_pages: u32 = 0,
/// Number of elements stored in the set.
len: u32 = 0,
/// Deinitialize and free all memory of the set.
pub fn deinit(self: *Self, gpa: std.mem.Allocator) void {
gpa.free(self.handles[0..self.num_handles_available]);
for (self.sparse_pages[0..self.num_sparse_pages]) |page| {
gpa.destroy(page orelse continue);
}
gpa.free(self.sparse_pages[0..self.num_sparse_pages]);
}
/// Invalidates all handles in the set while retaining all allocated
/// memory used to store handles and sparse pages in the set. All
/// invalidated handles can be recycled through recycle().
pub fn clearRetainingCapacity(self: *Self) void {
self.len = 0;
}
/// Check whether or not the given handle is alive and associated with
/// an item T.
pub fn contains(self: *Self, handle: Handle) bool {
return self.getPtr(handle) != null;
}
/// Gets an item T associated with the given handle if the handle is still
/// considered to be alive. Returns null otherwise.
pub fn get(self: *Self, handle: Handle) ?T {
const ptr = self.getPtr(handle) orelse return null;
return ptr.*;
}
/// Gets a pointer to the item T associated with the given handle if the handle is still
/// considered to be alive. Returns null otherwise.
pub fn getPtr(self: *Self, handle: Handle) ?*const T {
const page_index = handle.getPageIndex();
if (page_index >= self.num_sparse_pages) {
return null;
}
const page = self.sparse_pages[page_index] orelse return null;
if (page.indices[handle.getPageOffset()] == 0 or page.indices[handle.getPageOffset()] - 1 >= self.len) {
return null;
}
if (self.handles[page.indices[handle.getPageOffset()] - 1].data.managed.generation != handle.data.managed.generation) {
return null;
}
return &page.items[handle.getPageOffset()];
}
/// Provides handles that may be reused for the insertion of new items into the set. A
/// handle is considered to be reusable if it was previously deleted from the set.
pub fn recycle(self: *Self) ?Handle {
if (self.len >= self.num_handles) {
return null;
}
return self.handles[self.len];
}
/// Associates an item T to the given handle in the set. Panics if the handle is
/// no longer alive, which is determined based on the generation count encoded in
/// the handle, and the generation count that is tracked for the handle in the
/// set.
pub fn put(self: *Self, gpa: std.mem.Allocator, handle: Handle, item: T) !void {
const page_index = handle.getPageIndex();
const page_offset = handle.getPageOffset();
try self.ensureSparsePageIndexAvailable(gpa, page_index);
const page = self.sparse_pages[page_index] orelse page: {
const page = try gpa.create(Page);
page.* = .{};
self.sparse_pages[page_index] = page;
break :page page;
};
if (page.indices[page_offset] != 0) {
if (page.indices[page_offset] - 1 == self.len) {
self.len += 1;
} else if (page.indices[page_offset] - 1 > self.len) {
const unused_handle = self.handles[self.len];
const unused_page = self.sparse_pages[unused_handle.getPageIndex()] orelse unreachable;
self.handles[self.len] = self.handles[page.indices[page_offset] - 1];
self.handles[page.indices[page_offset] - 1] = unused_handle;
unused_page.indices[unused_handle.getPageOffset()] = page.indices[page_offset];
page.indices[page_offset] = self.len + 1;
self.len += 1;
}
if (self.handles[page.indices[page_offset] - 1].data.managed.generation != handle.data.managed.generation) {
std.debug.panic("BUG: Expected generation {d} when inserting ID {d} with item {any}, but got generation {d}.", .{
self.handles[page.indices[page_offset] - 1].data.managed.generation,
handle.id,
item,
handle.data.managed.generation,
});
}
} else {
try self.ensureNumHandlesAvailable(gpa, self.num_handles + 1);
if (self.len < self.num_handles) {
const unused_handle = self.handles[self.len];
const unused_page = self.sparse_pages[unused_handle.getPageIndex()] orelse unreachable;
self.handles[self.num_handles] = unused_handle;
unused_page.indices[unused_handle.getPageOffset()] = self.num_handles + 1;
}
self.handles[self.len] = handle;
page.indices[page_offset] = self.len + 1;
self.num_handles += 1;
self.len += 1;
}
page.items[page_offset] = item;
}
/// Ensures that enough memory is allocated to store the given number of handles.
fn ensureNumHandlesAvailable(self: *Self, gpa: std.mem.Allocator, num_handles: u32) !void {
var better_capacity = self.num_handles_available;
if (better_capacity >= num_handles) {
return;
}
while (true) {
better_capacity += better_capacity / 2 + 8;
if (better_capacity >= num_handles) {
break;
}
}
const handles = try gpa.reallocAtLeast(self.handles[0..self.num_handles_available], better_capacity);
self.handles = handles.ptr;
self.num_handles_available = @intCast(u32, handles.len);
}
/// Ensures that the sparse page at the given index is available.
fn ensureSparsePageIndexAvailable(self: *Self, gpa: std.mem.Allocator, page_index: u32) !void {
if (page_index + 1 < self.num_sparse_pages) {
return;
}
const sparse_pages = try gpa.reallocAtLeast(self.sparse_pages[0..self.num_sparse_pages], page_index + 1);
for (sparse_pages[self.num_sparse_pages..]) |*page| {
page.* = null;
}
self.sparse_pages = sparse_pages.ptr;
self.num_sparse_pages = @intCast(u32, sparse_pages.len);
}
/// Remove an item associated with the given handle from the set. Returns true
/// if the item was successfully removed.
pub fn remove(self: *Self, handle: Handle) bool {
return self.fetchRemove(handle) != null;
}
/// Fetch an item T by its handle and remove it from the set. Returns
/// null if the handle is no longer alive.
pub fn fetchRemove(self: *Self, handle: Handle) ?T {
if (self.len == 0) {
return null;
}
const page_index = handle.getPageIndex();
const page_offset = handle.getPageOffset();
const page = self.sparse_pages[page_index] orelse return null;
if (page.indices[page_offset] == 0) {
return null;
}
if (self.handles[page.indices[page_offset] - 1].data.managed.generation != handle.data.managed.generation) {
return null;
}
self.handles[page.indices[page_offset] - 1].data.managed.generation +%= 1;
if (page.indices[page_offset] - 1 == self.len - 1) {
self.len -= 1;
} else if (page.indices[page_offset] - 1 < self.len - 1) {
const used_handle = self.handles[self.len - 1];
const used_page = self.sparse_pages[used_handle.getPageIndex()] orelse unreachable;
self.handles[self.len - 1] = self.handles[page.indices[page_offset] - 1];
self.handles[page.indices[page_offset] - 1] = used_handle;
used_page.indices[used_handle.getPageOffset()] = page.indices[page_offset];
page.indices[page_offset] = (self.len - 1) + 1;
self.len -= 1;
} else {
return null;
}
return page.items[page_offset];
}
/// Manually override the generation count of a handle stored in the set.
pub fn update(self: *Self, handle: Handle) void {
const page_index = handle.getPageIndex();
const page_offset = handle.getPageOffset();
const page = self.sparse_pages[page_index] orelse return;
if (page.indices[page_offset] == 0) {
return;
}
self.handles[page.indices[page_offset] - 1].data.managed.generation = handle.data.managed.generation;
}
};
}
/// A 64-bit handle which may be associated with data in a sparse set. The
/// lower 32 bits of a handle may be used to store miscellaneous metadata
/// such as the generation count of the handle itself.
pub const Handle = packed struct {
id: u32,
data: packed union {
unmanaged: u32,
managed: packed struct {
generation: u16 = 0,
unmanaged: u16 = 0,
},
},
pub inline fn init(id: u32) Handle {
return .{ .id = id, .data = .{ .managed = .{} } };
}
pub inline fn initWithData(id: u32, data: u32) Handle {
return .{ .id = id, .data = .{ .unmanaged = data } };
}
pub inline fn initWithGeneration(id: u32, generation: u16) Handle {
return .{ .id = id, .data = .{ .managed = .{ .generation = generation } } };
}
pub inline fn getPageIndex(self: Handle) u20 {
return @intCast(u20, self.id >> 12);
}
pub inline fn getPageOffset(self: Handle) u12 {
return @intCast(u12, self.id & 0xfff);
}
pub inline fn getGeneration(self: Handle) u16 {
return self.data.managed.generation;
}
pub inline fn equals(self: Handle, other: Handle) bool {
return self.id == other.id and self.data.unmanaged == other.data.unmanaged;
}
comptime {
if (@bitSizeOf(Handle) != @bitSizeOf(u64)) {
@compileError("Expected handles to be 64 bits.");
}
}
};
test "sparse.Set: strings example" {
var set: sparse.Set([]const u8) = .{};
defer set.deinit(std.testing.allocator);
const Strings = struct {
set: *sparse.Set([]const u8),
id: u32 = 0,
pub fn create(self: *@This()) sparse.Handle {
if (self.set.recycle()) |handle| {
return handle;
}
const handle = Handle.init(self.id);
self.id +%= 1;
return handle;
}
pub fn append(self: *@This(), gpa: std.mem.Allocator, item: []const u8) !Handle {
const handle = self.create();
try self.set.put(gpa, handle, item);
return handle;
}
};
var strings: Strings = .{ .set = &set };
var handle_a = strings.create();
try std.testing.expect(sparse.Handle.init(0).equals(handle_a));
var handle_b = strings.create();
try std.testing.expect(sparse.Handle.init(1).equals(handle_b));
try set.put(std.testing.allocator, handle_a, "xxx");
try set.put(std.testing.allocator, handle_b, "yyy");
try std.testing.expect(set.contains(handle_a));
try std.testing.expect(set.contains(handle_b));
try std.testing.expectEqual(@as(u32, 2), set.len);
try std.testing.expectEqualStrings("xxx", set.get(handle_a) orelse unreachable);
try std.testing.expectEqualStrings("yyy", set.get(handle_b) orelse unreachable);
try std.testing.expectEqualStrings("yyy", set.fetchRemove(handle_b) orelse unreachable);
try std.testing.expectEqualStrings("xxx", set.fetchRemove(handle_a) orelse unreachable);
try std.testing.expect(!set.contains(handle_a));
try std.testing.expect(!set.contains(handle_b));
try std.testing.expectEqual(@as(u32, 0), set.len);
try std.testing.expectEqual(@as(?[]const u8, null), set.get(handle_a));
try std.testing.expectEqual(@as(?[]const u8, null), set.get(handle_b));
try std.testing.expectEqual(@as(?[]const u8, null), set.fetchRemove(handle_a));
try std.testing.expectEqual(@as(?[]const u8, null), set.fetchRemove(handle_b));
try std.testing.expect(!set.contains(handle_a));
try std.testing.expect(!set.contains(handle_b));
try std.testing.expectEqual(@as(u32, 0), set.len);
set.clearRetainingCapacity();
handle_a = strings.create();
try std.testing.expect(sparse.Handle.initWithGeneration(0, 1).equals(handle_a));
try set.put(std.testing.allocator, handle_a, "xxx");
try std.testing.expect(set.contains(handle_a));
try std.testing.expect(!set.contains(handle_b));
set.update(sparse.Handle.initWithGeneration(1, 999));
handle_b = strings.create();
try std.testing.expect(sparse.Handle.initWithGeneration(1, 999).equals(handle_b));
try set.put(std.testing.allocator, handle_b, "yyy");
try std.testing.expect(set.contains(handle_a));
try std.testing.expect(set.contains(handle_b));
try std.testing.expect(set.recycle() == null);
try std.testing.expectEqual(@as(u32, 2), set.len);
try std.testing.expectEqualStrings("xxx", set.get(handle_a) orelse unreachable);
try std.testing.expectEqualStrings("yyy", set.get(handle_b) orelse unreachable);
try std.testing.expectEqualStrings("xxx", set.fetchRemove(handle_a) orelse unreachable);
try std.testing.expectEqualStrings("yyy", set.fetchRemove(handle_b) orelse unreachable);
try std.testing.expect(!set.contains(handle_a));
try std.testing.expect(!set.contains(handle_b));
try std.testing.expectEqual(@as(u32, 0), set.len);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment