Skip to content

Instantly share code, notes, and snippets.

@lithdew
Created August 31, 2022 11:06
Show Gist options
  • Save lithdew/34dbfa9a16e5753aa5507d6cbd669ca4 to your computer and use it in GitHub Desktop.
Save lithdew/34dbfa9a16e5753aa5507d6cbd669ca4 to your computer and use it in GitHub Desktop.
zig: lexicographic hash map
const std = @import("std");
pub fn LexicographicHashMap(comptime V: type, comptime capacity: u32) type {
const shift = @bitSizeOf(u64) - 1 - std.math.log2_int(u64, capacity) + 1;
const overflow = capacity / 10 + (@bitSizeOf(u64) - 1 - shift + 1) << 1;
const num_entries = capacity + overflow;
return struct {
pub const Entry = packed struct {
key: u32 = 0,
value: V = undefined,
};
const Self = @This();
entries: [num_entries]Entry = [_]Entry{.{}} ** num_entries,
len: u32 = 0,
fn idx(ctx: anytype, a: u32) u32 {
return @intCast(u32, ctx.getHash(a) >> shift);
}
pub fn get(self: *const Self, ctx: anytype, key: u32) ?V {
var index = idx(ctx, key);
while (true) : (index += 1) {
const entry = self.entries[index];
const result = ctx.cmp(entry.key, key);
if (result.compare(.gte)) {
if (result != .eq) {
return null;
}
return entry.value;
}
}
}
pub fn put(self: *Self, ctx: anytype, key: u32, value: V) !void {
const result = try self.getOrPut(ctx, key);
result.value_ptr.* = value;
}
pub fn delete(self: *Self, ctx: anytype, key: u32) ?V {
var index = idx(ctx, key);
while (true) : (index += 1) {
const entry = self.entries[index];
const result = ctx.cmp(entry.key, key);
if (result.compare(.gte)) {
if (result != .eq) {
return null;
}
break;
}
}
const value = self.entries[index].value;
index += 1;
while (index - 1 >= idx(ctx, self.entries[index].key) and !ctx.isEmpty(self.entries[index].key)) : (index += 1) {
self.entries[index - 1] = self.entries[index];
}
self.entries[index - 1] = .{};
self.len -= 1;
return value;
}
pub const GetOrPutResult = struct {
found_existing: bool,
value_ptr: *align(1) V,
};
pub fn getOrPut(self: *Self, ctx: anytype, key: u32) !GetOrPutResult {
if (self.len == capacity) {
return error.OutOfMemory;
}
var it: Self.Entry = .{ .key = key, .value = undefined };
var index = idx(ctx, key);
while (true) : (index += 1) {
const entry = self.entries[index];
const result = ctx.cmp(entry.key, key);
if (result.compare(.gte)) {
if (result == .eq) {
return GetOrPutResult{ .found_existing = true, .value_ptr = &self.entries[index].value };
}
self.entries[index] = it;
if (ctx.isEmpty(entry.key)) {
self.len += 1;
return GetOrPutResult{ .found_existing = false, .value_ptr = &self.entries[index].value };
}
it = entry;
break;
}
}
const inserted_at = index;
index += 1;
while (true) : (index += 1) {
const entry = self.entries[index];
self.entries[index] = it;
if (ctx.isEmpty(entry.key)) {
self.len += 1;
return GetOrPutResult{ .found_existing = false, .value_ptr = &self.entries[inserted_at].value };
}
it = entry;
}
}
};
}
test "LexicographicHashMap" {
var map: LexicographicHashMap(u32, 16) = .{};
var strings: std.ArrayListUnmanaged([]const u8) = .{};
defer {
for (strings.items) |string| {
std.testing.allocator.free(string);
}
strings.deinit(std.testing.allocator);
}
const empty_string = try std.testing.allocator.dupe(u8, "");
errdefer std.testing.allocator.free(empty_string);
try strings.append(std.testing.allocator, empty_string);
var ctx: struct {
strings: *const std.ArrayListUnmanaged([]const u8),
pub fn isEmpty(_: @This(), key: u32) bool {
return key == 0;
}
pub fn cmp(self: @This(), a: u32, b: u32) std.math.Order {
if (a == 0) return .gt;
if (b == 0) return .lt;
if (a == 0 and b == 0) return .eq;
return std.mem.order(u8, self.strings.items[a], self.strings.items[b]);
}
pub fn getHash(self: @This(), key: u32) u64 {
var hash = std.mem.zeroes([8]u8);
std.mem.copy(u8, &hash, self.strings.items[key][0..@minimum(hash.len, self.strings.items[key].len)]);
return std.mem.readIntBig(u64, &hash);
}
} = .{ .strings = &strings };
var rng = std.rand.DefaultPrng.init(0);
comptime var i: usize = 0;
inline while (i < 15) : (i += 1) {
const string = try std.testing.allocator.alloc(u8, 10 + rng.random().uintLessThan(usize, 40));
errdefer std.testing.allocator.free(string);
rng.random().bytes(string);
try strings.append(std.testing.allocator, string);
}
comptime var j: usize = 0;
inline while (j < 15) : (j += 1) {
try map.put(ctx, 1 + j, 100 * (1 + j));
}
// 0: 07d580c33d6fda61fc7b9aec91df0f5c1a5ebe3b8cbf -> 100
// 1: 154eed69ba7ec560cb39dd77e95cc622bf6a7c5a0cced1a56832e1 -> 700
// 2: 20e7fa109f130418fa10acb8e790 -> 400
// 3: 246478ae0beb0f4050bf80 -> 800
// 6: 6899c10632848450fc4daae9 -> 300
// 7: 73be085468b8635802c36b0671a58c0e376a9538d6 -> 1100
// 8: 8216fe5731d82f9614e0ec8c08cc6d85bd4cb21a351787ca848101 -> 1300
// 9: 91bec19c6596e64e46169269775dcba3ed7d688aaa2a009ce29a3d5616 -> 1500
// 10: 9a8df05777c343056e02b55ac79074db59c94b46e64373d8fff08923a0 -> 200
// 11: 9e9d6e690d00da3644108c54609b832aca -> 1200
// 12: a804fcd0d32046a0c39caf301288501d98f635ed7d28be53e108 -> 500
// 13: c057accb24156c21df443a0608eb530a97817748b9 -> 600
// 14: c1e593e1a7894fc57c7039 -> 1400
// 15: cde8d25c8b5475dc13816466f7769ddf41dec0aeb7f82b34 -> 900
// 16: dbd772182a2be12949ec0a648b2d36b6a9a4bbb58e69782eb9af87824b49 -> 1000
for (map.entries) |*entry, k| {
if (ctx.isEmpty(entry.key)) {
continue;
}
std.debug.print("{}: {} -> {}\n", .{ k, std.fmt.fmtSliceHexLower(strings.items[entry.key]), entry.value });
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment