Created
July 4, 2020 15:56
-
-
Save lemmi/8c83db17222d616b6e58ccb6a5f497dd to your computer and use it in GitHub Desktop.
swiss hash map in zig
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const math = std.math; | |
const assert = std.debug.assert; | |
const Allocator = std.mem.Allocator; | |
const testing = std.testing; | |
const Wyhash = std.hash.Wyhash; | |
const autoHash = std.hash.autoHash; | |
const meta = std.meta; | |
pub fn Map(comptime K: type, comptime V: type, comptime hash: fn (key: K) u32, comptime eql: fn (a: K, b: K) bool, comptime groupsize: u32) type { | |
return struct { | |
meta: []Meta, | |
entries: []KV, | |
size: usize, | |
allocator: *Allocator, | |
const KV = struct { | |
key: K, | |
val: V, | |
}; | |
const Self = @This(); | |
const Meta = @Vector(groupsize, i8); | |
const bitmaskType = @Type(std.builtin.TypeInfo{ .Int = .{ .is_signed = false, .bits = groupsize } }); | |
const Ctrl = enum(i8) { | |
Empty = -128, | |
Deleted = -2, | |
Sentinel = -1, | |
_, | |
}; | |
pub fn init(allocator: *Allocator) Self { | |
return Self{ | |
.meta = &[_]Meta{}, | |
.entries = &[_]KV{}, | |
.size = 0, | |
.allocator = allocator, | |
}; | |
} | |
pub fn deinit(self: Self) void { | |
self.allocator.free(self.meta); | |
self.allocator.free(self.entries); | |
} | |
pub fn count(self: Self) usize { | |
return self.size; | |
} | |
fn initCapacity(self: *Self, capacity: u32) !void { | |
assert(math.isPowerOfTwo(capacity)); | |
self.meta = try self.allocator.alloc(Meta, try std.math.divExact(u32, capacity, groupsize)); | |
self.entries = try self.allocator.alloc(KV, capacity); | |
for (self.meta) |*g| { | |
g.* = mSplat(Ctrl.Empty); | |
} | |
} | |
fn mSplat(c: Ctrl) Meta { | |
return @splat(groupsize, @enumToInt(c)); | |
} | |
fn mSplatInt(c: i8) Meta { | |
return @splat(groupsize, c); | |
} | |
fn bitmask(v: @Vector(groupsize, bool)) bitmaskType { | |
return @ptrCast(*const bitmaskType, &v).*; | |
} | |
const bitmaskIterType = struct { | |
b: bitmaskType, | |
fn next(self: *bitmaskIterType) ?usize { | |
if (self.b == 0) return null; | |
const ret: usize = @ctz(bitmaskType, self.b); | |
self.b = self.b & (self.b - 1); | |
return ret; | |
} | |
}; | |
fn bitmaskIter(b: bitmaskType) bitmaskIterType { | |
return bitmaskIterType{ .b = b }; | |
} | |
fn h1(h: u32) u32 { | |
return h >> 7; | |
} | |
fn h2(h: u32) i8 { | |
return @as(i8, @truncate(u7, h)); | |
} | |
fn groupIdx(self: *Self, h: u32) u32 { | |
return h1(h) % @truncate(u32, self.meta.len); | |
} | |
const groupIter = struct { | |
idx: u32, | |
i: u32, | |
n: usize, | |
fn next(self: *groupIter) ?u32 { | |
if (self.i == self.n) { | |
return null; | |
} else { | |
self.idx = (self.idx + self.i) % @truncate(u32, self.n); | |
self.i += 1; | |
return self.idx; | |
} | |
} | |
}; | |
fn newGroupIter(self: *Self, idx: u32) groupIter { | |
return groupIter{ .idx = idx, .i = 0, .n = self.meta.len }; | |
} | |
fn get(self: *Self, k: K) ?*KV { | |
if (self.entries.len == 0) return null; | |
const h = hash(k); | |
const h2vec = mSplatInt(h2(h)); | |
const grp = self.groupIdx(h); | |
std.debug.warn("get({})\n", .{k}); | |
var grpiter = self.newGroupIter(grp); | |
while (grpiter.next()) |g| { | |
std.debug.warn(" group {}\n", .{g}); | |
const m = self.meta[g]; | |
var candidates = bitmaskIter(bitmask(m == h2vec)); | |
while (candidates.next()) |bit| { | |
const idx = g * groupsize + bit; | |
const e = &self.entries[idx]; | |
std.debug.warn(" checking idx {}, bit {}", .{ idx, bit }); | |
if (eql(e.key, k)) { | |
std.debug.warn(" OK\n", .{}); | |
return e; | |
} | |
std.debug.warn("\n", .{}); | |
} | |
const empties = bitmask(m == mSplat(Ctrl.Empty)); | |
if (empties > 0) { | |
return null; | |
} | |
} | |
return null; | |
} | |
fn put(self: *Self, k: K, v: V) !?KV { | |
if (self.get(k)) |kv| { | |
const ret = kv.*; | |
kv.val = v; | |
return ret; | |
} | |
try self.autoCapacity(); | |
self.putInternal(k, v); | |
return null; | |
} | |
fn putInternal(self: *Self, k: K, v: V) void { | |
std.debug.warn("inserting {} -> {}\n", .{ k, v }); | |
const h = hash(k); | |
const special = mSplatInt(0); | |
var grpiter = self.newGroupIter(self.groupIdx(h)); | |
while (grpiter.next()) |g| { | |
var m = &self.meta[g]; | |
const b = bitmask(m.* < special); | |
if (b == 0) continue; | |
var candidates = bitmaskIter(b); | |
while (candidates.next()) |bit| { | |
const idx = g * groupsize + bit; | |
const e = &self.entries[idx]; | |
std.debug.warn("found empty space at {}\n", .{idx}); | |
e.key = k; | |
e.val = v; | |
m.*[bit] = h2(h); | |
self.size += 1; | |
return; | |
} | |
} | |
unreachable; | |
} | |
fn needResize(size: usize, capacity: usize) bool { | |
return size > capacity * 7 / 8 or capacity < groupsize; | |
} | |
fn autoCapacity(self: *Self) !void { | |
if (needResize(self.size, self.entries.len)) { | |
try self.resize(); | |
} | |
} | |
fn optimizedCapacity(size: usize) usize { | |
var nextsize = math.ceilPowerOfTwo(usize, math.max(groupsize, size)) catch unreachable; | |
if (needResize(size, nextsize)) { | |
nextsize *= 2; | |
} | |
return nextsize; | |
} | |
fn copy(self: *Self) !Self { | |
var new = Self.init(self.allocator); | |
try new.initCapacity(@truncate(u32, optimizedCapacity(self.size))); | |
const isEntry = mSplatInt(0); | |
for (self.meta) |m, i| { | |
var b = bitmaskIter(bitmask(m >= isEntry)); | |
while (b.next()) |bit| { | |
const idx = i * groupsize + bit; | |
new.putInternal(self.entries[idx].key, self.entries[idx].val); | |
} | |
} | |
assert(new.size == self.size); | |
return new; | |
} | |
fn resize(self: *Self) !void { | |
var new = try self.copy(); | |
std.debug.warn(">>>> Resizising: new size: {}\n", .{new.entries.len}); | |
var old = self.*; | |
self.* = new; | |
old.deinit(); | |
} | |
}; | |
} | |
pub fn AutoMap(comptime K: type, comptime V: type) type { | |
return Map(K, V, getAutoHashFn(K), getAutoEqlFn(K), 16); | |
} | |
test "1024" { | |
std.debug.warn("\n", .{}); | |
var m = AutoMap(u32, u32).init(std.testing.allocator); | |
const cap = 1024; | |
var i: u32 = 0; | |
while (i < 1024) : (i += 1) { | |
const old = try m.put(i, i * 10); | |
testing.expectEqual(old, null); | |
} | |
i = 0; | |
while (i < 1024) : (i += 1) { | |
const kv = m.get(i).?; | |
testing.expectEqual(kv.key, i); | |
testing.expectEqual(kv.val, i * 10); | |
} | |
i = 0; | |
while (i < 1024) : (i += 1) { | |
const old = try m.put(i, i * 10); | |
testing.expectEqual(old.?.key, i); | |
testing.expectEqual(old.?.val, i * 10); | |
} | |
m.deinit(); | |
try testing.allocator_instance.validate(); | |
} | |
pub fn getAutoHashFn(comptime K: type) (fn (K) u32) { | |
return struct { | |
fn hash(key: K) u32 { | |
var hasher = Wyhash.init(0); | |
autoHash(&hasher, key); | |
return @truncate(u32, hasher.final()); | |
} | |
}.hash; | |
} | |
pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) { | |
return struct { | |
fn eql(a: K, b: K) bool { | |
return meta.eql(a, b); | |
} | |
}.eql; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment