Skip to content

Instantly share code, notes, and snippets.

@lemmi
Created July 4, 2020 15:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lemmi/8c83db17222d616b6e58ccb6a5f497dd to your computer and use it in GitHub Desktop.
Save lemmi/8c83db17222d616b6e58ccb6a5f497dd to your computer and use it in GitHub Desktop.
swiss hash map in zig
const std = @import("std");
const math = std.math;
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const testing = std.testing;
const Wyhash = std.hash.Wyhash;
const autoHash = std.hash.autoHash;
const meta = std.meta;
pub fn Map(comptime K: type, comptime V: type, comptime hash: fn (key: K) u32, comptime eql: fn (a: K, b: K) bool, comptime groupsize: u32) type {
return struct {
meta: []Meta,
entries: []KV,
size: usize,
allocator: *Allocator,
const KV = struct {
key: K,
val: V,
};
const Self = @This();
const Meta = @Vector(groupsize, i8);
const bitmaskType = @Type(std.builtin.TypeInfo{ .Int = .{ .is_signed = false, .bits = groupsize } });
const Ctrl = enum(i8) {
Empty = -128,
Deleted = -2,
Sentinel = -1,
_,
};
pub fn init(allocator: *Allocator) Self {
return Self{
.meta = &[_]Meta{},
.entries = &[_]KV{},
.size = 0,
.allocator = allocator,
};
}
pub fn deinit(self: Self) void {
self.allocator.free(self.meta);
self.allocator.free(self.entries);
}
pub fn count(self: Self) usize {
return self.size;
}
fn initCapacity(self: *Self, capacity: u32) !void {
assert(math.isPowerOfTwo(capacity));
self.meta = try self.allocator.alloc(Meta, try std.math.divExact(u32, capacity, groupsize));
self.entries = try self.allocator.alloc(KV, capacity);
for (self.meta) |*g| {
g.* = mSplat(Ctrl.Empty);
}
}
fn mSplat(c: Ctrl) Meta {
return @splat(groupsize, @enumToInt(c));
}
fn mSplatInt(c: i8) Meta {
return @splat(groupsize, c);
}
fn bitmask(v: @Vector(groupsize, bool)) bitmaskType {
return @ptrCast(*const bitmaskType, &v).*;
}
const bitmaskIterType = struct {
b: bitmaskType,
fn next(self: *bitmaskIterType) ?usize {
if (self.b == 0) return null;
const ret: usize = @ctz(bitmaskType, self.b);
self.b = self.b & (self.b - 1);
return ret;
}
};
fn bitmaskIter(b: bitmaskType) bitmaskIterType {
return bitmaskIterType{ .b = b };
}
fn h1(h: u32) u32 {
return h >> 7;
}
fn h2(h: u32) i8 {
return @as(i8, @truncate(u7, h));
}
fn groupIdx(self: *Self, h: u32) u32 {
return h1(h) % @truncate(u32, self.meta.len);
}
const groupIter = struct {
idx: u32,
i: u32,
n: usize,
fn next(self: *groupIter) ?u32 {
if (self.i == self.n) {
return null;
} else {
self.idx = (self.idx + self.i) % @truncate(u32, self.n);
self.i += 1;
return self.idx;
}
}
};
fn newGroupIter(self: *Self, idx: u32) groupIter {
return groupIter{ .idx = idx, .i = 0, .n = self.meta.len };
}
fn get(self: *Self, k: K) ?*KV {
if (self.entries.len == 0) return null;
const h = hash(k);
const h2vec = mSplatInt(h2(h));
const grp = self.groupIdx(h);
std.debug.warn("get({})\n", .{k});
var grpiter = self.newGroupIter(grp);
while (grpiter.next()) |g| {
std.debug.warn(" group {}\n", .{g});
const m = self.meta[g];
var candidates = bitmaskIter(bitmask(m == h2vec));
while (candidates.next()) |bit| {
const idx = g * groupsize + bit;
const e = &self.entries[idx];
std.debug.warn(" checking idx {}, bit {}", .{ idx, bit });
if (eql(e.key, k)) {
std.debug.warn(" OK\n", .{});
return e;
}
std.debug.warn("\n", .{});
}
const empties = bitmask(m == mSplat(Ctrl.Empty));
if (empties > 0) {
return null;
}
}
return null;
}
fn put(self: *Self, k: K, v: V) !?KV {
if (self.get(k)) |kv| {
const ret = kv.*;
kv.val = v;
return ret;
}
try self.autoCapacity();
self.putInternal(k, v);
return null;
}
fn putInternal(self: *Self, k: K, v: V) void {
std.debug.warn("inserting {} -> {}\n", .{ k, v });
const h = hash(k);
const special = mSplatInt(0);
var grpiter = self.newGroupIter(self.groupIdx(h));
while (grpiter.next()) |g| {
var m = &self.meta[g];
const b = bitmask(m.* < special);
if (b == 0) continue;
var candidates = bitmaskIter(b);
while (candidates.next()) |bit| {
const idx = g * groupsize + bit;
const e = &self.entries[idx];
std.debug.warn("found empty space at {}\n", .{idx});
e.key = k;
e.val = v;
m.*[bit] = h2(h);
self.size += 1;
return;
}
}
unreachable;
}
fn needResize(size: usize, capacity: usize) bool {
return size > capacity * 7 / 8 or capacity < groupsize;
}
fn autoCapacity(self: *Self) !void {
if (needResize(self.size, self.entries.len)) {
try self.resize();
}
}
fn optimizedCapacity(size: usize) usize {
var nextsize = math.ceilPowerOfTwo(usize, math.max(groupsize, size)) catch unreachable;
if (needResize(size, nextsize)) {
nextsize *= 2;
}
return nextsize;
}
fn copy(self: *Self) !Self {
var new = Self.init(self.allocator);
try new.initCapacity(@truncate(u32, optimizedCapacity(self.size)));
const isEntry = mSplatInt(0);
for (self.meta) |m, i| {
var b = bitmaskIter(bitmask(m >= isEntry));
while (b.next()) |bit| {
const idx = i * groupsize + bit;
new.putInternal(self.entries[idx].key, self.entries[idx].val);
}
}
assert(new.size == self.size);
return new;
}
fn resize(self: *Self) !void {
var new = try self.copy();
std.debug.warn(">>>> Resizising: new size: {}\n", .{new.entries.len});
var old = self.*;
self.* = new;
old.deinit();
}
};
}
pub fn AutoMap(comptime K: type, comptime V: type) type {
return Map(K, V, getAutoHashFn(K), getAutoEqlFn(K), 16);
}
test "1024" {
std.debug.warn("\n", .{});
var m = AutoMap(u32, u32).init(std.testing.allocator);
const cap = 1024;
var i: u32 = 0;
while (i < 1024) : (i += 1) {
const old = try m.put(i, i * 10);
testing.expectEqual(old, null);
}
i = 0;
while (i < 1024) : (i += 1) {
const kv = m.get(i).?;
testing.expectEqual(kv.key, i);
testing.expectEqual(kv.val, i * 10);
}
i = 0;
while (i < 1024) : (i += 1) {
const old = try m.put(i, i * 10);
testing.expectEqual(old.?.key, i);
testing.expectEqual(old.?.val, i * 10);
}
m.deinit();
try testing.allocator_instance.validate();
}
pub fn getAutoHashFn(comptime K: type) (fn (K) u32) {
return struct {
fn hash(key: K) u32 {
var hasher = Wyhash.init(0);
autoHash(&hasher, key);
return @truncate(u32, hasher.final());
}
}.hash;
}
pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) {
return struct {
fn eql(a: K, b: K) bool {
return meta.eql(a, b);
}
}.eql;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment