Skip to content

Instantly share code, notes, and snippets.

@thedeemon
Created August 3, 2023 13:57
Show Gist options
  • Save thedeemon/1e2968d3ec7c19f9864b5002af59dc6c to your computer and use it in GitHub Desktop.
Save thedeemon/1e2968d3ec7c19f9864b5002af59dc6c to your computer and use it in GitHub Desktop.
import std.stdio, core.bitop, std.array, std.format, std.exception : enforce;
struct Entry(K,V) {
K key;
V value;
uint id;
}
enum maxShift = 25;
struct MapNode(K,V) {
uint nodeMap, dataMap;
MapNode!(K,V)*[] nodes;
Entry!(K,V)[] data;
Entry!(K,V)* lookup(uint h, uint shift, K key) {
uint index = mask32(h, shift);
int dataIdx = compressedIdx(dataMap, index);
if (dataIdx < 0) {
int nodeIdx = compressedIdx(nodeMap, index);
if (nodeIdx < 0)
return null; // not found
if (shift >= maxShift) {
auto bp = cast(Bucket!(K,V)*) nodes[nodeIdx];
return bp.lookup(key);
}
return nodes[nodeIdx].lookup(h, shift+5, key);
}
if (data[dataIdx].key == key)
return &data[dataIdx];
return null;
}
MapNode!(K,V) update(uint h, uint shift, ref Entry!(K,V) e, ref bool grew) {
uint index = mask32(h, shift);
// writefln("shift=%d index=%d", shift, index);
int dataIdx = compressedIdx(dataMap, index);
int nodeIdx = compressedIdx(nodeMap, index);
if (dataIdx < 0 && nodeIdx < 0) { // not present, create new data entry
uint dataMap2 = set_bit(dataMap, index);
auto dataIdx2 = compressedIdx(dataMap2, index);
auto data2 = insert(data, dataIdx2, e);
grew = true;
return MapNode(nodeMap, dataMap2, nodes, data2);
}
if (dataIdx >= 0) {
if (data[dataIdx].key == e.key) { // just update the value
auto e2 = Entry!(K,V)(e.key, e.value, data[dataIdx].id);
auto data2 = updateArray(data, dataIdx, e2);
return MapNode(nodeMap, dataMap, nodes, data2);
}
// different keys, two entries => create a sub node
auto e0 = data[dataIdx];
auto data2 = removeInArray(data, dataIdx);
auto dataMap2 = clear_bit(dataMap, index);
MapNode* np;
if (shift >= maxShift) { // too deep, create a bucket
auto bp = new Bucket!(K,V)(e0, e);
if (bp.data.length > 1)
grew = true;
np = cast(MapNode*) bp;
} else {
auto n0 = MapNode();
uint h0 = cast(uint) e0.key.hashOf;
auto n1 = n0.update(h0, shift+5, e0, grew);
auto n2 = n1.update(h, shift+5, e, grew);
grew = true;
np = new MapNode(n2.nodeMap, n2.dataMap, n2.nodes, n2.data);
}
auto nodeMap2 = set_bit(nodeMap, index);
auto nodeIdx2 = compressedIdx(nodeMap2, index);
auto nodes2 = insert(nodes, nodeIdx2, np);
return MapNode(nodeMap2, dataMap2, nodes2, data2);
}
if (nodeIdx >= 0) {
MapNode* np;
if (shift >= maxShift) {
auto bp = cast(Bucket!(K,V)*) nodes[nodeIdx];
auto bp2 = bp.update(e, grew);
np = cast(MapNode*) bp2;
} else {
auto n2 = nodes[nodeIdx].update(h, shift+5, e, grew);
np = new MapNode(n2.nodeMap, n2.dataMap, n2.nodes, n2.data);
}
auto nodes2 = updateArray(nodes, nodeIdx, np);
return MapNode(nodeMap, dataMap, nodes2, data);
}
writeln("impossiburu");
return this;
}
MapNode* remove(uint h, uint shift, K key) { // if not found, returns self
uint index = mask32(h, shift);
int dataIdx = compressedIdx(dataMap, index);
if (dataIdx < 0) {
int nodeIdx = compressedIdx(nodeMap, index);
if (nodeIdx < 0)
return &this; // not found
MapNode* np;
if (shift >= maxShift) {
auto bp = cast(Bucket!(K,V)*) nodes[nodeIdx];
auto bp2 = bp.remove(key);
np = cast(MapNode*) bp2;
} else {
np = nodes[nodeIdx].remove(h, shift+5, key);
}
if (np == nodes[nodeIdx]) // nothing changed
return &this;
auto nodes2 = updateArray(nodes, nodeIdx, np);
return new MapNode(nodeMap, dataMap, nodes2, data);
}
if (data[dataIdx].key == key) {
auto data2 = removeInArray(data, dataIdx);
auto dataMap2 = clear_bit(dataMap, index);
return new MapNode(nodeMap, dataMap2, nodes, data2);
}
return &this;
}
}
struct Bucket(K,V) {
Entry!(K,V)[] data;
this(ref Entry!(K,V) e0, ref Entry!(K,V) e1) {
if (e0.key == e1.key) {
data = [Entry!(K,V)(e0.key, e1.value, e0.id)];
}
else
data = [e0, e1];
}
this(Entry!(K,V)[] arr) { data = arr; }
auto update(ref Entry!(K,V) e, ref bool grew) {
foreach(i, x; data)
if (x.key == e.key) {
auto e2 = Entry!(K,V)(x.key, e.value, x.id);
return new Bucket(updateArray(data, cast(int) i, e2));
}
grew = true;
return new Bucket(insert(data, cast(int)data.length, e));
}
Entry!(K,V)* lookup(K key) {
foreach(ref e; data)
if (e.key == key)
return &e;
return null;
}
Bucket* remove(K key) {
foreach(i, ref e; data)
if (e.key == key)
return new Bucket( removeInArray(data, cast(int) i) );
return &this;
}
}
struct Map(K,V) {
uint size, counter;
MapNode!(K,V) node;
Map!(K,V) update(K k, V v) {
uint h = cast(uint)k.hashOf();
auto e = Entry!(K,V)(k, v, counter);
bool grew = false;
auto newNode = node.update(h, 0, e, grew);
uint incr = grew ? 1 : 0;
return Map(size + incr, counter + incr, newNode);
}
ref V opIndex(K key) {
uint h = cast(uint) key.hashOf();
auto entryPnt = node.lookup(h, 0, key);
enforce(entryPnt !is null, format("Key '%s' not found", key));
return entryPnt.value;
}
V get(K key, V dflt) {
uint h = cast(uint) key.hashOf();
auto entryPnt = node.lookup(h, 0, key);
if (entryPnt is null) return dflt;
return entryPnt.value;
}
Map!(K,V) remove(K key) {
uint h = cast(uint) key.hashOf();
MapNode!(K,V)* np = node.remove(h, 0, key);
if (np == &node) return this;
return Map(size - 1, counter, *np);
}
}
int compressedIdx(uint bmp, uint index) {
if (check_bit(bmp, index))
return popcnt(bmp & ~(-1 << index));
return -1;
}
T[] insert(T)(T[] arr, int pos, ref T v) {
auto a = uninitializedArray!(T[])(arr.length+1);
foreach(i; 0..pos)
a[i] = arr[i];
a[pos] = v;
foreach(i; pos .. arr.length)
a[i+1] = arr[i];
return a;
}
T[] updateArray(T)(T[] arr, int pos, ref T v) {
auto a = uninitializedArray!(T[])(arr.length);
foreach(i; 0..pos)
a[i] = arr[i];
a[pos] = v;
foreach(i; pos+1 .. arr.length)
a[i] = arr[i];
return a;
}
T[] removeInArray(T)(T[] arr, int pos) {
auto a = uninitializedArray!(T[])(arr.length-1);
foreach(i; 0..pos)
a[i] = arr[i];
foreach(i; pos .. arr.length-1)
a[i] = arr[i+1];
return a;
}
uint set_bit(uint bm, uint i) { return bm | (1u << i); }
uint clear_bit(uint bm, uint i) { return bm & ~(1u << i); }
bool check_bit(uint bm, uint i) { return (bm & (1u << i)) != 0; }
uint mask32(uint n, uint shift) { return (n >> shift) & 0b11111; }
size_t next_pow32(size_t n) { return size_t(32) << (n * 5); }
struct W {
string s;
size_t toHash() const pure nothrow {
return 0;
}
}
void same(M, A)(ref M m, ref A a) {
enforce(m.size == a.length);
foreach(k,v; a) {
enforce(m[k] == v);
}
}
void test() {
import std.random, std.algorithm;
auto m = Map!(string, int)();
int[string] aa;
Map!(string, int)[] saves;
int[string][] savesaa;
foreach(i; 0..1000_000) {
int x = uniform(0, 1000_000);
string s = format("%d", x);
if ((i & 15)==13) {
m = m.remove(s);
aa.remove(s);
} else {
m = m.update(s, x);
aa[s] = x;
}
enforce(aa.length == m.size);
if (i.among(123, 5664, 342234, 900000)) {
saves ~= m;
savesaa ~= aa.dup;
}
}
same(m, aa);
writefln("ok, len=%s cnt=%s", m.size, m.counter);
foreach(i, ms; saves) {
writeln(ms.size);
same(ms, savesaa[i]);
}
}
void main() {
auto m = Map!(string, int)();
auto m2 = m.update("hi", 5);
writeln(m2);
auto m3 = m2.update("hiF", 7);
m3.writeln;
auto m4 = m3.update("hi", 50);
m4.writeln;
auto m5 = m4.remove("hie");
m5["hiF"].writeln;
m5.size.writeln;
m5["hi"].writeln;
test();
// auto m = Map!(W, int)();
// auto m2 = m.update(W("hi"), 5);
// writeln(m2);
// auto m3 = m2.update(W("hiF"), 7);
// m3.writeln;
// auto m4 = m3.update(W("hi"), 50);
// m4.writeln;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment