Created
January 2, 2018 14:58
-
-
Save RedBeard0531/efd1071fe2474ba83149921d0427a8fb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## HHSet is a Heterogeneous Hash Set - It allows O(1) lookup of values by any compatible type. | |
{.experimental.} | |
import hashes, future, typetraits, strformat | |
type | |
Node[T] = object | |
val: T | |
used: bool # TODO make this the sign bit of the hash | |
hash: uint32 | |
next: ref Node[T] | |
HHSet*[T, Key] = object | |
## Heterogeneous Hash Set, storing ``T``, but using ``Key`` for lookup. | |
## If ``T`` isn't convertable to ``Key``, you must define a | |
## ``getKey(T): Key`` or ``getKey(T, typedesc[Key])`` proc to | |
## extract the key. | |
## | |
## The default (zero) state represents the empty set, so you do not need | |
## to call any init function. | |
data: seq[Node[T]] | |
len: int | |
HashEqCompatible*[StorageType] = concept heq, type HEQ | |
## Declares types that where a == b implies hash(a) == hash(b), even with | |
## different types. For example, string and cstring are compatible. | |
mixin isHashEqCompatible | |
compiles(isHashEqCompatible(StorageType, HEQ)) or | |
compiles(isHashEqCompatible(HEQ, StorageType)) | |
hash(heq) is Hash | |
(heq == StorageType) is bool | |
using | |
hhs: HHSet | |
const | |
initSize = if defined(release): 16 else: 1 | |
proc len*(hhs): int = hhs.len | |
iterator allNodes[T, Key](hhs: var HHSet[T, Key]): var Node[T] = | |
if not hhs.data.isNil: | |
for bucket in hhs.data.mitems: | |
yield bucket | |
var link = bucket.next | |
while not link.isNil: | |
yield link[] | |
link = link.next | |
iterator usedNodes[T, Key](hhs: var HHSet[T, Key]): var Node[T] = | |
for node in hhs.allNodes: | |
if node.used: | |
yield node | |
proc rawGet[T, Key](hhs: HHSet[T, Key]; key: HashEqCompatible[Key]): auto = | |
mixin getKey | |
if hhs.len == 0: return cast[ptr Node[hhs.T]](nil) | |
let hash = key.hash.uint32 | |
var node = unsafeAddr hhs.data[hash.int and (hhs.data.xlen - 1)] | |
while true: | |
if node.hash == hash and node.val.getKey(hhs.Key) == key: | |
return node | |
node = unsafeAddr node.next[] | |
if node == nil: | |
return node | |
proc rehash[T, Key](hhs: var HHSet[T, Key]) = | |
var newHHS: HHSet[T, Key] | |
newHHS.data = newSeq[Node[T]](hhs.data.len * 2) | |
for old in hhs.usedNodes: | |
newHHS.rawInsert(old.val.getKey(Key), old.hash).node.val.shallowCopy(old.val) | |
assert newHHS.len == hhs.len | |
hhs.data.swap(newHHS.data) | |
proc rawInsert[T, Key](hhs: var HHSet[T, Key]; key: HashEqCompatible[Key], hash: uint32): | |
tuple[node: ptr Node[T], inserted: bool] = | |
mixin getKey | |
if hhs.data.isNil: | |
hhs.data = newSeq[Node[T]](initSize) | |
let hash = key.hash.uint32 | |
var node = addr hhs.data[hash.int and hhs.data.high] | |
while true: | |
if node.hash == hash and node.val.getKey(hhs.Key) == key: | |
return (node, false) | |
if node.next == nil: | |
if hhs.len >= hhs.data.len: | |
hhs.rehash() # up to 100% load factor. | |
return hhs.rawInsert(key, hash) | |
hhs.len.inc | |
result.inserted = true | |
if node.used: | |
node.next.new() | |
result.node = addr node.next[] | |
else: | |
result.node = node | |
result.node.used = true | |
result.node.hash = hash | |
return | |
node = addr node.next[] | |
template withValueOrInsert*(hhs: var HHSet; key: typed, body: untyped) = | |
## Looks up key in the set and executes body with these names in scope: | |
## | |
## .. code-block:: | |
## let inserted: bool ## True if key was not already in the set | |
## var value: var HHSet.T ## The mutable value in the set | |
## | |
## You are responsible for ensuring that ``value.getKey(hhs.Key) == key`` at | |
## the end of the block. | |
let keyX = key # eval once | |
hhs.withValueOrInsertKnownHash(keyX, key.hash.uint32): body | |
template withValueOrInsertKnownHash*(hhs: var HHSet; key: typed, hash: uint32, body: untyped) = | |
## Like withValueOrInsert but with a pre-computed hash. | |
block: | |
let keyX = key # eval once | |
let (node, inserted {.inject, used.}) = hhs.rawInsert(keyX, hash) | |
type Value = hhs.T | |
iterator oneShot(x: var Value): var Value {.gensym.} = yield x | |
for value {.inject.} in oneShot(node.val): | |
body | |
mixin getKey | |
assert node.val.getKey(hhs.Key) == keyX ## You must initialize value | |
template getValueOrInit*(hhs, key: typed, body: untyped): untyped = | |
## Like withValueOrInsert but with returns the value and only invokes body | |
## if key isn't already in the set. Use this rather than incl if the value | |
## is expensive to construct. | |
runnableExamples: | |
THIS_WONT_COMPILE | |
doAssert false | |
type File = object | |
path, contents: string | |
proc getKey(fc: File): string = fc.path | |
proc getCachedFile(path: string): File = | |
var cache {.global.}: HHSet[File, string] | |
return cache.getValueOrInit(path): | |
value.path = path | |
value.contents = readFile(path) | |
type File = object | |
path, contents: string | |
var val: ptr hhs.T | |
block: | |
hhs.withValueOrInsert(key): | |
if inserted: | |
body | |
val = addr value | |
val[] | |
proc incl*[T, Key](hhs: var HHSet[T, Key]; toInclude: T) = | |
## Inserts toInclude into the set. | |
mixin getKey | |
hhs.withValueOrInsert(toInclude.getKey(Key)): | |
value = toInclude | |
proc contains*[T, Key, U](hhs: var HHSet[T, Key]; key: U): bool = | |
## True if key is in the set. | |
hhs.rawGet(key).val != nil | |
proc getKey*[T, U](val: U, _: typedesc[T]): T = | |
## Provide your own version of the this function to customize key extraction. | |
## For example: | |
## | |
## .. code-block:: | |
## proc getKey(val: string, _: typedesc[cstring]): cstring = val.cstring | |
## | |
## type | |
## Email = distinct string | |
## Person = object | |
## name: string | |
## email: Email | |
## proc getKey(val: Person, _: typedesc[Email]): Email = val.email | |
## | |
## If you only need a single key from an object, you can implement a | |
## single-argument getKey proc: | |
## | |
## .. code-block:: | |
## proc getKey(val: Person): Email = val.email | |
mixin getKey | |
when compiles(getKey(val)): | |
getKey(val) | |
else: | |
val | |
proc `$`*(hhs): string = | |
if hhs.len == 0: return "{}" | |
result = "{" | |
for node in hhs.unsafeAddr[].usedNodes: | |
result.addQuoted node.val | |
result &= ", " | |
result.setLen(result.len - 2) | |
result &= '}' | |
proc isHashEqCompatible[T](_: typedesc[T], t2: typedesc[T]) = discard | |
when isMainModule: | |
type StrView = object | |
str: ptr char | |
len: int | |
# IT SHOULDN'T COMPILE WITH THIS COMMENTED OUT | |
#proc isHashEqCompatible(_: typedesc[string], t2: typedesc[StrView]) = discard | |
#proc isHashEqCompatible(_: typedesc[cstring], t2: typedesc[StrView]) = discard | |
proc hash(s: StrView): Hash = hashData(s.str, s.len) | |
proc `==`(a,b: StrView): bool = a.len == b.len and equalMem(a.str, b.str, a.len) | |
converter toStrView(s: string): StrView = StrView(str: unsafeAddr s[0], len: s.xlen) | |
converter toStrView(s: cstring): StrView = StrView(str: unsafeAddr s[0], len: s.len) | |
var hhs: HHSet[string, StrView] | |
hhs.incl "boo" | |
hhs.withValueOrInsert("hello"): | |
assert inserted | |
value = "hello" | |
hhs.withValueOrInsert("hello"): | |
assert inserted == false | |
echo repr hhs | |
echo repr hhs.rawGet("hello\0asdf".cstring) | |
for letter in 'a'..'z': | |
hhs.incl $letter | |
dump repr hhs | |
dump hhs | |
type Tup[Val] = tuple[k: string, v: Val] | |
proc getKey[T](tup: Tup[T]): StrView = tup.k | |
var hhs2: HHSet[Tup[int], StrView] | |
for letter in 'a'..'z': | |
hhs2.incl(($letter, letter.ord)) | |
dump hhs2 | |
var evals = 0 | |
hhs.withValueOrInsert((evals.inc; "hello")): | |
value = "hello" | |
assert evals == 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment