Skip to content

Instantly share code, notes, and snippets.

@RedBeard0531
Created January 2, 2018 14:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RedBeard0531/efd1071fe2474ba83149921d0427a8fb to your computer and use it in GitHub Desktop.
Save RedBeard0531/efd1071fe2474ba83149921d0427a8fb to your computer and use it in GitHub Desktop.
## HHSet is a Heterogeneous Hash Set - It allows O(1) lookup of values by any compatible type.
{.experimental.}
import hashes, future, typetraits, strformat
type
Node[T] = object
val: T
used: bool # TODO make this the sign bit of the hash
hash: uint32
next: ref Node[T]
HHSet*[T, Key] = object
## Heterogeneous Hash Set, storing ``T``, but using ``Key`` for lookup.
## If ``T`` isn't convertable to ``Key``, you must define a
## ``getKey(T): Key`` or ``getKey(T, typedesc[Key])`` proc to
## extract the key.
##
## The default (zero) state represents the empty set, so you do not need
## to call any init function.
data: seq[Node[T]]
len: int
HashEqCompatible*[StorageType] = concept heq, type HEQ
## Declares types that where a == b implies hash(a) == hash(b), even with
## different types. For example, string and cstring are compatible.
mixin isHashEqCompatible
compiles(isHashEqCompatible(StorageType, HEQ)) or
compiles(isHashEqCompatible(HEQ, StorageType))
hash(heq) is Hash
(heq == StorageType) is bool
using
hhs: HHSet
const
initSize = if defined(release): 16 else: 1
proc len*(hhs): int = hhs.len
iterator allNodes[T, Key](hhs: var HHSet[T, Key]): var Node[T] =
if not hhs.data.isNil:
for bucket in hhs.data.mitems:
yield bucket
var link = bucket.next
while not link.isNil:
yield link[]
link = link.next
iterator usedNodes[T, Key](hhs: var HHSet[T, Key]): var Node[T] =
for node in hhs.allNodes:
if node.used:
yield node
proc rawGet[T, Key](hhs: HHSet[T, Key]; key: HashEqCompatible[Key]): auto =
mixin getKey
if hhs.len == 0: return cast[ptr Node[hhs.T]](nil)
let hash = key.hash.uint32
var node = unsafeAddr hhs.data[hash.int and (hhs.data.xlen - 1)]
while true:
if node.hash == hash and node.val.getKey(hhs.Key) == key:
return node
node = unsafeAddr node.next[]
if node == nil:
return node
proc rehash[T, Key](hhs: var HHSet[T, Key]) =
var newHHS: HHSet[T, Key]
newHHS.data = newSeq[Node[T]](hhs.data.len * 2)
for old in hhs.usedNodes:
newHHS.rawInsert(old.val.getKey(Key), old.hash).node.val.shallowCopy(old.val)
assert newHHS.len == hhs.len
hhs.data.swap(newHHS.data)
proc rawInsert[T, Key](hhs: var HHSet[T, Key]; key: HashEqCompatible[Key], hash: uint32):
tuple[node: ptr Node[T], inserted: bool] =
mixin getKey
if hhs.data.isNil:
hhs.data = newSeq[Node[T]](initSize)
let hash = key.hash.uint32
var node = addr hhs.data[hash.int and hhs.data.high]
while true:
if node.hash == hash and node.val.getKey(hhs.Key) == key:
return (node, false)
if node.next == nil:
if hhs.len >= hhs.data.len:
hhs.rehash() # up to 100% load factor.
return hhs.rawInsert(key, hash)
hhs.len.inc
result.inserted = true
if node.used:
node.next.new()
result.node = addr node.next[]
else:
result.node = node
result.node.used = true
result.node.hash = hash
return
node = addr node.next[]
template withValueOrInsert*(hhs: var HHSet; key: typed, body: untyped) =
## Looks up key in the set and executes body with these names in scope:
##
## .. code-block::
## let inserted: bool ## True if key was not already in the set
## var value: var HHSet.T ## The mutable value in the set
##
## You are responsible for ensuring that ``value.getKey(hhs.Key) == key`` at
## the end of the block.
let keyX = key # eval once
hhs.withValueOrInsertKnownHash(keyX, key.hash.uint32): body
template withValueOrInsertKnownHash*(hhs: var HHSet; key: typed, hash: uint32, body: untyped) =
## Like withValueOrInsert but with a pre-computed hash.
block:
let keyX = key # eval once
let (node, inserted {.inject, used.}) = hhs.rawInsert(keyX, hash)
type Value = hhs.T
iterator oneShot(x: var Value): var Value {.gensym.} = yield x
for value {.inject.} in oneShot(node.val):
body
mixin getKey
assert node.val.getKey(hhs.Key) == keyX ## You must initialize value
template getValueOrInit*(hhs, key: typed, body: untyped): untyped =
## Like withValueOrInsert but with returns the value and only invokes body
## if key isn't already in the set. Use this rather than incl if the value
## is expensive to construct.
runnableExamples:
THIS_WONT_COMPILE
doAssert false
type File = object
path, contents: string
proc getKey(fc: File): string = fc.path
proc getCachedFile(path: string): File =
var cache {.global.}: HHSet[File, string]
return cache.getValueOrInit(path):
value.path = path
value.contents = readFile(path)
type File = object
path, contents: string
var val: ptr hhs.T
block:
hhs.withValueOrInsert(key):
if inserted:
body
val = addr value
val[]
proc incl*[T, Key](hhs: var HHSet[T, Key]; toInclude: T) =
## Inserts toInclude into the set.
mixin getKey
hhs.withValueOrInsert(toInclude.getKey(Key)):
value = toInclude
proc contains*[T, Key, U](hhs: var HHSet[T, Key]; key: U): bool =
## True if key is in the set.
hhs.rawGet(key).val != nil
proc getKey*[T, U](val: U, _: typedesc[T]): T =
## Provide your own version of the this function to customize key extraction.
## For example:
##
## .. code-block::
## proc getKey(val: string, _: typedesc[cstring]): cstring = val.cstring
##
## type
## Email = distinct string
## Person = object
## name: string
## email: Email
## proc getKey(val: Person, _: typedesc[Email]): Email = val.email
##
## If you only need a single key from an object, you can implement a
## single-argument getKey proc:
##
## .. code-block::
## proc getKey(val: Person): Email = val.email
mixin getKey
when compiles(getKey(val)):
getKey(val)
else:
val
proc `$`*(hhs): string =
if hhs.len == 0: return "{}"
result = "{"
for node in hhs.unsafeAddr[].usedNodes:
result.addQuoted node.val
result &= ", "
result.setLen(result.len - 2)
result &= '}'
proc isHashEqCompatible[T](_: typedesc[T], t2: typedesc[T]) = discard
when isMainModule:
type StrView = object
str: ptr char
len: int
# IT SHOULDN'T COMPILE WITH THIS COMMENTED OUT
#proc isHashEqCompatible(_: typedesc[string], t2: typedesc[StrView]) = discard
#proc isHashEqCompatible(_: typedesc[cstring], t2: typedesc[StrView]) = discard
proc hash(s: StrView): Hash = hashData(s.str, s.len)
proc `==`(a,b: StrView): bool = a.len == b.len and equalMem(a.str, b.str, a.len)
converter toStrView(s: string): StrView = StrView(str: unsafeAddr s[0], len: s.xlen)
converter toStrView(s: cstring): StrView = StrView(str: unsafeAddr s[0], len: s.len)
var hhs: HHSet[string, StrView]
hhs.incl "boo"
hhs.withValueOrInsert("hello"):
assert inserted
value = "hello"
hhs.withValueOrInsert("hello"):
assert inserted == false
echo repr hhs
echo repr hhs.rawGet("hello\0asdf".cstring)
for letter in 'a'..'z':
hhs.incl $letter
dump repr hhs
dump hhs
type Tup[Val] = tuple[k: string, v: Val]
proc getKey[T](tup: Tup[T]): StrView = tup.k
var hhs2: HHSet[Tup[int], StrView]
for letter in 'a'..'z':
hhs2.incl(($letter, letter.ord))
dump hhs2
var evals = 0
hhs.withValueOrInsert((evals.inc; "hello")):
value = "hello"
assert evals == 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment