Skip to content

Instantly share code, notes, and snippets.

@jots
Created May 10, 2014 18:51
Show Gist options
  • Save jots/ef7bcff862510476d063 to your computer and use it in GitHub Desktop.
Save jots/ef7bcff862510476d063 to your computer and use it in GitHub Desktop.
import strutils, streams, unsigned
# Implementation of Dan Bernstein's cdb (constant database) file format.
# utility
iterator mitems[T](a: var openarray[T]): var T =
  ## Yields every element of `a` as a mutable location, walking the
  ## indices from `a.low` up to `a.high`, so the loop body can assign
  ## to the element in place.
  var idx = a.low
  while idx <= a.high:
    yield a[idx]
    inc idx
proc cdb_hash(s:string):uint32 =
  ## Computes the cdb hash of `s`.
  ##
  ## The hash starts at 5381 and, for every byte `c` of `s`, is replaced
  ## by `((h shl 5) + h) xor c`. All arithmetic is done on `uint32`, so
  ## it wraps modulo 2^32. The empty string hashes to 5381.
  result = 5381'u32
  for c in s:
    # Widen the byte to uint32 explicitly: `xor` has no mixed
    # uint32/int overload, so the raw `ord` value would not type-check.
    result = ((result shl 5) + result) xor uint32(ord(c))
proc writeLe32(s: Stream, v: uint32) =
  ## Writes `v` to `s` as four bytes, least significant byte first.
  ## cdb stores every 32-bit quantity little-endian, so the bytes are
  ## emitted one at a time instead of dumping the native representation.
  for shift in [0, 8, 16, 24]:
    s.write(uint8((v shr shift) and 0xFF'u32))

proc writecdb(ofn,infn:string ,sep:char='\t'):int =
  ## Builds the cdb file `ofn` from the text file `infn`.
  ##
  ## Every non-empty line of `infn` is one record: the key is the text
  ## before the first `sep`, the value is everything after it (further
  ## `sep` characters stay part of the value). Empty lines are skipped.
  ##
  ## File layout produced:
  ## * 256 header entries of (table position, slot count), 8 bytes each;
  ## * the records, each as key length, value length, key, value;
  ## * 256 hash tables of (hash, record position) slots, where a table
  ##   has twice as many slots as it has records.
  ##
  ## Raises `IOError` when `ofn` cannot be opened for writing and
  ## `ValueError` when a non-empty line contains no `sep`.
  ##
  ## `result` is never assigned, so the returned value is always 0.
  const
    tableCount = 256
    headerSize = tableCount * 8

  let f = newFileStream(ofn, fmWrite)
  if f.isNil:
    raise newException(IOError, "cannot open output file: " & ofn)

  try:
    var
      pos = uint32(headerSize)   # file offset of the next record
      buckets: array[0..255, seq[array[0..1, uint32]]]
      lineNo = 0
    for b in 0 .. tableCount - 1:
      buckets[b] = @[]

    # Leave room for the header; it is filled in last, once the table
    # positions are known.
    f.setPosition(headerSize)

    for line in lines(infn):
      inc lineNo
      if line.len == 0:
        continue
      let cut = line.find(sep)
      if cut < 0:
        raise newException(ValueError,
          infn & ":" & $lineNo & ": separator not found")
      let
        k = line.substr(0, cut - 1)
        v = line.substr(cut + 1)
      writeLe32(f, uint32(k.len))
      writeLe32(f, uint32(v.len))
      f.write(k)
      f.write(v)
      let h = cdb_hash(k)
      # The low 8 bits of the hash select one of the 256 tables.
      buckets[int(h mod 256'u32)].add([h, pos])
      pos += uint32(8 + k.len + v.len)

    # The hash tables start right after the last record.
    let tablesStart = pos
    for b in 0 .. tableCount - 1:
      let ncells = buckets[b].len * 2
      if ncells == 0:
        continue   # an empty table occupies no bytes
      # newSeq zero-fills, and a zero position marks a free slot: real
      # records always live past the header, so their position is > 0.
      var cells = newSeq[array[0..1, uint32]](ncells)
      for hp in buckets[b]:
        # The remaining hash bits pick the starting slot; probe
        # linearly (with wrap-around) until a free slot turns up.
        var i = int((hp[0] shr 8) mod uint32(ncells))
        while cells[i][1] != 0'u32:
          i = (i + 1) mod ncells
        cells[i] = hp
      for cell in cells:
        writeLe32(f, cell[0])
        writeLe32(f, cell[1])

    # Go back and write the header: position and slot count per table.
    f.setPosition(0)
    var tablePos = tablesStart
    for b in 0 .. tableCount - 1:
      let ncells = uint32(buckets[b].len * 2)
      writeLe32(f, tablePos)
      writeLe32(f, ncells)
      tablePos += ncells * 8'u32
  finally:
    f.close()
# Build nimrodcdb.cdb from the records in words.txt (default separator).
var x = writecdb(ofn = "nimrodcdb.cdb", infn = "words.txt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment