Created
May 10, 2014 18:51
-
-
Save jots/ef7bcff862510476d063 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import strutils, streams, unsigned | |
| # Implements Dan Bernstein's cdb (constant database) file format. | |
| # utility | |
iterator mitems[T](a: var openarray[T]): var T =
  ## Yields every element of `a`, from the lowest index to the highest, as a
  ## mutable location so the loop body can assign to the element in place.
  var idx = low(a)
  while idx <= high(a):
    yield a[idx]
    inc idx
proc cdb_hash(s: string): uint32 =
  ## Returns the 32-bit cdb hash of `s`.
  ##
  ## The hash starts at 5381 and, for each byte `c` of `s` in order, becomes
  ## `((h shl 5) + h) xor c`. Arithmetic is unsigned 32-bit, so the sum wraps
  ## around instead of overflowing. The empty string hashes to 5381.
  result = 5381'u32
  for c in s:
    # `ord(c)` is an `int`; convert explicitly so both operands of `xor`
    # are `uint32` (Nim does not mix signed and unsigned implicitly).
    result = ((result shl 5) + result) xor uint32(ord(c))
proc writecdb(ofn, infn: string, sep: char = '\t'): int =
  ## Builds a cdb (constant database) file `ofn` from the text file `infn`.
  ##
  ## Every non-empty line of `infn` is split on `sep`: the first field is the
  ## key, the second field is the value. A line without `sep` is stored with
  ## an empty value; fields after the second are ignored.
  ##
  ## Layout written to `ofn`:
  ## * a header of 256 entries, each (table position, slot count);
  ## * the records, each as key length, value length, key bytes, value bytes;
  ## * 256 hash tables, one per value of `hash and 255`, each holding twice
  ##   as many (hash, record position) slots as it has records. Collisions
  ##   are resolved by linear probing; an unused slot is all zeroes.
  ##
  ## Every 32-bit number is written little-endian regardless of the host.
  ## Raises `IOError` if `ofn` cannot be opened for writing. The `int` result
  ## is never assigned, so the proc always returns 0.
  const
    tableCount = 256
    headerBytes = tableCount * 8 # 256 x (position, slot count)

  var f = newFileStream(ofn, fmWrite)
  if f == nil:
    raise newException(IOError, "cannot open '" & ofn & "' for writing")

  proc putU32(x: uint32) =
    # Emit the low byte first so the file is little-endian on any host.
    for shift in [0, 8, 16, 24]:
      f.write(char((x shr shift) and 0xFF'u32))

  var
    pos = uint32(headerBytes) # file offset of the next record
    buckets: array[0..tableCount - 1, seq[array[0..1, uint32]]]
  for b in 0 .. tableCount - 1:
    buckets[b] = @[]

  try:
    # Leave room for the header; it is filled in once the sizes are known.
    f.setPosition(headerBytes)

    for line in lines(infn):
      if line.len == 0: continue
      let
        parts = line.split(sep)
        k = parts[0]
        v = if parts.len > 1: parts[1] else: ""
      putU32(uint32(k.len))
      putU32(uint32(v.len))
      f.write(k)
      f.write(v)
      let h = cdb_hash(k)
      # The low 8 bits of the hash select the table; remember where the
      # record starts so the table slot can point back at it.
      buckets[int(h and 0xFF'u32)].add([h, pos])
      pos += uint32(8 + k.len + v.len)

    # Hash tables follow the records, in table order. An empty bucket
    # contributes no slots at all.
    for b in 0 .. tableCount - 1:
      if buckets[b].len == 0: continue
      let ncells = buckets[b].len * 2
      var cells = newSeq[array[0..1, uint32]](ncells) # zero = unused slot
      for hp in buckets[b]:
        var i = int((hp[0] shr 8) mod uint32(ncells))
        # Record positions are never 0 (records start after the header), so
        # a zero position reliably marks a free slot.
        while cells[i][1] != 0'u32:
          i = (i + 1) mod ncells
        cells[i] = hp
      for cell in cells:
        putU32(cell[0])
        putU32(cell[1])

    # Header: where each table starts and how many slots it has.
    f.setPosition(0)
    var tablePos = pos
    for b in 0 .. tableCount - 1:
      let slots = uint32(buckets[b].len * 2)
      putU32(tablePos)
      putU32(slots)
      tablePos += slots * 8'u32
  finally:
    f.close()
# Build "nimrodcdb.cdb" from "words.txt" using the default tab separator.
var x = writecdb(ofn = "nimrodcdb.cdb", infn = "words.txt")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment