Skip to content

Instantly share code, notes, and snippets.

@jots
Created May 9, 2014 23:32
Show Gist options
  • Save jots/65215a6fce042b7d7857 to your computer and use it in GitHub Desktop.
Save jots/65215a6fce042b7d7857 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require "pp"
# Computes the 32-bit cdb hash of a string.
#
# Starts from 5381 and, for every byte, multiplies the running value by 33
# (shift left by 5, plus itself) and XORs in the byte. Ruby Integers grow
# without bound, so the result is masked back to 32 bits after each step.
#
# The hash is taken over the raw bytes of +s+, so a key gives the same hash
# whether it is tagged as UTF-8 or as binary, and byte sequences that are not
# valid in the string's encoding are hashed instead of raising.
#
# @param s [String] key to hash
# @return [Integer] hash value in the range 0..0xffffffff
def cdb_hash(s)
  s.each_byte.reduce(5381) { |r, byte| (((r << 5) + r) ^ byte) & 0xffffffff }
end
# Default cdb file path: the default output of writecdb and the default input
# of CDBReader.
CDBFILENAME = "pure_ruby_cdb.cdb"
# Builds a cdb (constant database) file from a text file of key/value lines.
#
# Layout of the file that is written:
#   * 256 header entries of 8 bytes: (hash table offset, number of cells)
#   * the records: key length, value length, key bytes, value bytes
#   * one hash table per non-empty bucket, with twice as many cells as the
#     bucket has records; a cell is (hash, record offset) and a record
#     offset of 0 marks an unused cell
#
# Every integer is written as a 32-bit little-endian value, which is what the
# cdb format specifies, independent of the byte order of the host.
#
# Each input line is split at the first occurrence of +sep+: everything
# before it is the key, everything after it is the value (which may itself
# contain +sep+ and may be empty). A line without +sep+ is stored with an
# empty value. Empty lines are skipped.
#
# @param ofn [String] path of the cdb file to create (overwritten if present)
# @param infn [String] path of the input text file, one record per line
# @param sep [String] separator between key and value
# @return [nil]
def writecdb(ofn = CDBFILENAME, infn = "rubycdb.txt", sep = "\t")
  buckets = Array.new(256) { [] }

  File.open(ofn, "wb") do |out|
    # Leave room for the header; it is filled in last, once the table
    # positions are known.
    pos = 256 * 8
    out.seek(pos)

    # Binary mode keeps keys and values as raw bytes, so the lengths written
    # below are byte counts even when the input is UTF-8.
    File.open(infn, "rb") do |input|
      input.each_line do |line|
        line.chomp!
        next if line.empty?

        # Limit 2 keeps separators inside the value and keeps an empty value.
        key, value = line.split(sep, 2)
        value ||= ""

        out.write [key.bytesize, value.bytesize].pack("VV")
        out.write key
        out.write value

        hash = cdb_hash(key)
        buckets[hash % 256] << [hash, pos]
        pos += 8 + key.bytesize + value.bytesize
      end
    end

    # The hash tables follow the records, in bucket order.
    buckets.each do |bucket|
      next if bucket.empty?

      ncells = bucket.length * 2
      cells = Array.new(ncells) { [0, 0] }
      bucket.each do |hash, offset|
        slot = (hash >> 8) % ncells
        # Linear probing: step to the next cell until a free one is found.
        # Record offsets are never 0, so offset 0 reliably means "free".
        slot = (slot + 1) % ncells until cells[slot][1].zero?
        cells[slot] = [hash, offset]
      end
      out.write cells.flatten.pack("V*")
    end

    # Header: every bucket gets the position where its table starts (or would
    # start, for an empty bucket) and its cell count.
    table_pos = pos
    header = buckets.flat_map do |bucket|
      ncells = bucket.length * 2
      entry = [table_pos, ncells]
      table_pos += ncells * 8
      entry
    end
    out.seek(0)
    out.write header.pack("V*")
  end

  nil
end
# Read-only key lookup in a cdb file.
#
# The 256 header entries are loaded once when the reader is created; every
# lookup then seeks within the open file. Call #close when done to release
# the file handle.
#
# All integers are decoded as 32-bit little-endian values.
class CDBReader
  # Not referenced inside this class. The values match the positions of the
  # table offset (0) and the cell count (1) within a header entry.
  BPOS = 0
  NCELLS = 1

  # Size in bytes of the header: 256 entries of two 32-bit integers.
  HEADER_BYTES = 256 * 8

  # Opens a cdb file and reads its header.
  #
  # @param fn [String] path of the cdb file
  # @raise [ArgumentError] if the file is shorter than the header
  def initialize(fn = CDBFILENAME)
    @f = File.open(fn, "rb")
    header = @f.read(HEADER_BYTES)
    if header.nil? || header.bytesize < HEADER_BYTES
      @f.close
      raise ArgumentError, "#{fn}: header is shorter than #{HEADER_BYTES} bytes"
    end
    # One [table offset, cell count] pair per bucket, kept handy for lookups.
    @r = header.unpack("V*").each_slice(2).to_a
  end

  # Looks up the value stored for a key.
  #
  # The key is matched byte for byte, so it is found regardless of the
  # encoding the string is tagged with.
  #
  # @param k [String] key to look up
  # @return [String] the stored value as a binary string, or "" when the key
  #   is not present (a miss cannot be told apart from an empty value)
  def get(k)
    # Stored keys are read back as binary strings; a UTF-8 key with non-ASCII
    # characters would never compare equal to them, so match on raw bytes.
    key = k.b
    h = cdb_hash(key)
    buck_pos, ncells = @r[h % 256]
    return "" if ncells.zero? # no hit: bucket has no table

    start = (h >> 8) % ncells
    ncells.times do |i|
      @f.seek(buck_pos + ((start + i) % ncells) * 8)
      h1, p1 = @f.read(8).unpack("VV")
      return "" if p1.zero? # no hit: reached an unused cell
      next unless h1 == h

      # Same hash: confirm by comparing the stored key itself.
      @f.seek(p1)
      klen, vlen = @f.read(8).unpack("VV")
      stored_key = @f.read(klen)
      return @f.read(vlen) if stored_key == key
    end
    "" # no hit: every cell probed
  end

  # Closes the underlying file. Safe to call more than once.
  #
  # @return [nil]
  def close
    @f.close unless @f.closed?
    nil
  end
end
# Self-test, run only when this file is executed directly: builds the same
# database with writecdb and with the external `cdb` command, then lets `cmp`
# list any bytes that differ between the two files.
if __FILE__ == $0
  # Word list used as sample input.
  WORDS = "/usr/share/dict/words"

  # Create the test file: one "word<TAB>length" line per input line. Block
  # form closes both files even if an error occurs. The word list is read in
  # binary mode, so the length is a byte count.
  File.open("words.txt", "wb") do |out|
    File.open(WORDS, "rb") do |words|
      words.each_line do |line|
        line.chomp!
        out.puts [line, line.length].join("\t")
      end
    end
  end

  # Create the cdb file with the pure-Ruby writer.
  writecdb(CDBFILENAME, "words.txt")

  # Create the reference cdb file with the native cdb tool. Arguments are
  # passed as a list so no shell parsing is involved.
  system("cdb", "-m", "-c", "native_cdb.cdb", "words.txt")

  # Compare the two files byte by byte.
  system("cmp", "-l", CDBFILENAME, "native_cdb.cdb")
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment