Skip to content

Instantly share code, notes, and snippets.

@mikecmpbll
Created February 24, 2016 20:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikecmpbll/fa8fe88bde75b412f04a to your computer and use it in GitHub Desktop.
Save mikecmpbll/fa8fe88bde75b412f04a to your computer and use it in GitHub Desktop.
Buzhash in Ruby
require 'stringio'
module Meriback
  # Rolling hash over a byte stream (Buzhash).
  #
  # Every input byte is mapped through BYTE_HASH to a 32-bit value. The running
  # sum is rotated left by one bit for each byte and XOR-ed with the new value;
  # once more than +window_size+ bytes have been consumed, the value that has
  # left the window is rotated by +window_size % 32+ and XOR-ed back out, so
  # the sum only reflects the last +window_size+ bytes.
  #
  # Input is pulled from +data_stream+ in chunks of +window_size+ bytes and
  # buffered in +file_buffer+ so single bytes are not read from the stream
  # one at a time.
  class Buzhash
    # Substitution table: one 32-bit value for each possible byte (0..255).
    # TODO: the table is hard-coded; the original author intended to seed it.
    BYTE_HASH = [
      0x589c63e9, 0x118d2c7c, 0x1faecf92, 0x53c9f4d8, 0x6c8cb496, 0x35e0af5a, 0x3a786480, 0x035d8045,
      0x5e6bd8f7, 0x6932ba3b, 0x59221501, 0x59f92f7e, 0x60ed62ad, 0x044a900e, 0x4357a8a6, 0x0bd7d8ef,
      0x7dc25b20, 0x212d52be, 0x033f160f, 0x0cde12b8, 0x400fa76c, 0x26ebec72, 0x59413573, 0x534bdcbb,
      0x2b165e74, 0x25ff262d, 0x61aee04d, 0x07f43f1a, 0x306e498f, 0x5a42f52a, 0x63f5a489, 0x6a713fcf,
      0x2ea0d852, 0x72a5fd12, 0x2af29264, 0x147f675d, 0x2cc4ddbb, 0x312bdede, 0x6e544b29, 0x7017b69a,
      0x3761c3e3, 0x4a448682, 0x09bd64b9, 0x2b253374, 0x108d1140, 0x7af6ce31, 0x1fcaae36, 0x5738549d,
      0x2855c534, 0x2b5fb033, 0x0954b71e, 0x551970b0, 0x1e54f738, 0x0ff3cc4d, 0x7895f350, 0x7a0e51ee,
      0x488da8bd, 0x6c030cb5, 0x3e441340, 0x69deb621, 0x28bef6db, 0x116f398a, 0x583029c2, 0x61995a77,
      0x044917ce, 0x747ecd29, 0x21a875f6, 0x1c8ced54, 0x20e648b8, 0x69317764, 0x5ed83674, 0x31cc15ad,
      0x7f09715d, 0x70ae8ef2, 0x10aadbe5, 0x21467b50, 0x29d956d4, 0x5371e341, 0x09c0cfe4, 0x6e465045,
      0x6c534254, 0x4afcce1a, 0x12a8cb54, 0x592e0f16, 0x6813b15f, 0x0f328f09, 0x05b31d24, 0x6e82f526,
      0x135a4a37, 0x4ff62bac, 0x431acc7c, 0x72592400, 0x21019c0c, 0x36da0e56, 0x02ab0536, 0x0b2678f8,
      0x490b967f, 0x667bf68b, 0x688a6948, 0x071d78ee, 0x6559b070, 0x4c251103, 0x76a26247, 0x37acdbbc,
      0x42ffbcae, 0x0c3e3ae2, 0x718f58fe, 0x216f0cdb, 0x26e40986, 0x1d8e74d2, 0x220dda80, 0x5a5d846c,
      0x255863e5, 0x53d6d71f, 0x715e0663, 0x3cff6b0c, 0x76eaa419, 0x45f96482, 0x279ac3ef, 0x5f2a3e86,
      0x3315ce73, 0x00f2a9c5, 0x52d21ffe, 0x044318eb, 0x10580651, 0x09e09dc5, 0x02fa8952, 0x51e64547,
      0x3a672870, 0x025f5aca, 0x4901e37e, 0x31e21c5e, 0x235fbbef, 0x1d93560a, 0x67690854, 0x5a28701b,
      0x3ae9aa63, 0x0340d815, 0x3c7efdb1, 0x21f94cac, 0x34167e4f, 0x09ce8807, 0x356f36ef, 0x692f79a2,
      0x0d17d626, 0x6850e294, 0x371a6b2f, 0x45c373cc, 0x13b0094c, 0x7cad1f76, 0x30cb5196, 0x0d584aa9,
      0x1631ee4d, 0x63de66a5, 0x2e5666a5, 0x16063ed7, 0x6750ecc1, 0x314370ed, 0x1ffeea05, 0x463e9b39,
      0x51a30625, 0x3d2997d5, 0x7d2c2db8, 0x758d8dd2, 0x56b5e067, 0x5d5cb017, 0x17445959, 0x7e9be4fc,
      0x04ec6ea9, 0x2035a176, 0x0b9019b0, 0x59096b64, 0x2fbc3712, 0x43025f53, 0x6d5aa0df, 0x7ce918c6,
      0x1994f796, 0x5f12be8b, 0x0899b06f, 0x2471c06e, 0x40521a4e, 0x27c1c25f, 0x4ab2d5de, 0x743ead80,
      0x21c42027, 0x058f6527, 0x4de40964, 0x199bcd70, 0x524a5d86, 0x66c22664, 0x17149fce, 0x0702a6fe,
      0x72cb646e, 0x66874880, 0x3e36ef1b, 0x05a3a7f2, 0x1ac5f17e, 0x731ee0c0, 0x2867f14b, 0x2766767c,
      0x2ba8130d, 0x673956b7, 0x6a16d480, 0x32a5d7cb, 0x26787ad5, 0x2079fcdc, 0x12db6258, 0x16beaba2,
      0x3dbf84db, 0x3849f1a3, 0x4315f4ba, 0x3f9f07a7, 0x19385de3, 0x380c6817, 0x2a56a3f2, 0x045bed59,
      0x15ba5d4e, 0x31f32f06, 0x4af1d8d7, 0x0c30d718, 0x55630392, 0x69db6f5f, 0x2c5a4512, 0x10093cd3,
      0x726a5501, 0x78fe396f, 0x46db0712, 0x5589ed79, 0x689211ce, 0x2c898670, 0x657935b9, 0x0e3dc0dc,
      0x408aa394, 0x3b5f681c, 0x4fcb5775, 0x1d2c3a80, 0x7e4fbba1, 0x74764025, 0x0a5239ab, 0x0f50e2fb,
      0x569c9009, 0x247dd3b9, 0x52248867, 0x50df2060, 0x555fa16f, 0x7437dcb5, 0x78b74ae6, 0x5d223f38,
      0x4d41ca3b, 0x2ba6b1ca, 0x2462c968, 0x6f427ce6, 0x70ccbd6c, 0x18520ced, 0x3abfaca8, 0x12500157
    ].freeze

    attr_accessor :data_stream, :sum, :byte_buffer, :shift, :window_size, :overflow, :file_buffer

    # @param data_stream [#read, #pos] source of bytes; +pos+ is only needed
    #   by #each_hash
    # @param window_size [Integer] number of bytes covered by the rolling sum;
    #   also used as the chunk size for reads from +data_stream+
    def initialize(data_stream, window_size)
      @data_stream = data_stream
      @window_size = window_size
      # Rotation applied to a value leaving the window (rotations are mod 32).
      @shift = window_size % 32
      # Table values of the bytes currently inside the window, oldest first.
      @byte_buffer = []
      @sum = 0
      # Holds the chunk most recently read from data_stream.
      @file_buffer = StringIO.new
    end

    # Hashes the rest of the stream one byte at a time.
    #
    # @yield [sum, position] the rolling sum after each byte, and the stream
    #   offset just past that byte
    # @return [Enumerator, nil] an Enumerator when no block is given
    def each_hash
      return enum_for(__method__) unless block_given?

      while hash_byte
        # data_stream.pos sits at the end of the chunk held in file_buffer.
        # Subtract the chunk's real length rather than window_size: the last
        # chunk of a stream is usually shorter than a full window.
        chunk_start = data_stream.pos - file_buffer.string.bytesize
        yield(sum, chunk_start + file_buffer.pos)
      end
    end

    # Feeds up to +len+ bytes into the hash without yielding anything.
    #
    # @param len [Integer] number of #hash_byte calls to make
    # @return [Integer] +len+
    def hash_slice(len)
      len.times { hash_byte }
    end

    # Consumes one byte and updates the rolling sum.
    #
    # @return [Integer, nil] the new sum, or nil when the stream is exhausted
    def hash_byte
      byte = get_byte
      return unless byte

      incoming = BYTE_HASH[byte]
      byte_buffer << incoming
      updated = barrel_shift(sum, 1) ^ incoming
      # Window is over-full: cancel the contribution of the oldest byte.
      updated ^= barrel_shift(byte_buffer.shift, shift) if byte_buffer.size > window_size
      self.sum = updated
    end

    # Returns the next input byte, refilling file_buffer from data_stream in
    # chunks of +window_size+ bytes when the buffer is drained.
    #
    # @return [Integer, nil] the byte value, or nil when no more data is read
    def get_byte
      if file_buffer.eof?
        data = data_stream.read(window_size)
        return unless data

        file_buffer.reopen(data)
      end
      file_buffer.getbyte
    end

    # Rotates +v+ left by +shift+ bits within 32 bits.
    #
    # @param v [Integer] value to rotate; the result is only a true rotation
    #   when +v+ fits in 32 bits
    # @param shift [Integer] rotation amount (callers here pass 0..31)
    # @return [Integer]
    def barrel_shift(v, shift)
      ((v << shift) & 0xffffffff) | (v >> (32 - shift))
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment