Skip to content

Instantly share code, notes, and snippets.

@bnagy
Created September 11, 2012 06:14
Show Gist options
  • Save bnagy/e56e2f00caf4e1bacea6 to your computer and use it in GitHub Desktop.
Save bnagy/e56e2f00caf4e1bacea6 to your computer and use it in GitHub Desktop.
# Data structure to store large, sparse bitmaps
#
# Author: Ben Nagy
# Copyright: Copyright (c) Ben Nagy, 2006-2012.
# License: The MIT License
# (See http://www.opensource.org/licenses/mit-license.php for details.)
require 'rubygems'
require 'delegate'
class VerySparseBitmap < DelegateClass( Hash )
# For bitmaps where there is little clustering, it's pretty much as
# efficient as anything else to just have a plain array of keys
def initialize( enum=[] )
@internal_hash={}
enum.each {|v|
add v
}
super @internal_hash
end
def pack
mykeys=self.keys.sort.pack('w*')
"#{mykeys.size},#{mykeys}"
end
def self.unpack( str )
header,body=str.split(',',2)
keysize=header.to_i
raise ArgumentError, "Bad packed string" unless body.size==keysize
keystr=body[0,keysize]
newhsh={}
keystr.unpack('w*').each {|v| newhsh[v]=0}
s=self.new
s.instance_variable_get(:@internal_hash).replace( newhsh )
s
end
def add( v )
begin
v=Integer( v )
rescue
raise ArgumentError, "#{self.class}: #{__method__}: Unable to parse #{v.inspect} as an int"
end
@internal_hash[v]=0
end
alias :<< :add
def contains?( v )
begin
v=Integer( v )
rescue
raise ArgumentError, "#{self.class}: #{__method__}: Unable to parse #{v.inspect} as an int"
end
@internal_hash.has_key? v
end
def to_a
@internal_hash.keys
end
end
class SparseBitmap < DelegateClass( Hash )
# for bitmaps where these is clustering, this class allows us to create
# 'minibitmaps' of configurable size, and then add values by flipping bits
# in the number stored for the given page.
def initialize( enum=[], slice_size=8 )
@internal_hash={}
@bucketsize=0.size * slice_size
enum.each {|v|
add v
}
super @internal_hash
end
def pack
mykeys=self.keys.pack('w*')
myvals=self.values.pack('w*')
"#{mykeys.size},#{myvals.size}:#{mykeys},#{myvals}"
end
def self.unpack( str )
header,body=str.force_encoding('ASCII-8BIT').split(':',2)
keysize,valsize=header.split(',').map(&:to_i)
raise ArgumentError, "Bad packed string" unless body[keysize]==','
keystr=body[0,keysize]
valstr=body[keysize+1,valsize]
newkeys=keystr.unpack('w*')
newvals=valstr.unpack('w*')
s=self.new
s.instance_variable_get(:@internal_hash).replace( Hash[newkeys.zip(newvals)] )
s
end
def add( v )
begin
v=Integer( v )
rescue
raise ArgumentError, "#{self.class}: #{__method__}: Unable to parse #{v.inspect} as an int"
end
bucket=v / @bucketsize
bit=v % @bucketsize
# set the nth bit in the correct bucket
@internal_hash[bucket]||=0
@internal_hash[bucket] |=1 << bit
end
alias :<< :add
def contains?( v )
begin
v=Integer( v )
rescue
raise ArgumentError, "#{self.class}: #{__method__}: Unable to parse #{v.inspect} as an int"
end
bucket=v / @bucketsize
return false unless @internal_hash[bucket]
bit=v % @bucketsize
@internal_hash[bucket][bit]==1
end
def to_a
@internal_hash.each_with_object([]) {|keyval,ary|
(0...@bucketsize).each {|bit_idx|
if keyval[1][bit_idx]==1
ary << keyval[0]*64 + bit_idx
end
}
}
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment