-
-
Save bnagy/e56e2f00caf4e1bacea6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data structure to store large, sparse bitmaps | |
# | |
# Author: Ben Nagy | |
# Copyright: Copyright (c) Ben Nagy, 2006-2012. | |
# License: The MIT License | |
# (See http://www.opensource.org/licenses/mit-license.php for details.) | |
require 'rubygems' | |
require 'delegate' | |
class VerySparseBitmap < DelegateClass( Hash ) | |
# For bitmaps where there is little clustering, it's pretty much as | |
# efficient as anything else to just have a plain array of keys | |
def initialize( enum=[] ) | |
@internal_hash={} | |
enum.each {|v| | |
add v | |
} | |
super @internal_hash | |
end | |
def pack | |
mykeys=self.keys.sort.pack('w*') | |
"#{mykeys.size},#{mykeys}" | |
end | |
def self.unpack( str ) | |
header,body=str.split(',',2) | |
keysize=header.to_i | |
raise ArgumentError, "Bad packed string" unless body.size==keysize | |
keystr=body[0,keysize] | |
newhsh={} | |
keystr.unpack('w*').each {|v| newhsh[v]=0} | |
s=self.new | |
s.instance_variable_get(:@internal_hash).replace( newhsh ) | |
s | |
end | |
def add( v ) | |
begin | |
v=Integer( v ) | |
rescue | |
raise ArgumentError, "#{self.class}: #{__method__}: Unable to parse #{v.inspect} as an int" | |
end | |
@internal_hash[v]=0 | |
end | |
alias :<< :add | |
def contains?( v ) | |
begin | |
v=Integer( v ) | |
rescue | |
raise ArgumentError, "#{self.class}: #{__method__}: Unable to parse #{v.inspect} as an int" | |
end | |
@internal_hash.has_key? v | |
end | |
def to_a | |
@internal_hash.keys | |
end | |
end | |
class SparseBitmap < DelegateClass( Hash ) | |
# for bitmaps where these is clustering, this class allows us to create | |
# 'minibitmaps' of configurable size, and then add values by flipping bits | |
# in the number stored for the given page. | |
def initialize( enum=[], slice_size=8 ) | |
@internal_hash={} | |
@bucketsize=0.size * slice_size | |
enum.each {|v| | |
add v | |
} | |
super @internal_hash | |
end | |
def pack | |
mykeys=self.keys.pack('w*') | |
myvals=self.values.pack('w*') | |
"#{mykeys.size},#{myvals.size}:#{mykeys},#{myvals}" | |
end | |
def self.unpack( str ) | |
header,body=str.force_encoding('ASCII-8BIT').split(':',2) | |
keysize,valsize=header.split(',').map(&:to_i) | |
raise ArgumentError, "Bad packed string" unless body[keysize]==',' | |
keystr=body[0,keysize] | |
valstr=body[keysize+1,valsize] | |
newkeys=keystr.unpack('w*') | |
newvals=valstr.unpack('w*') | |
s=self.new | |
s.instance_variable_get(:@internal_hash).replace( Hash[newkeys.zip(newvals)] ) | |
s | |
end | |
def add( v ) | |
begin | |
v=Integer( v ) | |
rescue | |
raise ArgumentError, "#{self.class}: #{__method__}: Unable to parse #{v.inspect} as an int" | |
end | |
bucket=v / @bucketsize | |
bit=v % @bucketsize | |
# set the nth bit in the correct bucket | |
@internal_hash[bucket]||=0 | |
@internal_hash[bucket] |=1 << bit | |
end | |
alias :<< :add | |
def contains?( v ) | |
begin | |
v=Integer( v ) | |
rescue | |
raise ArgumentError, "#{self.class}: #{__method__}: Unable to parse #{v.inspect} as an int" | |
end | |
bucket=v / @bucketsize | |
return false unless @internal_hash[bucket] | |
bit=v % @bucketsize | |
@internal_hash[bucket][bit]==1 | |
end | |
def to_a | |
@internal_hash.each_with_object([]) {|keyval,ary| | |
(0...@bucketsize).each {|bit_idx| | |
if keyval[1][bit_idx]==1 | |
ary << keyval[0]*64 + bit_idx | |
end | |
} | |
} | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment