Create a gist now

Instantly share code, notes, and snippets.

@rklemme /BitSet.rb
Last active Oct 5, 2015

Basis for a bit set which uses String as storage. Storage is split up into multiple String instances to avoid too large allocations.
#!/usr/bin/ruby
# Implementation of a bit set which stores membershio in binary
# Strings managed in chunks.
class BitSet
ZERO = "\000".encode(Encoding::BINARY).freeze
DEFAULT_SEGMENT_SIZE = 1024 * 1024
# Create the BitSet instance. If max_num is given, we preallocate
# as much memory as needed.
#
# segment_size (optional): size of individual chunks, defaults to 1MB
# max_num (optional): highest number we want to store in the set
def initialize(segment_size = DEFAULT_SEGMENT_SIZE, max_num = nil)
@seg_size = segment_size
if max_num
# caller asks for preallocation
seg, off, idx = ix(max_num)
segment = nil
@dat = Array.new(seg + 1) do |si|
if si < seg
# leading segments
if segment
# all after first segment
segment.dup
else
# first segment: create and remember
segment = ZERO * @seg_size
end
else
# last segment, where max_num would be stored
ZERO * (off + 1)
end
end
else
# default case: all empty
@dat = []
end
end
def [](index)
seg, off, idx = ix(index)
d = @dat[seg] and
off < d.bytesize && (d.getbyte(off) & (1 << idx)) != 0
end
def []=(index, flag)
seg, off, idx = ix(index)
if flag
# set
d = dat(seg, off)
mask = 1 << idx
d.setbyte(off, d.getbyte(off) | mask)
else
# reset
d = @dat[seg]
if d && d.bytesize > off
mask = 1 << idx
d.setbyte(off, (d.getbyte(off) | mask) ^ mask)
end
end
end
def clear
# TODO: loosing preallocation here
@dat.clear
self
end
def empty?
@dat.all? {|seg| seg.nil? || /[^\000]/ !~ seg}
end
include Enumerable
def each
return enum_for(:each) unless block_given?
@dat.each_with_index do |dat, seg|
dat and dat.bytesize.times do |off|
s = dat.getbyte(off)
if s != 0
base = (seg * @seg_size + off) * 8
8.times do |idx|
yield base + idx if (s & (1 << idx)) != 0
end
end
end
end
self
end
private
def dat(segment, offset)
d = @dat[segment]
offset += 1
if d
add = offset - d.bytesize
d << (ZERO * add) if add > 0
d
else
@dat[segment] = ZERO * offset
end
end
def ix(idx)
raise "Negative index: #{idx.inspect}" if idx < 0
len, midx = idx.divmod 8
len.divmod(@seg_size) << midx
end
end
if $0 == __FILE__
require 'test/unit'
class BitSetTest < Test::Unit::TestCase
CHUNK = 1024
def setup
@bs = BitSet.new CHUNK
end
def test_init_empty
assert @bs.empty?, 'created is not empty'
refute @bs[0], 'created not empty'
assert_pattern 0, 0, 'created non empty'
seen = false
@bs.each {|*a| seen = true}
assert !seen, 'iteration on empty shows values'
end
def test_set
assert_pattern 0, 0
@bs[0] = 1
assert_pattern 0, 1, 'one bit set'
@bs[7] = 1
assert_pattern 0, 1 | 1 << 7, 'two bits set'
@bs[8] = 1
assert_pattern 1, 1, 'one bit set'
@bs[9] = true
assert_pattern 1, 3, 'two bits set'
end
def test_unset
assert_pattern 0, 0
refute @bs[0]
@bs[0] = true
assert @bs[0]
@bs[0] = false
refute @bs[0]
@bs[0] = true
assert @bs[0]
@bs[0] = nil
refute @bs[0]
@bs[88] = true
assert @bs[88]
@bs[88] = false
refute @bs[88]
end
def test_iter
r = Random.new 0
idx = Array.new(20) { r.rand(CHUNK / 2).tap {|i| @bs[i] = true} }
idx.each {|i| assert @bs[i], "iter failed at #{i}" }
idx2 = idx.dup
@bs.each {|i| assert idx2.delete(i), "visited wrong item #{i}"}
assert_empty idx2, 'not all visited'
end
def test_chunk
idx = CHUNK * 8
refute @bs[idx]
@bs[idx] = true
assert @bs[idx]
idx += 83
refute @bs[idx]
@bs[idx] = true
assert @bs[idx]
end
def test_large
r = Random.new 0
idx = Array.new(20) { r.rand(CHUNK * 32).tap {|i| @bs[i] = true} }
idx.each {|i| assert @bs[i], "iter failed at #{i}" }
end
def test_prealloc
@bs = BitSet.new CHUNK, CHUNK * 3 + 47
assert @bs.empty?, 'created is not empty'
test_init_empty
end
def assert_pattern offset, value, msg = nil
8.times do |i|
send(value[i] == 1 ? :assert : :refute, @bs[(offset << 3) + i], msg)
end
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment