Skip to content

@rklemme /BitSet.rb
Last active

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Basis for a bit set which uses String as storage. Storage is split up into multiple String instances to avoid too large allocations.
#!/usr/bin/ruby
# Implementation of a bit set which stores membershio in binary
# Strings managed in chunks.
class BitSet
ZERO = "\000".encode(Encoding::BINARY).freeze
DEFAULT_SEGMENT_SIZE = 1024 * 1024
# Create the BitSet instance. If max_num is given, we preallocate
# as much memory as needed.
#
# segment_size (optional): size of individual chunks, defaults to 1MB
# max_num (optional): highest number we want to store in the set
def initialize(segment_size = DEFAULT_SEGMENT_SIZE, max_num = nil)
@seg_size = segment_size
if max_num
# caller asks for preallocation
seg, off, idx = ix(max_num)
segment = nil
@dat = Array.new(seg + 1) do |si|
if si < seg
# leading segments
if segment
# all after first segment
segment.dup
else
# first segment: create and remember
segment = ZERO * @seg_size
end
else
# last segment, where max_num would be stored
ZERO * (off + 1)
end
end
else
# default case: all empty
@dat = []
end
end
def [](index)
seg, off, idx = ix(index)
d = @dat[seg] and
off < d.bytesize && (d.getbyte(off) & (1 << idx)) != 0
end
def []=(index, flag)
seg, off, idx = ix(index)
if flag
# set
d = dat(seg, off)
mask = 1 << idx
d.setbyte(off, d.getbyte(off) | mask)
else
# reset
d = @dat[seg]
if d && d.bytesize > off
mask = 1 << idx
d.setbyte(off, (d.getbyte(off) | mask) ^ mask)
end
end
end
def clear
# TODO: loosing preallocation here
@dat.clear
self
end
def empty?
@dat.all? {|seg| seg.nil? || /[^\000]/ !~ seg}
end
include Enumerable
def each
return enum_for(:each) unless block_given?
@dat.each_with_index do |dat, seg|
dat and dat.bytesize.times do |off|
s = dat.getbyte(off)
if s != 0
base = (seg * @seg_size + off) * 8
8.times do |idx|
yield base + idx if (s & (1 << idx)) != 0
end
end
end
end
self
end
private
def dat(segment, offset)
d = @dat[segment]
offset += 1
if d
add = offset - d.bytesize
d << (ZERO * add) if add > 0
d
else
@dat[segment] = ZERO * offset
end
end
def ix(idx)
raise "Negative index: #{idx.inspect}" if idx < 0
len, midx = idx.divmod 8
len.divmod(@seg_size) << midx
end
end
if $0 == __FILE__
require 'test/unit'
class BitSetTest < Test::Unit::TestCase
CHUNK = 1024
def setup
@bs = BitSet.new CHUNK
end
def test_init_empty
assert @bs.empty?, 'created is not empty'
refute @bs[0], 'created not empty'
assert_pattern 0, 0, 'created non empty'
seen = false
@bs.each {|*a| seen = true}
assert !seen, 'iteration on empty shows values'
end
def test_set
assert_pattern 0, 0
@bs[0] = 1
assert_pattern 0, 1, 'one bit set'
@bs[7] = 1
assert_pattern 0, 1 | 1 << 7, 'two bits set'
@bs[8] = 1
assert_pattern 1, 1, 'one bit set'
@bs[9] = true
assert_pattern 1, 3, 'two bits set'
end
def test_unset
assert_pattern 0, 0
refute @bs[0]
@bs[0] = true
assert @bs[0]
@bs[0] = false
refute @bs[0]
@bs[0] = true
assert @bs[0]
@bs[0] = nil
refute @bs[0]
@bs[88] = true
assert @bs[88]
@bs[88] = false
refute @bs[88]
end
def test_iter
r = Random.new 0
idx = Array.new(20) { r.rand(CHUNK / 2).tap {|i| @bs[i] = true} }
idx.each {|i| assert @bs[i], "iter failed at #{i}" }
idx2 = idx.dup
@bs.each {|i| assert idx2.delete(i), "visited wrong item #{i}"}
assert_empty idx2, 'not all visited'
end
def test_chunk
idx = CHUNK * 8
refute @bs[idx]
@bs[idx] = true
assert @bs[idx]
idx += 83
refute @bs[idx]
@bs[idx] = true
assert @bs[idx]
end
def test_large
r = Random.new 0
idx = Array.new(20) { r.rand(CHUNK * 32).tap {|i| @bs[i] = true} }
idx.each {|i| assert @bs[i], "iter failed at #{i}" }
end
def test_prealloc
@bs = BitSet.new CHUNK, CHUNK * 3 + 47
assert @bs.empty?, 'created is not empty'
test_init_empty
end
def assert_pattern offset, value, msg = nil
8.times do |i|
send(value[i] == 1 ? :assert : :refute, @bs[(offset << 3) + i], msg)
end
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.