Last active

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Basis for a bit set which uses String as storage. Storage is split up into multiple String instances to avoid too large allocations.

View BitSet.rb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
#!/usr/bin/ruby
 
# Implementation of a bit set which stores membershio in binary
# Strings managed in chunks.
class BitSet
ZERO = "\000".encode(Encoding::BINARY).freeze
DEFAULT_SEGMENT_SIZE = 1024 * 1024
 
# Create the BitSet instance. If max_num is given, we preallocate
# as much memory as needed.
#
# segment_size (optional): size of individual chunks, defaults to 1MB
# max_num (optional): highest number we want to store in the set
def initialize(segment_size = DEFAULT_SEGMENT_SIZE, max_num = nil)
@seg_size = segment_size
 
if max_num
# caller asks for preallocation
seg, off, idx = ix(max_num)
segment = nil
 
@dat = Array.new(seg + 1) do |si|
if si < seg
# leading segments
if segment
# all after first segment
segment.dup
else
# first segment: create and remember
segment = ZERO * @seg_size
end
else
# last segment, where max_num would be stored
ZERO * (off + 1)
end
end
else
# default case: all empty
@dat = []
end
end
 
def [](index)
seg, off, idx = ix(index)
d = @dat[seg] and
off < d.bytesize && (d.getbyte(off) & (1 << idx)) != 0
end
 
def []=(index, flag)
seg, off, idx = ix(index)
if flag
# set
d = dat(seg, off)
mask = 1 << idx
d.setbyte(off, d.getbyte(off) | mask)
else
# reset
d = @dat[seg]
if d && d.bytesize > off
mask = 1 << idx
d.setbyte(off, (d.getbyte(off) | mask) ^ mask)
end
end
end
 
def clear
# TODO: loosing preallocation here
@dat.clear
self
end
def empty?
@dat.all? {|seg| seg.nil? || /[^\000]/ !~ seg}
end
include Enumerable
 
def each
return enum_for(:each) unless block_given?
 
@dat.each_with_index do |dat, seg|
dat and dat.bytesize.times do |off|
s = dat.getbyte(off)
 
if s != 0
base = (seg * @seg_size + off) * 8
8.times do |idx|
yield base + idx if (s & (1 << idx)) != 0
end
end
end
end
 
self
end
 
private
def dat(segment, offset)
d = @dat[segment]
offset += 1
 
if d
add = offset - d.bytesize
d << (ZERO * add) if add > 0
d
else
@dat[segment] = ZERO * offset
end
end
 
def ix(idx)
raise "Negative index: #{idx.inspect}" if idx < 0
len, midx = idx.divmod 8
len.divmod(@seg_size) << midx
end
end
 
if $0 == __FILE__
require 'test/unit'
 
class BitSetTest < Test::Unit::TestCase
CHUNK = 1024
 
def setup
@bs = BitSet.new CHUNK
end
 
def test_init_empty
assert @bs.empty?, 'created is not empty'
refute @bs[0], 'created not empty'
assert_pattern 0, 0, 'created non empty'
seen = false
@bs.each {|*a| seen = true}
assert !seen, 'iteration on empty shows values'
end
 
def test_set
assert_pattern 0, 0
@bs[0] = 1
assert_pattern 0, 1, 'one bit set'
@bs[7] = 1
assert_pattern 0, 1 | 1 << 7, 'two bits set'
@bs[8] = 1
assert_pattern 1, 1, 'one bit set'
@bs[9] = true
assert_pattern 1, 3, 'two bits set'
end
 
def test_unset
assert_pattern 0, 0
 
refute @bs[0]
 
@bs[0] = true
assert @bs[0]
@bs[0] = false
refute @bs[0]
 
@bs[0] = true
assert @bs[0]
@bs[0] = nil
refute @bs[0]
 
@bs[88] = true
assert @bs[88]
@bs[88] = false
refute @bs[88]
end
 
def test_iter
r = Random.new 0
idx = Array.new(20) { r.rand(CHUNK / 2).tap {|i| @bs[i] = true} }
idx.each {|i| assert @bs[i], "iter failed at #{i}" }
idx2 = idx.dup
@bs.each {|i| assert idx2.delete(i), "visited wrong item #{i}"}
assert_empty idx2, 'not all visited'
end
 
def test_chunk
idx = CHUNK * 8
refute @bs[idx]
@bs[idx] = true
assert @bs[idx]
 
idx += 83
refute @bs[idx]
@bs[idx] = true
assert @bs[idx]
end
 
def test_large
r = Random.new 0
idx = Array.new(20) { r.rand(CHUNK * 32).tap {|i| @bs[i] = true} }
idx.each {|i| assert @bs[i], "iter failed at #{i}" }
end
 
def test_prealloc
@bs = BitSet.new CHUNK, CHUNK * 3 + 47
assert @bs.empty?, 'created is not empty'
test_init_empty
end
 
def assert_pattern offset, value, msg = nil
8.times do |i|
send(value[i] == 1 ? :assert : :refute, @bs[(offset << 3) + i], msg)
end
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.