Skip to content

Instantly share code, notes, and snippets.

@choro3
Created May 5, 2013 13:49
Show Gist options
  • Save choro3/5520889 to your computer and use it in GitHub Desktop.
Save choro3/5520889 to your computer and use it in GitHub Desktop.
高速文字列解析の世界 p42 完備辞書(ビットが密な場合) ほんとに正しく実装出来てる?
# coding: utf-8
# 完備辞書 (p.42
# 密なばあい
from math import log, ceil
class FID (object):
def __init__(self, seq=long(0), N=65536):
# メモ: Nは2^(2^n)じゃないと綺麗に分割出来ない
self.seq = seq
self.N = N
# construct
self.large_block_size = int(ceil(log(self.N**2, 2)))
self.small_block_size = int(ceil(log(self.N, 2) / 2))
self.large_block = [0] * (self.N / self.large_block_size)
self.small_block = [0] * (self.N / self.small_block_size)
self.bit_block = [0] * (self.N / self.small_block_size)
self.remain = [0] * (1 << self.small_block_size)
print self.N, self.large_block_size, self.small_block_size
c = 0
for i in xrange(self.N):
if i % self.large_block_size == 0:
self.large_block[i / self.large_block_size] = c
if i % self.small_block_size == 0:
self.small_block[i / self.small_block_size] = c - self.large_block[i / self.large_block_size]
if seq & (1 << i): c += 1
for i in xrange(1 << self.small_block_size):
self.remain[i] = self.popcount(i, self.small_block_size)
for i in xrange(self.N / self.small_block_size):
self.bit_block[i] = (self.seq >> (self.small_block_size * i)) & ((1 << self.small_block_size) - 1)
def access(self, idx):
return self.seq & (1 << idx)
def rank(self, idx, bit=1):
return self.large_block[idx / self.large_block_size] \
+ self.small_block[idx / self.small_block_size] \
+ self.remain[self.bit_block[idx / self.small_block_size] & \
((1 << (idx % self.small_block_size + 1)) - 1)]
def select(self, idx, bit=1):
h, l = self.N, 0
while h - l > 1:
if self.rank((h + l) / 2) > idx:
h = (h + l) / 2
else:
l = (h + l) / 2
return l if self.rank(l) == idx else h
def popcount(self, n, size=1024):
return sum(1 if n & (1 << i) else 0 for i in xrange(size))
def __unicode__(self):
bits = ''
for i in xrange(self.N):
if (1 << i) > self.seq: break
bits += '1' if self.seq & (1 << i) else '0'
return u'[%s]' % bits
def __str__(self):
return str(unicode(self))
def main():
d = FID(seq=3849379843892490792930789028394729584907237592172644986509458129845738275392012980384)
for i in xrange(300):
print 'rank(%d) = %d' % (i, d.rank(i))
print d
print d.select(10)
# print d.large_block
# print d.small_block
# print d.remain
# print d.bit_block
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment