Skip to content

Instantly share code, notes, and snippets.

@utahta
Created December 19, 2010 17:11
Show Gist options
  • Save utahta/747487 to your computer and use it in GitHub Desktop.
Save utahta/747487 to your computer and use it in GitHub Desktop.
Huge data book, chapter 6: VB decode.
#!/usr/bin/env python
import sys
from struct import unpack
from vb import vb_decode
# Size in bytes of a record header: two C ints, as read by the '2i' struct
# format used below (struct.unpack requires exactly this many bytes).
HEADER_SIZE = 8


def _read_exact(fp, size):
    """Read exactly *size* bytes from *fp*.

    Raises ValueError if *size* is negative or the file ends early.
    """
    # fp.read() with a negative size reads to end of file, so a corrupt
    # header must be rejected before it swallows every following record.
    if size < 0:
        raise ValueError('negative field length in record header: %d' % size)
    data = fp.read(size)
    if len(data) != size:
        raise ValueError(
            'truncated record: expected %d bytes, got %d' % (size, len(data)))
    return data


def main(argv):
    """Dump the binary file named by argv[1] to stdout as text.

    The input is a sequence of records. Each record is an 8-byte header
    holding two ints (tag length, encoded ID-list length), followed by the
    tag bytes and then the encoded ID list. For every record one line is
    written: the tag, a tab, the comma-separated IDs, and a newline.

    The values produced by vb_decode are treated as differences: each one is
    added to the running total of the record to obtain the next ID.
    NOTE(review): vb_decode is assumed to accept the raw byte string and
    yield integers -- confirm against the vb module.

    Returns 1 when no input file is given, otherwise None.
    Raises ValueError if the file ends in the middle of a record or a header
    contains a negative length.
    """
    if len(argv) < 2:
        sys.stdout.write("usage: %s in.txt > out.txt\n" % argv[0])
        return 1

    # Emit raw bytes so the tag is passed through unchanged. Python 3 exposes
    # the byte stream as sys.stdout.buffer; Python 2's sys.stdout already
    # takes byte strings.
    out = getattr(sys.stdout, 'buffer', sys.stdout)

    # The with-block closes the file even if decoding raises.
    with open(argv[1], 'rb') as fp:
        while True:
            header = fp.read(HEADER_SIZE)
            if not header:
                break  # clean end of input
            if len(header) != HEADER_SIZE:
                raise ValueError(
                    'truncated record header: expected %d bytes, got %d'
                    % (HEADER_SIZE, len(header)))
            tag_len, id_list_len = unpack('2i', header)
            tag = _read_exact(fp, tag_len)
            encoded_ids = _read_exact(fp, id_list_len)

            current_id = 0
            id_list = []
            for gap in vb_decode(encoded_ids):
                current_id += gap
                id_list.append(('%s' % current_id).encode('ascii'))
            out.write(tag + b'\t' + b','.join(id_list) + b'\n')
    return None


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment