Skip to content

Instantly share code, notes, and snippets.

@jogonba2
Created April 2, 2015 14:03
Show Gist options
  • Save jogonba2/88dcd37fae56645d43d5 to your computer and use it in GitHub Desktop.
Save jogonba2/88dcd37fae56645d43d5 to your computer and use it in GitHub Desktop.
Posting lists compression.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
def get_gaps_list(posting_lists): return [posting_lists[0]]+[posting_lists[i]-posting_lists[i-1] for i in xrange(1,len(posting_lists))]
def vb_encode_number(number):
bytex = []
while True:
bytex.insert(0,number%128);
if number<128: break
else: number /= 128
bytex[len(bytex)-1] |= 0b10000000
return bytex
def vb_encode(numbers): return [vb_encode_number(number) for number in numbers]
def vb_decode_number(number_list):
number,n = 0,0
for x in xrange(len(number_list)):
if number_list[x]<128: n = 128 * n + number_list[x]
else: number = n = 128 * n + (number_list[x]-128) ; n=0
return number
def vb_decode(numbers_encoded): return [vb_decode_number(number) for number in numbers_encoded]
if __name__ == "__main__":
posting_lists = [824,829,215406]
print "Posting lists: " , posting_lists
gap_list = get_gaps_list(posting_lists)
print "Gap list: " ,gap_list
numbers_encoded = vb_encode(gap_list)
print "Gaps encoded using less bytes for each number: ",numbers_encoded
numbers_decoded = vb_decode(numbers_encoded)
print "Gaps decoded: " , numbers_decoded
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment