Skip to content

Instantly share code, notes, and snippets.

@ven-kyoshiro
Created December 4, 2018 01:20
Show Gist options
  • Save ven-kyoshiro/9980ef8c4f949a0b43d8e1bc0a93c59b to your computer and use it in GitHub Desktop.
Save ven-kyoshiro/9980ef8c4f949a0b43d8e1bc0a93c59b to your computer and use it in GitHub Desktop.
tunstall.py
import requests
import os
import pandas as pd
import collections
import pickle
def notify(message='done'):
    """Notification hook; this implementation deliberately does nothing.

    Args:
        message: Text that a real notifier would deliver. It is ignored here.

    Returns:
        None.
    """
    return None
def tunstall_code(seq, n):
    """Build a Tunstall-style dictionary of substrings of ``seq``.

    Starting from the empty string, the candidate with the highest
    occurrence count is repeatedly extended by every symbol of the
    alphabet; each extension that actually occurs in ``seq`` becomes a new
    candidate. ``'-'`` is treated as a separator: it is excluded from the
    alphabet and from the root count, so no dictionary entry contains it.

    Args:
        seq: The text to analyse (a string, optionally ``'-'``-separated).
        n: Target code width in bits; growth stops once more than ``2 ** n``
            nodes (expanded + pending, including the empty root) exist.

    Returns:
        A list of ``[length, count, substring]`` lists sorted in descending
        order, where ``count`` is ``seq.count(substring)`` (non-overlapping
        occurrences). Because the size check happens before each expansion,
        the list may contain somewhat more than ``2 ** n`` entries; it
        contains fewer when ``seq`` runs out of new substrings, and is empty
        for an empty ``seq``.
    """
    dict_size = 2 ** n
    symbol_total = len(seq) - seq.count('-')
    alphabet = set(seq) - {'-'}

    leaf = [[symbol_total, '']]   # candidates still eligible for expansion
    old_leaf = []                 # nodes that have already been expanded

    # Stop when the dictionary is large enough, or when there is nothing left
    # to expand. The latter guard avoids indexing into an empty candidate
    # list, which used to raise IndexError for short inputs or large n.
    while leaf and len(leaf) + len(old_leaf) <= dict_size:
        best, *rest = sorted(leaf, reverse=True)
        prefix = best[1]
        for c in alphabet:
            count = seq.count(prefix + c)
            if count:
                rest.append([count, prefix + c])
        old_leaf.append(best)
        leaf = rest

    # old_leaf[0] is always the empty-string root, which is not a usable entry.
    entries = leaf + old_leaf[1:]
    return sorted(
        [[len(word), count, word] for count, word in entries],
        reverse=True,
    )
def main():
    """Encode the dataset with dictionaries of increasing size and save stats.

    Reads ``dataset.csv`` (first column used as the index), joins the values
    of its ``array`` column with ``'-'`` and, for every bit width ``j`` from
    5 to 18, builds a codebook with ``tunstall_code`` and substitutes each
    codebook entry in the joined text with a single character. The bit
    width, the length of the substituted text and the codebook are collected
    in ``record``, which is pickled to ``vf_record.pickle``.
    """
    frame = pd.read_csv('dataset.csv', index_col=0)
    sequences = frame.array.values.tolist()
    joined = '-'.join(sequences)

    record = {'j': [], 'enc': [], 'codebook': []}
    for j in range(5, 19):
        seqx = joined
        codebook = tunstall_code(seqx, j)

        # Entry i is replaced by the code point 12354 + i (12354 is U+3042),
        # in codebook order. The reverse mapping is built but not stored.
        decodebook = {}
        for i, c in enumerate(codebook):
            symbol = chr(12354 + i)
            seqx = seqx.replace(c[2], symbol)
            decodebook[symbol] = c[2]

        encoded_length = len(seqx)
        record['j'].append(j)
        record['enc'].append(encoded_length)
        record['codebook'].append(codebook)
        notify(f"{record['j']} is done. \n score:{encoded_length}")

    # NOTE(review): assumed to run once after the loop; confirm it was not
    # meant to checkpoint after every iteration.
    with open('vf_record.pickle', mode='wb') as f:
        pickle.dump(record, f)
if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        # Report the failure through the notification hook, then re-raise so
        # the traceback and a non-zero exit status are preserved instead of
        # the error being swallowed.
        notify('!!!! Error !!!!\n'+str(e)+'\n')
        raise
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment