Skip to content

Instantly share code, notes, and snippets.

@smrmkt
Last active August 29, 2015 14:12
Show Gist options
  • Save smrmkt/fac2201de9b515bc6b37 to your computer and use it in GitHub Desktop.
Save smrmkt/fac2201de9b515bc6b37 to your computer and use it in GitHub Desktop.
Burrows Wheeler Transform
#!/usr/bin/env python
#-*-coding:utf-8-*-
import argparse
# args
parser = argparse.ArgumentParser()
parser.add_argument('text')
def bwt(t):
# ソート済みの接尾辞配列を作成
# 文字列の終端を表す文字として$を追加しておく
t += '$'
sa = sorted([t[i:] for i in range(len(t))])
# BW変換を行った結果を返す
# 接尾辞が文字列全体と等しい場合は,文字列の最後の文字を使う
return ''.join([t[-len(s)-1] if len(s) < len(t) else t[-1] for s in sa])
def inv_bwt(t):
# 終端文字の位置を取得
p = t.index('$')
# 各文字の出現回数をカウント
c = {s: t.count(s) for s in set(list(t))}
# 辞書順に並べたときの各文字より前にある文字の合計個数を算出
# LFマッピングの右辺第2項を出すのに必要
total = 0
for k in sorted(c):
cur = c[k]
c[k] = total
total += cur
# LFのマッピングを行う
lf_mapping = [t[:i+1].count(t[i])+c[t[i]]-1 for i in range(len(t))]
# 終端文字から逆順にたどる
ret = ''
for i in range(len(t)-1):
ret = t[lf_mapping[p]]+ret
p = lf_mapping[p]
return ret
if __name__ == '__main__':
text = parser.parse_args().text
l = bwt(text)
print l
print inv_bwt(l)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment