Skip to content

Instantly share code, notes, and snippets.

@wadoon
Created July 27, 2013 00:32
Show Gist options
  • Save wadoon/6093128 to your computer and use it in GitHub Desktop.
Save wadoon/6093128 to your computer and use it in GitHub Desktop.
Sequentiell Algorithms. * Suffix Array * Bowler-Wheeler-Transformation * LCP (3 ways)
import sys,math
s = sys.argv[1] + ' '
def sfx(text):
for i in range(len(text)):
yield text[i:]
def sa(ary , l):
for suffix in ary:
yield l.index(suffix)
from itertools import cycle
def bwt(text):
def next():
a = t.pop(0)
t.append(a)
l = []
t = list(text)
for i in range(len(text)):
l.append(''.join(t))
next()
l = sorted(l)
last = lambda s: s[-1]
return ''.join(map(last, l))
def lcp1(SA, T):
lcp = [0] * len(T)
for i in range(2,len(SA)):
l = 0
while T[SA[i]+l] == T [SA[i-1]+l]:
l += 1
lcp[i] = l
return lcp
def rank(SA):
rank = [0] * len(SA)
for i,s in enumerate(SA):
rank[s] = i
return tuple(rank)
def lcp2(SA,T):
SAinv = rank(SA)
lcp = [0] * len(T)
l = 0
for i in range(len(T)):
if SAinv[i] != 0:
#print i, SAinv[i]-1, SA[SAinv[i]-1]
#print T[i+l:]
#print T[SA[SAinv[i]-1]+l:]
while T[i+l] == T[SA[SAinv[i]-1]+l]:
l += 1
#print l
lcp[ SAinv[i] ] = l
l = max(l-1,0)
return lcp
def lcp3(SA,T):
phi = [0] * len(T)
SAinv = rank(SA)
lcp = [0] * len(T)
l = 0
for i in range(len(phi) - 1):
#phi[SA[i]] = SA[i+1]
phi[i] = SA[SAinv[i]-1]
plcp = [0]*len(T)
for i in range(0,len(T)):
while T[i+l] == T[phi[i]+l]:
l += 1
plcp[i] = l
l = max(0, l - 1)
for i in range(len(T)):
lcp[ i ] = plcp[ SA[i] ]
#lcp[ SAinv[i] ] = plcp[i]
return lcp
a = list( sfx(s) )
SA = list(sa(sorted(a),a))
T = s
print "BWT: ", bwt(s)
print "SA: ", SA
print "~SA: ", rank(SA)
print "LCP: ", lcp1(SA,T)
print "LCP: ", lcp2(SA,T)
print "LCP: ", lcp3(SA,T)
print sorted(a)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment