Skip to content

Instantly share code, notes, and snippets.

@HaraldKorneliussen
Last active October 13, 2022 14:59
Show Gist options
  • Save HaraldKorneliussen/2bf20ca4f4f28c1aaa917b146cf39d84 to your computer and use it in GitHub Desktop.
Save HaraldKorneliussen/2bf20ca4f4f28c1aaa917b146cf39d84 to your computer and use it in GitHub Desktop.
A naive python implementation of David A. Scott's bijective Burrows-Wheeler transform + inverse
#!/usr/bin/python
# Factor a string into nonincreasing Lyndon words,
# using Duval's algorithm
def lyndonFactor(s):
k = 0
ret = []
while k < len(s):
i = k
j = i + 1
while j < len(s) and s[i] <= s[j]:
if s[i] == s[j]:
i += 1
else:
i = k
j += 1
oldk = k
k = k + j - i
ret.append(s[oldk:k])
return ret
# A special string comparison order must be used in the bijective BWTS:
# infinite lexicographical order. It's like regular lexorder, but on the
# infinite repeated concatenation of the words.
# To do this in python's sorting key scheme, extend all strings to
# sufficient length to compare them.
def infLexOrderKey(a,l):
while len(a) < l:
a = a * 2
if len(a) > l:
a = a[0:l]
return a
# Get a list of all the rotations of the Lyndon factors of s
def lyndonConjugates(s):
factors = lyndonFactor(s)
ret = []
for factor in factors:
for idx, _ in enumerate(factor):
ret.append(factor[-idx:] + factor[:-idx])
return ret
# The "suffix standard permutation" induced by a word s
# as I understand it, that's the same as the inverse suffix array
def standardPermutation(s):
return [x[0] for x in sorted(enumerate(s), key=lambda p: p[1])]
# Decompose a permutation into a list of cycles.
def findCycles(p):
allCycles = []
for i in range(len(p)):
currentIndex = i
currentCycle = []
while p[currentIndex] != -1:
newIndex = p[currentIndex] # Start from the second element in cycle, to ease decoding
currentCycle.append(newIndex)
p[currentIndex] = -1
currentIndex = newIndex
if len(currentCycle) > 0:
allCycles.append(currentCycle)
return allCycles[::-1] # Reverse list of cycles to ease decoding
# David A. Scott's Bijective Burrows-Wheeler transform:
# take all the rotations of the lyndon factors,
# sort them in inflex order, and take the last character of each.
def bwts(s):
conjs = lyndonConjugates(s)
maxlen = len(s) # lcm of all the lengths suffices, but this should be safe.
return ''.join( [f[-1] for f in sorted(lyndonConjugates(s), key=lambda a: infLexOrderKey(a, maxlen))])
# The inverse: take the standard permutation induced by s,
# find all cycles in it, label them according to s, and append.
def ibwts(s):
cycles = findCycles(standardPermutation(s))
return ''.join([''.join(map(lambda i: s[i], cycle)) for cycle in cycles])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment