Skip to content

Instantly share code, notes, and snippets.

@glickmac
Created September 9, 2022 15:00
Show Gist options
  • Save glickmac/385a16e7de92c7cbe793c699b85a3bf6 to your computer and use it in GitHub Desktop.
Save glickmac/385a16e7de92c7cbe793c699b85a3bf6 to your computer and use it in GitHub Desktop.
def suffix_array(string):
return(list(sorted(range(len(string)), key=lambda i:string[i:])))
def bwt_from_suffix(string, s_array=None):
if s_array is None:
s_array = suffix_array(string)
return("".join(string[idx - 1] for idx in s_array))
def lf_mapping(bwt, letters=None):
if letters is None:
letters = set(bwt)
result = {letter:[0] for letter in letters}
result[bwt[0]] = [1]
for letter in bwt[1:]:
for i, j in result.items():
j.append(j[-1] + (i == letter))
return(result)
from collections import Counter
def count_occurences(string, letters=None):
count = 0
result = {}
if letters is None:
letters = set(s)
c = Counter(string)
for letter in sorted(letters):
result[letter] = count
count += c[letter]
return result
def update(begin, end, letter, lf_map, counts, string_length):
beginning = counts[letter] + lf_map[letter][begin - 1] + 1
ending = counts[letter] + lf_map[letter][end]
return(beginning,ending)
def generate_all(input_string, s_array=None, eos="$"):
letters = set(input_string)
try:
assert eos not in letters
counts = count_occurences(input_string, letters)
input_string = "".join([input_string, eos])
if s_array is None:
s_array = suffix_array(input_string)
bwt = bwt_from_suffix(input_string, s_array)
lf_map = lf_mapping(bwt, letters | set([eos]))
for i, j in lf_map.items():
j.extend([j[-1], 0])
return letters, bwt, lf_map, counts, s_array
except:
print("End of string character found in text, deleted EOS from input string")
input_string = input_string.replace(eos, "")
letters = set(input_string)
counts = count_occurences(input_string, letters)
input_string = "".join([input_string, eos])
if s_array is None:
s_array = suffix_array(input_string)
bwt = bwt_from_suffix(input_string, s_array)
lf_map = lf_mapping(bwt, letters | set([eos]))
for i, j in lf_map.items():
j.extend([j[-1], 0])
return letters, bwt, lf_map, counts, s_array
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment