Skip to content

Instantly share code, notes, and snippets.

@ddrone
Created June 22, 2012 19:59
Show Gist options
  • Save ddrone/2974812 to your computer and use it in GitHub Desktop.
Save ddrone/2974812 to your computer and use it in GitHub Desktop.
Implementation of suffix array construction in O(n log^2 n)
# This code is in public domain.
# Written by Andrew Shulayev, 2012
# Implementation of Karp-Miller-Rosenberg algorithm
# Builds suffix array of a string with length n
# in O(n log^2 n) time
def count_classes(arr, key = lambda x : x):
result = []
for i, x in enumerate(arr):
if i == 0:
result.append(0)
continue
next = result[-1]
if key(arr[i - 1]) != key(x):
next += 1
result.append(next)
return result
def suffix_array(string):
n = len(string)
result = range(n)
key_string = lambda x : string[x]
result.sort(key = key_string)
classes = count_classes(result, key = key_string)
k = 1
while k < n:
tuples = [(classes[i], classes[(i + k) % n], x) for i, x in enumerate(result)]
tuples.sort()
classes = count_classes(tuples, key = lambda t : t[:2])
result = [t[2] for t in tuples]
k *= 2
return result
print suffix_array('abacaba$')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment