Created
October 11, 2012 09:28
-
-
Save twpayne/3871227 to your computer and use it in GitHub Desktop.
Burrows-Wheeler forward and reverse transformations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bisect import bisect_left | |
from itertools import izip | |
def bw(s): | |
"""Forward Burrows-Wheeler transform""" | |
rotations = (s[i:] + s[:i] for i in xrange(len(s))) | |
sorted_rotations = sorted(rotations) | |
last_column = ''.join(sr[-1] for sr in sorted_rotations) | |
index = bisect_left(sorted_rotations, s) | |
return (last_column, index) | |
def bw2(s): | |
"""Forward Burrows-Wheeler transform avoiding trading CPU for memory""" | |
def compare_rotations(i, j): | |
return cmp(s[i:] + s[:i], s[j:] + s[:j]) | |
def compare_rotations2(i, j): | |
# Common prefix optimization to avoid string concatenation | |
n = len(n) - max(i, j) - 1 | |
result = cmp(s[i:i + n], s[j:j + n]) | |
if result == 0: | |
result = cmp(s[i + n:] + s[:i], s[j + n:] + s[:j]) | |
return result | |
sorted_rotation_indexes = sorted(xrange(len(s)), cmp=compare_rotations) | |
return sorted_rotations | |
def rbw(last_column, index): | |
"""Reverse Burrows-Wheeler transform""" | |
sorted_rotations = sorted(last_column) | |
for i in xrange(len(last_column) - 1): | |
rotations = (a + b for a, b in izip(last_column, sorted_rotations)) | |
sorted_rotations = sorted(rotations) | |
return sorted_rotations[index] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment