Skip to content

Instantly share code, notes, and snippets.

@aaronwolen
Created July 18, 2014 16:43
Show Gist options
  • Save aaronwolen/cbdd74180a714267bf0c to your computer and use it in GitHub Desktop.
Save aaronwolen/cbdd74180a714267bf0c to your computer and use it in GitHub Desktop.
A simple R implementation of the Burrows-Wheeler transformation based on Wikipedia's python example.
# Burrows-Wheeler transformation
#
# Transform:
# out <- bwt('SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES')
#
# Reverse:
# ibwt(out)
bwt <- function(x, eof = "!") {
if (grepl(eof, "[[:cntrl:]]")) stop("eof can't be a RegEx control character")
if (grepl(eof, x)) stop("x can't contain eof character")
x <- paste0(x, eof)
n <- nchar(x)
# Take first character and add to the end
rotate <- function(x) paste0(substring(x, 2), substring(x, 1, 1))
# Create table of all possible rotations
tbl <- c(x, vector("character", n - 1))
for(i in 2:n) tbl[i] <- rotate(tbl[i - 1])
# Sort rows alphabetically
tbl <- sort(tbl)
# Return last column of the table
out <- sapply(tbl, substring, first = n, USE.NAMES = FALSE)
paste(out, collapse = "")
}
ibwt <- function(x, eof = "!") {
if (!grepl(eof, x)) stop("x doesn't contain eof character")
tbl <- x <- strsplit(x, "")[[1]]
while (nchar(tbl[1]) < length(x)) {
sorted <- sort(tbl)
tbl <- apply(cbind(x, sorted), 1, paste, collapse = "")
}
sub(eof, "", tbl[grepl(paste0(eof, "$"), tbl)])
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment