Skip to content

Instantly share code, notes, and snippets.

@reedacartwright
Created March 31, 2019 01:52
Show Gist options
  • Save reedacartwright/c3330ccc498f92adb4ca1530761d73ad to your computer and use it in GitHub Desktop.
Save reedacartwright/c3330ccc498f92adb4ca1530761d73ad to your computer and use it in GitHub Desktop.
Align DNA sequences using an amino acid alignment
#!/usr/bin/Rscript --vanilla
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(Biostrings))
G = BString("-")
main = function(infile, dnafile, outfile) {
ARGS = commandArgs(trailingOnly=TRUE)
if(missing(infile)) {
infile = ARGS[1]
}
if(missing(dnafile)) {
dnafile = ARGS[2]
}
if(missing(outfile)) {
outfile = ARGS[3]
}
aa = readAAStringSet(infile);
dna = readBStringSet(dnafile)
dna = dna[names(aa)]
m = str_split(as.character(aa), '')
names(m) = names(aa)
# identify the location of the gaps in the alignment
g = lapply(m, function(x) { IRanges(x == '-')})
g = IRangesList(g)
# translate them to DNA positions
s = 1L+(start(g)-1L)*3L
w = width(g)*3L
for(x in seq_along(dna)) {
for(y in seq_along(s[[x]])) {
subseq(dna[[x]],start=s[[x]][y],width=0) = rep(G,w[[x]][y])
}
}
writeXStringSet(dna,outfile,width=60)
}
if(!interactive()) {
main()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment