Skip to content

Instantly share code, notes, and snippets.

@wleepang
Created April 13, 2012 15:44
CodonOptOutputCompiler:R
# Pile up the sequences found on multiple xlsx sheets from the codon optimizer
# and export them
#   - as a single list of sequences on a single sheet ('<outPrefix>.xlsx')
#   - as a fasta file of all sequences for the codon finder
#     ('<outPrefix>.fasta')
#
# Arguments:
#   xlsFile   path of the xlsx workbook written by the codon optimizer
#   outPrefix path prefix (without extension) shared by both output files
#
# Only sheets whose name contains 'DNA' or 'protein' are read; the sequence is
# taken from cell 'B6' of each of them. Any '>' in a sheet name is removed
# before the name is used as the sequence name.
#
# Returns (invisibly) the named list of extracted sequences.
# Stops with an error if no sheet matches or if a matching sheet has no cell
# 'B6'.
CodonOptOutputCompiler <- function(xlsFile, outPrefix) {
  # library() fails immediately when a package is missing; require() would only
  # return FALSE and let the function die later with an obscure error.
  library(xlsx)   # for reading/writing xlsx files
  library(seqinr) # for fasta file IO

  wbi <- loadWorkbook(xlsFile)
  sheets <- getSheets(wbi)

  # extract only sheets that have the right name format
  sheets <- sheets[grepl("(DNA|protein)", names(sheets))]
  if (length(sheets) == 0) {
    stop(
      sprintf("no sheet named like 'DNA' or 'protein' found in '%s'", xlsFile),
      call. = FALSE
    )
  }

  # seq_along() (rather than 1:length()) keeps the iteration well defined, and
  # a local name other than 'seq' avoids shadowing base::seq().
  sequences <- lapply(seq_along(sheets), function(i) {
    # the optimized sequence is in cell 'B6': row 6, column 2
    rows <- getRows(sheets[[i]], rowIndex = 6)
    cells <- if (length(rows) > 0) getCells(rows, colIndex = 2) else NULL
    if (length(cells) == 0) {
      stop(
        sprintf("sheet '%s' has no cell 'B6' to read", names(sheets)[i]),
        call. = FALSE
      )
    }
    getCellValue(cells[[1]])
  })
  names(sequences) <- gsub(">", "", names(sheets))

  # output the sequences to an xlsx file (one row per sequence, named by sheet)
  write.xlsx(
    as.data.frame(do.call(rbind, sequences)),
    sprintf("%s.xlsx", outPrefix),
    col.names = FALSE,
    row.names = TRUE
  )

  # output the sequences to a fasta txt file
  write.fasta(sequences, names(sequences), sprintf("%s.fasta", outPrefix))

  invisible(sequences)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment