Created
April 13, 2012 15:44
CodonOptOutputCompiler:R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pile up the sequences found on multiple xlsx sheets from the codon optimizer and export them
# - as a single list of sequences on a single sheet, and
# - as a fasta file of all sequences for the codon finder
# Collect the optimized sequence stored on each matching sheet of a workbook
# and export all of them together.
#
# Args:
#   xlsFile:   path to the xlsx workbook to read. Only sheets whose name
#              contains 'DNA' or 'protein' are used.
#   outPrefix: prefix of the output files; '<outPrefix>.xlsx' and
#              '<outPrefix>.fasta' are written.
#
# Returns:
#   Invisibly, the named list of sequences that was written (one element per
#   matching sheet, named after the sheet with any '>' removed).
#
# Raises an error when the workbook does not exist, when a required package is
# not installed, when no sheet matches the name filter, or when cell B6 is
# missing on a matching sheet.
CodonOptOutputCompiler <- function(xlsFile, outPrefix) {
  # Validate the input path first so a bad path fails with a clear message
  # before any package or Java machinery is loaded.
  if (!is.character(xlsFile) || length(xlsFile) != 1L || is.na(xlsFile)) {
    stop("'xlsFile' must be a single file path", call. = FALSE)
  }
  if (!file.exists(xlsFile)) {
    stop(sprintf("Workbook '%s' does not exist", xlsFile), call. = FALSE)
  }

  # library() (unlike require()) stops immediately when a package is missing.
  library(xlsx)   # for reading xlsx files
  library(seqinr) # for fasta file IO

  workbook <- xlsx::loadWorkbook(xlsFile)
  sheets <- xlsx::getSheets(workbook)

  # Extract only sheets that have the right name format.
  sheets <- sheets[grepl("(DNA|protein)", names(sheets))]
  if (length(sheets) == 0L) {
    stop(
      sprintf("No sheet named like 'DNA' or 'protein' found in '%s'", xlsFile),
      call. = FALSE
    )
  }

  # The optimized sequence is in cell 'B6' of every sheet.
  seqs <- lapply(seq_along(sheets), function(i) {
    rows <- xlsx::getRows(sheets[[i]], rowIndex = 6)   # row 6
    cells <- if (length(rows) > 0L) {
      xlsx::getCells(rows, colIndex = 2)               # column 'B'
    } else {
      list()
    }
    if (length(cells) == 0L) {
      stop(
        sprintf("Cell B6 is empty or missing on sheet '%s'", names(sheets)[i]),
        call. = FALSE
      )
    }
    xlsx::getCellValue(cells[[1]])
  })
  # '>' is dropped from the sheet names before they are used as sequence names.
  names(seqs) <- gsub(">", "", names(sheets))

  # Output the sequences to an xlsx file: one row per sequence, with the
  # sequence name as the row name.
  xlsx::write.xlsx(
    as.data.frame(do.call(rbind, seqs)),
    sprintf("%s.xlsx", outPrefix),
    col.names = FALSE,
    row.names = TRUE
  )

  # Output the sequences to a fasta txt file. Arguments are passed by name so
  # the call does not depend on the positional order of 'file.out'.
  seqinr::write.fasta(
    sequences = seqs,
    names = names(seqs),
    file.out = sprintf("%s.fasta", outPrefix)
  )

  invisible(seqs)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment