Created
April 13, 2012 15:43
CodonOptOutputCompiler:Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compiles output from the Codon Optimizer into a spreadsheet and a FASTA-formatted file.
import sys, re, xlrd, xlwt | |
from Bio import SeqIO | |
from Bio.Seq import Seq | |
from Bio.SeqRecord import SeqRecord | |
from Bio.Alphabet import IUPAC | |
# Sheets holding an optimized DNA sequence are named ">{gene id}-DNA...";
# the named group captures the gene id (word characters and whitespace).
_DNA_SHEET_RE = re.compile(r'^>(?P<id>[\w\s]+?)-DNA')


def _print_usage(prog):
    """Print the command-line help text, using *prog* as the script name."""
    print("USAGE: python %s <input xls> <output prefix>" % prog)
    print(" <input xls> MS Excel spreadsheet output from Codon Optimizer\n")
    print(" <output prefix> Prefix for output files:\n")
    print(" .xls Stacked list of ids, sequences in a spreadsheet")
    print(" .fasta FASTA formated text file")


def main(argv):
    """Collect the sequences of a Codon Optimizer workbook into two files.

    Every sheet of the input workbook whose name matches ``>{id}-DNA`` is
    read; the value of cell B6 is taken as the sequence.  The ids and
    sequences are stacked row by row into ``<output prefix>.xls`` and the
    same records are written to ``<output prefix>.fasta``.

    Parameters:
        argv: Command-line argument list in ``sys.argv`` layout, i.e.
            ``[script name, input xls, output prefix]``.

    Raises:
        ValueError: If the input file or the output prefix is missing
            (the usage text is printed first).
    """
    # Both positional arguments are required: argv[1] AND argv[2] are read
    # below, so anything shorter than three entries is a usage error.
    if len(argv) < 3:
        _print_usage(argv[0] if argv else 'CodonOptOutputCompiler.py')
        raise ValueError("expected <input xls> and <output prefix> arguments")

    xlsfile = argv[1]
    outpref = argv[2]
    xls_path = outpref + '.xls'
    fasta_path = outpref + '.fasta'

    # get the input workbook
    wbin = xlrd.open_workbook(xlsfile)

    # prepare the output workbook
    wbout = xlwt.Workbook()
    shout = wbout.add_sheet('Optimized Genes')

    sequences = []
    for shn in wbin.sheet_names():
        # only sheets named like ">{id}-DNA..." carry a sequence
        rex = _DNA_SHEET_RE.search(shn)
        if not rex:
            continue
        print(shn)
        sho = wbin.sheet_by_name(shn)

        # get the sequence: value in cell B6 (zero-based row 5, column 1)
        seq = sho.cell(rowx=5, colx=1).value
        seqRec = SeqRecord(Seq(seq, IUPAC.unambiguous_dna))
        seqRec.id = rex.group('id')
        seqRec.description = shn.replace('>', '')

        # write the sequence result on the next free output row
        row = len(sequences)
        shout.write(row, 0, seqRec.id)
        shout.write(row, 1, seq)
        sequences.append(seqRec)

    print("%d sequences found." % len(sequences))

    wbout.save(xls_path)
    print("written to %s" % xls_path)

    # write the fasta output; the context manager closes the handle even if
    # SeqIO.write raises
    with open(fasta_path, 'w') as fastaOut:
        SeqIO.write(sequences, fastaOut, 'fasta')
    print("written to %s" % fasta_path)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment