Skip to content

Instantly share code, notes, and snippets.

@wleepang
Created April 13, 2012 15:43
CodonOptOutputCompiler:Python
# compiles output from the Codon Optimizer into a spreadsheet and a FASTA-formatted file
import sys, re, xlrd, xlwt
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC
# Sheet names treated as valid start with '>' followed by an id made of word
# characters/whitespace, then the literal '-DNA'. The named group captures
# the id.
_SHEET_NAME_RE = re.compile(r'^>(?P<id>[\w\s]+?)-DNA')


def _print_usage(prog):
    """Print the command-line usage text to stdout.

    prog -- the script name substituted into the USAGE line.
    """
    print("USAGE: python %s <input xls> <output prefix>" % prog)
    print(" <input xls> MS Excel spreadsheet output from Codon Optimizer\n")
    print(" <output prefix> Prefix for output files:\n")
    print(" .xls Stacked list of ids, sequences in a spreadsheet")
    print(" .fasta FASTA formated text file")


def main(argv):
    """Compile Codon Optimizer output into a spreadsheet and a FASTA file.

    argv -- command-line argument list: argv[0] is the script name,
            argv[1] the input .xls workbook, argv[2] the output prefix.

    For every sheet of the input workbook whose name matches
    ``_SHEET_NAME_RE``, the value of cell B6 is taken as the sequence. Each
    (id, sequence) pair is written as one row of the 'Optimized Genes' sheet
    in ``<prefix>.xls`` and as one record in ``<prefix>.fasta``.

    Raises ValueError (after printing the usage text) when fewer than two
    arguments follow the script name.
    """
    # Both argv[1] and argv[2] are required, so at least 3 entries are needed.
    if len(argv) < 3:
        _print_usage(argv[0])
        raise ValueError("expected <input xls> and <output prefix> arguments")

    xlsfile = argv[1]
    outpref = argv[2]

    # get the input workbook
    wbin = xlrd.open_workbook(xlsfile)

    # prepare the output workbook
    wbout = xlwt.Workbook()
    shout = wbout.add_sheet('Optimized Genes')

    sequences = []
    for shn in wbin.sheet_names():
        # skip sheets whose names do not look like '>{id}-DNA...'
        rex = _SHEET_NAME_RE.search(shn)
        if not rex:
            continue

        print(shn)
        sho = wbin.sheet_by_name(shn)

        # get the sequence: zero-based (row 5, col 1) is cell B6
        seq = sho.cell(rowx=5, colx=1).value

        # NOTE(review): the alphabet argument depends on Bio.Alphabet, which
        # newer Biopython releases reportedly no longer ship -- confirm the
        # installed version before upgrading.
        seqRec = SeqRecord(Seq(seq, IUPAC.unambiguous_dna))
        seqRec.id = rex.group('id')
        seqRec.description = shn.replace('>', '')

        # The output row is the record's position in the list, so rows stay
        # contiguous even when non-matching sheets are skipped.
        row = len(sequences)
        sequences.append(seqRec)

        # write the sequence result
        shout.write(row, 0, seqRec.id)
        shout.write(row, 1, seq)

    print("%d sequences found." % len(sequences))

    xls_path = outpref + '.xls'
    wbout.save(xls_path)
    print("written to %s" % xls_path)

    # write the fasta output; the context manager closes the handle even if
    # SeqIO.write raises
    fasta_path = outpref + '.fasta'
    with open(fasta_path, 'w') as fastaOut:
        SeqIO.write(sequences, fastaOut, 'fasta')
    print("written to %s" % fasta_path)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment