Skip to content

Instantly share code, notes, and snippets.

@dwinter
Created March 9, 2011 07:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dwinter/861818 to your computer and use it in GitHub Desktop.
Save dwinter/861818 to your computer and use it in GitHub Desktop.
class ORFFinder:
"""Find the longest ORF in a given sequence
"seq" is a string, if "start" is not provided any codon can be the start of
and ORF. If muliple ORFs have the longest length the first one encountered
is printed
"""
def __init__(self, seq, start=[], stop=["TAG", "TAA", "TGA"]):
self.seq = seq
self.start = start
self.stop = stop
self.result = ("+",0,0,0,0)
self.winner = 0
def _reverse_comp(self):
swap = {"A":"T", "T":"A", "C":"G", "G":"C"}
return "".join(swap[b] for b in self.seq)
def _print_current(self):
print "frame %s%s position %s:%s (%s nucleotides)" % self.result
def codons(self, frame):
""" A generator that yields DNA in one codon blocks
"frame" counts for 0. This function yelids a tuple (triplet, index) with
index relative to the original DNA sequence
"""
start = frame
while start + 3 <= len(self.seq):
yield (self.seq[start:start+3], start)
start += 3
def run_one(self, frame_number, direction):
""" Search in one reading frame """
codon_gen = self.codons(frame_number)
while True:
try:
c , index = codon_gen.next()
except StopIteration:
break
# Lots of conditions here: checks if we care about looking for start
# codon then that codon is not a stop
if c in self.start or not self.start and c not in self.stop:
orf_start = index + 1 # we'll return the result as 1-indexed
end = False
while True:
try:
c, index = codon_gen.next()
except StopIteration:
end = True
if c in self.stop:
end = True
if end:
orf_end = index + 3 # because index is realitve to start of codon
L = (orf_end - orf_start) + 1
if L > self.winner:
self.winner = L
self.result = (direction, frame_number+1, orf_start, orf_end, L)
break
def run(self):
direction = "+"
for frame in range(3):
self.run_one(frame, direction)
direction = "-"
self.seq = self._reverse_comp()
for frame in range(3):
self.run_one(frame, direction)
self._print_current()
def little_test():
test = ORFFinder("ACGTACGTACGTACGT").run()
def bigger_test():
from Bio import SeqIO
a = SeqIO.parse("test.fasta", "fasta")
for seq in a:
print seq.id
ORFFinder(seq.seq.tostring()).run()
if __name__ == "__main__":
print "\nrunning original test sequence..."
little_test()
print "\nrunning Brad's test file..."
bigger_test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment