Skip to content

Instantly share code, notes, and snippets.

@chapmanb
Forked from dwinter/gist:861818
Created March 9, 2011 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save chapmanb/862240 to your computer and use it in GitHub Desktop.
Save chapmanb/862240 to your computer and use it in GitHub Desktop.
class ORFFinder:
"""Find the longest ORF in a given sequence
"seq" is a string, if "start" is not provided any codon can be the start of
and ORF. If muliple ORFs have the longest length the first one encountered
is printed
"""
def __init__(self, seq, start=[], stop=["TAG", "TAA", "TGA"]):
self.seq = seq
self.start = start
self.stop = stop
self.result = ("+",0,0,0,0)
self.winner = 0
def _reverse_comp(self):
swap = {"A":"T", "T":"A", "C":"G", "G":"C"}
return "".join(swap[b] for b in self.seq)
def _print_current(self):
print "frame %s%s position %s:%s (%s nucleotides)" % self.result
def codons(self, frame):
""" A generator that yields DNA in one codon blocks
"frame" counts for 0. This function yelids a tuple (triplet, index) with
index relative to the original DNA sequence
"""
start = frame
while start + 3 <= len(self.seq):
yield (self.seq[start:start+3], start)
start += 3
def run_one(self, frame_number, direction):
""" Search in one reading frame """
orf_start = None
for c, index in self.codons(frame_number):
if (c not in self.stop and (c in self.start or not self.start)
and orf_start is None):
orf_start = index + 1 # we'll return the result as 1-indexed
elif c in self.stop and orf_start:
self._update_winner(orf_start, index, direction, frame_number)
orf_start = None
if orf_start:
self._update_winner(orf_start, index, direction, frame_number)
def _update_winner(self, orf_start, index, direction, frame_number):
orf_end = index + 3 # because index is realitve to start of codons
L = (orf_end - orf_start) + 1
if L > self.winner:
self.winner = L
self.result = (direction, frame_number+1, orf_start, orf_end, L)
def run(self):
direction = "+"
for frame in range(3):
self.run_one(frame, direction)
direction = "-"
self.seq = self._reverse_comp()
for frame in range(3):
self.run_one(frame, direction)
self._print_current()
def little_test():
test = ORFFinder("ACGTACGTACGTACGT").run()
def bigger_test():
from Bio import SeqIO
a = SeqIO.parse("test-in.fa", "fasta")
for seq in a:
print seq.id
ORFFinder(seq.seq.tostring()).run()
if __name__ == "__main__":
print "\nrunning original test sequence..."
little_test()
print "\nrunning Brad's test file..."
bigger_test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment