Skip to content

Instantly share code, notes, and snippets.

@adamwespiser
Created November 13, 2012 12:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adamwespiser/4065553 to your computer and use it in GitHub Desktop.
Save adamwespiser/4065553 to your computer and use it in GitHub Desktop.
Solution to rosalind's 'GRPH' problem
class Dna:
''' Object representing a FASTA record. '''
def __init__(self, header, sequence):
self.head = header
self.seq = sequence
def __str__(self, separator=''):
return ">{0}\n{1}".format(self.head, separator.join(self.seq))
def __len__(self):
return len(''.join(self.seq))
@property
def name(self, separator=''):
return self.head
@property
def sequence(self, separator=''):
return separator.join(self.seq)
class Fasta:
''' A FASTA iterator/generates DNA objects. '''
def __init__(self, handle):
self.handle = handle
self.seqs = []
def __repr__(self):
return '[HTML]' % (self.handle)
def __iter__(self):
header, sequence = '', []
for line in self.handle:
#print line
if line[0] == '>':
#print "...line start"
if sequence:
#print "....","".join(sequence)
yield Dna(header, "".join(sequence))
header = line[1:-1]
sequence = []
else:
sequence.append(line.strip())
yield Dna(header, "".join(sequence))
def initSeqList(self):
header, sequence = '', []
for line in self.handle:
#print line
if line[0] == '>':
#print "...line start"
if sequence:
#print "....","".join(sequence)
self.seqs.append(Dna(header, "".join(sequence)))
header = line[1:-1]
sequence = []
else:
sequence.append(line.strip())
self.seqs.append(Dna(header, "".join(sequence)))
def seqOverlap(s,t):
sLen = len(s)
tLen = len(t)
seqLen = 0
if (sLen <= tLen):
seqLen = sLen
else:
seqLen = tLen
seqLen = seqLen + 1
for kk in reversed(range(1,seqLen)):
suff = s[-kk:]
pref = t[:kk]
#print suff,pref
if (suff == pref):
return suff
return None
# s[:(len(s) - k)] + s[-k:] == s
def prefix(s,t):
return s[:(len(s) - len(seqOverlap(s,t)))]
def combinedOverlap(s,t):
return prefix(s,t) + t
def incInt(i):
return i + 1
def revTransDNA(seq,lowerCaseReturn=False):
bp = {'A':'T', 'C':'G', 'G':'C', 'T':'A', 'N':'N'}
seqTemp = "".join([bp[x.upper()] for x in reversed(seq)])
if (lowerCaseReturn == False):
return seqTemp
else:
return seqTemp.lower()
def revTransRNA(seq,lowerCaseReturn=False):
bp = {'A':'U', 'C':'G', 'G':'C', 'U':'A', 'N':'N'}
seqTemp = "".join([bp[x.upper()] for x in reversed(seq)])
if (lowerCaseReturn == False):
return seqTemp
else:
return seqTemp.lower()
def getACGTcount(seq):
seq = seq.lower()
count = [0,0,0,0] # a c g t
for s in seq:
if (s == 'a'):
count[0] = count[0] + 1
if (s == 'c'):
count[1] = count[1] + 1
if (s == 'g'):
count[2] = count[2] + 1
if (s == 't'):
count[3] = count[3] + 1
return count
import string
import itertools
from optparse import OptionParser
from awSeqLib import *
def prefSuffMatch(s,t,k=3,kRange=False):
if(kRange == True):
for kk in reversed(range(k,len(s))):
suff = s[-kk:]
pref = t[:kk]
#print suff,pref
if (suff == pref):
return kk
else :
if (s[-k:] == t[:k]):
return k
return 0
def createOverlapList(seqs):
for seq1,seq2 in itertools.product(seqs, repeat=2):
yield [seq1, seq2, prefSuffMatch(seq1.seq, seq2.seq)]
def displayOverlapPair(seq1,seq2,match):
print(seq1.name,seq2.name,match)
def main():
parser = OptionParser()
parser.add_option("-f","--file")
(option, args ) = parser.parse_args()
file = option.file
if file == None:
print "Please specify a file"
exit(0)
fileHandle = open(file, 'r')
for s1,s2,m in createOverlapList(Fasta(fileHandle)):
print(m)
if (int(m) >= 3):
displayOverlapPair(s1,s2,m)
if (__name__ == '__main__'):
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment