Created
November 13, 2012 12:37
-
-
Save adamwespiser/4065553 to your computer and use it in GitHub Desktop.
Solution to rosalind's 'GRPH' problem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Dna: | |
''' Object representing a FASTA record. ''' | |
def __init__(self, header, sequence): | |
self.head = header | |
self.seq = sequence | |
def __str__(self, separator=''): | |
return ">{0}\n{1}".format(self.head, separator.join(self.seq)) | |
def __len__(self): | |
return len(''.join(self.seq)) | |
@property | |
def name(self, separator=''): | |
return self.head | |
@property | |
def sequence(self, separator=''): | |
return separator.join(self.seq) | |
class Fasta: | |
''' A FASTA iterator/generates DNA objects. ''' | |
def __init__(self, handle): | |
self.handle = handle | |
self.seqs = [] | |
def __repr__(self): | |
return '[HTML]' % (self.handle) | |
def __iter__(self): | |
header, sequence = '', [] | |
for line in self.handle: | |
#print line | |
if line[0] == '>': | |
#print "...line start" | |
if sequence: | |
#print "....","".join(sequence) | |
yield Dna(header, "".join(sequence)) | |
header = line[1:-1] | |
sequence = [] | |
else: | |
sequence.append(line.strip()) | |
yield Dna(header, "".join(sequence)) | |
def initSeqList(self): | |
header, sequence = '', [] | |
for line in self.handle: | |
#print line | |
if line[0] == '>': | |
#print "...line start" | |
if sequence: | |
#print "....","".join(sequence) | |
self.seqs.append(Dna(header, "".join(sequence))) | |
header = line[1:-1] | |
sequence = [] | |
else: | |
sequence.append(line.strip()) | |
self.seqs.append(Dna(header, "".join(sequence))) | |
def seqOverlap(s,t): | |
sLen = len(s) | |
tLen = len(t) | |
seqLen = 0 | |
if (sLen <= tLen): | |
seqLen = sLen | |
else: | |
seqLen = tLen | |
seqLen = seqLen + 1 | |
for kk in reversed(range(1,seqLen)): | |
suff = s[-kk:] | |
pref = t[:kk] | |
#print suff,pref | |
if (suff == pref): | |
return suff | |
return None | |
# s[:(len(s) - k)] + s[-k:] == s | |
def prefix(s,t): | |
return s[:(len(s) - len(seqOverlap(s,t)))] | |
def combinedOverlap(s,t): | |
return prefix(s,t) + t | |
def incInt(i): | |
return i + 1 | |
def revTransDNA(seq,lowerCaseReturn=False): | |
bp = {'A':'T', 'C':'G', 'G':'C', 'T':'A', 'N':'N'} | |
seqTemp = "".join([bp[x.upper()] for x in reversed(seq)]) | |
if (lowerCaseReturn == False): | |
return seqTemp | |
else: | |
return seqTemp.lower() | |
def revTransRNA(seq,lowerCaseReturn=False): | |
bp = {'A':'U', 'C':'G', 'G':'C', 'U':'A', 'N':'N'} | |
seqTemp = "".join([bp[x.upper()] for x in reversed(seq)]) | |
if (lowerCaseReturn == False): | |
return seqTemp | |
else: | |
return seqTemp.lower() | |
def getACGTcount(seq): | |
seq = seq.lower() | |
count = [0,0,0,0] # a c g t | |
for s in seq: | |
if (s == 'a'): | |
count[0] = count[0] + 1 | |
if (s == 'c'): | |
count[1] = count[1] + 1 | |
if (s == 'g'): | |
count[2] = count[2] + 1 | |
if (s == 't'): | |
count[3] = count[3] + 1 | |
return count | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
import itertools | |
from optparse import OptionParser | |
from awSeqLib import * | |
def prefSuffMatch(s,t,k=3,kRange=False): | |
if(kRange == True): | |
for kk in reversed(range(k,len(s))): | |
suff = s[-kk:] | |
pref = t[:kk] | |
#print suff,pref | |
if (suff == pref): | |
return kk | |
else : | |
if (s[-k:] == t[:k]): | |
return k | |
return 0 | |
def createOverlapList(seqs): | |
for seq1,seq2 in itertools.product(seqs, repeat=2): | |
yield [seq1, seq2, prefSuffMatch(seq1.seq, seq2.seq)] | |
def displayOverlapPair(seq1,seq2,match): | |
print(seq1.name,seq2.name,match) | |
def main(): | |
parser = OptionParser() | |
parser.add_option("-f","--file") | |
(option, args ) = parser.parse_args() | |
file = option.file | |
if file == None: | |
print "Please specify a file" | |
exit(0) | |
fileHandle = open(file, 'r') | |
for s1,s2,m in createOverlapList(Fasta(fileHandle)): | |
print(m) | |
if (int(m) >= 3): | |
displayOverlapPair(s1,s2,m) | |
if (__name__ == '__main__'): | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment