Skip to content

Instantly share code, notes, and snippets.

@fransua
Created November 8, 2012 18:47
Show Gist options
  • Save fransua/4040707 to your computer and use it in GitHub Desktop.
Save fransua/4040707 to your computer and use it in GitHub Desktop.
searchs for potential loops in single strand sequences
"""
08 Nov 2012
"""
from string import maketrans
REV = maketrans('ATGC', 'TACG')
def print_loops(seq, matches):
"""
nice way to print loops
st1 st1+len1 st1+len2
| | |
xxxxmmmmmmmmmxmmmmmmmmmmmmmxxxxxxxxxxx
||||||||| ||||||||||||| x
xxxxmmmmmmmmmxmmmmmmmmmmmmmxxxxxxxxxxx
| | |
st2 st2+len1 st2+len2
"""
print ' ' + '-'*80
for i, m in enumerate(matches.items()):
print ' # {}\n'.format(i+1)
_print_loop(seq, m)
print ' ' + '-'*80
def _print_loop(seq, match):
"""
nice way to print 1 loop
st1 st1+len1 st1+len2
| | |
xxxxmmmmmmmmmxmmmmmmmmmmmmmxxxxxxxxxxx
||||||||| ||||||||||||| x
xxxxmmmmmmmmmxmmmmmmmmmmmmmxxxxxxxxxxx
| | |
st2 st2+len1 st2+len2
"""
(st1, st2), lengths = match
len2 = lengths[-1]
loop_len = (len(seq) - (st2 + len2)) - (st1 + len2)
seq1 = seq [:st1 +len2 + loop_len/2]
seq2 = seq[::-1][:st2 + len2 + loop_len/2]
prev = ' '*4
draw = prev + '{1:>{0}}'.format(max(len(seq1), len(seq2)), seq1) + '\n'
draw += prev + ' ' * max(st1, st2)
draw += '|'*lengths[0]
for i in xrange(1,len(lengths)):
draw += ' ' + '|' * (lengths[i]-lengths[i-1]-1)
draw += ' ' * (loop_len/2) + (seq[st1 + len2 + loop_len/2] if loop_len%2 else ')') + '\n'
draw += prev + '{1:>{0}}'.format(max(len(seq1), len(seq2)), seq2) + '\n'
print draw
def merge_not_exact_match_old(matches, gap = 1):
"""
find consecutive perfect matches separated by 1 base (gap value) and
merge them.
"""
while True:
for m in matches:
add = matches[m][-1] + gap
nxt = m[0] + add, m[1] + add
if not nxt in matches: continue
matches[m] = [_ for _ in matches[m]]+[_ + add for _ in matches[nxt]]
break
else:
break
del(matches[nxt])
def merge_not_exact_match(matches, gap = 1):
"""
find consecutive perfect matches separated by 1 base (gap value) and
merge them.
"""
to_merge = []
for m in matches:
add = matches[m][-1] + gap
nxt = m[0] + add, m[1] + add
if not nxt in matches:
continue
to_merge.append((m, nxt))
to_del = []
for m, nxt in to_merge:
matches[m] = [_ for _ in matches[m]]+[_ + add for _ in matches[nxt]]
to_del.append(nxt)
for m in to_del:
del(matches[m])
def search_palindrome(seq, verbose=True):
"""
Find all complementary sequences on a simgle strand.
may be separated by at least 3 bases.
"""
def rev_matches(pattern, seq2, ind):
"""
search for all ocurrences of pattern in seq2
"""
while ind != -1:
yield ind
ind = seq2.find(pattern, ind+1)
rev_seq = seq[::-1].translate(REV)
matches = {}
gap = 3 # this is fixed otherwise print will not work
for i in xrange(len(seq)-gap):
for j in xrange(i+gap, len(seq)):
pattern = seq[i:j]
seq2 = rev_seq[:-j-gap]
indx = seq2.find(pattern)
# not found
if indx == -1:
break
# previous bas pair also matching (already found)
if seq[i-1] is rev_seq[indx-1]:
if i!=0 and indx!=0:
break
# find all matchings in other part
for indx in rev_matches(pattern, seq2, indx):
matches[(i, indx)] = [j-i]
if verbose:
print '{0} possible loops found in sequences:\n'.format(len(matches))
print '{0}\n'.format(seq)
print ' ' + '-'*80
for i, m in enumerate(matches.items()):
print ' # {}\n'.format(i+1)
_print_loop(seq, m)
print ' ' + '-'*80
return matches
def main():
"""
main function... testing
"""
#from random import random
seq = 'CAACGTGGCAACGTGCTATACGTAGTACTGCACGTTCCCAGGTT'*300
matches = search_palindrome(seq, verbose=False)
print len(matches)
merge_not_exact_match(matches)
print len(matches)
x = max(matches, key=lambda x:matches[x][-1])
_print_loop(seq, (x, matches[x]))
#print_loops(seq, matches)
if __name__ == "__main__":
exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment