Skip to content

Instantly share code, notes, and snippets.

@akatriel
Created December 8, 2017 12:22
Show Gist options
  • Save akatriel/c505b524dcfa4843ae54b054e1c22ac3 to your computer and use it in GitHub Desktop.
Save akatriel/c505b524dcfa4843ae54b054e1c22ac3 to your computer and use it in GitHub Desktop.
Motif Finding created by akatriel1 - https://repl.it/@akatriel1/Motif-Finding
def HammingDistance(str1, str2):
    """Return the number of positions at which two strings differ.

    Args:
        str1: First sequence.
        str2: Second sequence; must be the same length as ``str1``.

    Returns:
        The count of indices ``i`` for which ``str1[i] != str2[i]``.

    Raises:
        ValueError: If the two sequences have different lengths, since the
            Hamming distance is only defined for equal-length sequences.
    """
    if len(str1) != len(str2):
        raise ValueError(
            "HammingDistance requires sequences of equal length "
            "(got %d and %d)" % (len(str1), len(str2))
        )
    # Each mismatching pair contributes True (== 1) to the sum.
    return sum(a != b for a, b in zip(str1, str2))
def neighbors(kmer, distance):
    """Return the set of DNA strings close to ``kmer``.

    Builds, recursively on the suffix of ``kmer``, every string over the
    nucleotides A/T/C/G that has the same length as ``kmer`` and differs
    from it in at most ``distance`` positions.

    Args:
        kmer: DNA string to generate neighbours for.
        distance: Maximum number of mismatches allowed.

    Returns:
        A set of strings. The set is empty when ``kmer`` is empty and is
        ``{kmer}`` when ``distance`` is 0.
    """
    nucleotides = ('A', 'T', 'C', 'G')
    k = len(kmer)
    if k == 0:
        # An empty set, not ``{}`` (which is a dict), so that every code
        # path returns the same type.
        return set()
    if distance == 0:
        return {kmer}
    if k == 1:
        return set(nucleotides)

    first_symbol, suffix = kmer[0], kmer[1:]
    result = set()
    for suffix_neighbor in neighbors(suffix, distance):
        if HammingDistance(suffix_neighbor, suffix) == distance:
            # The suffix already uses up the whole mismatch budget, so the
            # first symbol has to match the original.
            result.add(first_symbol + suffix_neighbor)
        else:
            # At least one mismatch is still available: any nucleotide may
            # be placed in front.
            result.update(n + suffix_neighbor for n in nucleotides)
    return result
# MotifEnumeration(Dna, k, d)
#     Patterns ← an empty set
#     for each k-mer Pattern in the first string in Dna
#         for each k-mer Pattern' differing from Pattern by at most d mismatches
#             if Pattern' appears in each string from Dna with at most d mismatches
#                 add Pattern' to Patterns
#     remove duplicates from Patterns
#     return Patterns
def MotifEnumeration(dna, k, d):
    """Find all (k, d)-motifs shared by every string in ``dna``.

    A k-mer is a (k, d)-motif when it appears in each string of ``dna``
    with at most ``d`` mismatches. For each strand the function collects
    the neighbours (via ``neighbors``) of every k-length window, then keeps
    only the k-mers present in all of those per-strand collections.

    Args:
        dna: Sequence of DNA strings.
        k: Length of the motifs to look for.
        d: Maximum number of mismatches allowed per occurrence.

    Returns:
        A sorted list of the distinct motifs; empty when ``dna`` is empty,
        ``k`` is not positive, or no k-mer satisfies the condition.
    """
    if not dna or k <= 0:
        return []

    patterns = None
    for strand in dna:
        # Every k-mer lying within d mismatches of some window of this strand.
        reachable = set()
        for i in range(len(strand) - k + 1):
            reachable.update(neighbors(strand[i:i + k], d))

        # A motif has to be reachable from every strand, so intersect.
        patterns = reachable if patterns is None else patterns & reachable
        if not patterns:
            # No candidate survives; later strands cannot add any back.
            return []

    # Sorted so the result does not depend on set iteration order.
    return sorted(patterns)
# Sample input for MotifEnumeration: motif length k, mismatch budget d,
# and the list t of DNA strings to search.
k, d = 3, 1
t = ["ATTTGGC", "TGCCTTA", "CGGTATC", "GAAAATT"]

print(MotifEnumeration(t, k, d))
# Debugging aid: uncomment to inspect the 1-neighbourhood of the first string.
# print(neighbors(t[0], 1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment