Skip to content

Instantly share code, notes, and snippets.

@ajkaanbal
Created June 17, 2016 00:37
Show Gist options
  • Save ajkaanbal/64a90396b5523e65cfb274b76aaea466 to your computer and use it in GitHub Desktop.
Save ajkaanbal/64a90396b5523e65cfb274b76aaea466 to your computer and use it in GitHub Desktop.
def hamming_distance(sequence_1, sequence_2):
    """Count the positions at which two sequences differ.

    The sequences are walked in lockstep with ``zip``, so they are expected
    to have equal length. If they do not, only the first
    ``min(len(sequence_1), len(sequence_2))`` positions are compared and the
    surplus tail of the longer sequence is ignored.

    Args:
        sequence_1: First sequence (for example a DNA string).
        sequence_2: Second sequence.

    Returns:
        The number of compared positions whose items are not equal.
    """
    return sum(
        1
        for character_1, character_2 in zip(sequence_1, sequence_2)
        if character_1 != character_2
    )
def most_frequent_pattern(dna, block_size, mistmaches_allowed):
    """Return the window of ``dna`` with the most approximate occurrences.

    Every substring of length ``block_size`` (a "window") is compared with
    every other window of ``dna``. Two windows match when their Hamming
    distance is at most ``mistmaches_allowed``. The window that matches the
    largest number of other windows is returned. Only substrings that
    actually occur in ``dna`` are candidates.

    Args:
        dna: Sequence to scan.
        block_size: Length of each window.
        mistmaches_allowed: Largest Hamming distance at which two windows
            still count as a match. (The parameter keeps its original
            spelling so existing keyword callers keep working.)

    Returns:
        The winning window, sliced from ``dna``. When several windows tie,
        the one starting latest in ``dna`` wins. An empty slice is returned
        when ``dna`` is too short to hold a single window or ``block_size``
        is not positive.
    """
    # A sequence of length n holds n - block_size + 1 full windows.
    num_windows = len(dna) - block_size + 1
    if block_size <= 0 or num_windows <= 0:
        return dna[:0]

    windows = [dna[start:start + block_size] for start in range(num_windows)]
    match_counts = [0] * num_windows

    for i in range(1, num_windows):
        pattern = windows[i]
        for j in range(i):
            if hamming_distance(pattern, windows[j]) <= mistmaches_allowed:
                # Hamming distance is symmetric, so a single comparison
                # credits both windows of the pair.
                match_counts[i] += 1
                match_counts[j] += 1

    # Including the start index in the key makes ties resolve to the latest
    # window.
    best_start = max(
        range(num_windows),
        key=lambda start: (match_counts[start], start),
    )
    return windows[best_start]
if __name__ == '__main__':
    # Demo run: look for the most frequent 10-letter window of the sample
    # sequence, tolerating up to 2 mismatching letters per comparison.
    sample_dna = 'CACAGTAGGCGCCGGCACACACAGCCCCGGGCCCCGGGCCGCCCCGGGCCGGCGGCCGCCGGCGCCGGCACACCGGCACAGCCGTACCGGCACAGTAGTACCGGCCGGCCGGCACACCGGCACACCGGGTACACACCGGGGCGCACACACAGGCGGGCGCCGGGCCCCGGGCCGTACCGGGCCGCCGGCGGCCCACAGGCGCCGGCACAGTACCGGCACACACAGTAGCCCACACACAGGCGGGCGGTAGCCGGCGCACACACACACAGTAGGCGCACAGCCGCCCACACACACCGGCCGGCCGGCACAGGCGGGCGGGCGCACACACACCGGCACAGTAGTAGGCGGCCGGCGCACAGCC'
    window_length = 10
    max_mismatches = 2
    print("Result: {}".format(
        most_frequent_pattern(sample_dna, window_length, max_mismatches)))
    # Result noted by the original author: GCGCACACAC (not re-verified here).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment