Created
November 7, 2018 18:16
-
-
Save AhmetCanSolak/9e4403e69dc6694425b28e58356756ac to your computer and use it in GitHub Desktop.
Finding Frequent Words by Sorting https://stepik.org/lesson/3011/step/1?unit=8236
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
# ------ Pythonic solution | |
def alternative_solution(inp): | |
cntr = Counter([inp[i:i+2] for i in range(len(inp)-1)]) | |
maxc = cntr.most_common(1)[0][1] | |
return [key for key,elem in cntr.items() if(elem==maxc)] | |
# ------- Given Solution | |
def get_index(inp): | |
index_ref = {'A':0, 'C':1, 'G':2, 'T':3} | |
count = 0 | |
index = 0 | |
for ch in reversed(inp): | |
index += pow(4,count) * index_ref[ch] | |
count += 1 | |
return index | |
def given_solution(inp): | |
liste = [] | |
most_frequents = [] | |
# Populate tuple(kmer,index) list | |
for i in range(len(inp)-1): | |
kmer = inp[i:i+2] | |
index = get_index(kmer) | |
liste.append((kmer,index)) | |
# Sort the list with respect to index | |
liste.sort(key=lambda tupl:tupl[1]) | |
# Get maximum | |
hist = {} | |
maxc = 0 | |
count = 1 | |
prev = liste[0] | |
for elem in liste[1:]: | |
if elem == prev: | |
count += 1 | |
else: | |
hist[prev] = count | |
if count > maxc: | |
maxc = count | |
count = 1 | |
prev = elem | |
# Find kmers of maximum number of occurence | |
for key,val in hist.items(): | |
if val == maxc: | |
most_frequents.append(key[0]) | |
return most_frequents | |
# ----- Simplified Solution | |
def simplified_solution(inp): | |
kmers = {} | |
result = [] | |
# Populate kmers dictionary/Map | |
for i in range(len(inp)-1): | |
kmer = inp[i:i+2] | |
if kmer in kmers: | |
kmers[kmer] += 1 | |
else: | |
kmers[kmer] = 1 | |
# Find the maximum number of occurence | |
maxc = 0 | |
for key, elem in kmers.items(): | |
if elem > maxc: | |
maxc = elem | |
# Find the kmers of maximum occurence | |
for key, elem in kmers.items(): | |
if elem == maxc: | |
result.append(key) | |
return result | |
inp = input() | |
print("Given Solution") | |
print(given_solution(inp)) | |
print("Simplified Solution") | |
print(simplified_solution(inp)) | |
print("Alternative Solution") | |
print(alternative_solution(inp)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment