Skip to content

Instantly share code, notes, and snippets.

@KoStard
Created July 24, 2019 06:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save KoStard/31a7740c48c5d546ee8e6d1d1b41d25e to your computer and use it in GitHub Desktop.
Save KoStard/31a7740c48c5d546ee8e6d1d1b41d25e to your computer and use it in GitHub Desktop.
import heapq
class Node: # Trie node
def __init__(self, val=""):
self.mem = [None] * 26
self.count = 0
self.val = val
def register_word(root, word):
node = root
for c in word:
c = c.lower()
i = ord(c) - ord('a')
if not node.mem[i]:
node.mem[i] = Node(node.val + c)
node = node.mem[i]
node.count += 1
def collect_data(node, buff, k): # Collecting counts from the trie
if node.count:
heapq.heappush(buff, (node.count, node.val))
if len(buff) > k:
heapq.heappop(buff)
for c in node.mem:
if c:
collect_data(c, buff, k)
def most_frequently_used_words(words, k):
"""
Will calculate k most frequently used words using Trie
"""
root = Node()
for w in words:
if w:
register_word(root, w)
buff = []
collect_data(root, buff, k)
return sorted(buff)
## Download hamlet.txt file from https://gist.githubusercontent.com/provpup/2fc41686eab7400b796b/raw/b575bd01a58494dfddc1d6429ef0167e709abf9b/hamlet.txt
# hamlet = open('hamlet.txt', 'r')
# words = []
# buff = ""
# c = hamlet.read(1)
# while c:
# if (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z'):
# buff += c
# elif buff:
# words.append(buff)
# buff = ""
# c = hamlet.read(1)
# print(most_frequently_used_words(words, 50)) # Working in 150ms
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment