Skip to content

Instantly share code, notes, and snippets.

@zhangys-lucky
Last active August 29, 2015 14:27
Show Gist options
  • Save zhangys-lucky/3f620cfbf61949c21c10 to your computer and use it in GitHub Desktop.
Save zhangys-lucky/3f620cfbf61949c21c10 to your computer and use it in GitHub Desktop.
find top k words using Trie and MinHeap(Python Code)
class TrieNode(object):
# Initialize your data structure here.
def __init__(self):
self.count = 0
self.children = {}
class Trie(object):
def __init__(self):
self.root = TrieNode()
# @param {string} word
# @return {void}
# Inserts a word into the trie.
def insert(self, word):
node = self.root
for w in word:
if w not in node.children:
node.children[w] = TrieNode()
node = node.children[w]
node.count += 1
def _startsWith(self,prefix):
"""
@param {string} prefix
@return {TrieNode} or None
"""
node = self.root
for w in prefix:
if w in node.children:
node = node.children[w]
else:
return None
return node
# @param {string} word
# @return {boolean}
# Returns if the word is in the trie.
def search(self, word):
node = self._startsWith(word)
if node and node.v:
return True
else:
return False
# @param {string} prefix
# @return {boolean}
# Returns if there is any word in the trie
# that starts with the given prefix.
def startsWith(self, prefix):
if self._startsWith(prefix):
return True
else:
return False
def topkword(trie,prefix_word,k):
prefix_node = trie._startsWith(prefix_word)
import heapq
heap = []
import functools
@functools.total_ordering
class HeapNode(object):
def __init__(self,s,count):
self.s = s
self.count = count
def __eq__(self, other):
return self.count == other.count
def __lt__(self, other):
return self.count < other.count
def dfs_walk_trie(node,s,heap):
for c in node.children:
child = node.children[c]
if child.count > 0:
# update heap
if len(heap) <= k:
heapq.heappush(heap,HeapNode(s + c,child.count))
else:
heapq.heappushpop(heap,HeapNode(s + c,child.count))
else:
dfs_walk_trie(child,s + c,heap)
pass
dfs_walk_trie(prefix_node,prefix_word,heap)
sort = []
while heap:
sort.append(heapq.heappop(heap))
return [(n.s,n.count) for n in sort][::-1]
trie = Trie()
# create trie tree
for w in ["hell","hell","hess","hess","hess","hess","hewpwpww","hiiii"]:
trie.insert(w)
# print top 3 with 'he' prefix
print(topkword(trie,"he",3))
print(topkword(trie,"hi",3))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment