Skip to content

Instantly share code, notes, and snippets.

@almost
Created December 1, 2012 20:34
Show Gist options
  • Save almost/4184841 to your computer and use it in GitHub Desktop.
Save almost/4184841 to your computer and use it in GitHub Desktop.
# Given a dictionary of words and scores and prefix return the top 10
# words in the dictionary that start with that prefix ordered by score
# (highest first)
# Builds a trie (a suffix tree) from the dictionary. The score
# ordering thing is my idea of how that could work, I haven't look at
# how others do it yet so it might be crazy :)
import random
def add(trie, word, score):
if "MAX" not in trie or score > trie["MAX"]:
trie["MAX"] = score
if len(word) == 0:
trie["SCORE"] = score
return
add(trie.setdefault(word[0], {}), word[1:], score)
def build(words):
root = {}
for score,w in words:
add(root, w, score)
return root
def in_order(trie):
# Only keys of length l, because I've stashed other things in
# multichar fields (no, I wouldn't do that in production :p)
ordered = [(t["MAX"], l, in_order(t), None) for (l,t) in trie.items() if len(l) == 1]
if "SCORE" in trie:
ordered.append((trie["SCORE"], "", iter([]), (trie["SCORE"], "")))
while ordered:
ordered.sort(key=lambda x: -x[0])
m, l, gen, cache = ordered.pop(0)
try:
s,suffix = cache or gen.next()
except StopIteration:
continue
yield (s, l + suffix)
try:
cache = gen.next()
except StopIteration:
continue
ordered.append((cache[0], l, gen, cache))
def lookup(trie, prefix):
if prefix == "":
for x in in_order(trie):
yield x
elif prefix[0] not in trie:
return
else:
for s,w in lookup(trie[prefix[0]], prefix[1:]):
yield (s, prefix+w)
# Grab a bunch of words from a dict file and assign them random scores
trie = build((random.gauss(0, 10000), x) for x in open("/usr/share/dict/words"))
for (x,i) in zip(lookup(trie, ""), range(10)):
print x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment