Skip to content

Instantly share code, notes, and snippets.

@Tordek
Last active August 29, 2015 13:56
Show Gist options
  • Save Tordek/9322235 to your computer and use it in GitHub Desktop.
Save Tordek/9322235 to your computer and use it in GitHub Desktop.
Trie-based random word implementation
import datrie
import random
import string
import sys
suffix_trie = datrie.Trie(string.ascii_lowercase + u"_")
def suffixes(s):
return [s[i:] for i in range(len(s))]
for line in sys.stdin:
# Using `_` as start-of-word marker.
s = u"_" + line.strip().decode("utf-8")
for suffix in suffixes(s):
suffix_trie[suffix] = True
for i in range(50):
current_string = u"_"
while True:
for suffix in suffixes(current_string):
candidates = suffix_trie.suffixes(suffix)
# We only care about which letter comes next.
candidates = ['' if candidate == '' else candidate[0] for candidate in candidates]
# Choose the first candidate with at least 2 options.
if len(candidates) > 1:
break
else:
# No suffix of `current_string` is a prefix of any other word. Abort.
break
next_character = random.choice(candidates)
# Possible end-of-word.
if next_character == '':
if len(suffix) < 4:
# We ended too soon, try again.
continue
else:
break
else:
current_string = current_string + next_character
if current_string in suffix_trie:
# Omit words that are real words
continue
# Discard initial `_`
print current_string[1:]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment