Skip to content

Instantly share code, notes, and snippets.

@astockwell
Last active March 28, 2016 22:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save astockwell/ba6b671cc0dcd3136eb3 to your computer and use it in GitHub Desktop.
Save astockwell/ba6b671cc0dcd3136eb3 to your computer and use it in GitHub Desktop.
The power of Trie's -- list search: 0.436379 seconds, trie search: 0.0003269999999999662 seconds.
import re
import string
import time
pattern_words_only = re.compile('[^A-Za-z0-9 ]+', re.UNICODE)
dictionary_words = [line.rstrip('\n') for line in open('/usr/share/dict/words')]
# print(len(dictionary_words))
two_cities = 'It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way - in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only. There were a king with a large jaw and a queen with a plain face, on the throne of England; there were a king with a large jaw and a queen with a fair face, on the throne of France. In both countries it was clearer than crystal to the lords of the State preserves of loaves and fishes, that things in general were settled for ever.'
two_cities_words = re.sub(pattern_words_only, '', two_cities).lower().split(' ')
# print(len(two_cities_words))
start_time = time.clock()
for word in two_cities_words:
if word not in dictionary_words:
print(word)
print(time.clock() - start_time, "seconds")
# (0.436379, 'seconds')
import re
import string
import time
pattern_words_only = re.compile('[^A-Za-z0-9 ]+', re.UNICODE)
dictionary_words = [line.rstrip('\n') for line in open('/usr/share/dict/words')]
# print(len(dictionary_words))
_end = '_end_'
# Reference implementation: http://stackoverflow.com/a/11016430/1316499
def make_trie(words):
root = dict()
for word in words:
current_dict = root
for letter in word:
current_dict = current_dict.setdefault(letter, {})
current_dict[_end] = _end
return root
def in_trie(trie, word):
current_dict = trie
for letter in word:
if letter in current_dict:
current_dict = current_dict[letter]
else:
return False
else:
if _end in current_dict:
return True
else:
return False
# Luckily only have to do this once:
dictionary_trie = make_trie(dictionary_words)
two_cities = 'It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way - in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only. There were a king with a large jaw and a queen with a plain face, on the throne of England; there were a king with a large jaw and a queen with a fair face, on the throne of France. In both countries it was clearer than crystal to the lords of the State preserves of loaves and fishes, that things in general were settled for ever.'
two_cities_words = re.sub(pattern_words_only, '', two_cities).lower().split(' ')
# print(len(two_cities_words))
start_time = time.clock()
for word in two_cities_words:
if in_trie(dictionary_trie, word) is False:
print(word)
print(time.clock() - start_time, "seconds")
# (0.0003269999999999662, 'seconds')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment