Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save intuited/629781 to your computer and use it in GitHub Desktop.
import os
import random
import re
import string
import time
def n_random_words(num_words=3000, path='/usr/share/dict/words'):
    """Return *num_words* distinct random lower-case words from a word list.

    The file at *path* is expected to hold one word per line.  Words are
    sampled by seeking to a random byte offset, discarding the (probably
    partial) line the offset falls in, and taking the line that follows.
    Only candidates made of 3 to 30 characters in ``a-z`` are accepted.

    Caveats:
      * The first line of the file can never be picked, because the line
        containing the offset is always discarded.
      * The loop keeps sampling until enough distinct words are found, so it
        never finishes if the file holds fewer than *num_words* eligible
        words.

    Args:
        num_words: how many distinct words to return.
        path: word-list file to sample from.

    Returns:
        A list of *num_words* unique words, in the order they were drawn.

    Raises:
        OSError: if *path* cannot be read.
        ValueError: if words are requested from an empty file.
    """
    min_word_len = 3
    max_word_len = 30
    # only allow lower case
    word_regexp = re.compile(r'^[a-z]{%s,%s}$' % (min_word_len, max_word_len))

    flen = os.path.getsize(path)
    if num_words > 0 and flen == 0:
        # Nothing could ever be sampled; fail instead of looping forever.
        raise ValueError('word list %r is empty' % (path,))

    words = []
    seen = set()  # O(1) duplicate check; `words` keeps the draw order
    # Binary mode: arbitrary byte offsets are only valid seek targets for
    # binary files, and an offset may land inside a multi-byte character.
    with open(path, 'rb') as f:
        while len(words) < num_words:
            # seek to a random offset in the file
            f.seek(int(random.random() * flen))
            # the current line may be only a partial word, so skip it
            f.readline()
            # latin-1 maps every byte to a character, so decoding never fails
            # and non-ASCII words are simply rejected by the regexp below
            candidate = f.readline().decode('latin-1').strip()
            # you can also make other tests on the word here
            if word_regexp.search(candidate) and candidate not in seen:
                seen.add(candidate)
                # str() is a no-op on Python 3 and yields a native str on
                # Python 2 (safe: the candidate is pure a-z at this point)
                words.append(str(candidate))
    return words
def _time_membership(container, probes):
    """Return the seconds spent testing every item of *probes* for membership in *container*."""
    # Prefer the high-resolution timer where it exists (Python 3.3+).
    timer = getattr(time, 'perf_counter', time.time)
    start = timer()
    for probe in probes:
        if probe in container:
            pass
    return timer() - start


def test_performance(num_words_in_universe=5000, num_rand_words_to_search=2000, num_tests=10):
    """Compare membership-test speed of ``set`` and ``frozenset``.

    Each of the *num_tests* rounds draws a fresh universe of
    *num_words_in_universe* random words and a fresh list of
    *num_rand_words_to_search* probe words, then times looking every probe
    up in a ``frozenset`` and in a ``set`` built from the universe.  Each
    container is measured three times per round, in interleaved order, under
    the labels ``set``/``set2``/``set3`` and
    ``frozenset``/``frozenset2``/``frozenset3``.

    The average time per label is printed, fastest first, one
    ``<seconds> <label>`` line each.

    Args:
        num_words_in_universe: size of the word collection searched in.
        num_rand_words_to_search: number of probe words looked up.
        num_tests: number of rounds to average over.

    Returns:
        A list of ``[label, average_seconds]`` pairs sorted by ascending
        average.

    Raises:
        ZeroDivisionError: if *num_tests* is 0, since there is nothing to
            average.
    """
    times = {"set": [], "frozenset": [],
             "set2": [], "frozenset2": [],
             "set3": [], "frozenset3": []}
    for _ in range(num_tests):
        # The universe must be sized by num_words_in_universe; sizing it by
        # the probe count left that parameter without any effect.
        words = n_random_words(num_words_in_universe)
        search_words = n_random_words(num_rand_words_to_search)
        words_frozenset = frozenset(words)
        words_set = set(words)
        # Measurement order within a round; the two container types are
        # interleaved rather than measured back to back.
        schedule = (
            ('frozenset', words_frozenset),
            ('set', words_set),
            ('set2', words_set),
            ('frozenset2', words_frozenset),
            ('set3', words_set),
            ('frozenset3', words_frozenset),
        )
        for label, container in schedule:
            times[label].append(_time_membership(container, search_words))

    avg_times = [[label, sum(results) / len(results)]
                 for label, results in times.items()]
    avg_times.sort(key=lambda entry: entry[1])
    for label, average in avg_times:
        # A single formatted string prints identically on Python 2 and 3.
        print("%s %s" % (average, label))
    return avg_times
if __name__ == '__main__':
    # test_performance() writes its timing report to stdout itself, so it is
    # simply run here; pretty-printing the call's result only added an extra
    # line of output after the report.
    test_performance()
@devicenull
Copy link

words = n_random_words(num_rand_words_to_search)

I think you meant

words = n_random_words(num_words_in_universe)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment