Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Python - speed comparison of dictionary filling
from collections import defaultdict
def defdic(words):
    """Count word frequencies using ``collections.defaultdict``.

    Args:
        words: Iterable of hashable items (words) to count.

    Returns:
        A plain ``dict`` mapping each distinct word to its number of
        occurrences.
    """
    freq = defaultdict(int)
    for word in words:
        # A missing key is created with int() == 0 before the increment.
        freq[word] += 1
    # Convert back to a regular dict so the result type matches the other
    # counting variants in this file.
    return dict(freq)
def clasic(words):
    """Count word frequencies with an explicit membership test.

    Args:
        words: Iterable of hashable items (words) to count.

    Returns:
        A ``dict`` mapping each distinct word to its number of occurrences.
    """
    freq = {}
    for word in words:
        # Look-before-you-leap variant: test for the key, then update.
        if word in freq:
            freq[word] += 1
        else:
            freq[word] = 1
    return freq
def clasic_try(words):
    """Count word frequencies using ``try``/``except KeyError``.

    Args:
        words: Iterable of hashable items (words) to count.

    Returns:
        A ``dict`` mapping each distinct word to its number of occurrences.
    """
    freq = {}
    for word in words:
        # Ask-forgiveness variant: increment optimistically and create the
        # entry only when the key turns out to be missing.
        try:
            freq[word] += 1
        except KeyError:
            freq[word] = 1
    return freq
def dictget_loc(words):
    """Count word frequencies using ``dict.get`` bound to a local name.

    Args:
        words: Iterable of hashable items (words) to count.

    Returns:
        A ``dict`` mapping each distinct word to its number of occurrences.
    """
    freq = {}
    # Bind the method once so the loop does not repeat the attribute lookup
    # on every iteration; this is the only difference from ``dictget``.
    myget = freq.get
    for word in words:
        freq[word] = myget(word, 0) + 1
    return freq
def dictget(words):
    """Count word frequencies using ``dict.get`` with a default of 0.

    Args:
        words: Iterable of hashable items (words) to count.

    Returns:
        A ``dict`` mapping each distinct word to its number of occurrences.
    """
    freq = {}
    for word in words:
        # get() returns 0 for a word not seen yet, so no branch is needed.
        freq[word] = freq.get(word, 0) + 1
    return freq
def time_function(fname, param, rep=100):
    """Time ``rep`` calls of a function on ``param`` and print the result.

    Prints the function name, then a line of the form
    ``"<rep> calls of function <name> took <seconds> seconds"``.

    Args:
        fname: Name of the function to benchmark, looked up in ``__main__``
            (falling back to this module's globals), or the callable itself.
        param: The single argument passed to the function on every call.
        rep: Number of timed calls.

    Raises:
        ImportError: If ``fname`` is a string that cannot be resolved.
    """
    # Imported locally so the function does not depend on ``timeit`` having
    # been imported as a module global by the script entry point.
    import sys
    import timeit

    if callable(fname):
        func = fname
        label = getattr(fname, "__name__", repr(fname))
    else:
        func = None
        label = fname
    print(label)

    if func is None:
        func = getattr(sys.modules["__main__"], fname, None)
    if func is None:
        func = globals().get(fname)
    if func is None:
        raise ImportError(
            "cannot import name {!r} from '__main__'".format(fname))

    # Hand the function and its argument to timeit as objects.  Formatting
    # ``param`` into the statement text would make every timed call rebuild
    # the argument from its printed form, so the measurement would include
    # that cost instead of just the call being benchmarked.
    elapsed = timeit.timeit(
        "func(param)", globals={"func": func, "param": param}, number=rep)
    print("{} calls of function {} took {} seconds".format(
        rep, label, elapsed))
def get_data(path='data/txt/les_miserables.txt', encoding=None):
    """Read a text file and return its lower-cased words as a list.

    The characters ``. , ? ; " ! :`` are replaced by spaces, the text is
    lower-cased and then split on whitespace.

    Args:
        path: File to read. Defaults to the benchmark corpus used by this
            script.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform default, which is what the script used before this
            parameter existed.

    Returns:
        A list of word strings in file order (duplicates included).
    """
    import re

    punctuation = re.compile(r'[.,?;"!:]')
    with open(path, encoding=encoding) as source:
        text = source.read()
    # Substitute a space (rather than deleting) so that words separated only
    # by punctuation, e.g. "end.Start", are still split apart.
    return punctuation.sub(' ', text).lower().split()
if __name__ == '__main__':
    # Script entry point: load the corpus once, then benchmark every
    # counting variant on the same word list.
    # ``timeit`` is bound as a module global here so it is available to the
    # timing helper when the file is run as a script.
    import timeit

    DATA = get_data()
    for name in ('defdic', 'clasic', 'clasic_try', 'dictget', 'dictget_loc'):
        time_function(name, DATA)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment