Skip to content

Instantly share code, notes, and snippets.

@mapio
Created June 28, 2011 06:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mapio/1050635 to your computer and use it in GitHub Desktop.
Save mapio/1050635 to your computer and use it in GitHub Desktop.
A test comparing defaultdict and groupby for grouping
from timeit import Timer
from operator import attrgetter
from random import randint
def timeit(stmt, setup, number=10000):
    """Benchmark ``stmt`` and print its mean cost per execution in microseconds.

    stmt   -- source code of the statement to time.
    setup  -- source code executed once before timing starts.
    number -- how many times ``stmt`` is executed (default 10000).

    The result is written to stdout as ``"<value> usec/pass"``; nothing is
    returned.
    """
    t = Timer(stmt=stmt, setup=setup)
    # Timer.timeit() returns the total number of seconds spent on all
    # `number` executions, so the seconds -> microseconds factor is 1e6.
    print("%.2f usec/pass" % (1000000 * t.timeit(number=number) / number))
class Person(object):
    """Minimal record holding a single ``age`` attribute."""

    def __init__(self, age):
        """Store ``age`` on the instance."""
        self.age = age

    def __repr__(self):
        """Return the string form of the stored age."""
        current_age = self.age
        return str(current_age)
# Benchmark fixture: 1000 Person objects with ages drawn uniformly from 10..90.
persons = [Person(randint(10, 90)) for _ in range(1000)]
# Key function shared by the sort and the groupby in the second benchmark.
get_age = attrgetter("age")
# Setup for the defaultdict benchmark: pulls the fixture list from the
# script being run (this file must be executed as __main__).
setup0 = """
from collections import defaultdict
from __main__ import persons
"""

# Setup for the groupby benchmark: additionally needs the key function.
setup1 = """
from itertools import groupby
from __main__ import persons, get_age
"""

# Grouping by appending each person to a per-age list in a defaultdict.
# The loop body must be indented for the snippet to compile under Timer.
stmt0 = """
persons_by_age = defaultdict(list)
for person in persons:
    persons_by_age[person.age].append(person)
"""

# Grouping by sorting on age and collecting each run yielded by groupby;
# the sort is required because groupby only merges adjacent equal keys.
stmt1 = """
persons_by_age = {k: list(g) for k, g in groupby(sorted(persons, key=get_age), get_age)}
"""
# Run both benchmarks in order: defaultdict first, then sort + groupby.
for bench_stmt, bench_setup in ((stmt0, setup0), (stmt1, setup1)):
    timeit(stmt=bench_stmt, setup=bench_setup)
@mapio
Copy link
Author

mapio commented Jun 28, 2011

On my CPU the output is

29.74 usec/pass
103.87 usec/pass

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment