Created
September 7, 2016 11:31
-
-
Save dmmfll/d37c5908282a7a8173cebbad3f1697c4 to your computer and use it in GitHub Desktop.
example of using timeit with sorting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Is it faster to use immutable data or objects to sort?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from faker import Faker" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"faker = Faker()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**define some constants**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"(\n", | |
" PERSON_COUNT,\n", | |
" FIRST,\n", | |
") = (\n", | |
" 500,\n", | |
" 0,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data = tuple([(faker.name(), faker.pyint()) for _ in range(PERSON_COUNT)])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"('Sharon Jennings', 55)\n" | |
] | |
} | |
], | |
"source": [ | |
"# check the first one\n", | |
"print(data[FIRST])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**using \\* positional expansion operator with zip unzips**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"unsorted_names, unsorted_ages = tuple(zip(*data))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"sorted_names = tuple(sorted(unsorted_names))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**Time how long it takes to sort a list of PERSON_COUNT names.**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"10000 loops, best of 3: 172 µs per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit sorted_names = tuple(sorted(unsorted_names))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"indices = tuple([unsorted_names.index(sorted_name) for sorted_name in sorted_names])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Aaron Green, Aaron Green, Sharon Jennings\n" | |
] | |
} | |
], | |
"source": [ | |
"indexed_first = unsorted_names[indices[FIRST]]\n", | |
"sorted_first = sorted_names[FIRST]\n", | |
"unsorted_names_first = unsorted_names[FIRST]\n", | |
"print(', '.join((indexed_first, sorted_first, unsorted_names_first)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"assert all((\n", | |
" indexed_first == sorted_first,\n", | |
" unsorted_names_first not in (indexed_first, sorted_first), # could fail if first unsorted is sorted first!\n", | |
"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"('Aaron Green', 7206)\n" | |
] | |
} | |
], | |
"source": [ | |
"sorted_data = tuple(data[index] for index in indices)\n", | |
"print(sorted_data[FIRST])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"sorted_names, sorted_ages = tuple(zip(*sorted_data))\n", | |
"# sanity check\n", | |
"assert all((\n", | |
" sorted_names == tuple(sorted(unsorted_names)),\n", | |
" sorted_ages == tuple(unsorted_ages[index] for index in indices),\n", | |
"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def sort_people(people):\n", | |
" \"\"\"Sort people by name.\n", | |
" \n", | |
" :people: tuple of (name, age,) tuples\"\"\"\n", | |
" unsorted_names, unsorted_ages = zip(*people)\n", | |
" sorted_names = sorted(unsorted_names)\n", | |
" indices = (unsorted_names.index(sorted_name) for sorted_name in sorted_names)\n", | |
" sorted_data = tuple(people[index] for index in indices)\n", | |
" return sorted_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"('Aaron Green', 7206)" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sort_people(data)[FIRST]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"100 loops, best of 3: 2.33 ms per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit sort_people(data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Is it faster to then use that sorted tuple of names to then grab the ages?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"class Person(object):\n", | |
" def __init__(self, name, age):\n", | |
" self.name = name\n", | |
" self.age = age\n", | |
"\n", | |
" def __repr__(self):\n", | |
" return \"<name: {name}, age: {age}>\".format(name=self.name, age=self.age)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def byName_key(person):\n", | |
" return person.name" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"object_people = tuple([Person(name, age) for name, age in data])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1000 loops, best of 3: 239 µs per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit sorted(object_people, key=byName_key)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1000 loops, best of 3: 203 µs per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit sorted([person.name for person in object_people])" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment