Skip to content

Instantly share code, notes, and snippets.

@dmmfll
Created September 7, 2016 11:31
Show Gist options
  • Save dmmfll/d37c5908282a7a8173cebbad3f1697c4 to your computer and use it in GitHub Desktop.
Save dmmfll/d37c5908282a7a8173cebbad3f1697c4 to your computer and use it in GitHub Desktop.
example of using timeit with sorting
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Is it faster to use immutable data or objects to sort?"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from faker import Faker"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"faker = Faker()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**define some constants**"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"(\n",
" PERSON_COUNT,\n",
" FIRST,\n",
") = (\n",
" 500,\n",
" 0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = tuple([(faker.name(), faker.pyint()) for _ in range(PERSON_COUNT)])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Sharon Jennings', 55)\n"
]
}
],
"source": [
"# check the first one\n",
"print(data[FIRST])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**using \\* positional expansion operator with zip unzips**"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"unsorted_names, unsorted_ages = tuple(zip(*data))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sorted_names = tuple(sorted(unsorted_names))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Time how long it takes to sort a list of PERSON_COUNT names.**"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10000 loops, best of 3: 172 µs per loop\n"
]
}
],
"source": [
"%timeit sorted_names = tuple(sorted(unsorted_names))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"indices = tuple([unsorted_names.index(sorted_name) for sorted_name in sorted_names])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Aaron Green, Aaron Green, Sharon Jennings\n"
]
}
],
"source": [
"indexed_first = unsorted_names[indices[FIRST]]\n",
"sorted_first = sorted_names[FIRST]\n",
"unsorted_names_first = unsorted_names[FIRST]\n",
"print(', '.join((indexed_first, sorted_first, unsorted_names_first)))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"assert all((\n",
" indexed_first == sorted_first,\n",
" unsorted_names_first not in (indexed_first, sorted_first), # could fail if first unsorted is sorted first!\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Aaron Green', 7206)\n"
]
}
],
"source": [
"sorted_data = tuple(data[index] for index in indices)\n",
"print(sorted_data[FIRST])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"sorted_names, sorted_ages = tuple(zip(*sorted_data))\n",
"# sanity check\n",
"assert all((\n",
" sorted_names == tuple(sorted(unsorted_names)),\n",
" sorted_ages == tuple(unsorted_ages[index] for index in indices),\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def sort_people(people):\n",
" \"\"\"Sort people by name.\n",
" \n",
" :people: tuple of (name, age,) tuples\"\"\"\n",
" unsorted_names, unsorted_ages = zip(*people)\n",
" sorted_names = sorted(unsorted_names)\n",
" indices = (unsorted_names.index(sorted_name) for sorted_name in sorted_names)\n",
" sorted_data = tuple(people[index] for index in indices)\n",
" return sorted_data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"('Aaron Green', 7206)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sort_people(data)[FIRST]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 loops, best of 3: 2.33 ms per loop\n"
]
}
],
"source": [
"%timeit sort_people(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Is it faster to then use that sorted tuple of names to then grab the ages?"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class Person(object):\n",
" def __init__(self, name, age):\n",
" self.name = name\n",
" self.age = age\n",
"\n",
" def __repr__(self):\n",
" return \"<name: {name}, age: {age}>\".format(name=self.name, age=self.age)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def byName_key(person):\n",
" return person.name"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"object_people = tuple([Person(name, age) for name, age in data])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 loops, best of 3: 239 µs per loop\n"
]
}
],
"source": [
"%timeit sorted(object_people, key=byName_key)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 loops, best of 3: 203 µs per loop\n"
]
}
],
"source": [
"%timeit sorted([person.name for person in object_people])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment