Last active
August 17, 2018 19:20
-
-
Save shoyer/7a29cc510db06310b370cd4ac537a361 to your computer and use it in GitHub Desktop.
Dask Task class benchmark.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Dask Task class benchmark.ipynb", | |
"version": "0.3.2", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"[View in Colaboratory](https://colab.research.google.com/gist/shoyer/7a29cc510db06310b370cd4ac537a361/dask-task-class-benchmark.ipynb)" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "bhCKv70oKkaA", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"## Task classes" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "gKlRkBE3tcl_", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"outputId": "9d50d274-2440-4db3-a1d5-617dce72332c" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"! pip3 install cython" | |
], | |
"execution_count": 82, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: cython in /usr/local/lib/python3.6/dist-packages (0.28.5)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "odTEEyOft6hF", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 53 | |
}, | |
"outputId": "57251fc9-6bf4-40aa-839b-56a63acc95da" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"%load_ext Cython" | |
], | |
"execution_count": 83, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"The Cython extension is already loaded. To reload it, use:\n", | |
" %reload_ext Cython\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "U619cqIotI1W", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"class Task(object):\n", | |
"    \"\"\"Pure-Python task record: a callable plus the arguments to call it with.\n", | |
"\n", | |
"    Positional arguments after ``function`` are stored in ``args``. Keyword\n", | |
"    arguments are stored in ``kwargs``, except ``annotations``, which is popped\n", | |
"    out and kept separately (``None`` when it is not supplied).\n", | |
"    \"\"\"\n", | |
"    __slots__ = (\"function\", \"args\", \"kwargs\", \"annotations\")\n", | |
"\n", | |
"    def __init__(self, function, *args, **kwargs):\n", | |
"        self.function = function\n", | |
"        self.args = args\n", | |
"        # Pop before storing kwargs so 'annotations' never appears among them.\n", | |
"        self.annotations = kwargs.pop(\"annotations\", None)\n", | |
"        self.kwargs = kwargs\n", | |
"\n", | |
"    def __getstate__(self):\n", | |
"        \"\"\"Return the four fields as a tuple, in ``__slots__`` order, for pickling.\"\"\"\n", | |
"        return (self.function, self.args, self.kwargs, self.annotations)\n", | |
"\n", | |
"    def __setstate__(self, state):\n", | |
"        \"\"\"Restore the four fields from a tuple laid out as in ``__getstate__``.\"\"\"\n", | |
"        self.function = state[0]\n", | |
"        self.args = state[1]\n", | |
"        self.kwargs = state[2]\n", | |
"        self.annotations = state[3]\n", | |
"\n", | |
"    def __repr__(self):\n", | |
"        \"\"\"Return ``Task(name=value, ...)``, listing only the fields that are truthy.\"\"\"\n", | |
"        # Instances define no __getitem__, so fields are read by slot name.\n", | |
"        details = \", \".join(\"%s=%r\" % (name, getattr(self, name, None))\n", | |
"                            for name in self.__slots__\n", | |
"                            if getattr(self, name, None))\n", | |
"        return 'Task({})'.format(details)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "bBnE5iuztn2I", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"%%cython\n", | |
"\n", | |
"cdef class CythonTask:\n", | |
"    \"\"\"Cython extension type holding a callable plus the arguments to call it with.\n", | |
"\n", | |
"    Positional arguments after ``function`` are stored in ``args``. Keyword\n", | |
"    arguments are stored in ``kwargs``, except ``annotations``, which is popped\n", | |
"    out and kept separately (``None`` when it is not supplied).\n", | |
"    \"\"\"\n", | |
"    cdef readonly object function\n", | |
"    cdef readonly tuple args\n", | |
"    cdef readonly dict kwargs\n", | |
"    cdef readonly dict annotations\n", | |
"\n", | |
"    def __init__(self, function, *args, **kwargs):\n", | |
"        self.function = function\n", | |
"        self.args = args\n", | |
"        # Pop before storing kwargs so 'annotations' never appears among them.\n", | |
"        self.annotations = kwargs.pop(\"annotations\", None)\n", | |
"        self.kwargs = kwargs\n", | |
"\n", | |
"    def __getstate__(self):\n", | |
"        \"\"\"Return the four fields as a tuple for pickling.\"\"\"\n", | |
"        return (self.function, self.args, self.kwargs, self.annotations)\n", | |
"\n", | |
"    def __setstate__(self, state):\n", | |
"        \"\"\"Restore the four fields from a tuple laid out as in ``__getstate__``.\"\"\"\n", | |
"        self.function = state[0]\n", | |
"        self.args = state[1]\n", | |
"        self.kwargs = state[2]\n", | |
"        self.annotations = state[3]\n", | |
"\n", | |
"    def __repr__(self):\n", | |
"        \"\"\"Return ``Task(name=value, ...)``, listing only the fields that are truthy.\"\"\"\n", | |
"        # This class defines neither __slots__ nor __getitem__, so the\n", | |
"        # fields are enumerated explicitly.\n", | |
"        fields = ((\"function\", self.function), (\"args\", self.args),\n", | |
"                  (\"kwargs\", self.kwargs), (\"annotations\", self.annotations))\n", | |
"        details = \", \".join([\"%s=%r\" % (name, value)\n", | |
"                             for name, value in fields if value])\n", | |
"        return 'Task({})'.format(details)\n", | |
"\n", | |
" \n", | |
"cdef class CythonTask2:\n", | |
"    \"\"\"Cython extension type taking ``args``/``kwargs``/``annotations`` as explicit containers.\n", | |
"\n", | |
"    The positional tuple and the two dicts are passed in directly rather than\n", | |
"    collected from ``*args``/``**kwargs``. Omitted dicts default to a new empty\n", | |
"    dict for each instance.\n", | |
"    \"\"\"\n", | |
"    cdef readonly object function\n", | |
"    cdef readonly tuple args\n", | |
"    cdef readonly dict kwargs\n", | |
"    cdef readonly dict annotations\n", | |
"\n", | |
"    def __init__(self, function, args=(), kwargs=None, annotations=None):\n", | |
"        self.function = function\n", | |
"        self.args = args\n", | |
"        # None sentinels: a literal {} default would be one dict shared by\n", | |
"        # every instance constructed without that argument.\n", | |
"        self.kwargs = {} if kwargs is None else kwargs\n", | |
"        self.annotations = {} if annotations is None else annotations\n", | |
"\n", | |
"    def __reduce__(self):\n", | |
"        \"\"\"Pickle as a call to ``CythonTask2`` with the four fields as positional arguments.\"\"\"\n", | |
"        state = (self.function, self.args, self.kwargs, self.annotations)\n", | |
"        return (CythonTask2, state)\n", | |
"\n", | |
"    def __repr__(self):\n", | |
"        \"\"\"Return ``Task(name=value, ...)``, listing only the fields that are truthy.\"\"\"\n", | |
"        # This class defines neither __slots__ nor __getitem__, so the\n", | |
"        # fields are enumerated explicitly.\n", | |
"        fields = ((\"function\", self.function), (\"args\", self.args),\n", | |
"                  (\"kwargs\", self.kwargs), (\"annotations\", self.annotations))\n", | |
"        details = \", \".join([\"%s=%r\" % (name, value)\n", | |
"                             for name, value in fields if value])\n", | |
"        return 'Task({})'.format(details)\n" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "9UJbt4A4xQ0h", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"## Creation timings" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "nY-3IKPZtfvg", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"def f(x):\n", | |
"    \"\"\"Return ``x + 1``; a deliberately trivial callable for the tasks to wrap.\"\"\"\n", | |
"    successor = x + 1\n", | |
"    return successor" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "hqkPDkIDD0jH", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 107 | |
}, | |
"outputId": "a4c183b7-52ac-4a14-c4ed-645658f37b5a" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Each statement builds 1000 task-like objects; -r 30 asks %timeit for 30 repeats.\n", | |
"# Baseline: a plain tuple of (function, argument, kwargs dict, annotations dict).\n", | |
"%timeit -r 30 [(f, i, {\"bob\": \"foo\"}, {\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n", | |
"# Pure-Python Task class.\n", | |
"%timeit -r 30 [Task(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n", | |
"# Cython class with the same *args/**kwargs constructor as Task.\n", | |
"%timeit -r 30 [CythonTask(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n", | |
"# Cython class that receives the args tuple and kwargs dict as explicit arguments.\n", | |
"%timeit -r 30 [CythonTask2(f, (i,), {\"bob\": \"foo\"}, annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"The slowest run took 4.55 times longer than the fastest. This could mean that an intermediate result is being cached.\n", | |
"1000 loops, best of 30: 343 µs per loop\n", | |
"1000 loops, best of 30: 1.14 ms per loop\n", | |
"1000 loops, best of 30: 635 µs per loop\n", | |
"1000 loops, best of 30: 548 µs per loop\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "hd7Q0kd2xSfF", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"## Pickle timings" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "8H9wZd0PvpPW", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"import pickle" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "aN-6FYL9TnIR", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 89 | |
}, | |
"outputId": "f6a07313-27c7-4cda-c1ee-cfff4bae44f2" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# For each representation: build a list of 1000 objects, then time pickling the\n", | |
"# whole list with the highest available protocol. `tasks` is rebound each time,\n", | |
"# so only the last list is still bound when the cell finishes.\n", | |
"# Baseline: plain tuples.\n", | |
"tasks = [(f, i, {\"bob\": \"foo\"}, {\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n", | |
"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n", | |
"\n", | |
"# Pure-Python Task (pickled through __getstate__/__setstate__).\n", | |
"tasks = [Task(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n", | |
"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n", | |
"\n", | |
"# CythonTask (also defines __getstate__/__setstate__).\n", | |
"tasks = [CythonTask(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n", | |
"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n", | |
"\n", | |
"# CythonTask2 (defines __reduce__ instead).\n", | |
"tasks = [CythonTask2(f, (i,), {\"bob\": \"foo\"}, annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n", | |
"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)" | |
], | |
"execution_count": 88, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"1000 loops, best of 30: 500 µs per loop\n", | |
"1000 loops, best of 30: 1.55 ms per loop\n", | |
"1000 loops, best of 30: 1.31 ms per loop\n", | |
"1000 loops, best of 30: 1.17 ms per loop\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "tvqiRlF-HNM_", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment