Skip to content

Instantly share code, notes, and snippets.

@shoyer
Last active August 17, 2018 19:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shoyer/7a29cc510db06310b370cd4ac537a361 to your computer and use it in GitHub Desktop.
Save shoyer/7a29cc510db06310b370cd4ac537a361 to your computer and use it in GitHub Desktop.
Dask Task class benchmark.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Dask Task class benchmark.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"[View in Colaboratory](https://colab.research.google.com/gist/shoyer/7a29cc510db06310b370cd4ac537a361/dask-task-class-benchmark.ipynb)"
]
},
{
"metadata": {
"id": "bhCKv70oKkaA",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"## Task classes"
]
},
{
"metadata": {
"id": "gKlRkBE3tcl_",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"outputId": "9d50d274-2440-4db3-a1d5-617dce72332c"
},
"cell_type": "code",
"source": [
"! pip3 install cython"
],
"execution_count": 82,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: cython in /usr/local/lib/python3.6/dist-packages (0.28.5)\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "odTEEyOft6hF",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 53
},
"outputId": "57251fc9-6bf4-40aa-839b-56a63acc95da"
},
"cell_type": "code",
"source": [
"%load_ext Cython"
],
"execution_count": 83,
"outputs": [
{
"output_type": "stream",
"text": [
"The Cython extension is already loaded. To reload it, use:\n",
" %reload_ext Cython\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "U619cqIotI1W",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"class Task(object):\n",
"    \"\"\"A deferred call: a callable bundled with its arguments.\n",
"\n",
"    Positional arguments after ``function`` are stored as the ``args`` tuple\n",
"    and keyword arguments as the ``kwargs`` dict. The keyword ``annotations``\n",
"    is reserved: it is popped out of ``kwargs`` and stored on its own\n",
"    (``None`` when not supplied).\n",
"    \"\"\"\n",
"\n",
"    __slots__ = (\"function\", \"args\", \"kwargs\", \"annotations\")\n",
"\n",
"    def __init__(self, function, *args, **kwargs):\n",
"        self.function = function\n",
"        self.args = args\n",
"        # Pop before storing so ``annotations`` never ends up inside kwargs.\n",
"        self.annotations = kwargs.pop(\"annotations\", None)\n",
"        self.kwargs = kwargs\n",
"\n",
"    def __getstate__(self):\n",
"        \"\"\"Return the pickle state as a tuple in ``__slots__`` order.\"\"\"\n",
"        return (self.function, self.args, self.kwargs, self.annotations)\n",
"\n",
"    def __setstate__(self, state):\n",
"        \"\"\"Restore the four slots from the tuple built by ``__getstate__``.\"\"\"\n",
"        self.function = state[0]\n",
"        self.args = state[1]\n",
"        self.kwargs = state[2]\n",
"        self.annotations = state[3]\n",
"\n",
"    def __repr__(self):\n",
"        \"\"\"Return ``Task(name=value, ...)`` listing only the truthy slots.\"\"\"\n",
"        # Slots are ordinary attributes and instances are not subscriptable,\n",
"        # so every value is looked up by name with getattr().\n",
"        shown = [(name, getattr(self, name)) for name in self.__slots__]\n",
"        details = \", \".join(\"%s=%r\" % pair for pair in shown if pair[1])\n",
"        return \"{}({})\".format(type(self).__name__, details)"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "bBnE5iuztn2I",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"%%cython\n",
"\n",
"cdef class CythonTask:\n",
"    \"\"\"Cython extension-type variant of ``Task``.\n",
"\n",
"    Positional arguments after ``function`` are stored as the ``args`` tuple\n",
"    and keyword arguments as the ``kwargs`` dict. The keyword ``annotations``\n",
"    is reserved: it is popped out of ``kwargs`` and stored on its own\n",
"    (``None`` when not supplied).\n",
"    \"\"\"\n",
"\n",
"    cdef readonly object function\n",
"    cdef readonly tuple args\n",
"    cdef readonly dict kwargs\n",
"    cdef readonly dict annotations\n",
"\n",
"    def __init__(self, function, *args, **kwargs):\n",
"        self.function = function\n",
"        self.args = args\n",
"        # Pop before storing so ``annotations`` never ends up inside kwargs.\n",
"        self.annotations = kwargs.pop(\"annotations\", None)\n",
"        self.kwargs = kwargs\n",
"\n",
"    def __getstate__(self):\n",
"        \"\"\"Return the pickle state as (function, args, kwargs, annotations).\"\"\"\n",
"        return (self.function, self.args, self.kwargs, self.annotations)\n",
"\n",
"    def __setstate__(self, state):\n",
"        \"\"\"Restore the attributes from the tuple built by ``__getstate__``.\"\"\"\n",
"        self.function = state[0]\n",
"        self.args = state[1]\n",
"        self.kwargs = state[2]\n",
"        self.annotations = state[3]\n",
"\n",
"    def __repr__(self):\n",
"        \"\"\"Return ``CythonTask(name=value, ...)`` listing only truthy fields.\"\"\"\n",
"        # This class defines neither __slots__ nor __getitem__, so the\n",
"        # declared attributes are enumerated explicitly.\n",
"        fields = (\n",
"            (\"function\", self.function),\n",
"            (\"args\", self.args),\n",
"            (\"kwargs\", self.kwargs),\n",
"            (\"annotations\", self.annotations),\n",
"        )\n",
"        details = \", \".join([\"%s=%r\" % pair for pair in fields if pair[1]])\n",
"        return \"{}({})\".format(type(self).__name__, details)\n",
"\n",
" \n",
"cdef class CythonTask2:\n",
"    \"\"\"Cython task variant taking ``args``/``kwargs``/``annotations`` explicitly.\n",
"\n",
"    Unlike ``CythonTask``, the constructor does not use ``*args``/``**kwargs``;\n",
"    the caller passes the argument tuple and the two dicts directly. Omitted\n",
"    (or ``None``) ``kwargs``/``annotations`` become a new empty dict.\n",
"    \"\"\"\n",
"\n",
"    cdef readonly object function\n",
"    cdef readonly tuple args\n",
"    cdef readonly dict kwargs\n",
"    cdef readonly dict annotations\n",
"\n",
"    def __init__(self, function, args=(), kwargs=None, annotations=None):\n",
"        self.function = function\n",
"        self.args = args\n",
"        # A ``{}`` default would be created once and shared by every instance\n",
"        # that omits the argument; build a fresh dict per instance instead.\n",
"        self.kwargs = {} if kwargs is None else kwargs\n",
"        self.annotations = {} if annotations is None else annotations\n",
"\n",
"    def __reduce__(self):\n",
"        \"\"\"Pickle by re-calling the constructor with the stored fields.\"\"\"\n",
"        state = (self.function, self.args, self.kwargs, self.annotations)\n",
"        return (CythonTask2, state)\n",
"\n",
"    def __repr__(self):\n",
"        \"\"\"Return ``CythonTask2(name=value, ...)`` listing only truthy fields.\"\"\"\n",
"        # This class defines neither __slots__ nor __getitem__, so the\n",
"        # declared attributes are enumerated explicitly.\n",
"        fields = (\n",
"            (\"function\", self.function),\n",
"            (\"args\", self.args),\n",
"            (\"kwargs\", self.kwargs),\n",
"            (\"annotations\", self.annotations),\n",
"        )\n",
"        details = \", \".join([\"%s=%r\" % pair for pair in fields if pair[1]])\n",
"        return \"{}({})\".format(type(self).__name__, details)\n"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "9UJbt4A4xQ0h",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"## Creation timings"
]
},
{
"metadata": {
"id": "nY-3IKPZtfvg",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"def f(x):\n",
"    \"\"\"Return ``x + 1``; a trivial callable used as the benchmark payload.\"\"\"\n",
"    incremented = x + 1\n",
"    return incremented"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "hqkPDkIDD0jH",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 107
},
"outputId": "a4c183b7-52ac-4a14-c4ed-645658f37b5a"
},
"cell_type": "code",
"source": [
"%timeit -r 30 [(f, i, {\"bob\": \"foo\"}, {\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
"%timeit -r 30 [Task(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
"%timeit -r 30 [CythonTask(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
"%timeit -r 30 [CythonTask2(f, (i,), {\"bob\": \"foo\"}, annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"The slowest run took 4.55 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"1000 loops, best of 30: 343 µs per loop\n",
"1000 loops, best of 30: 1.14 ms per loop\n",
"1000 loops, best of 30: 635 µs per loop\n",
"1000 loops, best of 30: 548 µs per loop\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "hd7Q0kd2xSfF",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"## Pickle timings"
]
},
{
"metadata": {
"id": "8H9wZd0PvpPW",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"import pickle"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "aN-6FYL9TnIR",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 89
},
"outputId": "f6a07313-27c7-4cda-c1ee-cfff4bae44f2"
},
"cell_type": "code",
"source": [
"tasks = [(f, i, {\"bob\": \"foo\"}, {\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n",
"\n",
"tasks = [Task(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n",
"\n",
"tasks = [CythonTask(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n",
"\n",
"tasks = [CythonTask2(f, (i,), {\"bob\": \"foo\"}, annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)"
],
"execution_count": 88,
"outputs": [
{
"output_type": "stream",
"text": [
"1000 loops, best of 30: 500 µs per loop\n",
"1000 loops, best of 30: 1.55 ms per loop\n",
"1000 loops, best of 30: 1.31 ms per loop\n",
"1000 loops, best of 30: 1.17 ms per loop\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "tvqiRlF-HNM_",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment