shoyer/dask-task-class-benchmark.ipynb

## dask-task-class-benchmark.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Dask Task class benchmark.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "collapsed_sections": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "[View in Colaboratory](https://colab.research.google.com/gist/shoyer/7a29cc510db06310b370cd4ac537a361/dask-task-class-benchmark.ipynb)"
      ]
    },
    {
      "metadata": {
        "id": "bhCKv70oKkaA",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "## Task classes"
      ]
    },
    {
      "metadata": {
        "id": "gKlRkBE3tcl_",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "9d50d274-2440-4db3-a1d5-617dce72332c"
      },
      "cell_type": "code",
      "source": [
        "! pip3 install cython"
      ],
      "execution_count": 82,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: cython in /usr/local/lib/python3.6/dist-packages (0.28.5)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "odTEEyOft6hF",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 53
        },
        "outputId": "57251fc9-6bf4-40aa-839b-56a63acc95da"
      },
      "cell_type": "code",
      "source": [
        "%load_ext Cython"
      ],
      "execution_count": 83,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "The Cython extension is already loaded. To reload it, use:\n",
            "  %reload_ext Cython\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "U619cqIotI1W",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "class Task(object):\n",
        "    \"\"\"A callable bundled with its arguments and optional annotations.\n",
        "\n",
        "    Built as ``Task(function, *args, annotations=None, **kwargs)``: positional\n",
        "    arguments are kept as the tuple ``args``, ``annotations`` is removed from\n",
        "    the keyword arguments (``None`` when omitted) and the remaining keywords\n",
        "    are kept as the dict ``kwargs``.\n",
        "    \"\"\"\n",
        "    __slots__ = (\"function\", \"args\", \"kwargs\", \"annotations\")\n",
        "\n",
        "    def __init__(self, function, *args, **kwargs):\n",
        "        self.function = function\n",
        "        self.args = args\n",
        "        self.annotations = kwargs.pop(\"annotations\", None)\n",
        "        self.kwargs = kwargs\n",
        "\n",
        "    def __getstate__(self):\n",
        "        \"\"\"Return the four fields as a tuple, in ``__slots__`` order.\"\"\"\n",
        "        return (self.function, self.args, self.kwargs, self.annotations)\n",
        "\n",
        "    def __setstate__(self, state):\n",
        "        \"\"\"Restore the fields from a tuple produced by ``__getstate__``.\"\"\"\n",
        "        self.function = state[0]\n",
        "        self.args = state[1]\n",
        "        self.kwargs = state[2]\n",
        "        self.annotations = state[3]\n",
        "\n",
        "    def __repr__(self):\n",
        "        \"\"\"Show every non-empty field as ``name=value``.\"\"\"\n",
        "        # Instances define no __getitem__, so fields are looked up by slot\n",
        "        # name; falsy fields (empty kwargs, missing annotations) are omitted.\n",
        "        fields = ((name, getattr(self, name)) for name in self.__slots__)\n",
        "        details = \", \".join(\"%s=%s\" % (name, repr(value))\n",
        "                            for name, value in fields\n",
        "                            if value)\n",
        "        return 'Task({})'.format(details)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "bBnE5iuztn2I",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "%%cython\n",
        "\n",
        "cdef class CythonTask:\n",
        "    \"\"\"Extension-type counterpart of ``Task`` with typed, read-only fields.\n",
        "\n",
        "    Built as ``CythonTask(function, *args, annotations=None, **kwargs)``.\n",
        "    \"\"\"\n",
        "    cdef readonly object function\n",
        "    cdef readonly tuple args\n",
        "    cdef readonly dict kwargs\n",
        "    cdef readonly dict annotations\n",
        "\n",
        "    def __init__(self, function, *args, **kwargs):\n",
        "        self.function = function\n",
        "        self.args = args\n",
        "        self.annotations = kwargs.pop(\"annotations\", None)\n",
        "        self.kwargs = kwargs\n",
        "\n",
        "    def __getstate__(self):\n",
        "        return (self.function, self.args, self.kwargs, self.annotations)\n",
        "\n",
        "    def __setstate__(self, state):\n",
        "        self.function = state[0]\n",
        "        self.args = state[1]\n",
        "        self.kwargs = state[2]\n",
        "        self.annotations = state[3]\n",
        "\n",
        "    def __repr__(self):\n",
        "        # Extension types have no __slots__ and are not subscriptable, so the\n",
        "        # fields are enumerated explicitly; falsy fields are left out.\n",
        "        fields = ((\"function\", self.function), (\"args\", self.args),\n",
        "                  (\"kwargs\", self.kwargs), (\"annotations\", self.annotations))\n",
        "        details = \", \".join([\"%s=%s\" % (name, repr(value))\n",
        "                             for name, value in fields\n",
        "                             if value])\n",
        "        return '{}({})'.format(type(self).__name__, details)\n",
        "\n",
        "\n",
        "cdef class CythonTask2:\n",
        "    \"\"\"Variant of ``CythonTask`` that takes ``args``/``kwargs`` as containers.\n",
        "\n",
        "    Built as ``CythonTask2(function, args=(), kwargs=None, annotations=None)``;\n",
        "    omitted ``kwargs``/``annotations`` become a new empty dict per instance.\n",
        "    \"\"\"\n",
        "    cdef readonly object function\n",
        "    cdef readonly tuple args\n",
        "    cdef readonly dict kwargs\n",
        "    cdef readonly dict annotations\n",
        "\n",
        "    def __init__(self, function, args=(), kwargs=None, annotations=None):\n",
        "        self.function = function\n",
        "        self.args = args\n",
        "        # Literal ``{}`` defaults are created once and would be shared by every\n",
        "        # task built without these arguments, so build a fresh dict instead.\n",
        "        self.kwargs = {} if kwargs is None else kwargs\n",
        "        self.annotations = {} if annotations is None else annotations\n",
        "\n",
        "    def __reduce__(self):\n",
        "        # Pickle by re-calling the constructor with the four fields.\n",
        "        state = (self.function, self.args, self.kwargs, self.annotations)\n",
        "        return (CythonTask2, state)\n",
        "\n",
        "    def __repr__(self):\n",
        "        # Same explicit field listing as CythonTask.__repr__.\n",
        "        fields = ((\"function\", self.function), (\"args\", self.args),\n",
        "                  (\"kwargs\", self.kwargs), (\"annotations\", self.annotations))\n",
        "        details = \", \".join([\"%s=%s\" % (name, repr(value))\n",
        "                             for name, value in fields\n",
        "                             if value])\n",
        "        return '{}({})'.format(type(self).__name__, details)\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "9UJbt4A4xQ0h",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "## Creation timings"
      ]
    },
    {
      "metadata": {
        "id": "nY-3IKPZtfvg",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "def f(x):\n",
        "    \"\"\"Return ``x`` plus one; used as the task function in the benchmarks.\"\"\"\n",
        "    incremented = x + 1\n",
        "    return incremented"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "hqkPDkIDD0jH",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 107
        },
        "outputId": "a4c183b7-52ac-4a14-c4ed-645658f37b5a"
      },
      "cell_type": "code",
      "source": [
        "%timeit -r 30 [(f, i, {\"bob\": \"foo\"}, {\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
        "%timeit -r 30 [Task(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
        "%timeit -r 30 [CythonTask(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
        "%timeit -r 30 [CythonTask2(f, (i,), {\"bob\": \"foo\"}, annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "The slowest run took 4.55 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
            "1000 loops, best of 30: 343 µs per loop\n",
            "1000 loops, best of 30: 1.14 ms per loop\n",
            "1000 loops, best of 30: 635 µs per loop\n",
            "1000 loops, best of 30: 548 µs per loop\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "hd7Q0kd2xSfF",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "## Pickle timings"
      ]
    },
    {
      "metadata": {
        "id": "8H9wZd0PvpPW",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "import pickle"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "aN-6FYL9TnIR",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 89
        },
        "outputId": "f6a07313-27c7-4cda-c1ee-cfff4bae44f2"
      },
      "cell_type": "code",
      "source": [
        "tasks = [(f, i, {\"bob\": \"foo\"}, {\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
        "%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n",
        "\n",
        "tasks = [Task(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
        "%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n",
        "\n",
        "tasks = [CythonTask(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
        "%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n",
        "\n",
        "tasks = [CythonTask2(f, (i,), {\"bob\": \"foo\"}, annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
        "%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)"
      ],
      "execution_count": 88,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "1000 loops, best of 30: 500 µs per loop\n",
            "1000 loops, best of 30: 1.55 ms per loop\n",
            "1000 loops, best of 30: 1.31 ms per loop\n",
            "1000 loops, best of 30: 1.17 ms per loop\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "tvqiRlF-HNM_",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "Dask Task class benchmark.ipynb",
	"version": "0.3.2",
	"provenance": [],
	"collapsed_sections": [],
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"[View in Colaboratory](https://colab.research.google.com/gist/shoyer/7a29cc510db06310b370cd4ac537a361/dask-task-class-benchmark.ipynb)"
	]
	},
	{
	"metadata": {
	"id": "bhCKv70oKkaA",
	"colab_type": "text"
	},
	"cell_type": "markdown",
	"source": [
	"## Task classes"
	]
	},
	{
	"metadata": {
	"id": "gKlRkBE3tcl_",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 35
	},
	"outputId": "9d50d274-2440-4db3-a1d5-617dce72332c"
	},
	"cell_type": "code",
	"source": [
	"! pip3 install cython"
	],
	"execution_count": 82,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Requirement already satisfied: cython in /usr/local/lib/python3.6/dist-packages (0.28.5)\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"id": "odTEEyOft6hF",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 53
	},
	"outputId": "57251fc9-6bf4-40aa-839b-56a63acc95da"
	},
	"cell_type": "code",
	"source": [
	"%load_ext Cython"
	],
	"execution_count": 83,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"The Cython extension is already loaded. To reload it, use:\n",
	" %reload_ext Cython\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"id": "U619cqIotI1W",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"class Task(object):\n",
	"    \"\"\"A callable bundled with its arguments and optional annotations.\n",
	"\n",
	"    Built as ``Task(function, *args, annotations=None, **kwargs)``: positional\n",
	"    arguments are kept as the tuple ``args``, ``annotations`` is removed from\n",
	"    the keyword arguments (``None`` when omitted) and the remaining keywords\n",
	"    are kept as the dict ``kwargs``.\n",
	"    \"\"\"\n",
	"    __slots__ = (\"function\", \"args\", \"kwargs\", \"annotations\")\n",
	"\n",
	"    def __init__(self, function, *args, **kwargs):\n",
	"        self.function = function\n",
	"        self.args = args\n",
	"        self.annotations = kwargs.pop(\"annotations\", None)\n",
	"        self.kwargs = kwargs\n",
	"\n",
	"    def __getstate__(self):\n",
	"        \"\"\"Return the four fields as a tuple, in ``__slots__`` order.\"\"\"\n",
	"        return (self.function, self.args, self.kwargs, self.annotations)\n",
	"\n",
	"    def __setstate__(self, state):\n",
	"        \"\"\"Restore the fields from a tuple produced by ``__getstate__``.\"\"\"\n",
	"        self.function = state[0]\n",
	"        self.args = state[1]\n",
	"        self.kwargs = state[2]\n",
	"        self.annotations = state[3]\n",
	"\n",
	"    def __repr__(self):\n",
	"        \"\"\"Show every non-empty field as ``name=value``.\"\"\"\n",
	"        # Instances define no __getitem__, so fields are looked up by slot\n",
	"        # name; falsy fields (empty kwargs, missing annotations) are omitted.\n",
	"        fields = ((name, getattr(self, name)) for name in self.__slots__)\n",
	"        details = \", \".join(\"%s=%s\" % (name, repr(value))\n",
	"                            for name, value in fields\n",
	"                            if value)\n",
	"        return 'Task({})'.format(details)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "bBnE5iuztn2I",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"%%cython\n",
	"\n",
	"cdef class CythonTask:\n",
	"    \"\"\"Extension-type counterpart of ``Task`` with typed, read-only fields.\n",
	"\n",
	"    Built as ``CythonTask(function, *args, annotations=None, **kwargs)``.\n",
	"    \"\"\"\n",
	"    cdef readonly object function\n",
	"    cdef readonly tuple args\n",
	"    cdef readonly dict kwargs\n",
	"    cdef readonly dict annotations\n",
	"\n",
	"    def __init__(self, function, *args, **kwargs):\n",
	"        self.function = function\n",
	"        self.args = args\n",
	"        self.annotations = kwargs.pop(\"annotations\", None)\n",
	"        self.kwargs = kwargs\n",
	"\n",
	"    def __getstate__(self):\n",
	"        return (self.function, self.args, self.kwargs, self.annotations)\n",
	"\n",
	"    def __setstate__(self, state):\n",
	"        self.function = state[0]\n",
	"        self.args = state[1]\n",
	"        self.kwargs = state[2]\n",
	"        self.annotations = state[3]\n",
	"\n",
	"    def __repr__(self):\n",
	"        # Extension types have no __slots__ and are not subscriptable, so the\n",
	"        # fields are enumerated explicitly; falsy fields are left out.\n",
	"        fields = ((\"function\", self.function), (\"args\", self.args),\n",
	"                  (\"kwargs\", self.kwargs), (\"annotations\", self.annotations))\n",
	"        details = \", \".join([\"%s=%s\" % (name, repr(value))\n",
	"                             for name, value in fields\n",
	"                             if value])\n",
	"        return '{}({})'.format(type(self).__name__, details)\n",
	"\n",
	"\n",
	"cdef class CythonTask2:\n",
	"    \"\"\"Variant of ``CythonTask`` that takes ``args``/``kwargs`` as containers.\n",
	"\n",
	"    Built as ``CythonTask2(function, args=(), kwargs=None, annotations=None)``;\n",
	"    omitted ``kwargs``/``annotations`` become a new empty dict per instance.\n",
	"    \"\"\"\n",
	"    cdef readonly object function\n",
	"    cdef readonly tuple args\n",
	"    cdef readonly dict kwargs\n",
	"    cdef readonly dict annotations\n",
	"\n",
	"    def __init__(self, function, args=(), kwargs=None, annotations=None):\n",
	"        self.function = function\n",
	"        self.args = args\n",
	"        # Literal ``{}`` defaults are created once and would be shared by every\n",
	"        # task built without these arguments, so build a fresh dict instead.\n",
	"        self.kwargs = {} if kwargs is None else kwargs\n",
	"        self.annotations = {} if annotations is None else annotations\n",
	"\n",
	"    def __reduce__(self):\n",
	"        # Pickle by re-calling the constructor with the four fields.\n",
	"        state = (self.function, self.args, self.kwargs, self.annotations)\n",
	"        return (CythonTask2, state)\n",
	"\n",
	"    def __repr__(self):\n",
	"        # Same explicit field listing as CythonTask.__repr__.\n",
	"        fields = ((\"function\", self.function), (\"args\", self.args),\n",
	"                  (\"kwargs\", self.kwargs), (\"annotations\", self.annotations))\n",
	"        details = \", \".join([\"%s=%s\" % (name, repr(value))\n",
	"                             for name, value in fields\n",
	"                             if value])\n",
	"        return '{}({})'.format(type(self).__name__, details)\n"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "9UJbt4A4xQ0h",
	"colab_type": "text"
	},
	"cell_type": "markdown",
	"source": [
	"## Creation timings"
	]
	},
	{
	"metadata": {
	"id": "nY-3IKPZtfvg",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"def f(x):\n",
	"    \"\"\"Return ``x`` plus one; used as the task function in the benchmarks.\"\"\"\n",
	"    incremented = x + 1\n",
	"    return incremented"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "hqkPDkIDD0jH",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 107
	},
	"outputId": "a4c183b7-52ac-4a14-c4ed-645658f37b5a"
	},
	"cell_type": "code",
	"source": [
	"%timeit -r 30 [(f, i, {\"bob\": \"foo\"}, {\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
	"%timeit -r 30 [Task(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
	"%timeit -r 30 [CythonTask(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
	"%timeit -r 30 [CythonTask2(f, (i,), {\"bob\": \"foo\"}, annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]"
	],
	"execution_count": 0,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"The slowest run took 4.55 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
	"1000 loops, best of 30: 343 µs per loop\n",
	"1000 loops, best of 30: 1.14 ms per loop\n",
	"1000 loops, best of 30: 635 µs per loop\n",
	"1000 loops, best of 30: 548 µs per loop\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"id": "hd7Q0kd2xSfF",
	"colab_type": "text"
	},
	"cell_type": "markdown",
	"source": [
	"## Pickle timings"
	]
	},
	{
	"metadata": {
	"id": "8H9wZd0PvpPW",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"import pickle"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "aN-6FYL9TnIR",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 89
	},
	"outputId": "f6a07313-27c7-4cda-c1ee-cfff4bae44f2"
	},
	"cell_type": "code",
	"source": [
	"tasks = [(f, i, {\"bob\": \"foo\"}, {\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
	"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n",
	"\n",
	"tasks = [Task(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
	"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n",
	"\n",
	"tasks = [CythonTask(f, i, bob=\"foo\", annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
	"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)\n",
	"\n",
	"tasks = [CythonTask2(f, (i,), {\"bob\": \"foo\"}, annotations={\"resource\": \"GPU\", \"size\": i}) for i in range(1000)]\n",
	"%timeit -r 30 pickle.dumps(tasks, protocol=pickle.HIGHEST_PROTOCOL)"
	],
	"execution_count": 88,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"1000 loops, best of 30: 500 µs per loop\n",
	"1000 loops, best of 30: 1.55 ms per loop\n",
	"1000 loops, best of 30: 1.31 ms per loop\n",
	"1000 loops, best of 30: 1.17 ms per loop\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"id": "tvqiRlF-HNM_",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	""
	],
	"execution_count": 0,
	"outputs": []
	}
	]
	}