Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save GenevieveBuckley/f9f8219de5c052c3deb234cc44ebc0a2 to your computer and use it in GitHub Desktop.
Save GenevieveBuckley/f9f8219de5c052c3deb234cc44ebc0a2 to your computer and use it in GitHub Desktop.
Dask task graph handling costs on the client
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "6313a4df",
"metadata": {},
"source": [
"# Dask task graph handling costs on the client"
]
},
{
"cell_type": "markdown",
"id": "08935a8f",
"metadata": {},
"source": [
"Example from the \"Doing Nothing Poorly: Accelerating Dask Scheduling\" workshop.\n",
"Dask Summit 2021.\n",
"\n",
"https://summit.dask.org/schedule/presentation/42/doing-nothing-poorly-accelerating-dask-scheduling/"
]
},
{
"cell_type": "markdown",
"id": "59946940",
"metadata": {},
"source": [
"[Link to slides - click here!](https://docs.google.com/presentation/d/e/2PACX-1vQB5mHjhi3f3AvVvLpdsnNqCxQDDlvKTR8cfinQRG3AbIPLBUaiSu6TROSZh7HRcN0UZ2uak_C-Q7FJ/pub?start=false&loop=false&delayms=3000#slide=id.gd9b9e2cb51_0_107)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "3151ec96",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"import dask\n",
"from dask.datasets import timeseries"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "29d55731",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2021.04.1+25.ge9fdb758\n"
]
}
],
"source": [
"print(dask.__version__)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1f43d470",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 787 ms, sys: 925 ms, total: 1.71 s\n",
"Wall time: 655 ms\n"
]
}
],
"source": [
"# Create task graph on client\n",
"%time ddf = timeseries().shuffle(\"id\", shuffle=\"tasks\").head(compute=False)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c5126df5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 751 µs, sys: 377 µs, total: 1.13 ms\n",
"Wall time: 947 µs\n"
]
}
],
"source": [
"# Optimize\n",
"%time ddf_opt, = dask.optimize(ddf)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2348c87d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 4.21 ms, sys: 139 µs, total: 4.35 ms\n",
"Wall time: 4.25 ms\n"
]
}
],
"source": [
"%%time\n",
"# Serialize\n",
"byte_total = 0\n",
"for k, v in ddf_opt.__dask_graph__().items():\n",
" byte_total += len(pickle.dumps(k)) + len(pickle.dumps(v))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "58749d07",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'29.79 kiB'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Send to the scheduler\n",
"dask.utils.format_bytes(byte_total)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0c0a41a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
# Example from the "Doing Nothing Poorly: Accelerating Dask Scheduling" workshop
# Dask Summit 2021
# Task graph handling costs on the client
import pickle
import dask
from dask.datasets import timeseries
# Create dask task graph
%time ddf = timeseries().shuffle("id", shuffle="tasks").head(compute=False)
# Wall time: 4.01 s
# Optimize
%time ddf_opt, = dask.optimize(ddf)
# Wall time: 1.3 s
# Serialize
byte_total = 0
for k, v in ddf_opt.__dask_graph__().items():
byte_total += len(pickle.dumps(k)) + len(pickle.dumps(v))
# Wall time: 731 ms
# Send to the scheduler
dask.utils.format_bytes(byte_total)
# '15.88 MB' (Assume ~587 ms at 100MB/s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment