Skip to content

Instantly share code, notes, and snippets.

@TAdeJong
Last active November 9, 2023 09:32
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TAdeJong/abc70b33f54c4230e3da1119c7a20609 to your computer and use it in GitHub Desktop.
Save TAdeJong/abc70b33f54c4230e3da1119c7a20609 to your computer and use it in GitHub Desktop.
A simple notebook showcasing a quick hack to visualize a Dask array computation as it completes.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dask array visualization during compute\n",
"\n",
"This notebook shows a quick hack to visualize a Dask array as its computation progresses. It uses the futures held by the array returned from a `.persist()` call. Of course, this only works for end results that are small enough to fit in local memory! The plotting could be done in a much nicer way, for example using Bokeh, but I am more familiar with matplotlib, so this is a quick first proof of principle. First, the needed imports:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import dask\n",
"import dask.array as da\n",
"from dask.distributed import Client, as_completed\n",
"from IPython.display import display, clear_output\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Client</h3>\n",
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
" <li><b>Scheduler: </b>tcp://127.0.0.1:36659</li>\n",
" <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
" <li><b>Workers: </b>1</li>\n",
" <li><b>Cores: </b>24</li>\n",
" <li><b>Memory: </b>33.67 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: 'tcp://127.0.0.1:36659' processes=1 threads=24, memory=33.67 GB>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Only works with the distributed scheduler. Open the Dashboard to compare.\n",
"Client(n_workers=1, nthreads=1)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Create some work\n",
"D = da.random.random((50,80,8000), chunks=(5,4,2000))\n",
"Dt = D * D\n",
"Dt = Dt.std(axis=-1)\n",
"#Dt.__dask_graph__().layers"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Definition of the helper function: persist a Dask array and plot its chunks as they complete\n",
"def persist_and_visualize(dA):\n",
" A = dA.persist()\n",
" res = np.full(A.shape, np.nan)\n",
" chunksize= np.array(A.chunksize)\n",
" futures = next(iter(A.__dask_graph__().layers.values())).values()\n",
" fig, ax = plt.subplots()\n",
" for batch in as_completed(futures, with_results=True).batches():\n",
" for future, result in batch:\n",
" index = np.array(future.key[1:])*chunksize\n",
" res[tuple(slice(i,j) for i,j in zip(tuple(index), tuple(index+chunksize)))] = result\n",
" ax.imshow(res)\n",
" display(fig)\n",
" clear_output(wait=True)\n",
" plt.pause(0.01)\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Actual live visualization of results as they come in now:\n",
"persist_and_visualize(Dt);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:root] *",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
numpy
matplotlib
ipywidgets
dask
distributed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment