Skip to content

Instantly share code, notes, and snippets.

@GenevieveBuckley
Created August 7, 2020 15:16
Show Gist options
  • Save GenevieveBuckley/38d8183d510b1938d40ba4e5246ae67c to your computer and use it in GitHub Desktop.
Save GenevieveBuckley/38d8183d510b1938d40ba4e5246ae67c to your computer and use it in GitHub Desktop.
dask bug in meta (map_blocks vs map_overlap)
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Untitled2.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "IwFzw9-ydIjf",
"colab_type": "code",
"colab": {}
},
"source": [
"import dask.array as da\n",
"import cupy as cp"
],
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "qCqopm8jdO-P",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 198
},
"outputId": "728926de-8206-455c-d12f-e3909cfa0417"
},
"source": [
"arr = da.from_array(cp.random.random((10, 10)), chunks=5)\n",
"arr"
],
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 800 B </td> <td> 200 B </td></tr>\n",
" <tr><th> Shape </th><td> (10, 10) </td> <td> (5, 5) </td></tr>\n",
" <tr><th> Count </th><td> 5 Tasks </td><td> 4 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float64 </td><td> cupy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"60\" x2=\"120\" y2=\"60\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"60\" y1=\"0\" x2=\"60\" y2=\"120\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 120.000000,0.000000 120.000000,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >10</text>\n",
" <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">10</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<array, shape=(10, 10), dtype=float64, chunksize=(5, 5), chunktype=cupy.ndarray>"
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "LgH2BgiMdoJV",
"colab_type": "code",
"colab": {}
},
"source": [
"def plus_one(arr):\n",
" return arr + 1"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ypBiS3-4dtKW",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 198
},
"outputId": "b75a1d85-dc15-4aa6-eb2b-2f61a33ca816"
},
"source": [
"result_1 = arr.map_blocks(plus_one, meta=arr._meta)\n",
"result_1.compute()\n",
"result_1"
],
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 800 B </td> <td> 200 B </td></tr>\n",
" <tr><th> Shape </th><td> (10, 10) </td> <td> (5, 5) </td></tr>\n",
" <tr><th> Count </th><td> 9 Tasks </td><td> 4 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float64 </td><td> cupy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"60\" x2=\"120\" y2=\"60\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"60\" y1=\"0\" x2=\"60\" y2=\"120\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 120.000000,0.000000 120.000000,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >10</text>\n",
" <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">10</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<plus_one, shape=(10, 10), dtype=float64, chunksize=(5, 5), chunktype=cupy.ndarray>"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "j0vExcxxd6Tk",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 198
},
"outputId": "4d85193d-8068-42c2-9ce4-79ae924af411"
},
"source": [
"result_2 = arr.map_overlap(plus_one, meta=arr._meta, depth=0)\n",
"result_2.compute()\n",
"result_2"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 800 B </td> <td> 200 B </td></tr>\n",
" <tr><th> Shape </th><td> (10, 10) </td> <td> (5, 5) </td></tr>\n",
" <tr><th> Count </th><td> 25 Tasks </td><td> 4 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float64 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"60\" x2=\"120\" y2=\"60\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"60\" y1=\"0\" x2=\"60\" y2=\"120\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 120.000000,0.000000 120.000000,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >10</text>\n",
" <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,60.000000)\">10</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<_trim, shape=(10, 10), dtype=float64, chunksize=(5, 5), chunktype=numpy.ndarray>"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "t_1cQZFceAvc",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "5d146cbd-f6cb-42f9-a531-113e61e41b2e"
},
"source": [
"print(type(result_1._meta)) # expect cupy.ndarray, no surprises here"
],
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": [
"<class 'cupy.core.core.ndarray'>\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "1uzJh4DleaMO",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "e21de7b2-5283-4ead-f00c-783f1e3b5252"
},
"source": [
"print(type(result_2._meta)) # expect cupy.ndarray, but numpy.ndarray is reported instead"
],
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"text": [
"<class 'numpy.ndarray'>\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "L4L_5Te5emBf",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment