Skip to content

Instantly share code, notes, and snippets.

@alimanfoo
Created July 26, 2019 10:35
Show Gist options
  • Save alimanfoo/b074a7685c707af5a58fc0136c91eea7 to your computer and use it in GitHub Desktop.
Save alimanfoo/b074a7685c707af5a58fc0136c91eea7 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Client</h3>\n",
"<ul>\n",
" <li><b>Scheduler: </b>tcp://127.0.0.1:38871\n",
" <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Cluster</h3>\n",
"<ul>\n",
" <li><b>Workers: </b>4</li>\n",
" <li><b>Cores: </b>8</li>\n",
" <li><b>Memory: </b>33.59 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: scheduler='tcp://127.0.0.1:38871' processes=4 cores=8>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from distributed import Client\n",
"client = Client()\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import dask.array as da\n",
"import zarr"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table> <thead> <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 8.00 kB </td> <td> 800 B </td></tr>\n",
" <tr><th> Shape </th><td> (1000,) </td> <td> (100,) </td></tr>\n",
" <tr><th> Count </th><td> 10 Tasks </td><td> 10 Chunks </td></tr>\n",
" <tr><th> Type </th><td> int64 </td><td> numpy.ndarray </td></tr>\n",
" </tbody></table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"170\" height=\"75\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"25\" x2=\"120\" y2=\"25\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"25\" style=\"stroke-width:2\" />\n",
" <line x1=\"12\" y1=\"0\" x2=\"12\" y2=\"25\" />\n",
" <line x1=\"24\" y1=\"0\" x2=\"24\" y2=\"25\" />\n",
" <line x1=\"36\" y1=\"0\" x2=\"36\" y2=\"25\" />\n",
" <line x1=\"48\" y1=\"0\" x2=\"48\" y2=\"25\" />\n",
" <line x1=\"60\" y1=\"0\" x2=\"60\" y2=\"25\" />\n",
" <line x1=\"72\" y1=\"0\" x2=\"72\" y2=\"25\" />\n",
" <line x1=\"84\" y1=\"0\" x2=\"84\" y2=\"25\" />\n",
" <line x1=\"96\" y1=\"0\" x2=\"96\" y2=\"25\" />\n",
" <line x1=\"108\" y1=\"0\" x2=\"108\" y2=\"25\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"25\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 120.000000,0.000000 120.000000,25.412617 0.000000,25.412617\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.000000\" y=\"45.412617\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1000</text>\n",
" <text x=\"140.000000\" y=\"12.706308\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,12.706308)\">1</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<randint, shape=(1000,), dtype=int64, chunksize=(100,)>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = da.random.randint(0, 4, size=(1000,), chunks=(100,))\n",
"x = x.rechunk((100,))\n",
"x"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manual way"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<zarr.hierarchy.Group '/'>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"store = zarr.DirectoryStore('example.zarr')\n",
"# zarr.group is a convenience function: it creates the group if it doesn't exist yet,\n",
"# or opens the group if it already exists\n",
"root = zarr.group(store=store)\n",
"root"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<zarr.hierarchy.Group '/'>"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# unlike zarr.group, the zarr.Group constructor only opens an existing group; it never creates one\n",
"root = zarr.Group(store=store)\n",
"root"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<zarr.core.Array '/dest' (1000,) int64>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dest = root.empty('dest', shape=x.shape, chunks=x.chunksize, dtype=x.dtype, overwrite=True)\n",
"dest"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"x.store(dest, lock=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Slightly more convenient way"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"x.to_zarr(store, component='dest2')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<link rel=\"stylesheet\" href=\"//cdnjs.cloudflare.com/ajax/libs/jstree/3.3.3/themes/default/style.min.css\"/><div id=\"422c69d7-9a28-43b3-8f76-73549c2b2953\" class=\"zarr-tree\"><ul><li data-jstree='{\"type\": \"Group\"}' class='jstree-open'><span>/</span><ul><li data-jstree='{\"type\": \"Array\"}' class=''><span>dest (1000,) int64</span></li><li data-jstree='{\"type\": \"Array\"}' class=''><span>dest2 (1000,) int64</span></li></ul></li></ul></div>\n",
"<script>\n",
" if (!require.defined('jquery')) {\n",
" require.config({\n",
" paths: {\n",
" jquery: '//cdnjs.cloudflare.com/ajax/libs/jquery/1.12.1/jquery.min'\n",
" },\n",
" });\n",
" }\n",
" if (!require.defined('jstree')) {\n",
" require.config({\n",
" paths: {\n",
" jstree: '//cdnjs.cloudflare.com/ajax/libs/jstree/3.3.3/jstree.min'\n",
" },\n",
" });\n",
" }\n",
" require(['jstree'], function() {\n",
" $('#422c69d7-9a28-43b3-8f76-73549c2b2953').jstree({\n",
" types: {\n",
" Group: {\n",
" icon: \"fa fa-folder\"\n",
" },\n",
" Array: {\n",
" icon: \"fa fa-table\"\n",
" }\n",
" },\n",
" plugins: [\"types\"]\n",
" });\n",
" });\n",
"</script>\n"
],
"text/plain": [
"/\n",
" ├── dest (1000,) int64\n",
" └── dest2 (1000,) int64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"root.tree()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Not this way\n",
"\n",
"This does work, but the data is not written in parallel."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"dest[:] = 0"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"dest[:] = x"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 3, 2, 1, 1, 2, 1, 3, 3, 2, 1, 1, 3, 0, 1, 2, 2, 3, 3, 2, 1, 3,\n",
" 1, 0, 2, 3, 0, 2, 2, 2, 0, 2, 3, 0, 1, 1, 0, 2, 3, 0, 1, 1, 2, 3,\n",
" 2, 3, 3, 0, 3, 2, 1, 1, 2, 3, 0, 0, 0, 3, 1, 0, 2, 2, 0, 3, 3, 3,\n",
" 3, 0, 2, 2, 3, 2, 2, 1, 0, 2, 0, 1, 3, 3, 0, 1, 2, 1, 2, 2, 3, 2,\n",
" 0, 3, 2, 1, 1, 1, 2, 0, 0, 0, 3, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 2,\n",
" 1, 3, 0, 2, 3, 0, 2, 3, 3, 1, 1, 2, 3, 2, 2, 0, 3, 3, 3, 1, 1, 1,\n",
" 1, 0, 3, 3, 3, 0, 0, 1, 3, 1, 0, 0, 1, 1, 1, 1, 0, 2, 1, 0, 1, 3,\n",
" 0, 3, 0, 0, 1, 0, 2, 2, 3, 1, 2, 0, 2, 3, 2, 2, 1, 2, 3, 1, 0, 2,\n",
" 0, 2, 1, 2, 2, 0, 1, 0, 0, 0, 2, 0, 2, 2, 3, 1, 2, 2, 1, 2, 2, 2,\n",
" 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 1, 2, 0, 3, 0, 0, 0, 2, 1, 3, 0, 1,\n",
" 2, 0, 2, 1, 0, 2, 2, 3, 3, 0, 1, 1, 1, 1, 2, 2, 2, 0, 1, 1, 3, 1,\n",
" 0, 1, 2, 1, 3, 2, 3, 0, 2, 3, 0, 0, 0, 0, 1, 0, 0, 1, 3, 1, 3, 2,\n",
" 3, 3, 3, 2, 1, 1, 2, 0, 3, 1, 3, 3, 0, 1, 1, 3, 1, 0, 2, 1, 3, 2,\n",
" 2, 0, 1, 3, 2, 3, 0, 1, 3, 0, 0, 3, 3, 3, 1, 2, 1, 1, 2, 0, 0, 2,\n",
" 3, 1, 2, 0, 0, 3, 0, 1, 0, 0, 2, 3, 0, 1, 0, 2, 2, 3, 3, 3, 1, 3,\n",
" 1, 2, 3, 1, 3, 0, 0, 0, 2, 2, 3, 3, 1, 0, 2, 2, 1, 1, 0, 0, 3, 3,\n",
" 0, 1, 0, 0, 3, 3, 2, 3, 1, 3, 1, 3, 0, 2, 0, 2, 0, 0, 1, 0, 0, 1,\n",
" 1, 3, 1, 0, 3, 0, 3, 0, 1, 3, 0, 0, 3, 0, 1, 0, 1, 1, 2, 0, 0, 0,\n",
" 1, 3, 1, 2, 1, 0, 1, 0, 0, 2, 0, 0, 3, 2, 3, 1, 3, 1, 0, 3, 2, 3,\n",
" 1, 3, 2, 0, 2, 1, 2, 2, 3, 0, 2, 3, 1, 1, 2, 0, 2, 1, 3, 2, 1, 1,\n",
" 3, 0, 3, 2, 3, 0, 0, 0, 1, 3, 3, 2, 0, 0, 3, 1, 2, 3, 3, 2, 0, 3,\n",
" 1, 0, 0, 0, 2, 3, 1, 1, 2, 1, 2, 2, 2, 1, 3, 1, 2, 2, 0, 1, 2, 0,\n",
" 2, 0, 3, 2, 1, 1, 1, 0, 2, 2, 3, 2, 3, 1, 2, 0, 1, 0, 2, 3, 1, 3,\n",
" 2, 1, 3, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 0, 2, 2, 1, 3, 1, 2, 0,\n",
" 0, 0, 0, 1, 2, 3, 3, 2, 3, 2, 3, 0, 1, 2, 2, 3, 0, 3, 3, 3, 0, 0,\n",
" 1, 0, 1, 1, 0, 0, 0, 0, 2, 3, 0, 2, 2, 1, 1, 2, 2, 1, 3, 1, 1, 2,\n",
" 0, 3, 1, 2, 3, 3, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 0, 3, 2, 1, 2, 1,\n",
" 1, 0, 3, 0, 1, 2, 3, 0, 2, 0, 3, 2, 3, 1, 0, 1, 3, 2, 3, 0, 2, 3,\n",
" 0, 2, 2, 2, 1, 0, 1, 3, 1, 3, 1, 2, 3, 3, 0, 0, 0, 0, 3, 0, 3, 2,\n",
" 2, 2, 0, 0, 3, 0, 0, 2, 3, 0, 3, 0, 0, 1, 2, 2, 3, 0, 3, 3, 2, 0,\n",
" 3, 1, 3, 0, 1, 3, 2, 0, 0, 3, 3, 3, 2, 0, 3, 0, 0, 0, 2, 2, 0, 2,\n",
" 2, 2, 1, 0, 2, 1, 0, 2, 0, 3, 0, 2, 1, 3, 3, 2, 2, 0, 2, 2, 3, 1,\n",
" 1, 3, 3, 3, 3, 3, 0, 2, 0, 0, 2, 1, 3, 0, 1, 1, 0, 0, 1, 1, 3, 0,\n",
" 3, 0, 2, 1, 0, 3, 1, 0, 0, 2, 3, 2, 0, 2, 0, 0, 2, 3, 1, 0, 1, 3,\n",
" 0, 3, 1, 0, 2, 2, 0, 1, 0, 0, 3, 3, 1, 0, 2, 1, 3, 3, 3, 3, 1, 0,\n",
" 1, 1, 3, 0, 3, 0, 1, 2, 2, 1, 0, 2, 2, 1, 2, 3, 1, 2, 1, 3, 1, 1,\n",
" 2, 2, 1, 3, 2, 3, 3, 3, 1, 2, 2, 0, 2, 3, 2, 2, 0, 3, 3, 1, 0, 3,\n",
" 0, 3, 2, 2, 0, 2, 3, 3, 0, 2, 0, 1, 1, 0, 1, 3, 0, 3, 3, 0, 0, 2,\n",
" 1, 0, 3, 0, 2, 1, 2, 1, 0, 2, 3, 1, 3, 2, 1, 1, 3, 0, 3, 2, 1, 3,\n",
" 3, 2, 1, 1, 1, 1, 3, 0, 1, 0, 3, 1, 3, 1, 0, 3, 3, 2, 0, 1, 2, 0,\n",
" 3, 2, 0, 2, 0, 3, 0, 0, 3, 1, 3, 2, 2, 2, 2, 1, 1, 0, 1, 1, 0, 1,\n",
" 3, 3, 2, 2, 0, 0, 2, 2, 1, 2, 3, 0, 0, 3, 2, 3, 1, 0, 3, 2, 1, 1,\n",
" 0, 2, 1, 0, 3, 1, 0, 0, 1, 0, 2, 1, 3, 1, 0, 0, 1, 3, 1, 3, 1, 0,\n",
" 2, 0, 3, 1, 2, 0, 2, 0, 1, 3, 1, 1, 0, 3, 1, 1, 0, 2, 0, 1, 2, 2,\n",
" 0, 0, 0, 2, 0, 3, 2, 1, 1, 1, 2, 1, 0, 0, 1, 3, 0, 2, 1, 1, 3, 3,\n",
" 1, 0, 3, 1, 0, 1, 3, 1, 2, 2])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dest[:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"jupytext": {
"text_representation": {
"extension": ".py",
"format_name": "percent",
"format_version": "1.2",
"jupytext_version": "1.1.7"
}
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment