Skip to content

Instantly share code, notes, and snippets.

@nvictus
Forked from mrocklin/zarr-s3.ipynb
Created June 19, 2017 19:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nvictus/a4ebebc86b6c9e4653af6e211f7d735d to your computer and use it in GitHub Desktop.
Save nvictus/a4ebebc86b6c9e4653af6e211f7d735d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Use Zarr to store NDArrays in S3\n",
"\n",
"* Zarr: NDArray storage in any dict-like object\n",
"* S3FS: Provides dict-like object for S3 \n",
"* Dask.array: Parallel and distributed arrays"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from s3fs import S3Map\n",
"d = S3Map('zarr-test-1')\n",
"d.clear()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from collections import MutableMapping\n",
"isinstance(d, MutableMapping)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from zarr import empty\n",
"z = empty(shape=(100, 100), chunks=(25, 25), dtype='f4', store=d, \n",
" compression=None)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import dask.array as da\n",
"import dask\n",
"x = da.random.random(size=z.shape, chunks=z.chunks).astype(z.dtype)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 206 ms, sys: 41.4 ms, total: 247 ms\n",
"Wall time: 269 ms\n"
]
}
],
"source": [
"%%time\n",
"x.store(z, lock=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"xx = da.from_array(z, z.chunks)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 161 ms, sys: 14.7 ms, total: 176 ms\n",
"Wall time: 141 ms\n"
]
},
{
"data": {
"text/plain": [
"5006.5146"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time xx.sum().compute()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['0.0',\n",
" '0.1',\n",
" '0.2',\n",
" '0.3',\n",
" '1.0',\n",
" '1.1',\n",
" '1.2',\n",
" '1.3',\n",
" '2.0',\n",
" '2.1',\n",
" '2.2',\n",
" '2.3',\n",
" '3.0',\n",
" '3.1',\n",
" '3.2',\n",
" '3.3',\n",
" 'attrs',\n",
" 'meta']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(d)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"b'\\x04px?\\x83/o?\\x05\\x10$?\\xf9\\x17A>6\\xb7\\xfd>\\xf6\\xf3\\x85>\\x87\\x13\\xe9=\\x9d\\x98\\xba>\\x9f2q?:\\xbeb?\\x8fVE?\\xc0ID>\\x05\\xbc\\x05>\\xeb\\x027?\\xcf\\xbb\\xaf>\\x19\\xc8o?\\xce\\x93\\x1f>\\x9c\\xb0+?\\xaa\\xcdU?\\x1fR\\xac>>\\xb2\\x12?\\xe9Kk?g\\xb9\\xa6>\\xaagG?\\xe1_\\x1d?\\xdd(\\x9f>\\xe1v\\x81<Z\\xf6\\xa8<\\xaa\\xf4\\xec;\\xa1P\\xc2>3\\x89d?j\\xbc\\x04?3@\\x06>&\\x7f\\x05?x\\xe7s>\\xc2jO?\\xf5\\\\\\x97>\\x86\\xbe=?=A\\xac>\\xbd\\x16p?x4\\x11?H\\x86W?\\x02\\xe7\\x96>w\\x08\\xee>,\\x8b.=\\r\\xd2\\x13>\\x8e\\xee\\x1d?J\\x0c\\x0f=lnK?L[v>\\x13\\x17\\x0f>)S\\xa2=\\xd3\\xb8F>\\xb3\\xc8?>`\\x01-?4\\xa4\\xf1=\\x10}x?Eu\\xa0>~G\\xd0>\\x12\\xb0\\x04?\\xa8r<?}\\x9b\\xe9>V5\\x18?\\x13\\xd9d?Z2\\xc0>q\\x112?\\x9b\\x19\\xda>\\xa7\\x9a\\xf5>\\xcd_$>\\xe6\\xc70>\\xe5\\xb0j?I\\t\\xba=\\xc4\\x8b]?Q\\xbb\\x15>\\xa08/?7,\\xfb>o\\xe0F?R|J?\\xe3\\x8eS>\\x1e\\x90t?y\\xd3\\xf0>k\\xcd\\x05?\\x934\\xde=\\x84\\x90\\x8f>j\\xf0=?8\\x9a\\xca>/\\xc7\\x00?Dz\\xe4>\\xd0@O?\\\\\\xc6\\xcc>\\xd1\\xb3\\xc6>X{\\\\?{.g?\\xc1\\xb5~=s/\\xb4>\\x19\\x81\\xb5=\\xd8{j>\\x1d\\xb8\\x8d>G\\x00\\x17?\\xe6\\xba\\xdc>\\xf8\\x0b\\x15?j~W?c#\\xd3>$\\t\\xed>\\xeeE\\x1b>\\xcb\\xe8\\x14?TE\\x84>\\x95Y\\xcd>l@\\xf1>_[Z?\\x90{~=\\x91\\x04\\x18?`W\\x11?D\\xcf;?\\xff\\x07\\x15=\\x18\\xbb|?\\xec\\x15?=.\\x9f\\x17?@\\xb0\\x87>%\\x85\\x19?\\xf3\\xc7\\xd1>Q\\x10\\n?\\xd6%\\xac>B\\x9e\\xd8>\\x12\\x167?\\xe3\\xe0G?J_\\x08?\\xcc?\\x89>Px\\xbe>\\x8ewt?\\xa0J\\x96>\\x1e\\x156?\\x88\\xfad?u\\xfc3?\\xd9\\x90\\x11>\\xd30&?\\x03\\xed\\x1b?\\x8a\\xdd\\x0e>-\\xc0\\x12?o\\xde\\x9e>\\xc2\\x8e\\x06?\\x10\\xa4#?$\\xb2\\xa3=m\\x0ck?/\\xad\\xcf;\\xd9\\x87L>z|\\x81>a\\xfd\\x8f>\\xfa\\xc4\\x16?\\xc7\\xae\\\\?\\xabh\\xdf>\\xfe\\xea\\x17?2\\xfb\\x19?_\\xae\\x86>\\xed\\x07D>\\x13\\xbe\\x13?\\x18\\x93\\x91>E^l?\\xe8\\x06Y?\\xc0o\\xdb>N\\xef\\x7f?\\xee(\\x8e=\\xd7\\x9b\\x1b?\\rMe?so~?z\\x00O?\\x97=\\xfd<\\xdfL\\xf2>\\xed\\x80\\xa7>\\xbesT=\\x9a\\xba\\x8d=\\x92Q\\xbd>Q\\x00\\xfa>\\x03=6?y\\x1d\\xd4>\\x08\\x1cJ?\\x9f\\xe8\\xf4=\\x0e\\x12A?\\x04m\\x03?\\xeb\\xa3u?\\xb9X\\xda>\\xfc\\xe8\\xba=-_Q>\\x83_\\x07?\\x16\\xf8P?\\xbd\\xd6\\x1d?[\\xdb\\xca=\\xfd\\xc47?\\xf6#B?K\\xd5\\x7f??\\x06\\x15=!\"b?\\xa9\\xf2\\x10?n\\xb1\\x14>4uR?n\\xea>=\\x91\\x16M?\\xd7P[>(p\\x97>\\xa9\\xe6\\x87=\\xa8a!?{[1?\\xd8\\xf2\\xff>Q~\\x15?\\xb2\\x81O?%[\\xa2=\\xff\\xce\\xb2>\\x80|H?\\xe8\\x99\\x10?\\x88\\xe7K:Ahz?z\\xd5e?\\xf2\\xe1\\xa7<\\x8b\\x16V>W\\xf2s?j\\xbc\\x89>\\xecT;>\\xbcL`?\\xb6\\x13c??\\xdcM?\\xb0:o?\\xc4\\x181?>\\x1f\\xd2>WB\\xef>N\\xb4Q?e|)>\\xafX\\x88>\\xaed!=\\\\\\xf5]?a\\x173?P0<=\\xeeof?_\\x15~?\\xe8\"\\xa3=\\xeeY\\xd4>\\x98\\x92g?_\\xf1\\xf0>\\xf8gA?K\\xa5H>!\\xb6v?\\x06\\xe2\\x1f?h\\xba\\xac>T\\xdc\\xa1>\\x085\\xf5=\\xc2\\x15S>\\x83\\xcb]?\\xfd,\\xa5>\\xe0\\xd6\\x05?M\\xba@?S\\x02\\x03?\\xa7N\\x98>\\x1f\\xfb\\xe3=7\\x10\\xe2>\\xaa\\xcb\\x15>\\xc3eM?\\xea\\x06O=\\x0bU\\x1d=\\xe1m\\xf5>(\\xb9\\xc6>DU\\xcf>\\xe9e@?\\xca\\xaaC?~\\x00\\xf8>\\xa6Em>\\xf1\\xaa@?.\\xa9(<\\x82\\xad\\xd9=k\\x1f\\xfb>\\x06nN?Z\\xf8\\x07?u\\x7fD>\\xf7\\x1a\\x7f>er~=\\xeb\\xceQ?a<\\xb8>\\xd4{J?\"\\x00\\t?T\\xfc\\xae=\\x9bk\\x1c?1\\xc9\\x14?U\\x82\\xf7>N\\xbd\\xc7>\\xabZ\\xa3>\\x85\\xc1f?\\xb6-\\x99>!\\x98\\xce=\\xd6\\x9b\\x90>F\\xfa\\xeb>q\\xbav?{c0>9\\xc1\\xc2=y\\xb9\\xea>1\\xe6\"?\\x9d\\x92\\x9d>\\x05\\xf6V?k\\xa2t>\\x9aN\\x03?\\x07\\xb6\\xc7>B\\x14\\xdd>\\xa2O\\x8f>\\xf80\\x9a=&dP>,Zr?\\xf5\\xa4\\x0b?\\xad\\x10\\xfe>|\\x85\\xbd>\\x1c\\xcb0?zd\\xf9>\\xdb[\\xf2>\\x97\\x9fH?\\xa7\\x08\\xc7=\\xd2S\\xc8=\\xa2!\\x7f>:V.?\\xe8\\x1dz>@\\xcc.?9\\x91=>\"\\r\\xfc>\\xed\\xf9\\xb4>K\\x11\\x1a?G\\xb9k?\\xedMt?\\xb80k?B\\xafl>g\\x03C?w|\\x85>7bV>=O<?Wu(>\\xbf\\xd8^?\\xcd\\x95s?4a\\x12>\\x13*+>E\\xe2\"?\\x14\\xb4\\x1f?%\\xcd\\xf9>\\x104l?t=\\xcd>8S\\xd4>\\x91%->\\xbd\\x98W>}WH?H\\xa3\\x17=\\r:w=\\xcbPZ?\\xf9\\xd3\\xda>\\x80\\xe9\\x08?c\\xf6\\x87>\\x19Pi?\\xebz\\x02?\\x0c\\xd5&?\\xb0\\xca\\x07?P\\xd1s?[\\x8b\\xaf>\\x1d\\xab!?\\xa0\\x8cn?\\x0eO\\xb3>\\xc6\\xe29=\\x7fN!?\\xa8O\\xc8=\\xca\\xb7\\\\?3\\xc2\\xe8<8\\xb6j?\\x82\\x03\\x19?\\x8f\\xf3T=\\x7f\\x15\\x16=\\x03Ol?\\x11\\x05h<M\\x9d\\xf5>9\\'N=\\xde\\xe82??\\xba\\xec>\\x8b>\\x03?\\xfdhP?\\x1bm\\x19?\\x07\\x9e\\x0c?\\x80$u?U\\xdf6?\\xc4\\x88.?T\\xd0M?\\xa3\\xab8?\\xc4\\xec\\xd4>\\x1a\\'4?l\\x92\\xa5=\\xae\\xed\\x86>Br2?\\xb9\\x0f`?E\\'\\xca>:\\xf9\\xb8>\\x81\\xac>?\\xa1\\x08-?\\xc1\"\\xb0>o\\x17~?>=\\r?8\\xae\\x16?N\\x150?tL\\xbe<\\x94\\'\\'?QH\\x19?\\xac\\xd3\\xe0>c\\xb3\\x07?\\xe0\\x06\\x82=h\\x1dD>Fm\\xf9>x\\xc5\\xa5>6i\\xd5>j\\xecU?\\xfc\\xbb\\xb4;<\\xe9\"?\\xd38}?LP\\xb9>3(z?G\\x94\\x03?1\\xa7R?\\x96\\x97s?\\xad,=?\\xb07\\x00?\\x0b\\xd7\\xea>3l\\xcc=\\xd1hR?\\x94\\x8aG? \\xaf\\x16?\\xf4z~?\\xa7q\\xbb>\\xd4ng?\\x10\\xff\\\\?\\xb6\\x0f\\x17?\\x12\\x02h>\\x9b.G>;\\t\\xfb>\\xd8 \\xfc>\\x19t\\xc6>\\x1e\\xb2\\xf5>\\xc6\\xb0x?\\xb6p\\x9f:\\xc0\"d?\\xe8E\\x8c=\\x8f\\x1d\\x0e>g\\xd2 >\\xf8\\xb3\\x00?\\x92\\x9d!?|\\x05W?\\x08\\xbc\\x01?\\xfa\\x03k?`,\\xc5>\\xae\\x0f0?\\x07\\xde\\x1e?\\x01*\\x17?\\x1e\\xef\\x0f?\\x02\\xb6k>\\xc6D\\x18?>9%?\\xac\\xdd_?\\x10\\x14T?OU\\xeb=\\xbcTO>\\xa32\\xd0>U[\\r?\\xba\\xec\\x16>\\x1fk\\x1d?\\xb0\\x01\\xa7>M\\xcd\\r?\\xc9\\x1a,?E\\xa1r?\\x03\\xc3\\x1e?p\\x9du>\\x13im?\\xeee\\xbf>\\x99\\xe9\\xa1>\\x003G?-\\xbc\\x00?\\x1a\\x88\\xec>\\x9d)5?\\xf6\\x82\\x11=T\\x13\\x9b=\\xa7\\xe6+?7c*?\\xae\\x9f->\\xe5\\x19P?\\xf6Qr?\\x1d\\xe1r?P\\x1a^;a\\xc9(>\\x8a\\x95\\xf7>5\\xaf\\x18>\\xa0\\xc2\\xf5=\\xe9\\xd3\\xda>\\x15q\\x18??`\\xbb>$\\xaf\\x97>7\\x8b\\xb8>OF5?\\x83oI?]\\x1a->\\x14\\x122?\\xc0\\xaeM?\\x90E%?\\x0bGD?\\x9d\\xdb@?\\xa9\\x10B?/Zt=\\x90\\xdb0?\\xf9}h=\\x9f`i?\\xcc\\xb1\\xe0>^W\\xf5=w\\xfa\\xba>\\x1608?U>\\x10?@\\xf1\\xf9>\\x01\\xdb`=H]\\x15?#m&?\\xcb\\xe0M?]L\\x17?3m\\x1e?\\x8b\\xce\\x8c>\"\\xb2A?\\x85\\xae\\x15?\\x95O2>\\xd3J\\xfa>\\xd10G?\\x04\\xaa\\xfd>\\xdf\\xb0R?\\x88\\xdd\\xdf>\\xfe\\x10}?\\xd6\\xf5\\x17?\\xa6\\xe28?\\xf2\\xdf\\xbe>\\xc8\\xce\\x04?=\\x87\\x8b=!\\xcf\\x82>z\\xdd\\xa7>\\x89\\x07\\xd4=\\x0e\\x98?;\\xe1\\xc4\\x1b?!\\xc5\\n?\\xa38\\x00?\\xc5\\xad\\xfc>\\x9f\\xdb\\xfe>\\x0b\\'@>\\x16I+?\\xdd=\\x0c?\\x95k|?gtA?\\x90lP?\\xa87\\xd0>\\x7f\\xe6\\xcb=\\xea\\x9a/?{w$=\\x82+\\t?;\\xdc\\xf5>\\xffx\\xb9=\\x80\\x1e\\x14?f\\x8aO?B\\x00W?\\x17\\xbaI?yi\\xe2<\\x05-y?T\\xc4\\x03?\\xea\\x14U>%\\xb3k?\\x11\\x10f? )P?\\xa1\\xafd>N]\\x08?\\xa0-g>\\x0cc3?\\xf4\\xa7;>\\xee\\xac\\\\=\\x080q?\\x7f\\xf26?$!W?\\x96qM?\\xd0gn?\\x8a\\xf1@?\\xb1\\xd0g?-\\x16+>\\xa9o\\x15?\\xfd\\x1eY?G\\xe5>=\\x91.-?\\x01le?\\xe3^\\xd1=$\\xccN>\\xd1\\x1e\\xa3>E\\x15\\x1b?A\\xc2!>\\xbd\\x85\\'>\\xf2\\xaf\\x7f?L\\xad=?\\x1dt1>\\xf3p\\xb5<.z\\xa9=>m\\xde>\\x85\\x125=\\xbc\\xb77>\"\"\\xe5>\\xc3\\x1f\\x05?\\xdazj=\\xe0\\x0f\\xd3=\\x00PY?\\x7f\\xc5F?b\\xdd\\xf9>\\xaf\\n\\xef=\\xe3\\xb37?\\xca\\xc0\\x7f>\\x90\\xe7Y>\\xa6\\xb3\\x99>\\x12\\x9f\\xdf>\\xaa\"9?!4K=H\\x82\\x13=\\x0cp\\xaa=\\xe0{\\x96>\\xb4\\xfd\\xf1>K\\xda\\xc5=?3c?m\\xafZ>\\xc9\\xf4\\x14>\\xb1\\xf6\\xff>\\xc37g?\\xe0\\xc4\\x93>\\xd8\\x02\\xaf>@.z>'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d['0.0']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Distributed\n",
"-----------"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<Executor: scheduler=localhost:8786 workers=160 threads=160>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from distributed import Executor, progress\n",
"e = Executor('localhost:8786')\n",
"e"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from zarr import empty\n",
"from s3fs import S3Map\n",
"import dask.array as da\n",
"\n",
"d = S3Map('zarr-test-1')\n",
"d.clear()\n",
"z = empty(shape=(100000, 100000),\n",
" chunks=(2000, 2000), \n",
" dtype='f4',\n",
" store=d, \n",
" compression=None)\n",
"\n",
"x = da.random.random(size=z.shape, chunks=z.chunks).astype(z.dtype)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"40.0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.nbytes / 1e9"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.8 s, sys: 65.9 ms, total: 1.86 s\n",
"Wall time: 33.5 s\n"
]
}
],
"source": [
"%time x.store(z, lock=False, get=e.get)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<dask.context.set_options at 0x7fea8c0d67b8>"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"da.set_options(get=e.get) # distributed by default"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"xx = da.from_array(z, chunks=z.chunks)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 429 ms, sys: 13.2 ms, total: 442 ms\n",
"Wall time: 38.6 s\n"
]
},
{
"data": {
"text/plain": [
"5.0000251e+09"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time xx.sum().compute()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 23.9 ms, sys: 320 µs, total: 24.3 ms\n",
"Wall time: 3.03 s\n"
]
},
{
"data": {
"text/plain": [
"5000038.0"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time xx[0:100].sum().compute()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment