Last active
December 5, 2018 01:57
-
-
Save rabernat/ce1fb414cf53541afe2245363b06c49d to your computer and use it in GitHub Desktop.
How to consolidate metadata in zarr stores from an intake catalog
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The history saving thread hit an unexpected error (OperationalError('disk I/O error',)).History will not be written to the database.\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"'0.11.0+11.g3ae93ac3'" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import xarray as xr\n", | |
"xr.__version__" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<Intake catalog: builtin>" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import intake\n", | |
"cat = intake.cat\n", | |
"cat" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import zarr\n", | |
"import gcsfs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=586241054156-ls4nduknhnelm2u6jtdgii15gsa3iv4v.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&state=R8uSab0A14tjcYTjasfOPyFancIikL&access_type=offline&prompt=consent\n" | |
] | |
}, | |
{ | |
"name": "stdin", | |
"output_type": "stream", | |
"text": [ | |
"Enter the authorization code: [REDACTED]\n" | |
] | |
} | |
], | |
"source": [ | |
"gcs = gcsfs.GCSFileSystem('pangeo-181919', token='browser')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"pangeo-data/dataset-duacs-rep-global-merged-allsat-phy-l4-v3-alt\n", | |
"CPU times: user 417 ms, sys: 37 ms, total: 454 ms\n", | |
"Wall time: 7.33 s\n", | |
"pangeo-data/ecco/eccov4r3\n", | |
"CPU times: user 854 ms, sys: 35 ms, total: 889 ms\n", | |
"Wall time: 16.2 s\n", | |
"pangeo-data/SOSE\n", | |
"CPU times: user 1.23 s, sys: 78 ms, total: 1.31 s\n", | |
"Wall time: 32.8 s\n", | |
"pangeo-data/llc4320_surface/grid\n", | |
"CPU times: user 387 ms, sys: 17 ms, total: 404 ms\n", | |
"Wall time: 8.25 s\n", | |
"pangeo-data/llc4320_surface/SST\n", | |
"CPU times: user 2.32 s, sys: 337 ms, total: 2.66 s\n", | |
"Wall time: 36.4 s\n", | |
"pangeo-data/llc4320_surface/SSS\n", | |
"CPU times: user 2.38 s, sys: 336 ms, total: 2.71 s\n", | |
"Wall time: 39.1 s\n", | |
"pangeo-data/llc4320_surface/Eta\n", | |
"CPU times: user 2.33 s, sys: 298 ms, total: 2.63 s\n", | |
"Wall time: 36.1 s\n", | |
"pangeo-data/llc4320_surface/U\n", | |
"CPU times: user 2.32 s, sys: 251 ms, total: 2.57 s\n", | |
"Wall time: 35.1 s\n", | |
"pangeo-data/llc4320_surface/V\n", | |
"CPU times: user 2.26 s, sys: 276 ms, total: 2.53 s\n", | |
"Wall time: 35.2 s\n" | |
] | |
} | |
], | |
"source": [ | |
"for item in cat:\n", | |
" entry = cat[item]\n", | |
" path = entry.describe_open()['args']['urlpath'][6:]\n", | |
" print(path)\n", | |
" gcsmap = gcsfs.GCSMap(path, gcs=gcs)\n", | |
" %time zarr.consolidate_metadata(gcsmap)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"sea_surface_height\n", | |
"Original Speed\n", | |
"CPU times: user 739 ms, sys: 52 ms, total: 791 ms\n", | |
"Wall time: 8.73 s\n", | |
"Consolidated Speed\n", | |
"CPU times: user 192 ms, sys: 2 ms, total: 194 ms\n", | |
"Wall time: 475 ms\n", | |
"ECCOv4r3\n", | |
"Original Speed\n", | |
"CPU times: user 1.48 s, sys: 79 ms, total: 1.56 s\n", | |
"Wall time: 19.5 s\n", | |
"Consolidated Speed\n", | |
"CPU times: user 257 ms, sys: 8 ms, total: 265 ms\n", | |
"Wall time: 949 ms\n", | |
"SOSE\n", | |
"Original Speed\n", | |
"CPU times: user 2.03 s, sys: 121 ms, total: 2.15 s\n", | |
"Wall time: 31.9 s\n", | |
"Consolidated Speed\n", | |
"CPU times: user 342 ms, sys: 13 ms, total: 355 ms\n", | |
"Wall time: 932 ms\n", | |
"LLC4320_grid\n", | |
"Original Speed\n", | |
"CPU times: user 709 ms, sys: 26 ms, total: 735 ms\n", | |
"Wall time: 10.8 s\n", | |
"Consolidated Speed\n", | |
"CPU times: user 68 ms, sys: 3 ms, total: 71 ms\n", | |
"Wall time: 607 ms\n", | |
"LLC4320_SST\n", | |
"Original Speed\n", | |
"CPU times: user 3.58 s, sys: 423 ms, total: 4 s\n", | |
"Wall time: 43 s\n", | |
"Consolidated Speed\n", | |
"CPU times: user 1.1 s, sys: 76 ms, total: 1.18 s\n", | |
"Wall time: 1.54 s\n", | |
"LLC4320_SSS\n", | |
"Original Speed\n", | |
"CPU times: user 3.6 s, sys: 458 ms, total: 4.06 s\n", | |
"Wall time: 41.8 s\n", | |
"Consolidated Speed\n", | |
"CPU times: user 1.03 s, sys: 63 ms, total: 1.09 s\n", | |
"Wall time: 1.83 s\n", | |
"LLC4320_SSH\n", | |
"Original Speed\n", | |
"CPU times: user 3.58 s, sys: 473 ms, total: 4.05 s\n", | |
"Wall time: 38.6 s\n", | |
"Consolidated Speed\n", | |
"CPU times: user 1.25 s, sys: 47 ms, total: 1.3 s\n", | |
"Wall time: 1.66 s\n", | |
"LLC4320_SSU\n", | |
"Original Speed\n", | |
"CPU times: user 3.86 s, sys: 381 ms, total: 4.24 s\n", | |
"Wall time: 37.9 s\n", | |
"Consolidated Speed\n", | |
"CPU times: user 392 ms, sys: 61 ms, total: 453 ms\n", | |
"Wall time: 681 ms\n", | |
"LLC4320_SSV\n", | |
"Original Speed\n", | |
"CPU times: user 4.15 s, sys: 467 ms, total: 4.62 s\n", | |
"Wall time: 35.7 s\n", | |
"Consolidated Speed\n", | |
"CPU times: user 420 ms, sys: 66 ms, total: 486 ms\n", | |
"Wall time: 1.08 s\n" | |
] | |
} | |
], | |
"source": [ | |
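"# Time how long each dataset takes to open from the original catalog (old_cat) versus the built-in catalog (cat); results are printed as 'Original Speed' and 'Consolidated Speed'\n", | |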
"old_cat = intake.Catalog('https://raw.githubusercontent.com/pangeo-data/pangeo/master/gce/catalog.yaml')\n", | |
"for name in cat:\n", | |
" print(name)\n", | |
" print('Original Speed')\n", | |
" %time old_cat[name].to_dask()\n", | |
" print('Consolidated Speed')\n", | |
" %time cat[name].to_dask()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment