Skip to content

Instantly share code, notes, and snippets.

@rabernat
Last active December 5, 2018 01:57
Show Gist options
  • Save rabernat/ce1fb414cf53541afe2245363b06c49d to your computer and use it in GitHub Desktop.
Save rabernat/ce1fb414cf53541afe2245363b06c49d to your computer and use it in GitHub Desktop.
How to consolidate metadata in zarr stores from an intake catalog
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The history saving thread hit an unexpected error (OperationalError('disk I/O error',)).History will not be written to the database.\n"
]
},
{
"data": {
"text/plain": [
"'0.11.0+11.g3ae93ac3'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import xarray as xr\n",
"xr.__version__"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Intake catalog: builtin>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import intake\n",
"cat = intake.cat\n",
"cat"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import zarr\n",
"import gcsfs"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=586241054156-ls4nduknhnelm2u6jtdgii15gsa3iv4v.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&state=R8uSab0A14tjcYTjasfOPyFancIikL&access_type=offline&prompt=consent\n"
]
},
{
"name": "stdin",
"output_type": "stream",
"text": [
"Enter the authorization code: [REDACTED]\n"
]
}
],
"source": [
"# token='browser' starts the interactive OAuth prompt recorded in this cell's output\n",
"gcs = gcsfs.GCSFileSystem(project='pangeo-181919', token='browser')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pangeo-data/dataset-duacs-rep-global-merged-allsat-phy-l4-v3-alt\n",
"CPU times: user 417 ms, sys: 37 ms, total: 454 ms\n",
"Wall time: 7.33 s\n",
"pangeo-data/ecco/eccov4r3\n",
"CPU times: user 854 ms, sys: 35 ms, total: 889 ms\n",
"Wall time: 16.2 s\n",
"pangeo-data/SOSE\n",
"CPU times: user 1.23 s, sys: 78 ms, total: 1.31 s\n",
"Wall time: 32.8 s\n",
"pangeo-data/llc4320_surface/grid\n",
"CPU times: user 387 ms, sys: 17 ms, total: 404 ms\n",
"Wall time: 8.25 s\n",
"pangeo-data/llc4320_surface/SST\n",
"CPU times: user 2.32 s, sys: 337 ms, total: 2.66 s\n",
"Wall time: 36.4 s\n",
"pangeo-data/llc4320_surface/SSS\n",
"CPU times: user 2.38 s, sys: 336 ms, total: 2.71 s\n",
"Wall time: 39.1 s\n",
"pangeo-data/llc4320_surface/Eta\n",
"CPU times: user 2.33 s, sys: 298 ms, total: 2.63 s\n",
"Wall time: 36.1 s\n",
"pangeo-data/llc4320_surface/U\n",
"CPU times: user 2.32 s, sys: 251 ms, total: 2.57 s\n",
"Wall time: 35.1 s\n",
"pangeo-data/llc4320_surface/V\n",
"CPU times: user 2.26 s, sys: 276 ms, total: 2.53 s\n",
"Wall time: 35.2 s\n"
]
}
],
"source": [
"# Call zarr.consolidate_metadata on the store behind each catalog entry's urlpath.\n",
"for item in cat:\n",
"    entry = cat[item]\n",
"    urlpath = entry.describe_open()['args']['urlpath']\n",
"    # Drop the URL scheme by splitting on '://' rather than slicing a fixed six\n",
"    # characters, so the path stays intact whatever the scheme's length is.\n",
"    path = urlpath.split('://', 1)[-1]\n",
"    print(path)\n",
"    gcsmap = gcsfs.GCSMap(path, gcs=gcs)\n",
"    %time zarr.consolidate_metadata(gcsmap)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sea_surface_height\n",
"Original Speed\n",
"CPU times: user 739 ms, sys: 52 ms, total: 791 ms\n",
"Wall time: 8.73 s\n",
"Consolidated Speed\n",
"CPU times: user 192 ms, sys: 2 ms, total: 194 ms\n",
"Wall time: 475 ms\n",
"ECCOv4r3\n",
"Original Speed\n",
"CPU times: user 1.48 s, sys: 79 ms, total: 1.56 s\n",
"Wall time: 19.5 s\n",
"Consolidated Speed\n",
"CPU times: user 257 ms, sys: 8 ms, total: 265 ms\n",
"Wall time: 949 ms\n",
"SOSE\n",
"Original Speed\n",
"CPU times: user 2.03 s, sys: 121 ms, total: 2.15 s\n",
"Wall time: 31.9 s\n",
"Consolidated Speed\n",
"CPU times: user 342 ms, sys: 13 ms, total: 355 ms\n",
"Wall time: 932 ms\n",
"LLC4320_grid\n",
"Original Speed\n",
"CPU times: user 709 ms, sys: 26 ms, total: 735 ms\n",
"Wall time: 10.8 s\n",
"Consolidated Speed\n",
"CPU times: user 68 ms, sys: 3 ms, total: 71 ms\n",
"Wall time: 607 ms\n",
"LLC4320_SST\n",
"Original Speed\n",
"CPU times: user 3.58 s, sys: 423 ms, total: 4 s\n",
"Wall time: 43 s\n",
"Consolidated Speed\n",
"CPU times: user 1.1 s, sys: 76 ms, total: 1.18 s\n",
"Wall time: 1.54 s\n",
"LLC4320_SSS\n",
"Original Speed\n",
"CPU times: user 3.6 s, sys: 458 ms, total: 4.06 s\n",
"Wall time: 41.8 s\n",
"Consolidated Speed\n",
"CPU times: user 1.03 s, sys: 63 ms, total: 1.09 s\n",
"Wall time: 1.83 s\n",
"LLC4320_SSH\n",
"Original Speed\n",
"CPU times: user 3.58 s, sys: 473 ms, total: 4.05 s\n",
"Wall time: 38.6 s\n",
"Consolidated Speed\n",
"CPU times: user 1.25 s, sys: 47 ms, total: 1.3 s\n",
"Wall time: 1.66 s\n",
"LLC4320_SSU\n",
"Original Speed\n",
"CPU times: user 3.86 s, sys: 381 ms, total: 4.24 s\n",
"Wall time: 37.9 s\n",
"Consolidated Speed\n",
"CPU times: user 392 ms, sys: 61 ms, total: 453 ms\n",
"Wall time: 681 ms\n",
"LLC4320_SSV\n",
"Original Speed\n",
"CPU times: user 4.15 s, sys: 467 ms, total: 4.62 s\n",
"Wall time: 35.7 s\n",
"Consolidated Speed\n",
"CPU times: user 420 ms, sys: 66 ms, total: 486 ms\n",
"Wall time: 1.08 s\n"
]
}
],
"source": [
"# Time opening each dataset twice: via the catalog published on GitHub\n",
"# (printed as 'Original Speed') and via the builtin catalog ('Consolidated Speed').\n",
"OLD_CATALOG_URL = 'https://raw.githubusercontent.com/pangeo-data/pangeo/master/gce/catalog.yaml'\n",
"old_cat = intake.Catalog(OLD_CATALOG_URL)\n",
"\n",
"# Entry names come from `cat`; `old_cat` is indexed with the same names.\n",
"for name in cat:\n",
"    print(name)\n",
"    print('Original Speed')\n",
"    %time old_cat[name].to_dask()\n",
"    print('Consolidated Speed')\n",
"    %time cat[name].to_dask()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment