Created
June 15, 2022 15:33
-
-
Save rsignell-usgs/2400089529be8bc3924aacfd3b2485e6 to your computer and use it in GitHub Desktop.
fill_value.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "5b86af71-052b-4081-844b-779db459068d", | |
"metadata": {}, | |
"source": [ | |
"# FillValue issues with kerchunk dataset\n", | |
"The NetCDF file has a float32 variable (`temp`) with `_FillValue` set to 1e37.\n", | |
"* reading NetCDF with Xarray correctly sets these values to NaN\n", | |
"* reading kerchunk JSON with Xarray does not: these values come back as ~1e37 instead of NaN" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "3b6ba617-9772-4ecf-aa7a-2be2a4bcfa1d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import fsspec\n", | |
"import xarray as xr" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "daaa298b-2b15-46a2-b8ef-d4828c5ae291", | |
"metadata": {}, | |
"source": [ | |
"#### Open NetCDF file directly" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "d6b937c9-ffb0-42fe-94fc-e591cd297255", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fs = fsspec.filesystem('s3', anon=True, client_kwargs={'endpoint_url': 'https://mghp.osn.xsede.org'})\n", | |
"url = 's3://rsignellbucket1/COAWST/coawst_us_20220101_01.nc'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "a7a9ea7a-89bc-4560-baf5-292cd2cbe368", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ds = xr.open_dataset(fs.open(url), decode_cf=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "30951466-d166-4306-bcda-d015cf583bf7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(nan)" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds.temp[0,0,0,0].values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "ff2ea828-1ce7-4781-b73e-fb8d4d4dc46b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ds = xr.open_dataset(fs.open(url), decode_cf=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "75160414-374d-4ef8-8d99-7f2fdac45187", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(1.e+37, dtype=float32)" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds.temp[0,0,0,0].values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "bcedf939-0b09-4f23-ae79-ffa72d84d03f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1e+37" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds.temp._FillValue" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "529cd7e9-5a1f-4e6b-a515-fe238c61c6ea", | |
"metadata": {}, | |
"source": [ | |
"#### Write to Zarr" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "d6c3a17e-3fc7-4520-8080-bb76a9cb7416", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 157 ms, sys: 63.9 ms, total: 221 ms\n", | |
"Wall time: 2.53 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"Delayed('_finalize_store-53e634eb-c40f-4095-811d-9fa8b6dc1ded')" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"ds[['temp']].isel(ocean_time=0).to_zarr('foo.zarr', compute=False, mode='w')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "768e9d44-2756-42ec-9c12-c51cfbd59998", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{\n", | |
" \"chunks\": [\n", | |
" 4,\n", | |
" 84,\n", | |
" 448\n", | |
" ],\n", | |
" \"compressor\": {\n", | |
" \"blocksize\": 0,\n", | |
" \"clevel\": 5,\n", | |
" \"cname\": \"lz4\",\n", | |
" \"id\": \"blosc\",\n", | |
" \"shuffle\": 1\n", | |
" },\n", | |
" \"dtype\": \"<f4\",\n", | |
" \"fill_value\": 9.999999933815813e+36,\n", | |
" \"filters\": null,\n", | |
" \"order\": \"C\",\n", | |
" \"shape\": [\n", | |
" 16,\n", | |
" 336,\n", | |
" 896\n", | |
" ],\n", | |
" \"zarr_format\": 2\n", | |
"}" | |
] | |
} | |
], | |
"source": [ | |
"! cat ./foo.zarr/temp/.zarray" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "284ccc02-f603-4210-809b-5c3e62d948eb", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ds2 = xr.open_dataset('foo.zarr', engine='zarr', decode_cf=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "f037fe4b-6aac-4e52-9080-e425329db099", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1e+37" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds2.temp._FillValue" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "d1856c8d-5f22-4386-b616-678b1ced5ee1", | |
"metadata": {}, | |
"source": [ | |
"#### Read Kerchunk JSON representation of the above NetCDF file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "22d6fcf5-27b5-47bc-b3cc-44dae863314a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"json_url = 's3://rsignellbucket1/COAWST/jsons/coawst_us_20220101_01.nc.json'" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "694458e0-50cf-4124-b896-0abd1e1fced2", | |
"metadata": {}, | |
"source": [ | |
"Try with `decode_cf=True`:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "c59e3c10-ebb9-4915-9f8e-8fcb43ee9475", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"s_opts = dict(skip_instance_cache=True, anon=True, client_kwargs={'endpoint_url': 'https://mghp.osn.xsede.org'}) #json \n", | |
"r_opts = dict(anon=True, client_kwargs={'endpoint_url': 'https://mghp.osn.xsede.org'}) #data\n", | |
"\n", | |
"fs = fsspec.filesystem(\"reference\", fo=json_url, ref_storage_args=s_opts,\n", | |
" remote_protocol='s3', remote_options=r_opts)\n", | |
"m = fs.get_mapper(\"\")\n", | |
"\n", | |
"ds = xr.open_dataset(m, engine=\"zarr\", chunks={}, \n", | |
" backend_kwargs=dict(consolidated=False), decode_cf=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "d2e3c687-7288-4266-bf1f-073fdeaf4df9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(9.99999993e+36)" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds.temp[0,0,0,0].values" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "a4cc1585-03f8-405c-ad7b-34e3f3b571ab", | |
"metadata": {}, | |
"source": [ | |
"Try with `decode_cf=False`:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "520036ab-5fc0-43b7-b0c5-4efbf5fdb3f9", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"ds = xr.open_dataset(m, engine=\"zarr\", chunks={}, \n", | |
" backend_kwargs=dict(consolidated=False), decode_cf=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "500c61db-467b-43c2-a5c5-54d8ab0bba3d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.0" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds.temp._FillValue" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "61c10df9-b632-4680-9b48-13cb78e429b8", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(1.e+37, dtype=float32)" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds.temp[0,0,0,0].values" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "a359958a-746e-4433-b6bb-77589ee067f7", | |
"metadata": {}, | |
"source": [ | |
"Look at the `temp/.zattrs` entry in the kerchunk JSON:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "0c47572a-c4fa-4c2d-8435-4f01a79101ac", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fs.download('temp/.zattrs', 'foo')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "149f0fea-c368-4bb7-9174-cb75f3c77afa", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{\n", | |
" \"_ARRAY_DIMENSIONS\": [\n", | |
" \"ocean_time\",\n", | |
" \"s_rho\",\n", | |
" \"eta_rho\",\n", | |
" \"xi_rho\"\n", | |
" ],\n", | |
" \"_FillValue\": 9.999999933815813e+36,\n", | |
" \"coordinates\": \"lon_rho lat_rho s_rho ocean_time\",\n", | |
" \"field\": \"temperature, scalar, series\",\n", | |
" \"grid\": \"grid\",\n", | |
" \"location\": \"face\",\n", | |
" \"long_name\": \"potential temperature\",\n", | |
" \"time\": \"ocean_time\",\n", | |
" \"units\": \"Celsius\"\n", | |
"}\n" | |
] | |
} | |
], | |
"source": [ | |
"!more foo" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "users-pangeo", | |
"language": "python", | |
"name": "conda-env-users-pangeo-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.13" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"state": {}, | |
"version_major": 2, | |
"version_minor": 0 | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment