Skip to content

Instantly share code, notes, and snippets.

@cisaacstern
Created December 21, 2021 15:54
Show Gist options
  • Save cisaacstern/0399d552161d5fe91a74508e475e649a to your computer and use it in GitHub Desktop.
Save cisaacstern/0399d552161d5fe91a74508e475e649a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "36a10399-636d-4fad-8a09-6a91b681affe",
"metadata": {},
"source": [
"## `h5netcdf=0.11.0`"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "acdefdb6-9cec-41eb-a375-c45ca92a2a99",
"metadata": {},
"outputs": [],
"source": [
"# !mamba install \"h5netcdf=0.11.0\" -y"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "76f35a83-09b2-467b-a85b-896ec57fbb4d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'0.11.0'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import h5netcdf; h5netcdf.__version__"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4de4a1a9-a81d-40af-a29c-265b656f815b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 2/2117 [00:37<10:59:05, 18.70s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"30.000335216522217 seconds have elapsed during iteration 2 with reader 'xarray', exceeding specified `timeout` of 30 seconds.\n",
"The following path is hanging:\n",
"gs://pangeo-forge-us-central1/pangeo-forge-cache/soda342/5day_ice/00a715b5c27ac7451affd0e56266e562-https_dsrs.atmos.umd.edu_data_soda3.4.2_original_ice_soda3.4.2_5dy_ice_or_2013_05_19.nc\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"from helpers import reader_loop, all_paths\n",
"\n",
"hanging_path = reader_loop(all_paths, reader=\"xarray\")"
]
},
{
"cell_type": "markdown",
"id": "9183b60e-81dc-4169-a43b-3d76558436c5",
"metadata": {},
"source": [
"## `h5netcdf=0.12.0`"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5dd1e3b4-7ead-4ae6-84fd-c9d8f968203a",
"metadata": {},
"outputs": [],
"source": [
"# !mamba install \"h5netcdf=0.12.0\" -y"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a2a13b0f-fff8-42a3-b8b7-4a8aa3e38703",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'0.12.0'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import h5netcdf; h5netcdf.__version__"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "afraid-thesis",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 2117/2117 [1:54:15<00:00, 3.24s/it] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"All paths opened without hanging.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"from helpers import reader_loop, all_paths\n",
"\n",
"hanging_path = reader_loop(all_paths, reader=\"xarray\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4af5dbb9-5a77-4b20-b372-c837c635b765",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "notebook",
"language": "python",
"name": "notebook"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
import json
import time
import signal
import sys
import fsspec
import gcsfs
import h5py
import numpy as np
import xarray as xr
from tqdm import tqdm
# Location of the cache inside the bucket (no scheme prefix, as used by gcsfs).
cache_base = "pangeo-forge-us-central1/pangeo-forge-cache"

# Filesystem handle created with requester-pays enabled.
gcs = gcsfs.GCSFileSystem(requester_pays=True)

# Every entry listed under the soda342/5day_ice cache directory, rewritten as
# a "gs://" URL so it can be handed straight to fsspec.open.
all_paths = ["gs://" + listed for listed in gcs.ls(cache_base + "/soda342/5day_ice")]
class Timeout(Exception):
    """Raised from the SIGALRM handler to abort a read that ran too long."""
def handler(signum, frame):
    """Signal handler that interrupts the running code by raising ``Timeout``.

    ``signum`` and ``frame`` are the arguments Python passes to every signal
    handler; neither is inspected here.
    """
    raise Timeout()
def reset_timeout(timeout):
    """Install ``handler`` for ``SIGALRM`` and schedule an alarm.

    Args:
        timeout: Delay, in seconds, passed unchanged to ``signal.alarm``.
    """
    alarm_signal = signal.SIGALRM
    signal.signal(alarm_signal, handler)
    signal.alarm(timeout)
def read_from_cache(path, reader, storage):
    """Open ``path`` through fsspec and pull every variable into memory.

    Each variable is converted to a NumPy array and discarded straight away;
    nothing is returned. The function exists to exercise the read path.

    Args:
        path: Location passed to ``fsspec.open`` (opened in ``"rb"`` mode).
        reader: ``"h5py"`` to read with ``h5py.File``, or ``"xarray"`` to read
            with ``xr.open_dataset`` and zarr-encode each variable first.
        storage: When equal to ``"gcs"``, ``requester_pays=True`` is forwarded
            to ``fsspec.open``; any other value forwards no extra options.

    Raises:
        ValueError: If ``reader`` is not ``"h5py"`` or ``"xarray"``.
    """
    if reader not in ("h5py", "xarray"):
        # Fail fast: an unrecognised reader would otherwise open the file,
        # read nothing, and be indistinguishable from a successful read.
        raise ValueError(
            f"Unknown reader {reader!r}; expected 'h5py' or 'xarray'."
        )

    kwargs = {"requester_pays": True} if storage == "gcs" else {}
    with fsspec.open(path, mode="rb", **kwargs) as ofile:
        if reader == "h5py":
            with h5py.File(ofile) as h5file:
                for var_name in h5file.keys():
                    # Materialise the member, then drop it immediately.
                    arr = np.asarray(h5file[var_name])
                    del arr
        else:
            with xr.open_dataset(ofile) as ds:
                for var_coded in ds.variables.values():
                    var = xr.backends.zarr.encode_zarr_variable(var_coded)
                    # Materialise the encoded data, then drop it immediately.
                    arr = np.asarray(var.data)
                    del arr
def reader_loop(paths, reader, timeout=30, traceback=False, storage="gcs"):
    """Read each path in turn and stop at the first one that exceeds ``timeout``.

    A progress bar is shown via ``tqdm``. Before every read the SIGALRM timer
    is re-armed with ``reset_timeout``; if it fires, a diagnostic message is
    printed and the loop ends.

    Args:
        paths: Iterable of paths, handed one at a time to ``read_from_cache``.
        reader: Reader name forwarded to ``read_from_cache``.
        timeout: Per-path budget in seconds; it reaches ``signal.alarm`` via
            ``reset_timeout``.
        traceback: If true, on a timeout dump fsspec's running async tasks,
            print them as JSON, and return the last stack line of each task
            instead of the hanging path.
        storage: Storage flavour forwarded to ``read_from_cache``.

    Returns:
        ``None`` when every path was read within the budget. Otherwise the
        first hanging path, or -- when ``traceback`` is true -- a list with
        one string per running task.
    """
    for n, path in enumerate(tqdm(paths)):
        start = time.time()
        reset_timeout(timeout)
        try:
            try:
                read_from_cache(path, reader=reader, storage=storage)
            finally:
                # Disarm the alarm as soon as the read ends (normally or with
                # an error). Without this, the timer armed for the last path
                # stays pending and raises Timeout in unrelated code later.
                # The inner try keeps a Timeout that fires right here inside
                # the outer ``except``.
                signal.alarm(0)
        except Timeout:
            elapsed = time.time() - start
            print(
                f"\n{elapsed} seconds have elapsed during iteration {n} "
                f"with reader '{reader}', exceeding specified `timeout` of {timeout} seconds.\n"
                f"The following path is hanging:\n{path}"
            )
            if not traceback:
                return path

            # NOTE: relies on a private fsspec helper to inspect running tasks.
            tasks = fsspec.asyn._dump_running_tasks(printout=False, cancel=False)
            print(json.dumps(tasks, indent=4))
            return [
                info["traceback.format_stack"][-1]
                if "traceback.format_stack" in info
                else "No traceback available for this task."
                for info in tasks.values()
            ]

    print("All paths opened without hanging.")
    return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment