Skip to content

Instantly share code, notes, and snippets.

@martindurant
Created May 21, 2021 19:05
Show Gist options
  • Save martindurant/cd17d999f25e936fc34bfb64086442d9 to your computer and use it in GitHub Desktop.
Save martindurant/cd17d999f25e936fc34bfb64086442d9 to your computer and use it in GitHub Desktop.
Build references for the second Matanzas dataset
# Scan a remote NetCDF/HDF5 file and write a JSON reference set describing
# its chunks, so the data can later be opened through the zarr engine
# without converting or copying the original file.
import json

import fsspec
from fsspec_reference_maker.hdf import SingleHdf5ToZarr

url = "s3://prod-is-usgs-sb-prod-publish/609bf69ed34ea221ce39b261/breach_matanzas.nc"

# Options forwarded to fsspec.open for the S3 object: binary read mode,
# anonymous + requester-pays access flags, and cache settings.
# NOTE(review): the cache settings presumably limit caching to the start of
# the file while the HDF5 metadata is scanned -- confirm against fsspec docs.
so = dict(
    mode='rb', anon=True, requester_pays=True,
    default_fill_cache=False, default_cache_type='first'
)

# Both the scanner construction and translate() stay inside the ``with`` so
# the remote file handle is still open while it is being read.
with fsspec.open(url, **so) as f:
    h5chunks = SingleHdf5ToZarr(f, url, xarray=True, inline_threshold=100)
    out = h5chunks.translate()

# Use a context manager so the JSON file is flushed and closed
# deterministically instead of relying on garbage collection.
with open("matanzas2.json", 'w') as ref_file:
    json.dump(out, ref_file)
import xarray as xr

# Options for the "reference://" filesystem: ``fo`` names the JSON reference
# file, and the remote_* entries say how the referenced bytes are fetched
# (S3 protocol, anonymous access).
reference_options = {
    "fo": "matanzas2.json",
    "remote_protocol": "s3",
    "remote_options": {"anon": True},
}

# Open the referenced dataset through xarray's zarr engine.
ds = xr.open_dataset(
    "reference://",
    engine="zarr",
    backend_kwargs={"storage_options": reference_options},
)

# Total dataset size in units of 2**30 bytes (displayed when run in a notebook).
ds.nbytes / 2**30
# 50.6GB
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment