Skip to content

Instantly share code, notes, and snippets.

@tinaok
Forked from martindurant/bigtar.py
Last active September 20, 2022 15:59
Show Gist options
  • Save tinaok/31af8aaa7c17db4ffae8fb5487b5dd37 to your computer and use it in GitHub Desktop.
Save tinaok/31af8aaa7c17db4ffae8fb5487b5dd37 to your computer and use it in GitHub Desktop.
Single file datasets
import kerchunk.hdf
import fsspec
import tarfile
import xarray as xr
# Storage options keyed by protocol: anonymous access to the S3-compatible
# object store at the given endpoint.
ro = {
    "s3": dict(
        anon=True,
        client_kwargs=dict(endpoint_url="https://object-store.cloud.muni.cz"),
    ),
}
# Get offsets: map every tar member name to the byte position at which its
# data starts inside the archive (TarInfo.offset_data).
with fsspec.open("s3://testfred/ice.tar", **ro["s3"]) as tf:
    # Use the TarFile as a context manager so it is closed once the member
    # table has been read; the remote file handle itself is managed by the
    # outer `with`.
    with tarfile.TarFile(fileobj=tf) as tar:
        offsets = {ti.name: ti.offset_data for ti in tar.getmembers()}
# Open every "*.nc" member of the remote tar archive and build one set of
# kerchunk references per member, keyed by the member's path.
#
# `fsspec.open_files` is required here: `fsspec.open` returns a single
# OpenFile, which cannot be iterated, whereas `open_files` expands the glob
# and returns a list of OpenFile objects.
ofs = fsspec.open_files(
    "tar://*.nc::s3://testfred/ice.tar", **ro
)
outs = {}
for of in ofs:
    # Entering the OpenFile yields the actual file-like object to scan.
    with of as f:
        h = kerchunk.hdf.SingleHdf5ToZarr(f)
        outs[of.path] = h.translate()
# Try the first dataset: serve its kerchunk references through a "reference"
# filesystem whose `u` template is overridden to point at the member via the
# tar protocol, with the archive itself located on S3.
# NOTE(review): the lookup key ends in ".nc.nc" while the template target
# ends in ".nc" -- confirm both match the actual member name in the archive.
out = outs["SEDNA-DELTA_y2014m01d01.1d_icemod.nc.nc"]
fs = fsspec.filesystem(
    "reference",
    fo=out,
    template_overrides=dict(u="tar://SEDNA-DELTA_y2014m01d01.1d_icemod.nc"),
    remote_options=dict(
        fo="s3://testfred/ice.tar",
        target_options=ro["s3"],
    ),
)
# The references carry no consolidated metadata, hence consolidated=False.
ds = xr.open_zarr(fs.get_mapper(""), consolidated=False)
# Or read directly: skip the tar layer and fetch byte ranges straight from
# the archive on S3, shifting every reference offset by 512 bytes.
# NOTE(review): 512 is presumably the size of the tar header that precedes
# the first member's data; `offsets` (computed earlier from
# TarInfo.offset_data) holds the per-member value -- confirm before reusing
# this for any other member.
#
# Build fresh reference lists rather than editing them in place: a shallow
# `dict.copy()` still shares the inner lists with `fs.references`, so
# in-place edits would also redirect `fs` (breaking the lazily loaded `ds`)
# and would add the offset again on every re-run.
mod = {
    key: ["s3://testfred/ice.tar", ref[1] + 512, *ref[2:]]
    if isinstance(ref, list)
    else ref
    for key, ref in fs.references.items()
}
fs2 = fsspec.filesystem(
    "reference",
    fo=mod,
    remote_options=ro["s3"]
)
ds2 = xr.open_zarr(fs2.get_mapper(""), consolidated=False)
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment