Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save martindurant/16bee4256595d3b6814be139ab1bd54e to your computer and use it in GitHub Desktop.
Save martindurant/16bee4256595d3b6814be139ab1bd54e to your computer and use it in GitHub Desktop.
astropy-rice-play
import astropy.io.fits._tiled_compression as tiled
from astropy.io import fits
import numcodecs
import fsspec
import zarr
import json
import numpy as np
class MY_RICE(tiled.codecs.Rice1):
def decode(self, buf, out=None):
arr = super().decode(np.frombuffer(buf, dtype="uint8"))
if out is not None:
out[:] = arr
return out
return arr
numcodecs.register_codec(MY_RICE)
fn = "/Users/mdurant/Downloads/aia.lev1.171A_2017-09-06T13_59_33.35Z.image_lev1.fits"
f = open(fn, "rb")
hdul = fits.open(f)
# includes compression/table info, as opposed to .header which is rewritten to the final image specs
full_head = hdul[1]._header
n_rows = full_head["NAXIS2"]
for i in range(1, 10):
if full_head[f"ZNAME{i}"] == "BLOCKSIZE":
blocksize = full_head[f"ZVAL{i}"]
break
for i in range(1, 10):
if full_head[f"ZNAME{i}"] == "BYTEPIX":
bytepix = full_head[f"ZVAL{i}"]
break
tilesize = full_head["ZTILE1"] * full_head["ZTILE2"]
inf = hdul[1].fileinfo()
f.seek(inf["datLoc"])
table = np.frombuffer(f.read(n_rows*4*2), dtype=[("size", ">i4"), ("offset", ">i4")], count=n_rows)
data_start = f.tell()
def test_codec():
codec = tiled.codecs.Rice1(blocksize=blocksize, bytepix=bytepix, tilesize=tilesize)
f.seek(data_start)
for j in range(n_rows):
block0 = f.read(table["size"][j])
out = codec.decode(np.frombuffer(block0, "uint8"))
assert (hdul[1].data[j] == out).all()
refs = {f"data/{i}.0": [fn, data_start + table["offset"][i], table["size"][i]]
for i in range(len(table))}
refs["data/.zarray"] = b"""
{
"chunks": [1, 4096],
"compressor": {"blocksize": 32, "bytepix": 2, "tilesize": 4096, "id": "FITS_RICE1"},
"filters": [],
"dtype": "<i2",
"fill_value": 0,
"order": "C",
"shape": [4096, 4096],
"zarr_format": 2
}
"""
head = {}
for k in hdul[1].header:
try:
head[k] = hdul[1].header[k]
except:
pass
refs["data/.zattr"] = json.dumps(head, separators=(',', ':'), indent=None).encode()
refs[".zgroup"] = b'{"zarr_format": 2}'
fs = fsspec.filesystem("reference", fo=refs)
g = zarr.open(fs.get_mapper())
(g.data[:] == hdul[1].data).all()
@martindurant
Copy link
Author

It zero pads the output array and then zarr crops it down afterwards?

Yes, we would have to do this. A normal chunk is stored whole, with the unused part normally zeros (or other default fill value), but the actually values doesn't matter. This allows for expanding the array by just rewriting the metadata.

@Cadair
Copy link

Cadair commented Mar 21, 2023

Ok that's interesting, and very different to the underlying way FITS stores it.

We need to make a list of desired changes to our codec implementations. I am going to loose them scattered around the place.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment