Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save martindurant/16bee4256595d3b6814be139ab1bd54e to your computer and use it in GitHub Desktop.
Save martindurant/16bee4256595d3b6814be139ab1bd54e to your computer and use it in GitHub Desktop.
astropy-rice-play
import astropy.io.fits._tiled_compression as tiled
from astropy.io import fits
import numcodecs
import fsspec
import zarr
import json
import numpy as np
class MY_RICE(tiled.codecs.Rice1):
def decode(self, buf, out=None):
arr = super().decode(np.frombuffer(buf, dtype="uint8"))
if out is not None:
out[:] = arr
return out
return arr
numcodecs.register_codec(MY_RICE)
fn = "/Users/mdurant/Downloads/aia.lev1.171A_2017-09-06T13_59_33.35Z.image_lev1.fits"
f = open(fn, "rb")
hdul = fits.open(f)
# includes compression/table info, as opposed to .header which is rewritten to the final image specs
full_head = hdul[1]._header
n_rows = full_head["NAXIS2"]
for i in range(1, 10):
if full_head[f"ZNAME{i}"] == "BLOCKSIZE":
blocksize = full_head[f"ZVAL{i}"]
break
for i in range(1, 10):
if full_head[f"ZNAME{i}"] == "BYTEPIX":
bytepix = full_head[f"ZVAL{i}"]
break
tilesize = full_head["ZTILE1"] * full_head["ZTILE2"]
inf = hdul[1].fileinfo()
f.seek(inf["datLoc"])
table = np.frombuffer(f.read(n_rows*4*2), dtype=[("size", ">i4"), ("offset", ">i4")], count=n_rows)
data_start = f.tell()
def test_codec():
codec = tiled.codecs.Rice1(blocksize=blocksize, bytepix=bytepix, tilesize=tilesize)
f.seek(data_start)
for j in range(n_rows):
block0 = f.read(table["size"][j])
out = codec.decode(np.frombuffer(block0, "uint8"))
assert (hdul[1].data[j] == out).all()
refs = {f"data/{i}.0": [fn, data_start + table["offset"][i], table["size"][i]]
for i in range(len(table))}
refs["data/.zarray"] = b"""
{
"chunks": [1, 4096],
"compressor": {"blocksize": 32, "bytepix": 2, "tilesize": 4096, "id": "FITS_RICE1"},
"filters": [],
"dtype": "<i2",
"fill_value": 0,
"order": "C",
"shape": [4096, 4096],
"zarr_format": 2
}
"""
head = {}
for k in hdul[1].header:
try:
head[k] = hdul[1].header[k]
except:
pass
refs["data/.zattr"] = json.dumps(head, separators=(',', ':'), indent=None).encode()
refs[".zgroup"] = b'{"zarr_format": 2}'
fs = fsspec.filesystem("reference", fo=refs)
g = zarr.open(fs.get_mapper())
(g.data[:] == hdul[1].data).all()
@Cadair
Copy link

Cadair commented Mar 21, 2023

Ok that's interesting, and very different to the underlying way FITS stores it.

We need to make a list of desired changes to our codec implementations. I am going to loose them scattered around the place.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment