Created
March 17, 2023 18:01
-
-
Save martindurant/16bee4256595d3b6814be139ab1bd54e to your computer and use it in GitHub Desktop.
astropy-rice-play
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import astropy.io.fits._tiled_compression as tiled | |
from astropy.io import fits | |
import numcodecs | |
import fsspec | |
import zarr | |
import json | |
import numpy as np | |
class MY_RICE(tiled.codecs.Rice1):
    """Rice-1 codec wrapper adapting astropy's decoder to the numcodecs API.

    numcodecs may pass an ``out`` buffer to fill; astropy's decoder does not
    accept one, so we decode first and copy into ``out`` when provided.
    """

    def decode(self, buf, out=None):
        decoded = super().decode(np.frombuffer(buf, dtype="uint8"))
        if out is None:
            return decoded
        out[:] = decoded
        return out
numcodecs.register_codec(MY_RICE)

fn = "/Users/mdurant/Downloads/aia.lev1.171A_2017-09-06T13_59_33.35Z.image_lev1.fits"
f = open(fn, "rb")
hdul = fits.open(f)

# _header includes compression/table info, as opposed to .header which is
# rewritten to describe the final (decompressed) image.
full_head = hdul[1]._header
n_rows = full_head["NAXIS2"]


def _zval(head, name, max_keys=10):
    """Return the ZVALi value whose paired ZNAMEi card equals *name*.

    FITS tiled-compression stores compressor parameters as ZNAME1/ZVAL1,
    ZNAME2/ZVAL2, ... pairs. Scans ZNAME1..ZNAME{max_keys-1} and raises
    KeyError(name) if the parameter is not present (the original open-coded
    loops left the target variable undefined on a miss).
    """
    for i in range(1, max_keys):
        if head.get(f"ZNAME{i}") == name:
            return head[f"ZVAL{i}"]
    raise KeyError(name)


blocksize = _zval(full_head, "BLOCKSIZE")
bytepix = _zval(full_head, "BYTEPIX")
# One tile is ZTILE1 x ZTILE2 pixels.
tilesize = full_head["ZTILE1"] * full_head["ZTILE2"]

inf = hdul[1].fileinfo()
f.seek(inf["datLoc"])
# Compressed-tile index: one (size, offset) pair per row, big-endian int32.
table = np.frombuffer(
    f.read(n_rows * 4 * 2),
    dtype=[("size", ">i4"), ("offset", ">i4")],
    count=n_rows,
)
# Heap data (the actual compressed byte streams) begins right after the table.
data_start = f.tell()
def test_codec():
    """Decode every row's compressed block directly with astropy's Rice1 codec
    and check it matches the data astropy itself decompressed."""
    codec = tiled.codecs.Rice1(blocksize=blocksize, bytepix=bytepix, tilesize=tilesize)
    f.seek(data_start)
    for row, size in enumerate(table["size"]):
        raw = np.frombuffer(f.read(size), "uint8")
        decoded = codec.decode(raw)
        assert (hdul[1].data[row] == decoded).all()
# Build a kerchunk-style reference set: each zarr chunk "data/{i}.0" points at
# (file, byte offset, length) of one compressed tile row inside the FITS file.
refs = {
    f"data/{i}.0": [fn, data_start + table["offset"][i], table["size"][i]]
    for i in range(len(table))
}
# Zarr v2 array metadata; compressor id "FITS_RICE1" resolves to the codec
# registered above. NOTE(review): chunk/shape/dtype are hard-coded for this
# particular 4096x4096 int16 file.
refs["data/.zarray"] = b"""
{
    "chunks": [1, 4096],
    "compressor": {"blocksize": 32, "bytepix": 2, "tilesize": 4096, "id": "FITS_RICE1"},
    "filters": [],
    "dtype": "<i2",
    "fill_value": 0,
    "order": "C",
    "shape": [4096, 4096],
    "zarr_format": 2
}
"""

# Copy the FITS header cards into a plain dict for zarr attributes; skip any
# card whose value cannot be retrieved. Was a bare `except:`, which would also
# swallow KeyboardInterrupt/SystemExit.
head = {}
for k in hdul[1].header:
    try:
        head[k] = hdul[1].header[k]
    except Exception:
        pass

# BUG FIX: zarr v2 reads attributes from ".zattrs" (plural); the original
# ".zattr" key was silently ignored, so the header never reached the array.
refs["data/.zattrs"] = json.dumps(head, separators=(",", ":"), indent=None).encode()
refs[".zgroup"] = b'{"zarr_format": 2}'

fs = fsspec.filesystem("reference", fo=refs)
g = zarr.open(fs.get_mapper())
# Report whether the reference-backed zarr array round-trips to astropy's data
# (the original evaluated this comparison and discarded the result).
print((g.data[:] == hdul[1].data).all())
Ok that's interesting, and very different to the underlying way FITS stores it.
We need to make a list of desired changes to our codec implementations. I am going to leave them scattered around the place for now.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Yes, we would have to do this. A normal chunk is stored whole, with the unused part normally zeros (or another default fill value), but the actual values don't matter. This allows for expanding the array by just rewriting the metadata.