Created
July 10, 2018 17:29
-
-
Save TallJimbo/0c9b7cc4db578ca41a42ad29b925a2d3 to your computer and use it in GitHub Desktop.
Pure-Python lsst.afw.table.io reader for Footprints
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Copyright (c) 2018 Jim Bosch | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to | |
# deal in the Software without restriction, including without limitation the | |
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | |
# sell copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included in | |
# all copies or substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
# IN THE SOFTWARE. | |
"""A minimally functional pure-Python reimplementation LSST's SpanSet and | |
Footprint classes, along with a simplified FITS archive reimplementation to | |
read them. | |
""" | |
__all__ = ("Span", "Box", "SpanSet", "Footprint", "HeavyFootprint", "Archive") | |
from collections import namedtuple | |
import numpy as np | |
Span = namedtuple("Span", ["y", "x0", "x1"]) | |
Box = namedtuple("Box", ["y0", "y1", "x0", "x1"]) | |
class SpanSet: | |
"""A Run-length-encoded pixel region. | |
SpanSet behaves like an immutable container of Spans, which are (named) | |
tuples of (y, x0, x1), with both x coordinates inclusive. | |
The only container operations supported are iteration and `len()`. | |
This is a much-simplified but dependency-free version of | |
lsst.afw.geom.SpanSet. | |
Parameters | |
---------- | |
data : `np.ndarray` | |
A 1-d structured array with "y", "x0", and "x1" fields (all integers). | |
""" | |
@classmethod | |
def load(cls, archive, table): | |
"""Read a SpanSet from an Archive. | |
This function is usable as an Archive loader, and may be registered as | |
such with Archive.register_loader. Only Archive should call it. | |
""" | |
return cls(np.array(table)) | |
def __init__(self, data): | |
self._data = data | |
self._area = None | |
self._bbox = None | |
def __iter__(self): | |
for record in self._data: | |
yield Span(y=int(record["y"]), x0=int(record["x0"]), | |
x1=int(record["x1"])) | |
def __len__(self): | |
return len(self._data) | |
@property | |
def area(self): | |
"""The number of pixels in the SpanSet (int).""" | |
if self._area is None: | |
self._area = (1 + self._data["x1"] - self._data["x0"]).sum() | |
return self._area | |
@property | |
def bbox(self): | |
"""The bounding box of the SpanSet, with all bounds inclusive.""" | |
if self._bbox is None: | |
self._bbox = Box(y0=int(self._data["y"].min()), | |
y1=int(self._data["y"].max()), | |
x0=int(self._data["x0"].min()), | |
x1=int(self._data["x1"].max())) | |
return self._bbox | |
class Footprint: | |
"""A combination of a SpanSet and a table of peaks. | |
This is a much-simplified but dependency-free version of | |
lsst.afw.detection.Footprint. | |
Parameters | |
---------- | |
spans : `SpanSet` | |
The SpanSet that manages the pixel region covered by the Footprint. | |
peaks : `numpy.ndarray` | |
A 1-d structured array containing information about above-threshold peaks | |
detected within the Footprint's pixel region. | |
""" | |
@classmethod | |
def load(cls, archive, span_table, peak_table): | |
"""Read a Footprint from an Archive. | |
This function is usable as an Archive loader, and may be registered as | |
such with Archive.register_loader. Only Archive should call it. | |
""" | |
if len(span_table.dtype.fields) == 1: | |
spans = archive[span_table[0]["id"]] | |
else: # backwards compatibility for older persisted Footprints | |
spans = SpanSet.load(archive, span_table) | |
peaks = np.array(peak_table) | |
return cls(spans=spans, peaks=peaks) | |
def __init__(self, spans, peaks): | |
self.spans = spans | |
self.peaks = peaks | |
class HeavyFootprint(Footprint): | |
"""A Footprint that includes flatted image, mask, and variance arrays. | |
This is a much-simplified but dependency-free version of | |
lsst.afw.detection.HeavyFootprint. | |
Parameters | |
---------- | |
spans : `SpanSet` | |
The SpanSet that manages the pixel region covered by the Footprint. | |
peaks : `numpy.ndarray` | |
A 1-d structured array containing information about above-threshold | |
peaks detected within the Footprint's pixel region. | |
image : `numpy.ndarray` | |
A 1-d array of floating-point numbers that contains the values of | |
pixels within the Footprint, with values for different Spans simply | |
concatenated together. | |
mask : `numpy.ndarray` | |
A 1-d array of integers that contains the values of bit mask pixels | |
within the Footprint, with values for different Spans simply | |
concatenated together. | |
image : `numpy.ndarray` | |
A 1-d array of floating-point numbers that contains the values of | |
variance pixels within the Footprint, with values for different Spans | |
simply concatenated together. | |
""" | |
@classmethod | |
def load(cls, archive, span_table, peak_table, pixel_table): | |
"""Read a Footprint from an Archive. | |
This function is usable as an Archive loader, and may be registered as | |
such with Archive.register_loader. Only Archive should call it. | |
""" | |
base = Footprint.load(archive, span_table, peak_table) | |
image = np.array(pixel_table[0]["image"]) | |
mask = np.array(pixel_table[0]["mask"]) | |
variance = np.array(pixel_table[0]["variance"]) | |
return cls(spans=base.spans, peaks=base.peaks, image=image, mask=mask, | |
variance=variance) | |
def __init__(self, spans, peaks, image, mask, variance): | |
super().__init__(spans, peaks) | |
self.image = image | |
self.mask = mask | |
self.variance = variance | |
class Archive: | |
"""A simplified reader for lsst.afw.table.io persistence framework. | |
This is a minimal pure-Python replacement for the much more general | |
archive classes defined (in C++) in LSST's afw package. | |
Archive is a somewhat dict-like object (supporting only `__getitem__`) that | |
reads objects saved in a multi-HDU FITS archive written by LSST code. | |
Such archives are often included in LSST image and catalog files, and hold | |
more complex data structures that are related to the image or catalog. | |
For example, persisted SourceCatalogs have a "footprint" column containing | |
an integer ID that can be used to obtain [Heavy]Footprints from an Archive. | |
To load an individual object from the Archive, simply invoke `__getitem__` | |
with the integer ID of the object. This ID is usually recorded elsewhere | |
in the FITS file, such as the main image header (for e.g. PSF models | |
stored with an image) or catalog columns (for Footprints, as above). | |
Some objects are persisted by persisting child objects and recording | |
the archive IDs of those children. A persisted Footprint, for instance, | |
holds the archive ID of the SpanSet it contains. | |
Archive caches all prevously-loaded objects for faster retrieval. | |
An archive ID of zero always results in `None` being returned; this is | |
sometimes used to persist parent objects that have optional child objects. | |
For retrieval of any particular type of object to work, a loader callable | |
must be registered with the Archive class via the `register_loader` method. | |
On-disk archives contain the Python module name and class name of the | |
object to be loaded; the full persistence framework in lsst.afw imports | |
these modules to ensure its loaders are present. The loaders used by this | |
stand-in system has no such capability, and instead simply maintains a | |
complete dictionary of pure-Python loaders that must be populated (usually | |
at module-import time) by any code that wants to use Archive. See | |
`register_loader` for more information on loader signatures. | |
Parameters | |
---------- | |
hdus : `astropy.io.fits.HDUList` | |
A list of astropy- or pyfits-like HDU objects. The Archive will | |
automatically extract from this the HDUs representing archive data. | |
""" | |
_loaders = {} | |
@classmethod | |
def register_loader(self, module, name, loader): | |
"""Register the given function as a loader for objects saved with the | |
given module and class name. | |
Loaders are callables that take an Archive as their first argument, | |
followed by any number of FITS binary table objects as additional | |
positional arguments. Each table corresponds to a different HDU, but | |
contains only the records recorded as belonging to a single instance | |
of the class being loaded. | |
The loader for a particular class almost always take the same number | |
of table arguments (but may accept a variable number in order to | |
internally support backwards compatibility for an older on-disk | |
representation). | |
""" | |
self._loaders[module, name] = loader | |
def __init__(self, hdus): | |
index_hdu_n = None | |
for i, hdu in enumerate(hdus): | |
if "AR_HDU" in hdu.header: | |
index_hdu_n = hdu.header["AR_HDU"] - 1 | |
break | |
if hdu.header.get("EXTTYPE", "") == "ARCHIVE_INDEX": | |
index_hdu_n = i | |
break | |
self._index = hdus[index_hdu_n] | |
self._tables = hdus[index_hdu_n + 1: | |
index_hdu_n + self._index.header["AR_NCAT"]] | |
for i, hdu in enumerate(self._tables): | |
assert hdu.header["AR_CATN"] == i + 1 | |
assert hdu.header["EXTTYPE"] == "ARCHIVE_DATA" | |
self._objects = {} | |
def __getitem__(self, i): | |
i = int(i) # for numpy integer scalars | |
if i == 0: | |
return None | |
try: | |
return self._objects[i] | |
except KeyError: | |
pass | |
index_rows = self._index.data[self._index.data["id"] == i] | |
tables = {} | |
module = None | |
name = None | |
for row in index_rows: | |
if module is None: | |
module = row["module"] | |
else: | |
assert module == row["module"] | |
if name is None: | |
name = row["name"] | |
else: | |
assert name == row["name"] | |
table = self._tables[row["cat.archive"] - 1].data | |
start = row["row0"] | |
stop = start + row["nrows"] | |
tables[row["cat.persistable"]] = table[start:stop] | |
loader = self._loaders[module, name] | |
result = loader(self, *[tables[n] for n in sorted(tables.keys())]) | |
self._objects[i] = result | |
return result | |
Archive.register_loader("lsst.afw.geom", "SpanSet", | |
SpanSet.load) | |
Archive.register_loader("lsst.afw.detection", "Footprint", | |
Footprint.load) | |
Archive.register_loader("lsst.afw.detection", "HeavyFootprint", | |
HeavyFootprint.load) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment