TallJimbo/read_footprints.py

## read_footprints.py
#
# Copyright (c) 2018 Jim Bosch
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

"""A minimally functional pure-Python reimplementation LSST's SpanSet and
Footprint classes, along with a simplified FITS archive reimplementation to
read them.
"""

__all__ = ("Span", "Box", "SpanSet", "Footprint", "HeavyFootprint", "Archive")


from collections import namedtuple
import numpy as np


Span = namedtuple("Span", ["y", "x0", "x1"])


Box = namedtuple("Box", ["y0", "y1", "x0", "x1"])


class SpanSet:
    """A Run-length-encoded pixel region.

    SpanSet behaves like an immutable container of Spans, which are (named)
    tuples of (y, x0, x1), with both x coordinates inclusive.
    The only container operations supported are iteration and `len()`.

    This is a much-simplified but dependency-free version of
    lsst.afw.geom.SpanSet.

    Parameters
    ----------
    data : `np.ndarray`
        A 1-d structured array with "y", "x0", and "x1" fields (all integers).
    """

    @classmethod
    def load(cls, archive, table):
        """Read a SpanSet from an Archive.

        This function is usable as an Archive loader, and may be registered as
        such with Archive.register_loader.  Only Archive should call it.
        """
        return cls(np.array(table))

    def __init__(self, data):
        self._data = data
        self._area = None
        self._bbox = None

    def __iter__(self):
        for record in self._data:
            yield Span(y=int(record["y"]), x0=int(record["x0"]),
                       x1=int(record["x1"]))

    def __len__(self):
        return len(self._data)

    @property
    def area(self):
        """The number of pixels in the SpanSet (int)."""
        if self._area is None:
            self._area = (1 + self._data["x1"] - self._data["x0"]).sum()
        return self._area

    @property
    def bbox(self):
        """The bounding box of the SpanSet, with all bounds inclusive."""
        if self._bbox is None:
            self._bbox = Box(y0=int(self._data["y"].min()),
                             y1=int(self._data["y"].max()),
                             x0=int(self._data["x0"].min()),
                             x1=int(self._data["x1"].max()))
        return self._bbox


class Footprint:
    """A combination of a SpanSet and a table of peaks.

    This is a much-simplified but dependency-free version of
    lsst.afw.detection.Footprint.

    Parameters
    ----------
    spans : `SpanSet`
        The SpanSet that manages the pixel region covered by the Footprint.
    peaks : `numpy.ndarray`
        A 1-d structured array containing information about above-threshold peaks
        detected within the Footprint's pixel region.
    """

    @classmethod
    def load(cls, archive, span_table, peak_table):
        """Read a Footprint from an Archive.

        This function is usable as an Archive loader, and may be registered as
        such with Archive.register_loader.  Only Archive should call it.
        """
        if len(span_table.dtype.fields) == 1:
            spans = archive[span_table[0]["id"]]
        else:  # backwards compatibility for older persisted Footprints
            spans = SpanSet.load(archive, span_table)
        peaks = np.array(peak_table)
        return cls(spans=spans, peaks=peaks)

    def __init__(self, spans, peaks):
        self.spans = spans
        self.peaks = peaks


class HeavyFootprint(Footprint):
    """A Footprint that includes flatted image, mask, and variance arrays.

    This is a much-simplified but dependency-free version of
    lsst.afw.detection.HeavyFootprint.

    Parameters
    ----------
    spans : `SpanSet`
        The SpanSet that manages the pixel region covered by the Footprint.
    peaks : `numpy.ndarray`
        A 1-d structured array containing information about above-threshold
        peaks detected within the Footprint's pixel region.
    image : `numpy.ndarray`
        A 1-d array of floating-point numbers that contains the values of
        pixels within the Footprint, with values for different Spans simply
        concatenated together.
    mask : `numpy.ndarray`
        A 1-d array of integers that contains the values of bit mask pixels
        within the Footprint, with values for different Spans simply
        concatenated together.
    image : `numpy.ndarray`
        A 1-d array of floating-point numbers that contains the values of
        variance pixels within the Footprint, with values for different Spans
        simply concatenated together.
    """

    @classmethod
    def load(cls, archive, span_table, peak_table, pixel_table):
        """Read a Footprint from an Archive.

        This function is usable as an Archive loader, and may be registered as
        such with Archive.register_loader.  Only Archive should call it.
        """
        base = Footprint.load(archive, span_table, peak_table)
        image = np.array(pixel_table[0]["image"])
        mask = np.array(pixel_table[0]["mask"])
        variance = np.array(pixel_table[0]["variance"])
        return cls(spans=base.spans, peaks=base.peaks, image=image, mask=mask,
                   variance=variance)

    def __init__(self, spans, peaks, image, mask, variance):
        super().__init__(spans, peaks)
        self.image = image
        self.mask = mask
        self.variance = variance


class Archive:
    """A simplified reader for lsst.afw.table.io persistence framework.

    This is a minimal pure-Python replacement for the much more general
    archive classes defined (in C++) in LSST's afw package.

    Archive is a somewhat dict-like object (supporting only `__getitem__`) that
    reads objects saved in a multi-HDU FITS archive written by LSST code.
    Such archives are often included in LSST image and catalog files, and hold
    more complex data structures that are related to the image or catalog.
    For example, persisted SourceCatalogs have a "footprint" column containing
    an integer ID that can be used to obtain [Heavy]Footprints from an Archive.

    To load an individual object from the Archive, simply invoke `__getitem__`
    with the integer ID of the object.  This ID is usually recorded elsewhere
    in the FITS file, such as the main image header (for e.g. PSF models
    stored with an image) or catalog columns (for Footprints, as above).
    Some objects are persisted by persisting child objects and recording
    the archive IDs of those children.  A persisted Footprint, for instance,
    holds the archive ID of the SpanSet it contains.

    Archive caches all prevously-loaded objects for faster retrieval.

    An archive ID of zero always results in `None` being returned; this is
    sometimes used to persist parent objects that have optional child objects.

    For retrieval of any particular type of object to work, a loader callable
    must be registered with the Archive class via the `register_loader` method.
    On-disk archives contain the Python module name and class name of the
    object to be loaded; the full persistence framework in lsst.afw imports
    these modules to ensure its loaders are present.  The loaders used by this
    stand-in system has no such capability, and instead simply maintains a
    complete dictionary of pure-Python loaders that must be populated (usually
    at module-import time) by any code that wants to use Archive.  See
    `register_loader` for more information on loader signatures.

    Parameters
    ----------
    hdus : `astropy.io.fits.HDUList`
        A list of astropy- or pyfits-like HDU objects.  The Archive will
        automatically extract from this the HDUs representing archive data.

    """

    _loaders = {}

    @classmethod
    def register_loader(self, module, name, loader):
        """Register the given function as a loader for objects saved with the
        given module and class name.

        Loaders are callables that take an Archive as their first argument,
        followed by any number of FITS binary table objects as additional
        positional arguments.  Each table corresponds to a different HDU, but
        contains only the records recorded as belonging to a single instance
        of the class being loaded.

        The loader for a particular class almost always take the same number
        of table arguments (but may accept a variable number in order to
        internally support backwards compatibility for an older on-disk
        representation).
        """
        self._loaders[module, name] = loader

    def __init__(self, hdus):
        index_hdu_n = None
        for i, hdu in enumerate(hdus):
            if "AR_HDU" in hdu.header:
                index_hdu_n = hdu.header["AR_HDU"] - 1
                break
            if hdu.header.get("EXTTYPE", "") == "ARCHIVE_INDEX":
                index_hdu_n = i
                break
        self._index = hdus[index_hdu_n]
        self._tables = hdus[index_hdu_n + 1:
                            index_hdu_n + self._index.header["AR_NCAT"]]
        for i, hdu in enumerate(self._tables):
            assert hdu.header["AR_CATN"] == i + 1
            assert hdu.header["EXTTYPE"] == "ARCHIVE_DATA"
        self._objects = {}

    def __getitem__(self, i):
        i = int(i)  # for numpy integer scalars
        if i == 0:
            return None
        try:
            return self._objects[i]
        except KeyError:
            pass
        index_rows = self._index.data[self._index.data["id"] == i]
        tables = {}
        module = None
        name = None
        for row in index_rows:
            if module is None:
                module = row["module"]
            else:
                assert module == row["module"]
            if name is None:
                name = row["name"]
            else:
                assert name == row["name"]
            table = self._tables[row["cat.archive"] - 1].data
            start = row["row0"]
            stop = start + row["nrows"]
            tables[row["cat.persistable"]] = table[start:stop]
        loader = self._loaders[module, name]
        result = loader(self, *[tables[n] for n in sorted(tables.keys())])
        self._objects[i] = result
        return result


Archive.register_loader("lsst.afw.geom", "SpanSet",
                        SpanSet.load)
Archive.register_loader("lsst.afw.detection", "Footprint",
                        Footprint.load)
Archive.register_loader("lsst.afw.detection", "HeavyFootprint",
                        HeavyFootprint.load)
	#
	# Copyright (c) 2018 Jim Bosch
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to
	# deal in the Software without restriction, including without limitation the
	# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	# sell copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	# IN THE SOFTWARE.

	"""A minimally functional pure-Python reimplementation LSST's SpanSet and
	Footprint classes, along with a simplified FITS archive reimplementation to
	read them.
	"""

	__all__ = ("Span", "Box", "SpanSet", "Footprint", "HeavyFootprint", "Archive")


	from collections import namedtuple
	import numpy as np


	Span = namedtuple("Span", ["y", "x0", "x1"])


	Box = namedtuple("Box", ["y0", "y1", "x0", "x1"])


	class SpanSet:
	"""A Run-length-encoded pixel region.

	SpanSet behaves like an immutable container of Spans, which are (named)
	tuples of (y, x0, x1), with both x coordinates inclusive.
	The only container operations supported are iteration and `len()`.

	This is a much-simplified but dependency-free version of
	lsst.afw.geom.SpanSet.

	Parameters
	----------
	data : `np.ndarray`
	A 1-d structured array with "y", "x0", and "x1" fields (all integers).
	"""

	@classmethod
	def load(cls, archive, table):
	"""Read a SpanSet from an Archive.

	This function is usable as an Archive loader, and may be registered as
	such with Archive.register_loader. Only Archive should call it.
	"""
	return cls(np.array(table))

	def __init__(self, data):
	self._data = data
	self._area = None
	self._bbox = None

	def __iter__(self):
	for record in self._data:
	yield Span(y=int(record["y"]), x0=int(record["x0"]),
	x1=int(record["x1"]))

	def __len__(self):
	return len(self._data)

	@property
	def area(self):
	"""The number of pixels in the SpanSet (int)."""
	if self._area is None:
	self._area = (1 + self._data["x1"] - self._data["x0"]).sum()
	return self._area

	@property
	def bbox(self):
	"""The bounding box of the SpanSet, with all bounds inclusive."""
	if self._bbox is None:
	self._bbox = Box(y0=int(self._data["y"].min()),
	y1=int(self._data["y"].max()),
	x0=int(self._data["x0"].min()),
	x1=int(self._data["x1"].max()))
	return self._bbox


	class Footprint:
	"""A combination of a SpanSet and a table of peaks.

	This is a much-simplified but dependency-free version of
	lsst.afw.detection.Footprint.

	Parameters
	----------
	spans : `SpanSet`
	The SpanSet that manages the pixel region covered by the Footprint.
	peaks : `numpy.ndarray`
	A 1-d structured array containing information about above-threshold peaks
	detected within the Footprint's pixel region.
	"""

	@classmethod
	def load(cls, archive, span_table, peak_table):
	"""Read a Footprint from an Archive.

	This function is usable as an Archive loader, and may be registered as
	such with Archive.register_loader. Only Archive should call it.
	"""
	if len(span_table.dtype.fields) == 1:
	spans = archive[span_table[0]["id"]]
	else: # backwards compatibility for older persisted Footprints
	spans = SpanSet.load(archive, span_table)
	peaks = np.array(peak_table)
	return cls(spans=spans, peaks=peaks)

	def __init__(self, spans, peaks):
	self.spans = spans
	self.peaks = peaks


	class HeavyFootprint(Footprint):
	"""A Footprint that includes flatted image, mask, and variance arrays.

	This is a much-simplified but dependency-free version of
	lsst.afw.detection.HeavyFootprint.

	Parameters
	----------
	spans : `SpanSet`
	The SpanSet that manages the pixel region covered by the Footprint.
	peaks : `numpy.ndarray`
	A 1-d structured array containing information about above-threshold
	peaks detected within the Footprint's pixel region.
	image : `numpy.ndarray`
	A 1-d array of floating-point numbers that contains the values of
	pixels within the Footprint, with values for different Spans simply
	concatenated together.
	mask : `numpy.ndarray`
	A 1-d array of integers that contains the values of bit mask pixels
	within the Footprint, with values for different Spans simply
	concatenated together.
	image : `numpy.ndarray`
	A 1-d array of floating-point numbers that contains the values of
	variance pixels within the Footprint, with values for different Spans
	simply concatenated together.
	"""

	@classmethod
	def load(cls, archive, span_table, peak_table, pixel_table):
	"""Read a Footprint from an Archive.

	This function is usable as an Archive loader, and may be registered as
	such with Archive.register_loader. Only Archive should call it.
	"""
	base = Footprint.load(archive, span_table, peak_table)
	image = np.array(pixel_table[0]["image"])
	mask = np.array(pixel_table[0]["mask"])
	variance = np.array(pixel_table[0]["variance"])
	return cls(spans=base.spans, peaks=base.peaks, image=image, mask=mask,
	variance=variance)

	def __init__(self, spans, peaks, image, mask, variance):
	super().__init__(spans, peaks)
	self.image = image
	self.mask = mask
	self.variance = variance


	class Archive:
	"""A simplified reader for lsst.afw.table.io persistence framework.

	This is a minimal pure-Python replacement for the much more general
	archive classes defined (in C++) in LSST's afw package.

	Archive is a somewhat dict-like object (supporting only `__getitem__`) that
	reads objects saved in a multi-HDU FITS archive written by LSST code.
	Such archives are often included in LSST image and catalog files, and hold
	more complex data structures that are related to the image or catalog.
	For example, persisted SourceCatalogs have a "footprint" column containing
	an integer ID that can be used to obtain [Heavy]Footprints from an Archive.

	To load an individual object from the Archive, simply invoke `__getitem__`
	with the integer ID of the object. This ID is usually recorded elsewhere
	in the FITS file, such as the main image header (for e.g. PSF models
	stored with an image) or catalog columns (for Footprints, as above).
	Some objects are persisted by persisting child objects and recording
	the archive IDs of those children. A persisted Footprint, for instance,
	holds the archive ID of the SpanSet it contains.

	Archive caches all prevously-loaded objects for faster retrieval.

	An archive ID of zero always results in `None` being returned; this is
	sometimes used to persist parent objects that have optional child objects.

	For retrieval of any particular type of object to work, a loader callable
	must be registered with the Archive class via the `register_loader` method.
	On-disk archives contain the Python module name and class name of the
	object to be loaded; the full persistence framework in lsst.afw imports
	these modules to ensure its loaders are present. The loaders used by this
	stand-in system has no such capability, and instead simply maintains a
	complete dictionary of pure-Python loaders that must be populated (usually
	at module-import time) by any code that wants to use Archive. See
	`register_loader` for more information on loader signatures.

	Parameters
	----------
	hdus : `astropy.io.fits.HDUList`
	A list of astropy- or pyfits-like HDU objects. The Archive will
	automatically extract from this the HDUs representing archive data.

	"""

	_loaders = {}

	@classmethod
	def register_loader(self, module, name, loader):
	"""Register the given function as a loader for objects saved with the
	given module and class name.

	Loaders are callables that take an Archive as their first argument,
	followed by any number of FITS binary table objects as additional
	positional arguments. Each table corresponds to a different HDU, but
	contains only the records recorded as belonging to a single instance
	of the class being loaded.

	The loader for a particular class almost always take the same number
	of table arguments (but may accept a variable number in order to
	internally support backwards compatibility for an older on-disk
	representation).
	"""
	self._loaders[module, name] = loader

	def __init__(self, hdus):
	index_hdu_n = None
	for i, hdu in enumerate(hdus):
	if "AR_HDU" in hdu.header:
	index_hdu_n = hdu.header["AR_HDU"] - 1
	break
	if hdu.header.get("EXTTYPE", "") == "ARCHIVE_INDEX":
	index_hdu_n = i
	break
	self._index = hdus[index_hdu_n]
	self._tables = hdus[index_hdu_n + 1:
	index_hdu_n + self._index.header["AR_NCAT"]]
	for i, hdu in enumerate(self._tables):
	assert hdu.header["AR_CATN"] == i + 1
	assert hdu.header["EXTTYPE"] == "ARCHIVE_DATA"
	self._objects = {}

	def __getitem__(self, i):
	i = int(i) # for numpy integer scalars
	if i == 0:
	return None
	try:
	return self._objects[i]
	except KeyError:
	pass
	index_rows = self._index.data[self._index.data["id"] == i]
	tables = {}
	module = None
	name = None
	for row in index_rows:
	if module is None:
	module = row["module"]
	else:
	assert module == row["module"]
	if name is None:
	name = row["name"]
	else:
	assert name == row["name"]
	table = self._tables[row["cat.archive"] - 1].data
	start = row["row0"]
	stop = start + row["nrows"]
	tables[row["cat.persistable"]] = table[start:stop]
	loader = self._loaders[module, name]
	result = loader(self, *[tables[n] for n in sorted(tables.keys())])
	self._objects[i] = result
	return result


	Archive.register_loader("lsst.afw.geom", "SpanSet",
	SpanSet.load)
	Archive.register_loader("lsst.afw.detection", "Footprint",
	Footprint.load)
	Archive.register_loader("lsst.afw.detection", "HeavyFootprint",
	HeavyFootprint.load)