Last active
May 14, 2020 14:46
-
-
Save TallJimbo/1a2262743e5bfc34a8c1dd6100ea98d9 to your computer and use it in GitHub Desktop.
Interface sketch for Gen3 registry dimensions pre-fetching object
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DimensionConnector(ABC):
    """An interface for objects that optimize joins and other queries on
    dimension information via pre-fetching.

    Parameters
    ----------
    dimensions
        All dimensions that will ever be passed to or used by this object.
        In the QuantumGraph generation use case, this would be all
        dimensions referenced by any dataset type or task in the full
        pipeline, as well as any in the dimension query expression.

    Notes
    -----
    The nominal implementation is some kind of denormalized table,
    with a column for each dimension, but implementations backed by
    graph data structures or normalized in-memory tables and
    indices may also be possible.
    """

    def __init__(self, dimensions: DimensionGraph) -> None:
        # Stored as-is; this base class performs no validation or copying.
        self.dimensions = dimensions

    @abstractmethod
    def update(
        self,
        dataIds: Iterable[Tuple[DimensionGraph, Iterable[DataCoordinate]]], *,
        where: Optional[QueryWhereExpression] = None
    ):
        """Add data IDs to the connector.

        Keys for all dimensions in ``self.dimensions`` are added, but only
        for rows that are related to at least one of the given coordinates
        for each of the given `DimensionGraph` objects.

        In the QG gen use case, this would be called when starting the
        iteration and initially constraining the processing to be done, and
        _sometimes_ by backward-propagation steps that need to expand their
        inputs beyond naive overlaps (e.g. jointcal or fgcm).

        Parameters
        ----------
        dataIds
            Data IDs grouped by their dimensions.
        where
            Optional query expression; keyword-only, defaults to `None`.
            NOTE(review): presumably an extra constraint on the rows that
            are added -- the sketch does not specify; confirm.
        """

    @abstractmethod
    def walk(
        self,
        dimensions: DimensionGraph, *,
        expand: bool = True
    ) -> Iterator[DataCoordinate]:
        """Iterate over unique data IDs with the given dimensions.

        Parameters
        ----------
        dimensions
            Dimensions of the data IDs to iterate over.
        expand
            Keyword-only flag, defaults to `True`.
            NOTE(review): presumably controls whether the yielded data IDs
            are expanded (see `expandDataId`) -- confirm.
        """

    @abstractmethod
    def group(
        self,
        keys: DimensionGraph,
        values: Set[DimensionGraph], *,
        expand: bool = True
    ) -> Dict[DataCoordinate, Dict[DimensionGraph, Set[DataCoordinate]]]:
        """Return all unique data IDs with ``keys`` dimensions, along with
        all related data IDs of each of ``values`` dimensions.

        In QG gen, this would be called by forward/backward impls with task
        dimensions as keys and dataset dimensions as values. Those would
        then need to reject quanta that cannot be produced (if going forward)
        or are not needed (if going backward), but should be able to assume
        that the number of rejected quanta is a small fraction of the total.

        Backward implementations that need more datasets than "natural"
        dimension relationships imply (e.g. full visits for jointcal or
        fgcm) should first call `update` to expand the set of data IDs known
        to the connector accordingly.

        Parameters
        ----------
        keys
            Dimensions of the data IDs used as keys of the outer mapping.
        values
            Dimension sets for which related data IDs are returned; each is
            a key of the inner mapping.
        expand
            Keyword-only flag, defaults to `True`.
            NOTE(review): presumably controls whether the returned data IDs
            are expanded (see `expandDataId`) -- confirm.
        """

    @abstractmethod
    def fetch(
        self,
        element: DimensionElement,
        dataId: DataCoordinate
    ) -> DimensionRecord:
        """Fetch the `DimensionRecord` for the given element and data ID.

        Parameters
        ----------
        element
            Dimension element whose record is requested.
        dataId
            Data ID identifying the record.
        """

    @abstractmethod
    def expandDataId(
        self,
        dataId: DataId, *,
        graph: Optional[DimensionGraph] = None
    ) -> ExpandedDataCoordinate:
        """Expand the given data ID.

        Parameters
        ----------
        dataId
            Data ID to expand.
        graph
            Optional dimensions; keyword-only, defaults to `None`.
            NOTE(review): presumably the dimensions of the expanded result
            -- the sketch does not specify; confirm.
        """
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment