Last active
February 20, 2019 15:36
-
-
Save khaeru/5b8139bcfb9a02b585459d0ecd915861 to your computer and use it in GitHub Desktop.
Reporting using a directed acyclic graph (DAG)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Reporting using a directed acyclic graph (DAG). | |
2019-01-24 Paul Natsuo Kishimoto <mail@paul.kishimoto.name> | |
This is a demo of one possible pattern for a reporting architecture for ixmp | |
/MESSAGE. It uses a directed acyclic graph (DAG) where the nodes are operations | |
and edges are data. Calling get(…) on the graph causes a node's output, and all | |
its dependencies, to be retrieved. | |
This is implemented using dask: | |
- http://docs.dask.org/en/latest/graphs.html | |
- http://docs.dask.org/en/latest/optimize.html#example | |
…but any other package which supports computation on graphs could be used. | |
The implementation is in three parts: | |
1. Very minimal data structures to mock ixmp.Scenario. | |
2. The internal pieces: | |
- Basic node (calculation) operations. | |
- Tools for manipulating graphs of nodes. | |
3. The user interface. <-- Look at this first! | |
""" | |
from functools import partial | |
import dask | |
import numpy as np | |
import xarray as xr | |
# 1. Very minimal demonstration data ------------------------------------------ | |
# Model dimensions | |
# Labels along each model dimension, keyed by dimension name
coords = dict(
    level=['primary', 'final'],
    species=['CO2', 'CH4'],
    region=['AT', 'CA', 'US'],
    # Decadal time steps, 2020 through 2050 inclusive
    year=list(range(2020, 2051, 10)),
)
# Model quantities and their dimensions | |
# Each model quantity mapped to the tuple of dimensions it is indexed by.
# NOTE(review): this name shadows the builtin vars(); other parts of the file
# refer to it, so it is kept as-is here.
vars = dict(
    gdp=('region', 'year'),
    population=('region', 'year'),
    energy=('region', 'year', 'level'),
    emissions=('region', 'year', 'species'),
)
# A toy 'Scenario' with a 'data' attribute | |
class Scenario:
    """Toy scenario object that fills itself with random data.

    Attributes set by the constructor:

    - ``name``: a fixed label, 'Demo scenario'.
    - ``data``: an xarray Dataset with one variable per entry in the
      module-level ``vars`` mapping, indexed by the labels in ``coords``.
    """

    def __init__(self):
        self.name = 'Demo scenario'
        self.data = xr.Dataset({}, coords)

        # One random array per model quantity, shaped by the lengths of the
        # dimensions that quantity is declared over
        for quantity, dim_names in vars.items():
            shape = tuple(len(coords[dim]) for dim in dim_names)
            self.data[quantity] = (dim_names, np.random.rand(*shape))

    def __repr__(self):
        return '<Scenario>'
# 2. Internal pieces ---------------------------------------------------------- | |
# Node operations | |
def data(scenario, name):
    """Look up the variable called *name* in *scenario*'s ``data``.

    Sibling operations of the same shape could bring in:

    - non-model data required to compute derived quantities, or
    - configuration values.
    """
    variables = scenario.data
    return variables[name]
def aggregate(data, dimension):
    """Sum *data* over *dimension* and return the result.

    A message is printed on every call, so it is visible when this node is
    actually computed.
    """
    print('Computing an aggregate!')
    total = data.sum(dim=dimension)
    return total
def cumulative(data):
    """Return the total of *data* summed over the 'year' dimension.

    This is the cumulative total across all years, not a running
    (year-by-year) cumulative sum.
    """
    total_over_years = data.sum(dim='year')
    return total_over_years
def ratio(num, denom):
    """Return *num* divided by *denom*.

    A message is printed on every call, so it is visible when this node is
    actually computed.
    """
    print('Computing a ratio!')
    quotient = num / denom
    return quotient
def simple_report(scenario, gdp_cap):
    """Print a short report: the scenario name and a GDP-per-capita table.

    *gdp_cap* is converted to a data frame named 'gdp_cap' and unstacked on
    'year' before printing. Nothing is returned.

    An operation like this could equally:

    - write to a file, a database, etc.
    - return a value of a specific type, e.g. pyam.IamDataFrame.
    """
    table = gdp_cap.to_dataframe(name='gdp_cap').unstack('year')
    sections = (
        'Scenario name: {}'.format(scenario.name),
        'GDP per capita:',
        table,
        '…done!',
    )
    # Sections are separated by a blank line
    print(*sections, sep='\n\n')
# Tools for manipulating graphs | |
def add_vars(graph):
    """Add one node per raw model variable to *graph* and return it.

    Each node is a task that calls data() with the variable's name on the
    value of the 'scenario' node. *graph* is modified in place.
    """
    # partial() fixes the variable name at the moment each task is created
    graph.update(
        (name, (partial(data, name=name), 'scenario')) for name in vars
    )
    return graph
def basic_graph(scenario):
    """Return a minimal graph built around *scenario*.

    The returned dict contains:

    - 'scenario': the *scenario* object passed in, stored as a literal value.
    - one node per raw model variable, added by add_vars().
    - 'gdp_cap': the ratio of the 'gdp' and 'population' nodes.
    - 'report': simple_report() applied to 'scenario' and 'gdp_cap'.
    """
    # Use the caller's scenario. Previously a brand-new Scenario() was created
    # here and the argument was silently ignored, so the graph never reported
    # on the object the caller supplied.
    graph = {'scenario': scenario}
    graph = add_vars(graph)

    # Add some operations automatically. They aren't computed unless a call to
    # get() necessitates.
    graph['gdp_cap'] = (ratio, 'gdp', 'population')
    graph['report'] = (simple_report, 'scenario', 'gdp_cap')

    return graph
def print_graph(graph):
    """Print each 'key: value' entry of *graph* on its own line.

    A trailing blank line is printed after the last entry.
    """
    for key, task in graph.items():
        print(key, task, sep=': ')
    print()
# 3. User interface ----------------------------------------------------------- | |
# Suppose here we have an ixmp.Scenario that has been solved | |
s = Scenario()

# Construct a basic graph, passing in this Scenario
graph = basic_graph(s)

# Display the dict that defines the graph
print_graph(graph)

# Ask for a quantity the basic graph already provides. aggregate() is not
# called along the way.
print(dask.get(graph, 'gdp_cap'))

# Run the report node the basic graph already provides
dask.get(graph, 'report')

# Extend the graph with one more node: emissions summed over region and year
graph['total emissions'] = (aggregate, 'emissions', ['region', 'year'])

# Retrieving the new node does not call ratio()
print(dask.get(graph, 'total emissions'))
# Define and add a custom operation | |
def custom_sum(emissions, gdp):
    """Add apples to oranges…utter madness!

    Picks the ('AT', 2020, 'CH4') entry of *emissions* and the ('US', 2050)
    entry of *gdp*, adds them, and returns the ``.values`` of the sum.
    """
    emissions_entry = emissions.loc['AT', 2020, 'CH4']
    gdp_entry = gdp.loc['US', 2050]
    combined = emissions_entry + gdp_entry
    return combined.values
# Register the custom operation as a node fed by two raw variables, then
# retrieve and print its value
graph['nonsense'] = (custom_sum, 'emissions', 'gdp')
print(dask.get(graph, 'nonsense'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The output: