Skip to content

Instantly share code, notes, and snippets.

@khaeru
Last active February 20, 2019 15:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save khaeru/5b8139bcfb9a02b585459d0ecd915861 to your computer and use it in GitHub Desktop.
Save khaeru/5b8139bcfb9a02b585459d0ecd915861 to your computer and use it in GitHub Desktop.
Reporting using a directed acyclic graph (DAG)
"""Reporting using a directed acyclic graph (DAG).
2019-01-24 Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
This is a demo of one possible pattern for a reporting architecture for
ixmp/MESSAGE. It uses a directed acyclic graph (DAG) in which the nodes are
operations and the edges are data. Calling get(…) on the graph computes a
node's output, along with all of that node's dependencies.
This is implemented using dask:
- http://docs.dask.org/en/latest/graphs.html
- http://docs.dask.org/en/latest/optimize.html#example
…but any other package which supports computation on graphs could be used.
The implementation is in three parts:
1. Very minimal data structures to mock ixmp.Scenario.
2. The internal pieces:
- Basic node (calculation) operations.
- Tools for manipulating graphs of nodes.
3. The user interface. <-- Look at this first!
"""
from functools import partial
import dask
import numpy as np
import xarray as xr
# 1. Very minimal demonstration data ------------------------------------------
# Labels along each dimension of the toy model
coords = {
    'level': ['primary', 'final'],
    'species': ['CO2', 'CH4'],
    'region': ['AT', 'CA', 'US'],
    # Decadal time steps
    'year': [2020, 2030, 2040, 2050],
}

# Dimensions along which each model quantity is indexed.
# NOTE: this name shadows the builtin vars(); it is kept because other parts
# of the file refer to it.
vars = {
    'gdp': ('region', 'year'),
    'population': ('region', 'year'),
    'energy': ('region', 'year', 'level'),
    'emissions': ('region', 'year', 'species'),
}
# A toy 'Scenario' with a 'data' attribute
class Scenario:
    """Toy stand-in for a scenario, holding randomly generated data.

    Attributes are set in the constructor:
    - name: a fixed display name.
    - data: an xarray Dataset with one variable per entry of the module-level
      ``vars`` mapping, dimensioned according to ``coords``.
    """

    def __init__(self):
        self.name = 'Demo scenario'
        self.data = xr.Dataset({}, coords)

        # Fill every quantity with uniform random values of the right shape
        for quantity, dimensions in vars.items():
            shape = tuple(len(coords[dim]) for dim in dimensions)
            values = np.random.rand(*shape)
            self.data[quantity] = (dimensions, values)

    def __repr__(self):
        return '<Scenario>'
# 2. Internal pieces ----------------------------------------------------------
# Node operations
def data(scenario, name):
    """Look up the quantity called *name* in the data held by *scenario*.

    Comparable operations could instead load:
    - non-model data required to compute derived quantities, or
    - configuration values.
    """
    dataset = scenario.data
    return dataset[name]
def aggregate(data, dimension):
    """Sum *data* over *dimension* and return the result.

    *dimension* is handed unchanged to ``data.sum(dim=...)``. A message is
    printed so that it is visible when this operation actually runs.
    """
    print('Computing an aggregate!')
    total = data.sum(dim=dimension)
    return total
def cumulative(data):
    """Return *data* summed over its 'year' dimension."""
    over_years = data.sum(dim='year')
    return over_years
def ratio(num, denom):
    """Return *num* divided by *denom*.

    A message is printed so that it is visible when this operation actually
    runs.
    """
    print('Computing a ratio!')
    quotient = num / denom
    return quotient
def simple_report(scenario, gdp_cap):
    """Print a short report: the scenario name and a GDP-per-capita table.

    *gdp_cap* is converted to a data frame named 'gdp_cap' and unstacked on
    'year' before printing. Nothing is returned.

    An operation like this one could also:
    - write to a file, a database, etc.
    - return a value of a specific data type, e.g. pyam.IamDataFrame.
    """
    header = 'Scenario name: {0.name}'.format(scenario)
    table = gdp_cap.to_dataframe(name='gdp_cap').unstack('year')

    # Sections are separated by blank lines
    print(header, 'GDP per capita:', table, '…done!', sep='\n\n')
# Tools for manipulating graphs
def add_vars(graph):
    """Insert one node per raw model variable into *graph* and return it.

    Each node is a task that applies data() to the 'scenario' node, with the
    variable name bound in advance. *graph* is modified in place.
    """
    for var_name in vars:
        fetch = partial(data, name=var_name)
        graph[var_name] = (fetch, 'scenario')
    return graph
def basic_graph(scenario):
    """Return a minimal graph for reporting on *scenario*.

    The graph is a plain dict containing:
    - 'scenario': the *scenario* object itself,
    - one node per raw model variable (see add_vars()),
    - 'gdp_cap': the ratio of 'gdp' to 'population', and
    - 'report': simple_report() applied to 'scenario' and 'gdp_cap'.
    """
    # Store the caller's Scenario in the graph, so that every node reads its
    # data from the object that was passed in
    result = add_vars({'scenario': scenario})

    # Add some operations automatically. They aren't computed unless a call to
    # get() necessitates.
    result['gdp_cap'] = (ratio, 'gdp', 'population')
    result['report'] = (simple_report, 'scenario', 'gdp_cap')
    return result
def print_graph(graph):
    """Print each 'key: value' pair of *graph* on its own line.

    A blank line is printed after the last entry.
    """
    entries = ['{}: {}'.format(key, node) for key, node in graph.items()]
    print(*entries, '', sep='\n')
# 3. User interface -----------------------------------------------------------
# Suppose here we have an ixmp.Scenario that has been solved. In this demo it
# is the toy Scenario class defined in this file.
s = Scenario()
# Construct a basic graph, passing the Scenario to basic_graph()
graph = basic_graph(s)
# Show the data structure that defines the graph: a dict whose values are
# either literal objects or task tuples of the form (callable, *arguments)
print_graph(graph)
# Get an automatically-provided quantity. Only the nodes that 'gdp_cap'
# depends on are computed; note that aggregate() is not called
print(dask.get(graph, 'gdp_cap'))
# Trigger an automatically-provided report. The return value is discarded;
# the report node prints its output. ratio() runs again here, because results
# are not kept between separate get() calls.
dask.get(graph, 'report')
# Add another node: the sum of 'emissions' over both 'region' and 'year'.
# NOTE: dask also substitutes graph keys that appear inside list arguments;
# 'region' and 'year' are not keys of this graph, so they are passed through
# as plain strings.
graph['total emissions'] = (aggregate, 'emissions', ['region', 'year'])
# Note that ratio() is not called, since 'total emissions' does not depend on
# 'gdp_cap'
print(dask.get(graph, 'total emissions'))
# Define and add a custom operation
def custom_sum(emissions, gdp):
    """Add apples to oranges…utter madness!

    Sums the ('AT', 2020, 'CH4') entry of *emissions* and the ('US', 2050)
    entry of *gdp*, and returns the ``.values`` of the result.
    """
    apples = emissions.loc['AT', 2020, 'CH4']
    oranges = gdp.loc['US', 2050]
    mixed = apples + oranges
    return mixed.values
# Register the custom operation as a node that consumes the 'emissions' and
# 'gdp' nodes, then compute and print its value
graph['nonsense'] = (custom_sum, 'emissions', 'gdp')
print(dask.get(graph, 'nonsense'))
@khaeru
Copy link
Author

khaeru commented Jan 24, 2019

The output:

>>> print_graph(graph)
scenario: <Scenario>
gdp: (functools.partial(<function data at 0x7f5a9449f488>, name='gdp'), 'scenario')
population: (functools.partial(<function data at 0x7f5a9449f488>, name='population'), 'scenario')
energy: (functools.partial(<function data at 0x7f5a9449f488>, name='energy'), 'scenario')
emissions: (functools.partial(<function data at 0x7f5a9449f488>, name='emissions'), 'scenario')
gdp_cap: (<function ratio at 0x7f5a944bad90>, 'gdp', 'population')
report: (<function simple_report at 0x7f5a944baf28>, 'scenario', 'gdp_cap')
>>> # Get an automatically-provided quantity. Note that aggregate() is not called
>>> print(dask.get(graph, 'gdp_cap'))
Computing a ratio!
<xarray.DataArray (region: 3, year: 4)>
array([[ 1.284228,  0.362125,  0.077052,  1.544864],
       [ 0.380438,  1.146229,  0.301242, 19.021477],
       [ 0.552561,  0.284474,  0.623086,  1.972913]])
Coordinates:
  * region   (region) <U2 'AT' 'CA' 'US'
  * year     (year) int64 2020 2030 2040 2050
>>> # Trigger an automatically-provided report
>>> dask.get(graph, 'report')
Computing a ratio!
Scenario name: Demo scenario

GDP per capita:

         gdp_cap                               
year        2020      2030      2040       2050
region                                         
AT      1.284228  0.362125  0.077052   1.544864
CA      0.380438  1.146229  0.301242  19.021477
US      0.552561  0.284474  0.623086   1.972913

…done!
>>> # Note that ratio() is not called
>>> print(dask.get(graph, 'total emissions'))
Computing an aggregate!
<xarray.DataArray 'emissions' (species: 2)>
array([5.409533, 4.982363])
Coordinates:
  * species  (species) <U3 'CO2' 'CH4'
>>> print(dask.get(graph, 'nonsense'))
1.0562990534081358

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment