# Gist andres-fr/132a8f760bf0a7dfa5bbb8ec8aa4b4ab
# Last active: September 19, 2019 05:15
# Python3 unittest suite for the graphkit library
# (https://github.com/yahoo/graphkit)
#
# GitHub page notices captured together with the file:
# * Save andres-fr/132a8f760bf0a7dfa5bbb8ec8aa4b4ab to your computer and use
#   it in GitHub Desktop.
# * This file contains bidirectional Unicode text that may be interpreted or
#   compiled differently than what appears below. To review, open the file in
#   an editor that reveals hidden Unicode characters. Learn more about
#   bidirectional Unicode characters.
# -*- coding:utf-8 -*-


"""
This module contains test cases regarding usage of Yahoo's ``graphkit``
library for DAG-based computation: https://github.com/yahoo/graphkit
"""
import unittest
from typing import List, Dict
#
import graphkit
from graphkit.modifiers import optional
class LoggedCalculator:
    """
    Performs some arithmetic calculations and keeps track of them.

    Every operation appends a short tag to ``computed`` before returning its
    result, so callers can check which operations ran and in which order.
    """

    def __init__(self) -> None:
        # Tags of the operations performed so far, in call order.
        self.computed: List[str] = []

    def constant_one(self, *args, **kwargs) -> float:
        """
        Log ``"c1"`` and return ``1``.

        Any positional or keyword arguments are accepted and ignored.
        """
        self.computed.append("c1")
        return 1

    def sum(self, a: float, b: float) -> float:
        """
        Log ``"sum"`` and return ``a + b``.
        """
        self.computed.append("sum")
        return a + b

    def mul(self, a: float, b: float, c: float = 1) -> float:
        """
        Log ``"mul"`` and return ``a * b * c``.

        .. note::
          The default parameters like ``c`` shall become
          ``graphkit.modifiers.optional`` entries with the same name
        """
        self.computed.append("mul")
        return a * b * c

    def abspow(self, a: float, p: float) -> float:
        """
        Log ``"abspow"`` and return ``abs(a) ** p``.
        """
        self.computed.append("abspow")
        return abs(a) ** p
class GraphkitBasicTestCase(unittest.TestCase):
    """
    Tests basic functionality:

    * Operation and graph creation
    * Full and partial forward propagation
    * Laziness
    * Representing optional parameters as optional nodes
    """

    def setUp(self) -> None:
        """
        Create a simple computational graph using ops from LoggedCalculator::

          abs(a + (a * b * c?)) ** 3

        A fresh ``LoggedCalculator`` is created for every test, so
        ``self.lc.computed`` only holds the calls made by that test.
        """
        self.lc: LoggedCalculator = LoggedCalculator()
        # note the optional node "c" which has to be named like the kwarg
        mul1 = graphkit.operation(name="mul1",
                                  needs=["a", "b", optional("c")],
                                  provides=["ab(c)"])(self.lc.mul)
        sum1 = graphkit.operation(name="sum1",
                                  needs=["a", "ab(c)"],
                                  provides=["a_plus_ab(c)"])(self.lc.sum)
        # "p" is a fixed parameter of the operation, not a graph node
        ap1 = graphkit.operation(name="abspow1",
                                 needs=["a_plus_ab(c)"],
                                 params={"p": 3},
                                 provides=["a_plus_ab(c)_cubed"])(
                                     self.lc.abspow)
        # Ordering in operation collection doesn't matter
        # needs/provides takes care of that
        self.graph = graphkit.compose(name="test_graph")(sum1, mul1, ap1)

    def test_full_forward(self) -> None:
        """
        Perform a full forward propagation and check that methods are called
        each once and in order.
        """
        out: Dict[str, int] = self.graph({"a": 2, "b": 5})
        # 2 * 5 = 10;  2 + 10 = 12;  abs(12) ** 3 = 1728
        expected_out: Dict[str, int] = {"a": 2, "b": 5,
                                        "ab(c)": 10,
                                        "a_plus_ab(c)": 12,
                                        "a_plus_ab(c)_cubed": 1728}
        #
        self.assertEqual(out, expected_out)
        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])

    def test_partial_forward(self) -> None:
        """
        Perform a partial forward propagation and check that only needed
        methods are computed
        """
        out: Dict[str, int] = self.graph({"a": 2, "b": 5},
                                         outputs=["a_plus_ab(c)"])
        # only the requested node is expected in the output dict
        expected_out: Dict[str, int] = {"a_plus_ab(c)": 12}
        #
        self.assertEqual(out, expected_out)
        self.assertEqual(self.lc.computed, ["mul", "sum"])

    def test_laziness(self) -> None:
        """
        Perform a partial forward, check methods, then compute further
        and check that no duplicated computation was needed
        """
        out1: Dict[str, int] = self.graph({"a": 2, "b": 5},
                                          outputs=["a_plus_ab(c)"])
        # provide the out1 as input to the same graph and ask further
        out2: Dict[str, int] = self.graph(out1,
                                          outputs=["a_plus_ab(c)_cubed"])
        # Each op was only computed once
        self.assertEqual(out2["a_plus_ab(c)_cubed"], 1728)
        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])

    def test_optional(self) -> None:
        """
        Like ``test_full_forward`` but providing optional node ``c``
        """
        out: Dict[str, int] = self.graph({"a": 2, "b": 5, "c": 0})
        # 2 * 5 * 0 = 0;  2 + 0 = 2;  abs(2) ** 3 = 8
        expected_out: Dict[str, int] = {"a": 2, "b": 5, "c": 0,
                                        "ab(c)": 0,
                                        "a_plus_ab(c)": 2,
                                        "a_plus_ab(c)_cubed": 8}
        #
        self.assertEqual(out, expected_out)
        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])
class GraphkitMergeTestCase(unittest.TestCase):
    """
    Tests multi-graph built-in functionality:

    * Add nodes to existing graph
    * Merge different graphs with common nodes
    """

    def setUp(self) -> None:
        """
        Create a simple computational graph using ops from LoggedCalculator::

          abs(a + (a * b * c?)) ** 3

        A fresh ``LoggedCalculator`` is created for every test, so
        ``self.lc.computed`` only holds the calls made by that test.
        """
        self.lc: LoggedCalculator = LoggedCalculator()
        # note the optional node "c" which has to be named like the kwarg
        mul1 = graphkit.operation(name="mul1",
                                  needs=["a", "b", optional("c")],
                                  provides=["ab(c)"])(self.lc.mul)
        sum1 = graphkit.operation(name="sum1",
                                  needs=["a", "ab(c)"],
                                  provides=["a_plus_ab(c)"])(self.lc.sum)
        # "p" is a fixed parameter of the operation, not a graph node
        ap1 = graphkit.operation(name="abspow1",
                                 needs=["a_plus_ab(c)"],
                                 params={"p": 3},
                                 provides=["a_plus_ab(c)_cubed"])(
                                     self.lc.abspow)
        # Ordering in operation collection doesn't matter
        # needs/provides takes care of that
        self.graph = graphkit.compose(name="test_graph")(sum1, mul1, ap1)

    def test_add_single_op(self) -> None:
        """
        Add an operation to the setup graph and perform a full forward
        propagation.
        """
        # Extra op on top of self.graph: it consumes the last output of
        # self.graph plus a new input "x"
        sum2 = graphkit.operation(name="sum2",
                                  needs=["a_plus_ab(c)_cubed", "x"],
                                  provides=["tg_plus_x"])(self.lc.sum)
        bigger_graph = graphkit.compose(name="bigger_graph")(self.graph, sum2)
        #
        out: Dict[str, int] = bigger_graph({"a": 2, "b": 5, "x": 1000})
        # 1728 + 1000 = 2728
        expected_out: Dict[str, int] = {"a": 2, "b": 5, "x": 1000,
                                        "ab(c)": 10,
                                        "a_plus_ab(c)": 12,
                                        "a_plus_ab(c)_cubed": 1728,
                                        "tg_plus_x": 2728}
        #
        self.assertEqual(out, expected_out)
        # the same calculator served both sums, hence "sum" twice
        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow", "sum"])

    def test_add_name_collisions(self) -> None:
        """
        Compose a graph with some common input and throughput names and observe
        the following result:

        1. Parameters named equal have to be fed with the same value in the
           input dict (see "a" vs. "b, b2").
        2. If two throughput nodes have the same name, the dependencies are
           respected, and both operations are performed separately (i.e.
           operations on the first will use the first, etc), but only one of
           them will figure in the output dict (the one defined last in the
           case of this test).

        .. note::
          Due to these reasons best is to avoid naming collisions using
          namespaces.
        """
        # make a second graph with its separate logger. note how "a" is shared
        # with self.graph but "b2" is different
        lc2 = LoggedCalculator()
        mul2 = graphkit.operation(name="mul2",
                                  needs=["a", "b2", optional("c")],
                                  provides=["ab(c)"])(lc2.mul)
        sum2 = graphkit.operation(name="sum2",
                                  needs=["ab(c)", "w"],
                                  provides=["w_plus_ab(c)"])(lc2.sum)
        graph2 = graphkit.compose(name="graph2")(mul2, sum2)
        # compose both graphs (no merge). "mul1" and "mul2" both provide
        # "ab(c)": the output dict is expected to hold the graph2 value
        # (2 * 1000 = 2000), although "sum1" still consumed the self.graph
        # value (2 * 5 = 10)
        graph3 = graphkit.compose(name="graph3")(self.graph, graph2)
        out3: Dict[str, int] = graph3({"a": 2, "b": 5, "b2": 1000, "w": 1})
        expected_out3: Dict[str, int] = {"a": 2, "b": 5, "b2": 1000, "w": 1,
                                         "ab(c)": 2000,
                                         # NOTE how this is 12 not 2002
                                         "a_plus_ab(c)": 12,
                                         "a_plus_ab(c)_cubed": 1728,
                                         # BUT this is 2001
                                         "w_plus_ab(c)": 2001}
        self.assertEqual(out3, expected_out3)
        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])
        self.assertEqual(lc2.computed, ["mul", "sum"])

    def test_graph_merge_ok(self) -> None:
        """
        Graph merging is based on operation name: operations in
        different graphs with same name will be attempted to be merged,
        **irrespectively of differing inputs and outputs and differing
        functionality**.

        .. note::
          The merge ordering matters: all the nodes of the first graph are
          kept, and for the subsequent graphs, "merging" effectively means
          using the preexisting node and discarding the latter one (assuming
          they have identical interface).

        This method tests the merging behaviour with straightforward settings:
        A graph also in the form ``abs(a + (a * b * c?)) ** 2`` is defined
        almost identically as ``self.graph``, the abspow being named and
        parametrized differently.
        """
        # make a second graph. The operations are performed by a separate
        # LoggedCalculator. See how "sum1" gets factored out and no error
        # arises regardless of its unique interface and operation
        lc2: LoggedCalculator = LoggedCalculator()
        mul2 = graphkit.operation(name="mul1",
                                  needs=["a", "b", optional("c")],
                                  provides=["ab(c)"])(lc2.mul)
        sum2 = graphkit.operation(name="sum1",
                                  needs=["WHATEVER", "ASDF", "?"],
                                  provides=["FOO"])(lc2.constant_one)
        ap2 = graphkit.operation(name="abspow2",
                                 needs=["a_plus_ab(c)"],
                                 params={"p": 2},
                                 provides=["a_plus_ab(c)_squared"])(
                                     lc2.abspow)
        graph2 = graphkit.compose(name="graph2")(mul2, sum2, ap2)
        # merge both graphs
        graph3 = graphkit.compose(name="graph3", merge=True)(self.graph,
                                                             graph2)
        out3: Dict[str, int] = graph3({"a": 2, "b": 5})
        # abs(12) ** 2 = 144 is the only value contributed by graph2
        expected_out3: Dict[str, int] = {"a": 2, "b": 5,
                                         "ab(c)": 10,
                                         "a_plus_ab(c)": 12,
                                         "a_plus_ab(c)_cubed": 1728,
                                         "a_plus_ab(c)_squared": 144}
        #
        self.assertEqual(out3, expected_out3)
        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])
        self.assertEqual(lc2.computed, ["abspow"])  # the only g2 contribution

    def test_graph_merge_dropping(self) -> None:
        """
        This method tests the merging behaviour when node2 gets merged
        with node1 with a different interface, and node3 depends on node2.
        We observe the following:

        * The output of node2 can't be reached through the graph
        * node3 will be dropped from the graph with no error reported
        * No graph2 computation will be performed by the merged graph
        """
        # create graph2 with breaking interface
        lc2: LoggedCalculator = LoggedCalculator()
        mul2 = graphkit.operation(name="mul1",
                                  needs=["a", "b", optional("c")],
                                  provides=["ab(c)"])(lc2.mul)
        # note the difference: provides "some_sum"
        sum2 = graphkit.operation(name="sum1",
                                  needs=["a", "ab(c)"],
                                  provides=["some_sum"])(lc2.sum)
        # this node depends on "some_sum"
        ap2 = graphkit.operation(name="abspow2",
                                 needs=["some_sum"],
                                 params={"p": 2},
                                 provides=["one_squared"])(lc2.abspow)
        graph2 = graphkit.compose(name="graph2")(mul2, sum2, ap2)
        # merge both graphs
        graph3 = graphkit.compose(name="graph3", merge=True)(self.graph,
                                                             graph2)
        out3: Dict[str, int] = graph3({"a": 2, "b": 5})
        # "one_squared" is not present because abspow2 was dropped
        # This looks like a regular forward propagation of graph 1
        expected_out3: Dict[str, int] = {"a": 2, "b": 5,
                                         "ab(c)": 10,
                                         "a_plus_ab(c)": 12,
                                         "a_plus_ab(c)_cubed": 1728}
        #
        self.assertEqual(out3, expected_out3)
        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])
        self.assertEqual(lc2.computed, [])  # expect g2 to be completely out
# GitHub page footer captured together with the file:
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment