andres-fr/graphkit_utest.py

## graphkit_utest.py
# -*- coding:utf-8 -*-


"""
This module contains test cases regarding usage of Yahoo's ``graphkit``
 library for DAG-based computation: https://github.com/yahoo/graphkit
"""


import unittest
from typing import List, Dict
#
import graphkit
from graphkit.modifiers import optional


class LoggedCalculator:
    """
    Arithmetic helper that logs every operation it performs.

    Each public method appends a short tag to ``self.computed`` before
    computing its result, so callers can check which operations ran and in
    which order.
    """

    def __init__(self):
        # tags of the performed operations, oldest first
        self.computed: List[str] = list()

    def _record(self, tag: str) -> None:
        """
        Append ``tag`` to the log of performed operations.
        """
        self.computed.append(tag)

    def constant_one(self, *args, **kwargs) -> float:
        """
        Log ``"c1"`` and return ``1``; any given arguments are ignored.
        """
        self._record("c1")
        return 1

    def sum(self, a: float, b: float) -> float:
        """
        Log ``"sum"`` and return ``a + b``.
        """
        self._record("sum")
        total = a + b
        return total

    def mul(self, a: float, b: float, c: float = 1) -> float:
        """
        Log ``"mul"`` and return ``a * b * c``.

        .. note::

          Parameters with a default value, like ``c``, shall become
          ``graphkit.modifiers.optional`` entries with the same name
        """
        self._record("mul")
        product = a * b
        product = product * c
        return product

    def abspow(self, a: float, p: float) -> float:
        """
        Log ``"abspow"`` and return ``abs(a)`` raised to the power ``p``.
        """
        self._record("abspow")
        magnitude = abs(a)
        return magnitude ** p


class GraphkitBasicTestCase(unittest.TestCase):
    """
    Covers the basic ``graphkit`` workflow:
    * Creating operations and composing them into a graph
    * Running the whole graph, or only the part needed for given outputs
    * Laziness when intermediate results are fed back into the graph
    * Exposing optional parameters as optional nodes
    """

    def setUp(self) -> None:
        """
        Build a small graph out of ``LoggedCalculator`` methods computing::
          abs(a + (a * b * c?)) ** 3
        """
        self.lc: LoggedCalculator = LoggedCalculator()
        # the optional node is called "c", exactly like the kwarg of ``mul``
        make_mul = graphkit.operation(name="mul1",
                                      needs=["a", "b", optional("c")],
                                      provides=["ab(c)"])
        mul_op = make_mul(self.lc.mul)
        make_sum = graphkit.operation(name="sum1", needs=["a", "ab(c)"],
                                      provides=["a_plus_ab(c)"])
        sum_op = make_sum(self.lc.sum)
        make_pow = graphkit.operation(name="abspow1",
                                      needs=["a_plus_ab(c)"],
                                      params={"p": 3},
                                      provides=["a_plus_ab(c)_cubed"])
        pow_op = make_pow(self.lc.abspow)
        # The operations are deliberately passed out of execution order:
        # the needs/provides declarations are what connects them
        build_graph = graphkit.compose(name="test_graph")
        self.graph = build_graph(sum_op, mul_op, pow_op)

    def test_full_forward(self) -> None:
        """
        Run the complete graph and check that every method was called
        exactly once, in dependency order.
        """
        inputs: Dict[str, int] = {"a": 2, "b": 5}
        result: Dict[str, int] = self.graph(inputs)
        expected: Dict[str, int] = {"a": 2, "b": 5, "ab(c)": 10,
                                    "a_plus_ab(c)": 12,
                                    "a_plus_ab(c)_cubed": 1728}
        self.assertEqual(result, expected)
        self.assertListEqual(self.lc.computed, ["mul", "sum", "abspow"])

    def test_partial_forward(self) -> None:
        """
        Request an intermediate output only and check that just the methods
        needed for it were called.
        """
        inputs: Dict[str, int] = {"a": 2, "b": 5}
        wanted: List[str] = ["a_plus_ab(c)"]
        result: Dict[str, int] = self.graph(inputs, outputs=wanted)
        expected: Dict[str, int] = {"a_plus_ab(c)": 12}
        self.assertEqual(result, expected)
        self.assertListEqual(self.lc.computed, ["mul", "sum"])

    def test_laziness(self) -> None:
        """
        Run the graph partially, feed that result back in to obtain the
        final output, and check that nothing was computed twice.
        """
        partial: Dict[str, int] = self.graph({"a": 2, "b": 5},
                                             outputs=["a_plus_ab(c)"])
        # the partial result is now the input of the very same graph
        final: Dict[str, int] = self.graph(partial,
                                           outputs=["a_plus_ab(c)_cubed"])
        self.assertEqual(final["a_plus_ab(c)_cubed"], 1728)
        # one log entry per operation: no step was repeated
        self.assertListEqual(self.lc.computed, ["mul", "sum", "abspow"])

    def test_optional(self) -> None:
        """
        Same as ``test_full_forward``, but also feeding the optional
        node ``c``.
        """
        inputs: Dict[str, int] = {"a": 2, "b": 5, "c": 0}
        result: Dict[str, int] = self.graph(inputs)
        expected: Dict[str, int] = {"a": 2, "b": 5, "c": 0, "ab(c)": 0,
                                    "a_plus_ab(c)": 2,
                                    "a_plus_ab(c)_cubed": 8}
        self.assertEqual(result, expected)
        self.assertListEqual(self.lc.computed, ["mul", "sum", "abspow"])


class GraphkitMergeTestCase(unittest.TestCase):
    """
    Covers the built-in multi-graph features of ``graphkit``:
    * Extending an existing graph with further operations
    * Merging separate graphs that share nodes
    """

    def setUp(self) -> None:
        """
        Build a small graph out of ``LoggedCalculator`` methods computing::
          abs(a + (a * b * c?)) ** 3
        """
        self.lc: LoggedCalculator = LoggedCalculator()
        # the optional node is called "c", exactly like the kwarg of ``mul``
        make_mul = graphkit.operation(name="mul1",
                                      needs=["a", "b", optional("c")],
                                      provides=["ab(c)"])
        mul_op = make_mul(self.lc.mul)
        make_sum = graphkit.operation(name="sum1", needs=["a", "ab(c)"],
                                      provides=["a_plus_ab(c)"])
        sum_op = make_sum(self.lc.sum)
        make_pow = graphkit.operation(name="abspow1",
                                      needs=["a_plus_ab(c)"],
                                      params={"p": 3},
                                      provides=["a_plus_ab(c)_cubed"])
        pow_op = make_pow(self.lc.abspow)
        # The operations are deliberately passed out of execution order:
        # the needs/provides declarations are what connects them
        build_graph = graphkit.compose(name="test_graph")
        self.graph = build_graph(sum_op, mul_op, pow_op)

    def test_add_single_op(self) -> None:
        """
        Compose the setup graph with one extra operation and run a full
        forward propagation through the result.
        """
        # extra operation consuming the final output of self.graph
        make_sum2 = graphkit.operation(name="sum2",
                                       needs=["a_plus_ab(c)_cubed", "x"],
                                       provides="tg_plus_x")
        extra_op = make_sum2(self.lc.sum)
        build_bigger = graphkit.compose(name="bigger_graph")
        self.graph2 = build_bigger(self.graph, extra_op)
        inputs: Dict[str, int] = {"a": 2, "b": 5, "x": 1000}
        result: Dict[str, int] = self.graph2(inputs)
        expected: Dict[str, int] = {"a": 2, "b": 5, "x": 1000, "ab(c)": 10,
                                    "a_plus_ab(c)": 12,
                                    "a_plus_ab(c)_cubed": 1728,
                                    "tg_plus_x": 2728}
        self.assertEqual(result, expected)
        self.assertListEqual(self.lc.computed,
                             ["mul", "sum", "abspow", "sum"])

    def test_add_name_collisions(self) -> None:
        """
        Compose two graphs that share some input and intermediate node
        names. The assertions document the following behaviour:

        1. Inputs with equal names receive the same value from the input
           dict (see "a", as opposed to "b" and "b2").
        2. When two intermediate nodes share a name, dependencies are
           still respected and both operations run separately (operations
           depending on the first use the first, etc), but only one of the
           values shows up in the output dict (in this test, the one
           defined last).

        .. note::
          Due to these reasons best is to avoid naming collisions using
          namespaces.
        """
        # second graph with its own logger: "a" is shared with self.graph,
        # whereas "b2" is exclusive to this one
        calc2 = LoggedCalculator()
        make_mul2 = graphkit.operation(name="mul2",
                                       needs=["a", "b2", optional("c")],
                                       provides=["ab(c)"])
        mul_op2 = make_mul2(calc2.mul)
        make_sum2 = graphkit.operation(name="sum2",
                                       needs=["ab(c)", "w"],
                                       provides="w_plus_ab(c)")
        sum_op2 = make_sum2(calc2.sum)
        second_graph = graphkit.compose(name="graph2")(mul_op2, sum_op2)

        # compose both graphs (no merge flag). Both of them provide
        # "ab(c)"; the expected dict below only holds the value 2000
        combined = graphkit.compose(name="graph3")(self.graph, second_graph)
        inputs: Dict[str, int] = {"a": 2, "b": 5, "b2": 1000, "w": 1}
        result: Dict[str, int] = combined(inputs)
        expected: Dict[str, int] = {"a": 2, "b": 5, "b2": 1000, "w": 1,
                                    "ab(c)": 2000,
                                    # NOTE how this is 12 not 2002
                                    "a_plus_ab(c)": 12,
                                    "a_plus_ab(c)_cubed": 1728,
                                    # BUT this is 2001
                                    "w_plus_ab(c)": 2001}
        self.assertEqual(result, expected)
        self.assertListEqual(self.lc.computed, ["mul", "sum", "abspow"])
        self.assertListEqual(calc2.computed, ["mul", "sum"])

    def test_graph_merge_ok(self) -> None:
        """
        Graph merging goes by operation name: operations with the same name
        in different graphs will be attempted to be merged,
        **irrespectively of differing inputs and outputs and differing
        functionality**.

        .. note::

          The merge ordering matters: all the nodes of the first graph are
          kept, and for the subsequent graphs, "merging" effectively means
          using the preexisting node and discarding the latter one (assuming
          they have identical interface).

        This method tests merging in a straightforward setting: a second
        graph of the form ``abs(a + (a * b * c?)) ** 2`` is defined almost
        identically to ``self.graph``, with the abspow operation being
        named and parametrized differently.
        """
        # second graph, backed by its own LoggedCalculator. Its "sum1" has
        # a completely unrelated interface and function, and yet the merge
        # raises no error and leaves it out
        calc2: LoggedCalculator = LoggedCalculator()
        make_mul = graphkit.operation(name="mul1",
                                      needs=["a", "b", optional("c")],
                                      provides=["ab(c)"])
        mul_op2 = make_mul(calc2.mul)
        make_sum = graphkit.operation(name="sum1",
                                      needs=["WHATEVER", "ASDF", "?"],
                                      provides=["FOO"])
        sum_op2 = make_sum(calc2.constant_one)
        make_pow = graphkit.operation(name="abspow2",
                                      needs=["a_plus_ab(c)"],
                                      params={"p": 2},
                                      provides=["a_plus_ab(c)_squared"])
        pow_op2 = make_pow(calc2.abspow)
        second_graph = graphkit.compose(name="graph2")(mul_op2, sum_op2,
                                                       pow_op2)

        # merge both graphs
        merger = graphkit.compose(name="graph3", merge=True)
        merged = merger(self.graph, second_graph)
        result: Dict[str, int] = merged({"a": 2, "b": 5})
        expected: Dict[str, int] = {"a": 2, "b": 5, "ab(c)": 10,
                                    "a_plus_ab(c)": 12,
                                    "a_plus_ab(c)_cubed": 1728,
                                    "a_plus_ab(c)_squared": 144}
        self.assertEqual(result, expected)
        self.assertListEqual(self.lc.computed, ["mul", "sum", "abspow"])
        # abspow is the only contribution of the second graph
        self.assertListEqual(calc2.computed, ["abspow"])

    def test_graph_merge_dropping(self) -> None:
        """
        Tests merging when node2 gets merged with a node1 that has a
        different interface, while node3 depends on node2.
        The assertions document the following:

        * The output of node2 can't be reached through the graph
        * node3 is dropped from the graph with no error reported
        * The merged graph performs no graph2 computation at all
        """
        # second graph whose "sum1" breaks the interface of the first one
        calc2: LoggedCalculator = LoggedCalculator()
        make_mul = graphkit.operation(name="mul1",
                                      needs=["a", "b", optional("c")],
                                      provides=["ab(c)"])
        mul_op2 = make_mul(calc2.mul)
        # here is the difference: this "sum1" provides "some_sum"
        make_sum = graphkit.operation(name="sum1", needs=["a", "ab(c)"],
                                      provides=["some_sum"])
        sum_op2 = make_sum(calc2.sum)
        # and this node depends on "some_sum"
        make_pow = graphkit.operation(name="abspow2", needs=["some_sum"],
                                      params={"p": 2},
                                      provides=["one_squared"])
        pow_op2 = make_pow(calc2.abspow)
        second_graph = graphkit.compose(name="graph2")(mul_op2, sum_op2,
                                                       pow_op2)
        # merge both graphs
        merger = graphkit.compose(name="graph3", merge=True)
        merged = merger(self.graph, second_graph)
        result: Dict[str, int] = merged({"a": 2, "b": 5})

        # "one_squared" is absent because abspow2 was dropped: the result
        # looks like a regular forward propagation of the first graph
        expected: Dict[str, int] = {"a": 2, "b": 5, "ab(c)": 10,
                                    "a_plus_ab(c)": 12,
                                    "a_plus_ab(c)_cubed": 1728}
        self.assertEqual(result, expected)
        self.assertListEqual(self.lc.computed, ["mul", "sum", "abspow"])
        # the second graph is expected to be completely out
        self.assertListEqual(calc2.computed, [])
	# -*- coding:utf-8 -*-


	"""
	This module contains test cases regarding usage of Yahoo's ``graphkit``
	library for DAG-based computation: https://github.com/yahoo/graphkit
	"""


	import unittest
	from typing import List, Dict
	#
	import graphkit
	from graphkit.modifiers import optional


	class LoggedCalculator:
	    """
	    Performs some arithmetic calculations and keeps track of them.

	    Every method appends a short tag to ``self.computed`` before
	    returning its result.
	    """

	    def __init__(self):
	        # tags of the performed operations, oldest first
	        self.computed: List[str] = []

	    def constant_one(self, *args, **kwargs) -> float:
	        """
	        Log ``"c1"`` and return ``1``, ignoring any positional or
	        keyword arguments.
	        """
	        self.computed.append("c1")
	        return 1

	    def sum(self, a: float, b: float) -> float:
	        """
	        Log ``"sum"`` and return ``a + b``.
	        """
	        self.computed.append("sum")
	        return a + b

	    def mul(self, a: float, b: float, c: float = 1) -> float:
	        """
	        Log ``"mul"`` and return ``a * b * c``.

	        .. note::

	          The default parameters like ``c`` shall become
	          ``graphkit.modifiers.optional`` entries with the same name
	        """
	        self.computed.append("mul")
	        return a * b * c

	    def abspow(self, a: float, p: float) -> float:
	        """
	        Log ``"abspow"`` and return ``abs(a) ** p``.
	        """
	        self.computed.append("abspow")
	        return abs(a) ** p


	class GraphkitBasicTestCase(unittest.TestCase):
	    """
	    Tests basic functionality:
	    * Operation and graph creation
	    * Full and partial forward propagation
	    * Laziness
	    * Representing optional parameters as optional nodes
	    """

	    def setUp(self) -> None:
	        """
	        Create a simple computational graph using ops from
	        LoggedCalculator::
	          abs(a + (a * b * c?)) ** 3
	        """
	        self.lc: LoggedCalculator = LoggedCalculator()
	        # note the optional node "c" which has to be named like the kwarg
	        mul1 = graphkit.operation(name="mul1", needs=["a", "b",
	                                                      optional("c")],
	                                  provides=["ab(c)"])(self.lc.mul)
	        sum1 = graphkit.operation(name="sum1", needs=["a", "ab(c)"],
	                                  provides=["a_plus_ab(c)"])(self.lc.sum)
	        ap1 = graphkit.operation(name="abspow1", needs=["a_plus_ab(c)"],
	                                 params={"p": 3},
	                                 provides=["a_plus_ab(c)_cubed"])(
	                                     self.lc.abspow)
	        # Ordering in operation collection doesn't matter
	        # needs/provides takes care of that
	        self.graph = graphkit.compose(name="test_graph")(sum1, mul1, ap1)

	    def test_full_forward(self) -> None:
	        """
	        Perform a full forward propagation and check that methods are
	        called each once and in order.
	        """
	        out: Dict[str, int] = self.graph({"a": 2, "b": 5})
	        expected_out: Dict[str, int] = {"a": 2, "b": 5, "ab(c)": 10,
	                                        "a_plus_ab(c)": 12,
	                                        "a_plus_ab(c)_cubed": 1728}
	        #
	        self.assertEqual(out, expected_out)
	        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])

	    def test_partial_forward(self) -> None:
	        """
	        Perform a partial forward propagation and check that only needed
	        methods are computed
	        """
	        out: Dict[str, int] = self.graph({"a": 2, "b": 5},
	                                         outputs=["a_plus_ab(c)"])
	        expected_out: Dict[str, int] = {"a_plus_ab(c)": 12}
	        #
	        self.assertEqual(out, expected_out)
	        self.assertEqual(self.lc.computed, ["mul", "sum"])

	    def test_laziness(self) -> None:
	        """
	        Perform a partial forward, then compute further from its result
	        and check that no duplicated computation was needed
	        """
	        out1: Dict[str, int] = self.graph({"a": 2, "b": 5},
	                                          outputs=["a_plus_ab(c)"])
	        # provide out1 as input to the same graph and ask further
	        out2: Dict[str, int] = self.graph(out1,
	                                          outputs=["a_plus_ab(c)_cubed"])
	        # Each op was only computed once
	        self.assertEqual(out2["a_plus_ab(c)_cubed"], 1728)
	        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])

	    def test_optional(self) -> None:
	        """
	        Like ``test_full_forward`` but providing optional node ``c``
	        """
	        out: Dict[str, int] = self.graph({"a": 2, "b": 5, "c": 0})
	        expected_out: Dict[str, int] = {"a": 2, "b": 5, "c": 0,
	                                        "ab(c)": 0,
	                                        "a_plus_ab(c)": 2,
	                                        "a_plus_ab(c)_cubed": 8}
	        #
	        self.assertEqual(out, expected_out)
	        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])


	class GraphkitMergeTestCase(unittest.TestCase):
	    """
	    Tests multi-graph built-in functionality:
	    * Add nodes to existing graph
	    * Merge different graphs with common nodes
	    """

	    def setUp(self) -> None:
	        """
	        Create a simple computational graph using ops from
	        LoggedCalculator::
	          abs(a + (a * b * c?)) ** 3
	        """
	        self.lc: LoggedCalculator = LoggedCalculator()
	        # note the optional node "c" which has to be named like the kwarg
	        mul1 = graphkit.operation(name="mul1", needs=["a", "b",
	                                                      optional("c")],
	                                  provides=["ab(c)"])(self.lc.mul)
	        sum1 = graphkit.operation(name="sum1", needs=["a", "ab(c)"],
	                                  provides=["a_plus_ab(c)"])(self.lc.sum)
	        ap1 = graphkit.operation(name="abspow1", needs=["a_plus_ab(c)"],
	                                 params={"p": 3},
	                                 provides=["a_plus_ab(c)_cubed"])(
	                                     self.lc.abspow)
	        # Ordering in operation collection doesn't matter
	        # needs/provides takes care of that
	        self.graph = graphkit.compose(name="test_graph")(sum1, mul1, ap1)

	    def test_add_single_op(self) -> None:
	        """
	        Add an operation to the setup graph and perform a full forward
	        propagation.
	        """
	        # Add an extra OP to self.graph:
	        sum2 = graphkit.operation(name="sum2",
	                                  needs=["a_plus_ab(c)_cubed", "x"],
	                                  provides="tg_plus_x")(self.lc.sum)
	        self.graph2 = graphkit.compose(name="bigger_graph")(self.graph,
	                                                            sum2)
	        #
	        out: Dict[str, int] = self.graph2({"a": 2, "b": 5, "x": 1000})
	        expected_out: Dict[str, int] = {"a": 2, "b": 5, "x": 1000,
	                                        "ab(c)": 10,
	                                        "a_plus_ab(c)": 12,
	                                        "a_plus_ab(c)_cubed": 1728,
	                                        "tg_plus_x": 2728}
	        #
	        self.assertEqual(out, expected_out)
	        self.assertEqual(self.lc.computed,
	                         ["mul", "sum", "abspow", "sum"])

	    def test_add_name_collisions(self) -> None:
	        """
	        Compose a graph with some common input and throughput names and
	        observe the following result:

	        1. Parameters named equal have to be fed with the same value in
	           the input dict (see "a" vs. "b, b2").
	        2. If two throughput nodes have the same name, the dependencies
	           are respected, and both operations are performed separately
	           (i.e. operations on the first will use the first, etc), but
	           only one of them will figure in the output dict (the one
	           defined last in the case of this test).

	        .. note::
	          Due to these reasons best is to avoid naming collisions using
	          namespaces.
	        """
	        # make a second graph with its separate logger. note how "a" is
	        # shared with self.graph but "b2" is different
	        lc2 = LoggedCalculator()
	        mul2 = graphkit.operation(name="mul2", needs=["a", "b2",
	                                                      optional("c")],
	                                  provides=["ab(c)"])(lc2.mul)
	        sum2 = graphkit.operation(name="sum2",
	                                  needs=["ab(c)", "w"],
	                                  provides="w_plus_ab(c)")(lc2.sum)
	        graph2 = graphkit.compose(name="graph2")(mul2, sum2)

	        # compose both graphs (no merge flag). Both of them provide
	        # "ab(c)"; the expected dict below only holds the value 2000
	        graph3 = graphkit.compose(name="graph3")(self.graph, graph2)
	        out3: Dict[str, int] = graph3({"a": 2, "b": 5, "b2": 1000,
	                                       "w": 1})
	        expected_out3: Dict[str, int] = {"a": 2, "b": 5, "b2": 1000,
	                                         "w": 1,
	                                         "ab(c)": 2000,
	                                         # NOTE how this is 12 not 2002
	                                         "a_plus_ab(c)": 12,
	                                         "a_plus_ab(c)_cubed": 1728,
	                                         # BUT this is 2001
	                                         "w_plus_ab(c)": 2001}
	        self.assertEqual(out3, expected_out3)
	        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])
	        self.assertEqual(lc2.computed, ["mul", "sum"])

	    def test_graph_merge_ok(self) -> None:
	        """
	        Graph merging is based on operation name: operations in
	        different graphs with same name will be attempted to be merged,
	        **irrespectively of differing inputs and outputs and differing
	        functionality**.

	        .. note::

	          The merge ordering matters: all the nodes of the first graph
	          are kept, and for the subsequent graphs, "merging" effectively
	          means using the preexisting node and discarding the latter one
	          (assuming they have identical interface).

	        This method tests the merging behaviour with straightforward
	        settings: A graph also in the form
	        ``abs(a + (a * b * c?)) ** 2`` is defined almost identically as
	        ``self.graph``, the abspow being named and parametrized
	        differently.
	        """
	        # make a second graph. The operations are performed by a separate
	        # LoggedCalculator. See how "sum1" gets factored out and no error
	        # arises regardless of its unique interface and operation
	        lc2: LoggedCalculator = LoggedCalculator()
	        mul2 = graphkit.operation(name="mul1", needs=["a", "b",
	                                                      optional("c")],
	                                  provides=["ab(c)"])(lc2.mul)
	        sum2 = graphkit.operation(name="sum1",
	                                  needs=["WHATEVER", "ASDF", "?"],
	                                  provides=["FOO"])(lc2.constant_one)
	        ap2 = graphkit.operation(name="abspow2", needs=["a_plus_ab(c)"],
	                                 params={"p": 2},
	                                 provides=["a_plus_ab(c)_squared"])(
	                                     lc2.abspow)
	        graph2 = graphkit.compose(name="graph2")(mul2, sum2, ap2)

	        # merge both graphs
	        graph3 = graphkit.compose(name="graph3", merge=True)(self.graph,
	                                                             graph2)
	        out3: Dict[str, int] = graph3({"a": 2, "b": 5})
	        expected_out3: Dict[str, int] = {"a": 2, "b": 5, "ab(c)": 10,
	                                         "a_plus_ab(c)": 12,
	                                         "a_plus_ab(c)_cubed": 1728,
	                                         "a_plus_ab(c)_squared": 144}
	        #
	        self.assertEqual(out3, expected_out3)
	        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])
	        # the only g2 contribution
	        self.assertEqual(lc2.computed, ["abspow"])

	    def test_graph_merge_dropping(self) -> None:
	        """
	        This method tests the merging behaviour when node2 gets merged
	        with node1 with a different interface, and node3 depends on
	        node2. We observe the following:

	        * The output of node2 can't be reached through the graph
	        * node3 will be dropped from the graph with no error reported
	        * No graph2 computation will be performed by the merged graph
	        """
	        # create graph2 with breaking interface
	        lc2: LoggedCalculator = LoggedCalculator()
	        mul2 = graphkit.operation(name="mul1", needs=["a", "b",
	                                                      optional("c")],
	                                  provides=["ab(c)"])(lc2.mul)
	        # note the difference: provides "some_sum"
	        sum2 = graphkit.operation(name="sum1", needs=["a", "ab(c)"],
	                                  provides=["some_sum"])(lc2.sum)
	        # this node depends on "some_sum"
	        ap2 = graphkit.operation(name="abspow2", needs=["some_sum"],
	                                 params={"p": 2},
	                                 provides=["one_squared"])(lc2.abspow)
	        graph2 = graphkit.compose(name="graph2")(mul2, sum2, ap2)
	        # merge both graphs
	        graph3 = graphkit.compose(name="graph3", merge=True)(self.graph,
	                                                             graph2)
	        out3: Dict[str, int] = graph3({"a": 2, "b": 5})

	        # "one_squared" is not present because abspow2 was dropped
	        # This looks like a regular forward propagation of graph 1
	        expected_out3: Dict[str, int] = {"a": 2, "b": 5, "ab(c)": 10,
	                                         "a_plus_ab(c)": 12,
	                                         "a_plus_ab(c)_cubed": 1728}
	        #
	        self.assertEqual(out3, expected_out3)
	        self.assertEqual(self.lc.computed, ["mul", "sum", "abspow"])
	        # expect g2 to be completely out
	        self.assertEqual(lc2.computed, [])