"""Illustrate some strategies to cache potentially expensive results.
Sometimes results are obtained via code which has side effects and/or is
potentially expensive so you don't want to run it more often than
necessary. This script demonstrates a few different ways of caching
those results at the use site, so they only need to be generated once
and can be read from whatever is used as a cache afterwards.
"""
# First we need a function having an observable side effect, so we can
# test that our caching strategies actually work and the function isn't
# called more than once.
def minus(caller):
    print(f"From `{caller}`.")
    return -1
# First, let's create an example without caching so we can later
# illustrate that the side effect is actually triggered more than once.
def negate(x):
    return minus(caller="negate") * x
# Now that we have the counterexample out of the way, let's look at the
# first way of caching values generated and used inside a function. In
# short, we use an attribute on the function using the value to store it
# for later use.
def cache_via_attribute(x):
    if not hasattr(cache_via_attribute, "cache"):
        # For this simple example, we're just storing one value in the
        # attribute. But for more complex use cases, you could also use
        # a dictionary, a `dataclass` or objects as the cache.
        # Or you could use multiple appropriately named attributes.
        cache_via_attribute.cache = minus(caller="cache_via_attribute")
    return cache_via_attribute.cache * x
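
# As a sketch added for illustration (not part of the original gist), the
# dictionary variant mentioned in the comment above could look like this:
# the attribute holds a dict keyed by whatever distinguishes the cached
# values, here simply the label passed on to the "expensive" function.
def cache_via_attribute_dict(x, key="cache_via_attribute_dict"):
    if not hasattr(cache_via_attribute_dict, "cache"):
        cache_via_attribute_dict.cache = {}
    if key not in cache_via_attribute_dict.cache:
        cache_via_attribute_dict.cache[key] = minus(caller=key)
    return cache_via_attribute_dict.cache[key] * x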
# A second way of caching is by using default values for function
# parameters. Since these are evaluated at function definition time,
# they are essentially evaluated at the same time as module global
# variables. At least for module global functions.
def cache_via_default_value(x, cache=minus(caller="cache_via_default_value")):
    return cache * x
# Note that you see the output of calling the test function in
# `cache_via_default_value` first, because this cache gets populated the
# first time the containing module gets imported. That also brings us
# directly to the pros and cons of each approach. Caching via default
# parameter values can make it a bit unpredictable when the "expensive"
# function is actually called, because tracking import order is
# somewhere between hard and infeasible. Also, this approach generally
# means that the "expensive" function is called at least once (and
# probably on program startup), even if the function using the cached
# results is never actually called. On the plus side, doing it this way
# means that the `cache` can be easily overridden by manually supplying
# the parameter, which needs additional work in the attribute approach:
def cache_via_overridable_attribute(x, cache=None):
    # Only fall back to the "expensive" function if no override was
    # supplied and the attribute cache hasn't been populated yet.
    if (
        not hasattr(cache_via_overridable_attribute, "cache")
        and cache is None
    ):
        cache_via_overridable_attribute.cache = minus(
            caller="cache_via_overridable_attribute"
        )
    cache = (
        cache if cache is not None else cache_via_overridable_attribute.cache
    )
    return cache * x
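
# For example (a sketch added for illustration, not part of the original
# gist), overriding the cache is just a keyword argument in both variants,
# and neither call triggers the "expensive" function at call time:
#
#     cache_via_default_value(3, cache=2)           # -> 6
#     cache_via_overridable_attribute(3, cache=2)   # -> 6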
# The attribute approach is also the only way of possibly avoiding the
# creation of an additional function altogether, by inlining the
# expensive code into the cache creation.
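
# Such an inlined version (a sketch added for illustration, not part of the
# original gist) could look like this; the "expensive" code now lives
# directly in the branch that populates the cache:
def cache_via_inlined_attribute(x):
    if not hasattr(cache_via_inlined_attribute, "cache"):
        # The body of `minus`, inlined instead of being a separate function.
        print("From `cache_via_inlined_attribute`.")
        cache_via_inlined_attribute.cache = -1
    return cache_via_inlined_attribute.cache * x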
# Last but not least, there's also the possibility to use
# `functools.cache` on the "expensive" function.
import functools

@functools.cache
def caching_minus(caller):
    print(f"From `{caller}`.")
    return -1

def cache_via_functools(x):
    return caching_minus("cache_via_functools") * x
# This is the easiest approach that still evaluates the "expensive" code
# only once and only when it is needed. Manipulating the cache when
# calling the outer function is even more complicated than in the
# attribute approach, though (see the extra demonstration at the end of
# the `__main__` block below).
if __name__ == "__main__":
    # Now let's make sure our caching implementations actually work as
    # expected. First, call the uncached version multiple times to see
    # that we can actually observe how often the "expensive" function
    # gets called.
    print("Uncached         :", [negate(x) for x in range(3)])
    # And now do the same with our cached versions, to make sure we can
    # achieve the same results with only one call of the "expensive"
    # function.
    print("Via attribute    :", [cache_via_attribute(x) for x in range(3)])
    print("Via default value:", [cache_via_default_value(x) for x in range(3)])
print("Via functools :", [cache_via_functools(x) for x in range(3)])