Last active
September 6, 2022 17:49
-
-
Save skritch/d184ad58687afba911a5ec342e0c2a78 to your computer and use it in GitHub Desktop.
kedro_f_to_node.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import inspect | |
import re | |
from typing import Callable, Union, Optional | |
import types | |
from kedro.pipeline import Pipeline, node | |
from kedro.pipeline.node import Node | |
# Zero-width pattern: matches the position in front of every uppercase ASCII
# letter, except at the very start of the string.
CAMEL_TO_SNAKE_RE = re.compile(r'(?<!^)(?=[A-Z])')


def camel_to_snake(s: str) -> str:
    """Convert a CamelCase identifier to snake_case.

    An underscore is placed before every uppercase ASCII letter that is not
    the first character, then the whole string is lowercased. Consecutive
    capitals are each treated as a word start (``"HTTPServer"`` becomes
    ``"h_t_t_p_server"``).
    """
    # Cut the string at each word boundary, then glue the pieces back together.
    words = CAMEL_TO_SNAKE_RE.split(s)
    return '_'.join(words).lower()
def f_to_node(f: Callable, output_names: Optional[Union[str, list[str]]] = None) -> Node:
    """Wrap ``f`` in a Kedro node, deriving dataset names from its annotations.

    Args:
        f: Function to wrap. Every parameter must carry a type annotation and
            none may be positional-only.
        output_names: Output name(s), passed to ``node`` unchanged. When
            ``None``, a single output name is derived by snake_casing the name
            of ``f``'s return annotation, which must then be a named,
            non-builtin type.

    Returns:
        The result of ``node(func=f, inputs=..., outputs=..., name=f.__name__)``.
        Each input is named after the snake_cased annotation type of the
        parameter, or after the parameter itself when the annotation is a
        builtin or has no usable ``__name__``.

    Raises:
        AssertionError: If ``output_names`` is ``None`` and the return
            annotation is missing, unsupported, or a builtin; or if any
            parameter is unannotated or positional-only.
    """
    sig = inspect.signature(f)
    # Public alias of the "no annotation" sentinel.
    empty = inspect.Signature.empty

    # NOTE: the checks below raise explicitly instead of using `assert` so they
    # are not stripped under `python -O`. AssertionError is kept so existing
    # callers see the same exception type as before.
    if output_names is None:
        ret = sig.return_annotation
        if ret is empty:
            raise AssertionError(
                "Cannot generate a node from a function without a declared return type"
            )
        unsupported = (
            types.GenericAlias,
            types.UnionType,
            types.EllipsisType,
            types.GeneratorType,
            types.AsyncGeneratorType,
            types.CoroutineType,
        )
        # Reject the listed classes themselves AND instances of them: e.g.
        # `list[int]` is an instance of GenericAlias and `int | str` of
        # UnionType, which a bare membership test does not catch.
        # Annotations without a `__name__` (e.g. string annotations) cannot
        # yield an output name either.
        type_name = getattr(ret, '__name__', None)
        if (
            any(ret is t for t in unsupported)
            or isinstance(ret, unsupported)
            or type_name is None
        ):
            raise AssertionError(f'Unsupported return type {ret} for a node')
        if getattr(ret, '__module__', 'builtins') == 'builtins':
            raise AssertionError(
                'Cannot derive an output name from a builtin return type; '
                'pass output_names explicitly'
            )
        # TODO: multiple outputs
        output_names = [camel_to_snake(type_name)]

    # Handle inputs types
    if any(
        p.kind == inspect.Parameter.POSITIONAL_ONLY or p.annotation is empty
        for p in sig.parameters.values()
    ):
        raise AssertionError(
            'Cannot use unannotated or positional-only parameters in a recipe function'
        )

    # TODO: use arg name for all library-code type annotations
    input_names = []
    for name, param in sig.parameters.items():
        annotation = param.annotation
        type_name = getattr(annotation, '__name__', None)
        module = getattr(annotation, '__module__', 'builtins')
        if module == 'builtins' or type_name is None:
            # Builtin types are not distinctive, and annotations without a
            # `__name__` cannot be named, so fall back to the argument name.
            input_names.append(name)
        else:
            input_names.append(camel_to_snake(type_name))

    return node(
        func=f,
        inputs=input_names,
        outputs=output_names,
        name=f.__name__,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This converts a function into a Kedro node automatically.
Either `output_names` should be provided, or the function's return value should have a type annotation. The annotation is only useful if the output type is unique, so if the output is something generic like `pd.Series`, wrap it in a more meaningfully named class. The same goes for the inputs, except that the input argument name is used when the type is a builtin. (It would be good to make this apply to ALL non-user-code modules.)