Skip to content

Instantly share code, notes, and snippets.

@chrisdiana
Last active Oct 6, 2020
Embed
What would you like to do?
Data Pipeline Function in Python
import pandas as pd
# Example input: three rows spread over three integer columns.
data = {
    'col1': [1, 2, 2],
    'col2': [3, 4, 5],
    'col3': [1, 2, 3],
}
df = pd.DataFrame(data=data)
def clean(df):
    """Return only the ``col1`` and ``col2`` columns of *df*.

    Args:
        df: Frame-like object indexable by a list of column labels
            (a pandas DataFrame in this file's example).

    Returns:
        The result of selecting ``['col1', 'col2']`` from *df*.
    """
    kept_columns = ['col1', 'col2']
    return df[kept_columns]
def equals_2(df, col):
    """Return the rows of *df* whose value in column *col* equals 2.

    Args:
        df: Frame-like object supporting ``df[col]`` and ``df.loc[mask]``
            (a pandas DataFrame in this file's example).
        col: Label of the column to compare against 2.

    Returns:
        The rows of *df* selected by the boolean mask ``df[col] == 2``.
    """
    matches = df[col] == 2
    return df.loc[matches]
# Pipeline: keep col1/col2 via clean, then keep rows where col1 == 2.
df = pipe(df, clean, (equals_2, 'col1'))
print(df)
#    col1  col2
# 1     2     4
# 2     2     5
from functools import reduce, partial
def pipe(seed, *funcs):
    """Pipe data from function to function in sequential order.

    Args:
        seed (object): Initial value passed to the first step.
        *funcs: Steps to apply, in order. Each step is either a callable
            taking the current value, or a tuple ``(func, arg1, ...)``
            whose extra items are passed positionally after the current
            value, i.e. ``func(value, arg1, ...)``.

    Returns:
        The value returned by the last step, or ``seed`` unchanged when
        no steps are given.

    Raises:
        TypeError: If a tuple step is empty, so there is no callable to
            invoke.

    Example:
        pipe({'foo': 'bar'}, func1, func2, (func3, 'test'))
    """
    def apply_step(value, step):
        # A bare callable receives only the running value.
        if not isinstance(step, tuple):
            return step(value)
        if not step:
            raise TypeError(
                "pipe() tuple step must contain a callable as its first item"
            )
        # Tuple form: the running value becomes the first positional
        # argument, followed by the tuple's remaining items.
        func, *extra = step
        return func(value, *extra)

    return reduce(apply_step, funcs, seed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment