Last active
August 11, 2022 02:25
-
-
Save kvnkho/ce1dcf0b6bee9957db4c7232af989153 to your computer and use it in GitHub Desktop.
Ploomber with Fugue on Spark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from fugue import transform
from sklearn.preprocessing import minmax_scale

# %% tags=["parameters"]
# declare a list of tasks whose products you want to use as inputs
# NOTE(review): these look like placeholder values that the pipeline runner
# replaces at run time -- `upstream` and `product` are indexed by string key
# further down, which a plain list / None would not support. Confirm.
upstream = ["extract"]
product = None
engine = None

# %%
def normalize(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with an added ``scaled`` column.

    Args:
        df: Frame that must contain a ``col2`` column.

    Returns:
        A new DataFrame with every original column plus ``scaled``, holding
        the result of ``minmax_scale`` applied to ``col2``.
    """
    return df.assign(scaled=minmax_scale(df["col2"]))


# Apply `normalize` through Fugue on the configured engine. The call is
# partitioned by `col1` (presumably so scaling is computed per `col1` group --
# confirm against Fugue's partition semantics), and the output is written to
# the path in product["data"] instead of being returned.
transform(
    upstream["extract"]["data"],
    normalize,
    schema="*,scaled:float",
    partition={"by": "col1"},
    engine=engine,
    save_path=product["data"],
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment