Last active
January 9, 2024 09:48
-
-
Save Abuton/6d32e9d60b9726f4b31f979595f381bb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import os | |
import shutil | |
def extract(path: str = "s3://my_bucket_name/file0.parquet") -> pd.DataFrame:
    """Read the Parquet file at *path* into a DataFrame.

    Args:
        path: Location of the Parquet file, passed unchanged to
            ``pd.read_parquet``. The default is a placeholder S3 URI.

    Returns:
        The DataFrame produced by ``pd.read_parquet``.
    """
    return pd.read_parquet(path)
def transform(
    df: pd.DataFrame,
    *,
    column: str = "customerId",
    error_value: object = "A",
) -> pd.DataFrame:
    """Remove error rows from *df*.

    A row counts as an error row when its value in *column* equals
    *error_value*. The input frame is not modified.

    Args:
        df: Frame to clean.
        column: Name of the column that flags error rows.
        error_value: Value in *column* that marks a row as an error.

    Returns:
        A frame containing only the rows whose *column* value differs
        from *error_value*.

    Raises:
        KeyError: If *column* is not present in *df*.
    """
    keep_mask = df[column] != error_value
    return df[keep_mask]
def load_dwh(
    df: pd.DataFrame,
    output_location: str,
    *,
    filename: str = "filename.csv",
) -> None:
    """Write *df* as a CSV file into a freshly created output directory.

    If *output_location* already exists as a directory, it is deleted with
    everything in it and recreated, so each call leaves only the new CSV.

    Args:
        df: Frame to persist.
        output_location: Directory that will hold the CSV file.
        filename: Name of the CSV file created inside *output_location*.

    Raises:
        FileExistsError: If *output_location* exists but is not a directory.
    """
    if os.path.isdir(output_location):
        # Removes the entire folder so stale files never survive a reload.
        shutil.rmtree(output_location)
    # Create the directory in both cases; the CSV write needs it to exist.
    os.makedirs(output_location)
    df.to_csv(os.path.join(output_location, filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment