Skip to content

Instantly share code, notes, and snippets.

@Abuton
Last active January 9, 2024 09:48
Show Gist options
  • Save Abuton/6d32e9d60b9726f4b31f979595f381bb to your computer and use it in GitHub Desktop.
Save Abuton/6d32e9d60b9726f4b31f979595f381bb to your computer and use it in GitHub Desktop.
import pandas as pd
import os
import shutil
def extract(path: str = "s3://my_bucket_name/file0.parquet") -> pd.DataFrame:
    """Read a Parquet file into a DataFrame.

    Args:
        path: Location of the Parquet file, passed straight to
            ``pandas.read_parquet``.

    Returns:
        The contents of the file as a DataFrame.
    """
    return pd.read_parquet(path)
# remove error rows
def transform(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``customerId`` is not ``'A'``.

    Rows with ``customerId == 'A'`` are treated as error rows and dropped.
    The input frame itself is not modified.

    Args:
        df: Input frame; must contain a ``customerId`` column.

    Returns:
        The filtered frame, keeping the original index labels of the
        surviving rows.

    Raises:
        KeyError: If ``df`` has no ``customerId`` column.
    """
    is_valid = df['customerId'] != 'A'
    return df[is_valid]
def load_dwh(
    df: pd.DataFrame,
    output_location: str,
    filename: str = "filename.csv",
) -> None:
    """Write ``df`` as a CSV file into a freshly created output directory.

    Any existing directory at ``output_location`` is deleted together with
    everything in it, so each load starts from an empty folder. The directory
    is then (re)created and the frame is written to
    ``<output_location>/<filename>``.

    Args:
        df: Frame to persist.
        output_location: Directory that will hold the CSV file.
        filename: Name of the CSV file created inside ``output_location``.
    """
    if os.path.exists(output_location):
        # Removes the entire folder, including files left by earlier loads.
        shutil.rmtree(output_location)
    # Create the directory in both cases: it is either new or was just removed.
    os.makedirs(output_location)
    df.to_csv(os.path.join(output_location, filename))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment