Skip to content

Instantly share code, notes, and snippets.

@jmquintana79
Last active June 21, 2024 13:34
Show Gist options
  • Save jmquintana79/231273072f9a8f10fc72881d05a6f54b to your computer and use it in GitHub Desktop.
Save jmquintana79/231273072f9a8f10fc72881d05a6f54b to your computer and use it in GitHub Desktop.
import pandas as pd
## unstack a timeseries target variable according to a categorical reference column
def unstack_ts_according_to_reference(
    df: pd.DataFrame,
    c_dt: str,
    c_cat_reference: str,
    c_target_variable: str,
) -> pd.DataFrame:
    """
    Unstack a timeseries target variable according to a categorical reference column.

    The long-format input (one row per timestamp / category pair) is pivoted
    into wide format: one row per value of `c_dt` and one column per value of
    `c_cat_reference`, filled with the values of `c_target_variable`.

    df -- Dataframe to be processed (non-empty pandas DataFrame).
    c_dt -- Temporal column; its values become the index of the result.
    c_cat_reference -- Categorical column (dtype kind 'O') whose values become
                       the column names of the result.
    c_target_variable -- Num / Cat column whose values fill the result.
    return -- unstacked dataframe (index -- values of c_dt /
              column names -- values of c_cat_reference)
    raises -- AssertionError if any argument fails validation.

    NOTE: the (c_dt, c_cat_reference) pairs are expected to be unique; pandas
    `unstack` cannot reshape an index containing duplicate entries.
    """
    # validate arguments
    # Explicit raises (instead of `assert` statements) so the checks are not
    # stripped under `python -O`; AssertionError is kept so existing callers
    # that catch it keep working.
    if not isinstance(df, pd.DataFrame) or len(df) == 0:
        raise AssertionError("df must be a non-empty pandas DataFrame")
    for c_name in (c_dt, c_cat_reference, c_target_variable):
        if c_name not in df.columns:
            raise AssertionError(f"column {c_name!r} not found in df")
    # compare by value: `is` on a str literal only works by interning accident
    if df[c_cat_reference].dtype.kind != 'O':
        raise AssertionError(
            f"column {c_cat_reference!r} must have an object-like dtype (kind 'O')"
        )
    # unstack
    temp = (
        df[[c_dt, c_cat_reference, c_target_variable]]
        .set_index([c_dt, c_cat_reference])
        .unstack()
    )
    # reset column names: drop the outer (target variable) level and keep
    # only the category values
    temp.columns = temp.columns.get_level_values(1)
    # return
    return temp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment