Skip to content

Instantly share code, notes, and snippets.

@soumilshah1995
Forked from alcheng10/Date_Dimension.md
Created December 16, 2022 01:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save soumilshah1995/56948c4dac389e48be08b51c5742b02b to your computer and use it in GitHub Desktop.
Save soumilshah1995/56948c4dac389e48be08b51c5742b02b to your computer and use it in GitHub Desktop.
Python (Pandas) function that creates a dimension date with workday flag
import pandas as pd
import holidays

def create_dim_date(start_date, end_date):
    '''
    Create Dimension Date in Pandas
    
    :return df_date : DataFrame
    '''
    from pandas.tseries.offsets import MonthEnd, QuarterEnd

    # Construct DIM Date Dataframe
    df_date = pd.DataFrame({"Date": pd.date_range(start=f'{start_date}', end=f'{end_date}', freq='D')})

    def get_end_of_month(pd_date):
        if pd_date.is_month_end == True:
            return pd_date
        else:
            return pd_date + MonthEnd(1)

    def get_end_of_quarter(pd_date):
        if pd_date.is_quarter_end == True:
            return pd_date
        else:
            return pd_date + QuarterEnd(1)

    # Add in attributes
    df_date["Day"] = df_date.Date.dt.weekday_name
    df_date["Week"] = df_date.Date.dt.weekofyear
    df_date["Month"] = df_date.Date.dt.month
    df_date["Quarter"] = df_date.Date.dt.quarter
    df_date["Year"] = df_date.Date.dt.year
    df_date["Fiscal_Year"] = df_date['Date'].dt.to_period('A-JUN')
    df_date['EndOfMonth'] = df_date['Date'].apply(get_end_of_month)
    df_date['EOM_YN'] = df_date['Date'].dt.is_month_end
    df_date['EndOfQuarter'] = df_date['Date'].apply(get_end_of_quarter)
    df_date['EOQ_YN'] = df_date['Date'].dt.is_quarter_end

    return df_date


def create_dim_date_with_workday(start_date, end_date):
    '''
    Creates a dimension date that has workday_YN column.

    Uses Australia NSW public holidays.

    :return df_date : DataFrame
    '''

    df_date = create_dim_date(start_date, end_date)
    
    # Start with default that it is workday
    df_date['Workday_YN'] = True
    
    # Exclude public holidays in NSW
    for index, row in df_date.iterrows():
        if row['Day'] in ['Saturday', 'Sunday']:
            df_date.loc[index, 'Workday_YN'] = False
        date = row['Date'].strftime("%Y-%m-%d")
        if date in holidays.Australia(prov='NSW'):
            df_date.loc[index, 'Workday_YN'] = False

    return df_date
    
    
# To use, just call as create_dim_date_with_workday('2018-01-01', '2018-12-31')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment