Useful panel-data modeling code: lag-feature generation, level dummy encoding, and a multi-model regression comparison harness.
def generate_lag(dframe, by, target, periods):
    """Add lag 1..periods columns of `target`, computed within each group given by `by`."""
    for num in range(1, periods + 1):
        # Shift within each panel unit so lags never leak across groups; fill the
        # leading gaps (first observations of each group) with 0.
        dframe[target + '_lag' + str(num)] = dframe.groupby(by)[target].shift(num).fillna(0)
    return dframe
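# Example usage (a minimal sketch; the DataFrame and the column names `firm_id`/`sales`
# are hypothetical):
# panel = pd.DataFrame({'firm_id': ['A', 'A', 'A', 'B', 'B'],
#                       'sales':   [10, 12, 11, 5, 7]})
# panel = generate_lag(panel, by='firm_id', target='sales', periods=2)
# # Adds 'sales_lag1' and 'sales_lag2'; rows with no prior value in their group get 0.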
    
# One-hot encode the panel level column named in the cf config dict (assumes pandas is
# imported as below and cf is defined; see the sketch of cf that follows).
dframe = pd.get_dummies(dframe, columns=[cf['LEVEL']], prefix=cf['LEVEL_PREFIX'], sparse=True)
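# A minimal sketch of the cf config dict these snippets assume; the key names LEVEL,
# LEVEL_PREFIX, and TARGET come from how cf is indexed in this gist, but the values
# are hypothetical:
# cf = {'LEVEL': 'firm_id',       # panel/grouping column to one-hot encode
#       'LEVEL_PREFIX': 'firm',   # prefix for the generated dummy columns
#       'TARGET': 'sales'}        # dependent variable; its lag-1 column is the baseline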
import os
import pickle
import pyodbc
from collections import OrderedDict
from datetime import date, datetime
from pathlib import Path

import pandas as pd
import numpy as np

from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import ElasticNet, Lasso, LassoCV, LinearRegression, Ridge, RidgeCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score  # Prediction error metrics
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

from statsmodels.tsa.api import SimpleExpSmoothing
from statsmodels.tsa.api import Holt  # Holt's linear trend (Holt-Winters family)
from statsmodels.tsa.api import ExponentialSmoothing
import statsmodels.formula.api as sm
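# Registry of candidate regressors, keyed by a display name; an OrderedDict keeps the
# column order of the prediction DataFrames stable across runs.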
MODELS = OrderedDict()
MODELS["LinearRegression"]=LinearRegression()
MODELS["RidgeRegression"]=Ridge()
MODELS["LassoRegression"]=Lasso()
MODELS["LassoRegressionAlpha1.0"]=Lasso(alpha=1.0)
MODELS["LassoRegressionAlpha0.5"]=Lasso(alpha=0.5)
MODELS["LassoRegressionAlpha0.1"]=Lasso(alpha=0.5)
MODELS["ElasticNet"]=ElasticNet()
MODELS["DecisionTree"]=DecisionTreeRegressor()
MODELS["GradientBoostingRegression"]=GradientBoostingRegressor()
MODELS["RandomForrestRegressor"]=RandomForestRegressor()


def perform_modeling(cf, train_X, test_X, train_y, test_y, models=MODELS):
    """Fit every model in `models` on the training data and return train/test
    prediction DataFrames (one column per model) plus the fitted estimators."""
    train_predict = pd.DataFrame(index=train_X.index)
    test_predict = pd.DataFrame(index=test_X.index)
    #Include ground truth in prediction file.
    train_predict['target']=train_y
    test_predict['target']=test_y

    # Naive T-1 baseline: predict the target with its own first lag (built by generate_lag).
    baselinecol = cf['TARGET'] + '_lag1'
    print("T-1 Baseline...")
    train_predict['baseline_lag1'] = train_X[baselinecol]
    test_predict['baseline_lag1'] = test_X[baselinecol]

    trained_models=[]

    for name, model in models.items():
        print("Fitting model: ", name)
        m = model.fit(train_X, train_y)

        train_predict[name] = m.predict(train_X)
        test_predict[name] = m.predict(test_X)
        trained_models.append({name: m})

    return train_predict, test_predict, trained_models
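# Example usage (a minimal sketch; `panel`, the cf dict sketched above, and the 80/20
# split by row order are hypothetical stand-ins for a proper time-based train/test split):
# panel = generate_lag(panel, by=cf['LEVEL'], target=cf['TARGET'], periods=3)
# features = [c for c in panel.columns if c not in (cf['TARGET'], cf['LEVEL'])]
# cutoff = int(len(panel) * 0.8)
# train_X, test_X = panel[features].iloc[:cutoff], panel[features].iloc[cutoff:]
# train_y, test_y = panel[cf['TARGET']].iloc[:cutoff], panel[cf['TARGET']].iloc[cutoff:]
# train_predict, test_predict, trained_models = perform_modeling(cf, train_X, test_X, train_y, test_y)
# for name in MODELS:
#     print(name, mean_squared_error(test_predict['target'], test_predict[name]))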