@homodigitus
Created January 22, 2022 09:17
VAR model functions
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from stargazer.stargazer import Stargazer
from arch.unitroot import ADF
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.vector_ar.vecm import coint_johansen
# augmented dickey fuller test
def adf_test(df, lags=None, trend='c', max_lags=None, method='AIC', low_memory=None):
    """
    Run an augmented Dickey-Fuller test on every column of a dataframe.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root; each column is tested separately
    lags : int, optional
        The number of lags to use in the ADF regression. If omitted or None,
        `method` is used to automatically select the lag length, with no more
        than `max_lags` lags included.
    trend : {'nc', 'c', 'ct', 'ctt'}, optional
        The trend component to include in the ADF test
        'nc' - No trend components
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
        'ctt' - Include a constant and linear and quadratic time trends
    max_lags : int, optional
        The maximum number of lags to use when selecting lag length
    method : {'AIC', 'BIC', 't-stat'}, optional
        The method to use when selecting the lag length
        'AIC' - Select the minimum of the Akaike IC
        'BIC' - Select the minimum of the Schwarz/Bayesian IC
        't-stat' - Select the lag length based on the significance of the last lag
    low_memory : bool, optional
        Whether to use the low-memory implementation of lag-length selection

    Returns
    -------
    report : {Stargazer}
        A stargazer table with one ADF regression column per variable
    """
    # variable names
    variable_names = list(df.columns)
    # one stargazer column per variable
    list_of_ones = [1] * len(variable_names)
    # fit an ADF regression for each variable
    model_list = []
    for v in variable_names:
        df_test = ADF(df[v], lags=lags, trend=trend, max_lags=max_lags, method=method, low_memory=low_memory)
        df_test_reg = df_test.regression.model.fit()
        model_list.append(df_test_reg)
    # stargazer table
    stargazer = Stargazer(model_list)
    stargazer.custom_columns(variable_names, list_of_ones)
    # add caption
    if trend == 'nc':
        stargazer.title("ADF test without constant and trend")
    elif trend == 'c':
        stargazer.title("ADF test with constant")
    elif trend == 'ct':
        stargazer.title("ADF test with constant and linear trend")
    elif trend == 'ctt':
        stargazer.title("ADF test with constant, linear trend and quadratic trend")
    else:
        stargazer.title('ADF test')
    return stargazer
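# usage sketch for adf_test: a minimal example on simulated random walks.
# the column names 'y1'/'y2' and the generated data are placeholders, assuming you
# pass a dataframe of level series; Stargazer tables render to HTML or LaTeX strings
def _demo_adf_test():
    rng = np.random.default_rng(0)
    demo_df = pd.DataFrame({'y1': rng.standard_normal(200).cumsum(),
                            'y2': rng.standard_normal(200).cumsum()})
    # random walks should fail to reject the unit-root null
    table = adf_test(demo_df, trend='c', method='AIC')
    print(table.render_latex())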
# granger causality matrix
def granger_causality_matrix(data, variables, test="ssr_chi2test", maxlag=None, verbose=False):
    """
    Build a matrix of Granger-causality p-values for every pair of variables.

    Parameters
    ----------
    data : {dataframe}
        The data to test for Granger causality
    variables : {list}
        The variables to test for Granger causality
    test : {'ssr_chi2test', 'ssr_ftest', 'lrtest', 'params_ftest'}, optional
        The test statistic from `grangercausalitytests` to report
        'ssr_chi2test' - The sum of squared residuals chi-square test
        'ssr_ftest' - The sum of squared residuals F-test
        'lrtest' - The likelihood ratio test
        'params_ftest' - The F-test on the lag coefficients
    maxlag : {int}
        The maximum lag to use when testing for Granger causality (must be given as an integer)
    verbose : {bool}, optional
        Whether or not to print the p-values for each pair of variables

    Returns
    -------
    df : {dataframe}
        A dataframe whose (row, column) entry is the minimum p-value over all lags
        for the test that the column (_x) variable Granger-causes the row (_y) variable
    """
    # create the granger causality matrix
    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
            p_values = [round(test_result[i + 1][0][test][1], 4) for i in range(maxlag)]
            if verbose:
                print(f'Y = {r}, X = {c}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            df.loc[r, c] = min_p_value
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df
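# usage sketch for granger_causality_matrix: simulated stationary series where y2
# is driven by lagged y1, so the (y2_y, y1_x) cell should hold a small p-value.
# the names and data are placeholders; maxlag must be an integer
def _demo_granger_causality_matrix():
    rng = np.random.default_rng(1)
    y1 = rng.standard_normal(200)
    y2 = 0.8 * np.roll(y1, 1) + 0.1 * rng.standard_normal(200)
    demo_df = pd.DataFrame({'y1': y1, 'y2': y2})
    # rows are the dependent (_y) variables, columns the explanatory (_x) variables
    print(granger_causality_matrix(demo_df, variables=list(demo_df.columns), maxlag=4))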
# johansen cointegration test
def cointegration_test(df, alpha=0.05):
    """
    Run the Johansen cointegration test and flag significant trace statistics.

    Parameters
    ----------
    df : {dataframe}
        The data to test for cointegration
    alpha : {float}
        The significance level to use when testing for cointegration (0.10, 0.05 or 0.01)

    Returns
    -------
    None
        Prints the trace statistic, critical value and significance flag for each variable
    """
    # cointegration test (no deterministic term, 5 lagged differences)
    out = coint_johansen(df, -1, 5)
    # column of the critical-value table that matches the chosen significance level
    d = {0.10: 0, 0.05: 1, 0.01: 2}
    traces = out.lr1
    cvts = out.cvt[:, d[alpha]]
    def adjust(val, length=6):
        return str(val).ljust(length)
    # summary
    print(f'Name :: Test Stat > C({(1 - alpha) * 100:.0f}%) => Signif \n', '--' * 20)
    for col, trace, cvt in zip(df.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace, 2), 9), ">", adjust(cvt, 8), ' => ', trace > cvt)
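# usage sketch for cointegration_test: two simulated I(1) series sharing a common
# stochastic trend, so the trace statistic should exceed the critical value.
# the names and data are placeholders
def _demo_cointegration_test():
    rng = np.random.default_rng(2)
    common_trend = rng.standard_normal(300).cumsum()
    demo_df = pd.DataFrame({'y1': common_trend + rng.standard_normal(300),
                            'y2': 0.5 * common_trend + rng.standard_normal(300)})
    cointegration_test(demo_df, alpha=0.05)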