-
-
Save homodigitus/99b5529d79732b8cafeca3cf524b3c08 to your computer and use it in GitHub Desktop.
unit_root_tests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from stargazer.stargazer import Stargazer | |
from arch.unitroot import ADF, DFGLS, KPSS, PhillipsPerron, ZivotAndrews, VarianceRatio | |
import pandas as pd | |
# dickey fuller test | |
def df_test(df, trend='c', low_memory=None):
    """
    Run a plain (non-augmented) Dickey-Fuller test on every column of `df`.

    Each column is tested with `ADF` using zero lagged differences, the
    regression behind the test is re-fitted, and all fitted regressions are
    collected into a single Stargazer table with one column per variable.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root; every column is tested separately
    trend : {'n', 'nc', 'c', 'ct', 'ctt'}, optional
        The trend component to include in the test
        'n' / 'nc' - No trend components ('nc' is the legacy spelling; which
                     of the two is accepted depends on the installed arch
                     version)
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
        'ctt' - Include a constant and linear and quadratic time trends
    low_memory : bool, optional
        Forwarded unchanged to `ADF`

    Return
    ------
    report : {Stargazer}
        A stargazer model report titled according to `trend`
    """
    variable_names = list(df.columns)

    model_list = []
    for name in variable_names:
        # lags=0 is what makes this a Dickey-Fuller test: when lags is left
        # unset, ADF picks a lag length automatically and the result is an
        # augmented test, i.e. the same thing adf_test() already provides.
        test = ADF(df[name], lags=0, trend=trend, low_memory=low_memory)
        # re-fit the regression's model to get the results object that is
        # handed to Stargazer
        model_list.append(test.regression.model.fit())

    # one table column per variable, each spanning exactly one model
    stargazer = Stargazer(model_list)
    stargazer.custom_columns(variable_names, [1] * len(variable_names))

    # caption describing the deterministic terms; unknown values get a
    # generic title
    titles = {
        'n': "DF test without constant and trend",
        'nc': "DF test without constant and trend",
        'c': "DF test with constant",
        'ct': "DF test with constant and linear trend",
        'ctt': "DF test with constant, linear trend and quadratic trend",
    }
    stargazer.title(titles.get(trend, 'DF test'))
    return stargazer
# augmented dickey fuller test | |
def adf_test(df, lags=None, trend='c', max_lags=None, method='AIC', low_memory=None):
    """
    Run an Augmented Dickey-Fuller test on every column of `df`.

    Each column is tested with `ADF`, the regression behind the test is
    re-fitted, and all fitted regressions are collected into a single
    Stargazer table with one column per variable.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root; every column is tested separately
    lags : int, optional
        The number of lags to use in the ADF regression. If omitted or None,
        `method` is used to automatically select the lag length with no more
        than `max_lags` lags included.
    trend : {'n', 'nc', 'c', 'ct', 'ctt'}, optional
        The trend component to include in the ADF test
        'n' / 'nc' - No trend components ('nc' is the legacy spelling; which
                     of the two is accepted depends on the installed arch
                     version)
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
        'ctt' - Include a constant and linear and quadratic time trends
    max_lags : int, optional
        The maximum number of lags to use when selecting lag length
    method : {'AIC', 'BIC', 't-stat'}, optional
        The method to use when selecting the lag length
        'AIC' - Select the minimum of the Akaike IC
        'BIC' - Select the minimum of the Schwarz/Bayesian IC
        't-stat' - Select based on the t-statistics of the lag terms (see
                   the arch documentation for the exact rule)
    low_memory : bool, optional
        Forwarded unchanged to `ADF`

    Return
    ------
    report : {Stargazer}
        A stargazer model report titled according to `trend`
    """
    variable_names = list(df.columns)

    model_list = []
    for name in variable_names:
        test = ADF(df[name], lags=lags, trend=trend, max_lags=max_lags,
                   method=method, low_memory=low_memory)
        # re-fit the regression's model to get the results object that is
        # handed to Stargazer
        model_list.append(test.regression.model.fit())

    # one table column per variable, each spanning exactly one model
    stargazer = Stargazer(model_list)
    stargazer.custom_columns(variable_names, [1] * len(variable_names))

    # caption describing the deterministic terms; both spellings of the
    # no-trend option map to the same title, unknown values get a generic one
    titles = {
        'n': "ADF test without constant and trend",
        'nc': "ADF test without constant and trend",
        'c': "ADF test with constant",
        'ct': "ADF test with constant and linear trend",
        'ctt': "ADF test with constant, linear trend and quadratic trend",
    }
    stargazer.title(titles.get(trend, 'ADF test'))
    return stargazer
# Phillips-Perron test
def pp_test(df, lags=None, trend='c', test_type='tau', signif=0.05):
    """
    Run a Phillips-Perron unit-root test on every column of `df`.

    One report row is produced per column, holding the critical values, the
    test statistic, the lag count reported by the test, the p-value and a
    stationarity verdict at the `signif` level.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root; every column is tested separately
    lags : int, optional
        Lag setting forwarded unchanged to `PhillipsPerron`. If omitted or
        None, the choice is left to the library.
    trend : {'n', 'nc', 'c', 'ct'}, optional
        The trend component to include in the test
        'n' / 'nc' - No trend components ('nc' is the legacy spelling; which
                     of the two is accepted depends on the installed arch
                     version)
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
    test_type : str, optional
        Forwarded unchanged to `PhillipsPerron` ('tau' by default; see the
        arch documentation for the accepted values)
    signif: {float}
        Significance level between 0.00 and 1.00. 0.01, 0.05 and 0.10
        are common values.

    Return
    ------
    report : {Styler}
        The report dataframe wrapped in a pandas Styler (the result of
        `DataFrame.style.set_caption`) captioned according to `trend`
    """
    null_hypothesis = 'Data has unit root. Non-Stationary.'
    columns = ["Variable", "Null Hypothesis", "Significance",
               "Critical value 1%", "Critical value 5%", "Critical value 10%",
               "Test Statistic", "No. Lags Chosen", "P-Value", "Stationarity"]

    rows = []
    for variable in df.columns:
        # forward the caller's settings instead of hard-coded defaults
        r = PhillipsPerron(df[variable], lags=lags, trend=trend,
                           test_type=test_type)
        # Decide on the unrounded p-value so that rounding for display can
        # never flip the verdict. Rejecting the unit-root null means the
        # series is stationary.
        if r.pvalue <= signif:
            stationarity = "Series is Stationary."
        else:
            stationarity = "Series is Non-Stationary."
        rows.append({'Variable': variable,
                     'Null Hypothesis': null_hypothesis,
                     'Significance': signif,
                     'Critical value 1%': round(r.critical_values['1%'], 4),
                     'Critical value 5%': round(r.critical_values['5%'], 4),
                     'Critical value 10%': round(r.critical_values['10%'], 4),
                     'Test Statistic': round(r.stat, 4),
                     'No. Lags Chosen': round(r.lags, 4),
                     'P-Value': round(r.pvalue, 4),
                     'Stationarity': stationarity})

    # Build the frame once from the collected rows; DataFrame.append is not
    # available in pandas >= 2.0 and copied the frame on every call.
    report = pd.DataFrame(rows, columns=columns)

    # caption describing the deterministic terms; unknown values get a
    # generic caption
    captions = {
        'n': "PP test without constant and trend",
        'nc': "PP test without constant and trend",
        'c': "PP test with constant",
        'ct': "PP test with constant and linear trend",
    }
    return report.style.set_caption(captions.get(trend, 'PP test'))
## DFGLS test | |
def dfgls_test(df, lags=None, trend='c', max_lags=None, method='AIC', low_memory=None):
    """
    Run a DFGLS unit-root test on every column of `df`.

    Each column is tested with `DFGLS`, the regression behind the test is
    re-fitted, and all fitted regressions are collected into a single
    Stargazer table with one column per variable.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root; every column is tested separately
    lags : int, optional
        The number of lags to use in the test regression. If omitted or None,
        `method` is used to automatically select the lag length with no more
        than `max_lags` lags included.
    trend : {'c', 'ct'}, optional
        The trend component to include in the test
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
    max_lags : int, optional
        The maximum number of lags to use when selecting lag length
    method : {'AIC', 'BIC', 't-stat'}, optional
        The method to use when selecting the lag length
        'AIC' - Select the minimum of the Akaike IC
        'BIC' - Select the minimum of the Schwarz/Bayesian IC
        't-stat' - Select based on the t-statistics of the lag terms (see
                   the arch documentation for the exact rule)
    low_memory : bool, optional
        Forwarded unchanged to `DFGLS`

    Return
    ------
    report : {Stargazer}
        A stargazer model report titled according to `trend`
    """
    variable_names = list(df.columns)

    model_list = []
    for name in variable_names:
        # forward the caller's settings; hard-coded defaults here would make
        # every argument a no-op and let the title describe a trend that was
        # never actually used
        test = DFGLS(df[name], lags=lags, trend=trend, max_lags=max_lags,
                     method=method, low_memory=low_memory)
        # re-fit the regression's model to get the results object that is
        # handed to Stargazer
        model_list.append(test.regression.model.fit())

    # one table column per variable, each spanning exactly one model
    stargazer = Stargazer(model_list)
    stargazer.custom_columns(variable_names, [1] * len(variable_names))

    # caption describing the deterministic terms; unknown values get a
    # generic title
    titles = {
        'c': "DFGLS test with constant",
        'ct': "DFGLS test with constant and linear trend",
    }
    stargazer.title(titles.get(trend, 'DFGLS test'))
    return stargazer
# kpss test | |
def kpss_test(df, lags=None, trend='c', signif=0.05):
    """
    Run a KPSS stationarity test on every column of `df`.

    One report row is produced per column, holding the critical values, the
    test statistic, the lag count reported by the test, the p-value and a
    stationarity verdict at the `signif` level. Unlike the unit-root tests
    in this module, the null hypothesis here is stationarity, so a small
    p-value marks the series as non-stationary.

    Parameters
    ----------
    df : {dataframe}
        The data to test; every column is tested separately
    lags : int, optional
        Lag setting forwarded unchanged to `KPSS`. If omitted or None, the
        choice is left to the library.
    trend : {'c', 'ct'}, optional
        The trend component to include in the test
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
    signif: {float}
        Significance level between 0.00 and 1.00. 0.01, 0.05 and 0.10
        are common values.

    Return
    ------
    report : {Styler}
        The report dataframe wrapped in a pandas Styler (the result of
        `DataFrame.style.set_caption`) captioned according to `trend`
    """
    null_hypothesis = 'Data has not unit root. Stationary.'
    columns = ["Variable", "Null Hypothesis", "Significance",
               "Critical value 1%", "Critical value 5%", "Critical value 10%",
               "Test Statistic", "No. Lags Chosen", "P-Value", "Stationarity"]

    rows = []
    for variable in df.columns:
        # forward the caller's settings instead of hard-coded defaults
        r = KPSS(df[variable], lags=lags, trend=trend)
        # Decide on the unrounded p-value so that rounding for display can
        # never flip the verdict. Rejecting the stationarity null means the
        # series is non-stationary.
        if r.pvalue <= signif:
            stationarity = "Series is Non-Stationary."
        else:
            stationarity = "Series is Stationary."
        rows.append({'Variable': variable,
                     'Null Hypothesis': null_hypothesis,
                     'Significance': signif,
                     'Critical value 1%': round(r.critical_values['1%'], 4),
                     'Critical value 5%': round(r.critical_values['5%'], 4),
                     'Critical value 10%': round(r.critical_values['10%'], 4),
                     'Test Statistic': round(r.stat, 4),
                     'No. Lags Chosen': round(r.lags, 4),
                     'P-Value': round(r.pvalue, 4),
                     'Stationarity': stationarity})

    # Build the frame once from the collected rows; DataFrame.append is not
    # available in pandas >= 2.0 and copied the frame on every call.
    report = pd.DataFrame(rows, columns=columns)

    # caption describing the deterministic terms; unknown values get a
    # generic caption
    captions = {
        'c': "KPSS test with constant",
        'ct': "KPSS test with constant and linear trend",
    }
    return report.style.set_caption(captions.get(trend, 'KPSS test'))
# zivot andrews test | |
def zivot_andrews_test(df, lags=None, trend='c', trim=0.15, max_lags=None, method='aic', signif=0.05):
    """
    Run a Zivot-Andrews unit-root test on every column of `df`.

    One report row is produced per column, holding the critical values, the
    test statistic, the lag count reported by the test, the p-value and a
    stationarity verdict at the `signif` level.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root; every column is tested separately
    lags : int, optional
        The number of lags to use in the test regression. If omitted or None,
        `method` is used to automatically select the lag length with no more
        than `max_lags` lags included.
    trend : {"c", "t", "ct"}, optional
        The trend component to include in the test
        - "c" - Include a constant (Default)
        - "t" - Include a linear time trend
        - "ct" - Include a constant and linear time trend
    trim : float
        percentage of series at begin/end to exclude from break-period
        calculation in range [0, 0.333] (default=0.15)
    max_lags : int, optional
        The maximum number of lags to use when selecting lag length
    method : {"AIC", "BIC", "t-stat"}, optional
        The method to use when selecting the lag length; forwarded unchanged
        to `ZivotAndrews` (default 'aic')
        - "AIC" - Select the minimum of the Akaike IC
        - "BIC" - Select the minimum of the Schwarz/Bayesian IC
        - "t-stat" - Select based on the t-statistics of the lag terms (see
          the arch documentation for the exact rule)
    signif: {float}
        Significance level between 0.00 and 1.00. 0.01, 0.05 and 0.10
        are common values.

    Return
    ------
    report : {Styler}
        The report dataframe wrapped in a pandas Styler (the result of
        `DataFrame.style.set_caption`) captioned according to `trend`
    """
    null_hypothesis = 'Data has unit root. Non-Stationary.'
    columns = ["Variable", "Null Hypothesis", "Significance",
               "Critical value 1%", "Critical value 5%", "Critical value 10%",
               "Test Statistic", "No. Lags Chosen", "P-Value", "Stationarity"]

    rows = []
    for variable in df.columns:
        # trim and method are part of this function's signature and must be
        # forwarded, otherwise callers' values are silently ignored
        r = ZivotAndrews(df[variable], lags=lags, trend=trend, trim=trim,
                         max_lags=max_lags, method=method)
        # Decide on the unrounded p-value so that rounding for display can
        # never flip the verdict. Rejecting the unit-root null means the
        # series is stationary.
        if r.pvalue <= signif:
            stationarity = "Series is Stationary."
        else:
            stationarity = "Series is Non-Stationary."
        rows.append({'Variable': variable,
                     'Null Hypothesis': null_hypothesis,
                     'Significance': signif,
                     'Critical value 1%': round(r.critical_values['1%'], 4),
                     'Critical value 5%': round(r.critical_values['5%'], 4),
                     'Critical value 10%': round(r.critical_values['10%'], 4),
                     'Test Statistic': round(r.stat, 4),
                     'No. Lags Chosen': round(r.lags, 4),
                     'P-Value': round(r.pvalue, 4),
                     'Stationarity': stationarity})

    # Build the frame once from the collected rows; DataFrame.append is not
    # available in pandas >= 2.0 and copied the frame on every call.
    report = pd.DataFrame(rows, columns=columns)

    # caption describing the deterministic terms ('c' includes a constant,
    # so its caption must say so); unknown values get a generic caption
    captions = {
        'c': "Zivot-Andrews test with constant",
        't': "Zivot-Andrews test with trend",
        'ct': "Zivot-Andrews test with constant and linear trend",
    }
    return report.style.set_caption(captions.get(trend, 'Zivot-Andrews test'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment