Skip to content

Instantly share code, notes, and snippets.

@homodigitus
Last active Jan 8, 2022
Embed
What would you like to do?
unit_root_tests
from stargazer.stargazer import Stargazer
from arch.unitroot import ADF, DFGLS, KPSS, PhillipsPerron, ZivotAndrews, VarianceRatio
import pandas as pd
# dickey fuller test
def df_test(df, trend='c', low_memory=None):
    """
    Run a (non-augmented) Dickey-Fuller test on every column of ``df``.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root. Each column is tested separately.
    trend : {'nc', 'c', 'ct', 'ctt'}, optional
        The trend component to include in the test
        'nc' - No trend components (newer arch releases spell this 'n')
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
        'ctt' - Include a constant and linear and quadratic time trends
    low_memory : bool, optional
        Forwarded unchanged to ``arch.unitroot.ADF``.
    Return
    ------
    report : {Stargazer}
        A stargazer model report with one column per variable
    """
    # caption per trend specification; anything else gets the generic title
    titles = {
        'n': "DF test without constant and trend",
        'nc': "DF test without constant and trend",
        'c': "DF test with constant",
        'ct': "DF test with constant and linear trend",
        'ctt': "DF test with constant, linear trend and quadratic trend",
    }
    # variable names
    variable_names = list(df.columns)
    # model list
    model_list = []
    for name in variable_names:
        # lags=0 is what distinguishes the plain DF test from the augmented
        # one; leaving lags unset lets ADF pick a lag length automatically,
        # which would make this function a duplicate of adf_test.
        unit_root = ADF(df[name], lags=0, trend=trend, low_memory=low_memory)
        model_list.append(unit_root.regression.model.fit())
    # stargazer table, one model per column label
    stargazer = Stargazer(model_list)
    stargazer.custom_columns(variable_names, [1] * len(variable_names))
    # add caption
    stargazer.title(titles.get(trend, 'DF test'))
    return stargazer
# augmented dickey fuller test
def adf_test(df, lags=None, trend='c', max_lags=None, method='AIC', low_memory=None):
    """
    Run an Augmented Dickey-Fuller test on every column of ``df``.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root. Each column is tested separately.
    lags : int, optional
        The number of lags to use in the ADF regression. If omitted or None,
        `method` is used to automatically select the lag length with no more
        than `max_lags` included.
    trend : {'nc', 'c', 'ct', 'ctt'}, optional
        The trend component to include in the ADF test
        'nc' - No trend components (newer arch releases spell this 'n')
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
        'ctt' - Include a constant and linear and quadratic time trends
    max_lags : int, optional
        The maximum number of lags to use when selecting lag length
    method : {'AIC', 'BIC', 't-stat'}, optional
        The method to use when selecting the lag length
        'AIC' - Select the minimum of the Akaike IC
        'BIC' - Select the minimum of the Schwarz/Bayesian IC
        't-stat' - Select the lag length from the t-statistic of the
        longest lag (see the arch documentation for the exact rule)
    low_memory : bool, optional
        Forwarded unchanged to ``arch.unitroot.ADF``.
    Return
    ------
    report : {Stargazer}
        A stargazer model report with one column per variable
    """
    # caption per trend specification; anything else gets the generic title
    titles = {
        'n': "ADF test without constant and trend",
        'nc': "ADF test without constant and trend",
        'c': "ADF test with constant",
        'ct': "ADF test with constant and linear trend",
        'ctt': "ADF test with constant, linear trend and quadratic trend",
    }
    # variable names
    variable_names = list(df.columns)
    # model list
    model_list = []
    for name in variable_names:
        unit_root = ADF(df[name], lags=lags, trend=trend, max_lags=max_lags,
                        method=method, low_memory=low_memory)
        # refit the underlying OLS model so Stargazer receives a results object
        model_list.append(unit_root.regression.model.fit())
    # stargazer table, one model per column label
    stargazer = Stargazer(model_list)
    stargazer.custom_columns(variable_names, [1] * len(variable_names))
    # add caption
    stargazer.title(titles.get(trend, 'ADF test'))
    return stargazer
# Phillips-Perron test
def pp_test(df, lags=None, trend='c', test_type='tau', signif=0.05):
    """
    Run a Phillips-Perron test on every column of ``df`` and tabulate results.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root. Each column is tested separately.
    lags : int, optional
        Forwarded to ``PhillipsPerron``. If omitted or None the lag length
        is chosen by the library.
    trend : {'nc', 'c', 'ct'}, optional
        The trend component to include in the test
        'nc' - No trend components (newer arch releases spell this 'n')
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
    test_type : {'tau', 'rho'}, optional
        The test statistic to use; forwarded to ``PhillipsPerron``.
    signif: {float}
        Significance level between 0.00 and 1.00. 0.01, 0.05 and 0.10
        are common values.
    Return
    ------
    report : {Styler}
        A pandas Styler wrapping the result table (one row per column of
        ``df``) with a caption describing the trend specification
    """
    columns = ["Variable", "Null Hypothesis", "Significance",
               "Critical value 1%", "Critical value 5%", "Critical value 10%",
               "Test Statistic", "No. Lags Chosen", "P-Value", "Stationarity"]
    null_hypothesis = 'Data has unit root. Non-Stationary.'
    rows = []
    for variable in df.columns:
        # pass the caller's settings through; they used to be hard-coded
        # here, so the caption could disagree with the test actually run
        r = PhillipsPerron(df[variable], lags=lags, trend=trend, test_type=test_type)
        critical_values = r.critical_values
        p_value = r.pvalue
        # decide on the unrounded p-value so rounding cannot flip the verdict
        if p_value <= signif:
            stationarity = "Series is Stationary."
        else:
            stationarity = "Series is Non-Stationary."
        rows.append({'Variable': variable,
                     'Null Hypothesis': null_hypothesis,
                     'Significance': signif,
                     'Critical value 1%': round(critical_values['1%'], 4),
                     'Critical value 5%': round(critical_values['5%'], 4),
                     'Critical value 10%': round(critical_values['10%'], 4),
                     'Test Statistic': round(r.stat, 4),
                     'No. Lags Chosen': r.lags,
                     'P-Value': round(p_value, 4),
                     'Stationarity': stationarity})
    # build the table in one go; DataFrame.append no longer exists in pandas 2
    report = pd.DataFrame(rows, columns=columns)
    # add caption
    captions = {
        'n': "PP test without constant and trend",
        'nc': "PP test without constant and trend",
        'c': "PP test with constant",
        'ct': "PP test with constant and linear trend",
    }
    return report.style.set_caption(captions.get(trend, 'PP test'))
## DFGLS test
def dfgls_test(df, lags=None, trend='c', max_lags=None, method='AIC', low_memory=None):
    """
    Run a DFGLS test on every column of ``df`` and tabulate the regressions.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root. Each column is tested separately.
    lags : int, optional
        The number of lags to use in the ADF regression. If omitted or None,
        `method` is used to automatically select the lag length with no more
        than `max_lags` included.
    trend : {'c', 'ct'}, optional
        The trend component to include in the test
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
    max_lags : int, optional
        The maximum number of lags to use when selecting lag length
    method : {'AIC', 'BIC', 't-stat'}, optional
        The method to use when selecting the lag length
        'AIC' - Select the minimum of the Akaike IC
        'BIC' - Select the minimum of the Schwarz/Bayesian IC
        't-stat' - Select the lag length from the t-statistic of the
        longest lag (see the arch documentation for the exact rule)
    low_memory : bool, optional
        Forwarded unchanged to ``arch.unitroot.DFGLS``.
    Return
    ------
    report : {Stargazer}
        A stargazer model report with one column per variable
    """
    # caption per trend specification; anything else gets the generic title
    titles = {
        'c': "DFGLS test with constant",
        'ct': "DFGLS test with constant and linear trend",
    }
    # variable names
    variable_names = list(df.columns)
    # model list
    model_list = []
    for name in variable_names:
        # forward the caller's arguments; they used to be replaced by
        # hard-coded defaults, so e.g. trend='ct' only changed the caption
        unit_root = DFGLS(df[name], lags=lags, trend=trend, max_lags=max_lags,
                          method=method, low_memory=low_memory)
        model_list.append(unit_root.regression.model.fit())
    # stargazer table, one model per column label
    stargazer = Stargazer(model_list)
    stargazer.custom_columns(variable_names, [1] * len(variable_names))
    # add caption
    stargazer.title(titles.get(trend, 'DFGLS test'))
    return stargazer
# kpss test
def kpss_test(df, lags=None, trend='c', signif=0.05):
    """
    Run a KPSS stationarity test on every column of ``df`` and tabulate results.

    Note the null hypothesis is stationarity, so a small p-value marks the
    series as non-stationary (the reverse of the unit-root tests).

    Parameters
    ----------
    df : {dataframe}
        The data to test. Each column is tested separately.
    lags : int, optional
        Forwarded to ``KPSS``. If omitted or None the lag length is chosen
        by the library.
    trend : {'c', 'ct'}, optional
        The trend component to include in the test
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
    signif: {float}
        Significance level between 0.00 and 1.00. 0.01, 0.05 and 0.10
        are common values.
    Return
    ------
    report : {Styler}
        A pandas Styler wrapping the result table (one row per column of
        ``df``) with a caption describing the trend specification
    """
    columns = ["Variable", "Null Hypothesis", "Significance",
               "Critical value 1%", "Critical value 5%", "Critical value 10%",
               "Test Statistic", "No. Lags Chosen", "P-Value", "Stationarity"]
    null_hypothesis = 'Data has not unit root. Stationary.'
    rows = []
    for variable in df.columns:
        # pass the caller's settings through; they used to be hard-coded
        # here, so the caption could disagree with the test actually run
        r = KPSS(df[variable], lags=lags, trend=trend)
        critical_values = r.critical_values
        p_value = r.pvalue
        # decide on the unrounded p-value so rounding cannot flip the verdict;
        # rejecting the KPSS null means the series is NOT stationary
        if p_value <= signif:
            stationarity = "Series is Non-Stationary."
        else:
            stationarity = "Series is Stationary."
        rows.append({'Variable': variable,
                     'Null Hypothesis': null_hypothesis,
                     'Significance': signif,
                     'Critical value 1%': round(critical_values['1%'], 4),
                     'Critical value 5%': round(critical_values['5%'], 4),
                     'Critical value 10%': round(critical_values['10%'], 4),
                     'Test Statistic': round(r.stat, 4),
                     'No. Lags Chosen': r.lags,
                     'P-Value': round(p_value, 4),
                     'Stationarity': stationarity})
    # build the table in one go; DataFrame.append no longer exists in pandas 2
    report = pd.DataFrame(rows, columns=columns)
    # add caption
    captions = {
        'c': "KPSS test with constant",
        'ct': "KPSS test with constant and linear trend",
    }
    return report.style.set_caption(captions.get(trend, 'KPSS test'))
# zivot andrews test
def zivot_andrews_test(df, lags=None, trend='c', trim=0.15, max_lags=None, method='aic', signif=0.05):
    """
    Run a Zivot-Andrews test on every column of ``df`` and tabulate results.

    Parameters
    ----------
    df : {dataframe}
        The data to test for a unit root. Each column is tested separately.
    lags : int, optional
        The number of lags to use in the ADF regression. If omitted or None,
        `method` is used to automatically select the lag length with no more
        than `max_lags` included.
    trend : {"c", "t", "ct"}, optional
        The trend component to include in the test
        - "c" - Include a constant (Default)
        - "t" - Include a linear time trend
        - "ct" - Include a constant and linear time trend
    trim : float
        percentage of series at begin/end to exclude from break-period
        calculation in range [0, 0.333] (default=0.15)
    max_lags : int, optional
        The maximum number of lags to use when selecting lag length
    method : {"AIC", "BIC", "t-stat"}, optional
        The method to use when selecting the lag length
        - "AIC" - Select the minimum of the Akaike IC
        - "BIC" - Select the minimum of the Schwarz/Bayesian IC
        - "t-stat" - Select the lag length from the t-statistic of the
          longest lag (see the arch documentation for the exact rule)
    signif: {float}
        Significance level between 0.00 and 1.00. 0.01, 0.05 and 0.10
        are common values.
    Return
    ------
    report : {Styler}
        A pandas Styler wrapping the result table (one row per column of
        ``df``) with a caption describing the trend specification
    """
    columns = ["Variable", "Null Hypothesis", "Significance",
               "Critical value 1%", "Critical value 5%", "Critical value 10%",
               "Test Statistic", "No. Lags Chosen", "P-Value", "Stationarity"]
    null_hypothesis = 'Data has unit root. Non-Stationary.'
    rows = []
    for variable in df.columns:
        # trim and method are part of the signature but used to be dropped
        # here, so callers could not actually change them
        r = ZivotAndrews(df[variable], lags=lags, trend=trend, trim=trim,
                         max_lags=max_lags, method=method)
        critical_values = r.critical_values
        p_value = r.pvalue
        # decide on the unrounded p-value so rounding cannot flip the verdict
        if p_value <= signif:
            stationarity = "Series is Stationary."
        else:
            stationarity = "Series is Non-Stationary."
        rows.append({'Variable': variable,
                     'Null Hypothesis': null_hypothesis,
                     'Significance': signif,
                     'Critical value 1%': round(critical_values['1%'], 4),
                     'Critical value 5%': round(critical_values['5%'], 4),
                     'Critical value 10%': round(critical_values['10%'], 4),
                     'Test Statistic': round(r.stat, 4),
                     'No. Lags Chosen': r.lags,
                     'P-Value': round(p_value, 4),
                     'Stationarity': stationarity})
    # build the table in one go; DataFrame.append no longer exists in pandas 2
    report = pd.DataFrame(rows, columns=columns)
    # add caption ('c' means a constant IS included; the old caption said
    # "without constant and trend")
    captions = {
        'c': "Zivot-Andrews test with constant",
        't': "Zivot-Andrews test with trend",
        'ct': "Zivot-Andrews test with constant and linear trend",
    }
    return report.style.set_caption(captions.get(trend, 'Zivot-Andrews test'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment