# homodigitus/var_model.py

## var_model.py
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from stargazer.stargazer import Stargazer
from arch.unitroot import ADF
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.vector_ar.vecm import coint_johansen


# augmented dickey fuller test
def adf_test(df, lags=None, trend='c', max_lags=None, method='AIC', low_memory=None):

    """
    Run an Augmented Dickey-Fuller regression on every column of `df` and
    collect the fitted regressions in a single Stargazer table.

    Parameters
    ----------
    df : {dataframe, series}
        The data to test for a unit root.  Every column is tested on its
        own; a series is treated as a one-column dataframe.
    lags : int, optional
        The number of lags to use in the ADF regression.  If omitted or None,
        `method` is used to automatically select the lag length with no more
        than `max_lags` included.
    trend : {'n', 'nc', 'c', 'ct', 'ctt'}, optional
        The trend component to include in the ADF test.  The value is passed
        to `ADF` unchanged, so it must be a spelling the installed `arch`
        release accepts.
        'n' / 'nc' - No trend components (newer arch releases appear to
                     expect 'n', older ones 'nc' - confirm for your version)
        'c' - Include a constant (Default)
        'ct' - Include a constant and linear time trend
        'ctt' - Include a constant and linear and quadratic time trends
    max_lags : int, optional
        The maximum number of lags to use when selecting lag length
    method : {'AIC', 'BIC', 't-stat'}, optional
        The method to use when selecting the lag length
        'AIC' - Select the minimum of the Akaike IC
        'BIC' - Select the minimum of the Schwarz/Bayesian IC
        't-stat' - Select the lag length with a t-statistic based rule
                   (see the arch documentation for the exact procedure)
    low_memory : bool, optional
        Passed to `ADF` unchanged (its low-memory lag-selection switch).

    Return
    ------
    report : {Stargazer}
             A stargazer model report with one column per tested variable
    """

    # a series has no `.columns`; promote it so the loop below is uniform
    if isinstance(df, pd.Series):
        df = df.to_frame()

    # variable names
    variable_names = list(df.columns)

    # one fitted ADF regression per variable
    model_list = []
    for v in variable_names:
        df_test = ADF(df[v], lags=lags, trend=trend, max_lags=max_lags, method=method, low_memory=low_memory)
        model_list.append(df_test.regression.model.fit())

    # stargazer table: every label spans exactly one model column; labels are
    # converted to str so non-string column names can still be rendered
    stargazer = Stargazer(model_list)
    stargazer.custom_columns([str(v) for v in variable_names], [1] * len(variable_names))

    # add caption ('n' and 'nc' both mean "no deterministic terms")
    titles = {
        'n': "ADF test without constant and trend",
        'nc': "ADF test without constant and trend",
        'c': "ADF test with constant",
        'ct': "ADF test with constant and linear trend",
        'ctt': "ADF test with constant, linear trend and quadratic trend",
    }
    stargazer.title(titles.get(trend, 'ADF test'))

    return stargazer


# granger causality matrix
def granger_causality_matrix(data, variables, test="ssr_chi2test", maxlag=None, verbose=False):

    """
    Build a matrix of pairwise Granger-causality p-values.

    For every ordered pair the test is run for lags 1..`maxlag` and the
    smallest p-value (rounded to 4 decimals) is stored.  Rows are the
    response series (suffix '_y'), columns the candidate predictors
    (suffix '_x').

    Parameters
    ----------
    data : {dataframe}
           The data holding the time series to test
    variables : {list}
                The column names of `data` to test for granger causality
    test : {'ssr_chi2test', 'ssr_ftest', 'lrtest', 'params_ftest'}, optional
            Key of the statistic to read from the `grangercausalitytests`
            result
            'ssr_chi2test' - chi-square test based on the sum of squared residuals
            'ssr_ftest' - F-test based on the sum of squared residuals
            'lrtest' - likelihood ratio test
            'params_ftest' - F-test on the lagged-predictor coefficients
    maxlag : {int}
            The maximum lag to use when testing for granger causality.
            Required in practice: the None default is kept only so existing
            call signatures keep working.
    verbose : {bool}, optional
            Whether or not to print the p-values of every pair

    Return
    ------
    df : {dataframe}
            A dataframe with the minimum p-value of every (response, predictor) pair

    Raises
    ------
    TypeError
        If `maxlag` is None.
    ValueError
        If `maxlag` is smaller than 1.
    """
    # fail early and clearly instead of deep inside range()/statsmodels
    if maxlag is None:
        raise TypeError("maxlag must be a positive integer, got None")
    if maxlag < 1:
        raise ValueError(f"maxlag must be a positive integer, got {maxlag}")

    # create the granger causality matrix
    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            # NOTE(review): the diagonal tests a series against itself; the
            # value is kept for backward compatibility but carries no meaning.
            test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
            # result is keyed by lag; [0] is the dict of tests, [1] the p-value
            p_values = [round(test_result[lag][0][test][1], 4) for lag in range(1, maxlag + 1)]
            if verbose:
                print(f'Y = {r}, X = {c}, P Values = {p_values}')
            df.loc[r, c] = np.min(p_values)

    # f-strings so that non-string labels do not break the renaming
    df.columns = [f'{var}_x' for var in variables]
    df.index = [f'{var}_y' for var in variables]

    return df


# johansen cointegration test
def cointegration_test(df, alpha=0.05, det_order=-1, k_ar_diff=5):

    """
    Run the Johansen cointegration test and print the trace statistics
    next to their critical values.

    Parameters
    ----------
    df : {dataframe}
        The data to test for cointegration
    alpha : {0.10, 0.05, 0.01}
        The significance level to use when testing for cointegration; only
        these three levels have tabulated critical values
    det_order : {int}, optional
        Deterministic term order passed to `coint_johansen`
        (-1 no deterministic terms, 0 constant, 1 linear trend)
    k_ar_diff : {int}, optional
        Number of lagged differences passed to `coint_johansen`

    Return
    ------
    prints the results of the test

    Raises
    ------
    KeyError
        If `alpha` is not one of the supported significance levels.
    """

    # column of `cvt` holding the critical values of each confidence level
    cvt_columns = {'0.90': 0, '0.95': 1, '0.99': 2}

    # Resolve the column before the test runs.  str(1 - 0.1) is '0.9', which
    # never matched the '0.90' key, so the level is formatted with two
    # decimals instead.  round() absorbs float noise in a computed alpha and
    # the float comparison still rejects levels such as 0.051.
    confidence = round(1 - alpha, 10)
    key = f'{confidence:.2f}'
    if key not in cvt_columns or float(key) != confidence:
        raise KeyError(f"alpha={alpha!r} is not supported; use 0.10, 0.05 or 0.01")

    # cointegration test
    out = coint_johansen(df, det_order, k_ar_diff)
    traces = out.lr1
    cvts = out.cvt[:, cvt_columns[key]]

    def adjust(val, length=6):
        return str(val).ljust(length)

    # Summary
    # NOTE(review): the trace statistics appear to be ordered by hypothesised
    # cointegration rank rather than by variable, so the column name is only
    # a row label here - confirm before interpreting rows per variable.
    pct = round(confidence * 100)
    print(f'Name   ::  Test Stat > C({pct}%)    =>   Signif  \n', '--'*20)
    for col, trace, cvt in zip(df.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace,2), 9), ">", adjust(cvt, 8), ' =>  ' , trace > cvt)
	import warnings
	warnings.filterwarnings("ignore")
	import pandas as pd
	import numpy as np
	from stargazer.stargazer import Stargazer
	from arch.unitroot import ADF
	from statsmodels.tsa.stattools import grangercausalitytests
	from statsmodels.tsa.vector_ar.vecm import coint_johansen


	# augmented dickey fuller test
	def adf_test(df, lags=None, trend='c', max_lags=None, method='AIC', low_memory=None):

	    """
	    Run an Augmented Dickey-Fuller regression on every column of `df` and
	    collect the fitted regressions in a single Stargazer table.

	    Parameters
	    ----------
	    df : {dataframe, series}
	        The data to test for a unit root.  Every column is tested on its
	        own; a series is treated as a one-column dataframe.
	    lags : int, optional
	        The number of lags to use in the ADF regression.  If omitted or None,
	        `method` is used to automatically select the lag length with no more
	        than `max_lags` included.
	    trend : {'n', 'nc', 'c', 'ct', 'ctt'}, optional
	        The trend component to include in the ADF test.  The value is passed
	        to `ADF` unchanged, so it must be a spelling the installed `arch`
	        release accepts.
	        'n' / 'nc' - No trend components (newer arch releases appear to
	                     expect 'n', older ones 'nc' - confirm for your version)
	        'c' - Include a constant (Default)
	        'ct' - Include a constant and linear time trend
	        'ctt' - Include a constant and linear and quadratic time trends
	    max_lags : int, optional
	        The maximum number of lags to use when selecting lag length
	    method : {'AIC', 'BIC', 't-stat'}, optional
	        The method to use when selecting the lag length
	        'AIC' - Select the minimum of the Akaike IC
	        'BIC' - Select the minimum of the Schwarz/Bayesian IC
	        't-stat' - Select the lag length with a t-statistic based rule
	                   (see the arch documentation for the exact procedure)
	    low_memory : bool, optional
	        Passed to `ADF` unchanged (its low-memory lag-selection switch).

	    Return
	    ------
	    report : {Stargazer}
	             A stargazer model report with one column per tested variable
	    """

	    # a series has no `.columns`; promote it so the loop below is uniform
	    if isinstance(df, pd.Series):
	        df = df.to_frame()

	    # variable names
	    variable_names = list(df.columns)

	    # one fitted ADF regression per variable
	    model_list = []
	    for v in variable_names:
	        df_test = ADF(df[v], lags=lags, trend=trend, max_lags=max_lags, method=method, low_memory=low_memory)
	        model_list.append(df_test.regression.model.fit())

	    # stargazer table: every label spans exactly one model column; labels are
	    # converted to str so non-string column names can still be rendered
	    stargazer = Stargazer(model_list)
	    stargazer.custom_columns([str(v) for v in variable_names], [1] * len(variable_names))

	    # add caption ('n' and 'nc' both mean "no deterministic terms")
	    titles = {
	        'n': "ADF test without constant and trend",
	        'nc': "ADF test without constant and trend",
	        'c': "ADF test with constant",
	        'ct': "ADF test with constant and linear trend",
	        'ctt': "ADF test with constant, linear trend and quadratic trend",
	    }
	    stargazer.title(titles.get(trend, 'ADF test'))

	    return stargazer


	# granger causality matrix
	def granger_causality_matrix(data, variables, test="ssr_chi2test", maxlag=None, verbose=False):

	    """
	    Build a matrix of pairwise Granger-causality p-values.

	    For every ordered pair the test is run for lags 1..`maxlag` and the
	    smallest p-value (rounded to 4 decimals) is stored.  Rows are the
	    response series (suffix '_y'), columns the candidate predictors
	    (suffix '_x').

	    Parameters
	    ----------
	    data : {dataframe}
	           The data holding the time series to test
	    variables : {list}
	                The column names of `data` to test for granger causality
	    test : {'ssr_chi2test', 'ssr_ftest', 'lrtest', 'params_ftest'}, optional
	            Key of the statistic to read from the `grangercausalitytests`
	            result
	            'ssr_chi2test' - chi-square test based on the sum of squared residuals
	            'ssr_ftest' - F-test based on the sum of squared residuals
	            'lrtest' - likelihood ratio test
	            'params_ftest' - F-test on the lagged-predictor coefficients
	    maxlag : {int}
	            The maximum lag to use when testing for granger causality.
	            Required in practice: the None default is kept only so existing
	            call signatures keep working.
	    verbose : {bool}, optional
	            Whether or not to print the p-values of every pair

	    Return
	    ------
	    df : {dataframe}
	            A dataframe with the minimum p-value of every (response, predictor) pair

	    Raises
	    ------
	    TypeError
	        If `maxlag` is None.
	    ValueError
	        If `maxlag` is smaller than 1.
	    """
	    # fail early and clearly instead of deep inside range()/statsmodels
	    if maxlag is None:
	        raise TypeError("maxlag must be a positive integer, got None")
	    if maxlag < 1:
	        raise ValueError(f"maxlag must be a positive integer, got {maxlag}")

	    # create the granger causality matrix
	    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
	    for c in df.columns:
	        for r in df.index:
	            # NOTE(review): the diagonal tests a series against itself; the
	            # value is kept for backward compatibility but carries no meaning.
	            test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
	            # result is keyed by lag; [0] is the dict of tests, [1] the p-value
	            p_values = [round(test_result[lag][0][test][1], 4) for lag in range(1, maxlag + 1)]
	            if verbose:
	                print(f'Y = {r}, X = {c}, P Values = {p_values}')
	            df.loc[r, c] = np.min(p_values)

	    # f-strings so that non-string labels do not break the renaming
	    df.columns = [f'{var}_x' for var in variables]
	    df.index = [f'{var}_y' for var in variables]

	    return df


	# johansen cointegration test
	def cointegration_test(df, alpha=0.05, det_order=-1, k_ar_diff=5):

	    """
	    Run the Johansen cointegration test and print the trace statistics
	    next to their critical values.

	    Parameters
	    ----------
	    df : {dataframe}
	        The data to test for cointegration
	    alpha : {0.10, 0.05, 0.01}
	        The significance level to use when testing for cointegration; only
	        these three levels have tabulated critical values
	    det_order : {int}, optional
	        Deterministic term order passed to `coint_johansen`
	        (-1 no deterministic terms, 0 constant, 1 linear trend)
	    k_ar_diff : {int}, optional
	        Number of lagged differences passed to `coint_johansen`

	    Return
	    ------
	    prints the results of the test

	    Raises
	    ------
	    KeyError
	        If `alpha` is not one of the supported significance levels.
	    """

	    # column of `cvt` holding the critical values of each confidence level
	    cvt_columns = {'0.90': 0, '0.95': 1, '0.99': 2}

	    # Resolve the column before the test runs.  str(1 - 0.1) is '0.9', which
	    # never matched the '0.90' key, so the level is formatted with two
	    # decimals instead.  round() absorbs float noise in a computed alpha and
	    # the float comparison still rejects levels such as 0.051.
	    confidence = round(1 - alpha, 10)
	    key = f'{confidence:.2f}'
	    if key not in cvt_columns or float(key) != confidence:
	        raise KeyError(f"alpha={alpha!r} is not supported; use 0.10, 0.05 or 0.01")

	    # cointegration test
	    out = coint_johansen(df, det_order, k_ar_diff)
	    traces = out.lr1
	    cvts = out.cvt[:, cvt_columns[key]]

	    def adjust(val, length=6):
	        return str(val).ljust(length)

	    # Summary
	    # NOTE(review): the trace statistics appear to be ordered by hypothesised
	    # cointegration rank rather than by variable, so the column name is only
	    # a row label here - confirm before interpreting rows per variable.
	    pct = round(confidence * 100)
	    print(f'Name :: Test Stat > C({pct}%) => Signif \n', '--'*20)
	    for col, trace, cvt in zip(df.columns, traces, cvts):
	        print(adjust(col), ':: ', adjust(round(trace,2), 9), ">", adjust(cvt, 8), ' => ' , trace > cvt)