Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import pandas as pd
import statsmodels.formula.api as smf
from patsy import dmatrices
import scipy.stats as st
##########################################################################################
#Select between two non-nested fixed effects models using the Encompassing Principle
##########################################################################################
#Define the units (countries) of interest
unit_names = ['Belgium', 'CzechRepublic', 'France', 'Ireland', 'Portugal', 'UK', 'USA']
unit_names.sort()
#column containing the fixed effect units
unit_col_name='COUNTRY'
#model expression of the main model
#GDP_PCAP_GWTH_PCNT = beta_0 + beta_1*Belgium + beta_2*CzechRepublic + beta_3*France +
# beta_4*Ireland + beta_5*Portugal + beta_6*UK + beta_7*USA + beta_8*GCF_GWTH_PCNT +
# beta_9*CO2_PCAP_GWTH_PCNT + e
#Define the y and X variable names
y_var_name = 'GDP_PCAP_GWTH_PCNT'
X_var_names = ['GCF_GWTH_PCNT']
#Load the panel data set of World Bank published development indicators into a Pandas Dataframe
df_panel = pd.read_csv('wb_data_panel_3ind_7units_1992_2014.csv', header=0)
print(df_panel.corr())
#Print out the first 30 rows
print(df_panel.head(30))
#Create the dummy variables, one for each country
df_dummies = pd.get_dummies(df_panel[unit_col_name])
#Join the dummies Dataframe with the panel data set
df_panel_with_dummies = df_panel.join(df_dummies)
print(df_panel_with_dummies)
#Construct the regression equation. Note that we are leaving out one dummy variable so as to
# avoid perfect multi-colinearity between the 7 dummy variables. The regression model's intercept
# will contain the value of the coefficient for the omitted dummy variable.
model_a_expr = y_var_name + ' ~ '
i = 0
for X_var_name in X_var_names:
if i > 0:
model_a_expr = model_a_expr + ' + ' + X_var_name
else:
model_a_expr = model_a_expr + X_var_name
i = i + 1
for dummy_name in unit_names[:-1]:
model_a_expr = model_a_expr + ' + ' + dummy_name
print('Regression expression for model A=\n' + model_a_expr)
#Build and train model A
fe_model_a = smf.ols(formula=model_a_expr, data=df_panel_with_dummies)
fe_model_a_results = fe_model_a.fit()
print('===============================================================================')
print('=========================== Fixed Effects Model A =============================')
print(fe_model_a_results.summary())
#Repeat the above steps for FE model B:
#Construct the regression equation.
X_var_names = ['CO2_PCAP_GWTH_PCNT']
model_b_expr = y_var_name + ' ~ '
i = 0
for X_var_name in X_var_names:
if i > 0:
model_b_expr = model_b_expr + ' + ' + X_var_name
else:
model_b_expr = model_b_expr + X_var_name
i = i + 1
for dummy_name in unit_names[:-1]:
model_b_expr = model_b_expr + ' + ' + dummy_name
print('Regression expression for model B=\n' + model_b_expr)
#Build and train model B
fe_model_b = smf.ols(formula=model_b_expr, data=df_panel_with_dummies)
fe_model_b_results = fe_model_b.fit()
print('===============================================================================')
print('=========================== Fixed Effects Model B =============================')
print(fe_model_b_results.summary())
#Get the predictions of FE model B on the training data set
y_train, Z_train = dmatrices(model_b_expr, df_panel_with_dummies, return_type='dataframe')
y_cap = fe_model_b_results.predict(Z_train)
#Add the predictions from model B to the data panel
df_panel_with_dummies['y_cap'] = y_cap
#Construct the equation of the coomprehensive model y = y_capλ +Xβ + ε
X_var_names = ['y_cap', 'GCF_GWTH_PCNT']
comprehensive_model_expr = y_var_name + ' ~ '
i = 0
for X_var_name in X_var_names:
if i > 0:
comprehensive_model_expr = comprehensive_model_expr + ' + ' + X_var_name
else:
comprehensive_model_expr = comprehensive_model_expr + X_var_name
i = i + 1
for dummy_name in unit_names[:-1]:
comprehensive_model_expr = comprehensive_model_expr + ' + ' + dummy_name
print('Regression expression of the comprehensive model=\n' + comprehensive_model_expr)
#Build and fit the comprehensive model
comprehensive_model = smf.ols(formula=comprehensive_model_expr, data=df_panel_with_dummies)
comprehensive_model_results = comprehensive_model.fit()
print('===============================================================================')
print('============================= Comprehensive Model =============================')
print(comprehensive_model_results.summary())
#Formulate model D by adding CO2_PCAP_GWTH_PCNT to model A
X_var_names = ['GCF_GWTH_PCNT', 'CO2_PCAP_GWTH_PCNT']
fe_model_d_expr = y_var_name + ' ~ '
i = 0
for X_var_name in X_var_names:
if i > 0:
fe_model_d_expr = fe_model_d_expr + ' + ' + X_var_name
else:
fe_model_d_expr = fe_model_d_expr + X_var_name
i = i + 1
for dummy_name in unit_names[:-1]:
fe_model_d_expr = fe_model_d_expr + ' + ' + dummy_name
print('Regression expression of model D=\n' + fe_model_d_expr)
#Build and train FE model D
fe_model_d = smf.ols(formula=fe_model_d_expr, data=df_panel_with_dummies)
fe_model_d_results = fe_model_d.fit()
print('===============================================================================')
print('=========================== Fixed Effects Model D =============================')
print(fe_model_d_results.summary())
#Perform the F-test on models A and D
k_a = len(fe_model_a_results.params)
k_d = len(fe_model_d_results.params)
N = len(df_panel_with_dummies)
rss_a = fe_model_a_results.ssr
rss_d = fe_model_d_results.ssr
f_statistic = ((rss_a-rss_d)/(k_d-k_a))/(rss_d/(N-k_d))
print('F-statistic='+str(f_statistic))
alpha=0.05
f_critical_value=st.f.ppf((1.0-alpha), (k_d-k_a), (N-k_d))
print('F test critical value at alpha of 0.05='+str(f_critical_value))
#Perform the LR test on models A and D
log_ll_a = fe_model_a_results.llf
log_ll_d = fe_model_d_results.llf
lrt_statistic = -2*(log_ll_a - log_ll_d)
print('LRT-statistic='+str(lrt_statistic))
alpha=0.05
chi2_critical_value=st.chi2.ppf((1.0-alpha), (k_d-k_a))
print('Chi-squared critical value at alpha of 0.05='+str(chi2_critical_value))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment