Skip to content

Instantly share code, notes, and snippets.

@sachinsdate
Created May 18, 2022 11:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sachinsdate/c05e8ed16864636388416db3b6f2b9d8 to your computer and use it in GitHub Desktop.
Save sachinsdate/c05e8ed16864636388416db3b6f2b9d8 to your computer and use it in GitHub Desktop.
import pandas as pd
import statsmodels.formula.api as smf
from patsy import dmatrices
import scipy.stats as st
##########################################################################################
#Select between two non-nested fixed effects models using the Encompassing Principle
##########################################################################################
#Define the units (countries) of interest
unit_names = ['Belgium', 'CzechRepublic', 'France', 'Ireland', 'Portugal', 'UK', 'USA']
unit_names.sort()
#column containing the fixed effect units
unit_col_name='COUNTRY'
#model expression of the main model
#GDP_PCAP_GWTH_PCNT = beta_0 + beta_1*Belgium + beta_2*CzechRepublic + beta_3*France +
# beta_4*Ireland + beta_5*Portugal + beta_6*UK + beta_7*USA + beta_8*GCF_GWTH_PCNT +
# beta_9*CO2_PCAP_GWTH_PCNT + e
#Define the y and X variable names
y_var_name = 'GDP_PCAP_GWTH_PCNT'
X_var_names = ['GCF_GWTH_PCNT']
#Load the panel data set of World Bank published development indicators into a Pandas Dataframe
df_panel = pd.read_csv('wb_data_panel_3ind_7units_1992_2014.csv', header=0)
print(df_panel.corr())
#Print out the first 30 rows
print(df_panel.head(30))
#Create the dummy variables, one for each country
df_dummies = pd.get_dummies(df_panel[unit_col_name])
#Join the dummies Dataframe with the panel data set
df_panel_with_dummies = df_panel.join(df_dummies)
print(df_panel_with_dummies)
#Construct the regression equation. Note that we are leaving out one dummy variable so as to
# avoid perfect multi-colinearity between the 7 dummy variables. The regression model's intercept
# will contain the value of the coefficient for the omitted dummy variable.
model_a_expr = y_var_name + ' ~ '
i = 0
for X_var_name in X_var_names:
if i > 0:
model_a_expr = model_a_expr + ' + ' + X_var_name
else:
model_a_expr = model_a_expr + X_var_name
i = i + 1
for dummy_name in unit_names[:-1]:
model_a_expr = model_a_expr + ' + ' + dummy_name
print('Regression expression for model A=\n' + model_a_expr)
#Build and train model A
fe_model_a = smf.ols(formula=model_a_expr, data=df_panel_with_dummies)
fe_model_a_results = fe_model_a.fit()
print('===============================================================================')
print('=========================== Fixed Effects Model A =============================')
print(fe_model_a_results.summary())
#Repeat the above steps for FE model B:
#Construct the regression equation.
X_var_names = ['CO2_PCAP_GWTH_PCNT']
model_b_expr = y_var_name + ' ~ '
i = 0
for X_var_name in X_var_names:
if i > 0:
model_b_expr = model_b_expr + ' + ' + X_var_name
else:
model_b_expr = model_b_expr + X_var_name
i = i + 1
for dummy_name in unit_names[:-1]:
model_b_expr = model_b_expr + ' + ' + dummy_name
print('Regression expression for model B=\n' + model_b_expr)
#Build and train model B
fe_model_b = smf.ols(formula=model_b_expr, data=df_panel_with_dummies)
fe_model_b_results = fe_model_b.fit()
print('===============================================================================')
print('=========================== Fixed Effects Model B =============================')
print(fe_model_b_results.summary())
#Get the predictions of FE model B on the training data set
y_train, Z_train = dmatrices(model_b_expr, df_panel_with_dummies, return_type='dataframe')
y_cap = fe_model_b_results.predict(Z_train)
#Add the predictions from model B to the data panel
df_panel_with_dummies['y_cap'] = y_cap
#Construct the equation of the coomprehensive model y = y_capλ +Xβ + ε
X_var_names = ['y_cap', 'GCF_GWTH_PCNT']
comprehensive_model_expr = y_var_name + ' ~ '
i = 0
for X_var_name in X_var_names:
if i > 0:
comprehensive_model_expr = comprehensive_model_expr + ' + ' + X_var_name
else:
comprehensive_model_expr = comprehensive_model_expr + X_var_name
i = i + 1
for dummy_name in unit_names[:-1]:
comprehensive_model_expr = comprehensive_model_expr + ' + ' + dummy_name
print('Regression expression of the comprehensive model=\n' + comprehensive_model_expr)
#Build and fit the comprehensive model
comprehensive_model = smf.ols(formula=comprehensive_model_expr, data=df_panel_with_dummies)
comprehensive_model_results = comprehensive_model.fit()
print('===============================================================================')
print('============================= Comprehensive Model =============================')
print(comprehensive_model_results.summary())
#Formulate model D by adding CO2_PCAP_GWTH_PCNT to model A
X_var_names = ['GCF_GWTH_PCNT', 'CO2_PCAP_GWTH_PCNT']
fe_model_d_expr = y_var_name + ' ~ '
i = 0
for X_var_name in X_var_names:
if i > 0:
fe_model_d_expr = fe_model_d_expr + ' + ' + X_var_name
else:
fe_model_d_expr = fe_model_d_expr + X_var_name
i = i + 1
for dummy_name in unit_names[:-1]:
fe_model_d_expr = fe_model_d_expr + ' + ' + dummy_name
print('Regression expression of model D=\n' + fe_model_d_expr)
#Build and train FE model D
fe_model_d = smf.ols(formula=fe_model_d_expr, data=df_panel_with_dummies)
fe_model_d_results = fe_model_d.fit()
print('===============================================================================')
print('=========================== Fixed Effects Model D =============================')
print(fe_model_d_results.summary())
#Perform the F-test on models A and D
k_a = len(fe_model_a_results.params)
k_d = len(fe_model_d_results.params)
N = len(df_panel_with_dummies)
rss_a = fe_model_a_results.ssr
rss_d = fe_model_d_results.ssr
f_statistic = ((rss_a-rss_d)/(k_d-k_a))/(rss_d/(N-k_d))
print('F-statistic='+str(f_statistic))
alpha=0.05
f_critical_value=st.f.ppf((1.0-alpha), (k_d-k_a), (N-k_d))
print('F test critical value at alpha of 0.05='+str(f_critical_value))
#Perform the LR test on models A and D
log_ll_a = fe_model_a_results.llf
log_ll_d = fe_model_d_results.llf
lrt_statistic = -2*(log_ll_a - log_ll_d)
print('LRT-statistic='+str(lrt_statistic))
alpha=0.05
chi2_critical_value=st.chi2.ppf((1.0-alpha), (k_d-k_a))
print('Chi-squared critical value at alpha of 0.05='+str(chi2_critical_value))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment