This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import statsmodels.formula.api as smf | |
from patsy import dmatrices | |
import scipy.stats as st | |
########################################################################################## | |
#Select between two non-nested fixed effects models using the Encompassing Principle | |
########################################################################################## | |
#Define the units (countries) of interest | |
unit_names = ['Belgium', 'CzechRepublic', 'France', 'Ireland', 'Portugal', 'UK', 'USA'] | |
unit_names.sort() | |
#column containing the fixed effect units | |
unit_col_name='COUNTRY' | |
#model expression of the main model | |
#GDP_PCAP_GWTH_PCNT = beta_0 + beta_1*Belgium + beta_2*CzechRepublic + beta_3*France + | |
# beta_4*Ireland + beta_5*Portugal + beta_6*UK + beta_7*USA + beta_8*GCF_GWTH_PCNT + | |
# beta_9*CO2_PCAP_GWTH_PCNT + e | |
#Define the y and X variable names | |
y_var_name = 'GDP_PCAP_GWTH_PCNT' | |
X_var_names = ['GCF_GWTH_PCNT'] | |
#Load the panel data set of World Bank published development indicators into a Pandas Dataframe | |
df_panel = pd.read_csv('wb_data_panel_3ind_7units_1992_2014.csv', header=0) | |
print(df_panel.corr()) | |
#Print out the first 30 rows | |
print(df_panel.head(30)) | |
#Create the dummy variables, one for each country | |
df_dummies = pd.get_dummies(df_panel[unit_col_name]) | |
#Join the dummies Dataframe with the panel data set | |
df_panel_with_dummies = df_panel.join(df_dummies) | |
print(df_panel_with_dummies) | |
#Construct the regression equation. Note that we are leaving out one dummy variable so as to | |
# avoid perfect multi-colinearity between the 7 dummy variables. The regression model's intercept | |
# will contain the value of the coefficient for the omitted dummy variable. | |
model_a_expr = y_var_name + ' ~ ' | |
i = 0 | |
for X_var_name in X_var_names: | |
if i > 0: | |
model_a_expr = model_a_expr + ' + ' + X_var_name | |
else: | |
model_a_expr = model_a_expr + X_var_name | |
i = i + 1 | |
for dummy_name in unit_names[:-1]: | |
model_a_expr = model_a_expr + ' + ' + dummy_name | |
print('Regression expression for model A=\n' + model_a_expr) | |
#Build and train model A | |
fe_model_a = smf.ols(formula=model_a_expr, data=df_panel_with_dummies) | |
fe_model_a_results = fe_model_a.fit() | |
print('===============================================================================') | |
print('=========================== Fixed Effects Model A =============================') | |
print(fe_model_a_results.summary()) | |
#Repeat the above steps for FE model B: | |
#Construct the regression equation. | |
X_var_names = ['CO2_PCAP_GWTH_PCNT'] | |
model_b_expr = y_var_name + ' ~ ' | |
i = 0 | |
for X_var_name in X_var_names: | |
if i > 0: | |
model_b_expr = model_b_expr + ' + ' + X_var_name | |
else: | |
model_b_expr = model_b_expr + X_var_name | |
i = i + 1 | |
for dummy_name in unit_names[:-1]: | |
model_b_expr = model_b_expr + ' + ' + dummy_name | |
print('Regression expression for model B=\n' + model_b_expr) | |
#Build and train model B | |
fe_model_b = smf.ols(formula=model_b_expr, data=df_panel_with_dummies) | |
fe_model_b_results = fe_model_b.fit() | |
print('===============================================================================') | |
print('=========================== Fixed Effects Model B =============================') | |
print(fe_model_b_results.summary()) | |
#Get the predictions of FE model B on the training data set | |
y_train, Z_train = dmatrices(model_b_expr, df_panel_with_dummies, return_type='dataframe') | |
y_cap = fe_model_b_results.predict(Z_train) | |
#Add the predictions from model B to the data panel | |
df_panel_with_dummies['y_cap'] = y_cap | |
#Construct the equation of the coomprehensive model y = y_capλ +Xβ + ε | |
X_var_names = ['y_cap', 'GCF_GWTH_PCNT'] | |
comprehensive_model_expr = y_var_name + ' ~ ' | |
i = 0 | |
for X_var_name in X_var_names: | |
if i > 0: | |
comprehensive_model_expr = comprehensive_model_expr + ' + ' + X_var_name | |
else: | |
comprehensive_model_expr = comprehensive_model_expr + X_var_name | |
i = i + 1 | |
for dummy_name in unit_names[:-1]: | |
comprehensive_model_expr = comprehensive_model_expr + ' + ' + dummy_name | |
print('Regression expression of the comprehensive model=\n' + comprehensive_model_expr) | |
#Build and fit the comprehensive model | |
comprehensive_model = smf.ols(formula=comprehensive_model_expr, data=df_panel_with_dummies) | |
comprehensive_model_results = comprehensive_model.fit() | |
print('===============================================================================') | |
print('============================= Comprehensive Model =============================') | |
print(comprehensive_model_results.summary()) | |
#Formulate model D by adding CO2_PCAP_GWTH_PCNT to model A | |
X_var_names = ['GCF_GWTH_PCNT', 'CO2_PCAP_GWTH_PCNT'] | |
fe_model_d_expr = y_var_name + ' ~ ' | |
i = 0 | |
for X_var_name in X_var_names: | |
if i > 0: | |
fe_model_d_expr = fe_model_d_expr + ' + ' + X_var_name | |
else: | |
fe_model_d_expr = fe_model_d_expr + X_var_name | |
i = i + 1 | |
for dummy_name in unit_names[:-1]: | |
fe_model_d_expr = fe_model_d_expr + ' + ' + dummy_name | |
print('Regression expression of model D=\n' + fe_model_d_expr) | |
#Build and train FE model D | |
fe_model_d = smf.ols(formula=fe_model_d_expr, data=df_panel_with_dummies) | |
fe_model_d_results = fe_model_d.fit() | |
print('===============================================================================') | |
print('=========================== Fixed Effects Model D =============================') | |
print(fe_model_d_results.summary()) | |
#Perform the F-test on models A and D | |
k_a = len(fe_model_a_results.params) | |
k_d = len(fe_model_d_results.params) | |
N = len(df_panel_with_dummies) | |
rss_a = fe_model_a_results.ssr | |
rss_d = fe_model_d_results.ssr | |
f_statistic = ((rss_a-rss_d)/(k_d-k_a))/(rss_d/(N-k_d)) | |
print('F-statistic='+str(f_statistic)) | |
alpha=0.05 | |
f_critical_value=st.f.ppf((1.0-alpha), (k_d-k_a), (N-k_d)) | |
print('F test critical value at alpha of 0.05='+str(f_critical_value)) | |
#Perform the LR test on models A and D | |
log_ll_a = fe_model_a_results.llf | |
log_ll_d = fe_model_d_results.llf | |
lrt_statistic = -2*(log_ll_a - log_ll_d) | |
print('LRT-statistic='+str(lrt_statistic)) | |
alpha=0.05 | |
chi2_critical_value=st.chi2.ppf((1.0-alpha), (k_d-k_a)) | |
print('Chi-squared critical value at alpha of 0.05='+str(chi2_critical_value)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment