Last active
December 15, 2023 13:52
-
-
Save sachinsdate/20620e1bd3d38d7476d894cffb4c8b6b to your computer and use it in GitHub Desktop.
A Pooled OLS regression model for panel data sets using Python and statsmodels, alongwith a detailed analysis of its goodness of fit.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import scipy.stats as st | |
import statsmodels.api as sm | |
import statsmodels.graphics.tsaplots as tsap | |
from statsmodels.compat import lzip | |
from statsmodels.stats.diagnostic import het_white | |
from matplotlib import pyplot as plt | |
import seaborn as sns | |
#Load the panel data set of World Bank published development indicators into a Pandas Dataframe | |
df_panel = pd.read_csv('wb_data_panel_2ind_7units_1992_2014.csv', header=0) | |
#Use Seaborn to plot GDP growth over all time periods and across all countries versus gross | |
# capital formation growth: | |
sns.scatterplot(x=df_panel['GCF_GWTH_PCNT'], y=df_panel['GDP_PCAP_GWTH_PCNT'], | |
hue=df_panel['COUNTRY']).set(title= | |
'Y-o-Y % Change in per-capita GDP versus Y-o-Y % Change in Gross capital formation') | |
plt.show() | |
#Define the variables | |
y_var_name = 'GDP_PCAP_GWTH_PCNT' | |
X_var_names = ['GCF_GWTH_PCNT'] | |
#Carve out the pooled Y | |
pooled_y=df_panel[y_var_name] | |
#Carve out the pooled X | |
pooled_X=df_panel[X_var_names] | |
#Add the placeholder for the regression intercept. When the model is fitted, the coefficient of | |
# this variable is the regression model's intercept β_0. | |
pooled_X = sm.add_constant(pooled_X) | |
#Build the OLS model | |
pooled_olsr_model = sm.OLS(endog=pooled_y, exog=pooled_X) | |
#Train the model and fetch the results | |
pooled_olsr_model_results = pooled_olsr_model.fit() | |
#Print the results summary | |
print('===============================================================================') | |
print('================================= Pooled OLS ==================================') | |
print(pooled_olsr_model_results.summary()) | |
#Let's plot the Q-Q plot of the residual errors: | |
sm.qqplot(data=pooled_olsr_model_results.resid, line='45') | |
plt.show() | |
#Print out the mean value of the residual errors | |
print('Mean value of residual errors='+str(pooled_olsr_model_results.resid.mean())) | |
#Plot the residual errors against X to check for heteroskedasticity of residuals | |
fig, ax = plt.subplots() | |
fig.suptitle('Raw residuals of Pooled OLS versus X') | |
plt.ylabel('Residual (y - mu)') | |
plt.xlabel('X='+str(X_var_names[0])) | |
ax.scatter(pooled_X[X_var_names[0]], pooled_olsr_model_results.resid, s=4, c='black', label='Residual Error') | |
plt.show() | |
#Confirm presence of heteroskedasticity of residuals using the White test: | |
keys = ['Lagrange Multiplier statistic:', 'LM test\'s p-value:', | |
'F-statistic:', 'F-test\'s ' 'p-value:'] | |
results = het_white(resid=pooled_olsr_model_results.resid, exog=pooled_X) | |
print('Results of the White test for heteroskedasticity of residual errors ===> ') | |
print(lzip(keys,results)) | |
#plot the residuals against y to see if they are correlated with the response variable | |
fig, ax = plt.subplots() | |
fig.suptitle('Raw residuals of Pooled OLS versus y') | |
plt.ylabel('Residual (y - mu)') | |
plt.xlabel('y') | |
ax.scatter(pooled_y, pooled_olsr_model_results.resid, s=4, c='black', label='Residual Error') | |
plt.show() | |
#Calculate the Pearson's r between residuals and y | |
keys = ['Pearson\'s r:', 'p-value:'] | |
results = st.pearsonr(x=pooled_y, y=pooled_olsr_model_results.resid) | |
print('Results of the Pearson\'s r test of correlation between the residual errors and the ' | |
'response variable y ===>') | |
print(lzip(keys,results)) | |
#Check if the residuals are auto-correlated | |
tsap.plot_acf(x=pooled_olsr_model_results.resid) | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment