Skip to content

Instantly share code, notes, and snippets.

Avatar
💭
Up to my ears in regression modeling

sachinsdate

💭
Up to my ears in regression modeling
View GitHub Profile
@sachinsdate
sachinsdate / white_hc_matrix.py
Created Sep 25, 2022
A comparison of heteroskedasticity-consistent estimators
View white_hc_matrix.py
import pandas as pd
import statsmodels.formula.api as smf
from patsy import dmatrices
from matplotlib import pyplot as plt
#Load the US Census Bureau data into a Dataframe
df = pd.read_csv('us_census_bureau_acs_2015_2019_subset.csv', header=0)
#Construct the model's equation in Patsy syntax. Statsmodels adds the intercept automatically, so we don't specify it explicitly in the model's equation
View proxy_variables.py
import pandas as pd
import statsmodels.formula.api as smf
#Load the US Census Bureau data into a Dataframe
df = pd.read_csv('us_census_bureau_acs_2015_2019_subset.csv', header=0)
#Construct the model's equation in Patsy syntax. Statsmodels adds the intercept automatically, so we don't specify it explicitly in the model's equation
reg_expr = 'Percent_Households_Below_Poverty_Level ~ Median_Age + Homeowner_Vacancy_Rate + Percent_Pop_25_And_Over_With_College_Or_Higher_Educ'
#Build and train the model and print the training summary
@sachinsdate
sachinsdate / instrumental_variables_regression.py
Last active Aug 30, 2022
A tutorial on instrumental variables regression using the IV2SLS class of statsmodels
View instrumental_variables_regression.py
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from statsmodels.api import add_constant
from statsmodels.sandbox.regression.gmm import IV2SLS
#Load the Panel Study of Income Dynamics (PSID) into a Dataframe
df = pd.read_csv('PSID1976.csv', header=0)
@sachinsdate
sachinsdate / us_census_bureau_acs_2015_2019_subset.csv
Last active Aug 15, 2022
A subset of the 2015–2019 American Community Survey (ACS) 5-Year Estimates conducted by the US Census Bureau used under the following terms of use: https://www.census.gov/data/developers/about/terms-of-service.html
View us_census_bureau_acs_2015_2019_subset.csv
County Percent_Households_Below_Poverty_Level Median_Age Homeowner_Vacancy_Rate Percent_Pop_25_And_Over_With_College_Or_Higher_Educ
Autauga, Alabama 14.7 38.2 1.4 26.6
Baldwin, Alabama 10.5 43 3.3 31.9
Barbour, Alabama 27.5 40.4 3.8 11.6
Bibb, Alabama 18.4 40.9 1.5 10.4
Blount, Alabama 14.2 40.7 0.7 13.1
Bullock, Alabama 28.2 40.2 0.2 12.1
Butler, Alabama 20.5 40.8 3.7 16.1
Calhoun, Alabama 18 39.6 2.1 18.5
Chambers, Alabama 18.1 42 2.7 13.3
View quantile_regression.py
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from patsy import dmatrices
from matplotlib import pyplot as plt
#Import the 7-variable subset of the automobiles dataset into a DataFrame
df = pd.read_csv('automobiles_dataset_subset_uciml.csv', header=0)
View uciml_portuguese_students_math_performance_subset.csv
G1 failures schoolsup famsup studytime goout sex
5 0 1 0 2 4 1
5 0 0 1 2 3 1
7 3 1 0 2 2 1
15 0 0 1 3 2 1
6 0 0 1 2 2 1
15 0 0 1 2 2 0
12 0 0 0 2 4 0
6 0 1 1 2 4 1
16 0 0 1 2 2 0
@sachinsdate
sachinsdate / unemployment_rate_us_fred.csv
Created Jun 21, 2022
US unemployment rate before and after the Great Recession of 2008-09. Source: https://fred.stlouisfed.org/series/UNRATE
View unemployment_rate_us_fred.csv
DATE Time_Period UNRATE Epoch
01-01-02 1 5.7 0
01-02-02 2 5.7 0
01-03-02 3 5.7 0
01-04-02 4 5.9 0
01-05-02 5 5.8 0
01-06-02 6 5.8 0
01-07-02 7 5.8 0
01-08-02 8 5.7 0
01-09-02 9 5.7 0
@sachinsdate
sachinsdate / difference_in_differences_regression.py
Created Jun 17, 2022
An illustration of the use of the Difference-In-Differences regression model to estimate the effect of hurricanes on house prices
View difference_in_differences_regression.py
import pandas as pd
from patsy import dmatrices
import statsmodels.api as sm
#Load the data set into a Pandas Dataframe
df = pd.read_csv('us_fred_coastal_us_states_avg_hpi_before_after_2005.csv', header=0)
#Print it
print(df)
@sachinsdate
sachinsdate / dummy_variables_regression.py
Created Jun 13, 2022
Source code illustrating three different uses of dummy variables in a regression model.
View dummy_variables_regression.py
import pandas as pd
import statsmodels.formula.api as smf
from patsy import dmatrices
import scipy.stats as st
from matplotlib import pyplot as plt
#Import the 7-variable subset of the automobiles dataset into a DataFrame
df = pd.read_csv('automobiles_dataset_subset_uciml.csv', header=0)
#############################################################################################
@sachinsdate
sachinsdate / us_fred_coastal_us_states_avg_hpi_before_after_2005.csv
Last active Jun 13, 2022
Dataset of state-wise house price inflation before and after the 2005 Atlantic hurricane season. Data source: https://fred.stlouisfed.org/
View us_fred_coastal_us_states_avg_hpi_before_after_2005.csv
STATE HPI_CHG Time_Period Disaster_Affected NUM_DISASTERS NUM_IND_ASSIST
GASTHPI_CHG 0.014008563 0 0 1 0
NCSTHPI_CHG 0.014220629 0 0 3 0
TXSTHPI_CHG 0.010191721 0 1 5 22
MASTHPI_CHG 0.027536563 0 0 4 9
ALSTHPI_CHG 0.017585072 0 1 4 14
MSSTHPI_CHG 0.013252413 0 1 3 49
SCSTHPI_CHG 0.017988328 0 0 1 0
NHSTHPI_CHG 0.028513272 0 0 5 6
LASTHPI_CHG 0.015574159 0 1 5 55