Skip to content

Instantly share code, notes, and snippets.

@jkclem
Created August 8, 2020 21:39
Show Gist options
  • Save jkclem/5964d731144db90128f766584cb6b787 to your computer and use it in GitHub Desktop.
Save jkclem/5964d731144db90128f766584cb6b787 to your computer and use it in GitHub Desktop.
# for linear algebra and random numbers
import numpy as np
# for linear regression
import statsmodels.api as sm
# for visualization
import matplotlib.pyplot as plt
# for generating combinations of explanatory variables for model selection based on AIC
from itertools import combinations
# set a random seed for reproducibility
np.random.seed(123)
# set the number of simulations to run
num_sims = 500
# set n and k and make a n by k+1 matrix of standard normal random numbers
n = 100
k = 6
# set the number of predictors, including the intercept that are significant
k_significant = 3
# create a column vector of zeros, with k + 1 betas, where the first value is the true intercept
beta = np.zeros(shape=(k + 1, 1))
# set the first k_significant betas to be non-zero values, using iid N(3, 1) random variables
beta[:k_significant] = np.random.normal(loc=5.0, scale=1.0, size=(k_significant, 1))
# create an array of zeros to hold our UNBIASED estimates of the standard deviation of our error term
unbiased_sigma_estimates = np.zeros(shape=num_sims)
# create an array of zeros to hold our BIASED estimates of the standard deviation of our error term (from AIC)
aic_sigma_estimates = np.zeros(shape=num_sims)
# create an array of zeros to hold our BIASED estimates of the standard deviation of our error term (from BIC)
bic_sigma_estimates = np.zeros(shape=num_sims)
# perform num_sims simulations
for i in range(num_sims):
###
# This block generates the random sample
###
# create the temporary design n by k + 1 design matrix
temp_X = np.random.normal(loc=0.0, scale=1.0, size=(n, k + 1))
# set the first column of the design matrix to 1.0 to include an intercept
temp_X[:,0] = 1.0
# create temporary y values using the design matrix and beta vector, and add standard normal random noise
temp_y = np.matmul(temp_X, beta) + np.random.normal(loc=0.0, scale=1.0, size=(n, 1))
###
# This block estimates the standard deviation using the True OLS model
###
# fit an OLS linear model to the temporary data
temp_true_mod = sm.OLS(temp_y, temp_X[:,0:k_significant]).fit()
# estimate the standard deviation of the error term
temp_unbiased_sigma_estimate = np.sqrt(np.sum(temp_true_mod.resid **2) /
(n - temp_true_mod.params.shape[0]))
# add the unbiased estimate to the array at the ith slot
unbiased_sigma_estimates[i] = temp_unbiased_sigma_estimate
###
# This block estimates the standard deviation using the OLS model with best subset selection of variables
# using AIC
###
# select the model with the lowest information criterion
temp_best_AIC_mod = best_information_criterion_selection(temp_y, temp_X, criterion='AIC')
# estimate the standard deviation of the error term
temp_aic_sigma_estimate = np.sqrt(np.sum(temp_best_AIC_mod.resid ** 2) / temp_best_AIC_mod.df_resid)
# add the unbiased estimate to the array at the ith slot
aic_sigma_estimates[i] = temp_aic_sigma_estimate
###
# This block estimates the standard deviation using the OLS model with best subset selection of variables
# using BIC
###
# select the model with the lowest information criterion
temp_best_BIC_mod = best_information_criterion_selection(temp_y, temp_X, criterion='BIC')
# estimate the standard deviation of the error term
temp_bic_sigma_estimate = np.sqrt(np.sum(temp_best_BIC_mod.resid ** 2) / temp_best_BIC_mod.df_resid)
# add the unbiased estimate to the array at the ith slot
bic_sigma_estimates[i] = temp_bic_sigma_estimate
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment