Skip to content

Instantly share code, notes, and snippets.

View jkclem's full-sized avatar

John Clements jkclem

View GitHub Profile
# paired t-test for a difference of means between two dependent samples
from scipy.stats import ttest_rel
# save and print the results of the paired t-test on the LASSO estimates
# (assumes unbiased_sigma_estimates and lasso_sigma_estimates are defined
# earlier in the file — not visible in this excerpt)
lasso_results = ttest_rel(unbiased_sigma_estimates, lasso_sigma_estimates)
print(f'Test Statistic for the Paired t-test between the True Model and LASSO: {round(lasso_results[0], 4)}')
print(f'p-value for the Paired t-test between the True Model and LASSO: {round(lasso_results[1], 4)}')
print()
# save the results of the paired t-test on the Ridge estimates
# NOTE(review): no print of ridge_results appears in this excerpt — it is
# presumably printed further down, past the visible lines.
ridge_results = ttest_rel(unbiased_sigma_estimates, ridge_sigma_estimates)
# Suppress library warnings (e.g. sklearn convergence/deprecation chatter)
# so they do not clutter the output.
# NOTE(review): the original monkey-patched `warnings.warn` with a no-op
# function, which irreversibly disables warnings for every library in the
# process. Use the supported warnings-filter API instead — same effect on
# output, but it can be scoped or reset later via warnings.resetwarnings().
import warnings
warnings.filterwarnings('ignore')
# import LassoCV
from sklearn.linear_model import LassoCV
# import RidgeCV
from sklearn.linear_model import RidgeCV
# for linear algebra and random numbers
import numpy as np
# for linear regression
import statsmodels.api as sm
# for visualization
import matplotlib.pyplot as plt
# for generating combinations of explanatory variables for model selection based on AIC
from itertools import combinations
# helper: exhaustive OLS model selection by information criterion (AIC/BIC)
# NOTE(review): the original comment here ("set a random seed for
# reproducibility") did not match any code — no seed is set in this excerpt.
def best_information_criterion_selection(y, X, criterion='AIC'):
    '''
    This function takes in a column numpy array (y) and design matrix (X) (with the first column as all 1s for
    the intercept) which is also a numpy array, and returns the OLS model with the lowest Information
    Criterion. The default criterion is AIC; and the other option is BIC.
    '''
    # check inputs are valid
    # NOTE(review): `assert` is stripped when Python runs with -O; for real
    # input validation, raising ValueError would be more robust.
    assert y.shape[0] == X.shape[0], 'The number of rows in y and X do not match!'
    assert (criterion == 'AIC') or (criterion == 'BIC'), 'Valid criterions are AIC and BIC!'
    # NOTE(review): the remainder of this function — presumably the exhaustive
    # search over `combinations` of columns with statsmodels OLS fits, per the
    # docstring — is missing from this excerpt; the body appears truncated by
    # the page scrape. Do not assume behavior beyond the validation above.
# Logit: maximum-likelihood logistic regression from statsmodels
from statsmodels.discrete.discrete_model import Logit
# add an intercept since statsmodels does not
# (assumes my_data, target, and vars_of_interest are defined earlier in the
# file — not visible in this excerpt)
my_data['Intercept'] = 1
# fit the logistic regression model using MLE; disp=False silences the
# optimizer's convergence output
mle_mod = Logit(my_data[target], my_data[['Intercept'] + vars_of_interest])
mle_mod_fit = mle_mod.fit(disp=False)
# print the summary
plt.figure(figsize=(12, 5), dpi= 80, facecolor='w', edgecolor='k')
plt.subplot(1, 2, 1)
plt.plot(mcmc_log_mod.raw_beta_distr[0], mcmc_log_mod.raw_beta_distr[1])
plt.title('Simulated Raw Joint Distribution of the Coefficients', fontsize=12)
plt.xlabel('Intercept', fontsize=10)
plt.ylabel('Coefficient of Price Percentile', fontsize=10)
plt.subplot(1, 2, 2)
plt.plot(mcmc_log_mod.beta_distr[0], mcmc_log_mod.beta_distr[1])
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# load in the candy data set
# (NOTE(review): the original comment said "field goal data", which does not
# match the file actually read below)
all_data = pd.read_csv('candy-data.csv')
# list of independent variables in the model
vars_of_interest = ['pricepercent']
# name of dependent variable
# NOTE(review): the assignment this comment describes (e.g. `target = ...`)
# is missing from this excerpt — presumably lost in the page scrape.
class mcmc_logistic_reg:
    '''
    Container for the results of an MCMC-based logistic regression:
    posterior coefficient draws, point estimates, and credible intervals.
    Attributes are initialized to placeholder 1-element arrays and are
    presumably filled in by sampling methods of this class that are not
    visible in this excerpt.
    '''

    # NOTE(review): the original defined `def __init__self(self)`, which
    # Python treats as an ordinary method named "__init__self" — the
    # constructor never ran, so instances were created without these
    # attributes. Renamed to `__init__` so initialization actually happens.
    # The stray class-body `import numpy as np` was removed; numpy is
    # already imported at module level.
    def __init__(self):
        # raw (unprocessed) posterior draws of the coefficients
        self.raw_beta_distr = np.empty(1)
        # processed posterior draws (e.g. after burn-in/thinning — TODO confirm)
        self.beta_distr = np.empty(1)
        # point estimates of the coefficients
        self.beta_hat = np.empty(1)
        # credible intervals for the coefficients
        self.cred_ints = np.empty(1)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.