Skip to content

Instantly share code, notes, and snippets.

View jkclem's full-sized avatar

John Clements jkclem

View GitHub Profile
# paired t-test for a difference of means between two dependent samples
from scipy.stats import ttest_rel
# save and print the results of the paired t-test on the LASSO estimates
# (assumes unbiased_sigma_estimates and lasso_sigma_estimates are defined
# earlier in the file — not visible in this excerpt)
lasso_results = ttest_rel(unbiased_sigma_estimates, lasso_sigma_estimates)
print(f'Test Statistic for the Paired t-test between the True Model and LASSO: {round(lasso_results[0], 4)}')
print(f'p-value for the Paired t-test between the True Model and LASSO: {round(lasso_results[1], 4)}')
print()
# save the results of the paired t-test on the Ridge estimates
# NOTE(review): no print of ridge_results appears in this excerpt — it is
# presumably printed further down, past the visible lines.
ridge_results = ttest_rel(unbiased_sigma_estimates, ridge_sigma_estimates)
# Suppress library warnings (e.g. sklearn convergence/deprecation chatter)
# so they do not clutter the output.
# NOTE(review): the original monkey-patched `warnings.warn` with a no-op
# function, which irreversibly disables warnings for every library in the
# process. Use the supported warnings-filter API instead — same effect on
# output, but it can be scoped or reset later via warnings.resetwarnings().
import warnings
warnings.filterwarnings('ignore')
# import LassoCV
from sklearn.linear_model import LassoCV
# import RidgeCV
from sklearn.linear_model import RidgeCV
# for linear algebra and random numbers
import numpy as np
# for linear regression
import statsmodels.api as sm
# for visualization
import matplotlib.pyplot as plt
# for generating combinations of explanatory variables for model selection based on AIC
from itertools import combinations
# helper: exhaustive OLS model selection by information criterion (AIC/BIC)
# NOTE(review): the original comment here ("set a random seed for
# reproducibility") did not match any code — no seed is set in this excerpt.
def best_information_criterion_selection(y, X, criterion='AIC'):
    '''
    This function takes in a column numpy array (y) and design matrix (X) (with the first column as all 1s for
    the intercept) which is also a numpy array, and returns the OLS model with the lowest Information
    Criterion. The default criterion is AIC; and the other option is BIC.
    '''
    # check inputs are valid
    # NOTE(review): `assert` is stripped when Python runs with -O; for real
    # input validation, raising ValueError would be more robust.
    assert y.shape[0] == X.shape[0], 'The number of rows in y and X do not match!'
    assert (criterion == 'AIC') or (criterion == 'BIC'), 'Valid criterions are AIC and BIC!'
    # NOTE(review): the remainder of this function — presumably the exhaustive
    # search over `combinations` of columns with statsmodels OLS fits, per the
    # docstring — is missing from this excerpt; the body appears truncated by
    # the page scrape. Do not assume behavior beyond the validation above.
# Logit: maximum-likelihood logistic regression from statsmodels
from statsmodels.discrete.discrete_model import Logit
# add an intercept since statsmodels does not
# (assumes my_data, target, and vars_of_interest are defined earlier in the
# file — not visible in this excerpt)
my_data['Intercept'] = 1
# fit the logistic regression model using MLE; disp=False silences the
# optimizer's convergence output
mle_mod = Logit(my_data[target], my_data[['Intercept'] + vars_of_interest])
mle_mod_fit = mle_mod.fit(disp=False)
# print the summary
plt.figure(figsize=(12, 5), dpi= 80, facecolor='w', edgecolor='k')
plt.subplot(1, 2, 1)
plt.plot(mcmc_log_mod.raw_beta_distr[0], mcmc_log_mod.raw_beta_distr[1])
plt.title('Simulated Raw Joint Distribution of the Coefficients', fontsize=12)
plt.xlabel('Intercept', fontsize=10)
plt.ylabel('Coefficient of Price Percentile', fontsize=10)
plt.subplot(1, 2, 2)
plt.plot(mcmc_log_mod.beta_distr[0], mcmc_log_mod.beta_distr[1])
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# load in the candy data set
# (NOTE(review): the original comment said "field goal data", which does not
# match the file actually read below)
all_data = pd.read_csv('candy-data.csv')
# list of independent variables in the model
vars_of_interest = ['pricepercent']
# name of dependent variable
# NOTE(review): the assignment this comment describes (e.g. `target = ...`)
# is missing from this excerpt — presumably lost in the page scrape.
class mcmc_logistic_reg:
    '''
    Container for the results of an MCMC-based logistic regression:
    posterior coefficient draws, point estimates, and credible intervals.
    Attributes are initialized to placeholder 1-element arrays and are
    presumably filled in by sampling methods of this class that are not
    visible in this excerpt.
    '''

    # NOTE(review): the original defined `def __init__self(self)`, which
    # Python treats as an ordinary method named "__init__self" — the
    # constructor never ran, so instances were created without these
    # attributes. Renamed to `__init__` so initialization actually happens.
    # The stray class-body `import numpy as np` was removed; numpy is
    # already imported at module level.
    def __init__(self):
        # raw (unprocessed) posterior draws of the coefficients
        self.raw_beta_distr = np.empty(1)
        # processed posterior draws (e.g. after burn-in/thinning — TODO confirm)
        self.beta_distr = np.empty(1)
        # point estimates of the coefficients
        self.beta_hat = np.empty(1)
        # credible intervals for the coefficients
        self.cred_ints = np.empty(1)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.