sachinsdate

## system_of_regression_equations.py
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
from patsy import dmatrices
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sb


#Create a list of the assets whose capital asset pricing models will make up the system of regression equations

## 90day_RAR_on_assets.csv
Date,RAR_Energy,RAR_Metals,RAR_Auto,RAR_Technology,RAR_Chevron,RAR_Halliburton,RAR_Alcoa,RAR_Nucor,RAR_USSteel,RAR_Ford,RAR_Tesla,RAR_Google,RAR_Microsoft
2019-05-10,6.5961545570058915,-0.382100259291267,16.980423096067693,20.144324542716525,7.838687140506143,-9.476220040520879,-6.9431669207317,6.42141943672513,-17.767082658022694,29.022405063291146,-25.13538238802106,8.952849356982366,23.35190786030733
2019-05-13,6.077872310603407,-0.5278551837630419,16.595338809034903,21.905609130918425,8.573040434742577,-11.501168785151826,-8.83865472560975,4.248187263317492,-22.706320346320346,27.202982005141386,-26.78069516580104,9.053695816906558,24.28270581842493
2019-05-14,3.459818903497883,-3.7857422081352388,14.098548786527978,18.566912229335955,7.393579678758356,-12.890760028149195,-14.120176429075507,0.13789141713202824,-28.08288102261554,24.362673267326734,-29.245256175442353,2.2745797648289487,19.99829490827037
2019-05-15,2.550591352362299,-4.029390347163423,11.923854856180043,18.67323611276973,6.390994854783631

## risk_adjusted_return_ds.py
import pandas as pd


#Load the asset prices; the first column becomes the index and the 'Date' column is parsed as dates
df_asset_prices = pd.read_csv(
    'asset_prices.csv',
    header=0,
    parse_dates=['Date'],
    index_col=0,
)

#Prices from 89 rows earlier. dropna() removes every row containing a missing value,
#which includes the first 89 rows left empty by the shift
df_asset_prices_shifted89 = df_asset_prices.shift(89).dropna()

#The same prices without the first 89 rows, so that they line up with the shifted frame
df_asset_prices_trunc89 = df_asset_prices[89:]

#Percentage change of each price relative to its value 89 rows earlier
df_asset_prices_90day_return = (
    df_asset_prices_trunc89
    .sub(df_asset_prices_shifted89)
    .div(df_asset_prices_shifted89)
    .mul(100)
)

#Load the DTB3 data the same way (first column as index, 'DATE' parsed as dates)
#and discard rows that contain missing values
df_DTB3 = pd.read_csv(
    'DTB3.csv',
    header=0,
    parse_dates=['DATE'],
    index_col=0,
).dropna()

## 90day_RAR_on_assets.csv
Date,RAR_Energy,RAR_Metals,RAR_Auto,RAR_Technology,RAR_Chevron,RAR_Halliburton,RAR_Alcoa,RAR_Nucor,RAR_Ford,RAR_Tesla,RAR_Google,RAR_Microsoft
2019-05-10,6.5961545570058915,-0.382100259291267,16.980423096067693,20.144324542716525,7.838687140506143,-9.476220040520879,-6.9431669207317,6.421419436725146,29.022405063291146,-25.13538238802106,8.952849356982366,23.35190786030733
2019-05-13,6.077872310603407,-0.5278551837630419,16.595338809034903,21.905609130918425,8.573040434742577,-11.501168785151815,-8.83865472560975,4.248187263317492,27.202982005141386,-26.78069516580104,9.053695816906558,24.28270581842493
2019-05-14,3.459818903497883,-3.7857422081352388,14.098548786527978,18.566912229335955,7.393579678758356,-12.890760028149195,-14.120176429075507,0.13789141713202824,24.362673267326734,-29.245256175442353,2.2745797648289487,19.99829490827037
2019-05-15,2.550591352362299,-4.029390347163423,11.923854856180043,18.67323611276973,6.3909948547836315,-13.617494795281056,-14.408592540464461,0.30434117238267255,22.55984

## system_of_regression_equations.py
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
from patsy import dmatrices
from matplotlib import pyplot as plt
import numpy as np

#Names of the assets; one regression equation is set up per asset
asset_names = [
    'Chevron',
    'Halliburton',
    'Alcoa',
    'Nucor',
    'Ford',
    'Tesla',
    'Google',
    'Microsoft',
]

#M = number of equations (one for each entry of asset_names)
M = len(asset_names)

## gls.py
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
from patsy import dmatrices
from matplotlib import pyplot as plt
import numpy as np


#Read the US Census Bureau ACS 2015-2019 subset into a Dataframe.
#header=0 takes the column names from the first row of the file
df = pd.read_csv(
    'us_census_bureau_acs_2015_2019_subset.csv',
    header=0,
)

## white_hc_matrix.py
import pandas as pd
import statsmodels.formula.api as smf
from patsy import dmatrices
from matplotlib import pyplot as plt


#Read the US Census Bureau ACS 2015-2019 subset into a Dataframe.
#header=0 takes the column names from the first row of the file
df = pd.read_csv(
    'us_census_bureau_acs_2015_2019_subset.csv',
    header=0,
)

#Construct the model's equation in Patsy syntax. Statsmodels will automatically add the intercept and so we don't explicitly specify it in the model's equation

## proxy_variables.py
import pandas as pd
import statsmodels.formula.api as smf

#Read the US Census Bureau ACS 2015-2019 subset into a Dataframe.
#header=0 takes the column names from the first row of the file
df = pd.read_csv(
    'us_census_bureau_acs_2015_2019_subset.csv',
    header=0,
)

#The model's equation in Patsy syntax: response on the left of '~', regressors on the right.
#No intercept term is written out because Statsmodels adds it automatically
reg_expr = (
    'Percent_Households_Below_Poverty_Level ~ '
    'Median_Age + Homeowner_Vacancy_Rate + '
    'Percent_Pop_25_And_Over_With_College_Or_Higher_Educ'
)

#Build and train the model and print the training summary

## instrumental_variables_regression.py
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from statsmodels.api import add_constant
from statsmodels.sandbox.regression.gmm import IV2SLS


#Read the Panel Study of Income Dynamics (PSID) data file into a Dataframe.
#header=0 takes the column names from the first row of the file
df = pd.read_csv(
    'PSID1976.csv',
    header=0,
)

## us_census_bureau_acs_2015_2019_subset.csv

          
            County
            Percent_Households_Below_Poverty_Level
            Median_Age
            Homeowner_Vacancy_Rate
            Percent_Pop_25_And_Over_With_College_Or_Higher_Educ

            
              Autauga, Alabama
              14.7
              38.2
              1.4
              26.6

            
              Baldwin, Alabama
              10.5
              43
              3.3
              31.9

            
              Barbour, Alabama
              27.5
              40.4
              3.8
              11.6

            
              Bibb, Alabama
              18.4
              40.9
              1.5
              10.4

            
              Blount, Alabama
              14.2
              40.7
              0.7
              13.1

            
              Bullock, Alabama
              28.2
              40.2
              0.2
              12.1

            
              Butler, Alabama
              20.5
              40.8
              3.7
              16.1

            
              Calhoun, Alabama
              18
              39.6
              2.1
              18.5

            
              Chambers, Alabama
              18.1
              42
              2.7
              13.3
	import pandas as pd
	import statsmodels.formula.api as smf
	import statsmodels.api as sm
	from patsy import dmatrices
	from matplotlib import pyplot as plt
	import numpy as np
	import seaborn as sb


	#Create a list of the assets whose capital asset pricing models will make up the system of regression equations
	Date,RAR_Energy,RAR_Metals,RAR_Auto,RAR_Technology,RAR_Chevron,RAR_Halliburton,RAR_Alcoa,RAR_Nucor,RAR_USSteel,RAR_Ford,RAR_Tesla,RAR_Google,RAR_Microsoft
	2019-05-10,6.5961545570058915,-0.382100259291267,16.980423096067693,20.144324542716525,7.838687140506143,-9.476220040520879,-6.9431669207317,6.42141943672513,-17.767082658022694,29.022405063291146,-25.13538238802106,8.952849356982366,23.35190786030733
	2019-05-13,6.077872310603407,-0.5278551837630419,16.595338809034903,21.905609130918425,8.573040434742577,-11.501168785151826,-8.83865472560975,4.248187263317492,-22.706320346320346,27.202982005141386,-26.78069516580104,9.053695816906558,24.28270581842493
	2019-05-14,3.459818903497883,-3.7857422081352388,14.098548786527978,18.566912229335955,7.393579678758356,-12.890760028149195,-14.120176429075507,0.13789141713202824,-28.08288102261554,24.362673267326734,-29.245256175442353,2.2745797648289487,19.99829490827037
	2019-05-15,2.550591352362299,-4.029390347163423,11.923854856180043,18.67323611276973,6.390994854783631
	import pandas as pd


	#Load the asset prices; the first column becomes the index and the 'Date' column is parsed as dates
	df_asset_prices = pd.read_csv(
		'asset_prices.csv',
		header=0,
		parse_dates=['Date'],
		index_col=0,
	)

	#Prices from 89 rows earlier. dropna() removes every row containing a missing value,
	#which includes the first 89 rows left empty by the shift
	df_asset_prices_shifted89 = df_asset_prices.shift(89).dropna()

	#The same prices without the first 89 rows, so that they line up with the shifted frame
	df_asset_prices_trunc89 = df_asset_prices[89:]

	#Percentage change of each price relative to its value 89 rows earlier
	df_asset_prices_90day_return = (
		df_asset_prices_trunc89
		.sub(df_asset_prices_shifted89)
		.div(df_asset_prices_shifted89)
		.mul(100)
	)

	#Load the DTB3 data the same way (first column as index, 'DATE' parsed as dates)
	#and discard rows that contain missing values
	df_DTB3 = pd.read_csv(
		'DTB3.csv',
		header=0,
		parse_dates=['DATE'],
		index_col=0,
	).dropna()
	import pandas as pd
	import statsmodels.formula.api as smf

	#Read the US Census Bureau ACS 2015-2019 subset into a Dataframe.
	#header=0 takes the column names from the first row of the file
	df = pd.read_csv(
		'us_census_bureau_acs_2015_2019_subset.csv',
		header=0,
	)

	#The model's equation in Patsy syntax: response on the left of '~', regressors on the right.
	#No intercept term is written out because Statsmodels adds it automatically
	reg_expr = (
		'Percent_Households_Below_Poverty_Level ~ '
		'Median_Age + Homeowner_Vacancy_Rate + '
		'Percent_Pop_25_And_Over_With_College_Or_Higher_Educ'
	)

	#Build and train the model and print the training summary
County	Percent_Households_Below_Poverty_Level	Median_Age	Homeowner_Vacancy_Rate	Percent_Pop_25_And_Over_With_College_Or_Higher_Educ
Autauga, Alabama	14.7	38.2	1.4	26.6
Baldwin, Alabama	10.5	43	3.3	31.9
Barbour, Alabama	27.5	40.4	3.8	11.6
Bibb, Alabama	18.4	40.9	1.5	10.4
Blount, Alabama	14.2	40.7	0.7	13.1
Bullock, Alabama	28.2	40.2	0.2	12.1
Butler, Alabama	20.5	40.8	3.7	16.1
Calhoun, Alabama	18	39.6	2.1	18.5
Chambers, Alabama	18.1	42	2.7	13.3