
@marheiska
Created November 11, 2020 21:31
Performing an Event Study in Python (AAR, CAR, t-tests, betas, abnormal returns, stats)
import pandas_datareader as pdr
import pandas as pd
import numpy as np
from sklearn import linear_model
import scipy.stats as st
# Collect Data
data = pdr.DataReader(["TSLA","^GSPC"], 'yahoo', '2010-06-29', '2018-01-01')
data = data.drop(['High','Low','Open','Volume','Adj Close'], axis=1)
returns = data.pct_change(1) * 100
returns.to_excel('Data.xlsx')
# Only Close (no need to re-download; `data` already holds just the Close prices)
data.to_excel('Close.xlsx')
# Shape Data ([3:] skips the extra header rows written by the MultiIndex columns)
df = pd.read_excel('Data.xlsx', names=['Date','Tesla','SP500']).set_index('Date')[3:]
market = df['SP500'].values.reshape(-1, 1)
# Make a list out of column names
stock_list = df.columns.tolist()
# Create an empty list to store betas
betas = []
# Loop over the stock tickers, run a simple CAPM regression of each stock's
# returns on the market's returns (beta is the slope of stock-on-market,
# not market-on-stock), and store the estimated beta
for x in stock_list:
    stock_returns = df[x].values.reshape(-1, 1)
    reg = linear_model.LinearRegression()
    betas.append(reg.fit(market, stock_returns).coef_)
# Convert the list to a Numpy Array
beta_np = np.array(betas)
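# Illustrative sanity check (toy numbers, not part of the event study):
# a stock whose returns are exactly twice the market's should get beta = 2
toy_mkt = np.arange(10, dtype=float).reshape(-1, 1)
toy_stock = 2.0 * toy_mkt.ravel()
toy_beta = linear_model.LinearRegression().fit(toy_mkt, toy_stock).coef_[0]
assert round(toy_beta, 6) == 2.0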
# Expected Returns via Beta
# Need Numpy Array to do Calculations!
sp500array = df['SP500'].values
expected_returns = np.outer(sp500array, beta_np)
expected_returns = pd.DataFrame(expected_returns, index=df.index)
expected_returns.columns = stock_list
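# Illustrative sanity check (toy numbers) of the outer-product step:
# np.outer(market, betas) builds one column of beta-scaled market returns per
# stock, e.g. market [1, 2] and betas [0.5, 2.0] give [[0.5, 2.0], [1.0, 4.0]]
toy_expected = np.outer(np.array([1.0, 2.0]), np.array([0.5, 2.0]))
assert toy_expected.shape == (2, 2)  # rows: days, columns: stocks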
# Abnormal Returns
abnormal_returns = (df - expected_returns).drop('SP500', axis=1)
###### If only ONE company with events is picked, run this part..
# Retrieve list with Event Dates
events = pd.read_excel('Events.xlsx')
# Copy the company's column once per event (numbered 1..N)
for x in events.index:
    abnormal_returns[x + 1] = abnormal_returns['Tesla']
# Drop the column used to duplicate the data
abnormal_returns = abnormal_returns.drop('Tesla', axis=1)
###### ..until here
# Retrieve list with Event Dates
events = pd.read_excel('Events.xlsx')
# Set format correctly
eventlist = events['Events'].dt.date
# Create a Dictionary involving the Events
dictionary = dict(zip(events.index + 1, eventlist))
# Calculate Abnormal Returns around an Event Window
abnormal_returns2 = abnormal_returns.reset_index()
# Set Date as Datetime so both dates are exactly comparable
abnormal_returns2['Date'] = abnormal_returns2['Date'].dt.date
# Empty dictionary (curly braces {} make a dict, not square brackets [])
new_set = {}
# Create for loop for Abnormal Returns around Event Windows [-5,+5]
for number, eventdate in dictionary.items():
    # Find the event row, where Date equals the event date
    row = abnormal_returns2.loc[abnormal_returns2['Date'] == eventdate]
    # Get the index of that row
    index = row.index[0]
    # Select the 5 rows before and after it (the [-5, +5] event window)
    my_set = abnormal_returns2.loc[(index - 5):(index + 5), number].reset_index(drop=True)
    # Add to the collection
    new_set[number] = my_set
ev = pd.DataFrame(new_set)
# Shift the index so it runs from -5 to +5, with 0 as the event day
ev.index = ev.index - 5
# Calculate the Mean and Standard Deviation of the AAR
mean_AAR = ev.mean(axis = 1)
std_AAR = ev.std(axis = 1)
# Put everything in Dataframes
stats = pd.DataFrame(mean_AAR, columns=['Mean AAR'])
stats['STD AAR'] = std_AAR
# Cross-sectional t-test: AAR divided by its standard error (std / sqrt(N events))
n_events = ev.shape[1]
stats['T-Test'] = mean_AAR / (std_AAR / np.sqrt(n_events))
# Two-sided p-value under the standard normal
stats['P-Value'] = 2 * st.norm.sf(stats['T-Test'].abs())
# display() shows multiple outputs at once in Jupyter; use print(stats) in a plain script
display(stats)
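# Illustrative check of the cross-sectional t-statistic (toy numbers):
# three events with abnormal returns 1, 2, 3 on one day have mean 2 and
# sample std 1, so t = 2 / (1 / sqrt(3)), about 3.464, with sqrt(N) scaling
toy_aar = np.array([1.0, 2.0, 3.0])
toy_t = toy_aar.mean() / (toy_aar.std(ddof=1) / np.sqrt(toy_aar.size))
assert round(toy_t, 3) == 3.464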
# Calculate the Mean and Standard Deviation of the CAR [-5, 0]
# (sum each event's column over the window with axis=0 to get one CAR per
# event, then average and test across events)
car_pre = ev.iloc[0:6].sum(axis=0)
Mean_CAR = np.mean(car_pre)
Std_CAR = np.std(car_pre, ddof=1)
car50test = Mean_CAR / (Std_CAR / np.sqrt(car_pre.size))
print("T-Stat of CAR [-5, 0] is", round(car50test, 3))
# Calculate the Mean and Standard Deviation of the CAR [-1, +1]
car_around = ev.iloc[4:7].sum(axis=0)
Mean_CAR11 = np.mean(car_around)
Std_CAR11 = np.std(car_around, ddof=1)
car11test = Mean_CAR11 / (Std_CAR11 / np.sqrt(car_around.size))
print("T-Stat of CAR [-1, +1] is", round(car11test, 3))
# Calculate the Mean and Standard Deviation of the CAR [0, +5]
car_post = ev.iloc[5:11].sum(axis=0)
Mean_CAR05 = np.mean(car_post)
Std_CAR05 = np.std(car_post, ddof=1)
car05test = Mean_CAR05 / (Std_CAR05 / np.sqrt(car_post.size))
print("T-Stat of CAR [0, +5] is", round(car05test, 3))
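# Illustrative CAR check (toy numbers): two events over a two-day window
# give per-event CARs of 2 and 4 (mean 3, sample std sqrt(2)), so the
# t-statistic is 3 / (sqrt(2) / sqrt(2)) = 3
toy_ev = pd.DataFrame({1: [1.0, 1.0], 2: [2.0, 2.0]})
toy_car = toy_ev.sum(axis=0)
toy_car_t = toy_car.mean() / (toy_car.std(ddof=1) / np.sqrt(toy_car.size))
assert round(toy_car_t, 3) == 3.0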
# Distribution tests (run here on Close prices; returns are more common in practice)
close = pd.read_excel('Close.xlsx', names=["Date", "Tesla", "SP500"])[2:].set_index('Date')
print('Normality Test:', st.normaltest(close['Tesla']))
print('Jarque Bera Test:', st.jarque_bera(close['Tesla']))
print('Skewness is:', st.skew(close['Tesla']))
print('Kurtosis is:', st.kurtosis(close['Tesla']))