Created
March 9, 2018 21:59
-
-
Save memonkey01/cf8d476622ec6e1d88fd7e52f51ad182 to your computer and use it in GitHub Desktop.
Project 4 Masters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import numpy as np | |
import pandas as pd | |
import matplotlib | |
import matplotlib.pyplot as plt | |
from pandas_datareader import data as pdr | |
from datetime import date, timedelta | |
import matplotlib.mlab as mlab | |
from scipy import stats | |
import statsmodels.api as sm | |
matplotlib.style.use('ggplot') | |
def get_index(index):
    """Download the price history of a Yahoo Finance ticker.

    The requested window ends yesterday and starts 1305 weeks before
    today.

    Parameters
    ----------
    index : str
        Ticker symbol handed to ``pdr.get_data_yahoo`` (the script's
        entry point uses ``'^IXIC'``).

    Returns
    -------
    list
        ``[prices, returns]``: the ``'Close'`` column and its
        period-over-period percentage change, both taken after rows
        containing missing values were dropped.
    """
    today = date.today()
    end_iso = (today - timedelta(days=1)).isoformat()
    start_iso = (today - timedelta(weeks=1305)).isoformat()

    quotes = pdr.get_data_yahoo(index, start=start_iso, end=end_iso)
    quotes['returns'] = quotes['Close'].pct_change()
    # The first row has no previous close, so its return is NaN and the
    # row is removed here together with any other incomplete row.
    quotes = quotes.dropna()
    return [quotes['Close'], quotes['returns']]
def lognorm_check(dataframe):
    """Plot a price histogram with a fitted lognormal density on top.

    The lognormal parameters are the mean and standard deviation of the
    natural log of the input, so the overlay is a visual check of
    whether the values look lognormally distributed.

    Parameters
    ----------
    dataframe : array-like of positive numbers
        Price series to inspect.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    log_prices = np.log(dataframe)
    mu = np.mean(log_prices)
    sigma = np.std(log_prices)

    # ``density=True`` replaces the ``normed`` keyword, which current
    # matplotlib releases no longer accept.
    _, bins, _ = plt.hist(
        dataframe, 100, density=True, align='mid', color='blue',
        label='Histogram of Prices')

    # Evaluate the lognormal pdf on a fine grid spanning the histogram.
    x = np.linspace(bins[0], bins[-1], 10000)
    pdf = (np.exp(-((np.log(x) - mu) ** 2) / (2 * sigma ** 2)) /
           (x * sigma * np.sqrt(2 * np.pi)))

    plt.plot(x, pdf, linewidth=2, color='r', label='Lognorm Distribution')
    plt.axis('tight')
    plt.legend()
    plt.title('Lognorm Prices')
    plt.xlabel("Prices")
    plt.ylabel("Frequency")
    plt.show()
def normal_check(dataframe):
    """Plot a histogram of returns with a fitted normal density on top.

    The normal curve uses the sample mean and standard deviation of the
    input and is evaluated at the histogram's bin edges.

    Parameters
    ----------
    dataframe : array-like
        Return series to inspect.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # ``density=True`` replaces the ``normed`` keyword, which current
    # matplotlib releases no longer accept.
    _, bins, _ = plt.hist(
        dataframe, 100, density=True, align='mid', color='blue',
        label='Histogram Returns')

    mu = np.mean(dataframe)
    sigma = np.std(dataframe)

    # ``matplotlib.mlab.normpdf`` no longer exists in current matplotlib;
    # scipy provides the same density.
    normal_pdf = stats.norm.pdf(bins, mu, sigma)
    plt.plot(bins, normal_pdf, color='r', lw=4)
    plt.title("Normal Distribution of the returns")
    plt.xlabel("Returns")
    plt.ylabel("Frequency")
    plt.show()
def normal_test(dataframe):
    """Run a one-sample t-test of the hypothesis that the mean is zero.

    Note that this checks the location of the sample only; it does not
    by itself establish that the data are normally distributed.

    Parameters
    ----------
    dataframe : array-like
        Sample to test (the script passes the return series).

    Returns
    -------
    The result of ``scipy.stats.ttest_1samp(dataframe, 0)``, which
    unpacks as ``(statistic, pvalue)``.
    """
    return stats.ttest_1samp(dataframe, 0)
def lognom_test(dataframe):
    """Kolmogorov-Smirnov test of lognormality.

    A variable is lognormal exactly when its natural log is normal, so
    the log-values are compared with a normal distribution whose mean
    and standard deviation are estimated from those same log-values.

    The previous implementation ran a t-test of the raw values against
    the mean of their logs, which does not test the distribution shape.

    Parameters
    ----------
    dataframe : array-like of positive numbers
        Sample to test (the script passes the price series).

    Returns
    -------
    The result of ``scipy.stats.kstest``, which unpacks as
    ``(statistic, pvalue)``.

    Notes
    -----
    Because the normal parameters are fitted on the tested sample, the
    reported p-value is optimistic (the Lilliefors correction is not
    applied); treat it as indicative.
    """
    log_values = np.log(dataframe)
    mu = np.mean(log_values)
    sigma = np.std(log_values)
    return stats.kstest(log_values, 'norm', args=(mu, sigma))
def lognorm_var(dataframe):
    """Plot how far the empirical density deviates from a lognormal fit.

    The empirical density is a 100-bin normalised histogram. The ideal
    curve is the lognormal pdf whose parameters are the mean and
    standard deviation of the log of the input. The area between the two
    curves is shaded.

    Parameters
    ----------
    dataframe : array-like of positive numbers
        Price series to inspect.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    log_prices = np.log(dataframe)
    mu = np.mean(log_prices)
    sigma = np.std(log_prices)

    # Only the bin heights are needed, so compute them directly instead
    # of drawing a fully transparent histogram.
    density, edges = np.histogram(dataframe, bins=100, density=True)

    # Each height belongs to the middle of its bin; evaluating both
    # curves at the bin centres keeps them aligned point for point.
    centers = 0.5 * (edges[:-1] + edges[1:])
    pdf = (np.exp(-((np.log(centers) - mu) ** 2) / (2 * sigma ** 2)) /
           (centers * sigma * np.sqrt(2 * np.pi)))

    plt.plot(centers, pdf, linewidth=2, color='r', label='Ideal Distribution')
    plt.plot(centers, density, linewidth=2, color='b',
             label='Real Distribution')
    plt.axis('tight')
    plt.fill_between(centers, pdf, density, color='b', alpha=0.8)
    plt.legend()
    plt.title('Lognorm Difference')
    plt.xlabel("Prices")
    plt.ylabel("Frequency")
    plt.show()
def gbm(dataframe):
    """Simulate and plot geometric Brownian motion price paths.

    10000 paths of 252 points each are generated over a one-year
    horizon with a fixed volatility of 0.20. Every path starts at the
    second-to-last observed price; the drift is the simple return
    between that price and the one at position ``-253``.

    Parameters
    ----------
    dataframe : array-like of positive numbers
        Historical prices, oldest first. At least 253 values are needed.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(10000, 252)`` holding the simulated prices,
        one path per row.

    Raises
    ------
    ValueError
        If fewer than 253 prices are supplied.
    """
    prices = np.asarray(dataframe, dtype=np.float64)
    if prices.size < 253:
        raise ValueError('gbm needs at least 253 prices to estimate the drift')

    # NOTE(review): the reference points are the second-to-last price and
    # the price at position -253, as in the original code - confirm that
    # the very last observation is skipped on purpose.
    last_price = prices[-2]
    last_year_price = prices[-253]
    drift = last_price / last_year_price - 1.0  # expected return

    sigma = 0.20      # volatility
    horizon = 1       # years
    n_paths = 10000   # number of simulations
    n_steps = 252     # points per path
    dt = float(horizon) / n_steps

    # Log-price increments for all paths at once; column 0 is the start
    # value, so only n_steps - 1 increments are required per path.
    shocks = np.random.standard_normal((n_paths, n_steps - 1))
    increments = ((drift - 0.5 * sigma ** 2) * dt +
                  sigma * np.sqrt(dt) * shocks)

    log_paths = np.empty((n_paths, n_steps), np.float64)
    log_paths[:, 0] = np.log(last_price)
    log_paths[:, 1:] = log_paths[:, :1] + np.cumsum(increments, axis=1)
    paths = np.exp(log_paths)

    # Plotting the transposed matrix draws one line per simulated path.
    plt.plot(range(n_steps), paths.T)
    plt.title('GBM Simulation')
    plt.show()
    return paths
def black_monday(dataframe):
    """Count single-step returns of -20% or worse across simulated paths.

    Parameters
    ----------
    dataframe : array-like, shape (n_paths, n_steps)
        Price paths, one path per row (for example the output of
        ``gbm``).

    Returns
    -------
    tuple
        ``(count, n_paths)`` where ``count`` is the number of
        step-to-step returns that are ``<= -0.2`` and ``n_paths`` is the
        number of paths examined.
    """
    # After the transpose each column is one path and each row one step.
    step_returns = pd.DataFrame(dataframe).transpose().pct_change().dropna()

    # Check every remaining step, including the final one, which the
    # former label-based loop never reached.
    crashes = int((step_returns.values <= -0.2).sum())
    return crashes, len(step_returns.columns)
def hurst(n, returns=None):
    """Return the log of the rescaled range (R/S) of the first ``n`` returns.

    The rescaled range is the spread (max minus min) of the cumulative
    mean-adjusted series divided by the standard deviation of the
    window. Regressing this value on ``log(n)`` for several window
    sizes, as ``dimension`` does, yields a slope that estimates the
    Hurst exponent.

    Parameters
    ----------
    n : int or float
        Window length; truncated with ``int``.
    returns : array-like, optional
        Return series to analyse. When omitted, the module-level
        ``index[1]`` set by the script's entry point is used, which
        keeps existing one-argument calls working.

    Returns
    -------
    float
        ``log(R / S)`` for the window.
    """
    if returns is None:
        # Backward-compatible fallback to the global set under __main__.
        returns = index[1]

    window = np.asarray(returns)[:int(n)]
    cumulative = np.cumsum(window - np.mean(window))
    spread = np.max(cumulative) - np.min(cumulative)
    scale = np.std(window)
    return np.log(spread / scale)


def dimension(returns):
    """Fit log(R/S) against log(window size) by ordinary least squares.

    Six window sizes are used: the full series length and that length
    divided by 2, 4, 8, 16 and 32.

    Parameters
    ----------
    returns : array-like
        Return series to analyse.

    Returns
    -------
    The statsmodels summary of the fitted regression; the coefficient on
    the non-constant regressor is the slope of log(R/S) versus log(n).
    """
    size = np.size(returns)
    # float() keeps true division even without the __future__ import.
    window_sizes = [size / float(divisor) for divisor in (1, 2, 4, 8, 16, 32)]

    # Analyse the series that was passed in rather than a global one.
    y = [hurst(window, returns) for window in window_sizes]
    x = sm.add_constant([np.log(window) for window in window_sizes])
    return sm.OLS(y, x).fit().summary()
def plot_returns(dataframe):
    """Draw the return series as a black line against its position.

    Parameters
    ----------
    dataframe : sequence
        Values to plot; the x axis is ``0 .. len(dataframe) - 1``.
    """
    positions = range(len(dataframe))
    plt.plot(positions, dataframe, color='black')
    plt.title('Returns')
    plt.show()
def plot_price(dataframe):
    """Draw the price series as a filled black silhouette without axes.

    Parameters
    ----------
    dataframe : sequence
        Values to plot; the x axis is ``0 .. len(dataframe) - 1``.
    """
    positions = range(len(dataframe))
    plt.plot(positions, dataframe, color='black')
    # Shade the area under the curve so the series reads as a solid shape.
    plt.fill_between(positions, dataframe, color='black')
    plt.title('Price')
    plt.axis('off')
    plt.show()
def fractal_serie(n):
    """Plot a self-similar series built by iterative substitution.

    Starting from an 8-point seed curve, every pass replaces each
    segment of the current curve with a copy of the whole curve scaled
    to that segment. No recursion is used.

    Parameters
    ----------
    n : int
        Number of refinement passes. ``0`` plots the seed curve itself.
        A curve of ``L`` points becomes ``L * (L - 1)`` points after one
        pass (8 -> 56 -> 3080 -> ...), so keep ``n`` small.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    xs = [0, 1, 2, 3, 4, 5, 6, 7]   # seed x-coordinates
    ys = [0, 1, 5, 2, 3, 2, 5, 7]   # seed y-coordinates

    # Extent of the seed; float() keeps the ratios below as true
    # divisions even without the __future__ import.
    base_domain = float(xs[-1] - xs[0])
    base_range = float(max(ys) - min(ys))

    for _ in range(n):
        next_xs = []
        next_ys = []
        for x0, x1, y0, y1 in zip(xs, xs[1:], ys, ys[1:]):
            domain_scale = (x1 - x0) / base_domain
            range_scale = (y1 - y0) / base_range
            # Shrink the whole current curve onto this segment.
            next_xs.extend(val * domain_scale + x0 for val in xs)
            next_ys.extend(val * range_scale + y0 for val in ys)
        xs, ys = next_xs, next_ys

    # Plotting xs/ys (not the per-pass lists) keeps n == 0 valid.
    plt.plot(xs, ys, color='black')
    plt.fill_between(xs, ys, color='black')
    plt.title('Aproximate Series')
    plt.axis('off')
    plt.show()
if __name__ == '__main__':
    # Script entry point: download the NASDAQ Composite history and run
    # every check and plot in turn.
    # ``index`` has to remain a module-level name because hurst() may
    # read it as a global; index[0] holds prices, index[1] returns.
    index = get_index('^IXIC')
    lognorm_check(index[0])
    normal_check(index[1])
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(normal_test(index[1]))
    print(lognom_test(index[0]))
    lognorm_var(index[0])
    trajectories = gbm(index[0])
    print(black_monday(trajectories))
    plot_returns(index[1])
    plot_price(index[0])
    print(dimension(index[1]))
    fractal_serie(3)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment