Skip to content

Instantly share code, notes, and snippets.

@memonkey01
Created March 9, 2018 21:59
Show Gist options
  • Save memonkey01/cf8d476622ec6e1d88fd7e52f51ad182 to your computer and use it in GitHub Desktop.
Save memonkey01/cf8d476622ec6e1d88fd7e52f51ad182 to your computer and use it in GitHub Desktop.
Project 4 Masters
from __future__ import division
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from pandas_datareader import data as pdr
from datetime import date, timedelta
import matplotlib.mlab as mlab
from scipy import stats
import statsmodels.api as sm
# Select the 'ggplot' style sheet for the figures drawn by this script.
matplotlib.style.use('ggplot')
def get_index(index):
    """Download daily quotes for ``index`` from Yahoo and derive its returns.

    The requested window ends yesterday and starts 1305 weeks before today.

    Args:
        index: ticker symbol(s) handed to ``pdr.get_data_yahoo``.

    Returns:
        A two-element list ``[prices, returns]`` holding the 'Close' column
        and its period-over-period percentage change, taken after rows that
        contain missing values have been dropped.
    """
    current_day = date.today()
    # Both ends of the window are passed to the reader as ISO date strings.
    window_end = (current_day - timedelta(days=1)).isoformat()
    window_start = (current_day - timedelta(weeks=1305)).isoformat()
    quotes = pdr.get_data_yahoo(index, start=window_start, end=window_end)
    quotes['returns'] = quotes['Close'].pct_change()
    # pct_change() leaves the first row undefined; drop incomplete rows.
    quotes = quotes.dropna()
    return [quotes['Close'], quotes['returns']]
def lognorm_check(dataframe):
    """Plot a price histogram together with the log-normal density fitted to it.

    The density uses the mean and (population) standard deviation of the
    log-prices, so the red curve is what the histogram would look like if
    the prices were exactly log-normal.

    Args:
        dataframe: one-dimensional collection of strictly positive prices.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    log_prices = np.log(dataframe)
    sigma = np.std(log_prices)
    mu = np.mean(log_prices)
    # ``density=True`` is the supported spelling of the old ``normed=True``
    # keyword, which current Matplotlib releases no longer accept.
    _, bins, _ = plt.hist(
        dataframe, 100, density=True, align='mid', color='blue',
        label='Histogram of Prices')
    x = np.linspace(min(bins), max(bins), 10000)
    # Log-normal probability density evaluated on a fine grid.
    pdf = (np.exp(-((np.log(x) - mu) ** 2) / (2 * sigma ** 2))) / \
        (x * sigma * np.sqrt(2 * np.pi))
    plt.plot(x, pdf, linewidth=2, color='r', label='Lognorm Distribution')
    plt.axis('tight')
    plt.legend()
    plt.title('Lognorm Prices')
    plt.xlabel("Prices")
    plt.ylabel("Frequency")
    plt.show()
def normal_check(dataframe):
    """Plot a histogram of the returns with the fitted normal density on top.

    Args:
        dataframe: one-dimensional collection of returns.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # ``density=True`` is the supported spelling of the old ``normed=True``
    # keyword, which current Matplotlib releases no longer accept.
    _, bins, _ = plt.hist(
        dataframe, 100, density=True, align='mid', color='blue',
        label='Histogram Returns')
    # Parameters of the normal curve: sample mean and population std.
    mu = np.mean(dataframe)
    sigma = np.std(dataframe)
    # scipy's normal pdf stands in for ``mlab.normpdf``, which has been
    # removed from Matplotlib; it is evaluated at the bin edges as before.
    density = stats.norm.pdf(bins, mu, sigma)
    plt.plot(bins, density, color='r', lw=4)
    plt.title("Normal Distribution of the returns")
    plt.xlabel("Returns")
    plt.ylabel("Frequency")
    plt.show()
def normal_test(dataframe):
    """Run a one-sample t-test of the hypothesis that the mean is zero.

    Note that this only tests the location of the series (mean return equal
    to zero); it does not test the shape of the distribution.

    Args:
        dataframe: one-dimensional collection of returns.

    Returns:
        The result of ``scipy.stats.ttest_1samp(dataframe, 0)``, i.e. the
        t statistic followed by the two-sided p-value.
    """
    return stats.ttest_1samp(dataframe, 0)
def lognom_test(dataframe):
    """Kolmogorov-Smirnov test of the hypothesis that the data is log-normal.

    A series is log-normal exactly when its logarithm is normal, so the
    log-data is compared with a normal distribution whose mean and standard
    deviation are estimated from that same log-data.

    The previous implementation ran a t-test of the raw values against the
    mean of their logarithms, which compares quantities on different scales
    and is not a distributional test.

    NOTE: because the parameters are estimated from the sample, the p-value
    is optimistic (the classical Kolmogorov tables assume known parameters).

    Args:
        dataframe: one-dimensional collection of strictly positive values.

    Returns:
        The result of ``scipy.stats.kstest``: the KS statistic followed by
        the p-value.
    """
    log_values = np.asarray(np.log(dataframe), dtype=np.float64)
    mu = np.mean(log_values)
    sigma = np.std(log_values)
    return stats.kstest(log_values, 'norm', args=(mu, sigma))
def lognorm_var(dataframe):
    """Plot how far the empirical price density is from the log-normal one.

    The empirical density (100-bin histogram) and the fitted log-normal
    density are drawn on the same axis and the area between them is shaded.

    Args:
        dataframe: one-dimensional collection of strictly positive prices.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    log_prices = np.log(dataframe)
    sigma = np.std(log_prices)
    mu = np.mean(log_prices)
    # Only the bin heights are needed, so compute them directly instead of
    # drawing a fully transparent histogram.
    count, bins = np.histogram(dataframe, 100, density=True)
    # Each height belongs to the centre of its bin; evaluating the ideal
    # density at the same abscissas keeps both curves aligned.
    x = 0.5 * (bins[:-1] + bins[1:])
    pdf = (np.exp(-((np.log(x) - mu) ** 2) / (2 * sigma ** 2))) / \
        (x * sigma * np.sqrt(2 * np.pi))
    plt.plot(x, pdf, linewidth=2, color='r', label='Ideal Distribution')
    plt.plot(x, count, linewidth=2, color='b', label='Real Distribution')
    plt.axis('tight')
    plt.fill_between(x, pdf, count, color='b', alpha=0.8)
    plt.legend()
    plt.title('Lognorm Difference')
    plt.xlabel("Prices")
    plt.ylabel("Frequency")
    plt.show()
def gbm(dataframe, sigma=0.20, years=1, n_paths=10000, n_steps=252):
    """Simulate geometric Brownian motion paths starting at the last price.

    The drift is the simple return between the last price and the price 252
    observations earlier. Log-prices evolve as

        log S[t] = log S[t-1] + (drift - sigma**2 / 2) * dt
                   + sigma * sqrt(dt) * Z,        Z ~ N(0, 1)

    and every simulated path is drawn on a single figure.

    Args:
        dataframe: one-dimensional collection of prices with at least 253
            observations, oldest first.
        sigma: annualised volatility used by the simulation.
        years: simulation horizon in years.
        n_paths: number of simulated paths.
        n_steps: number of points per path, including the starting price.

    Returns:
        ``numpy.ndarray`` of shape ``(n_paths, n_steps)`` with the simulated
        prices; column 0 equals the last observed price.

    Raises:
        ValueError: if fewer than 253 prices are supplied or ``n_steps`` is
            smaller than 1.
    """
    lookback = 252  # observations between the two prices defining the drift
    prices = np.asarray(dataframe, dtype=np.float64).ravel()
    if prices.size <= lookback:
        raise ValueError('gbm needs at least {0} prices, got {1}'.format(
            lookback + 1, prices.size))
    if n_steps < 1:
        raise ValueError('n_steps must be at least 1, got {0}'.format(n_steps))

    # Use the final observation itself (the old ``[-2:-1]`` slice picked the
    # one before it) and plain floats rather than one-element arrays.
    last_price = prices[-1]
    year_ago_price = prices[-1 - lookback]
    drift = last_price / year_ago_price - 1.0  # expected return

    dt = float(years) / n_steps
    # One normal shock per path and per step after the starting point.
    shocks = np.random.standard_normal((n_paths, n_steps - 1))
    increments = (drift - 0.5 * sigma ** 2) * dt + \
        sigma * np.sqrt(dt) * shocks

    log_paths = np.empty((n_paths, n_steps), np.float64)
    log_paths[:, 0] = np.log(last_price)
    # Summing the increments along each row replaces the per-step loop.
    log_paths[:, 1:] = np.log(last_price) + np.cumsum(increments, axis=1)
    paths = np.exp(log_paths)

    # A 2-D argument draws one line per column, hence the transpose.
    plt.plot(np.arange(n_steps), paths.T)
    plt.title('GBM Simulation')
    plt.show()
    return paths
def black_monday(dataframe):
    """Count single-step returns of -20% or worse across simulated paths.

    Args:
        dataframe: two-dimensional array-like with one row per path and one
            column per time step (anything ``pandas.DataFrame`` accepts).

    Returns:
        A tuple ``(crashes, n_paths)``: the number of individual steps, over
        all paths, whose return is less than or equal to -0.2, and the number
        of paths examined.
    """
    paths = np.asarray(pd.DataFrame(dataframe).values, dtype=np.float64)
    n_paths = paths.shape[0]
    if paths.ndim < 2 or paths.shape[1] < 2:
        # Fewer than two time steps: no return can be computed.
        return 0, n_paths
    # Step-to-step simple returns for every path at once. Every step is
    # inspected, including the last one, which the previous label-based loop
    # (``range(1, len(...))``) never reached.
    step_returns = paths[:, 1:] / paths[:, :-1] - 1.0
    crashes = int(np.count_nonzero(step_returns <= -0.2))
    return crashes, n_paths
def hurst(n, returns=None):
    """Return the log rescaled range, log(R/S), of the first ``n`` returns.

    R is the range of the cumulative deviations from the window mean and S
    is the (population) standard deviation of the window.

    Args:
        n: window length; it is truncated to an integer.
        returns: one-dimensional collection of returns. When omitted, the
            module-level ``index[1]`` set by the script entry point is used,
            which is what this function always did before.

    Returns:
        ``log(R / S)`` for the window.

    Raises:
        ValueError: if the window is empty.
    """
    if returns is None:
        # Backward-compatible fallback on the script's global data.
        returns = index[1]
    window = np.asarray(returns, dtype=np.float64)[0:int(n)]
    if window.size == 0:
        raise ValueError(
            'hurst needs a non-empty window, got n={0}'.format(n))
    deviations = window - np.mean(window)
    cumulative = np.cumsum(deviations)
    value_range = np.max(cumulative) - np.min(cumulative)
    scale = np.std(window)
    return np.log(value_range / scale)


def dimension(returns):
    """Regress log(R/S) on log(window length) by ordinary least squares.

    The rescaled range is evaluated on the first N, N/2, N/4, N/8, N/16 and
    N/32 observations of ``returns``; the slope of the fitted line is the
    estimate of the Hurst exponent.

    Args:
        returns: one-dimensional collection of returns with at least 32
            observations.

    Returns:
        The ``summary()`` of the fitted statsmodels OLS model.

    Raises:
        ValueError: if fewer than 32 observations are supplied.
    """
    total = int(np.size(returns))
    if total < 32:
        raise ValueError(
            'dimension needs at least 32 returns, got {0}'.format(total))
    # Successive halvings of the sample; the regressor uses the length that
    # hurst() really works with (after integer truncation).
    window_sizes = [total // 2 ** k for k in range(6)]
    # Pass the series explicitly so the result reflects the argument rather
    # than whatever module-level data happens to exist.
    y = [hurst(size, returns) for size in window_sizes]
    x1 = [np.log(size) for size in window_sizes]
    x = sm.add_constant(x1)
    results = sm.OLS(y, x).fit()
    return results.summary()
def plot_returns(dataframe):
    """Draw the return series as a black line against its position."""
    positions = range(len(dataframe))
    plt.plot(positions, dataframe, color='black')
    plt.title('Returns')
    plt.show()
def plot_price(dataframe):
    """Draw the price series as a filled black silhouette without axes."""
    positions = range(len(dataframe))
    ink = 'black'
    # Outline first, then shade everything between the curve and zero.
    plt.plot(positions, dataframe, color=ink)
    plt.fill_between(positions, dataframe, color=ink)
    plt.title('Price')
    plt.axis('off')
    plt.show()
def fractal_serie(n):
    """Build a self-similar series iteratively (no recursion) and plot it.

    Starting from an 8-point generator, every pass replaces each segment of
    the current curve by a copy of the whole curve scaled to fit that
    segment. A curve of m points therefore becomes one of m * (m - 1)
    points, so the size grows very quickly with ``n``.

    Args:
        n: number of refinement passes. With ``n == 0`` the generator itself
            is plotted (this used to fail because no curve had been built).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Generator coordinates.
    xs = [0, 1, 2, 3, 4, 5, 6, 7]
    ys = [0, 1, 5, 2, 3, 2, 5, 7]
    # Extent of the generator; floats keep the scale factors fractional
    # regardless of the division semantics in effect.
    base_domain = float(xs[-1] - xs[0])
    base_range = float(max(ys) - min(ys))

    for _ in range(n):
        next_xs = []
        next_ys = []
        # Walk over consecutive point pairs, i.e. the segments of the curve.
        for x0, x1, y0, y1 in zip(xs[:-1], xs[1:], ys[:-1], ys[1:]):
            domain_scale = (x1 - x0) / base_domain
            range_scale = (y1 - y0) / base_range
            # Shrink the whole curve onto this segment.
            next_xs.extend(val * domain_scale + x0 for val in xs)
            next_ys.extend(val * range_scale + y0 for val in ys)
        xs, ys = next_xs, next_ys

    plt.plot(xs, ys, color='black')
    plt.fill_between(xs, ys, color='black')
    plt.title('Aproximate Series')
    plt.axis('off')
    plt.show()
if __name__ == '__main__':
    # Full analysis of the NASDAQ Composite: index[0] holds the prices and
    # index[1] the returns. The name ``index`` is kept at module level
    # because hurst() may read it as a global.
    index = get_index('^IXIC')
    lognorm_check(index[0])
    normal_check(index[1])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(normal_test(index[1]))
    print(lognom_test(index[0]))
    lognorm_var(index[0])
    trajectories = gbm(index[0])
    print(black_monday(trajectories))
    plot_returns(index[1])
    plot_price(index[0])
    print(dimension(index[1]))
    fractal_serie(3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment