# memonkey01/project4.py

## project4.py
from __future__ import division
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from pandas_datareader import data as pdr
from datetime import date, timedelta
import matplotlib.mlab as mlab
from scipy import stats
import statsmodels.api as sm
matplotlib.style.use('ggplot')


def get_index(index):
    """Fetch daily closing prices and returns for a Yahoo Finance symbol.

    The requested window ends yesterday and begins 1305 weeks (about 25
    years) before today.

    Parameters
    ----------
    index : str
        Symbol forwarded to ``pdr.get_data_yahoo`` (the script passes
        ``'^IXIC'``).

    Returns
    -------
    list
        ``[prices, returns]``: the ``Close`` column and its
        ``pct_change()``, both taken after every row containing a NaN has
        been dropped (this removes at least the first row, which has no
        previous close to compare against).
    """
    run_day = date.today()
    window_end = (run_day - timedelta(days=1)).isoformat()
    window_start = (run_day - timedelta(weeks=1305)).isoformat()

    quotes = pdr.get_data_yahoo(index, start=window_start, end=window_end)
    quotes['returns'] = quotes['Close'].pct_change()
    quotes = quotes.dropna()
    return [quotes['Close'], quotes['returns']]

# Function to create a histogram to visually check whether the data follows
# a lognormal distribution


def lognorm_check(dataframe):
    """Plot a histogram of prices with a fitted lognormal density on top.

    The lognormal parameters are the mean and (population) standard
    deviation of the log-prices.

    Parameters
    ----------
    dataframe : array-like of positive numbers
        Price series; ``np.log`` is applied to it directly.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    log_prices = np.log(dataframe)
    mu = np.mean(log_prices)
    sigma = np.std(log_prices)

    # `density=True` is the supported spelling of the former `normed=True`
    # (same normalisation: the bar areas sum to one).
    _, edges, _ = plt.hist(
        dataframe, 100, density=True, align='mid', color='blue',
        label='Histogram of Prices')

    # Bin edges are increasing, so the first/last edge are the min/max.
    x = np.linspace(edges[0], edges[-1], 10000)
    pdf = (np.exp(-((np.log(x) - mu) ** 2) / (2 * sigma ** 2))
           / (x * sigma * np.sqrt(2 * np.pi)))

    plt.plot(x, pdf, linewidth=2, color='r', label='Lognorm Distribution')
    plt.axis('tight')
    plt.legend()
    plt.title('Lognorm Prices')
    plt.xlabel("Prices")
    plt.ylabel("Frequency")
    plt.show()

# Function to measure if the returns are normally distributed


def normal_check(dataframe):
    """Plot a histogram of returns with a fitted normal density on top.

    Parameters
    ----------
    dataframe : array-like
        Return series.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # `density=True` is the supported spelling of the former `normed=True`.
    _, edges, _ = plt.hist(
        dataframe, 100, density=True, align='mid', color='blue',
        label='Histogram Returns')

    # Parameters of the reference normal curve.
    mu = np.mean(dataframe)
    sigma = np.std(dataframe)

    # scipy's normal pdf replaces `mlab.normpdf`, which newer Matplotlib
    # releases no longer ship; it is evaluated at the bin edges as before.
    pdf = stats.norm.pdf(edges, mu, sigma)
    plt.plot(edges, pdf, color='r', lw=4)
    plt.title("Normal Distribution of the returns")
    plt.xlabel("Returns")
    plt.ylabel("Frequency")
    plt.show()

# Function to test whether the mean return is zero (one-sample t-test);
# note that this alone does not establish that the series is normal


def normal_test(dataframe):
    """Run a one-sample t-test of the series mean against zero.

    Parameters
    ----------
    dataframe : array-like
        Return series.

    Returns
    -------
    result
        Whatever ``scipy.stats.ttest_1samp`` returns: the t statistic
        followed by the two-sided p-value.
    """
    # The sample mean used to be computed here and then discarded; the
    # test derives it internally, so the dead assignment is gone.
    return stats.ttest_1samp(dataframe, 0)
# Function to verify if the series follows a lognormal distribution by using
# a Kolmogorov test


def lognom_test(dataframe):
    """Kolmogorov-Smirnov test of lognormality.

    A series is lognormal exactly when its logarithm is normal, so the
    log-values are compared with a normal distribution whose mean and
    standard deviation are estimated from those same log-values.

    The previous body ran ``ttest_1samp(prices, mean(log(prices)))``, i.e.
    it compared the mean price with the mean log-price, which says nothing
    about the shape of the distribution.

    Parameters
    ----------
    dataframe : array-like of positive numbers
        Price series.

    Returns
    -------
    result
        Whatever ``scipy.stats.kstest`` returns: the KS statistic followed
        by the p-value (same two-field layout as the earlier t-test result).

    Notes
    -----
    Because the normal parameters are fitted on the sample being tested,
    the reported p-value is conservative (too large).
    """
    log_values = np.log(np.asarray(dataframe, dtype=float))
    mu = np.mean(log_values)
    sigma = np.std(log_values)
    return stats.kstest(log_values, 'norm', args=(mu, sigma))
# Function to visualize how far the data deviates from the ideal distribution


def lognorm_var(dataframe):
    """Plot the gap between the empirical and the fitted lognormal density.

    The empirical density (100 histogram bins) and the lognormal density
    fitted on the log-prices are drawn as two lines, and the area between
    them is shaded.

    Parameters
    ----------
    dataframe : array-like of positive numbers
        Price series.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    log_prices = np.log(dataframe)
    mu = np.mean(log_prices)
    sigma = np.std(log_prices)

    # Drawn fully transparent: the call is only used to obtain the
    # normalised bin heights and the bin edges.  `density=True` is the
    # supported spelling of the former `normed=True`.
    heights, edges, _ = plt.hist(
        dataframe, 100, density=True, align='mid', color='blue', alpha=0.0)

    # One x position per bin (its centre) so that heights[k] is drawn where
    # bin k really lies.  An evenly spaced grid over the edge range has the
    # right length but shifts every bin, by up to half a bin width.
    centres = 0.5 * (edges[:-1] + edges[1:])
    pdf = (np.exp(-((np.log(centres) - mu) ** 2) / (2 * sigma ** 2))
           / (centres * sigma * np.sqrt(2 * np.pi)))

    plt.plot(centres, pdf, linewidth=2, color='r', label='Ideal Distribution')
    plt.plot(centres, heights, linewidth=2, color='b',
             label='Real Distribution')
    plt.axis('tight')
    plt.fill_between(centres, pdf, heights, color='b', alpha=0.8)
    plt.legend()
    plt.title('Lognorm Difference')
    plt.xlabel("Prices")
    plt.ylabel("Frequency")
    plt.show()

# Function to simulate a GBM and to find any behavior similar to black monday


def _simulate_gbm_paths(start_price, drift, sigma, years, n_paths, n_steps):
    """Return an ``(n_paths, n_steps)`` array of simulated GBM prices."""
    dt = float(years) / n_steps
    # One standard-normal shock per path and per step after the first.
    shocks = np.random.standard_normal((n_paths, n_steps - 1))
    log_steps = (drift - 0.5 * sigma ** 2) * dt + sigma * np.sqrt(dt) * shocks

    log_paths = np.empty((n_paths, n_steps), np.float64)
    log_paths[:, 0] = np.log(start_price)
    # Running sum of the log increments == the step-by-step recursion.
    log_paths[:, 1:] = log_paths[:, :1] + np.cumsum(log_steps, axis=1)
    return np.exp(log_paths)


def gbm(dataframe, sigma=0.20, years=1, n_paths=10000, n_steps=252):
    """Simulate and plot geometric-Brownian-motion price paths.

    Every path starts at the second-to-last observed price; the drift is
    the simple return between that price and the one 251 observations
    before it.

    Parameters
    ----------
    dataframe : array-like
        Observed price series with at least 253 entries.
    sigma : float, optional
        Volatility over the whole horizon (default 0.20).
    years : float, optional
        Length of the simulated horizon (default 1).
    n_paths : int, optional
        Number of simulated paths (default 10000).
    n_steps : int, optional
        Number of points per path, including the start (default 252).

    Returns
    -------
    numpy.ndarray
        Simulated prices, shape ``(n_paths, n_steps)``.

    Raises
    ------
    ValueError
        If fewer than 253 prices are supplied.
    """
    prices = np.asarray(dataframe, dtype=float).ravel()
    if prices.size < 253:
        raise ValueError(
            'gbm needs at least 253 prices, got %d' % prices.size)

    # Anchors kept exactly as before (positions -2 and -253).
    # NOTE(review): position -2 is the second-to-last quote; confirm that
    # the latest quote (-1) was not the intended starting point.
    start_price = prices[-2]
    year_ago_price = prices[-253]
    drift = start_price / year_ago_price - 1  # expected return

    # Vectorised simulation: avoids the Python-level double loop, whose
    # inner index also overwrote the variable holding the step count.
    paths = _simulate_gbm_paths(
        start_price, drift, sigma, years, n_paths, n_steps)

    # A 2-D y argument draws one line per column, i.e. one per path.
    plt.plot(np.arange(n_steps), paths.T)
    plt.title('GBM Simulation')
    plt.show()
    return paths


# Function to check if a return in one step is less or equal to -20%
def black_monday(dataframe):
    """Count one-step returns of -20% or worse across simulated paths.

    Parameters
    ----------
    dataframe : array-like
        Anything ``pd.DataFrame`` accepts, with one price path per row
        (the script passes the array returned by ``gbm``).

    Returns
    -------
    tuple
        ``(count, n_paths)``: the number of steps, over all paths, whose
        return is ``<= -0.2``, and the number of paths.
    """
    paths = pd.DataFrame(dataframe)
    # After the transpose every column is one path and every row one time
    # step; dropna() removes the first row, which has no previous price.
    step_returns = paths.transpose().pct_change().dropna()

    # Compare every remaining step at once.  The former label-based double
    # loop ran over range(1, len(...)) although the labels left after
    # dropna() run from 1 to len(...), so the last step was never checked.
    crashes = int((step_returns.values <= -0.2).sum())
    return crashes, len(step_returns.columns)

# Function to calculate the log rescaled range, log(R/S), which is used to
# estimate the Hurst exponent and so tell whether the time series is persistent


def hurst(n, returns=None):
    """Return log(R/S), the log rescaled range of the first ``n`` returns.

    Parameters
    ----------
    n : int or float
        Window length; truncated with ``int(n)``.
    returns : array-like, optional
        Series to analyse.  When omitted, the module-level ``index[1]`` is
        used (legacy behaviour of ``hurst(n)``).

    Returns
    -------
    float
        ``log((max(Z) - min(Z)) / std(window))`` where ``Z`` is the
        cumulative sum of the demeaned window.
    """
    if returns is None:
        # Legacy fallback: relies on the global set in the __main__ block.
        returns = index[1]
    window = returns[0:int(n)]
    cumulative_dev = np.cumsum(window - np.mean(window))
    value_range = np.max(cumulative_dev) - np.min(cumulative_dev)
    return np.log(value_range / np.std(window))


# Function to fit log(R/S) against log(window length) by the OLS method


def dimension(returns):
    """Regress log(R/S) on log(window length) and return the OLS summary.

    Six windows are used, all starting at the first observation: the full
    sample and its first 1/2, 1/4, 1/8, 1/16 and 1/32.  In rescaled-range
    analysis the slope of this regression is read as the Hurst exponent
    estimate.

    Parameters
    ----------
    returns : array-like
        Return series.

    Returns
    -------
    statsmodels summary object
        Result of ``sm.OLS(...).fit().summary()``.
    """
    total = np.size(returns)
    # Integer window lengths shared by both sides of the regression, so the
    # regressor is the log of the length hurst() really used (it truncates).
    windows = [total // divisor for divisor in (1, 2, 4, 8, 16, 32)]
    # Pass the series explicitly; hurst() would otherwise read a global and
    # silently ignore the argument given to this function.
    log_rs = [hurst(length, returns) for length in windows]

    design = sm.add_constant(np.log(windows))
    return sm.OLS(log_rs, design).fit().summary()

# Function to plot the returns


def plot_returns(dataframe):
    """Draw the series as a black line against its position (0, 1, 2, ...).

    Parameters
    ----------
    dataframe : sequence
        Return series to draw.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    positions = range(len(dataframe))
    plt.plot(positions, dataframe, color='black')
    plt.title('Returns')
    plt.show()

# Function to plot the prices


def plot_price(dataframe):
    """Draw the series as a filled black silhouette with the axes hidden.

    Parameters
    ----------
    dataframe : sequence
        Price series to draw against its position (0, 1, 2, ...).

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    positions = range(len(dataframe))
    # Outline first, then the area between the curve and zero.
    plt.plot(positions, dataframe, color='black')
    plt.fill_between(positions, dataframe, color='black')
    plt.axis('off')
    plt.title('Price')
    plt.show()

# Function to simulate a fractal without a recursive process


def fractal_serie(n):
    """Draw ``n`` refinement levels of a fixed eight-point zig-zag.

    On each level, every segment between two consecutive points is replaced
    by a copy of the whole current point list, scaled by that segment's
    width and height relative to the extent of the initial shape and shifted
    to the segment's start.  Each level is plotted as a line and the last
    one is filled.

    The point count grows very quickly (8, 56, 3080, about 9.5 million for
    levels 0 to 3).

    Parameters
    ----------
    n : int
        Number of refinement levels.  With ``n == 0`` only the initial shape
        is filled (this case used to fail on an unassigned variable).

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Initial generator shape.
    xs = [0, 1, 2, 3, 4, 5, 6, 7]
    ys = [0, 1, 5, 2, 3, 2, 5, 7]
    # Extent of the initial shape; float() keeps the scale factors true
    # divisions regardless of the interpreter's `/` semantics.
    base_width = float(xs[-1] - xs[0])
    base_height = float(max(ys) - min(ys))

    for _ in range(n):
        next_xs = []
        next_ys = []
        for k in range(len(xs) - 1):
            x_scale = (xs[k + 1] - xs[k]) / base_width
            y_scale = (ys[k + 1] - ys[k]) / base_height
            next_xs.extend(value * x_scale + xs[k] for value in xs)
            next_ys.extend(value * y_scale + ys[k] for value in ys)
        xs, ys = next_xs, next_ys
        plt.plot(xs, ys, color='black')

    plt.fill_between(xs, ys, color='black')
    plt.title('Aproximate Series')
    plt.axis('off')
    plt.show()


if __name__ == '__main__':
    # Script entry point: download the NASDAQ Composite series and run every
    # check in turn.  `index` is [prices, returns]; the name is kept at module
    # level because hurst() can read it as a global.
    index = get_index('^IXIC')
    lognorm_check(index[0])
    normal_check(index[1])
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and is also valid Python 3.
    print(normal_test(index[1]))
    print(lognom_test(index[0]))
    lognorm_var(index[0])
    trajectories = gbm(index[0])
    print(black_monday(trajectories))
    plot_returns(index[1])
    plot_price(index[0])
    print(dimension(index[1]))
    fractal_serie(3)
	from __future__ import division
	import numpy as np
	import pandas as pd
	import matplotlib
	import matplotlib.pyplot as plt
	from pandas_datareader import data as pdr
	from datetime import date, timedelta
	import matplotlib.mlab as mlab
	from scipy import stats
	import statsmodels.api as sm
	matplotlib.style.use('ggplot')


	def get_index(index):
	    """Fetch daily closing prices and returns for a Yahoo Finance symbol.

	    The requested window ends yesterday and begins 1305 weeks (about 25
	    years) before today.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its body indentation had been lost and is restored here.

	    Parameters
	    ----------
	    index : str
	        Symbol forwarded to ``pdr.get_data_yahoo``.

	    Returns
	    -------
	    list
	        ``[prices, returns]``: the ``Close`` column and its
	        ``pct_change()``, both taken after every row containing a NaN
	        has been dropped.
	    """
	    run_day = date.today()
	    window_end = (run_day - timedelta(days=1)).isoformat()
	    window_start = (run_day - timedelta(weeks=1305)).isoformat()

	    quotes = pdr.get_data_yahoo(index, start=window_start, end=window_end)
	    quotes['returns'] = quotes['Close'].pct_change()
	    quotes = quotes.dropna()
	    return [quotes['Close'], quotes['returns']]

	# Function to create a histogram to visually check whether the data follows
	# a lognormal distribution


	def lognorm_check(dataframe):
	    """Plot a histogram of prices with a fitted lognormal density on top.

	    The lognormal parameters are the mean and (population) standard
	    deviation of the log-prices.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its indentation and the ``**`` / ``*`` operators of the density
	    formula had been lost and are restored here.

	    Parameters
	    ----------
	    dataframe : array-like of positive numbers
	        Price series; ``np.log`` is applied to it directly.

	    Returns
	    -------
	    None
	        The figure is shown with ``plt.show()``.
	    """
	    log_prices = np.log(dataframe)
	    mu = np.mean(log_prices)
	    sigma = np.std(log_prices)

	    # `density=True` is the supported spelling of the former `normed=True`.
	    _, edges, _ = plt.hist(
	        dataframe, 100, density=True, align='mid', color='blue',
	        label='Histogram of Prices')

	    # Bin edges are increasing, so the first/last edge are the min/max.
	    x = np.linspace(edges[0], edges[-1], 10000)
	    pdf = (np.exp(-((np.log(x) - mu) ** 2) / (2 * sigma ** 2))
	           / (x * sigma * np.sqrt(2 * np.pi)))

	    plt.plot(x, pdf, linewidth=2, color='r', label='Lognorm Distribution')
	    plt.axis('tight')
	    plt.legend()
	    plt.title('Lognorm Prices')
	    plt.xlabel("Prices")
	    plt.ylabel("Frequency")
	    plt.show()

	# Function to measure if the returns are normally distributed


	def normal_check(dataframe):
	    """Plot a histogram of returns with a fitted normal density on top.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its body indentation had been lost and is restored here.

	    Parameters
	    ----------
	    dataframe : array-like
	        Return series.

	    Returns
	    -------
	    None
	        The figure is shown with ``plt.show()``.
	    """
	    # `density=True` is the supported spelling of the former `normed=True`.
	    _, edges, _ = plt.hist(
	        dataframe, 100, density=True, align='mid', color='blue',
	        label='Histogram Returns')

	    # Parameters of the reference normal curve.
	    mu = np.mean(dataframe)
	    sigma = np.std(dataframe)

	    # scipy's normal pdf replaces `mlab.normpdf`, which newer Matplotlib
	    # releases no longer ship; it is evaluated at the bin edges as before.
	    pdf = stats.norm.pdf(edges, mu, sigma)
	    plt.plot(edges, pdf, color='r', lw=4)
	    plt.title("Normal Distribution of the returns")
	    plt.xlabel("Returns")
	    plt.ylabel("Frequency")
	    plt.show()

	# Function to test whether the mean return is zero (one-sample t-test);
	# note that this alone does not establish that the series is normal


	def normal_test(dataframe):
	    """Run a one-sample t-test of the series mean against zero.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its body indentation had been lost and is restored here.

	    Parameters
	    ----------
	    dataframe : array-like
	        Return series.

	    Returns
	    -------
	    result
	        Whatever ``scipy.stats.ttest_1samp`` returns: the t statistic
	        followed by the two-sided p-value.
	    """
	    # The test derives the sample mean itself; the unused local that
	    # used to hold it is gone.
	    return stats.ttest_1samp(dataframe, 0)
	# Function to verify if the series follows a lognormal distribution by using
	# a Kolmogorov test


	def lognom_test(dataframe):
	    """Kolmogorov-Smirnov test of lognormality.

	    A series is lognormal exactly when its logarithm is normal, so the
	    log-values are compared with a normal distribution whose mean and
	    standard deviation are estimated from those same log-values.  The
	    previous body t-tested the raw prices against the mean log-price,
	    which says nothing about the shape of the distribution.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its body indentation had been lost and is restored here.

	    Parameters
	    ----------
	    dataframe : array-like of positive numbers
	        Price series.

	    Returns
	    -------
	    result
	        Whatever ``scipy.stats.kstest`` returns: the KS statistic
	        followed by the p-value.

	    Notes
	    -----
	    Because the normal parameters are fitted on the sample being tested,
	    the reported p-value is conservative (too large).
	    """
	    log_values = np.log(np.asarray(dataframe, dtype=float))
	    mu = np.mean(log_values)
	    sigma = np.std(log_values)
	    return stats.kstest(log_values, 'norm', args=(mu, sigma))
	# Function to visualize how far the data deviates from the ideal distribution


	def lognorm_var(dataframe):
	    """Plot the gap between the empirical and the fitted lognormal density.

	    The empirical density (100 histogram bins) and the lognormal density
	    fitted on the log-prices are drawn as two lines, and the area between
	    them is shaded.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its indentation and the ``**`` / ``*`` operators of the density
	    formula had been lost and are restored here.

	    Parameters
	    ----------
	    dataframe : array-like of positive numbers
	        Price series.

	    Returns
	    -------
	    None
	        The figure is shown with ``plt.show()``.
	    """
	    log_prices = np.log(dataframe)
	    mu = np.mean(log_prices)
	    sigma = np.std(log_prices)

	    # Drawn fully transparent: the call is only used to obtain the
	    # normalised bin heights and the bin edges.
	    heights, edges, _ = plt.hist(
	        dataframe, 100, density=True, align='mid', color='blue', alpha=0.0)

	    # One x position per bin (its centre) so that heights[k] is drawn
	    # where bin k really lies.
	    centres = 0.5 * (edges[:-1] + edges[1:])
	    pdf = (np.exp(-((np.log(centres) - mu) ** 2) / (2 * sigma ** 2))
	           / (centres * sigma * np.sqrt(2 * np.pi)))

	    plt.plot(centres, pdf, linewidth=2, color='r',
	             label='Ideal Distribution')
	    plt.plot(centres, heights, linewidth=2, color='b',
	             label='Real Distribution')
	    plt.axis('tight')
	    plt.fill_between(centres, pdf, heights, color='b', alpha=0.8)
	    plt.legend()
	    plt.title('Lognorm Difference')
	    plt.xlabel("Prices")
	    plt.ylabel("Frequency")
	    plt.show()

	# Function to simulate a GBM and to find any behavior similar to black monday


	def _simulate_gbm_paths(start_price, drift, sigma, years, n_paths, n_steps):
	    """Return an ``(n_paths, n_steps)`` array of simulated GBM prices."""
	    dt = float(years) / n_steps
	    # One standard-normal shock per path and per step after the first.
	    shocks = np.random.standard_normal((n_paths, n_steps - 1))
	    log_steps = ((drift - 0.5 * sigma ** 2) * dt
	                 + sigma * np.sqrt(dt) * shocks)

	    log_paths = np.empty((n_paths, n_steps), np.float64)
	    log_paths[:, 0] = np.log(start_price)
	    # Running sum of the log increments == the step-by-step recursion.
	    log_paths[:, 1:] = log_paths[:, :1] + np.cumsum(log_steps, axis=1)
	    return np.exp(log_paths)


	def gbm(dataframe, sigma=0.20, years=1, n_paths=10000, n_steps=252):
	    """Simulate and plot geometric-Brownian-motion price paths.

	    Every path starts at the second-to-last observed price; the drift is
	    the simple return between that price and the one 251 observations
	    before it.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its body indentation had been lost and is restored here.

	    Parameters
	    ----------
	    dataframe : array-like
	        Observed price series with at least 253 entries.
	    sigma : float, optional
	        Volatility over the whole horizon (default 0.20).
	    years : float, optional
	        Length of the simulated horizon (default 1).
	    n_paths : int, optional
	        Number of simulated paths (default 10000).
	    n_steps : int, optional
	        Number of points per path, including the start (default 252).

	    Returns
	    -------
	    numpy.ndarray
	        Simulated prices, shape ``(n_paths, n_steps)``.

	    Raises
	    ------
	    ValueError
	        If fewer than 253 prices are supplied.
	    """
	    prices = np.asarray(dataframe, dtype=float).ravel()
	    if prices.size < 253:
	        raise ValueError(
	            'gbm needs at least 253 prices, got %d' % prices.size)

	    # Anchors kept at positions -2 and -253.
	    # NOTE(review): position -2 is the second-to-last quote; confirm that
	    # the latest quote (-1) was not the intended starting point.
	    start_price = prices[-2]
	    year_ago_price = prices[-253]
	    drift = start_price / year_ago_price - 1  # expected return

	    paths = _simulate_gbm_paths(
	        start_price, drift, sigma, years, n_paths, n_steps)

	    # A 2-D y argument draws one line per column, i.e. one per path.
	    plt.plot(np.arange(n_steps), paths.T)
	    plt.title('GBM Simulation')
	    plt.show()
	    return paths


	# Function to check if a return in one step is less or equal to -20%
	def black_monday(dataframe):
	    """Count one-step returns of -20% or worse across simulated paths.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its body indentation had been lost and is restored here.

	    Parameters
	    ----------
	    dataframe : array-like
	        Anything ``pd.DataFrame`` accepts, with one price path per row.

	    Returns
	    -------
	    tuple
	        ``(count, n_paths)``: the number of steps, over all paths, whose
	        return is ``<= -0.2``, and the number of paths.
	    """
	    paths = pd.DataFrame(dataframe)
	    # After the transpose every column is one path and every row one time
	    # step; dropna() removes the first row, which has no previous price.
	    step_returns = paths.transpose().pct_change().dropna()

	    # Compare every remaining step at once, including the last one, which
	    # a label-based loop over range(1, len(...)) never reaches.
	    crashes = int((step_returns.values <= -0.2).sum())
	    return crashes, len(step_returns.columns)

	# Function to calculate the log rescaled range, log(R/S), which is used to
	# estimate the Hurst exponent and so tell whether the time series is persistent


	def hurst(n, returns=None):
	    """Return log(R/S), the log rescaled range of the first ``n`` returns.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its body indentation had been lost and is restored here.

	    Parameters
	    ----------
	    n : int or float
	        Window length; truncated with ``int(n)``.
	    returns : array-like, optional
	        Series to analyse.  When omitted, the module-level ``index[1]``
	        is used (legacy behaviour of ``hurst(n)``).

	    Returns
	    -------
	    float
	        ``log((max(Z) - min(Z)) / std(window))`` where ``Z`` is the
	        cumulative sum of the demeaned window.
	    """
	    if returns is None:
	        # Legacy fallback: relies on the global set in the __main__ block.
	        returns = index[1]
	    window = returns[0:int(n)]
	    cumulative_dev = np.cumsum(window - np.mean(window))
	    value_range = np.max(cumulative_dev) - np.min(cumulative_dev)
	    return np.log(value_range / np.std(window))


	# Function to fit log(R/S) against log(window length) by the OLS method


	def dimension(returns):
	    """Regress log(R/S) on log(window length) and return the OLS summary.

	    Six windows are used, all starting at the first observation: the full
	    sample and its first 1/2, 1/4, 1/8, 1/16 and 1/32.  In rescaled-range
	    analysis the slope of this regression is read as the Hurst exponent
	    estimate.

	    Parameters
	    ----------
	    returns : array-like
	        Return series.

	    Returns
	    -------
	    statsmodels summary object
	        Result of ``sm.OLS(...).fit().summary()``.
	    """
	    total = np.size(returns)
	    # Integer window lengths shared by both sides of the regression, so
	    # the regressor is the log of the length hurst() really used.
	    windows = [total // divisor for divisor in (1, 2, 4, 8, 16, 32)]
	    # Pass the series explicitly so the argument is the data analysed.
	    log_rs = [hurst(length, returns) for length in windows]

	    design = sm.add_constant(np.log(windows))
	    return sm.OLS(log_rs, design).fit().summary()

	# Function to plot the returns


	def plot_returns(dataframe):
	    """Draw the series as a black line against its position (0, 1, 2, ...).

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its body indentation had been lost and is restored here.

	    Parameters
	    ----------
	    dataframe : sequence
	        Return series to draw.

	    Returns
	    -------
	    None
	        The figure is shown with ``plt.show()``.
	    """
	    positions = range(len(dataframe))
	    plt.plot(positions, dataframe, color='black')
	    plt.title('Returns')
	    plt.show()

	# Function to plot the prices


	def plot_price(dataframe):
	    """Draw the series as a filled black silhouette with the axes hidden.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its body indentation had been lost and is restored here.

	    Parameters
	    ----------
	    dataframe : sequence
	        Price series to draw against its position (0, 1, 2, ...).

	    Returns
	    -------
	    None
	        The figure is shown with ``plt.show()``.
	    """
	    positions = range(len(dataframe))
	    # Outline first, then the area between the curve and zero.
	    plt.plot(positions, dataframe, color='black')
	    plt.fill_between(positions, dataframe, color='black')
	    plt.title('Price')
	    plt.axis('off')
	    plt.show()

	# Function to simulate a fractal without a recursive process


	def fractal_serie(n):
	    """Draw ``n`` refinement levels of a fixed eight-point zig-zag.

	    On each level, every segment between two consecutive points is
	    replaced by a copy of the whole current point list, scaled by that
	    segment's width and height relative to the extent of the initial
	    shape and shifted to the segment's start.  Each level is plotted as a
	    line and the last one is filled.

	    NOTE(review): this copy duplicates a definition earlier in the file;
	    its loop nesting had been lost and is restored here.

	    Parameters
	    ----------
	    n : int
	        Number of refinement levels.  With ``n == 0`` only the initial
	        shape is filled.

	    Returns
	    -------
	    None
	        The figure is shown with ``plt.show()``.
	    """
	    # Initial generator shape.
	    xs = [0, 1, 2, 3, 4, 5, 6, 7]
	    ys = [0, 1, 5, 2, 3, 2, 5, 7]
	    # Extent of the initial shape; float() keeps the scale factors true
	    # divisions regardless of the interpreter's `/` semantics.
	    base_width = float(xs[-1] - xs[0])
	    base_height = float(max(ys) - min(ys))

	    for _ in range(n):
	        next_xs = []
	        next_ys = []
	        for k in range(len(xs) - 1):
	            x_scale = (xs[k + 1] - xs[k]) / base_width
	            y_scale = (ys[k + 1] - ys[k]) / base_height
	            next_xs.extend(value * x_scale + xs[k] for value in xs)
	            next_ys.extend(value * y_scale + ys[k] for value in ys)
	        xs, ys = next_xs, next_ys
	        plt.plot(xs, ys, color='black')

	    plt.fill_between(xs, ys, color='black')
	    plt.title('Aproximate Series')
	    plt.axis('off')
	    plt.show()


	if __name__ == '__main__':
	    # NOTE(review): this copy duplicates the entry point earlier in the
	    # file; its body indentation had been lost and is restored here.
	    # `index` is [prices, returns]; the name is kept because hurst() can
	    # read it as a global.
	    index = get_index('^IXIC')
	    lognorm_check(index[0])
	    normal_check(index[1])
	    # Single-argument print(...) behaves the same as a Python 2 print
	    # statement and is also valid Python 3.
	    print(normal_test(index[1]))
	    print(lognom_test(index[0]))
	    lognorm_var(index[0])
	    trajectories = gbm(index[0])
	    print(black_monday(trajectories))
	    plot_returns(index[1])
	    plot_price(index[0])
	    print(dimension(index[1]))
	    fractal_serie(3)