Skip to content

Instantly share code, notes, and snippets.

@hhl60492
Created November 27, 2017 19:10
Show Gist options
  • Save hhl60492/0d1b99314a662e0c6c7d918caf20b0b5 to your computer and use it in GitHub Desktop.
Save hhl60492/0d1b99314a662e0c6c7d918caf20b0b5 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# read in the csv
df = pd.read_csv("eth-cad-max.csv")
# get the prices col slice from df
prices = np.array(df['price'])
H = 0.55
start = 600
# pick a point in the time series to start
holdout = 0.05 # 5 percent holdout set to test the Gaussian Process predictions
scaler = 1
prices = prices / scaler
temp = prices[start:]
stop = int((1-holdout) * len(prices))
# assume our time series is a function of the form y = f(x), where x is the date or time index
train_y = prices[start:stop]
test_y = prices[stop:]
train_x = np.linspace(1,len(train_y),len(train_y))
test_x = np.linspace(1,len(test_y),len(test_y))
# assuming our time series is a Fractional Brownian Process (FBM), define the kernel function
def FBM_kernel(x1,x2, H):
sigma = np.zeros([len(x1), len(x2)])
for i in range(len(x1)):
for j in range(len(x2)):
sigma[i,j] = 0.5 * (np.power(np.absolute(x1[i]), 2*H) +
np.power(np.absolute(x2[j]), 2 * H) - np.power(np.absolute(x1[i] - x2[j]), 2 * H))
return sigma
# here's where the magic happens
# training covariance
xx = FBM_kernel(train_y,train_y, H)
# training and testing covariance
xx_t = FBM_kernel(train_y,test_y, H)
x_tx = FBM_kernel(test_y,train_y, H)
# testing covariance
x_tx_t = FBM_kernel(test_y,test_y, H)
# compute prediction
noise_sigma = 0
V_inv = np.linalg.inv(xx + noise_sigma * np.diag(xx))
# test set mean
foo = np.matmul(x_tx,V_inv)
mu_test = np.matmul(foo, train_y)
print(mu_test)
foo = np.matmul(x_tx,V_inv)
sigma_test = x_tx_t - np.matmul(foo,xx_t)
print()
plt.figure()
plt.plot(stop + test_x,mu_test)
plt.fill_between(stop + test_x, mu_test-2*np.diagonal(sigma_test), mu_test+2*np.diagonal(sigma_test), color = 'grey', label = '95% confidence region')
plt.scatter(stop + test_x,mu_test, label='Predicted mean')
plt.plot(stop + test_x, test_y, 'r--', label = 'Actual Price')
plt.xlabel('Time step (days)')
plt.ylabel('CAD$')
plt.legend(loc = 2)
plt.show()
# test set variance
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment