hhl60492/gpr.py

## gpr.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# read in the csv
df = pd.read_csv("eth-cad-max.csv")

# get the prices col slice from df
prices = np.array(df['price'])
H = 0.55
start = 600
# pick a point in the time series to start
holdout = 0.05 # 5 percent holdout set to test the Gaussian Process predictions
scaler = 1
prices = prices / scaler
temp = prices[start:]
stop = int((1-holdout) * len(prices))

# assume our time series is a function of the form y = f(x), where x is the date or time index
train_y = prices[start:stop]
test_y = prices[stop:]
train_x = np.linspace(1,len(train_y),len(train_y))
test_x = np.linspace(1,len(test_y),len(test_y))

# assuming our time series is a Fractional Brownian Process (FBM), define the kernel function
def FBM_kernel(x1,x2, H):
    sigma = np.zeros([len(x1), len(x2)])
    for i in range(len(x1)):
        for j in range(len(x2)):
            sigma[i,j] = 0.5 * (np.power(np.absolute(x1[i]), 2*H) +
                np.power(np.absolute(x2[j]), 2 * H) - np.power(np.absolute(x1[i] - x2[j]), 2 * H))
    return sigma

# here's where the magic happens

# training covariance
xx = FBM_kernel(train_y,train_y, H)
# training and testing covariance
xx_t = FBM_kernel(train_y,test_y, H)
x_tx = FBM_kernel(test_y,train_y, H)

# testing covariance
x_tx_t = FBM_kernel(test_y,test_y, H)

# compute prediction
noise_sigma = 0
V_inv = np.linalg.inv(xx + noise_sigma * np.diag(xx))

# test set mean
foo = np.matmul(x_tx,V_inv)
mu_test = np.matmul(foo, train_y)
print(mu_test)
foo = np.matmul(x_tx,V_inv)
sigma_test = x_tx_t - np.matmul(foo,xx_t)
print()
plt.figure()

plt.plot(stop + test_x,mu_test)
plt.fill_between(stop + test_x, mu_test-2*np.diagonal(sigma_test), mu_test+2*np.diagonal(sigma_test), color = 'grey', label = '95% confidence region')
plt.scatter(stop + test_x,mu_test, label='Predicted mean')
plt.plot(stop + test_x, test_y, 'r--', label = 'Actual Price')

plt.xlabel('Time step (days)')
plt.ylabel('CAD$')
plt.legend(loc = 2)
plt.show()
# test set variance
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	# read in the csv
	df = pd.read_csv("eth-cad-max.csv")

	# get the prices col slice from df
	prices = np.array(df['price'])
	H = 0.55
	start = 600
	# pick a point in the time series to start
	holdout = 0.05 # 5 percent holdout set to test the Gaussian Process predictions
	scaler = 1
	prices = prices / scaler
	temp = prices[start:]
	stop = int((1-holdout) * len(prices))

	# assume our time series is a function of the form y = f(x), where x is the date or time index
	train_y = prices[start:stop]
	test_y = prices[stop:]
	train_x = np.linspace(1,len(train_y),len(train_y))
	test_x = np.linspace(1,len(test_y),len(test_y))

	# assuming our time series is a Fractional Brownian Process (FBM), define the kernel function
	def FBM_kernel(x1,x2, H):
	sigma = np.zeros([len(x1), len(x2)])
	for i in range(len(x1)):
	for j in range(len(x2)):
	sigma[i,j] = 0.5 * (np.power(np.absolute(x1[i]), 2*H) +
	np.power(np.absolute(x2[j]), 2 * H) - np.power(np.absolute(x1[i] - x2[j]), 2 * H))
	return sigma

	# here's where the magic happens

	# training covariance
	xx = FBM_kernel(train_y,train_y, H)
	# training and testing covariance
	xx_t = FBM_kernel(train_y,test_y, H)
	x_tx = FBM_kernel(test_y,train_y, H)

	# testing covariance
	x_tx_t = FBM_kernel(test_y,test_y, H)

	# compute prediction
	noise_sigma = 0
	V_inv = np.linalg.inv(xx + noise_sigma * np.diag(xx))

	# test set mean
	foo = np.matmul(x_tx,V_inv)
	mu_test = np.matmul(foo, train_y)
	print(mu_test)
	foo = np.matmul(x_tx,V_inv)
	sigma_test = x_tx_t - np.matmul(foo,xx_t)
	print()
	plt.figure()

	plt.plot(stop + test_x,mu_test)
	plt.fill_between(stop + test_x, mu_test-2np.diagonal(sigma_test), mu_test+2np.diagonal(sigma_test), color = 'grey', label = '95% confidence region')
	plt.scatter(stop + test_x,mu_test, label='Predicted mean')
	plt.plot(stop + test_x, test_y, 'r--', label = 'Actual Price')

	plt.xlabel('Time step (days)')
	plt.ylabel('CAD$')
	plt.legend(loc = 2)
	plt.show()
	# test set variance