naddleman/gpr.py Secret

## gpr.py
"""Gaussian Process Regression

This script implements an example of GPR without using 'advanced' libraries,
e.g., scikit-learn
"""

import numpy as np
import matplotlib.pyplot as plt

# Constants
noise_level = 0.25

def RBF(x1, x2, max_cov, scale):
    squared_distance = -1 * (x1 - x2)**2
    return max_cov * np.exp(squared_distance / (2 * scale ** 2))

def RBF_diff(delta, max_cov, scale):
    squared_distance = -1 * (delta) ** 2
    return max_cov * np.exp(squared_distance / (2 * scale ** 2))

def generate_new_inputs(x_observed, n = 100):
    """
    generates a vector of new input points spanning the interval covered
    by observed x's +/- 10%
    """
    min_x, max_x = np.min(x_observed), np.max(x_observed)
    ten_percent = 0.1 * abs(max_x - min_x)
    interval_min = min_x - ten_percent
    interval_max = max_x + ten_percent
    return np.linspace(interval_min, interval_max, num=n)

def generate_covariance_matrices(x_observed, new_inputs, noise_level,
                                 max_cov, scale):
    """
    Returns covariance matrices K(X_t, X_t), K(X_*, X_t), and K(X_*, X_*),
    representing the covariance between the observed data points, data points
    and new inputs, and new inputs with one another, respectively.
    """
    # first generate matrices of differences
    diff_data = np.abs(x_observed.reshape(-1, 1) - x_observed)
    diff_data_new = np.abs(x_observed.reshape(-1, 1) - new_inputs)
    diff_new_data = diff_data_new.transpose()
    diff_new = np.abs(new_inputs.reshape(-1,1) - new_inputs)

    def RBF_differences(diff):
        return RBF_diff(diff, max_cov, scale)

    # covariance matrices
    KXt = RBF_differences(diff_data)
    KXt_Xstar = RBF_differences(diff_data_new)
    KXstar_Xt = KXt_Xstar.transpose()
    KXstar = RBF_differences(diff_new)

    # Add nosie to the observation covariance matrix
    noise_matrix = np.eye(KXt.shape[0]) * noise_level
    KXt_plus_noise = KXt + noise_matrix
    return KXt_plus_noise, KXt_Xstar, KXstar_Xt, KXstar

def gpr_demo(xt, yt, noise, max_cov, scale):
    # Shift the observations so they have mean 0
    shift = np.mean(yt)
    simulated_inputs = generate_new_inputs(xt)
    K_list = generate_covariance_matrices(xt, simulated_inputs, noise,
                                            max_cov, scale)
    precision_matrix = np.linalg.inv(K_list[0])
    gp_mean = K_list[2] @ precision_matrix @ (yt - shift)
    plt.plot(simulated_inputs, gp_mean, label="GP mean")
    plt.plot(xt, yt - shift, 'ro', label="Data")

if __name__ == "__main__":
    x_sample = np.linspace(0, 10, 20)
    y_sample = (1/3 * (x_sample - 5)) ** 2
    noise = np.random.normal(0, noise_level, 20)
    y_noisy = y_sample + noise
    gpr_demo(x_sample, y_noisy, 0.25, 1, 1)
    plt.title("Demonstrating Gaussian Process Regression")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.legend(loc='lower right')
    plt.show()
	"""Gaussian Process Regression

	This script implements an example of GPR without using 'advanced' libraries,
	e.g., scikit-learn
	"""

	import numpy as np
	import matplotlib.pyplot as plt

	# Constants
	noise_level = 0.25

	def RBF(x1, x2, max_cov, scale):
	squared_distance = -1 * (x1 - x2)**2
	return max_cov * np.exp(squared_distance / (2 * scale ** 2))

	def RBF_diff(delta, max_cov, scale):
	squared_distance = -1 * (delta) ** 2
	return max_cov * np.exp(squared_distance / (2 * scale ** 2))

	def generate_new_inputs(x_observed, n = 100):
	"""
	generates a vector of new input points spanning the interval covered
	by observed x's +/- 10%
	"""
	min_x, max_x = np.min(x_observed), np.max(x_observed)
	ten_percent = 0.1 * abs(max_x - min_x)
	interval_min = min_x - ten_percent
	interval_max = max_x + ten_percent
	return np.linspace(interval_min, interval_max, num=n)

	def generate_covariance_matrices(x_observed, new_inputs, noise_level,
	max_cov, scale):
	"""
	Returns covariance matrices K(X_t, X_t), K(X_, X_t), and K(X_, X_*),
	representing the covariance between the observed data points, data points
	and new inputs, and new inputs with one another, respectively.
	"""
	# first generate matrices of differences
	diff_data = np.abs(x_observed.reshape(-1, 1) - x_observed)
	diff_data_new = np.abs(x_observed.reshape(-1, 1) - new_inputs)
	diff_new_data = diff_data_new.transpose()
	diff_new = np.abs(new_inputs.reshape(-1,1) - new_inputs)

	def RBF_differences(diff):
	return RBF_diff(diff, max_cov, scale)

	# covariance matrices
	KXt = RBF_differences(diff_data)
	KXt_Xstar = RBF_differences(diff_data_new)
	KXstar_Xt = KXt_Xstar.transpose()
	KXstar = RBF_differences(diff_new)

	# Add nosie to the observation covariance matrix
	noise_matrix = np.eye(KXt.shape[0]) * noise_level
	KXt_plus_noise = KXt + noise_matrix
	return KXt_plus_noise, KXt_Xstar, KXstar_Xt, KXstar

	def gpr_demo(xt, yt, noise, max_cov, scale):
	# Shift the observations so they have mean 0
	shift = np.mean(yt)
	simulated_inputs = generate_new_inputs(xt)
	K_list = generate_covariance_matrices(xt, simulated_inputs, noise,
	max_cov, scale)
	precision_matrix = np.linalg.inv(K_list[0])
	gp_mean = K_list[2] @ precision_matrix @ (yt - shift)
	plt.plot(simulated_inputs, gp_mean, label="GP mean")
	plt.plot(xt, yt - shift, 'ro', label="Data")

	if __name__ == "__main__":
	x_sample = np.linspace(0, 10, 20)
	y_sample = (1/3 * (x_sample - 5)) ** 2
	noise = np.random.normal(0, noise_level, 20)
	y_noisy = y_sample + noise
	gpr_demo(x_sample, y_noisy, 0.25, 1, 1)
	plt.title("Demonstrating Gaussian Process Regression")
	plt.xlabel("x")
	plt.ylabel("y")
	plt.legend(loc='lower right')
	plt.show()