@mblondel
Created August 14, 2013 12:52
Gaussian Process Regression
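For reference, the code below implements standard Gaussian process regression with noise variance $\alpha$ (the posterior mean coincides with kernel ridge regression). Writing $K$ for the training kernel matrix, $K_*$ for the test/train kernel and $K_{**}$ for the test kernel, the predictive mean and (pointwise) variance are

$$\mu_* = K_* (K + \alpha I)^{-1} y, \qquad \operatorname{diag}(\Sigma_*) = \operatorname{diag}(K_{**}) - \operatorname{diag}\!\left(K_* (K + \alpha I)^{-1} K_*^{\top}\right).$$

fit() factors $K + \alpha I = L L^{\top}$ with a Cholesky decomposition and obtains the dual coefficients $(K + \alpha I)^{-1} y$ by two triangular solves; predict() reuses $L$ to evaluate the variance term as the column-wise squared norms of $L^{-1} K_*^{\top}$.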
"""Gaussian processes"""
# Author: Mathieu Blondel <mathieu@mblondel.org>
# License: BSD 3 clause
import numpy as np
from scipy.linalg import cholesky, solve_triangular
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics.pairwise import pairwise_kernels

class GaussianProcess(BaseEstimator, RegressorMixin):

    def __init__(self, alpha=1, kernel="linear", gamma=None, degree=3, coef0=1,
                 kernel_params=None):
        self.alpha = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)

    def fit(self, X, y=None):
        n_samples = X.shape[0]
        K = self._get_kernel(X)
        # Add the noise / regularization term alpha to the diagonal of K.
        K.flat[::n_samples + 1] += self.alpha

        # Equivalent to: self.dual_coef_ = solve(K, y), but done via a
        # Cholesky factorization K = L L^T so that L can be reused in
        # predict() to compute the predictive variance.
        self.L_ = cholesky(K, lower=True)
        x = solve_triangular(self.L_, y, lower=True)
        self.dual_coef_ = solve_triangular(self.L_.T, x)

        self.X_fit_ = X

        return self

    def predict(self, X, ret_variance=False):
        # Cross-kernel between test and training points.
        K = self._get_kernel(X, self.X_fit_)
        pred_mean = np.dot(K, self.dual_coef_)

        if ret_variance:
            n_samples = X.shape[0]
            # diag(K_* (K + alpha I)^-1 K_*^T) computed via V = L^-1 K_*^T.
            V = solve_triangular(self.L_, K.T, lower=True)
            v = np.sum(V * V, axis=0)
            # Predictive variance: diag(K_**) minus the term above.
            K = self._get_kernel(X)
            pred_var = K.flat[::n_samples + 1] - v
            return pred_mean, pred_var
        else:
            return pred_mean


if __name__ == '__main__':
    from sklearn.datasets import make_regression
    from sklearn.cross_validation import train_test_split

    X, y = make_regression(n_samples=500, random_state=0)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y,
                                              train_size=0.75,
                                              test_size=0.25)

    gp = GaussianProcess()
    gp.fit(X_tr, y_tr)
    pred, var = gp.predict(X_te, ret_variance=True)

    print pred[:10]
    print np.sqrt(var[:10])
    print np.mean((y_te - pred) ** 2)

    from sklearn.linear_model import Ridge
    ridge = Ridge()
    ridge.fit(X_tr, y_tr)
    pred = ridge.predict(X_te)
    print np.mean((y_te - pred) ** 2)
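
Because _get_kernel delegates to sklearn.metrics.pairwise.pairwise_kernels with filter_params=True, non-linear kernels such as "rbf" or "poly" can be used simply by passing the corresponding parameters. A minimal sketch, reusing the X_tr/X_te/y_tr/y_te split from the demo above (the alpha and gamma values here are illustrative, not tuned):

    gp_rbf = GaussianProcess(alpha=0.1, kernel="rbf", gamma=0.01)
    gp_rbf.fit(X_tr, y_tr)
    pred_rbf = gp_rbf.predict(X_te)
    print np.mean((y_te - pred_rbf) ** 2)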