Created
August 14, 2013 12:52
-
-
Save mblondel/6230778 to your computer and use it in GitHub Desktop.
Gaussian Process Regression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Gaussian processes""" | |
# Author: Mathieu Blondel <mathieu@mblondel.org> | |
# License: BSD 3 clause | |
import numpy as np | |
from scipy.linalg import cholesky, solve_triangular | |
from sklearn.base import BaseEstimator, RegressorMixin | |
from sklearn.metrics.pairwise import pairwise_kernels | |
class GaussianProcess(BaseEstimator, RegressorMixin): | |
def __init__(self, alpha=1, kernel="linear", gamma=None, degree=3, coef0=1, | |
kernel_params=None): | |
self.alpha = alpha | |
self.kernel = kernel | |
self.gamma = gamma | |
self.degree = degree | |
self.coef0 = coef0 | |
self.kernel_params = kernel_params | |
def _get_kernel(self, X, Y=None): | |
if callable(self.kernel): | |
params = self.kernel_params or {} | |
else: | |
params = {"gamma": self.gamma, | |
"degree": self.degree, | |
"coef0": self.coef0} | |
return pairwise_kernels(X, Y, metric=self.kernel, | |
filter_params=True, **params) | |
def fit(self, X, y=None): | |
n_samples = X.shape[0] | |
K = self._get_kernel(X) | |
K.flat[::n_samples + 1] += self.alpha | |
# Equivalent to: self.dual_coef_ = solve(K, y) | |
self.L_ = cholesky(K, lower=True) | |
x = solve_triangular(self.L_, y, lower=True) | |
self.dual_coef_ = solve_triangular(self.L_.T, x) | |
self.X_fit_ = X | |
return self | |
def predict(self, X, ret_variance=False): | |
K = self._get_kernel(X, self.X_fit_) | |
pred_mean = np.dot(K, self.dual_coef_) | |
if ret_variance: | |
n_samples = X.shape[0] | |
V = solve_triangular(self.L_, K.T, lower=True) | |
v = np.sum(V * V, axis=0) | |
K = self._get_kernel(X) | |
pred_var = K.flat[::n_samples + 1] - v | |
return pred_mean, pred_var | |
else: | |
return pred_mean | |
if __name__ == '__main__': | |
from sklearn.datasets import make_regression | |
from sklearn.cross_validation import train_test_split | |
X, y = make_regression(n_samples=500, random_state=0) | |
X_tr, X_te, y_tr, y_te = train_test_split(X, y, | |
train_size=0.75, | |
test_size=0.25) | |
gp = GaussianProcess() | |
gp.fit(X_tr, y_tr) | |
pred, var = gp.predict(X_te, ret_variance=True) | |
print pred[:10] | |
print np.sqrt(var[:10]) | |
print np.mean((y_te - pred) ** 2) | |
from sklearn.linear_model import Ridge | |
ridge = Ridge() | |
ridge.fit(X_tr, y_tr) | |
pred = ridge.predict(X_te) | |
print np.mean((y_te - pred) ** 2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment