"""Simple Gaussian process classification example (Laplacian approximation)"""
import numpy as np
import scipy.special
import matplotlib.pyplot as plt


class ProbitLikelihood:
    r"""
    Implements log p(y_i|f_i) and its derivatives for the probability

    .. math::

        p(y_i|f_i) = \int_{-\infty}^{y_i f_i} \mathcal{N}(x|0, 1) \,\mathrm{d}x
    """
    @classmethod
    def p(cls, z):
        """Standard normal cumulative distribution function"""
        return (1 + scipy.special.erf(z / np.sqrt(2))) / 2

    @classmethod
    def gauss(cls, f):
        """Standard normal probability density function"""
        return np.exp(-f**2 / 2) / np.sqrt(2 * np.pi)

    @classmethod
    def logp(cls, y, f):
        return np.log(cls.p(y * f))

    @classmethod
    def dlogp(cls, y, f):
        return y * cls.gauss(f) / cls.p(y * f)

    @classmethod
    def d2logp(cls, y, f):
        g = cls.gauss(f)
        p = cls.p(y * f)
        return -g**2 / p**2 - y * f * g / p

    @classmethod
    def integrate_gaussian(cls, f, var_f):
        # The predictive probability int p(y=1|f') N(f'|f, var_f) df' has a
        # closed form for the probit likelihood
        return cls.p(f / np.sqrt(1 + var_f))


class LogitLikelihood:
    r"""
    Implements log p(y_i|f_i) and its derivatives for the probability

    .. math::

        p(y_i|f_i) = \frac{1}{1 + \exp(-y_i f_i)}
    """
    @classmethod
    def p(cls, z):
        """Logistic (sigmoid) function"""
        return 1 / (1 + np.exp(-z))

    @classmethod
    def logp(cls, y, f):
        return -np.log(1 + np.exp(-y * f))

    @classmethod
    def dlogp(cls, y, f):
        # Valid for labels y in {-1, +1}
        return (y + 1) / 2 - cls.p(f)

    @classmethod
    def d2logp(cls, y, f):
        p1 = cls.p(f)
        return -p1 * (1 - p1)

    @classmethod
    def integrate_gaussian(cls, f, var_f, nb_points=101, fac=5):
        # No closed form for the logistic likelihood; see the quadrature
        # sketch below for one way to approximate this integral
        raise NotImplementedError
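

# The logistic likelihood has no closed-form Gaussian average. The following
# is a hedged sketch (not part of the original gist) of how the predictive
# integral int p(y=1|f') N(f'|f, var_f) df' could be approximated: evaluate
# the sigmoid on a grid of `nb_points` points spanning `fac` standard
# deviations around the mean and apply the trapezoidal rule. The helper name
# and its reuse of the `nb_points`/`fac` arguments are assumptions.
def logit_integrate_gaussian(f, var_f, nb_points=101, fac=5):
    f = np.atleast_1d(f)
    var_f = np.atleast_1d(var_f)
    std_f = np.sqrt(var_f)
    # Grid of latent-function values, one column per prediction point
    grid = f + std_f * np.linspace(-fac, fac, nb_points).reshape(-1, 1)
    # Gaussian weights N(grid|f, var_f)
    weights = np.exp(-(grid - f)**2 / (2 * var_f)) / np.sqrt(2 * np.pi * var_f)
    # Trapezoidal rule along the grid axis; by symmetry,
    # logit_integrate_gaussian(np.array([0.0]), np.array([1.0])) is close to 0.5
    return np.trapz(LogitLikelihood.p(grid) * weights, grid, axis=0)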


def test_likelihood(likelihood, nb_samples=100, step_size=1e-6):
    """Check analytical derivatives of the log-likelihood against central differences"""
    f = np.random.random(nb_samples)
    y = 2 * (np.random.random(nb_samples) > 0.5) - 1
    logp = likelihood.logp(y, f)
    dlogp = likelihood.dlogp(y, f)
    d2logp = likelihood.d2logp(y, f)
    num_dlogp = (likelihood.logp(y, f + step_size) - likelihood.logp(y, f - step_size)) / (2 * step_size)
    num_d2logp = (likelihood.dlogp(y, f + step_size) - likelihood.dlogp(y, f - step_size)) / (2 * step_size)
    np.testing.assert_allclose(dlogp, num_dlogp)
    np.testing.assert_allclose(d2logp, num_d2logp)


test_likelihood(ProbitLikelihood)
test_likelihood(LogitLikelihood)


class LaplaceGaussianProcessClassifier:
    def __init__(self, kernel, likelihood):
        self._kernel = kernel
        self._likelihood = likelihood

    def _KWL(self, x, y, f):
        nb_data, = x.shape
        # Compute GP covariance matrix
        K = self._kernel(x.reshape(nb_data, 1), x.reshape(1, nb_data))
        # Compute second derivative of likelihood
        W = -self._likelihood.d2logp(y, f)
        # H = W + K^-1 is the negative Hessian of the log posterior; we work with
        # B = W^1/2 K H W^-1/2 = I + W^1/2 K W^1/2
        # See Rasmussen, Williams, page 45
        sqrt_W = np.diag(np.sqrt(W))
        B = np.identity(nb_data) + sqrt_W.dot(K.dot(sqrt_W))
        L = np.linalg.cholesky(B)
        return K, W, sqrt_W, L

    def train(self, x, y):
        """Algorithm 3.1 from Rasmussen, Williams"""
        nb_data, = x.shape
        assert y.shape == (nb_data,)
        f = np.zeros(nb_data)
        # Fixed number of Newton iterations; no explicit convergence check
        for i in range(100):
            # Compute second derivatives, etc.
            K, W, sqrt_W, L = self._KWL(x, y, f)
            # Newton step: a = b - W^1/2 L^T \ (L \ (W^1/2 K b))
            b = W * f + self._likelihood.dlogp(y, f)
            a = b - sqrt_W.dot(np.linalg.solve(L.T,
                                               np.linalg.solve(L, sqrt_W.dot(K.dot(b)))))
            f = K.dot(a)
        # Store training set
        self._x = x
        self._y = y
        # Store posterior mode
        self._f = f

    def predict(self, x):
        """Algorithm 3.2 from Rasmussen, Williams"""
        nb_data, = self._x.shape
        nb_pred, = x.shape
        # Compute second derivatives, etc.
        K, W, sqrt_W, L = self._KWL(self._x, self._y, self._f)
        # Prediction for the mean of the nuisance function
        Ks = self._kernel(self._x.reshape(nb_data, 1), x.reshape(1, nb_pred))
        f = self._likelihood.dlogp(self._y, self._f).dot(Ks)
        # Prediction for the variance of the nuisance function
        v = np.linalg.solve(L, sqrt_W.dot(Ks))
        cov_f = self._kernel(x.reshape(nb_pred, 1), x.reshape(1, nb_pred)) - v.T.dot(v)
        # Return predictive probability
        return self._likelihood.integrate_gaussian(f, cov_f.diagonal())
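
    # Hedged helper, not in the original gist: the commented-out fill_between
    # call further below refers to a latent covariance `cov_pred` that
    # predict() does not expose. This sketch repeats the latent-mean/variance
    # part of Algorithm 3.2 and returns both, assuming it is acceptable to
    # expose them separately from the predictive probability.
    def predict_latent(self, x):
        nb_data, = self._x.shape
        nb_pred, = x.shape
        K, W, sqrt_W, L = self._KWL(self._x, self._y, self._f)
        Ks = self._kernel(self._x.reshape(nb_data, 1), x.reshape(1, nb_pred))
        # Mean and covariance of the latent function at the test points
        f = self._likelihood.dlogp(self._y, self._f).dot(Ks)
        v = np.linalg.solve(L, sqrt_W.dot(Ks))
        cov_f = self._kernel(x.reshape(nb_pred, 1), x.reshape(1, nb_pred)) - v.T.dot(v)
        return f, cov_f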


def kernel(x1, x2, length_scale=1, signal_variance=1):
    """Squared-exponential (RBF) covariance function"""
    return signal_variance * np.exp(-(x1 - x2)**2 / (2 * length_scale**2))


n_input = 10
n_pred = 100

x = np.arange(n_input)
# Labels are -1 on the left part of the domain and +1 on the right...
y = 2 * (x > n_input / 2) - 1
# ...except for the first point, whose label is deliberately flipped
y[0] = 1
x_pred = np.linspace(x.min() - 1, x.max() + 1, n_pred)
classifier = LaplaceGaussianProcessClassifier(kernel, ProbitLikelihood)
classifier.train(x, y)
y_pred = classifier.predict(x_pred)
#plt.fill_between(x_pred, y_pred + np.sqrt(cov_pred.diagonal()),
# y_pred - np.sqrt(cov_pred.diagonal()))
plt.plot(x, y, 'kx')
plt.plot(x_pred, y_pred, 'k-')
plt.show()