"""Simple Gaussian process classification example (Laplacian approximation)"""
import numpy as np
import scipy.special
import matplotlib.pyplot as plt


class ProbitLikelihood:
    r"""
    Implements log p(y_i|f_i) and its derivatives for the probability

    .. math::

        p(y_i|f_i) = \int_{-\infty}^{y_i f_i} \mathcal{N}(x|0, 1) \,\mathrm{d}x
    """
    @classmethod
    def p(cls, z):
        """Standard normal cumulative distribution function"""
        return (1 + scipy.special.erf(z / np.sqrt(2))) / 2

    @classmethod
    def gauss(cls, f):
        """Standard normal probability density function"""
        return np.exp(-f**2 / 2) / np.sqrt(2 * np.pi)

    @classmethod
    def logp(cls, y, f):
        return np.log(cls.p(y * f))

    @classmethod
    def dlogp(cls, y, f):
        return y * cls.gauss(f) / cls.p(y * f)

    @classmethod
    def d2logp(cls, y, f):
        g = cls.gauss(f)
        p = cls.p(y * f)
        return -g**2 / p**2 - y * f * g / p

    @classmethod
    def integrate_gaussian(cls, f, var_f):
        # The predictive probability int p(y=1|f') N(f'|f, var_f) df' has a
        # closed form for the probit likelihood
        return cls.p(f / np.sqrt(1 + var_f))


class LogitLikelihood:
    r"""
    Implements log p(y_i|f_i) and its derivatives for the probability

    .. math::

        p(y_i|f_i) = \frac{1}{1 + \exp(-y_i f_i)}
    """
    @classmethod
    def p(cls, z):
        """Logistic (sigmoid) function"""
        return 1 / (1 + np.exp(-z))

    @classmethod
    def logp(cls, y, f):
        return -np.log(1 + np.exp(-y * f))

    @classmethod
    def dlogp(cls, y, f):
        # Valid for labels y in {-1, +1}
        return (y + 1) / 2 - cls.p(f)

    @classmethod
    def d2logp(cls, y, f):
        p1 = cls.p(f)
        return -p1 * (1 - p1)

    @classmethod
    def integrate_gaussian(cls, f, var_f, nb_points=101, fac=5):
        # No closed form for the logistic likelihood; see the quadrature
        # sketch below for one way to approximate this integral
        raise NotImplementedError
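

# The logistic likelihood has no closed-form Gaussian average. The following
# is a hedged sketch (not part of the original gist) of how the predictive
# integral int p(y=1|f') N(f'|f, var_f) df' could be approximated: evaluate
# the sigmoid on a grid of `nb_points` points spanning `fac` standard
# deviations around the mean and apply the trapezoidal rule. The helper name
# and its reuse of the `nb_points`/`fac` arguments are assumptions.
def logit_integrate_gaussian(f, var_f, nb_points=101, fac=5):
    f = np.atleast_1d(f)
    var_f = np.atleast_1d(var_f)
    std_f = np.sqrt(var_f)
    # Grid of latent-function values, one column per prediction point
    grid = f + std_f * np.linspace(-fac, fac, nb_points).reshape(-1, 1)
    # Gaussian weights N(grid|f, var_f)
    weights = np.exp(-(grid - f)**2 / (2 * var_f)) / np.sqrt(2 * np.pi * var_f)
    # Trapezoidal rule along the grid axis; by symmetry,
    # logit_integrate_gaussian(np.array([0.0]), np.array([1.0])) is close to 0.5
    return np.trapz(LogitLikelihood.p(grid) * weights, grid, axis=0)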


def test_likelihood(likelihood, nb_samples=100, step_size=1e-6):
    """Check analytical derivatives of the log-likelihood against central differences"""
    f = np.random.random(nb_samples)
    y = 2 * (np.random.random(nb_samples) > 0.5) - 1
    logp = likelihood.logp(y, f)
    dlogp = likelihood.dlogp(y, f)
    d2logp = likelihood.d2logp(y, f)
    num_dlogp = (likelihood.logp(y, f + step_size) - likelihood.logp(y, f - step_size)) / (2 * step_size)
    num_d2logp = (likelihood.dlogp(y, f + step_size) - likelihood.dlogp(y, f - step_size)) / (2 * step_size)
    np.testing.assert_allclose(dlogp, num_dlogp)
    np.testing.assert_allclose(d2logp, num_d2logp)


test_likelihood(ProbitLikelihood)
test_likelihood(LogitLikelihood)


class LaplaceGaussianProcessClassifier:
    def __init__(self, kernel, likelihood):
        self._kernel = kernel
        self._likelihood = likelihood

    def _KWL(self, x, y, f):
        nb_data, = x.shape
        # Compute GP covariance matrix
        K = self._kernel(x.reshape(nb_data, 1), x.reshape(1, nb_data))
        # Compute second derivative of likelihood
        W = -self._likelihood.d2logp(y, f)
        # H = W + K^-1 is the negative Hessian of the log posterior; we work with
        # B = W^1/2 K H W^-1/2 = I + W^1/2 K W^1/2
        # See Rasmussen, Williams, page 45
        sqrt_W = np.diag(np.sqrt(W))
        B = np.identity(nb_data) + sqrt_W.dot(K.dot(sqrt_W))
        L = np.linalg.cholesky(B)
        return K, W, sqrt_W, L

    def train(self, x, y):
        """Algorithm 3.1 from Rasmussen, Williams"""
        nb_data, = x.shape
        assert y.shape == (nb_data,)
        f = np.zeros(nb_data)
        # Fixed number of Newton iterations; no explicit convergence check
        for i in range(100):
            # Compute second derivatives, etc.
            K, W, sqrt_W, L = self._KWL(x, y, f)
            # Newton step: a = b - W^1/2 L^T \ (L \ (W^1/2 K b))
            b = W * f + self._likelihood.dlogp(y, f)
            a = b - sqrt_W.dot(np.linalg.solve(L.T,
                                               np.linalg.solve(L, sqrt_W.dot(K.dot(b)))))
            f = K.dot(a)
        # Store training set
        self._x = x
        self._y = y
        # Store posterior mode
        self._f = f

    def predict(self, x):
        """Algorithm 3.2 from Rasmussen, Williams"""
        nb_data, = self._x.shape
        nb_pred, = x.shape
        # Compute second derivatives, etc.
        K, W, sqrt_W, L = self._KWL(self._x, self._y, self._f)
        # Prediction for the mean of the nuisance function
        Ks = self._kernel(self._x.reshape(nb_data, 1), x.reshape(1, nb_pred))
        f = self._likelihood.dlogp(self._y, self._f).dot(Ks)
        # Prediction for the variance of the nuisance function
        v = np.linalg.solve(L, sqrt_W.dot(Ks))
        cov_f = self._kernel(x.reshape(nb_pred, 1), x.reshape(1, nb_pred)) - v.T.dot(v)
        # Return predictive probability
        return self._likelihood.integrate_gaussian(f, cov_f.diagonal())
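
    # Hedged helper, not in the original gist: the commented-out fill_between
    # call further below refers to a latent covariance `cov_pred` that
    # predict() does not expose. This sketch repeats the latent-mean/variance
    # part of Algorithm 3.2 and returns both, assuming it is acceptable to
    # expose them separately from the predictive probability.
    def predict_latent(self, x):
        nb_data, = self._x.shape
        nb_pred, = x.shape
        K, W, sqrt_W, L = self._KWL(self._x, self._y, self._f)
        Ks = self._kernel(self._x.reshape(nb_data, 1), x.reshape(1, nb_pred))
        # Mean and covariance of the latent function at the test points
        f = self._likelihood.dlogp(self._y, self._f).dot(Ks)
        v = np.linalg.solve(L, sqrt_W.dot(Ks))
        cov_f = self._kernel(x.reshape(nb_pred, 1), x.reshape(1, nb_pred)) - v.T.dot(v)
        return f, cov_f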


def kernel(x1, x2, length_scale=1, signal_variance=1):
    """Squared-exponential (RBF) covariance function"""
    return signal_variance * np.exp(-(x1 - x2)**2 / (2 * length_scale**2))


n_input = 10
n_pred = 100

x = np.arange(n_input)
# Labels are -1 on the left part of the domain and +1 on the right...
y = 2 * (x > n_input / 2) - 1
# ...except for the first point, whose label is deliberately flipped
y[0] = 1
x_pred = np.linspace(x.min() - 1, x.max() + 1, n_pred)
classifier = LaplaceGaussianProcessClassifier(kernel, ProbitLikelihood)
classifier.train(x, y)
y_pred = classifier.predict(x_pred)
#plt.fill_between(x_pred, y_pred + np.sqrt(cov_pred.diagonal()),
# y_pred - np.sqrt(cov_pred.diagonal()))
plt.plot(x, y, 'kx')
plt.plot(x_pred, y_pred, 'k-')
plt.show()