# Code to demonstrate a Sparse Variational Gaussian Process (SVGP) model.
# Adapted from the official GPflow documentation page:
# https://gpflow.readthedocs.io/en/master/notebooks/advanced/gps_for_big_data.html
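# Background: SVGP (Hensman et al., 2013) maximizes the evidence lower bound
#     ELBO = sum_i E_{q(f_i)}[log p(y_i | f_i)] - KL(q(u) || p(u)),
# which decomposes over data points and can therefore be estimated without
# bias on random minibatches. This is what lets the model scale to large N.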
import numpy as np
import gpflow
import tensorflow as tf
import matplotlib.pyplot as plt
from gpflow.ci_utils import ci_niter
plt.style.use("ggplot")
# Fix random seeds for reproducibility:
rng = np.random.RandomState(123)
tf.random.set_seed(42)
N = 10000 # Number of training observations
# Generate training data.
def func(x):
    """Function to generate the noiseless training targets Y."""
    return np.sin(x * 3 * 3.14) + 0.3 * np.cos(x * 9 * 3.14) + 0.5 * np.sin(x * 7 * 3.14)
minX = -1
maxX = 1
X = np.expand_dims(np.linspace(minX, maxX, N, endpoint=False), axis=1)
Y = func(X) + 0.2 * rng.randn(N, 1) # Noisy Y values
data = (X, Y)
# Uncomment to plot only training data.
# plt.plot(X, Y, "x", alpha=0.2)
# Xt = np.linspace(minX, maxX, 100)[:, None]
# Yt = func(Xt)
# plt.plot(Xt, Yt, c="k")
# plt.show()
# Choose equidistant locations as initial values for the inducing locations.
M = 15 # Number of inducing locations
idx = np.linspace(0, N, M, endpoint=False).astype(int)
Z = X[idx, :].copy()  # Initialize inducing locations at M equally spaced inputs from the dataset
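# Note: more inducing points tighten the variational approximation but increase
# the computational cost, so M trades off accuracy against speed.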
# Create SVGP model.
kernel = gpflow.kernels.SquaredExponential()
# whiten=True toggles the fₛ=Lu parameterization.
# whiten=False uses the original parameterization.
m = gpflow.models.SVGP(kernel, gpflow.likelihoods.Gaussian(), Z, num_data=N, whiten=True)
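# Uncomment to inspect the model before training: gpflow.utilities.print_summary
# (part of the GPflow 2.x utilities) lists the kernel hyperparameters, the
# likelihood variance, the inducing locations Z, and the variational
# parameters q_mu and q_sqrt.
# gpflow.utilities.print_summary(m)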
# Make the inducing locations trainable (set to False to keep Z fixed).
gpflow.set_trainable(m.inducing_variable, True)
def plot(title=""):
"""
Plot model prediction along with training data.
"""
plt.figure(figsize=(12, 4))
plt.title(title, fontsize=11)
pX = np.linspace(-1, 1, 100)[:, None] # Test locations
pY, pYv = m.predict_y(pX) # Predict Y values at test locations
plt.plot(X, Y, "x", label="Training points", alpha=0.2)
(line,) = plt.plot(pX, pY, lw=2.5, label="Mean of predictive posterior")
col = line.get_color()
plt.fill_between(
pX[:, 0],
(pY - 2 * pYv ** 0.5)[:, 0],
(pY + 2 * pYv ** 0.5)[:, 0],
color=col,
alpha=0.2,
lw=1.5,
)
Z = m.inducing_variable.Z.numpy()
plt.plot(Z, np.zeros_like(Z), "k|", mew=2, label="Inducing locations")
plt.legend(loc="lower right", fontsize=11)
# Set batch size.
minibatch_size = 100
train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)).repeat().shuffle(N)
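# Uncomment for a sanity check (done in the GPflow notebook): the ELBO computed
# on a minibatch is an unbiased estimate of the full-data ELBO, so estimates on
# random batches should scatter around the full-data value.
# elbo_fn = tf.function(m.elbo)
# print("Full-data ELBO:", elbo_fn(data).numpy())
# for batch in train_dataset.batch(minibatch_size).take(3):
#     print("Minibatch ELBO estimate:", elbo_fn(batch).numpy())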
def run_adam(model, iterations):
    """
    Utility function running the Adam optimizer.

    :param model: GPflow model
    :param iterations: number of iterations
    """
    logf = []
    # Build a compiled training-loss closure that draws a fresh minibatch
    # from the iterator at every evaluation.
    train_iter = iter(train_dataset.batch(minibatch_size))
    training_loss = model.training_loss_closure(train_iter, compile=True)
    optimizer = tf.optimizers.Adam()

    @tf.function
    def optimization_step():
        optimizer.minimize(training_loss, model.trainable_variables)

    for step in range(iterations):
        optimization_step()
        if step % 10 == 0:
            elbo = -training_loss().numpy()  # Minibatch estimate of the ELBO
            print(step, elbo)
            logf.append(elbo)
    return logf
plot("Predictions before training")
# Specify the number of optimization steps.
maxiter = ci_niter(30000)
# Perform optimization.
logf = run_adam(m, maxiter)
plot("Predictions after training")
plt.show()