## GP.R: Gaussian Process Regression
## An implementation of Gaussian Process regression in R with examples of fitting and plotting with multiple kernels.
## Input: none required
## Output: Generates multiple SVG plots
## This work is based on Chapter 2 of Rasmussen and Williams, available at [ http://www.gaussianprocess.org/ ].
## It was extremely helpful to follow along with the implementation at [ http://www.jameskeirstead.ca/blog/gaussian-process-regression-with-r/ ] (with some changes in notation).
# MASS provides mvrnorm(), used to sample from the multivariate normal
require(MASS)
## First, let's define some kernels. These quantify the similarity between pairs of points (i.e. the covariance between their function values).
## This specific set of kernels was taken from the GP Summer School lecture on Kernel Design [ http://gpss.cc/gpss15/ ]
# linear kernel
kfunc_linear = function(x1,x2) {
  return( x1 %*% t(x2) )
}
# exponential kernel (l is the length-scale)
kfunc_exp = function(x1,x2,l=1) {
  Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
  for (i in 1:nrow(Sigma)) {
    for (j in 1:ncol(Sigma)) {
      Sigma[i,j] <- exp(-1*abs(x1[i]-x2[j])/l)
    }
  }
  return(Sigma)
}
# Brownian kernel (the covariance of Brownian motion; only valid for non-negative inputs)
kfunc_brown = function(x1,x2) {
  Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
  for (i in 1:nrow(Sigma)) {
    for (j in 1:ncol(Sigma)) {
      Sigma[i,j] <- min(x1[i],x2[j])
    }
  }
  return(Sigma)
}
# Matern kernel (the nu = 3/2 form; l is the length-scale)
kfunc_matern = function(x1,x2,l=1) {
  Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
  for (i in 1:nrow(Sigma)) {
    for (j in 1:ncol(Sigma)) {
      Sigma[i,j] <- (1 + abs(x1[i]-x2[j])/l) * exp(-1*abs(x1[i]-x2[j])/l)
    }
  }
  return(Sigma)
}
# Gaussian (squared exponential) kernel (l is the length-scale)
kfunc_gauss = function(x1,x2,l=1) {
  Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
  for (i in 1:nrow(Sigma)) {
    for (j in 1:ncol(Sigma)) {
      Sigma[i,j] <- exp(-0.5*((x1[i]-x2[j])/l)^2)
    }
  }
  return(Sigma)
}
# sinc kernel: sin(r)/r, which is 0/0 = NaN at r = 0
kfunc_sinc = function(x1,x2,l=1) {
  Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
  for (i in 1:nrow(Sigma)) {
    for (j in 1:ncol(Sigma)) {
      r = abs(x1[i]-x2[j])/l
      Sigma[i,j] <- sin(r)/r
    }
  }
  # replace NaN entries with the limit of sin(r)/r at r = 0, which is 1
  # (this handles coincident points anywhere in the matrix, not just on the diagonal,
  # so it also works for rectangular train-vs-test covariance matrices)
  Sigma[is.nan(Sigma)] = 1
  return(Sigma)
}
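## As a quick sanity check (not part of the original analysis), a valid kernel must
## produce a symmetric, positive semi-definite covariance matrix. The sketch below
## evaluates each kernel on a small non-negative grid (the Brownian kernel is only
## valid for non-negative inputs) and reports the smallest eigenvalue; values at or
## just below zero are numerical noise.
x.check = matrix(seq(0,2,len=10),ncol=1)
for ( kfunc in list(kfunc_linear,kfunc_exp,kfunc_brown,kfunc_matern,kfunc_gauss,kfunc_sinc) ) {
  K = kfunc(x.check,x.check)
  cat( isSymmetric(K) , min(eigen(K,symmetric=TRUE,only.values=TRUE)$values) , "\n" )
}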
## Function to compute the GP posterior (predictive mean and covariance)
gp_solve = function( x.train , y.train , x.pred , kernel , sigma2e = 0 ) {
  solution = list()
  # covariance among the training points
  k.xx = kernel(x.train,x.train)
  # covariance between training and test points (k.x_xp is just t(k.xp_x), so the
  # second computation is redundant but kept for readability)
  k.x_xp = kernel(x.train,x.pred)
  k.xp_x = kernel(x.pred,x.train)
  # covariance among the test points (used for the covariance of the predictions)
  k.xp_xp = kernel(x.pred,x.pred)
  # noise-free prediction = K(xs,x) K(x,x)^{-1} y [eq 2.19]:
  # Vinv = solve(k.xx)
  # with noise, the mean and covariance functions become [eq 2.22-2.24], which only
  # requires adding an identity/ridge term to the training covariance
  Vinv = solve(k.xx + sigma2e * diag(1, ncol(k.xx)))
  # predictive mean and covariance (note: the covariance does not depend on y) [eq 2.19 or 2.23]
  solution[["mu"]] = k.xp_x %*% Vinv %*% y.train
  solution[["var"]] = k.xp_xp - k.xp_x %*% Vinv %*% k.x_xp
  return( solution )
}
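## For numerical stability, Rasmussen & Williams (Algorithm 2.1) recommend solving this
## system with a Cholesky factorization rather than an explicit inverse. Below is a
## minimal sketch of that variant (the helper name gp_solve_chol and its default jitter
## are ours, not from the book); it assumes k.xx + sigma2e*I is positive definite, and
## also returns the log marginal likelihood [eq 2.30], which is useful for comparing kernels.
gp_solve_chol = function( x.train , y.train , x.pred , kernel , sigma2e = 1e-8 ) {
  solution = list()
  k.xx = kernel(x.train,x.train)
  k.xp_x = kernel(x.pred,x.train)
  k.xp_xp = kernel(x.pred,x.pred)
  # upper-triangular factor R with t(R) %*% R = k.xx + sigma2e*I
  # (the small default sigma2e acts as jitter to keep the factorization stable)
  R = chol( k.xx + sigma2e * diag(1, ncol(k.xx)) )
  # alpha = (k.xx + sigma2e*I)^{-1} y via two triangular solves
  alpha = backsolve( R , forwardsolve( t(R) , y.train ) )
  solution[["mu"]] = k.xp_x %*% alpha
  # V = t(R)^{-1} K(x,xs); predictive covariance = K(xs,xs) - t(V) %*% V
  V = forwardsolve( t(R) , t(k.xp_x) )
  solution[["var"]] = k.xp_xp - t(V) %*% V
  # log marginal likelihood [eq 2.30]
  solution[["lml"]] = -0.5 * sum( y.train * alpha ) - sum(log(diag(R))) - 0.5 * nrow(k.xx) * log(2*pi)
  return( solution )
}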
## Function to plot the GP solution
gp_plot = function( x , y , x.pred , mu.pred , cov.pred , n.sample=0 , main="" , scale.se=T ) {
  if ( scale.se ) {
    # scale the y-axis around the confidence band
    yrng = range( c(y,mu.pred + 2*sqrt(diag(cov.pred)),mu.pred - 2*sqrt(diag(cov.pred))) , na.rm=T )
  } else {
    # scale the y-axis around the training data (useful for multiple plots of the same data)
    yrng = range( y , na.rm=T ) * 1.05
  }
  xrng = range( c(x,x.pred) , na.rm=T )
  plot(x,y,type="n",xlab="x",ylab="y", ylim=yrng , xlim=xrng , xaxs="i" , main=main )
  # plot the ~95% confidence band (the diagonal of the predicted covariance is the
  # variance of each prediction); simple version:
  # lines( x.pred , mu.pred + 2*sqrt(diag(cov.pred)) )
  # lines( x.pred , mu.pred - 2*sqrt(diag(cov.pred)) )
  # plot using pretty polygons instead
  se.pred = 2*sqrt(diag(cov.pred))
  poly.x = c(x.pred[1],x.pred,x.pred[length(x.pred)],rev(x.pred))
  poly.y = c( (mu.pred - se.pred)[1] , mu.pred + se.pred , (mu.pred - se.pred)[length(x.pred)] , rev(mu.pred - se.pred) )
  polygon( poly.x , poly.y , col="#3182bd40" , border="#3182bd50")
  # plot samples from the posterior
  # (this can be misleading: we do not have the function itself, only draws from an
  # MvN with the learned mean and covariance of the function)
  if ( n.sample > 0 ) {
    for (i in 1:n.sample) {
      samp.y = mvrnorm(1, mu.pred, cov.pred)
      lines( x.pred , samp.y , col="#666666")
    }
  }
  # plot the posterior mean
  lines( x.pred , mu.pred , col="#3182bd50",lwd=2)
  # plot the observations
  points(x,y,col="red",pch=19,cex=0.75)
}
### Analyses:
## Sampling from the prior
# points that we will be making predictions for (i.e. the test set, f^star)
x = matrix(seq(-5,5,len=100),ncol=1)
# covariance matrix for the inputs
sigma = kfunc_gauss( x , x )
# plot some samples from the prior sigma
svg("plot_gp_samples.svg")
gp_plot( NA , NA , x , rep(0,nrow(x)) , sigma , n.sample = 3 , "Samples from a Gaussian prior")
dev.off()
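## The smoothness of these prior draws is controlled entirely by the kernel. As a side
## illustration (not in the original gist; the output filename is ours), the exponential
## kernel produces much rougher sample paths than the Gaussian kernel above:
svg("plot_gp_samples_exp.svg")
gp_plot( NA , NA , x , rep(0,nrow(x)) , kfunc_exp(x,x) , n.sample = 3 , "Samples from an exponential prior")
dev.off()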
## Fitting the GP without noise (y = f(x), observed exactly)
# generate some sinusoidal training points
x = matrix( rnorm(5,0,2) ,ncol=1)
y = sin(x)
# define some testing points
x.test = matrix(seq(-5,5,len=100),nrow=100,ncol=1)
# solve the GP (without noise) and plot
gp = gp_solve( x , y , x.test , kfunc_gauss )
svg("plot_gp.svg")
gp_plot( x , y , x.test , gp[["mu"]] , gp[["var"]] , main="Gaussian Process regression" )
box()
dev.off()
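## A quick check (not in the original gist): with sigma2e = 0 the posterior mean must
## interpolate the training points exactly, since k.xp_x %*% solve(k.xx) is the identity
## when the prediction points equal the training points.
gp.check = gp_solve( x , y , x , kfunc_gauss )
print( max(abs( gp.check[["mu"]] - y )) ) # should be ~0 up to numerical error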
## Fitting the GP with noise (y = f(x) + e)
# assuming the noise is additive and iid (Note: the textbook writes the noise variance as sigma_n^2)
# Let's do this with multiple kernels.
# List of kernels and kernel names
kernels = list(kfunc_linear,kfunc_exp,kfunc_brown,kfunc_matern,kfunc_sinc,kfunc_gauss)
kernel.names = c("Linear","Exponential","Brownian","Matern","Sinc","Gaussian")
# Fit a noisy sine function with each kernel and plot the results
sigma2e = 0.001
y = sin(x)
y.noisy = y + rnorm(nrow(x),0,sqrt(sigma2e))
svg("plot_gp_multi_sin.svg",width=8,height=5)
par(mfrow=c(2,3))
for ( k in 1:length(kernels) ) {
  gp = gp_solve( x , y.noisy , x.test , kernels[[k]] , sigma2e )
  gp_plot( x , y.noisy , x.test , gp[["mu"]] , gp[["var"]] , main=kernel.names[k] , scale.se=F )
}
dev.off()
# Fit a noisy linear function with each kernel and plot the results
sigma2e = 0.1
y.noisy = x + rnorm(nrow(x),0,sqrt(sigma2e))
svg("plot_gp_multi_linear.svg",width=8,height=5)
par(mfrow=c(2,3))
for ( k in 1:length(kernels) ) {
  gp = gp_solve( x , y.noisy , x.test , kernels[[k]] , sigma2e )
  gp_plot( x , y.noisy , x.test , gp[["mu"]] , gp[["var"]] , main=kernel.names[k] , scale.se=F )
}
dev.off()
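## With the Cholesky variant above, the kernels can also be compared quantitatively by
## their log marginal likelihood [eq 2.30] rather than by eye. A sketch on the linear
## data (kernels whose covariance is not positive definite here, e.g. the Brownian
## kernel on negative inputs, are skipped):
for ( k in 1:length(kernels) ) {
  fit = try( gp_solve_chol( x , y.noisy , x.test , kernels[[k]] , sigma2e ) , silent=TRUE )
  if ( !inherits(fit,"try-error") ) cat( kernel.names[k] , fit[["lml"]] , "\n" )
}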