Instantly share code, notes, and snippets.

# sashagusev/GP.R Last active Feb 12, 2016

Gaussian Process Regression
 ## An implementation of Gaussian Process regression in R with examples of fitting and plotting with multiple kernels. ## Input: Does not require any input ## Output: Generates multiple SVG plots ## This is work based on Chapter 2 of Rasmussen and Williams, available at [ http://www.gaussianprocess.org/ ]. ## It was extremely helpful to follow along with the implementation at [ http://www.jameskeirstead.ca/blog/gaussian-process-regression-with-r/ with some change in notation ] # this library is necessary for sampling from the Multivariate Normal require(MASS) ## First, let's define some kernels. These are just ways to quantify differences between points. ## This specific set of kernels was taken from GP Summer School lecture on Kernel Design [ http://gpss.cc/gpss15/ ] # linear kernel kfunc_linear = function(x1,x2) { return( x1 %*% t(x2) ) } # exponential kernel kfunc_exp = function(x1,x2) { Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) for (i in 1:nrow(Sigma)) { for (j in 1:ncol(Sigma)) { Sigma[i,j] <- exp(-1*(abs(x1[i]-x2[j]))) } } return(Sigma) } # browninan kernel kfunc_brown = function(x1,x2,l=1) { Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) for (i in 1:nrow(Sigma)) { for (j in 1:ncol(Sigma)) { Sigma[i,j] <- min(x1[i],x2[j]) } } return(Sigma) } # matern kernel kfunc_matern = function(x1,x2,l=1) { Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) for (i in 1:nrow(Sigma)) { for (j in 1:ncol(Sigma)) { Sigma[i,j] <- (1 + abs(x1[i]-x2[j])) * exp(-1*abs(x1[i]-x2[j])) } } return(Sigma) } # gaussian kernel kfunc_gauss = function(x1,x2,l=1) { Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) for (i in 1:nrow(Sigma)) { for (j in 1:ncol(Sigma)) { Sigma[i,j] <- exp(-0.5*(x1[i]-x2[j])^2) } } return(Sigma) } # sin kernel kfunc_sinc = function(x1,x2,l=1) { Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) for (i in 1:nrow(Sigma)) { for (j in 1:ncol(Sigma)) { Sigma[i,j] <- sin(abs(x1[i]-x2[j]))/abs(x1[i]-x2[j]) } } diag(Sigma) = 1 return(Sigma) } ## Function to get the GP solution gp_solve = function( x.train , y.train , x.pred , kernel , sigma2e = 0 ) { solution = list() # compute training covariance matrix (used to get relationships in training) k.xx = kernel(x.train,x.train) # compute covariance between training and testing (used to predict weights into new data), these are redundant k.x_xp = kernel(x.train,x.pred) k.xp_x = kernel(x.pred,x.train) # compute covariance between testing (used to estimate covariance of predictions in new data) k.xp_xp = kernel(x.pred,x.pred) # compute the prediction without noise = K(xs,x) K(x,x)^{-1} y [eq 2.19] # Vinv = solve(k.xx) # Mean and covariance functions now include noise [eq 2.22-2.24], which only requires adding an identity/ridge term to the variance component Vinv = solve(k.xx + sigma2e * diag(1, ncol(k.xx))) # compute the estimate and variance for the prediction (note, does not depend on y) [eq 2.19 or 2.23] solution[["mu"]] = k.xp_x %*% Vinv %*% y.train solution[["var"]] = k.xp_xp - k.xp_x %*% Vinv %*% k.x_xp return( solution ) } ## function to plot the GP solution gp_plot = function( x , y , x.pred , mu.pred , cov.pred , n.sample=0 , main="" , scale.se=T ) { if ( scale.se ) { # scale around confidence intervals yrng = range( c(y,mu.pred + 2*sqrt(diag(cov.pred)),mu.pred - 2*sqrt(diag(cov.pred))) , na.rm=T ) } else { # scale around training data (useful for multiple plots of same data) yrng = range( y , na.rm=T ) * 1.05 } xrng = range( c(x,x.pred) , na.rm=T ) plot(x,y,type="n",xlab="x",ylab="y", ylim=yrng , xlim=xrng , xaxs="i" , main=main ) # plot the confidence interval (diagonal of the predicted cov is the variance of each prediction) # lines( x.pred , mu.pred + 2*sqrt(diag(cov.pred)) ) # lines( x.pred , mu.pred - 2*sqrt(diag(cov.pred)) ) # plot using pretty polygons se.pred = 2*sqrt(diag(cov.pred)) poly.x = c(x.pred[1],x.pred,x.pred[length(x.pred)],rev(x.pred)) poly.y = c( (mu.pred - se.pred)[1] , mu.pred + se.pred , (mu.pred - se.pred)[length(x.pred)] , rev(mu.pred - se.pred) ) polygon( poly.x , poly.y , col="#3182bd40" , border="#3182bd50") # plot samples from the posterior # (this can be misleading, we don't actually have the function, we have draws from an MvN using the learned mean and covariance of the function) if ( n.sample > 0 ) { for (i in 1:n.sample) { samp.y = mvrnorm(1, mu.pred, cov.pred) lines( x.pred , samp.y , col="#666666") } } # plot the mean lines( x.pred , mu.pred , col="#3182bd50",lwd=2) # plot the observations points(x,y,col="red",pch=19,cex=0.75) } ### Analyses: ## Sampling from the prior # points that we will be making predictions for (i.e. the test set, f^star) x = matrix(seq(-5,5,len=100),ncol=1,nrow=50) # covariance matrix for the inputs sigma = kfunc_gauss( x , x ) # plot some samples from the prior sigma svg("plot_gp_samples.svg") gp_plot( NA , NA , x , rep(0,nrow(x)) , sigma , n.sample = 3 , "Samples from a Gaussian prior") dev.off() ## Fitting the GP without noise (y = k(x)) # generate some sinusoidal training points x = matrix( rnorm(5,0,2) ,ncol=1) y = sin(x) # define some testing points x.test = matrix(seq(-5,5,len=100),nrow=100,ncol=1) # solve the GP (without noise) and plot gp = gp_solve( x , y , x.test , kfunc_gauss ) svg("plot_gp.svg") gp_plot( x , y , x.test , gp[["mu"]] , gp[["var"]] , main="Gaussian Process regression" ) box() dev.off() ## Fitting the GP with noise (y = k(x) + e) # assuming noise is additive and iid (Note: textbook calls it sigma.n) # Let's do this with multiple kernels, # List of kernels and kernel names kernels = c(kfunc_linear,kfunc_exp,kfunc_brown,kfunc_matern,kfunc_sinc,kfunc_gauss) kernel.names = c("Linear","Exponential","Browninan","Matern","sinc","Gaussian") # Plotting a generating sin function with multiple kernels sigma2e = 0.001 y = sin(x) y.noisy = y + rnorm(nrow(x),0,sqrt(sigma2e)) svg("plot_gp_multi_sin.svg",width=8,height=5) par(mfrow=c(2,3)) for ( k in 1:length(kernels) ) { gp = gp_solve( x , y.noisy , x.test , kernels[[k]] , sigma2e ) gp_plot( x , y.noisy , x.test , gp[["mu"]] , gp[["var"]] , main=kernel.names[k] , scale.se=F ) } dev.off() # Plotting a generating linear function with multiple kernels sigma2e = 0.1 y.noisy = x + rnorm(nrow(x),0,sqrt(sigma2e)) svg("plot_gp_multi_linear.svg",width=8,height=5) par(mfrow=c(2,3)) kernels = c(kfunc_linear,kfunc_exp,kfunc_brown,kfunc_matern,kfunc_sinc,kfunc_gauss) kernel.names = c("Linear","Exponential","Browninan","Matern","sinc","Gaussian") for ( k in 1:length(kernels) ) { gp = gp_solve( x , y.noisy , x.test , kernels[[k]] , sigma2e ) gp_plot( x , y.noisy , x.test , gp[["mu"]] , gp[["var"]] , main=kernel.names[k] , scale.se=F ) } dev.off()