## @sashagusev, last active April 17, 2018
## Gaussian process regression with fixed effects/basis functions
require(MASS) # mvrnorm() is used below to draw samples from the GP posterior
## This is work based on Chapter 2 of Rasmussen and Williams, available at [ http://www.gaussianprocess.org/ ].
### GP solution and plotting:
gp_solve_basis = function( x.train , y.train , x.pred , h.train, h.pred , kernel , sigma2e = 0 ) {
# Args:
# x.train: matrix of training data
# y.train: matrix of training labels
# x.pred: matrix of prediction data
# h.train: matrix of fixed-effects/basis functions for training data
# h.pred: matrix of fixed-effects for test data
# kernel: function computing the covariance matrix between two sets of inputs
# sigma2e: observation noise variance added to the diagonal of the training covariance
solution = list()
# compute training covariance matrix (used to get relationships in training)
k.xx = kernel(x.train,x.train)
# compute covariance between training and testing points (used to project onto new data); k.x_xp and k.xp_x are transposes of each other, so computing both is redundant
k.x_xp = kernel(x.train,x.pred)
k.xp_x = kernel(x.pred,x.train)
# compute covariance between testing (used to estimate covariance of predictions in new data)
k.xp_xp = kernel(x.pred,x.pred)
# invert the variance component
Vinv = solve(k.xx + sigma2e * diag(1, ncol(k.xx)))
# define b and B, the prior mean and covariance on the fixed effects,
# and invert the prior covariance
b = matrix(0,ncol=1,nrow=ncol(h.train))
B.var = diag(ncol(h.train))
Binv = solve(B.var)
# compute the estimate and variance for the prediction including fixed basis [eq 2.41]
# old mean: solution[["mu"]] = k.xp_x %*% Vinv %*% y.train
# old var: solution[["var"]] = k.xp_xp - k.xp_x %*% Vinv %*% k.x_xp
## we reuse the old (without fixed effects) variance term:
no_fe.var = k.xp_xp - k.xp_x %*% Vinv %*% k.x_xp
## we reuse the old (without fixed effects) mean term:
no_fe.mu = k.xp_x %*% Vinv %*% y.train
## b.bar: GLS estimate of the fixed effects
# first term: the precision of the fixed effects under V, plus the prior precision Binv (a ridge)
# second term: the fixed effects regressed on y under V, plus the prior mean weighted by Binv
b.bar = solve(Binv + t(h.train) %*% Vinv %*% h.train) %*% (t(h.train) %*% Vinv %*% y.train + Binv %*% b)
## R : the test-point basis after removing what the training covariance already explains
R = t(h.pred) - t(h.train) %*% Vinv %*% k.x_xp
## new prediction
# now includes an h.pred %*% b.bar offset and y residualized on the fitted fixed effects
# expanded form:
# solution[["mu"]] = h.pred %*% b.bar + k.xp_x %*% Vinv %*% (y.train - h.train %*% b.bar)
# compact form:
# prediction is now the old prediction plus the fixed effects predicted on the V residual
solution[["mu"]] = no_fe.mu + t(R) %*% b.bar
## new covariance
# we take the old variance and add the uncertainty from estimating the fixed effects
solution[["var"]] = no_fe.var + t(R) %*% solve(Binv + t(h.train) %*% Vinv %*% h.train) %*% R
solution[["bbar"]] = b.bar
solution[["h"]] = h.pred
solution[["f_h"]] = h.pred %*% b.bar
solution[["x"]] = x.pred
return( solution )
}
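## A quick self-contained sanity check (not part of the original gist; the .chk
## objects and the inline kernel below are illustrative assumptions). With an
## all-zero basis matrix, b.bar and R vanish, so the result should reduce to the
## standard GP predictive mean k.xp_x %*% Vinv %*% y.train.
k.chk = function(a,b) exp( -0.5 * outer(as.vector(a),as.vector(b),"-")^2 )
x.chk = matrix(seq(-1,1,len=5),ncol=1)
y.chk = matrix(sin(3*x.chk),ncol=1)
xp.chk = matrix(seq(-1,1,len=20),ncol=1)
h0.chk = matrix(0,ncol=1,nrow=nrow(x.chk))
h0p.chk = matrix(0,ncol=1,nrow=nrow(xp.chk))
gp.chk = gp_solve_basis( x.chk , y.chk , xp.chk , h0.chk , h0p.chk , k.chk , 0.1 )
Vinv.chk = solve( k.chk(x.chk,x.chk) + 0.1 * diag(nrow(x.chk)) )
stopifnot( max(abs( gp.chk[["mu"]] - k.chk(xp.chk,x.chk) %*% Vinv.chk %*% y.chk )) < 1e-8 )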
gp_plot = function( x , y , GP , n.sample=0 , ... ) {
# Args:
# x: x-values of training data
# y: y-values of training data
# GP: list holding the GP solution
# GP[["x"]]: the GP test points
# GP[["mu"]]: the GP prediction
# GP[["var"]]: the GP prediction covariance
# GP[["h"]]: the fixed basis points
# GP[["f_h"]]: the predicted fixed basis points
# GP[["bbar"]]: the learned \betas
# scale around confidence intervals
# yrng = range( c(y,mu.pred + 2*sqrt(diag(cov.pred)),mu.pred - 2*sqrt(diag(cov.pred))) , na.rm=T )
# scale around training data (useful for multiple plots of same data)
# yrng = range( y , na.rm=T ) * 1.05
# xrng = range( c(x,x.pred) , na.rm=T )
mu.pred = GP[["mu"]]
cov.pred = GP[["var"]]
x.pred = GP[["x"]]
plot(x,y,type="n",xlab="x",ylab="y" , ... )
# plot the confidence interval (diagonal of the predicted cov is the variance of each prediction)
# lines( x.pred , mu.pred + 2*sqrt(diag(cov.pred)) )
# lines( x.pred , mu.pred - 2*sqrt(diag(cov.pred)) )
# plot using pretty polygons
se.pred = 2*sqrt(diag(cov.pred))
poly.x = c(x.pred[1],x.pred,x.pred[length(x.pred)],rev(x.pred))
poly.y = c( (mu.pred - se.pred)[1] , mu.pred + se.pred , (mu.pred - se.pred)[length(x.pred)] , rev(mu.pred - se.pred) )
polygon( poly.x , poly.y , col="#3182bd40" , border="#3182bd50")
# plot samples from the posterior
if ( n.sample > 0 ) {
for (i in 1:n.sample) {
samp.y = mvrnorm(1, mu.pred, cov.pred)
lines( x.pred , samp.y , col="#666666")
}
}
# plot the prediction
lines( x.pred , mu.pred , col="#3182bd50",lwd=2)
# plot the fixed effects (if any)
if ( !is.null( GP[["f_h"]] ) ) {
lines( x.pred , GP[["f_h"]] , col="gray", lwd = 2 )
}
# plot the observations
points(x,y,col="black",pch=19,cex=0.75)
box()
}
### Kernels:
# linear kernel
kfunc_linear = function(x1,x2) {
return( x1 %*% t(x2) )
}
# linear * linear = quadratic
kfunc_linear2 = function(x1,x2) {
return( kfunc_linear(x1,x2)^2 )
}
# exponential kernel
kfunc_exp = function(x1,x2) {
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
for (i in 1:nrow(Sigma)) {
for (j in 1:ncol(Sigma)) {
Sigma[i,j] <- exp(-1*(abs(x1[i]-x2[j])))
}
}
return(Sigma)
}
# brownian kernel
kfunc_brown = function(x1,x2,l=1) {
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
for (i in 1:nrow(Sigma)) {
for (j in 1:ncol(Sigma)) {
Sigma[i,j] <- min(x1[i],x2[j])
}
}
return(Sigma)
}
# matern kernel
kfunc_matern = function(x1,x2,l=1) {
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
for (i in 1:nrow(Sigma)) {
for (j in 1:ncol(Sigma)) {
Sigma[i,j] <- (1 + abs(x1[i]-x2[j])) * exp(-1*abs(x1[i]-x2[j]))
}
}
return(Sigma)
}
# gaussian kernel
kfunc_gauss = function(x1,x2,l=1) {
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
for (i in 1:nrow(Sigma)) {
for (j in 1:ncol(Sigma)) {
Sigma[i,j] <- exp(-0.5*(x1[i]-x2[j])^2)
}
}
return(Sigma)
}
# sinc kernel
kfunc_sinc = function(x1,x2,l=1) {
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1))
for (i in 1:nrow(Sigma)) {
for (j in 1:ncol(Sigma)) {
# take the limiting value of 1 when the two points coincide, avoiding 0/0
d.ij = abs(x1[i]-x2[j])
Sigma[i,j] <- ifelse( d.ij == 0 , 1 , sin(d.ij)/d.ij )
}
}
diag(Sigma) = 1
return(Sigma)
}
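## The loop-based kernels above are easy to read but slow for many points. As a
## sketch (not part of the original gist), the gaussian kernel can also be written
## without loops using outer(); kfunc_gauss_vec is an assumed name and should
## return the same matrix as kfunc_gauss.
kfunc_gauss_vec = function(x1,x2,l=1) {
# pairwise differences between all inputs, then the squared-exponential form
# (l is kept for interface compatibility but, as in kfunc_gauss, unused)
d = outer(as.vector(x1),as.vector(x2),"-")
return( exp(-0.5 * d^2) )
}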
### Analyses:
## mean shift
x = matrix( rnorm(10,0,2) ,ncol=1)
x = (x - min(x)) * 4 / (max(x)-min(x)) - 2
h = matrix(1,ncol=1,nrow=nrow(x))
y = 10 * h + sin(x)
yrng = c(0,15)
xrng = c(-5,5)
x.test = matrix(seq(-5,5,len=100),nrow=100,ncol=1)
h.test = matrix(1,ncol=1,nrow=nrow(x.test))
svg("plot_gp_mean_est.svg",width=7,height=6)
par(mfrow=c(1,2),cex.axis=0.75,las=1)
gp = gp_solve_basis( x , y , x.test , matrix(0,ncol=1,nrow=nrow(x)) , matrix(0,ncol=1,nrow=nrow(x.test)) , kfunc_gauss , 0.2)
gp_plot( x , y , gp , ylim=yrng , xlim=xrng , main="Zero mean",xaxs="i")
gp = gp_solve_basis( x , y , x.test , h , h.test , kfunc_gauss , 0.2)
gp_plot( x , y , gp , ylim=yrng , xlim=xrng , main="Inferred mean",xaxs="i")
dev.off()
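## Optional inspection (a sketch, not in the original analysis): print the inferred
## intercept from the fit above. Note the estimate is shrunk toward the prior mean
## b = 0 by the prior precision Binv (B is the identity here), so it need not equal
## the generating offset of 10 exactly.
print( round(gp[["bbar"]],2) )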
## mixed effects
svg("plot_gp_basis.svg",width=8,height=8)
par(mfrow=c(2,2),cex.axis=0.75,las=1)
yrng = c(-1,30)
xrng = c(-2,2)
# linear function
x = matrix( rnorm(10,0,2) ,ncol=1)
x = (x - min(x)) * 4 / (max(x)-min(x)) - 2
h = cbind(rep(1,nrow(x)),x)
y = h %*% c(10,-5)
plot( x , y , pch=19 , col="black" , xlab="x" , ylab="y" , main="1: Linear effect\nb = -5", ylim=yrng , cex=0.75 , xlim=xrng)
# non-linear effect
u = 2*x^2
plot( x , u , pch=19 , col="black" , xlab="x" , ylab="y" , main="2: Non-linear effect" , ylim=yrng , cex=0.75 , xlim=xrng)
# combine the two
y = y + u
plot( x , y , pch=19 , col="black" , xlab="x" , ylab="y" , main="3: (1) + (2)" , ylim=yrng , cex=0.75 , xlim=xrng)
# define some testing points
x.test = matrix(seq(-4,4,len=100),nrow=100,ncol=1)
h.test = cbind(rep(1,nrow(x.test)),x.test)
gp = gp_solve_basis( x , y , x.test , h , h.test , kfunc_linear2 , 0.1 )
gp_plot( x , y , gp , main=paste("4: GP with inferred basis\nb-est =",round(gp[["bbar"]][2],1)) , ylim=yrng , xlim=xrng)
dev.off()
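## Optional extra figure (a sketch, not in the original analyses): redraw the last
## fit with a few posterior draws via gp_plot's n.sample argument; the output file
## name is illustrative only.
svg("plot_gp_basis_samples.svg",width=5,height=5)
par(cex.axis=0.75,las=1)
gp_plot( x , y , gp , n.sample=3 , ylim=yrng , xlim=xrng , main="GP with inferred basis\nposterior draws")
dev.off()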