Last active
April 17, 2018 01:11
-
-
Save sashagusev/ebf675209260880d7568 to your computer and use it in GitHub Desktop.
Gaussian process regression with fixed effects/basis functions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(MASS) | |
## This is work based on Chapter 2 of Rasmussen and Williams, availabler at [ http://www.gaussianprocess.org/ ]. | |
### GP solution and plotting: | |
gp_solve_basis = function( x.train , y.train , x.pred , h.train, h.pred , kernel , sigma2e = 0 ) { | |
# Args: | |
# x.train: matrix of training data | |
# y.train: matrix of training labels | |
# x.pred: matrix of prediction data | |
# h.train: matrix of fixed-effects/basis functions for training data | |
# h.pred: matrix of fixed-effects for test data | |
# kernel: function for computing covariance of matrices | |
# sigma2e: amount of noise in the covariance | |
solution = list() | |
# compute training covariance matrix (used to get relationships in training) | |
k.xx = kernel(x.train,x.train) | |
# compute covariance between training and testing (used to predict weights into new data), these are redundant | |
k.x_xp = kernel(x.train,x.pred) | |
k.xp_x = kernel(x.pred,x.train) | |
# compute covariance between testing (used to estimate covariance of predictions in new data) | |
k.xp_xp = kernel(x.pred,x.pred) | |
# invert the variance component | |
Vinv = solve(k.xx + sigma2e * diag(1, ncol(k.xx))) | |
# define b and Binv, the priors on the fixed effects | |
# invert the fixed effect matrix | |
b = matrix(0,ncol=1,nrow=ncol(h.train)) | |
B.var = diag(ncol(h.train)) | |
Binv = solve(B.var) | |
# compute the estimate and variance for the prediction including fixed basis [eq 2.41] | |
# old mean: solution[["mu"]] = k.xp_x %*% Vinv %*% y.train | |
# old var: solution[["var"]] = k.xp_xp - k.xp_x %*% Vinv %*% k.x_xp | |
## we reuse the old (without fixed effects) variance term: | |
no_fe.var = k.xp_xp - k.xp_x %*% Vinv %*% k.x_xp | |
## we reuse the old (without fixed effects) mean term: | |
no_fe.mu = k.xp_x %*% Vinv %*% y.train | |
## b.bar: solve for the estimated fixed effects | |
# first paren gets the fixed effects on the V residual, and solves with a ridge | |
# second paren regresses fixed effects on y on the V residual, and solves with a ridge | |
b.bar = solve(Binv + t(h.train) %*% Vinv %*% h.train) %*% (t(h.train) %*% Vinv %*% y.train + Binv %*% b) | |
## R : solve for the fixed effects on the V residual | |
R = t(h.pred) - t(h.train) %*% Vinv %*% k.x_xp | |
## new prediction | |
# now includes a b.bar offset and y residualized on b.bar | |
# expanded form: | |
# solution[["mu"]] = h.train %*% b.bar + K.xp_x %*% Vinv %*% (y.train - h %*% b.bar) | |
# compact form: | |
# prediction is now the old prediction plus the fixed effects predicted on the V residual | |
solution[["mu"]] = no_fe.mu + t(R) %*% b.bar | |
## new covariance | |
# we take the old variance, and add the uncertainty in the h | |
solution[["var"]] = no_fe.var + t(R) %*% solve(Binv + t(h) %*% Vinv %*% h) %*% R | |
solution[["bbar"]] = b.bar | |
solution[["h"]] = h.pred | |
solution[["f_h"]] = h.pred %*% b.bar | |
solution[["x"]] = x.pred | |
return( solution ) | |
} | |
gp_plot = function( x , y , GP , n.sample=0 , ... ) { | |
# Args: | |
# x: x-values of training data | |
# y: y-values of training data | |
# GP: list holding the GP solution | |
# GP[["x"]]: the GP test points | |
# GP[["mu"]]: the GP prediction | |
# GP[["var"]]: the GP prediction covariance | |
# GP[["h"]]: the fixed basis points | |
# GP[["f_h"]]: the predicted fixed basis points | |
# GP[["bbar"]]: the learned \betas | |
# scale around confidence intervals | |
# yrng = range( c(y,mu.pred + 2*sqrt(diag(cov.pred)),mu.pred - 2*sqrt(diag(cov.pred))) , na.rm=T ) | |
# scale around training data (useful for multiple plots of same data) | |
# yrng = range( y , na.rm=T ) * 1.05 | |
# xrng = range( c(x,x.pred) , na.rm=T ) | |
mu.pred = GP[["mu"]] | |
cov.pred = GP[["var"]] | |
x.pred = GP[["x"]] | |
plot(x,y,type="n",xlab="x",ylab="y" , ... ) | |
# plot the confidence interval (diagonal of the predicted cov is the variance of each prediction) | |
# lines( x.pred , mu.pred + 2*sqrt(diag(cov.pred)) ) | |
# lines( x.pred , mu.pred - 2*sqrt(diag(cov.pred)) ) | |
# plot using pretty polygons | |
se.pred = 2*sqrt(diag(cov.pred)) | |
poly.x = c(x.pred[1],x.pred,x.pred[length(x.pred)],rev(x.pred)) | |
poly.y = c( (mu.pred - se.pred)[1] , mu.pred + se.pred , (mu.pred - se.pred)[length(x.pred)] , rev(mu.pred - se.pred) ) | |
polygon( poly.x , poly.y , col="#3182bd40" , border="#3182bd50") | |
# plot samples from the posterior | |
if ( n.sample > 0 ) { | |
for (i in 1:n.sample) { | |
samp.y = mvrnorm(1, mu.pred, cov.pred) | |
lines( x.pred , samp.y , col="#666666") | |
} | |
} | |
# plot the prediction | |
lines( x.pred , mu.pred , col="#3182bd50",lwd=2) | |
# plot the fixed effects (if any) | |
if ( !is.null( GP[["f_h"]] ) ) { | |
lines( x.pred , GP[["f_h"]] , col="gray", lwd = 2 ) | |
} | |
# plot the observations | |
points(x,y,col="black",pch=19,cex=0.75) | |
box() | |
} | |
### Kernels: | |
# linear kernel | |
kfunc_linear = function(x1,x2) { | |
return( x1 %*% t(x2) ) | |
} | |
# linear * linear = quadtratic | |
kfunc_linear2 = function(x1,x2) { | |
return( kfunc_linear(x1,x2)^2 ) | |
} | |
# exponential kernel | |
kfunc_exp = function(x1,x2) { | |
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) | |
for (i in 1:nrow(Sigma)) { | |
for (j in 1:ncol(Sigma)) { | |
Sigma[i,j] <- exp(-1*(abs(x1[i]-x2[j]))) | |
} | |
} | |
return(Sigma) | |
} | |
# browninan kernel | |
kfunc_brown = function(x1,x2,l=1) { | |
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) | |
for (i in 1:nrow(Sigma)) { | |
for (j in 1:ncol(Sigma)) { | |
Sigma[i,j] <- min(x1[i],x2[j]) | |
} | |
} | |
return(Sigma) | |
} | |
# matern kernel | |
kfunc_matern = function(x1,x2,l=1) { | |
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) | |
for (i in 1:nrow(Sigma)) { | |
for (j in 1:ncol(Sigma)) { | |
Sigma[i,j] <- (1 + abs(x1[i]-x2[j])) * exp(-1*abs(x1[i]-x2[j])) | |
} | |
} | |
return(Sigma) | |
} | |
# gaussian kernel | |
kfunc_gauss = function(x1,x2,l=1) { | |
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) | |
for (i in 1:nrow(Sigma)) { | |
for (j in 1:ncol(Sigma)) { | |
Sigma[i,j] <- exp(-0.5*(x1[i]-x2[j])^2) | |
} | |
} | |
return(Sigma) | |
} | |
# sin kernel | |
kfunc_sinc = function(x1,x2,l=1) { | |
Sigma <- matrix(rep(0, length(x1)*length(x2)), nrow=length(x1)) | |
for (i in 1:nrow(Sigma)) { | |
for (j in 1:ncol(Sigma)) { | |
Sigma[i,j] <- sin(abs(x1[i]-x2[j]))/abs(x1[i]-x2[j]) | |
} | |
} | |
diag(Sigma) = 1 | |
return(Sigma) | |
} | |
### Analyses: | |
## mean shift | |
x = matrix( rnorm(10,0,2) ,ncol=1) | |
x = (x - min(x)) * 4 / (max(x)-min(x)) - 2 | |
h = matrix(1,ncol=1,nrow=nrow(x)) | |
y = 10 * h + sin(x) | |
yrng = c(0,15) | |
xrng = c(-5,5) | |
x.test = matrix(seq(-5,5,len=100),nrow=100,ncol=1) | |
h.test = matrix(1,ncol=1,nrow=nrow(x.test)) | |
svg("plot_gp_mean_est.svg",width=7,height=6) | |
par(mfrow=c(1,2),cex.axis=0.75,las=1) | |
gp = gp_solve_basis( x , y , x.test , matrix(0,ncol=1,nrow=nrow(x)) , matrix(0,ncol=1,nrow=nrow(x.test)) , kfunc_gauss , 0.2) | |
gp_plot( x , y , gp , ylim=yrng , xlim=xrng , main="Zero mean",xaxs="i") | |
gp = gp_solve_basis( x , y , x.test , h , h.test , kfunc_gauss , 0.2) | |
gp_plot( x , y , gp , ylim=yrng , xlim=xrng , main="Inferred mean",xaxs="i") | |
dev.off() | |
## mixed effects | |
svg("plot_gp_basis.svg",width=8,height=8) | |
par(mfrow=c(2,2),cex.axis=0.75,las=1) | |
yrng = c(-1,30) | |
xrng = c(-2,2) | |
# linear function | |
x = matrix( rnorm(10,0,2) ,ncol=1) | |
x = (x - min(x)) * 4 / (max(x)-min(x)) - 2 | |
h = cbind(rep(1,nrow(x)),x) | |
y = h %*% c(10,-5) | |
plot( x , y , pch=19 , col="black" , xlab="x" , ylab="y" , main="1: Linear effect\nb = -5", ylim=yrng , cex=0.75 , xlim=xrng) | |
# non-linear noise | |
u = 2*x^2 | |
plot( x , u , pch=19 , col="black" , xlab="x" , ylab="y" , main="2: Non-linear effect" , ylim=yrng , cex=0.75 , xlim=xrng) | |
# combine the two | |
y = y + u | |
plot( x , y , pch=19 , col="black" , xlab="x" , ylab="y" , main="3: (1) + (2)" , ylim=yrng , cex=0.75 , xlim=xrng) | |
# define some testing points | |
x.test = matrix(seq(-4,4,len=100),nrow=100,ncol=1) | |
h.test = cbind(rep(1,nrow(x.test)),x.test) | |
gp = gp_solve_basis( x , y , x.test , h , h.test , kfunc_linear2 , 0.1 ) | |
gp_plot( x , y , gp , main=paste("4: GP with inferred basis\nb-est =",round(gp[["bbar"]][2],1)) , ylim=yrng , xlim=xrng) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment