Last active
March 12, 2017 16:09
-
-
Save mike-lawrence/b56ac2d9450233837d426dd17fe20557 to your computer and use it in GitHub Desktop.
Comparing cov_exp_quad to alternative gp optimizations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#load packages | |
library(tidyverse) | |
library(rstan) | |
rstan_options(auto_write = TRUE) | |
# Make some fake data ---- | |
#set random seed for reproducibility | |
set.seed(1) | |
#nX: number of unique samples on x-axis | |
nX = 100 | |
#define the x-axis sample points | |
# on a grid: | |
x = seq(-5,5,length.out=nX) | |
# not on a grid (udx will not perform well in this case): | |
#x = sort(runif(nX,-5,5)) | |
#define a wiggly function on x | |
f = sin(x)*dnorm(x,5,8) | |
plot(x,f) | |
#add noise | |
y = rnorm(nX,f,.01) | |
#plot | |
p = ggplot()+ | |
geom_point( | |
data = data.frame(x,y) | |
, mapping = aes( | |
x = x | |
, y = y | |
) | |
, alpha = .5 | |
) | |
print(p) | |
#Prep the data ---- | |
#scale for default prior specification | |
x_scaled = (x-min(x))/max(x-min(x)) #0-1 | |
y_scaled = scale(y)[,1] #mean0,sd1 | |
#compute unique pairwise x differences & indices thereof for the pairwise matrix | |
x_unique = sort(unique(x_scaled)) | |
nX = length(x_unique) | |
dx = matrix(NA,nX,nX) | |
for(i in 1:nX){ | |
for(j in 1:nX){ | |
dx[i,j] = round(-((x_unique[i]-x_unique[j])^2),10) | |
} | |
} | |
dx_unique = rev(sort(unique(as.vector(dx)))) | |
DXind = matrix(NA,nX,nX) | |
for(i in 1:nX){ | |
for(j in 1:nX){ | |
DXind[i,j] = which(dx_unique==dx[i,j]) | |
} | |
} | |
#check that we computed the above correctly | |
temp = matrix(NA,nX,nX) | |
for(i in 1:nX){ | |
for(j in 1:nX){ | |
temp[i,j] = dx[i,j]==dx_unique[DXind[i,j]] | |
} | |
} | |
all(temp) | |
#Sample the model that uses cov_exp_quad ---- | |
post_ceq = rstan::stan( | |
file = "gp_ceq.stan" | |
, data=list( | |
nX = length(x) | |
, X = x_scaled | |
, nY = length(y) | |
, Y = y_scaled | |
, Xind = 1:nX | |
) | |
, iter = 2e2 #2e2 takes about 1m when nX=100 | |
, chains = parallel::detectCores() | |
, cores = parallel::detectCores() | |
, seed = 1 | |
) | |
alarm() | |
summary(rowSums(get_elapsed_time(post_ceq))/60) | |
#look at summary for the GP parameters ---- | |
print( | |
post_ceq | |
, pars = c('eta','rho','noise') | |
, probs = c(.025,.5,.975) | |
, digits = 2 | |
, use_cache = FALSE | |
) | |
#plot the CEQ model's results ---- | |
#get posterior samples on latent function f | |
f = extract( | |
post_ceq | |
, pars = 'f' | |
)[[1]] | |
#revert to original scale | |
f = f*sd(y) + mean(y) | |
#compute median & 95% credible intervals | |
f_med = apply(f,2,median) | |
f_loci = apply(f,2,quantile,prob=.025) | |
f_hici = apply(f,2,quantile,prob=.975) | |
#add to plot | |
p = p + geom_ribbon( | |
data = data.frame( | |
x = x | |
, ymin = f_loci | |
, ymax = f_hici | |
) | |
, mapping = aes( | |
x = x | |
, ymin = ymin | |
, ymax = ymax | |
) | |
, alpha = .5 | |
, fill = 'green' | |
) | |
p = p + geom_line( | |
data = data.frame( | |
x = x | |
, y = f_med | |
) | |
, mapping = aes( | |
x = x | |
, y = y | |
) | |
, alpha = .5 | |
, colour = 'green' | |
) | |
print(p) | |
#Sample the model that uses pre-computed distances ---- | |
post_dx = rstan::stan( | |
file = "gp_dx.stan" | |
, data=list( | |
nX = length(x) | |
, DX = dx | |
, nY = length(y) | |
, Y = y_scaled | |
, Xind = 1:nX | |
) | |
, iter = 2e2 #2e2 takes about 1.5m when nX=100 | |
, chains = parallel::detectCores() | |
, cores = parallel::detectCores() | |
, seed = 1 | |
) | |
alarm() | |
summary(rowSums(get_elapsed_time(post_dx))/60) | |
#slower than ceq, so ceq must have some optimizations internal to cov_exp_quad | |
#look at summary for the GP parameters ---- | |
print( | |
post_dx | |
, pars = c('eta','rho','noise') | |
, probs = c(.025,.5,.975) | |
, digits = 2 | |
, use_cache = FALSE | |
) | |
#plot the DX model's results ---- | |
#get posterior samples on latent function f | |
f = extract( | |
post_dx | |
, pars = 'f' | |
)[[1]] | |
#revert to original scale | |
f = f*sd(y) + mean(y) | |
#compute median & 95% credible intervals | |
f_med = apply(f,2,median) | |
f_loci = apply(f,2,quantile,prob=.025) | |
f_hici = apply(f,2,quantile,prob=.975) | |
#add to plot | |
p = p + geom_ribbon( | |
data = data.frame( | |
x = x | |
, ymin = f_loci | |
, ymax = f_hici | |
) | |
, mapping = aes( | |
x = x | |
, ymin = ymin | |
, ymax = ymax | |
) | |
, alpha = .5 | |
, fill = 'blue' | |
) | |
p = p + geom_line( | |
data = data.frame( | |
x = x | |
, y = f_med | |
) | |
, mapping = aes( | |
x = x | |
, y = y | |
) | |
, alpha = .5 | |
, colour = 'blue' | |
) | |
print(p) | |
#looks good! | |
#sample the model that uses the precomputed unique distances---- | |
post_udx = rstan::stan( | |
file = "gp_udx.stan" | |
, data=list( | |
nDX = length(dx_unique) | |
, dx_unique = dx_unique | |
, nX = length(x_unique) | |
, DXind = DXind | |
, nY = length(y) | |
, Y = y_scaled | |
, Xind = match(x,sort(unique(x))) | |
) | |
, iter = 2e2 #2e2 takes about 1m when nX=100 | |
, chains = parallel::detectCores() | |
, cores = parallel::detectCores() | |
, seed = 1 | |
) | |
alarm() | |
summary(rowSums(get_elapsed_time(post_udx))/60) | |
#about as fast ceq (indeed, ~30% faster with 2e3 chains) | |
#look at summary for the GP parameters ---- | |
print( | |
post_udx | |
, pars = c('eta','rho','noise') | |
, probs = c(.025,.5,.975) | |
, digits = 2 | |
) | |
#about the same as the CEQ model's result | |
#plot the UDX model results ---- | |
#get posterior samples on latent function f | |
f = extract( | |
post_udx | |
, pars = 'f' | |
)[[1]] | |
#revert to original scale | |
f = f*sd(y) + mean(y) | |
#compute median & 95% credible intervals | |
f_med = apply(f,2,median) | |
f_loci = apply(f,2,quantile,prob=.025) | |
f_hici = apply(f,2,quantile,prob=.975) | |
#add to plot | |
p = p + geom_ribbon( | |
data = data.frame( | |
x = x | |
, ymin = f_loci | |
, ymax = f_hici | |
) | |
, mapping = aes( | |
x = x | |
, ymin = ymin | |
, ymax = ymax | |
) | |
, alpha = .5 | |
, fill = 'red' | |
) | |
p = p + geom_line( | |
data = data.frame( | |
x = x | |
, y = f_med | |
) | |
, mapping = aes( | |
x = x | |
, y = y | |
) | |
, alpha = .5 | |
, colour = 'red' | |
) | |
print(p) | |
#about the same as the CEQ model's result |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
# nX: num unique x values | |
int<lower=1> nX ; | |
# nY: num observations of Y | |
int<lower=1> nY ; | |
# Y: outcomes | |
vector[nY] Y ; | |
# X: predictors | |
real X[nX] ; | |
# Xind: index of x value for each outcome | |
int Xind[nY] ; | |
} | |
parameters { | |
# eta: GP parameter, scale of latent function f | |
real<lower=0> eta ; | |
# noise: scale of measurement noise of y given f | |
real<lower=0> noise ; | |
# z: dummy variable to speed up computation of GP | |
# (See "Cholesky Factored and Transformed Implementation" from manual) | |
vector[nX] z ; | |
# q: dummy variable used to imply cauchy(0,1) prior on rho | |
# (See "Reparameterizing the Cauchy" from manual) | |
real<lower=0,upper=pi()/2> q ; | |
} | |
transformed parameters{ | |
# f: latent function values | |
vector[nX] f ; | |
# rho: GP parameter, x-axis scale of influence between points (wiggliness) | |
# a.k.a. inverse-lengthscale | |
real<lower=0> rho ; | |
rho = tan(q) ; #implies rho ~ cauchy(0,1), peaked @ zero with heavy tail | |
{ #local environment to avoid saving unnecessary variables across iterations | |
# L: cholesky decomposition of covariance matrix | |
matrix[nX,nX] L ; | |
# covMat: covariance matrix | |
matrix[nX,nX] covMat ; | |
real eta_sq ; | |
eta_sq = eta^2 ; | |
covMat = cov_exp_quad(X, eta, 1/rho) ; | |
for(i in 1:nX){ | |
covMat[i,i] = eta_sq + .001 ; #jitter diagonal for positive definiteness | |
} | |
L = cholesky_decompose(covMat) ; | |
f = L * z ; | |
} | |
} | |
model { | |
# rho has an implicit cauchy(0,1) prior | |
# prior on eta | |
eta ~ weibull(2,1) ; #peaked around .8 | |
# prior on noise | |
noise ~ weibull(2,1) ; #peaked around .8 | |
# z as standard normal | |
z ~ normal(0,1) ; | |
# y as f plus gaussian noise | |
Y ~ normal(f[Xind],noise) ; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
# nX: num unique x values | |
int<lower=1> nX ; | |
# DX: matrix of pairwise x distances | |
matrix[nX,nX] DX ; | |
# nY: num observations of Y | |
int<lower=1> nY ; | |
# Y: outcomes | |
vector[nY] Y ; | |
# Xind: index of x value for each outcome | |
int Xind[nY] ; | |
} | |
parameters { | |
# eta: GP parameter, scale of latent function f | |
real<lower=0> eta ; | |
# noise: scale of measurement noise of y given f | |
real<lower=0> noise ; | |
# z: dummy variable to speed up computation of GP | |
# (See "Cholesky Factored and Transformed Implementation" from manual) | |
vector[nX] z ; | |
# q: dummy variable used to imply cauchy(0,1) prior on rho | |
# (See "Reparameterizing the Cauchy" from manual) | |
real<lower=0,upper=pi()/2> q ; | |
} | |
transformed parameters{ | |
# f: latent function values | |
vector[nX] f ; | |
# rho: GP parameter, x-axis scale of influence between points (wiggliness) | |
# a.k.a. inverse-lengthscale | |
real<lower=0> rho ; | |
rho = tan(q) ; #implies rho ~ cauchy(0,1), peaked @ zero with heavy tail | |
{ #local environment to avoid saving unnecessary variables across iterations | |
# L: cholesky decomposition of covariance matrix | |
matrix[nX,nX] L ; | |
# covMat: covariance matrix | |
matrix[nX,nX] covMat ; | |
real rho_sq ; | |
real eta_sq ; | |
rho_sq = rho^2 ; | |
eta_sq = eta^2 ; | |
#could do the following: | |
#covMat = exp(DX*rho_sq)*eta_sq ; | |
#then loop to add jitter to the diagonal, | |
#but it's faster to loop & copy: | |
for(i in 1:(nX-1)){ | |
covMat[i,i] = eta_sq + .001 ; #jitter diagonal for positive definiteness | |
for(j in (i+1):nX){ | |
covMat[i,j] = exp(DX[i,j]*rho_sq)*eta_sq ; | |
covMat[j,i] = covMat[i,j] ; | |
} | |
} | |
covMat[nX,nX] = eta_sq + .001 ; #final diagonal entry | |
L = cholesky_decompose(covMat) ; | |
f = L * z ; | |
} | |
} | |
model { | |
# rho has an implicit cauchy(0,1) prior | |
# prior on eta | |
eta ~ weibull(2,1) ; #peaked around .8 | |
# prior on noise | |
noise ~ weibull(2,1) ; #peaked around .8 | |
# z as standard normal | |
z ~ normal(0,1) ; | |
# y as f plus gaussian noise | |
Y ~ normal(f[Xind],noise) ; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
# nDX: num unique x values | |
int<lower=1> nDX ; | |
# dx: unique dx values | |
vector[nDX] dx_unique ; | |
# nX: num unique x values | |
int<lower=1> nX ; | |
# DXind: matrix of indices into dx_unique | |
int DXind[nX,nX] ; | |
# nY: num observations of Y | |
int<lower=1> nY ; | |
# Y: outcomes | |
vector[nY] Y ; | |
# Xind: index of x value for each outcome | |
int Xind[nY] ; | |
} | |
parameters { | |
# eta: GP parameter, scale of latent function f | |
real<lower=0> eta ; | |
# noise: scale of measurement noise of y given f | |
real<lower=0> noise ; | |
# z: dummy variable to speed up computation of GP | |
# (See "Cholesky Factored and Transformed Implementation" from manual) | |
vector[nX] z ; | |
# q: dummy variable used to imply cauchy(0,1) prior on rho | |
# (See "Reparameterizing the Cauchy" from manual) | |
real<lower=0,upper=pi()/2> q ; | |
} | |
transformed parameters{ | |
# f: latent function values | |
vector[nX] f ; | |
# rho: GP parameter, x-axis scale of influence between points (wiggliness) | |
# a.k.a. inverse-lengthscale | |
real<lower=0> rho ; | |
rho = tan(q) ; #implies rho ~ cauchy(0,1), peaked @ zero with heavy tail | |
{ #local environment to avoid saving unnecessary variables across iterations | |
# L: cholesky decomposition of covariance matrix | |
matrix[nX,nX] L ; | |
# covMat: covariance matrix | |
matrix[nX,nX] covMat ; | |
# covars: unique entries in covariance matrix | |
vector[nDX] covars ; | |
covars = exp(dx_unique*(rho^2))*(eta^2) ; | |
covars[1] = (eta^2) + .001 ; #jitter diagonal for positive definiteness | |
for(i in 1:(nX-1)){ | |
covMat[i,i] = covars[1] ; | |
for(j in (i+1):nX){ | |
covMat[i,j] = covars[DXind[i,j]] ; | |
covMat[j,i] = covMat[i,j] ; | |
} | |
} | |
covMat[nX,nX] = covars[1] ; #final diagonal entry | |
L = cholesky_decompose(covMat) ; | |
f = L * z ; | |
} | |
} | |
model { | |
# rho has an implicit cauchy(0,1) prior | |
# prior on eta | |
eta ~ weibull(2,1) ; #peaked around .8 | |
# prior on noise | |
noise ~ weibull(2,1) ; #peaked around .8 | |
# z as standard normal | |
z ~ normal(0,1) ; | |
# y as f plus noise | |
Y ~ normal(f[Xind],noise) ; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment