mike-lawrence/compare_ceq_dx_udx.R

## compare_ceq_dx_udx.R
#load packages
library(tidyverse)
library(rstan)
rstan_options(auto_write = TRUE)

# Make some fake data ----

#set random seed for reproducibility
set.seed(1)

#nX: number of unique samples on x-axis
nX = 100

#define the x-axis sample points
#  on a grid:
x = seq(-5,5,length.out=nX)
# not on a grid (udx will not perform well in this case):
#x = sort(runif(nX,-5,5))

#define a wiggly function on x
f = sin(x)*dnorm(x,5,8)
plot(x,f)

#add noise
y = rnorm(nX,f,.01)

#plot
p = ggplot()+
	geom_point(
		data = data.frame(x,y)
		, mapping = aes(
			x = x
			, y = y
		)
		, alpha = .5
	)
print(p)


#Prep the data ----

#scale for default prior specification
x_scaled = (x-min(x))/max(x-min(x)) #0-1
y_scaled = scale(y)[,1] #mean0,sd1

#compute unique pairwise x differences & indices thereof for the pairwise matrix
x_unique = sort(unique(x_scaled))
nX = length(x_unique)
dx = matrix(NA,nX,nX)
for(i in 1:nX){
	for(j in 1:nX){
		dx[i,j] = round(-((x_unique[i]-x_unique[j])^2),10)
	}
}
dx_unique = rev(sort(unique(as.vector(dx))))
DXind = matrix(NA,nX,nX)
for(i in 1:nX){
	for(j in 1:nX){
		DXind[i,j] = which(dx_unique==dx[i,j])
	}
}

#check that we computed the above correctly
temp = matrix(NA,nX,nX)
for(i in 1:nX){
	for(j in 1:nX){
		temp[i,j] = dx[i,j]==dx_unique[DXind[i,j]]
	}
}
all(temp)


#Sample the model that uses cov_exp_quad ----
post_ceq = rstan::stan(
	file = "gp_ceq.stan"
	, data=list(
		nX = length(x)
		, X = x_scaled
		, nY = length(y)
		, Y = y_scaled
		, Xind = 1:nX
	)
	, iter = 2e2 #2e2 takes about 1m when nX=100
	, chains = parallel::detectCores()
	, cores = parallel::detectCores()
	, seed = 1
)
alarm()
summary(rowSums(get_elapsed_time(post_ceq))/60)

#look at summary for the GP parameters ----
print(
	post_ceq
	, pars = c('eta','rho','noise')
	, probs = c(.025,.5,.975)
	, digits = 2
	, use_cache = FALSE
)

#plot the CEQ model's results ----

#get posterior samples on latent function f
f = extract(
	post_ceq
	, pars = 'f'
)[[1]]

#revert to original scale
f = f*sd(y) + mean(y)

#compute median & 95% credible intervals
f_med = apply(f,2,median)
f_loci = apply(f,2,quantile,prob=.025)
f_hici = apply(f,2,quantile,prob=.975)

#add to plot
p = p + geom_ribbon(
	data = data.frame(
		x = x
		, ymin = f_loci
		, ymax = f_hici
	)
	, mapping = aes(
		x = x
		, ymin = ymin
		, ymax = ymax
	)
	, alpha = .5
	, fill = 'green'
)
p = p + geom_line(
	data = data.frame(
		x = x
		, y = f_med
	)
	, mapping = aes(
		x = x
		, y = y
	)
	, alpha = .5
	, colour = 'green'
)
print(p)


#Sample the model that uses pre-computed distances ----
post_dx = rstan::stan(
	file = "gp_dx.stan"
	, data=list(
		nX = length(x)
		, DX = dx
		, nY = length(y)
		, Y = y_scaled
		, Xind = 1:nX
	)
	, iter = 2e2 #2e2 takes about 1.5m when nX=100
	, chains = parallel::detectCores()
	, cores = parallel::detectCores()
	, seed = 1
)
alarm()
summary(rowSums(get_elapsed_time(post_dx))/60)
#slower than ceq, so ceq must have some optimizations internal to cov_exp_quad


#look at summary for the GP parameters ----
print(
	post_dx
	, pars = c('eta','rho','noise')
	, probs = c(.025,.5,.975)
	, digits = 2
	, use_cache = FALSE
)

#plot the DX model's results ----

#get posterior samples on latent function f
f = extract(
	post_dx
	, pars = 'f'
)[[1]]

#revert to original scale
f = f*sd(y) + mean(y)

#compute median & 95% credible intervals
f_med = apply(f,2,median)
f_loci = apply(f,2,quantile,prob=.025)
f_hici = apply(f,2,quantile,prob=.975)

#add to plot
p = p + geom_ribbon(
	data = data.frame(
		x = x
		, ymin = f_loci
		, ymax = f_hici
	)
	, mapping = aes(
		x = x
		, ymin = ymin
		, ymax = ymax
	)
	, alpha = .5
	, fill = 'blue'
)
p = p + geom_line(
	data = data.frame(
		x = x
		, y = f_med
	)
	, mapping = aes(
		x = x
		, y = y
	)
	, alpha = .5
	, colour = 'blue'
)
print(p)

#looks good!

#sample the model that uses the precomputed unique distances----
post_udx = rstan::stan(
	file = "gp_udx.stan"
	, data=list(
		nDX = length(dx_unique)
		, dx_unique = dx_unique
		, nX = length(x_unique)
		, DXind = DXind
		, nY = length(y)
		, Y = y_scaled
		, Xind = match(x,sort(unique(x)))
	)
	, iter = 2e2 #2e2 takes about 1m when nX=100
	, chains = parallel::detectCores()
	, cores = parallel::detectCores()
	, seed = 1
)
alarm()
summary(rowSums(get_elapsed_time(post_udx))/60)
#about as fast ceq (indeed, ~30% faster with 2e3 chains)


#look at summary for the GP parameters ----
print(
	post_udx
	, pars = c('eta','rho','noise')
	, probs = c(.025,.5,.975)
	, digits = 2
)
#about the same as the CEQ model's result

#plot the UDX model results ----

#get posterior samples on latent function f
f = extract(
	post_udx
	, pars = 'f'
)[[1]]

#revert to original scale
f = f*sd(y) + mean(y)

#compute median & 95% credible intervals
f_med = apply(f,2,median)
f_loci = apply(f,2,quantile,prob=.025)
f_hici = apply(f,2,quantile,prob=.975)

#add to plot
p = p + geom_ribbon(
	data = data.frame(
		x = x
		, ymin = f_loci
		, ymax = f_hici
	)
	, mapping = aes(
		x = x
		, ymin = ymin
		, ymax = ymax
	)
	, alpha = .5
	, fill = 'red'
)
p = p + geom_line(
	data = data.frame(
		x = x
		, y = f_med
	)
	, mapping = aes(
		x = x
		, y = y
	)
	, alpha = .5
	, colour = 'red'
)
print(p)
#about the same as the CEQ model's result

## gp_ceq.stan
data {
	# nX: num unique x values
	int<lower=1> nX ;
	# nY: num observations of Y
	int<lower=1> nY ;
	# Y: outcomes
	vector[nY] Y ;
	# X: predictors
	real X[nX] ;
	# Xind: index of x value for each outcome
	int Xind[nY] ;
}
parameters {
	# eta: GP parameter, scale of latent function f
	real<lower=0> eta ;
	# noise: scale of measurement noise of y given f
	real<lower=0> noise ;
	# z: dummy variable to speed up computation of GP
	#   (See "Cholesky Factored and Transformed Implementation" from manual)
	vector[nX] z ;
	# q: dummy variable used to imply cauchy(0,1) prior on rho
	#   (See "Reparameterizing the Cauchy" from manual)
	real<lower=0,upper=pi()/2> q ;
}
transformed parameters{
	# f: latent function values
	vector[nX] f ;
	# rho: GP parameter, x-axis scale of influence between points (wiggliness)
	#       a.k.a. inverse-lengthscale
	real<lower=0> rho ;
	rho = tan(q) ; #implies rho ~ cauchy(0,1), peaked @ zero with heavy tail
	{ #local environment to avoid saving unnecessary variables across iterations
		# L: cholesky decomposition of covariance matrix
		matrix[nX,nX] L ;
		# covMat: covariance matrix
		matrix[nX,nX] covMat ;
		real eta_sq ;
		eta_sq = eta^2 ;
		covMat = cov_exp_quad(X, eta, 1/rho) ;
		for(i in 1:nX){
			covMat[i,i] = eta_sq + .001 ; #jitter diagonal for positive definiteness
		}
		L = cholesky_decompose(covMat) ;
		f = L * z ;
	}
}
model {
	# rho has an implicit cauchy(0,1) prior
	# prior on eta
	eta ~ weibull(2,1) ; #peaked around .8
	# prior on noise
	noise ~ weibull(2,1) ; #peaked around .8
	# z as standard normal
	z ~ normal(0,1) ;
	# y as f plus gaussian noise
	Y ~ normal(f[Xind],noise) ;
}

## gp_dx.stan
data {
	# nX: num unique x values
	int<lower=1> nX ;
	# DX: matrix of pairwise x distances
	matrix[nX,nX] DX ;
	# nY: num observations of Y
	int<lower=1> nY ;
	# Y: outcomes
	vector[nY] Y ;
	# Xind: index of x value for each outcome
	int Xind[nY] ;
}
parameters {
	# eta: GP parameter, scale of latent function f
	real<lower=0> eta ;
	# noise: scale of measurement noise of y given f
	real<lower=0> noise ;
	# z: dummy variable to speed up computation of GP
	#   (See "Cholesky Factored and Transformed Implementation" from manual)
	vector[nX] z ;
	# q: dummy variable used to imply cauchy(0,1) prior on rho
	#   (See "Reparameterizing the Cauchy" from manual)
	real<lower=0,upper=pi()/2> q ;
}
transformed parameters{
	# f: latent function values
	vector[nX] f ;
	# rho: GP parameter, x-axis scale of influence between points (wiggliness)
	#       a.k.a. inverse-lengthscale
	real<lower=0> rho ;
	rho = tan(q) ; #implies rho ~ cauchy(0,1), peaked @ zero with heavy tail
	{ #local environment to avoid saving unnecessary variables across iterations
		# L: cholesky decomposition of covariance matrix
		matrix[nX,nX] L ;
		# covMat: covariance matrix
		matrix[nX,nX] covMat ;
		real rho_sq ;
		real eta_sq ;
		rho_sq = rho^2 ;
		eta_sq = eta^2 ;
		#could do the following:
		#covMat = exp(DX*rho_sq)*eta_sq ;
		#then loop to add jitter to the diagonal,
		#but it's faster to loop & copy:
		for(i in 1:(nX-1)){
			covMat[i,i] = eta_sq + .001 ; #jitter diagonal for positive definiteness
			for(j in (i+1):nX){
				covMat[i,j] = exp(DX[i,j]*rho_sq)*eta_sq ;
				covMat[j,i] = covMat[i,j] ;
			}
		}
		covMat[nX,nX] = eta_sq + .001 ; #final diagonal entry
		L = cholesky_decompose(covMat) ;
		f = L * z ;
	}
}
model {
	# rho has an implicit cauchy(0,1) prior
	# prior on eta
	eta ~ weibull(2,1) ; #peaked around .8
	# prior on noise
	noise ~ weibull(2,1) ; #peaked around .8
	# z as standard normal
	z ~ normal(0,1) ;
	# y as f plus gaussian noise
	Y ~ normal(f[Xind],noise) ;
}

## gp_udx.stan
data {
	# nDX: num unique x values
	int<lower=1> nDX ;
	# dx: unique dx values
	vector[nDX] dx_unique ;
	# nX: num unique x values
	int<lower=1> nX ;
	# DXind: matrix of indices into dx_unique
	int DXind[nX,nX] ;
	# nY: num observations of Y
	int<lower=1> nY ;
	# Y: outcomes
	vector[nY] Y ;
	# Xind: index of x value for each outcome
	int Xind[nY] ;
}
parameters {
	# eta: GP parameter, scale of latent function f
	real<lower=0> eta ;
	# noise: scale of measurement noise of y given f
	real<lower=0> noise ;
	# z: dummy variable to speed up computation of GP
	#   (See "Cholesky Factored and Transformed Implementation" from manual)
	vector[nX] z ;
	# q: dummy variable used to imply cauchy(0,1) prior on rho
	#   (See "Reparameterizing the Cauchy" from manual)
	real<lower=0,upper=pi()/2> q ;
}
transformed parameters{
	# f: latent function values
	vector[nX] f ;
	# rho: GP parameter, x-axis scale of influence between points (wiggliness)
	#       a.k.a. inverse-lengthscale
	real<lower=0> rho ;
	rho = tan(q) ; #implies rho ~ cauchy(0,1), peaked @ zero with heavy tail
	{ #local environment to avoid saving unnecessary variables across iterations
		# L: cholesky decomposition of covariance matrix
		matrix[nX,nX] L ;
		# covMat: covariance matrix
		matrix[nX,nX] covMat ;
		# covars: unique entries in covariance matrix
		vector[nDX] covars ;
		covars = exp(dx_unique*(rho^2))*(eta^2) ;
		covars[1] = (eta^2) + .001 ; #jitter diagonal for positive definiteness
		for(i in 1:(nX-1)){
			covMat[i,i] = covars[1] ;
			for(j in (i+1):nX){
				covMat[i,j] = covars[DXind[i,j]] ;
				covMat[j,i] = covMat[i,j] ;
			}
		}
		covMat[nX,nX] = covars[1] ; #final diagonal entry
		L = cholesky_decompose(covMat) ;
		f = L * z ;
	}
}
model {
	# rho has an implicit cauchy(0,1) prior
	# prior on eta
	eta ~ weibull(2,1) ; #peaked around .8
	# prior on noise
	noise ~ weibull(2,1) ; #peaked around .8
	# z as standard normal
	z ~ normal(0,1) ;
	# y as f plus noise
	Y ~ normal(f[Xind],noise) ;
}
	#load packages
	library(tidyverse)
	library(rstan)
	rstan_options(auto_write = TRUE)

	# Make some fake data ----

	#set random seed for reproducibility
	set.seed(1)

	#nX: number of unique samples on x-axis
	nX = 100

	#define the x-axis sample points
	# on a grid:
	x = seq(-5,5,length.out=nX)
	# not on a grid (udx will not perform well in this case):
	#x = sort(runif(nX,-5,5))

	#define a wiggly function on x
	f = sin(x)*dnorm(x,5,8)
	plot(x,f)

	#add noise
	y = rnorm(nX,f,.01)

	#plot
	p = ggplot()+
	geom_point(
	data = data.frame(x,y)
	, mapping = aes(
	x = x
	, y = y
	)
	, alpha = .5
	)
	print(p)


	#Prep the data ----

	#scale for default prior specification
	x_scaled = (x-min(x))/max(x-min(x)) #0-1
	y_scaled = scale(y)[,1] #mean0,sd1

	#compute unique pairwise x differences & indices thereof for the pairwise matrix
	x_unique = sort(unique(x_scaled))
	nX = length(x_unique)
	dx = matrix(NA,nX,nX)
	for(i in 1:nX){
	for(j in 1:nX){
	dx[i,j] = round(-((x_unique[i]-x_unique[j])^2),10)
	}
	}
	dx_unique = rev(sort(unique(as.vector(dx))))
	DXind = matrix(NA,nX,nX)
	for(i in 1:nX){
	for(j in 1:nX){
	DXind[i,j] = which(dx_unique==dx[i,j])
	}
	}

	#check that we computed the above correctly
	temp = matrix(NA,nX,nX)
	for(i in 1:nX){
	for(j in 1:nX){
	temp[i,j] = dx[i,j]==dx_unique[DXind[i,j]]
	}
	}
	all(temp)


	#Sample the model that uses cov_exp_quad ----
	post_ceq = rstan::stan(
	file = "gp_ceq.stan"
	, data=list(
	nX = length(x)
	, X = x_scaled
	, nY = length(y)
	, Y = y_scaled
	, Xind = 1:nX
	)
	, iter = 2e2 #2e2 takes about 1m when nX=100
	, chains = parallel::detectCores()
	, cores = parallel::detectCores()
	, seed = 1
	)
	alarm()
	summary(rowSums(get_elapsed_time(post_ceq))/60)

	#look at summary for the GP parameters ----
	print(
	post_ceq
	, pars = c('eta','rho','noise')
	, probs = c(.025,.5,.975)
	, digits = 2
	, use_cache = FALSE
	)

	#plot the CEQ model's results ----

	#get posterior samples on latent function f
	f = extract(
	post_ceq
	, pars = 'f'
	)[[1]]

	#revert to original scale
	f = f*sd(y) + mean(y)

	#compute median & 95% credible intervals
	f_med = apply(f,2,median)
	f_loci = apply(f,2,quantile,prob=.025)
	f_hici = apply(f,2,quantile,prob=.975)

	#add to plot
	p = p + geom_ribbon(
	data = data.frame(
	x = x
	, ymin = f_loci
	, ymax = f_hici
	)
	, mapping = aes(
	x = x
	, ymin = ymin
	, ymax = ymax
	)
	, alpha = .5
	, fill = 'green'
	)
	p = p + geom_line(
	data = data.frame(
	x = x
	, y = f_med
	)
	, mapping = aes(
	x = x
	, y = y
	)
	, alpha = .5
	, colour = 'green'
	)
	print(p)


	#Sample the model that uses pre-computed distances ----
	post_dx = rstan::stan(
	file = "gp_dx.stan"
	, data=list(
	nX = length(x)
	, DX = dx
	, nY = length(y)
	, Y = y_scaled
	, Xind = 1:nX
	)
	, iter = 2e2 #2e2 takes about 1.5m when nX=100
	, chains = parallel::detectCores()
	, cores = parallel::detectCores()
	, seed = 1
	)
	alarm()
	summary(rowSums(get_elapsed_time(post_dx))/60)
	#slower than ceq, so ceq must have some optimizations internal to cov_exp_quad


	#look at summary for the GP parameters ----
	print(
	post_dx
	, pars = c('eta','rho','noise')
	, probs = c(.025,.5,.975)
	, digits = 2
	, use_cache = FALSE
	)

	#plot the DX model's results ----

	#get posterior samples on latent function f
	f = extract(
	post_dx
	, pars = 'f'
	)[[1]]

	#revert to original scale
	f = f*sd(y) + mean(y)

	#compute median & 95% credible intervals
	f_med = apply(f,2,median)
	f_loci = apply(f,2,quantile,prob=.025)
	f_hici = apply(f,2,quantile,prob=.975)

	#add to plot
	p = p + geom_ribbon(
	data = data.frame(
	x = x
	, ymin = f_loci
	, ymax = f_hici
	)
	, mapping = aes(
	x = x
	, ymin = ymin
	, ymax = ymax
	)
	, alpha = .5
	, fill = 'blue'
	)
	p = p + geom_line(
	data = data.frame(
	x = x
	, y = f_med
	)
	, mapping = aes(
	x = x
	, y = y
	)
	, alpha = .5
	, colour = 'blue'
	)
	print(p)

	#looks good!

	#sample the model that uses the precomputed unique distances----
	post_udx = rstan::stan(
	file = "gp_udx.stan"
	, data=list(
	nDX = length(dx_unique)
	, dx_unique = dx_unique
	, nX = length(x_unique)
	, DXind = DXind
	, nY = length(y)
	, Y = y_scaled
	, Xind = match(x,sort(unique(x)))
	)
	, iter = 2e2 #2e2 takes about 1m when nX=100
	, chains = parallel::detectCores()
	, cores = parallel::detectCores()
	, seed = 1
	)
	alarm()
	summary(rowSums(get_elapsed_time(post_udx))/60)
	#about as fast ceq (indeed, ~30% faster with 2e3 chains)


	#look at summary for the GP parameters ----
	print(
	post_udx
	, pars = c('eta','rho','noise')
	, probs = c(.025,.5,.975)
	, digits = 2
	)
	#about the same as the CEQ model's result

	#plot the UDX model results ----

	#get posterior samples on latent function f
	f = extract(
	post_udx
	, pars = 'f'
	)[[1]]

	#revert to original scale
	f = f*sd(y) + mean(y)

	#compute median & 95% credible intervals
	f_med = apply(f,2,median)
	f_loci = apply(f,2,quantile,prob=.025)
	f_hici = apply(f,2,quantile,prob=.975)

	#add to plot
	p = p + geom_ribbon(
	data = data.frame(
	x = x
	, ymin = f_loci
	, ymax = f_hici
	)
	, mapping = aes(
	x = x
	, ymin = ymin
	, ymax = ymax
	)
	, alpha = .5
	, fill = 'red'
	)
	p = p + geom_line(
	data = data.frame(
	x = x
	, y = f_med
	)
	, mapping = aes(
	x = x
	, y = y
	)
	, alpha = .5
	, colour = 'red'
	)
	print(p)
	#about the same as the CEQ model's result
	data {
	# nX: num unique x values
	int<lower=1> nX ;
	# nY: num observations of Y
	int<lower=1> nY ;
	# Y: outcomes
	vector[nY] Y ;
	# X: predictors
	real X[nX] ;
	# Xind: index of x value for each outcome
	int Xind[nY] ;
	}
	parameters {
	# eta: GP parameter, scale of latent function f
	real<lower=0> eta ;
	# noise: scale of measurement noise of y given f
	real<lower=0> noise ;
	# z: dummy variable to speed up computation of GP
	# (See "Cholesky Factored and Transformed Implementation" from manual)
	vector[nX] z ;
	# q: dummy variable used to imply cauchy(0,1) prior on rho
	# (See "Reparameterizing the Cauchy" from manual)
	real<lower=0,upper=pi()/2> q ;
	}
	transformed parameters{
	# f: latent function values
	vector[nX] f ;
	# rho: GP parameter, x-axis scale of influence between points (wiggliness)
	# a.k.a. inverse-lengthscale
	real<lower=0> rho ;
	rho = tan(q) ; #implies rho ~ cauchy(0,1), peaked @ zero with heavy tail
	{ #local environment to avoid saving unnecessary variables across iterations
	# L: cholesky decomposition of covariance matrix
	matrix[nX,nX] L ;
	# covMat: covariance matrix
	matrix[nX,nX] covMat ;
	real eta_sq ;
	eta_sq = eta^2 ;
	covMat = cov_exp_quad(X, eta, 1/rho) ;
	for(i in 1:nX){
	covMat[i,i] = eta_sq + .001 ; #jitter diagonal for positive definiteness
	}
	L = cholesky_decompose(covMat) ;
	f = L * z ;
	}
	}
	model {
	# rho has an implicit cauchy(0,1) prior
	# prior on eta
	eta ~ weibull(2,1) ; #peaked around .8
	# prior on noise
	noise ~ weibull(2,1) ; #peaked around .8
	# z as standard normal
	z ~ normal(0,1) ;
	# y as f plus gaussian noise
	Y ~ normal(f[Xind],noise) ;
	}
	data {
	# nDX: num unique x values
	int<lower=1> nDX ;
	# dx: unique dx values
	vector[nDX] dx_unique ;
	# nX: num unique x values
	int<lower=1> nX ;
	# DXind: matrix of indices into dx_unique
	int DXind[nX,nX] ;
	# nY: num observations of Y
	int<lower=1> nY ;
	# Y: outcomes
	vector[nY] Y ;
	# Xind: index of x value for each outcome
	int Xind[nY] ;
	}
	parameters {
	# eta: GP parameter, scale of latent function f
	real<lower=0> eta ;
	# noise: scale of measurement noise of y given f
	real<lower=0> noise ;
	# z: dummy variable to speed up computation of GP
	# (See "Cholesky Factored and Transformed Implementation" from manual)
	vector[nX] z ;
	# q: dummy variable used to imply cauchy(0,1) prior on rho
	# (See "Reparameterizing the Cauchy" from manual)
	real<lower=0,upper=pi()/2> q ;
	}
	transformed parameters{
	# f: latent function values
	vector[nX] f ;
	# rho: GP parameter, x-axis scale of influence between points (wiggliness)
	# a.k.a. inverse-lengthscale
	real<lower=0> rho ;
	rho = tan(q) ; #implies rho ~ cauchy(0,1), peaked @ zero with heavy tail
	{ #local environment to avoid saving unnecessary variables across iterations
	# L: cholesky decomposition of covariance matrix
	matrix[nX,nX] L ;
	# covMat: covariance matrix
	matrix[nX,nX] covMat ;
	# covars: unique entries in covariance matrix
	vector[nDX] covars ;
	covars = exp(dx_unique(rho^2))(eta^2) ;
	covars[1] = (eta^2) + .001 ; #jitter diagonal for positive definiteness
	for(i in 1:(nX-1)){
	covMat[i,i] = covars[1] ;
	for(j in (i+1):nX){
	covMat[i,j] = covars[DXind[i,j]] ;
	covMat[j,i] = covMat[i,j] ;
	}
	}
	covMat[nX,nX] = covars[1] ; #final diagonal entry
	L = cholesky_decompose(covMat) ;
	f = L * z ;
	}
	}
	model {
	# rho has an implicit cauchy(0,1) prior
	# prior on eta
	eta ~ weibull(2,1) ; #peaked around .8
	# prior on noise
	noise ~ weibull(2,1) ; #peaked around .8
	# z as standard normal
	z ~ normal(0,1) ;
	# y as f plus noise
	Y ~ normal(f[Xind],noise) ;
	}