Code for my blog post on estimating priors, posteriors, and simulating hyperparameters in R using stan
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############### FUNCTIONS #################### | |
## Simulate GP based on fixed sigma, rho, and eta | |
## calls gp-predict.stan | |
## this sims over same data (Y,X), but uses infered hyperparameters | |
sim_GP_y <- function(y1, x1, sigma_sq, rho_sq, eta_sq, iter = iter, chains = chains){ | |
sim_fit <- stan(file="gp-predict_SE.stan", data=list(x1=x1, y1=y1, N1=length(x1), | |
x2=x, N2=length(x), eta_sq=eta_sq, | |
rho_sq=rho_sq, sigma_sq=sigma_sq), | |
iter=iter, chains=chains) | |
sims <- extract(sim_fit, permuted=TRUE) | |
return(sims) | |
} | |
## takes simulated Y and makes it ready to plot | |
melt_sim <- function(sim, x_data, param_label){ | |
tmp <- adply(sim, 2) | |
# plot(sim_data[,2], type = "l") same as; plot(sims$y2[1,], type="l") | |
# each col of sim_data is each row of sims$y2; | |
# each col sim_data is a complete simulation across all data points x (n = 201) | |
# except row 1 is added as a factor of 201 levels for ID | |
tmp <- melt(tmp) | |
tmp$model <- param_label | |
names(tmp) <- c("xid", "group1", "y", "param") | |
tmp <- mutate(tmp, x=x_data[xid]) | |
return(tmp) | |
} | |
## like melt_sim, but for two paramaters | |
melt_sim_multiparam <- function(sim, x_data, param_label1, param_label2){ | |
tmp <- adply(sim, 2) | |
# plot(sim_data[,2], type = "l") same as; plot(sims$y2[1,], type="l") | |
# each col of sim_data is each row of sims$y2; | |
# each col sim_data is a complete simulation across all data points x (n = 201) | |
# except row 1 is added as a factor of 201 levels for ID | |
tmp <- melt(tmp) | |
tmp$param1 <- param_label1 | |
tmp$param2 <- param_label2 | |
names(tmp) <- c("xid", "group1", "y", "param1", "param2") | |
tmp <- mutate(tmp, x=x_data[xid]) | |
return(tmp) | |
} | |
# simple power function to emulate pow() in stan | |
r.pow <- function(x,p){x^p} | |
############### END FUNCTIONS #################### | |
############### INCLUDES ######################### | |
## load the required packages | |
library("plyr") | |
library("rstan") | |
# devtools::install_github("hadley/ggplot2") | |
library("ggplot2") | |
# devtools::install_github("hrbrmstr/ggalt") | |
library("ggalt") | |
library("reshape2") | |
library("matrixcalc") | |
library("cowplot") | |
library("MASS") | |
library("lattice") | |
############### END INCLUDES ###################### | |
############### INITIALIZATION #################### | |
eta_sq = 1 | |
rho_sq = 1 | |
# l = sqrt((1/rho_sq)/2) | |
# rho_sq_test = 1/(2*l^2) | |
sigma_sq = 0.0001 | |
x <- seq(-10, 10, 0.1) | |
n <- length(x) | |
iter = 100 | |
chains = 1 | |
rho_sq_range = c(0.01, 0.1, 1, 10) # for faceted kernel plot | |
eta_sq_prior = c(0.1, 1, 10) | |
rho_sq_prior = c(0.1, 1, 10) | |
# new data for fit | |
x1 <- c(-6, -3, -1, 0, 2,4,7,9) | |
y1 <- c(-2, 0, 1, -1, 0,2.5,0,3) | |
############### END INITIALIZATION #################### | |
############### CODE AND PLOTS ######################## | |
############### Kernel testing and configuration | |
## loop to build covraiance matrix and plot. | |
## use this to test kernel code | |
Sigma <- matrix(nrow = n, ncol = n) | |
for (i in 1:n) { | |
for (j in 1:n){ | |
## squared exponential, aka, RBF, aka Gaussian, aka Exponentiated Quadratic | |
## simpified form from other code examples | |
Sigma[i,j] <- eta_sq * exp(-rho_sq*(x[i] - x[j])^2) + ifelse(i==j, sigma_sq, 0.0) | |
# Sigma[i,j] <- eta_sq * exp(-rho_sq*r.pow(x[i] - x[j],2)) + ifelse(i==j, sigma_sq, 0.0) | |
## with length parameter == l approximated from rho_sq | |
# Sigma[i,j] <- eta_sq * exp(-((x[i] - x[j])^2)/(2*(l^2))) + ifelse(i==j, sigma_sq, 0.0) | |
# Sigma[i,j] <- eta_sq * exp(-((r.pow(x[i] - x[j],2))/(2*r.pow(l,2)))) + ifelse(i==j, sigma_sq, 0.0) | |
## intermediate with rho_sq inplace of (2*l^2) | |
# Sigma[i,j] <- eta_sq * exp(-((x[i] - x[j])^2)/(1/rho_sq)) + ifelse(i==j, sigma_sq, 0.0) | |
# Sigma[i,j] <- eta_sq * exp(-((r.pow(x[i] - x[j],2))/(1/rho_sq))) + ifelse(i==j, sigma_sq, 0.0); | |
} | |
} | |
## plot and test for PSD | |
lattice::levelplot(Sigma) | |
matrixcalc::is.positive.semi.definite(Sigma) | |
############### Kernel calculated for various values of rho and eta | |
## loop to calculate matrix for each combination of rho and eta | |
Sigma_all <- NULL | |
for(z in 1:length(eta_sq)){ | |
for(k in 1:length(rho_sq_range)){ | |
Sigma <- matrix(nrow = n, ncol = n) | |
for (i in 1:n){ | |
for (j in 1:n){ | |
# print(paste0("working on eta_sq[z] = ", eta_sq[z], ", rho_sq_range[k] = ", rho_sq_range[k], | |
# ", for i = ", i, ", and j = ", j)) | |
Sigma[i,j] <- eta_sq[z] * exp(-rho_sq_range[k]*(x[i] - x[j])^2) + ifelse(i==j, sigma_sq, 0.0) | |
} | |
} | |
Sigma_df <- data.frame(Sigma) | |
colnames(Sigma_df) <- seq(1:ncol(Sigma_df)) | |
Sigma_df <- reshape2::melt(Sigma_df) | |
Sigma_df$rho <- rho_sq_range[k] | |
Sigma_df$eta <- eta_sq[z] | |
Sigma_df$tmp <- seq(1:nrow(Sigma)) | |
Sigma_all <- rbind(Sigma_all, Sigma_df) | |
} | |
} | |
colnames(Sigma_all) <- c("col", "value", "rho", "eta", "row") | |
## plot all matricies and facet by rho and eta | |
ggplot() + | |
geom_raster(data = Sigma_all, aes(x = row, y = col, fill = value)) + | |
facet_grid(. ~ rho) + | |
scale_fill_viridis(option="inferno", direction = 1) + | |
theme_bw() + | |
labs(title="Squared Exponential Kernel Varying Length Scale", | |
subtitle = expression(eta^2*" = 0.1; "*sigma^2*" = 0.0001; "*rho^2*" range of 0.01, 0.1, 1, 10")) + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
panel.grid = element_blank(), | |
strip.background = element_rect(colour = "gray50", fill = "white"), | |
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"), | |
strip.text.x = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"), | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_blank(), | |
axis.text.y = element_blank(), | |
axis.title = element_blank(), | |
axis.ticks = element_blank(), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position = "none" | |
) | |
# ggsave(filename = "GP_SE_kernel_facet_rho_facet.png", width = 7.5 , height = 2.5) | |
############### Fit STAN model to fixed rho and eta & plot | |
## Simulate a process with no data to get priors | |
## The very small sigma_sq value is necessary to avoid an error. Don't set it to zero. | |
fit1 <- stan(file="gp-sim_SE.stan", data=list(x=x, N=n, eta_sq=eta_sq, | |
rho_sq=rho_sq, sigma_sq=sigma_sq), | |
iter=iter, chains=chains) | |
sims1 <- extract(fit1, permuted=TRUE) | |
## Rearrange the data for plotting | |
se_sim_data <- adply(sims1$y, 2) | |
tmp1 <- melt(se_sim_data) | |
names(tmp1) <- c("xid", "group", "y") | |
tmp1 <- mutate(tmp1, x=x[xid]) | |
## plot it | |
fig2a <- ggplot(tmp1, aes(x=x, y=y, color = group)) + | |
geom_line(aes(group=group),alpha=0.2) + | |
theme_bw() + | |
labs(title="Gaussian Process - Prior", | |
subtitle=paste0("Squared Exponential Kernel"), | |
caption=paste0("rho_sq = ", rho_sq, ", sigma_sq = ", sigma_sq, | |
", eta_sq = ", eta_sq), | |
x = "X", | |
y = "Y") + | |
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=1), labels = seq(-10,10,by=1)) + | |
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position="none" | |
) | |
plot(fig2a) | |
## plot data points alone | |
fig2b_pnts <- ggplot(data=data.frame(x=x1, y=y1), aes(x=x1, y = y1)) + | |
theme_bw() + | |
geom_point(size = 3) + | |
geom_point(size = 6, color = "black", shape = 1) + | |
labs(title="Gaussian Process - Data", | |
x = "X", | |
y = "Y") + | |
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=1), labels = seq(-10,10,by=1)) + | |
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position="none" | |
) | |
plot(fig2b_pnts) | |
## Simulate with a few noise-free data points to get posterior. | |
## still fixed values for eta and rho | |
## Again pretend the noise is almost zero, but not quite. | |
fit2 <- stan(file="gp-predict_SE.stan", data=list(x1=x1, y1=y1, N1=length(x1), | |
x2=x, N2=length(x), | |
eta_sq=eta_sq, | |
rho_sq=rho_sq, | |
sigma_sq=sigma_sq), | |
iter=iter, chains=chains) | |
sims2 <- extract(fit2, permuted=TRUE) | |
## Rearrange the data for plotting | |
se_sim_data2 <- adply(sims2$y, 2) | |
tmp2 <- melt(se_sim_data2) | |
names(tmp2) <- c("xid", "group", "y") | |
tmp2 <- mutate(tmp2, x=x[xid]) | |
# plot it | |
fig2b <- ggplot(data = tmp2, aes(x=x, y=y)) + | |
geom_line(aes(group=group, color = group), alpha=0.2) + | |
theme_bw() + | |
geom_point(data=data.frame(x=x1, y=y1), aes(x=x1, y = y1), size = 3) + | |
geom_point(data=data.frame(x=x1, y=y1), aes(x=x1, y = y1), | |
size = 6, color = "black", shape = 1) + | |
labs(caption=paste0("rho_sq = ", rho_sq, ", eta_sq = ", eta_sq, "sigma_sq = ", sigma_sq), | |
x = "X", | |
y = "Y") + | |
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=1), labels = seq(-10,10,by=1)) + | |
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position="none" | |
) | |
plot(fig2b) | |
############### Fit STAN model to subjective range of rho and eta & plot | |
## loop over eta and rho values, compile results for priors plot | |
param_fits <- NULL | |
for(i in 1:length(eta_sq_prior)){ | |
for(j in 1:length(rho_sq_prior)){ | |
fit3 <- stan(file="gp-sim_SE.stan", | |
data=list(x=x, N=n, | |
eta_sq=eta_sq_prior[i], rho_sq=rho_sq_prior[j], | |
sigma_sq=sigma_sq), | |
iter=iter, chains=chains) | |
sims1 <- extract(fit3, permuted=TRUE) | |
## Rearrange the data and plot it | |
data <- adply(sims1$y, 2) | |
tmp1<- melt(data) | |
names(tmp1) <- c("xid", "group1", "y") | |
tmp1 <- mutate(tmp1, x=x[xid]) | |
tmp1$rho <- rho_sq_prior[j] | |
tmp1$eta <- eta_sq_prior[i] | |
sigma_sq < sigma_sq | |
param_fits <- rbind(param_fits, tmp1) | |
} | |
} | |
param_fits$rho_label <- paste0("rho ",param_fits$rho) | |
param_fits$eta_label <- paste0("eta ",param_fits$eta) | |
## plot priors facetted over various eta and rho wvlues | |
fig3 <- ggplot(data = param_fits, aes(x=x, y=y, color = group1)) + | |
geom_line(aes(group=group1),alpha=0.2) + | |
scale_color_viridis(discrete=TRUE) + | |
theme_bw() + | |
labs(title="Gaussian Process - Prior", | |
subtitle = expression("Squared Exponential Kernel wih range of "*rho^2*" and "*eta^2), | |
x = "X", | |
y = "Y") + | |
facet_grid(rho_label~eta_label) + | |
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) + | |
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
panel.grid = element_blank(), | |
strip.background = element_rect(colour = "gray50", fill = "white"), | |
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"), | |
strip.text.x = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"), | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position="none" | |
) | |
plot(fig3) | |
## Simulate with a few noise-free data points for posterior over range of eta and rho | |
## Again pretend the noise is almost zero, but not quite. | |
param_fits4 <- NULL | |
for(i in 1:length(eta_sq_prior)){ | |
for(j in 1:length(rho_sq_prior)){ | |
## Parameter value fixed in given example | |
fit4 <- stan(file="gp-predict_SE.stan", data=list(x1=x1, y1=y1, N1=length(x1), | |
x2=x, N2=length(x), eta_sq=eta_sq_prior[i], | |
rho_sq=rho_sq_prior[j], sigma_sq=sigma_sq), | |
iter=iter, chains=chains) | |
sims4 <- extract(fit4, permuted=TRUE) | |
## Rearrange the data and plot it | |
data4 <- adply(sims4$y2, 2) | |
tmp4 <- melt(data4) | |
names(tmp4) <- c("xid", "group1", "y") | |
tmp4 <- mutate(tmp4, x=x[xid]) | |
tmp4$rho <- rho_sq_prior[j] | |
tmp4$eta <- eta_sq_prior[i] | |
sigma_sq < sigma_sq | |
param_fits4 <- rbind(param_fits4, tmp4) | |
} | |
} | |
param_fits4$rho_label <- paste0("rho ",param_fits4$rho) | |
param_fits4$eta_label <- paste0("eta ",param_fits4$eta) | |
dp <- data.frame(x=x1, y=y1) | |
## plot posreriors and data points for range of eta and rho | |
fig4 <- ggplot() + | |
geom_line(data = param_fits4, aes(x=x, y=y, color = group1), alpha=0.2) + | |
scale_color_viridis(discrete=TRUE) + | |
geom_point(data = dp, aes(x=x, y = y), size = 1) + | |
theme_bw() + | |
labs(title="Gaussian Process - Fitted", | |
subtitle = expression("Squared Exponential Kernel wih range of "*rho^2*" and "*eta^2), | |
x = "X", | |
y = "Y") + | |
facet_grid(rho_label~eta_label) + | |
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) + | |
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
panel.grid = element_blank(), | |
strip.background = element_rect(colour = "gray50", fill = "white"), | |
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"), | |
strip.text.x = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"), | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position="none" | |
) | |
plot(fig4) | |
## compile data from above two loops to make a plot that combines prior, posterior, and data | |
## just for an interesting visualization | |
params_all_plot <- rbind(param_fits, param_fits4) | |
params_all_plot$model <- rep(c("A_fitted", "B_prior"), | |
times = c(nrow(param_fits), | |
nrow(param_fits4))) | |
params_all_plot$model_group <- paste0(params_all_plot$model, params_all_plot$group1) | |
fig4b <- ggplot() + | |
geom_line(data = params_all_plot, aes(x=x, y=y, | |
color = model, group = model_group, alpha = model)) + | |
scale_color_manual(values = c("orange", "purple")) + | |
scale_alpha_manual(values = c(0.1, 0.05)) + | |
geom_point(data = dp, aes(x=x, y = y), size = 1) + | |
theme_bw() + | |
labs(title="Gaussian Process - Fitted over Prior", | |
subtitle = expression("Squared Exponential Kernel wih range of "*rho^2*" and "*eta^2), | |
x = "X", | |
y = "Y") + | |
facet_grid(rho_label~eta_label) + | |
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) + | |
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
panel.grid = element_blank(), | |
strip.background = element_rect(colour = "gray50", fill = "white"), | |
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"), | |
strip.text.x = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"), | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position="none" | |
) | |
plot(fig4b) | |
# quartz.save(file = "GP_SE_fitted_over_prior_range_rho_eta_facet.png", height = 6, width = 7, | |
# type = "png", device = dev.cur(), dpi = 300) | |
############### Fit STAN model with rho and eta as random variables, estimate, and plot | |
## Call STAN with our made-up data points, x1 and y1 | |
## calls GP_estimate_eta_rho_SE.stan with [x,y] sample data | |
## sigma_sq is fixed, but rho_sq, eta_sq, and Y are estimated through MCMC | |
fit5 <- stan(file="GP_estimate_eta_rho_SE.stan", data=list(x1=x1, y1=y1, N1=length(x1), | |
simga_sq = sigma_sq, | |
x2=x, N2=length(x)), | |
iter=iter, chains=chains) | |
## ESIMATE PARAMETERS | |
sims5 <- extract(fit5, permuted=TRUE) | |
## sims5 = 100:201 matrix for each estimated parameter | |
## 100 rows = 1/2 of interation [iter] (non-warmup samples) for each x value | |
## 100 plausible values for y at that datapoint x; | |
## plot(density(sims5$y2[,1])) # density of estimates for y at x[1] | |
## 201 cols = each x value (data point) | |
## plot(sims5$y2[1,], type="l") # response across x for interation 1 | |
## plot median (across all rows) of data point samples (columns) | |
## xx <- apply(sims5$y2,2,median); plot(xx, type="l") | |
## testing on known plausible values from model with fixed rho_sq and sigma_sq | |
## I plotted these as the first of these plots in the blog post | |
## to do so, just uncomment and assign these known values, then run the rest as usual | |
# rho_low <- 0.1 | |
# rho_med <- 1.0 | |
# rho_high <- 10 | |
# eta_low <- 0.1 | |
# eta_med <- 1.0 | |
# eta_high <- 10 | |
## derive 5%, 50%, and 95% quantile from simulated rho_sq and sigma_sq parameters | |
rho_low <- quantile(sims5$rho_sq, 0.05) | |
rho_med <- quantile(sims5$rho_sq, 0.50) | |
rho_high <- quantile(sims5$rho_sq, 0.95) | |
eta_low <- quantile(sims5$eta_sq, 0.05) | |
eta_med <- quantile(sims5$eta_sq, 0.50) | |
eta_high <- quantile(sims5$eta_sq, 0.95) | |
##### SIMULATE FOR 5%, 50%, and 95% levels for parameter estimate | |
## SIMULATE NEW RESPONSE BASED ON RANGE OF HYPERPARAMETERS | |
sim_low <- sim_GP_y(y1, x1, sigma_sq = sigma_sq, rho_low, eta_low, iter=iter, chains=chains) | |
sim_med <- sim_GP_y(y1, x1, sigma_sq = sigma_sq, rho_med, eta_med, iter=iter, chains=chains) | |
sim_high <- sim_GP_y(y1, x1, sigma_sq = sigma_sq, rho_high, eta_high, iter=iter, chains=chains) | |
## PREPARE DATA FOR PLOTTING | |
tmp_low <- melt_sim(sim_low$y2, x, "low rho/eta") | |
tmp_med <- melt_sim(sim_med$y2, x, "median rho/eta") | |
tmp_high <- melt_sim(sim_high$y2, x, "high rho/eta") | |
# bind simulations from various params and order factor for plot | |
tmp <- rbind(tmp_low, tmp_med, tmp_high) | |
tmp$param <- factor(tmp$param,levels(factor(tmp$param))[c(2,3,1)]) | |
# create dataframe of training data for plotting | |
pnt_data <- data.frame(x=rep(x1,3), y=rep(y1,3), | |
param = rep(c("low rho/eta","median rho/eta","high rho/eta"), each = length(x1))) | |
pnt_data$param <- factor(pnt_data$param,levels(factor(pnt_data$param))[c(2,3,1)]) | |
##### plot as 5%/5%, 50%/50%, and 95%/95% parameters | |
Sim_params_plot <- ggplot() + | |
geom_line(data = tmp, aes(x=x, y=y, color = group1), alpha=0.1) + | |
geom_point(data = pnt_data, aes(x = x, y = y)) + | |
theme_bw() + | |
facet_grid(param~.) + | |
scale_color_viridis(discrete=TRUE) + | |
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) + | |
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) + | |
labs(title = "Gaussian Process - Hyperparameter Estimation", | |
subtitle = expression("Squared Exponential Kernel - Simulating "*rho^2*" and "*eta^2*", Sigma Fixed"), | |
x = "X", | |
y = "Y") + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position="none", | |
strip.background = element_rect(colour = "gray50", fill = "white"), | |
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS") | |
) | |
plot(Sim_params_plot) | |
# ggsave(filename = "GP_SE_simulated_range_rho_eta_facet.png", width = 7, height = 4) | |
## Simualte as facted matrix of combinations for 5%, 50%, 95% | |
rho_sim_HML <- c(rho_low, rho_med, rho_high) | |
eta_sim_HML <- c(eta_low, eta_med, eta_high) | |
model_label <- c("low", "median", "high") | |
sim_hyperparams_plot <- NULL | |
for(i in 1:length(eta_sim_HML)){ | |
for(j in 1:length(rho_sim_HML)){ | |
## SIMULATE NEW RESPONSE BASED ON RANGE OF HYPERPARAMETERS | |
sim_hyperparams <- sim_GP_y(y1, x1, sigma_sq = sigma_sq, | |
rho_sim_HML[j], | |
eta_sim_HML[i], | |
iter=iter, chains=chains) | |
## PREPARE DATA FOR PLOTTING | |
label1 <- paste0("rho ", model_label[j]) | |
label2 <- paste0("eta ", model_label[i]) | |
tmp_sim_params <- melt_sim_multiparam(sim_hyperparams$y2, x, label1, label2) | |
# bind simulations from various params and order factor for plot | |
sim_hyperparams_plot <- rbind(sim_hyperparams_plot, tmp_sim_params) | |
} | |
} | |
colnames(sim_hyperparams_plot) <- c("xid", "group1", "y", "rho", "eta", "x") | |
sim_hyperparams_plot$rho <- factor(sim_hyperparams_plot$rho, | |
levels(factor(sim_hyperparams_plot$rho))[c(3,2,1)]) | |
sim_hyperparams_plot$eta <- factor(sim_hyperparams_plot$eta, | |
levels(factor(sim_hyperparams_plot$eta))[c(2,3,1)]) | |
# create dataframe of training data for plotting | |
dp <- data.frame(x=x1, y=y1) | |
## PLOT | |
Sim_hyperparams_plot <- ggplot() + | |
geom_line(data = sim_hyperparams_plot, aes(x=x, y=y, color = group1), alpha=0.1) + | |
geom_point(data = dp, aes(x = x, y = y)) + | |
theme_bw() + | |
facet_grid(rho~eta) + | |
scale_color_viridis(discrete=TRUE) + | |
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) + | |
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) + | |
labs(title = "Gaussian Process - Hyperparameter Estimation", | |
subtitle = expression("Squared Exponential Kernel - Simulating "*rho^2*" and "*eta^2*", Sigma Fixed"), | |
x = "X", | |
y = "Y") + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position="none", | |
strip.background = element_rect(colour = "gray50", fill = "white"), | |
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS") | |
) | |
plot(Sim_hyperparams_plot) | |
# ggsave(filename = "GP_SE_simulated_facet_rho_eta_facet.png", width = 7, height = 6) | |
## slab plot of simulated parameter densities | |
## random sample from our prior (in STAN code) for plotting | |
## clearly this will differ from actual prior used in STAN code, | |
## but is same distribution and parameters for the Cauchy() | |
prior_dist <- rcauchy(nrow(sims5$rho_sq),0,5) | |
sims_plot_data <- data.frame(value = c(sims5$rho_sq, prior_dist, | |
sims5$eta_sq, prior_dist), | |
param = rep(c("rho", "eta"), each = (nrow(sims5$rho_sq))*2), | |
model1 = rep(c("posterior", "prior", "posterior", "prior"), | |
each = nrow(sims5$rho_sq))) | |
# plot top plot (rho) | |
sims_plot_rho <- ggplot(data = dplyr::filter(sims_plot_data, param == "rho"), | |
aes(value, color = model1, group = model1)) + | |
geom_density() + | |
xlim(c(0,100)) + | |
theme_bw() + | |
labs(title = "Hyperparameter Density", | |
subtitle = "Prior = halfCauchy(0,5)", | |
x = "", | |
y = "Density") + | |
scale_color_manual(values = c("dodgerblue2", "firebrick2")) + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
strip.background = element_rect(colour = "gray50", fill = "white"), | |
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"), | |
legend.position="none" | |
) | |
# plot bottom plot (eta) | |
sims_plot_eta <- ggplot(data = dplyr::filter(sims_plot_data, param == "eta"), | |
aes(value, color = model1, group = model1)) + | |
geom_density() + | |
xlim(c(0,10)) + | |
theme_bw() + | |
labs(x = "Parameter Value", | |
y = "") + | |
scale_color_manual(values = c("dodgerblue2", "firebrick2"), | |
guide = guide_legend(title = NULL)) + | |
theme( | |
panel.border = element_rect(colour = "gray90"), | |
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"), | |
axis.text.y = element_text(size = 6, family = "Trebuchet MS"), | |
axis.title = element_text(size = 8, family = "Trebuchet MS"), | |
plot.caption = element_text(size = 8, hjust=0, | |
family = "Trebuchet MS"), | |
plot.title=element_text(family="TrebuchetMS-Bold"), | |
plot.subtitle=element_text(family="TrebuchetMS-Italic"), | |
legend.position="bottom", | |
strip.background = element_rect(colour = "gray50", fill = "white"), | |
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS") | |
) | |
# use cowplot to combine plots and label | |
slab_plot <- cowplot::plot_grid(sims_plot_rho, sims_plot_eta, | |
labels = c("rho^2", "eta^2"), | |
nrow = 2, align = "v", hjust = -0.1, scale = 0.9, vjust = 3) | |
# cowplot::save_plot("slab_plot_halfcauchy.png", slab_plot, | |
# nrow = 2, # and 2 rows | |
# base_aspect_ratio = 1, | |
# base_width = 6, | |
# base_height = 2.5 | |
# ) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment