Skip to content

Instantly share code, notes, and snippets.

@mrecos
Created May 27, 2016 17:46
Show Gist options
  • Save mrecos/2e04349935534f317a86a260095dafe0 to your computer and use it in GitHub Desktop.
Save mrecos/2e04349935534f317a86a260095dafe0 to your computer and use it in GitHub Desktop.
Code for my blog post on estimating priors, posteriors, and simulating hyperparameters in R using stan
############### FUNCTIONS ####################
## Simulate GP based on fixed sigma, rho, and eta
## calls gp-predict.stan
## this sims over same data (Y,X), but uses infered hyperparameters
sim_GP_y <- function(y1, x1, sigma_sq, rho_sq, eta_sq, iter = iter, chains = chains){
sim_fit <- stan(file="gp-predict_SE.stan", data=list(x1=x1, y1=y1, N1=length(x1),
x2=x, N2=length(x), eta_sq=eta_sq,
rho_sq=rho_sq, sigma_sq=sigma_sq),
iter=iter, chains=chains)
sims <- extract(sim_fit, permuted=TRUE)
return(sims)
}
## takes simulated Y and makes it ready to plot
melt_sim <- function(sim, x_data, param_label){
tmp <- adply(sim, 2)
# plot(sim_data[,2], type = "l") same as; plot(sims$y2[1,], type="l")
# each col of sim_data is each row of sims$y2;
# each col sim_data is a complete simulation across all data points x (n = 201)
# except row 1 is added as a factor of 201 levels for ID
tmp <- melt(tmp)
tmp$model <- param_label
names(tmp) <- c("xid", "group1", "y", "param")
tmp <- mutate(tmp, x=x_data[xid])
return(tmp)
}
## like melt_sim, but for two paramaters
melt_sim_multiparam <- function(sim, x_data, param_label1, param_label2){
tmp <- adply(sim, 2)
# plot(sim_data[,2], type = "l") same as; plot(sims$y2[1,], type="l")
# each col of sim_data is each row of sims$y2;
# each col sim_data is a complete simulation across all data points x (n = 201)
# except row 1 is added as a factor of 201 levels for ID
tmp <- melt(tmp)
tmp$param1 <- param_label1
tmp$param2 <- param_label2
names(tmp) <- c("xid", "group1", "y", "param1", "param2")
tmp <- mutate(tmp, x=x_data[xid])
return(tmp)
}
# simple power function to emulate pow() in stan
r.pow <- function(x,p){x^p}
############### END FUNCTIONS ####################
############### INCLUDES #########################
## load the required packages
library("plyr")
library("rstan")
# devtools::install_github("hadley/ggplot2")
library("ggplot2")
# devtools::install_github("hrbrmstr/ggalt")
library("ggalt")
library("reshape2")
library("matrixcalc")
library("cowplot")
library("MASS")
library("lattice")
############### END INCLUDES ######################
############### INITIALIZATION ####################
eta_sq = 1
rho_sq = 1
# l = sqrt((1/rho_sq)/2)
# rho_sq_test = 1/(2*l^2)
sigma_sq = 0.0001
x <- seq(-10, 10, 0.1)
n <- length(x)
iter = 100
chains = 1
rho_sq_range = c(0.01, 0.1, 1, 10) # for faceted kernel plot
eta_sq_prior = c(0.1, 1, 10)
rho_sq_prior = c(0.1, 1, 10)
# new data for fit
x1 <- c(-6, -3, -1, 0, 2,4,7,9)
y1 <- c(-2, 0, 1, -1, 0,2.5,0,3)
############### END INITIALIZATION ####################
############### CODE AND PLOTS ########################
############### Kernel testing and configuration
## loop to build covraiance matrix and plot.
## use this to test kernel code
Sigma <- matrix(nrow = n, ncol = n)
for (i in 1:n) {
for (j in 1:n){
## squared exponential, aka, RBF, aka Gaussian, aka Exponentiated Quadratic
## simpified form from other code examples
Sigma[i,j] <- eta_sq * exp(-rho_sq*(x[i] - x[j])^2) + ifelse(i==j, sigma_sq, 0.0)
# Sigma[i,j] <- eta_sq * exp(-rho_sq*r.pow(x[i] - x[j],2)) + ifelse(i==j, sigma_sq, 0.0)
## with length parameter == l approximated from rho_sq
# Sigma[i,j] <- eta_sq * exp(-((x[i] - x[j])^2)/(2*(l^2))) + ifelse(i==j, sigma_sq, 0.0)
# Sigma[i,j] <- eta_sq * exp(-((r.pow(x[i] - x[j],2))/(2*r.pow(l,2)))) + ifelse(i==j, sigma_sq, 0.0)
## intermediate with rho_sq inplace of (2*l^2)
# Sigma[i,j] <- eta_sq * exp(-((x[i] - x[j])^2)/(1/rho_sq)) + ifelse(i==j, sigma_sq, 0.0)
# Sigma[i,j] <- eta_sq * exp(-((r.pow(x[i] - x[j],2))/(1/rho_sq))) + ifelse(i==j, sigma_sq, 0.0);
}
}
## plot and test for PSD
lattice::levelplot(Sigma)
matrixcalc::is.positive.semi.definite(Sigma)
############### Kernel calculated for various values of rho and eta
## loop to calculate matrix for each combination of rho and eta
Sigma_all <- NULL
for(z in 1:length(eta_sq)){
for(k in 1:length(rho_sq_range)){
Sigma <- matrix(nrow = n, ncol = n)
for (i in 1:n){
for (j in 1:n){
# print(paste0("working on eta_sq[z] = ", eta_sq[z], ", rho_sq_range[k] = ", rho_sq_range[k],
# ", for i = ", i, ", and j = ", j))
Sigma[i,j] <- eta_sq[z] * exp(-rho_sq_range[k]*(x[i] - x[j])^2) + ifelse(i==j, sigma_sq, 0.0)
}
}
Sigma_df <- data.frame(Sigma)
colnames(Sigma_df) <- seq(1:ncol(Sigma_df))
Sigma_df <- reshape2::melt(Sigma_df)
Sigma_df$rho <- rho_sq_range[k]
Sigma_df$eta <- eta_sq[z]
Sigma_df$tmp <- seq(1:nrow(Sigma))
Sigma_all <- rbind(Sigma_all, Sigma_df)
}
}
colnames(Sigma_all) <- c("col", "value", "rho", "eta", "row")
## plot all matricies and facet by rho and eta
ggplot() +
geom_raster(data = Sigma_all, aes(x = row, y = col, fill = value)) +
facet_grid(. ~ rho) +
scale_fill_viridis(option="inferno", direction = 1) +
theme_bw() +
labs(title="Squared Exponential Kernel Varying Length Scale",
subtitle = expression(eta^2*" = 0.1; "*sigma^2*" = 0.0001; "*rho^2*" range of 0.01, 0.1, 1, 10")) +
theme(
panel.border = element_rect(colour = "gray90"),
panel.grid = element_blank(),
strip.background = element_rect(colour = "gray50", fill = "white"),
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
strip.text.x = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position = "none"
)
# ggsave(filename = "GP_SE_kernel_facet_rho_facet.png", width = 7.5 , height = 2.5)
############### Fit STAN model to fixed rho and eta & plot
## Simulate a process with no data to get priors
## The very small sigma_sq value is necessary to avoid an error. Don't set it to zero.
fit1 <- stan(file="gp-sim_SE.stan", data=list(x=x, N=n, eta_sq=eta_sq,
rho_sq=rho_sq, sigma_sq=sigma_sq),
iter=iter, chains=chains)
sims1 <- extract(fit1, permuted=TRUE)
## Rearrange the data for plotting
se_sim_data <- adply(sims1$y, 2)
tmp1 <- melt(se_sim_data)
names(tmp1) <- c("xid", "group", "y")
tmp1 <- mutate(tmp1, x=x[xid])
## plot it
fig2a <- ggplot(tmp1, aes(x=x, y=y, color = group)) +
geom_line(aes(group=group),alpha=0.2) +
theme_bw() +
labs(title="Gaussian Process - Prior",
subtitle=paste0("Squared Exponential Kernel"),
caption=paste0("rho_sq = ", rho_sq, ", sigma_sq = ", sigma_sq,
", eta_sq = ", eta_sq),
x = "X",
y = "Y") +
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=1), labels = seq(-10,10,by=1)) +
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) +
theme(
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position="none"
)
plot(fig2a)
## plot data points alone
fig2b_pnts <- ggplot(data=data.frame(x=x1, y=y1), aes(x=x1, y = y1)) +
theme_bw() +
geom_point(size = 3) +
geom_point(size = 6, color = "black", shape = 1) +
labs(title="Gaussian Process - Data",
x = "X",
y = "Y") +
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=1), labels = seq(-10,10,by=1)) +
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) +
theme(
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position="none"
)
plot(fig2b_pnts)
## Simulate with a few noise-free data points to get posterior.
## still fixed values for eta and rho
## Again pretend the noise is almost zero, but not quite.
fit2 <- stan(file="gp-predict_SE.stan", data=list(x1=x1, y1=y1, N1=length(x1),
x2=x, N2=length(x),
eta_sq=eta_sq,
rho_sq=rho_sq,
sigma_sq=sigma_sq),
iter=iter, chains=chains)
sims2 <- extract(fit2, permuted=TRUE)
## Rearrange the data for plotting
se_sim_data2 <- adply(sims2$y, 2)
tmp2 <- melt(se_sim_data2)
names(tmp2) <- c("xid", "group", "y")
tmp2 <- mutate(tmp2, x=x[xid])
# plot it
fig2b <- ggplot(data = tmp2, aes(x=x, y=y)) +
geom_line(aes(group=group, color = group), alpha=0.2) +
theme_bw() +
geom_point(data=data.frame(x=x1, y=y1), aes(x=x1, y = y1), size = 3) +
geom_point(data=data.frame(x=x1, y=y1), aes(x=x1, y = y1),
size = 6, color = "black", shape = 1) +
labs(caption=paste0("rho_sq = ", rho_sq, ", eta_sq = ", eta_sq, "sigma_sq = ", sigma_sq),
x = "X",
y = "Y") +
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=1), labels = seq(-10,10,by=1)) +
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) +
theme(
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position="none"
)
plot(fig2b)
############### Fit STAN model to subjective range of rho and eta & plot
## loop over eta and rho values, compile results for priors plot
param_fits <- NULL
for(i in 1:length(eta_sq_prior)){
for(j in 1:length(rho_sq_prior)){
fit3 <- stan(file="gp-sim_SE.stan",
data=list(x=x, N=n,
eta_sq=eta_sq_prior[i], rho_sq=rho_sq_prior[j],
sigma_sq=sigma_sq),
iter=iter, chains=chains)
sims1 <- extract(fit3, permuted=TRUE)
## Rearrange the data and plot it
data <- adply(sims1$y, 2)
tmp1<- melt(data)
names(tmp1) <- c("xid", "group1", "y")
tmp1 <- mutate(tmp1, x=x[xid])
tmp1$rho <- rho_sq_prior[j]
tmp1$eta <- eta_sq_prior[i]
sigma_sq < sigma_sq
param_fits <- rbind(param_fits, tmp1)
}
}
param_fits$rho_label <- paste0("rho ",param_fits$rho)
param_fits$eta_label <- paste0("eta ",param_fits$eta)
## plot priors facetted over various eta and rho wvlues
fig3 <- ggplot(data = param_fits, aes(x=x, y=y, color = group1)) +
geom_line(aes(group=group1),alpha=0.2) +
scale_color_viridis(discrete=TRUE) +
theme_bw() +
labs(title="Gaussian Process - Prior",
subtitle = expression("Squared Exponential Kernel wih range of "*rho^2*" and "*eta^2),
x = "X",
y = "Y") +
facet_grid(rho_label~eta_label) +
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) +
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) +
theme(
panel.border = element_rect(colour = "gray90"),
panel.grid = element_blank(),
strip.background = element_rect(colour = "gray50", fill = "white"),
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
strip.text.x = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position="none"
)
plot(fig3)
## Simulate with a few noise-free data points for posterior over range of eta and rho
## Again pretend the noise is almost zero, but not quite.
param_fits4 <- NULL
for(i in 1:length(eta_sq_prior)){
for(j in 1:length(rho_sq_prior)){
## Parameter value fixed in given example
fit4 <- stan(file="gp-predict_SE.stan", data=list(x1=x1, y1=y1, N1=length(x1),
x2=x, N2=length(x), eta_sq=eta_sq_prior[i],
rho_sq=rho_sq_prior[j], sigma_sq=sigma_sq),
iter=iter, chains=chains)
sims4 <- extract(fit4, permuted=TRUE)
## Rearrange the data and plot it
data4 <- adply(sims4$y2, 2)
tmp4 <- melt(data4)
names(tmp4) <- c("xid", "group1", "y")
tmp4 <- mutate(tmp4, x=x[xid])
tmp4$rho <- rho_sq_prior[j]
tmp4$eta <- eta_sq_prior[i]
sigma_sq < sigma_sq
param_fits4 <- rbind(param_fits4, tmp4)
}
}
param_fits4$rho_label <- paste0("rho ",param_fits4$rho)
param_fits4$eta_label <- paste0("eta ",param_fits4$eta)
dp <- data.frame(x=x1, y=y1)
## plot posreriors and data points for range of eta and rho
fig4 <- ggplot() +
geom_line(data = param_fits4, aes(x=x, y=y, color = group1), alpha=0.2) +
scale_color_viridis(discrete=TRUE) +
geom_point(data = dp, aes(x=x, y = y), size = 1) +
theme_bw() +
labs(title="Gaussian Process - Fitted",
subtitle = expression("Squared Exponential Kernel wih range of "*rho^2*" and "*eta^2),
x = "X",
y = "Y") +
facet_grid(rho_label~eta_label) +
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) +
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) +
theme(
panel.border = element_rect(colour = "gray90"),
panel.grid = element_blank(),
strip.background = element_rect(colour = "gray50", fill = "white"),
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
strip.text.x = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position="none"
)
plot(fig4)
## compile data from above two loops to make a plot that combines prior, posterior, and data
## just for an interesting visualization
params_all_plot <- rbind(param_fits, param_fits4)
params_all_plot$model <- rep(c("A_fitted", "B_prior"),
times = c(nrow(param_fits),
nrow(param_fits4)))
params_all_plot$model_group <- paste0(params_all_plot$model, params_all_plot$group1)
fig4b <- ggplot() +
geom_line(data = params_all_plot, aes(x=x, y=y,
color = model, group = model_group, alpha = model)) +
scale_color_manual(values = c("orange", "purple")) +
scale_alpha_manual(values = c(0.1, 0.05)) +
geom_point(data = dp, aes(x=x, y = y), size = 1) +
theme_bw() +
labs(title="Gaussian Process - Fitted over Prior",
subtitle = expression("Squared Exponential Kernel wih range of "*rho^2*" and "*eta^2),
x = "X",
y = "Y") +
facet_grid(rho_label~eta_label) +
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) +
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) +
theme(
panel.border = element_rect(colour = "gray90"),
panel.grid = element_blank(),
strip.background = element_rect(colour = "gray50", fill = "white"),
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
strip.text.x = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position="none"
)
plot(fig4b)
# quartz.save(file = "GP_SE_fitted_over_prior_range_rho_eta_facet.png", height = 6, width = 7,
# type = "png", device = dev.cur(), dpi = 300)
############### Fit STAN model with rho and eta as random variables, estimate, and plot
## Call STAN with our made-up data points, x1 and y1
## calls GP_estimate_eta_rho_SE.stan with [x,y] sample data
## sigma_sq is fixed, but rho_sq, eta_sq, and Y are estimated through MCMC
fit5 <- stan(file="GP_estimate_eta_rho_SE.stan", data=list(x1=x1, y1=y1, N1=length(x1),
simga_sq = sigma_sq,
x2=x, N2=length(x)),
iter=iter, chains=chains)
## ESIMATE PARAMETERS
sims5 <- extract(fit5, permuted=TRUE)
## sims5 = 100:201 matrix for each estimated parameter
## 100 rows = 1/2 of interation [iter] (non-warmup samples) for each x value
## 100 plausible values for y at that datapoint x;
## plot(density(sims5$y2[,1])) # density of estimates for y at x[1]
## 201 cols = each x value (data point)
## plot(sims5$y2[1,], type="l") # response across x for interation 1
## plot median (across all rows) of data point samples (columns)
## xx <- apply(sims5$y2,2,median); plot(xx, type="l")
## testing on known plausible values from model with fixed rho_sq and sigma_sq
## I plotted these as the first of these plots in the blog post
## to do so, just uncomment and assign these known values, then run the rest as usual
# rho_low <- 0.1
# rho_med <- 1.0
# rho_high <- 10
# eta_low <- 0.1
# eta_med <- 1.0
# eta_high <- 10
## derive 5%, 50%, and 95% quantile from simulated rho_sq and sigma_sq parameters
rho_low <- quantile(sims5$rho_sq, 0.05)
rho_med <- quantile(sims5$rho_sq, 0.50)
rho_high <- quantile(sims5$rho_sq, 0.95)
eta_low <- quantile(sims5$eta_sq, 0.05)
eta_med <- quantile(sims5$eta_sq, 0.50)
eta_high <- quantile(sims5$eta_sq, 0.95)
##### SIMULATE FOR 5%, 50%, and 95% levels for parameter estimate
## SIMULATE NEW RESPONSE BASED ON RANGE OF HYPERPARAMETERS
sim_low <- sim_GP_y(y1, x1, sigma_sq = sigma_sq, rho_low, eta_low, iter=iter, chains=chains)
sim_med <- sim_GP_y(y1, x1, sigma_sq = sigma_sq, rho_med, eta_med, iter=iter, chains=chains)
sim_high <- sim_GP_y(y1, x1, sigma_sq = sigma_sq, rho_high, eta_high, iter=iter, chains=chains)
## PREPARE DATA FOR PLOTTING
tmp_low <- melt_sim(sim_low$y2, x, "low rho/eta")
tmp_med <- melt_sim(sim_med$y2, x, "median rho/eta")
tmp_high <- melt_sim(sim_high$y2, x, "high rho/eta")
# bind simulations from various params and order factor for plot
tmp <- rbind(tmp_low, tmp_med, tmp_high)
tmp$param <- factor(tmp$param,levels(factor(tmp$param))[c(2,3,1)])
# create dataframe of training data for plotting
pnt_data <- data.frame(x=rep(x1,3), y=rep(y1,3),
param = rep(c("low rho/eta","median rho/eta","high rho/eta"), each = length(x1)))
pnt_data$param <- factor(pnt_data$param,levels(factor(pnt_data$param))[c(2,3,1)])
##### plot as 5%/5%, 50%/50%, and 95%/95% parameters
Sim_params_plot <- ggplot() +
geom_line(data = tmp, aes(x=x, y=y, color = group1), alpha=0.1) +
geom_point(data = pnt_data, aes(x = x, y = y)) +
theme_bw() +
facet_grid(param~.) +
scale_color_viridis(discrete=TRUE) +
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) +
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) +
labs(title = "Gaussian Process - Hyperparameter Estimation",
subtitle = expression("Squared Exponential Kernel - Simulating "*rho^2*" and "*eta^2*", Sigma Fixed"),
x = "X",
y = "Y") +
theme(
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position="none",
strip.background = element_rect(colour = "gray50", fill = "white"),
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS")
)
plot(Sim_params_plot)
# ggsave(filename = "GP_SE_simulated_range_rho_eta_facet.png", width = 7, height = 4)
## Simualte as facted matrix of combinations for 5%, 50%, 95%
rho_sim_HML <- c(rho_low, rho_med, rho_high)
eta_sim_HML <- c(eta_low, eta_med, eta_high)
model_label <- c("low", "median", "high")
sim_hyperparams_plot <- NULL
for(i in 1:length(eta_sim_HML)){
for(j in 1:length(rho_sim_HML)){
## SIMULATE NEW RESPONSE BASED ON RANGE OF HYPERPARAMETERS
sim_hyperparams <- sim_GP_y(y1, x1, sigma_sq = sigma_sq,
rho_sim_HML[j],
eta_sim_HML[i],
iter=iter, chains=chains)
## PREPARE DATA FOR PLOTTING
label1 <- paste0("rho ", model_label[j])
label2 <- paste0("eta ", model_label[i])
tmp_sim_params <- melt_sim_multiparam(sim_hyperparams$y2, x, label1, label2)
# bind simulations from various params and order factor for plot
sim_hyperparams_plot <- rbind(sim_hyperparams_plot, tmp_sim_params)
}
}
colnames(sim_hyperparams_plot) <- c("xid", "group1", "y", "rho", "eta", "x")
sim_hyperparams_plot$rho <- factor(sim_hyperparams_plot$rho,
levels(factor(sim_hyperparams_plot$rho))[c(3,2,1)])
sim_hyperparams_plot$eta <- factor(sim_hyperparams_plot$eta,
levels(factor(sim_hyperparams_plot$eta))[c(2,3,1)])
# create dataframe of training data for plotting
dp <- data.frame(x=x1, y=y1)
## PLOT
Sim_hyperparams_plot <- ggplot() +
geom_line(data = sim_hyperparams_plot, aes(x=x, y=y, color = group1), alpha=0.1) +
geom_point(data = dp, aes(x = x, y = y)) +
theme_bw() +
facet_grid(rho~eta) +
scale_color_viridis(discrete=TRUE) +
scale_x_continuous(limits = c(-10,10), breaks = seq(-10,10,by=2), labels = seq(-10,10,by=2)) +
scale_y_continuous(limits = c(-3,3), breaks = seq(-3,3,by=1), labels = seq(-3,3,by=1)) +
labs(title = "Gaussian Process - Hyperparameter Estimation",
subtitle = expression("Squared Exponential Kernel - Simulating "*rho^2*" and "*eta^2*", Sigma Fixed"),
x = "X",
y = "Y") +
theme(
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position="none",
strip.background = element_rect(colour = "gray50", fill = "white"),
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS")
)
plot(Sim_hyperparams_plot)
# ggsave(filename = "GP_SE_simulated_facet_rho_eta_facet.png", width = 7, height = 6)
## slab plot of simulated parameter densities
## random sample from our prior (in STAN code) for plotting
## clearly this will differ from actual prior used in STAN code,
## but is same distribution and parameters for the Cauchy()
prior_dist <- rcauchy(nrow(sims5$rho_sq),0,5)
sims_plot_data <- data.frame(value = c(sims5$rho_sq, prior_dist,
sims5$eta_sq, prior_dist),
param = rep(c("rho", "eta"), each = (nrow(sims5$rho_sq))*2),
model1 = rep(c("posterior", "prior", "posterior", "prior"),
each = nrow(sims5$rho_sq)))
# plot top plot (rho)
sims_plot_rho <- ggplot(data = dplyr::filter(sims_plot_data, param == "rho"),
aes(value, color = model1, group = model1)) +
geom_density() +
xlim(c(0,100)) +
theme_bw() +
labs(title = "Hyperparameter Density",
subtitle = "Prior = halfCauchy(0,5)",
x = "",
y = "Density") +
scale_color_manual(values = c("dodgerblue2", "firebrick2")) +
theme(
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
strip.background = element_rect(colour = "gray50", fill = "white"),
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
legend.position="none"
)
# plot bottom plot (eta)
sims_plot_eta <- ggplot(data = dplyr::filter(sims_plot_data, param == "eta"),
aes(value, color = model1, group = model1)) +
geom_density() +
xlim(c(0,10)) +
theme_bw() +
labs(x = "Parameter Value",
y = "") +
scale_color_manual(values = c("dodgerblue2", "firebrick2"),
guide = guide_legend(title = NULL)) +
theme(
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 0, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
plot.caption = element_text(size = 8, hjust=0,
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic"),
legend.position="bottom",
strip.background = element_rect(colour = "gray50", fill = "white"),
strip.text.y = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS")
)
# use cowplot to combine plots and label
slab_plot <- cowplot::plot_grid(sims_plot_rho, sims_plot_eta,
labels = c("rho^2", "eta^2"),
nrow = 2, align = "v", hjust = -0.1, scale = 0.9, vjust = 3)
# cowplot::save_plot("slab_plot_halfcauchy.png", slab_plot,
# nrow = 2, # and 2 rows
# base_aspect_ratio = 1,
# base_width = 6,
# base_height = 2.5
# )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment