khakieconomics/GP_example.R

## GP_example.R
# Generate fake data

dgp <- "data {
  int<lower=1> N;
  real x[N];
  real rho;
  real sigma;
}
transformed data {
  vector[N] mu;
  cov_matrix[N] Sigma;
  for (i in 1:N)
    mu[i] <- 0;
  for (i in 1:N)
    for (j in 1:N)
      Sigma[i,j] <- exp(-pow(0.5, 2)*pow(x[i] - x[j],2))
    + if_else(i==j, pow(sigma, 2), 0.0);
}
parameters {
  vector[N] y;
}
model {
  y ~ multi_normal(mu,Sigma);
}"

library(rstan); library(dplyr)
N <- 200
x <- runif(N)

# Known params
sigma <- 1
rho <- 0.5

# draw some samples
dgp_sample <- stan(model_code = dgp, chains = 1, iter = 20)
y_smpl <- extract(dgp_sample, pars = "y", permuted = F) %>%plyr::adply(2) %>% select(-chains)
y <- y_smpl[10,] %>% unlist


# Estimation ------------------------------------------------------------------

# Slightly reparameterised
mod <- "data {
  int<lower=1> N;
  vector[N] x;
  vector[N] y;
}
parameters {
  real<lower=0> rho;
  real<lower=0> sigma;
}
transformed parameters {
  matrix[N,N] Sigma;
  //cholesky_factor_cov[N] L;
  matrix[N, N] L;
  // off-diagonal elements
  for (i in 1:(N-1)) {
    for (j in (i+1):N) {
    Sigma[i,j] <-  exp(-pow(rho, 2) * pow(x[i] - x[j],2));
    Sigma[j,i] <- Sigma[i,j];
    }
  }
  // diagonal elements
  for (k in 1:N){
    Sigma[k,k] <- 1 + pow(sigma, 2); // + jitter
  }
  L <- cholesky_decompose(Sigma);
}
model {
  rho ~ normal(0,1);
  sigma ~ cauchy(0,2);
  y ~ multi_normal_cholesky(rep_vector(0, N), L);
}"

# Compile
stan_mod <- stan_model(model_code = mod)
# Estimate
stan_est <- sampling(stan_mod, iter = 2000, cores = 4, data = list(y = y, x = x, N = N), control= list(adapt_delta = 0.90))
# How good a job does maximum likelihood do?
stan_ml <- optimizing(stan_mod,data = list(y = y, x = x, N = N) )
stan_ml$par[c("rho", "sigma")]

# Have a look at fit
print(stan_est, pars= c("rho", "sigma"))
# Converged pretty well!

# Visualise
library(ggplot2); library(reshape)

# Plot the parameters alongside their true values
extract(stan_est, pars = c("rho", "sigma"), permuted = F) %>% plyr::adply(2) %>% select(-chains) %>% melt %>%
  left_join(data_frame(variable = c("rho", "sigma"), true_value = c(rho, sigma))) %>%
  group_by(variable) %>%
  mutate(expected_value = mean(value)) %>%
  ggplot(aes(x = value)) +
  geom_density(fill = "orange", alpha = 0.5) +
  geom_vline(aes(xintercept = true_value), colour = "red") +
  geom_vline(aes(xintercept = expected_value), colour = "blue") +
  facet_grid(variable~.)


# Plot --------------------------------------------------------------------

rho <- extract(stan_est, pars = c("rho", "sigma"), permuted = F) %>% plyr::adply(2) %>% select(-chains) %>% melt %>%
  left_join(data_frame(variable = c("rho", "sigma"), true_value = c(rho, sigma))) %>%
  filter(variable=="rho") %>%
  mutate(expected_value = mean(value))
MAP <- density(rho$value)$x[which.max(density(rho$value)$y)]

rho %>% mutate(MAP = MAP)%>%
  ggplot(aes(x = value)) +
  geom_density(fill = "orange", alpha = 0.5) +
  geom_vline(aes(xintercept = true_value), colour = "red") +
  geom_vline(aes(xintercept = expected_value), colour = "blue") +
  geom_vline(aes(xintercept = MAP), colour = "orange") +
  xlim(-0.5, 3) +
  ylim(0, 1.6) +
  ggtitle("This is why we do full Bayes\n") +
  annotate("text",x = 1.9, y= 0.25, colour = "orange", label = "Posterior density")+
  annotate("text",x = 0.7, y= 1.5, colour = "red", label = "True\nvalue")+
  annotate("text",x = 0.95, y= 1, colour = "blue", label = "Mean of\nposterior draws") +
  annotate("text",x = -0.05, y= 1.2, colour = "orange", label = "MAP/Penalized\nlikelihood\nestimate")
	# Generate fake data

	dgp <- "data {
	int<lower=1> N;
	real x[N];
	real rho;
	real sigma;
	}
	transformed data {
	vector[N] mu;
	cov_matrix[N] Sigma;
	for (i in 1:N)
	mu[i] <- 0;
	for (i in 1:N)
	for (j in 1:N)
	Sigma[i,j] <- exp(-pow(0.5, 2)*pow(x[i] - x[j],2))
	+ if_else(i==j, pow(sigma, 2), 0.0);
	}
	parameters {
	vector[N] y;
	}
	model {
	y ~ multi_normal(mu,Sigma);
	}"

	library(rstan); library(dplyr)
	N <- 200
	x <- runif(N)

	# Known params
	sigma <- 1
	rho <- 0.5

	# draw some samples
	dgp_sample <- stan(model_code = dgp, chains = 1, iter = 20)
	y_smpl <- extract(dgp_sample, pars = "y", permuted = F) %>%plyr::adply(2) %>% select(-chains)
	y <- y_smpl[10,] %>% unlist


	# Estimation ------------------------------------------------------------------

	# Slightly reparameterised
	mod <- "data {
	int<lower=1> N;
	vector[N] x;
	vector[N] y;
	}
	parameters {
	real<lower=0> rho;
	real<lower=0> sigma;
	}
	transformed parameters {
	matrix[N,N] Sigma;
	//cholesky_factor_cov[N] L;
	matrix[N, N] L;
	// off-diagonal elements
	for (i in 1:(N-1)) {
	for (j in (i+1):N) {
	Sigma[i,j] <- exp(-pow(rho, 2) * pow(x[i] - x[j],2));
	Sigma[j,i] <- Sigma[i,j];
	}
	}
	// diagonal elements
	for (k in 1:N){
	Sigma[k,k] <- 1 + pow(sigma, 2); // + jitter
	}
	L <- cholesky_decompose(Sigma);
	}
	model {
	rho ~ normal(0,1);
	sigma ~ cauchy(0,2);
	y ~ multi_normal_cholesky(rep_vector(0, N), L);
	}"

	# Compile
	stan_mod <- stan_model(model_code = mod)
	# Estimate
	stan_est <- sampling(stan_mod, iter = 2000, cores = 4, data = list(y = y, x = x, N = N), control= list(adapt_delta = 0.90))
	# How good a job does maximum likelihood do?
	stan_ml <- optimizing(stan_mod,data = list(y = y, x = x, N = N) )
	stan_ml$par[c("rho", "sigma")]

	# Have a look at fit
	print(stan_est, pars= c("rho", "sigma"))
	# Converged pretty well!

	# Visualise
	library(ggplot2); library(reshape)

	# Plot the parameters alongside their true values
	extract(stan_est, pars = c("rho", "sigma"), permuted = F) %>% plyr::adply(2) %>% select(-chains) %>% melt %>%
	left_join(data_frame(variable = c("rho", "sigma"), true_value = c(rho, sigma))) %>%
	group_by(variable) %>%
	mutate(expected_value = mean(value)) %>%
	ggplot(aes(x = value)) +
	geom_density(fill = "orange", alpha = 0.5) +
	geom_vline(aes(xintercept = true_value), colour = "red") +
	geom_vline(aes(xintercept = expected_value), colour = "blue") +
	facet_grid(variable~.)


	# Plot --------------------------------------------------------------------

	rho <- extract(stan_est, pars = c("rho", "sigma"), permuted = F) %>% plyr::adply(2) %>% select(-chains) %>% melt %>%
	left_join(data_frame(variable = c("rho", "sigma"), true_value = c(rho, sigma))) %>%
	filter(variable=="rho") %>%
	mutate(expected_value = mean(value))
	MAP <- density(rho$value)$x[which.max(density(rho$value)$y)]

	rho %>% mutate(MAP = MAP)%>%
	ggplot(aes(x = value)) +
	geom_density(fill = "orange", alpha = 0.5) +
	geom_vline(aes(xintercept = true_value), colour = "red") +
	geom_vline(aes(xintercept = expected_value), colour = "blue") +
	geom_vline(aes(xintercept = MAP), colour = "orange") +
	xlim(-0.5, 3) +
	ylim(0, 1.6) +
	ggtitle("This is why we do full Bayes\n") +
	annotate("text",x = 1.9, y= 0.25, colour = "orange", label = "Posterior density")+
	annotate("text",x = 0.7, y= 1.5, colour = "red", label = "True\nvalue")+
	annotate("text",x = 0.95, y= 1, colour = "blue", label = "Mean of\nposterior draws") +
	annotate("text",x = -0.05, y= 1.2, colour = "orange", label = "MAP/Penalized\nlikelihood\nestimate")