jameshenegan/figure-9-3.R

## figure-9-3.R
library(magrittr)
library(ggplot2)

# Trying to reproduce Figure 9.3 from the
# Second Edition of "Statistical Rethinking"
# by Richard McElreath

##########################################################
# Main Steps:
# 1. Try to identify the posterior used by McElreath
# 2. Create the contour lines for the plot
# 3. Write a function that will perform the Metropolis Algorithm
# 4. Make the two plots in Figure 9.3
##########################################################

#######################################################################
# 1. Try to identify the posterior used by McElreath
#######################################################################

# Our first challenge is to identify the 2-dimensional
# posterior mentioned below:

# "Figure 9.3 shows an ordinary Metropolis algorithm
# trying to explore a 2-dimensional posterior with a
# strong negative correlation of -0.9" (page 268)

# We ended up going with a Bivariate normal distribution
# centered at mu = (0,0) with covariance matrix Sigma (given below)

# See "Bivariate Case" of the following article for more info:
# https://en.wikipedia.org/wiki/Multivariate_normal_distribution

# mean
mu <- c(0,0)

# covariance
sigma_X <- 0.22
sigma_Y <- 0.22
rho <- -0.9
Sigma <- matrix(data = c(sigma_X^2,
                         rho*sigma_X*sigma_Y,
                         rho*sigma_X*sigma_Y,
                         sigma_Y^2),
                nrow = 2)

# We can sample from the distribution with mvtnorm::rmvnorm
my_samples <- mvtnorm::rmvnorm(n = 1000, mean = mu, sigma = Sigma)

# The following correlation should be (close to) -0.9
cor(my_samples[,1], my_samples[,2])

# We can evaluate the density function for this distribution
# using mvtnorm::dmvnorm
# We will do this in the next section

# But first, some garbage collection... (clean up the global environment)
rm(sigma_X, sigma_Y, rho, my_samples)

#######################################################################
# 2. Create the contour lines for the plot
#######################################################################

# To create the contour lines, we will first
# we need to make a "grid" of our (x,y) value pairs

x_y_grid <- function(x_start = -1.5,
                     x_stop = 1.5,
                     x_length = 100,
                     y_start = -1.5,
                     y_stop = 1.5,
                     y_length = 100){
  x_domain <- seq(from = x_start, to = x_stop, length.out = x_length)
  y_domain <- seq(from = y_start, to = y_stop, length.out = y_length)
  x_y_grid_tibble <- tidyr::expand_grid(x = x_domain, y = y_domain)
  return(x_y_grid_tibble)
}

contour_plot_dat <- x_y_grid()

# Now we evaluate the density at each point in our grid
z_values <- mvtnorm::dmvnorm(as.matrix(contour_plot_dat), mean = mu, sigma = Sigma)

# Store the result
contour_plot_dat <- contour_plot_dat %>% dplyr::mutate(z = z_values)

# plot the contour lines of the z-values
contour_plot <-
  ggplot() + geom_contour(data = contour_plot_dat, aes(x = x, y = y, z = z))

# Examine the result
contour_plot

# We need to redefine our "breaks" for the contour lines so that
# it looks more like Figure 9.3

# Let's try this:

breaks <- rep(0, 20)

for(i in 1:20){
  breaks[i] <- 5^(-(10*i))
}

contour_plot <-
  ggplot() + geom_contour(data = contour_plot_dat, aes(x = x, y = y, z = z), breaks = breaks)

# Examine the result
contour_plot
# Looks good for now

# Garbage collection
rm(z_values, x_y_grid, contour_plot_dat, breaks, i)

#######################################################################
#  3. Write a function that will perform the Metropolis Algorithm
#######################################################################

# We will perform a Metropolis algorithm (as I understand it) on the
# posterior defined above, while keeping track of the following data:
# - coordinates of candidate (i.e., proposal) points
# - whether or not the candidate points were accepted

# Here is a function that goes through the algorithm

metropolis <- function(num_proposals = 50,
                       step_size = 0.1,
                       starting_point = c(-1,1)){

  # Initialize vectors where we will keep track of relevant
  candidate_x_history <- rep(-Inf, num_proposals)
  candidate_y_history <- rep(-Inf, num_proposals)
  did_move_history <- rep(FALSE, num_proposals)

  # Prepare to begin the algorithm...
  current_point <- starting_point

  for(i in 1:num_proposals){

    # "Proposals are generated by adding random Gaussian noise
    # to each parameter"

    noise <- rnorm(n=2, mean = 0, sd = step_size)
    candidate_point <- current_point + noise

    # store coordinates of the proposal point
    candidate_x_history[i] <- candidate_point[1]
    candidate_y_history[i] <- candidate_point[2]

    # evaluate the density of our posterior at the proposal point
    candidate_prob <- mvtnorm::dmvnorm(candidate_point, mean = mu, sigma = Sigma)

    # evaluate the density of our posterior at the current point
    current_prob <- mvtnorm::dmvnorm(current_point, mean = mu, sigma = Sigma)

    # Decide whether or not we should move to the candidate point
    acceptance_ratio <- candidate_prob/current_prob
    should_move <- ifelse(runif(n = 1) < acceptance_ratio, TRUE, FALSE)

    # Keep track of the decision
    did_move_history[i] <- should_move

    # Move if necessary
    if(should_move){
      current_point <- candidate_point
    }
  }

  # once the loop is complete, store the relevant results in a tibble
  results <- tibble::tibble(
    candidate_x = candidate_x_history,
    candidate_y = candidate_y_history,
    did_move = did_move_history
  )

  # compute the "acceptance rate" by dividing the total number of "moves"
  # by the total number of proposals

  number_of_moves <- results %>% dplyr::pull(did_move) %>% sum(.)
  acceptance_rate <- number_of_moves/num_proposals

  return(list( results = results, acceptance_rate = acceptance_rate))
}

#######################################################################
# 4. Make the two plots in Figure 9.3
#######################################################################

# Run the algorithm for the plot on the left
set.seed(4)
round_1 <- metropolis(num_proposals = 50,
                      step_size = 0.1,
                      starting_point = c(-1,1))

# Create the plot on the left
contour_plot +
  geom_point(data = round_1$results,
             mapping = aes(x = candidate_x, y = candidate_y, color = did_move)) +
  labs(caption = paste("Acceptance Rate: ", round_1$acceptance_rate))

# Run the algorithm for the plot on the right
set.seed(3)
round_2 <- metropolis(num_proposals = 50,
                      step_size = 0.25,
                      starting_point = c(-1,1))

# Create the plot on the right
contour_plot +
  geom_point(data = round_2$results,
             mapping = aes(x = candidate_x, y = candidate_y, color = did_move)) +
  labs(caption = paste("Acceptance Rate: ", round_2$acceptance_rate))


# Garbage collection
rm(contour_plot, round_1, round_2, Sigma, mu, metropolis)
detach("package:magrittr")
detach("package:ggplot2")
	library(magrittr)
	library(ggplot2)

	# Trying to reproduce Figure 9.3 from the
	# Second Edition of "Statistical Rethinking"
	# by Richard McElreath

	##########################################################
	# Main Steps:
	# 1. Try to identify the posterior used by McElreath
	# 2. Create the contour lines for the plot
	# 3. Write a function that will perform the Metropolis Algorithm
	# 4. Make the two plots in Figure 9.3
	##########################################################

	#######################################################################
	# 1. Try to identify the posterior used by McElreath
	#######################################################################

	# Our first challenge is to identify the 2-dimensional
	# posterior mentioned below:

	# "Figure 9.3 shows an ordinary Metropolis algorithm
	# trying to explore a 2-dimensional posterior with a
	# strong negative correlation of -0.9" (page 268)

	# We ended up going with a Bivariate normal distribution
	# centered at mu = (0,0) with covariance matrix Sigma (given below)

	# See "Bivariate Case" of the following article for more info:
	# https://en.wikipedia.org/wiki/Multivariate_normal_distribution

	# mean
	mu <- c(0,0)

	# covariance
	sigma_X <- 0.22
	sigma_Y <- 0.22
	rho <- -0.9
	Sigma <- matrix(data = c(sigma_X^2,
	rhosigma_Xsigma_Y,
	rhosigma_Xsigma_Y,
	sigma_Y^2),
	nrow = 2)

	# We can sample from the distribution with mvtnorm::rmvnorm
	my_samples <- mvtnorm::rmvnorm(n = 1000, mean = mu, sigma = Sigma)

	# The following correlation should be (close to) -0.9
	cor(my_samples[,1], my_samples[,2])

	# We can evaluate the density function for this distribution
	# using mvtnorm::dmvnorm
	# We will do this in the next section

	# But first, some garbage collection... (clean up the global environment)
	rm(sigma_X, sigma_Y, rho, my_samples)

	#######################################################################
	# 2. Create the contour lines for the plot
	#######################################################################

	# To create the contour lines, we will first
	# we need to make a "grid" of our (x,y) value pairs

	x_y_grid <- function(x_start = -1.5,
	x_stop = 1.5,
	x_length = 100,
	y_start = -1.5,
	y_stop = 1.5,
	y_length = 100){
	x_domain <- seq(from = x_start, to = x_stop, length.out = x_length)
	y_domain <- seq(from = y_start, to = y_stop, length.out = y_length)
	x_y_grid_tibble <- tidyr::expand_grid(x = x_domain, y = y_domain)
	return(x_y_grid_tibble)
	}

	contour_plot_dat <- x_y_grid()

	# Now we evaluate the density at each point in our grid
	z_values <- mvtnorm::dmvnorm(as.matrix(contour_plot_dat), mean = mu, sigma = Sigma)

	# Store the result
	contour_plot_dat <- contour_plot_dat %>% dplyr::mutate(z = z_values)

	# plot the contour lines of the z-values
	contour_plot <-
	ggplot() + geom_contour(data = contour_plot_dat, aes(x = x, y = y, z = z))

	# Examine the result
	contour_plot

	# We need to redefine our "breaks" for the contour lines so that
	# it looks more like Figure 9.3

	# Let's try this:

	breaks <- rep(0, 20)

	for(i in 1:20){
	breaks[i] <- 5^(-(10*i))
	}

	contour_plot <-
	ggplot() + geom_contour(data = contour_plot_dat, aes(x = x, y = y, z = z), breaks = breaks)

	# Examine the result
	contour_plot
	# Looks good for now

	# Garbage collection
	rm(z_values, x_y_grid, contour_plot_dat, breaks, i)

	#######################################################################
	# 3. Write a function that will perform the Metropolis Algorithm
	#######################################################################

	# We will perform a Metropolis algorithm (as I understand it) on the
	# posterior defined above, while keeping track of the following data:
	# - coordinates of candidate (i.e., proposal) points
	# - whether or not the candidate points were accepted

	# Here is a function that goes through the algorithm

	metropolis <- function(num_proposals = 50,
	step_size = 0.1,
	starting_point = c(-1,1)){

	# Initialize vectors where we will keep track of relevant
	candidate_x_history <- rep(-Inf, num_proposals)
	candidate_y_history <- rep(-Inf, num_proposals)
	did_move_history <- rep(FALSE, num_proposals)

	# Prepare to begin the algorithm...
	current_point <- starting_point

	for(i in 1:num_proposals){

	# "Proposals are generated by adding random Gaussian noise
	# to each parameter"

	noise <- rnorm(n=2, mean = 0, sd = step_size)
	candidate_point <- current_point + noise

	# store coordinates of the proposal point
	candidate_x_history[i] <- candidate_point[1]
	candidate_y_history[i] <- candidate_point[2]

	# evaluate the density of our posterior at the proposal point
	candidate_prob <- mvtnorm::dmvnorm(candidate_point, mean = mu, sigma = Sigma)

	# evaluate the density of our posterior at the current point
	current_prob <- mvtnorm::dmvnorm(current_point, mean = mu, sigma = Sigma)

	# Decide whether or not we should move to the candidate point
	acceptance_ratio <- candidate_prob/current_prob
	should_move <- ifelse(runif(n = 1) < acceptance_ratio, TRUE, FALSE)

	# Keep track of the decision
	did_move_history[i] <- should_move

	# Move if necessary
	if(should_move){
	current_point <- candidate_point
	}
	}

	# once the loop is complete, store the relevant results in a tibble
	results <- tibble::tibble(
	candidate_x = candidate_x_history,
	candidate_y = candidate_y_history,
	did_move = did_move_history
	)

	# compute the "acceptance rate" by dividing the total number of "moves"
	# by the total number of proposals

	number_of_moves <- results %>% dplyr::pull(did_move) %>% sum(.)
	acceptance_rate <- number_of_moves/num_proposals

	return(list( results = results, acceptance_rate = acceptance_rate))
	}

	#######################################################################
	# 4. Make the two plots in Figure 9.3
	#######################################################################

	# Run the algorithm for the plot on the left
	set.seed(4)
	round_1 <- metropolis(num_proposals = 50,
	step_size = 0.1,
	starting_point = c(-1,1))

	# Create the plot on the left
	contour_plot +
	geom_point(data = round_1$results,
	mapping = aes(x = candidate_x, y = candidate_y, color = did_move)) +
	labs(caption = paste("Acceptance Rate: ", round_1$acceptance_rate))

	# Run the algorithm for the plot on the right
	set.seed(3)
	round_2 <- metropolis(num_proposals = 50,
	step_size = 0.25,
	starting_point = c(-1,1))

	# Create the plot on the right
	contour_plot +
	geom_point(data = round_2$results,
	mapping = aes(x = candidate_x, y = candidate_y, color = did_move)) +
	labs(caption = paste("Acceptance Rate: ", round_2$acceptance_rate))


	# Garbage collection
	rm(contour_plot, round_1, round_2, Sigma, mu, metropolis)
	detach("package:magrittr")
	detach("package:ggplot2")