RobertMyles/gganimate_beta_simulation.R

## gganimate_beta_simulation.R
# packages
library("ggplot2")
# devtools::install_github("dgrtwo/gganimate")
library("gganimate")
library("dplyr")
# set distribution parameters for simulation
p = 0.5
N = 100
y_lim = 20
a_a = 2
a_b = 10
b_a = 8
b_b = 3
# set the sequence for curve to solve over
x = seq(0,1,length.out = 101) # deafult for curve()
# simulated outcome
outcomes <- sample(1:0,N,prob=c(p,1-p),replace=TRUE)
# successes
success <- cumsum(outcomes)
# failures, not used, but could be part of ggplot2::annotate
failures <- cumsum(ifelse(outcomes == 1, 0, 1))
# fit distirbution for starting point
obsA <- data.frame(x = x, y = dbeta(x,a_a,a_b), sim_dist = 0, model = "Observer A")
obsB <- data.frame(x = x, y = dbeta(x,b_a,b_b), sim_dist = 0, model = "Observer B")
# dim variables to hold distributions
beta1 <- NULL
beta2 <- NULL
# loop over N to create alternate parametrization of beta distributions for each of N
for(sim_dist in 1:N){
  # fit a distirbution based on success at each step in sim_dist
  beta1_df <- data.frame(x = x,
                       y = dbeta(x,a_a+success[sim_dist]+1,a_b+(sim_dist-success[sim_dist])+1),
                       model = "Beta 1",
                       sim_dist = sim_dist)
  # bind to previous loop
  beta1 <- rbind(beta1, beta1_df)
  # fit another dist based on alternate parameters for same success
  beta2_df <- data.frame(x = x,
                       y = dbeta(x,b_a+success[sim_dist]+1,b_b+(sim_dist-success[sim_dist])+1),
                       model = "Beta 2",
                       sim_dist = sim_dist)
  # bind with previosu loop
  beta2 <- rbind(beta2, beta2_df)
}
## Start data manipulation for plotting
# key here is that the entire sequence of both beta1/beta2 for each of N are
# repeated N times. This adds up fast!!!
# the point is to allow for transparency (alpha).
# at each "frame" (here = epochs) in the animation, the entire set of densities are drawn
# but the alpha for lines of future epochs are set to 0.
# This works, but it is not elegant and makes for very large data.frames
# probably a better approach out there.
# bind the simulated densities
plot_dat <- rbind(beta1,beta2)
# repate the sequence N times
plot_dat2 <- do.call("rbind", replicate(N, plot_dat, simplify = FALSE))
# add the epoch label that acts as the animation frames
plot_dat2$epoch <- rep(c(1:N), each = (nrow(plot_dat)))
# add successes and failures.  supposed to be for plotting, but
# using anotation with gganimate is really slow.  I bailed on it.
plot_dat2$Successes <- rep(success, each = (nrow(plot_dat)))
plot_dat2$Failures <- rep(failures, each = (nrow(plot_dat)))
# format final plot data
# for each epoch, do the following...
plot_dat2 <- group_by(plot_dat2, epoch) %>%
         # calculate the distance between the sim_dist index and the epoch
         # where sim_dist == epoch is the current density
  mutate(dist = abs(sim_dist - epoch),
         # distance function from epoch to sim_dist to set vis to the desired alpha
         # stole this from @drob, http://varianceexplained.org/files/bandwidth.html
         vis = ((1 - (dist / max(dist))) ^ 3) ^ 3,
         # set the current line (sim_dist) to vis = 1
         vis = ifelse(sim_dist == epoch, 1, vis),
         # zero out epoch > sim_dist (in the future)
         vis = ifelse(sim_dist > epoch, 0, vis)) %>%
  ungroup()

# the ggplot + gganimate
p3 <- ggplot() +
  geom_line(data = plot_dat2,
            aes(x = x, y = y, color = model,
                group = interaction(model, sim_dist),
                sim_dist = epoch, alpha = vis)) +
  scale_alpha(aes(vis), range = c(0,1)) +
  geom_line(data = obsA, aes(x = x, y = y, color = model),
            alpha = 0.8, linetype = 2) +
  geom_line(data = obsB, aes(x = x, y = y, color = model),
            alpha = 0.8, linetype = 2) +
  geom_hline(aes(color = "True P", yintercept = 0), size = 0) +
  geom_vline(color = "gray50", xintercept = 0.5, linetype = 2, size = 0.5) +
  scale_color_manual(breaks = c("Observer A", "Observer B", "True P"),
                     values = c("orange", "purple", "orange", "purple", "gray50")) +
  scale_x_continuous(breaks = seq(0,1,0.2), labels = seq(0,1,0.2), limits = c(0,1)) +
  guides(model = "legend", alpha = FALSE) +
  # xlim(c(0,1)) +
  ylim(c(0,y_lim)) +
  ylab("Posterior Density") +
  xlab("p")+
  theme_bw() +
  theme(legend.position = c(0, 1),
       legend.justification = c(0, 1),
       legend.background = element_rect(colour = "black", fill = "white", size = 0.2),
       legend.title=element_blank(),
       panel.grid.major = element_blank(),
       panel.grid.minor = element_blank(),
       panel.border = element_rect(colour = "black", size = 0.2))
gg_animate(p3, file = 'beta_dist1.gif', interval = 0.075, ani.width=400, ani.height=400, title_sim_dist = FALSE)
	# packages
	library("ggplot2")
	# devtools::install_github("dgrtwo/gganimate")
	library("gganimate")
	library("dplyr")
	# set distribution parameters for simulation
	p = 0.5
	N = 100
	y_lim = 20
	a_a = 2
	a_b = 10
	b_a = 8
	b_b = 3
	# set the sequence for curve to solve over
	x = seq(0,1,length.out = 101) # deafult for curve()
	# simulated outcome
	outcomes <- sample(1:0,N,prob=c(p,1-p),replace=TRUE)
	# successes
	success <- cumsum(outcomes)
	# failures, not used, but could be part of ggplot2::annotate
	failures <- cumsum(ifelse(outcomes == 1, 0, 1))
	# fit distirbution for starting point
	obsA <- data.frame(x = x, y = dbeta(x,a_a,a_b), sim_dist = 0, model = "Observer A")
	obsB <- data.frame(x = x, y = dbeta(x,b_a,b_b), sim_dist = 0, model = "Observer B")
	# dim variables to hold distributions
	beta1 <- NULL
	beta2 <- NULL
	# loop over N to create alternate parametrization of beta distributions for each of N
	for(sim_dist in 1:N){
	# fit a distirbution based on success at each step in sim_dist
	beta1_df <- data.frame(x = x,
	y = dbeta(x,a_a+success[sim_dist]+1,a_b+(sim_dist-success[sim_dist])+1),
	model = "Beta 1",
	sim_dist = sim_dist)
	# bind to previous loop
	beta1 <- rbind(beta1, beta1_df)
	# fit another dist based on alternate parameters for same success
	beta2_df <- data.frame(x = x,
	y = dbeta(x,b_a+success[sim_dist]+1,b_b+(sim_dist-success[sim_dist])+1),
	model = "Beta 2",
	sim_dist = sim_dist)
	# bind with previosu loop
	beta2 <- rbind(beta2, beta2_df)
	}
	## Start data manipulation for plotting
	# key here is that the entire sequence of both beta1/beta2 for each of N are
	# repeated N times. This adds up fast!!!
	# the point is to allow for transparency (alpha).
	# at each "frame" (here = epochs) in the animation, the entire set of densities are drawn
	# but the alpha for lines of future epochs are set to 0.
	# This works, but it is not elegant and makes for very large data.frames
	# probably a better approach out there.
	# bind the simulated densities
	plot_dat <- rbind(beta1,beta2)
	# repate the sequence N times
	plot_dat2 <- do.call("rbind", replicate(N, plot_dat, simplify = FALSE))
	# add the epoch label that acts as the animation frames
	plot_dat2$epoch <- rep(c(1:N), each = (nrow(plot_dat)))
	# add successes and failures. supposed to be for plotting, but
	# using anotation with gganimate is really slow. I bailed on it.
	plot_dat2$Successes <- rep(success, each = (nrow(plot_dat)))
	plot_dat2$Failures <- rep(failures, each = (nrow(plot_dat)))
	# format final plot data
	# for each epoch, do the following...
	plot_dat2 <- group_by(plot_dat2, epoch) %>%
	# calculate the distance between the sim_dist index and the epoch
	# where sim_dist == epoch is the current density
	mutate(dist = abs(sim_dist - epoch),
	# distance function from epoch to sim_dist to set vis to the desired alpha
	# stole this from @drob, http://varianceexplained.org/files/bandwidth.html
	vis = ((1 - (dist / max(dist))) ^ 3) ^ 3,
	# set the current line (sim_dist) to vis = 1
	vis = ifelse(sim_dist == epoch, 1, vis),
	# zero out epoch > sim_dist (in the future)
	vis = ifelse(sim_dist > epoch, 0, vis)) %>%
	ungroup()

	# the ggplot + gganimate
	p3 <- ggplot() +
	geom_line(data = plot_dat2,
	aes(x = x, y = y, color = model,
	group = interaction(model, sim_dist),
	sim_dist = epoch, alpha = vis)) +
	scale_alpha(aes(vis), range = c(0,1)) +
	geom_line(data = obsA, aes(x = x, y = y, color = model),
	alpha = 0.8, linetype = 2) +
	geom_line(data = obsB, aes(x = x, y = y, color = model),
	alpha = 0.8, linetype = 2) +
	geom_hline(aes(color = "True P", yintercept = 0), size = 0) +
	geom_vline(color = "gray50", xintercept = 0.5, linetype = 2, size = 0.5) +
	scale_color_manual(breaks = c("Observer A", "Observer B", "True P"),
	values = c("orange", "purple", "orange", "purple", "gray50")) +
	scale_x_continuous(breaks = seq(0,1,0.2), labels = seq(0,1,0.2), limits = c(0,1)) +
	guides(model = "legend", alpha = FALSE) +
	# xlim(c(0,1)) +
	ylim(c(0,y_lim)) +
	ylab("Posterior Density") +
	xlab("p")+
	theme_bw() +
	theme(legend.position = c(0, 1),
	legend.justification = c(0, 1),
	legend.background = element_rect(colour = "black", fill = "white", size = 0.2),
	legend.title=element_blank(),
	panel.grid.major = element_blank(),
	panel.grid.minor = element_blank(),
	panel.border = element_rect(colour = "black", size = 0.2))
	gg_animate(p3, file = 'beta_dist1.gif', interval = 0.075, ani.width=400, ani.height=400, title_sim_dist = FALSE)