Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RobertMyles/7ec75cf021a9690aac16f56f256f89d7 to your computer and use it in GitHub Desktop.
Save RobertMyles/7ec75cf021a9690aac16f56f256f89d7 to your computer and use it in GitHub Desktop.
Code for a ggplot2/gganimate version of Cory's Bayesian updating animation: https://gist.github.com/cjbayesian/3373348
# packages
library("ggplot2")
# devtools::install_github("dgrtwo/gganimate")
library("gganimate")
library("dplyr")
# set distribution parameters for simulation
p = 0.5
N = 100
y_lim = 20
a_a = 2
a_b = 10
b_a = 8
b_b = 3
# set the sequence for curve to solve over
x = seq(0,1,length.out = 101) # deafult for curve()
# simulated outcome
outcomes <- sample(1:0,N,prob=c(p,1-p),replace=TRUE)
# successes
success <- cumsum(outcomes)
# failures, not used, but could be part of ggplot2::annotate
failures <- cumsum(ifelse(outcomes == 1, 0, 1))
# fit distirbution for starting point
obsA <- data.frame(x = x, y = dbeta(x,a_a,a_b), sim_dist = 0, model = "Observer A")
obsB <- data.frame(x = x, y = dbeta(x,b_a,b_b), sim_dist = 0, model = "Observer B")
# dim variables to hold distributions
beta1 <- NULL
beta2 <- NULL
# loop over N to create alternate parametrization of beta distributions for each of N
for(sim_dist in 1:N){
# fit a distirbution based on success at each step in sim_dist
beta1_df <- data.frame(x = x,
y = dbeta(x,a_a+success[sim_dist]+1,a_b+(sim_dist-success[sim_dist])+1),
model = "Beta 1",
sim_dist = sim_dist)
# bind to previous loop
beta1 <- rbind(beta1, beta1_df)
# fit another dist based on alternate parameters for same success
beta2_df <- data.frame(x = x,
y = dbeta(x,b_a+success[sim_dist]+1,b_b+(sim_dist-success[sim_dist])+1),
model = "Beta 2",
sim_dist = sim_dist)
# bind with previosu loop
beta2 <- rbind(beta2, beta2_df)
}
## Start data manipulation for plotting
# key here is that the entire sequence of both beta1/beta2 for each of N are
# repeated N times. This adds up fast!!!
# the point is to allow for transparency (alpha).
# at each "frame" (here = epochs) in the animation, the entire set of densities are drawn
# but the alpha for lines of future epochs are set to 0.
# This works, but it is not elegant and makes for very large data.frames
# probably a better approach out there.
# bind the simulated densities
plot_dat <- rbind(beta1,beta2)
# repate the sequence N times
plot_dat2 <- do.call("rbind", replicate(N, plot_dat, simplify = FALSE))
# add the epoch label that acts as the animation frames
plot_dat2$epoch <- rep(c(1:N), each = (nrow(plot_dat)))
# add successes and failures. supposed to be for plotting, but
# using anotation with gganimate is really slow. I bailed on it.
plot_dat2$Successes <- rep(success, each = (nrow(plot_dat)))
plot_dat2$Failures <- rep(failures, each = (nrow(plot_dat)))
# format final plot data
# for each epoch, do the following...
plot_dat2 <- group_by(plot_dat2, epoch) %>%
# calculate the distance between the sim_dist index and the epoch
# where sim_dist == epoch is the current density
mutate(dist = abs(sim_dist - epoch),
# distance function from epoch to sim_dist to set vis to the desired alpha
# stole this from @drob, http://varianceexplained.org/files/bandwidth.html
vis = ((1 - (dist / max(dist))) ^ 3) ^ 3,
# set the current line (sim_dist) to vis = 1
vis = ifelse(sim_dist == epoch, 1, vis),
# zero out epoch > sim_dist (in the future)
vis = ifelse(sim_dist > epoch, 0, vis)) %>%
ungroup()
# the ggplot + gganimate
p3 <- ggplot() +
geom_line(data = plot_dat2,
aes(x = x, y = y, color = model,
group = interaction(model, sim_dist),
sim_dist = epoch, alpha = vis)) +
scale_alpha(aes(vis), range = c(0,1)) +
geom_line(data = obsA, aes(x = x, y = y, color = model),
alpha = 0.8, linetype = 2) +
geom_line(data = obsB, aes(x = x, y = y, color = model),
alpha = 0.8, linetype = 2) +
geom_hline(aes(color = "True P", yintercept = 0), size = 0) +
geom_vline(color = "gray50", xintercept = 0.5, linetype = 2, size = 0.5) +
scale_color_manual(breaks = c("Observer A", "Observer B", "True P"),
values = c("orange", "purple", "orange", "purple", "gray50")) +
scale_x_continuous(breaks = seq(0,1,0.2), labels = seq(0,1,0.2), limits = c(0,1)) +
guides(model = "legend", alpha = FALSE) +
# xlim(c(0,1)) +
ylim(c(0,y_lim)) +
ylab("Posterior Density") +
xlab("p")+
theme_bw() +
theme(legend.position = c(0, 1),
legend.justification = c(0, 1),
legend.background = element_rect(colour = "black", fill = "white", size = 0.2),
legend.title=element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", size = 0.2))
gg_animate(p3, file = 'beta_dist1.gif', interval = 0.075, ani.width=400, ani.height=400, title_sim_dist = FALSE)
@tatakof
Copy link

tatakof commented Jun 12, 2020

Hi Robert, this code is using the old API. Would you mind uploading a new one?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment