Created
May 10, 2020 17:48
-
-
Save cimentadaj/7ac382d0504c581de0a99ae51c6b26a8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rethinking) | |
data(reedfrogs) | |
d <- reedfrogs | |
d$tank <- 1:nrow(d) | |
dat <- list( | |
S = d$surv, | |
N = d$density, | |
tank = d$tank | |
) | |
m13.1 <- ulam( | |
alist( | |
S ~ dbinom(N, p), | |
logit(p) <- a[tank], | |
a[tank] ~ dnorm(0, 1.5) | |
), | |
data = dat, chains = 4, log_lik = TRUE | |
) | |
hist(round(inv_logit(precis(m13.1, depth = 2)[["mean"]]), 2)) | |
m13.2 <- ulam( | |
alist( | |
S ~ dbinom(N, p), | |
logit(p) <- a[tank], | |
a[tank] ~ dnorm(a_bar, sigma), | |
a_bar ~ dnorm(0, 1.5), | |
sigma ~ dexp(1) | |
), | |
data = dat, chains = 4, log_lik = TRUE | |
) | |
hist(round(inv_logit(precis(m13.1, depth = 2)[["mean"]]), 2)) | |
plot(NULL, xlim = c(-3, 4), ylim = c(0, 0.35)) | |
for (i in 1:100) { | |
curve(dnorm(x, post$a_bar[i], post$sigma[i]), | |
add = TRUE, | |
col = col.alpha("black", 0.2)) | |
} | |
sim_tanks <- logistic(rnorm(8000, post$a_bar, post$sigma)) | |
dens(sim_tanks, lwd = 2, adj = 0.1) | |
mean(d$propsurv) | |
a_bar <- 1.5 | |
sigma <- 1.5 | |
nponds <- 60 | |
Ni <- as.integer(rep(c(5,10,25,35), each = 15)) | |
set.seed(5005) | |
a_pond <- rnorm(nponds, a_bar, sigma) | |
dsim <- data.frame(pond=1:nponds, Ni=Ni, true_a=a_pond) | |
dsim$Si <- rbinom(nponds, size = dsim$Ni, prob = logistic(dsim$true_a)) | |
dsim$p_nopool <- with(dsim, Si / Ni) | |
m13.3 <- ulam( | |
alist( | |
Si ~ dbinom(Ni, p), | |
logit(p) <- a[pond], | |
a[pond] ~ dnorm(bar_a, sigma), | |
bar_a ~ dnorm(0, 1.5), | |
sigma ~ dexp(1) | |
), | |
data = dsim, chains = 4 | |
) | |
post <- extract.samples(m13.3) | |
dsim$p_partpool <- round(logistic(apply(post$a, 2, mean)), 1) | |
data(chimpanzees) | |
d <- chimpanzees | |
d$treatment <- 1 + d$prosoc_left + 2*d$condition | |
dat <- | |
list( | |
pulled_left = d$pulled_left, | |
actor = d$actor, | |
block_id = d$block, | |
treatment = as.integer(d$treatment) | |
) | |
m13.4 <- | |
ulam( | |
alist( | |
pulled_left ~ dbinom(1, p), | |
logit(p) <- a[actor] + g[block_id] + b[treatment], | |
b[treatment] ~ dnorm(0, 0.5), | |
a[actor] ~ dnorm(a_alpha, sigma_a), | |
g[block_id] ~ dnorm(0, sigma_y), | |
a_alpha ~ dnorm(0, 1.5), | |
sigma_a ~ dexp(1), | |
sigma_y ~ dexp(1) | |
), | |
data = dat, chains = 4, cores = 6, log_lik = TRUE | |
) | |
precis(m13.4, depth = 2) | |
m13.6 <- | |
ulam( | |
alist( | |
pulled_left ~ dbinom(1, p), | |
logit(p) <- a[actor] + y[block_id] + b[treatment], | |
b[treatment] ~ dnorm(0, sigma_b), | |
a[actor] ~ dnorm(a_alpha, sigma_a), | |
y[block_id] ~ dnorm(0, sigma_y), | |
a_alpha ~ dnorm(0, 1.5), | |
sigma_a ~ dexp(1), | |
sigma_y ~ dexp(1), | |
sigma_b ~ dexp(1) | |
), | |
data = dat, chains = 4, cores = 6, log_lik = TRUE | |
) | |
precis(m13.6, depth = 2) | |
m13.7 <- | |
ulam( | |
alist( | |
v ~ normal(0,3), | |
x ~ normal(0, exp(v)) | |
), | |
data = list(N = 1), chains = 4 | |
) | |
precis(m13.7) | |
m13.8 <- | |
ulam( | |
alist( | |
v ~ normal(0,3), | |
z ~ normal(0, 1), | |
gq> real[1]: x <<- z*exp(v) | |
), | |
data = list(N = 1), chains = 4 | |
) | |
precis(m13.8) | |
set.seed(13) | |
m13.4b <- ulam(m13.4, | |
chains = 4, | |
cores = 6, | |
control = list(adapt_delta = 0.99)) | |
divergent(m13.4b) | |
m13.4c <- | |
ulam( | |
alist( | |
pulled_left ~ dbinom(1, p), | |
logit(p) <- | |
a_bar + z[actor] * sigma_a + # actor intercepts | |
x[block_id] * sigma_g + # block intercepts | |
b[treatment], | |
b[treatment] ~ dnorm(0, 0.5), | |
z[actor] ~ dnorm(0, 1), | |
x[block_id] ~ dnorm(0, 1), | |
a_bar ~ dnorm(0, 1.5), | |
sigma_a ~ dexp(1), | |
sigma_g ~ dexp(1), | |
gq> vector[actor]:a <<- a_bar + z * sigma_a, | |
gq> vector[block_id]:g <<- x * sigma_g | |
), | |
data = dat, | |
chains = 4, | |
cores = 6, | |
log_lik = TRUE | |
) | |
precis(m13.4, depth = 2) | |
coeftab(m13.4, m13.4c) | |
chimp <- 2 | |
d_pred <- | |
expand.grid( | |
actor = 2, | |
treatment = 1:4, | |
block_id = 1 | |
) | |
p <- link(m13.4, data = d_pred) | |
p_mu <- apply(p, 2, mean) | |
p_ci <- apply(p, 2, PI) | |
data(bangladesh) | |
d <- bangladesh | |
d$district_id <- as.integer(as.factor(d$district)) | |
d$use_contraception <- d$use.contraception | |
m1 <- | |
ulam( | |
alist( | |
use_contraception ~ dbinom(1, p), | |
logit(p) <- d[district_id], | |
d[district_id] ~ dnorm(0, 1.5) | |
), | |
data = d[c("use_contraception", "district_id")], | |
chains = 4, | |
cores = 6 | |
) | |
m2 <- | |
ulam( | |
alist( | |
use_contraception ~ dbinom(1, p), | |
logit(p) <- d[district_id], | |
d[district_id] ~ dnorm(bar_d, sigma_d), | |
bar_d ~ dnorm(0, 1.5), | |
sigma_d ~ dexp(1) | |
), | |
data = d[c("use_contraception", "district_id")], | |
chains = 4, | |
cores = 6 | |
) | |
postm1 <- extract.samples(m1) | |
mm1 <- logistic(apply(postm1$d, 2, mean)) | |
pim1 <- logistic(apply(postm1$d, 2, PI)) | |
postm2 <- extract.samples(m2) | |
mm2 <- logistic(apply(postm2$d, 2, mean)) | |
pim2 <- logistic(apply(postm2$d, 2, PI)) | |
res <- | |
data.frame( | |
district_id = 1:60, | |
model_fe = mm1, | |
model_re = mm2 | |
) | |
library(ggplot2) | |
library(tidyr) | |
res_1 <- | |
res %>% | |
mutate(diff = abs(model_re - model_fe)) %>% | |
left_join(count(d, district_id)) | |
res_1 %>% | |
pivot_longer(starts_with("model")) %>% | |
ggplot(aes(district_id, value, color = name)) + | |
geom_point() + | |
geom_line(aes(group = district_id), color = "black") + | |
geom_hline(yintercept = mean(res_1$model_re)) + | |
theme_bw() | |
res_2 <- | |
res_1 %>% | |
filter(n > 40) | |
res_3 <- | |
res_1 %>% | |
filter(n < 20) | |
res_1 %>% | |
ggplot(aes(district_id, diff)) + | |
geom_point() + | |
geom_point(data = res_2, color = "green") + | |
geom_point(data = res_3, color = "red") + | |
theme_bw() | |
## The most extreme points are explained by their low sample | |
## size and the green dots show how the big sample sizes | |
## show the little differences | |
## However, we find that there are red dots (low sample size) | |
## that have either very big differeces or very little. | |
res_4 <- | |
d %>% | |
semi_join(res_3) %>% | |
group_by(district_id) %>% | |
summarize(avg = mean(use.contraception)) %>% | |
left_join(res_3) %>% | |
arrange(n) | |
## I get the impression that the places where there are smaller | |
## differences between re and fe even when low n is because the | |
## average probability of contraception was already closer to | |
## .5, more reasonable value. For example, the lower n districts | |
## have a median probability of 10% less contraception | |
## then the higher n districts from this limited sample. | |
## The closer the district is to the average, the less the prior nudges | |
## the district to the overall mean | |
res_4 %>% | |
mutate(low_high = ifelse(n < 14, 1, 0)) %>% | |
group_by(low_high) %>% | |
summarize(avg = median(avg)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment