Skip to content

Instantly share code, notes, and snippets.

@cimentadaj
Created May 10, 2020 17:48
Show Gist options
  • Save cimentadaj/7ac382d0504c581de0a99ae51c6b26a8 to your computer and use it in GitHub Desktop.
Save cimentadaj/7ac382d0504c581de0a99ae51c6b26a8 to your computer and use it in GitHub Desktop.
library(rethinking)
data(reedfrogs)
d <- reedfrogs
d$tank <- 1:nrow(d)
dat <- list(
S = d$surv,
N = d$density,
tank = d$tank
)
m13.1 <- ulam(
alist(
S ~ dbinom(N, p),
logit(p) <- a[tank],
a[tank] ~ dnorm(0, 1.5)
),
data = dat, chains = 4, log_lik = TRUE
)
hist(round(inv_logit(precis(m13.1, depth = 2)[["mean"]]), 2))
m13.2 <- ulam(
alist(
S ~ dbinom(N, p),
logit(p) <- a[tank],
a[tank] ~ dnorm(a_bar, sigma),
a_bar ~ dnorm(0, 1.5),
sigma ~ dexp(1)
),
data = dat, chains = 4, log_lik = TRUE
)
hist(round(inv_logit(precis(m13.1, depth = 2)[["mean"]]), 2))
plot(NULL, xlim = c(-3, 4), ylim = c(0, 0.35))
for (i in 1:100) {
curve(dnorm(x, post$a_bar[i], post$sigma[i]),
add = TRUE,
col = col.alpha("black", 0.2))
}
sim_tanks <- logistic(rnorm(8000, post$a_bar, post$sigma))
dens(sim_tanks, lwd = 2, adj = 0.1)
mean(d$propsurv)
a_bar <- 1.5
sigma <- 1.5
nponds <- 60
Ni <- as.integer(rep(c(5,10,25,35), each = 15))
set.seed(5005)
a_pond <- rnorm(nponds, a_bar, sigma)
dsim <- data.frame(pond=1:nponds, Ni=Ni, true_a=a_pond)
dsim$Si <- rbinom(nponds, size = dsim$Ni, prob = logistic(dsim$true_a))
dsim$p_nopool <- with(dsim, Si / Ni)
m13.3 <- ulam(
alist(
Si ~ dbinom(Ni, p),
logit(p) <- a[pond],
a[pond] ~ dnorm(bar_a, sigma),
bar_a ~ dnorm(0, 1.5),
sigma ~ dexp(1)
),
data = dsim, chains = 4
)
post <- extract.samples(m13.3)
dsim$p_partpool <- round(logistic(apply(post$a, 2, mean)), 1)
data(chimpanzees)
d <- chimpanzees
d$treatment <- 1 + d$prosoc_left + 2*d$condition
dat <-
list(
pulled_left = d$pulled_left,
actor = d$actor,
block_id = d$block,
treatment = as.integer(d$treatment)
)
m13.4 <-
ulam(
alist(
pulled_left ~ dbinom(1, p),
logit(p) <- a[actor] + g[block_id] + b[treatment],
b[treatment] ~ dnorm(0, 0.5),
a[actor] ~ dnorm(a_alpha, sigma_a),
g[block_id] ~ dnorm(0, sigma_y),
a_alpha ~ dnorm(0, 1.5),
sigma_a ~ dexp(1),
sigma_y ~ dexp(1)
),
data = dat, chains = 4, cores = 6, log_lik = TRUE
)
precis(m13.4, depth = 2)
m13.6 <-
ulam(
alist(
pulled_left ~ dbinom(1, p),
logit(p) <- a[actor] + y[block_id] + b[treatment],
b[treatment] ~ dnorm(0, sigma_b),
a[actor] ~ dnorm(a_alpha, sigma_a),
y[block_id] ~ dnorm(0, sigma_y),
a_alpha ~ dnorm(0, 1.5),
sigma_a ~ dexp(1),
sigma_y ~ dexp(1),
sigma_b ~ dexp(1)
),
data = dat, chains = 4, cores = 6, log_lik = TRUE
)
precis(m13.6, depth = 2)
m13.7 <-
ulam(
alist(
v ~ normal(0,3),
x ~ normal(0, exp(v))
),
data = list(N = 1), chains = 4
)
precis(m13.7)
m13.8 <-
ulam(
alist(
v ~ normal(0,3),
z ~ normal(0, 1),
gq> real[1]: x <<- z*exp(v)
),
data = list(N = 1), chains = 4
)
precis(m13.8)
set.seed(13)
m13.4b <- ulam(m13.4,
chains = 4,
cores = 6,
control = list(adapt_delta = 0.99))
divergent(m13.4b)
m13.4c <-
ulam(
alist(
pulled_left ~ dbinom(1, p),
logit(p) <-
a_bar + z[actor] * sigma_a + # actor intercepts
x[block_id] * sigma_g + # block intercepts
b[treatment],
b[treatment] ~ dnorm(0, 0.5),
z[actor] ~ dnorm(0, 1),
x[block_id] ~ dnorm(0, 1),
a_bar ~ dnorm(0, 1.5),
sigma_a ~ dexp(1),
sigma_g ~ dexp(1),
gq> vector[actor]:a <<- a_bar + z * sigma_a,
gq> vector[block_id]:g <<- x * sigma_g
),
data = dat,
chains = 4,
cores = 6,
log_lik = TRUE
)
precis(m13.4, depth = 2)
coeftab(m13.4, m13.4c)
chimp <- 2
d_pred <-
expand.grid(
actor = 2,
treatment = 1:4,
block_id = 1
)
p <- link(m13.4, data = d_pred)
p_mu <- apply(p, 2, mean)
p_ci <- apply(p, 2, PI)
data(bangladesh)
d <- bangladesh
d$district_id <- as.integer(as.factor(d$district))
d$use_contraception <- d$use.contraception
m1 <-
ulam(
alist(
use_contraception ~ dbinom(1, p),
logit(p) <- d[district_id],
d[district_id] ~ dnorm(0, 1.5)
),
data = d[c("use_contraception", "district_id")],
chains = 4,
cores = 6
)
m2 <-
ulam(
alist(
use_contraception ~ dbinom(1, p),
logit(p) <- d[district_id],
d[district_id] ~ dnorm(bar_d, sigma_d),
bar_d ~ dnorm(0, 1.5),
sigma_d ~ dexp(1)
),
data = d[c("use_contraception", "district_id")],
chains = 4,
cores = 6
)
postm1 <- extract.samples(m1)
mm1 <- logistic(apply(postm1$d, 2, mean))
pim1 <- logistic(apply(postm1$d, 2, PI))
postm2 <- extract.samples(m2)
mm2 <- logistic(apply(postm2$d, 2, mean))
pim2 <- logistic(apply(postm2$d, 2, PI))
res <-
data.frame(
district_id = 1:60,
model_fe = mm1,
model_re = mm2
)
library(ggplot2)
library(tidyr)
res_1 <-
res %>%
mutate(diff = abs(model_re - model_fe)) %>%
left_join(count(d, district_id))
res_1 %>%
pivot_longer(starts_with("model")) %>%
ggplot(aes(district_id, value, color = name)) +
geom_point() +
geom_line(aes(group = district_id), color = "black") +
geom_hline(yintercept = mean(res_1$model_re)) +
theme_bw()
res_2 <-
res_1 %>%
filter(n > 40)
res_3 <-
res_1 %>%
filter(n < 20)
res_1 %>%
ggplot(aes(district_id, diff)) +
geom_point() +
geom_point(data = res_2, color = "green") +
geom_point(data = res_3, color = "red") +
theme_bw()
## The most extreme points are explained by their low sample
## size and the green dots show how the big sample sizes
## show the little differences
## However, we find that there are red dots (low sample size)
## that have either very big differeces or very little.
res_4 <-
d %>%
semi_join(res_3) %>%
group_by(district_id) %>%
summarize(avg = mean(use.contraception)) %>%
left_join(res_3) %>%
arrange(n)
## I get the impression that the places where there are smaller
## differences between re and fe even when low n is because the
## average probability of contraception was already closer to
## .5, more reasonable value. For example, the lower n districts
## have a median probability of 10% less contraception
## then the higher n districts from this limited sample.
## The closer the district is to the average, the less the prior nudges
## the district to the overall mean
res_4 %>%
mutate(low_high = ifelse(n < 14, 1, 0)) %>%
group_by(low_high) %>%
summarize(avg = median(avg))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment