Skip to content

Instantly share code, notes, and snippets.

View monogenea's full-sized avatar

Francisco Lima monogenea

View GitHub Profile
# Sentiment analysis
tknDct <- tokens_lookup(tkn, dictionary = data_dictionary_LSD2015)
saDfm <- dfm(tknDct,
remove = stopwords("en"),
stem = T)
summ <- do.call("rbind", by(convert(saDfm, to="data.frame")[,-1],
INDICES = date(tweetReduced$created_at),
FUN = colSums))
rangeP <- seq(0, 1, length.out = 100)
plot(rangeP, dbinom(x = 8, prob = rangeP, size = 10),
type = "l", xlab = "P(Black)", ylab = "Density")
lines(rangeP, dnorm(x = rangeP, mean = .5, sd = .1) / 15,
col = "red")
lik <- dbinom(x = 8, prob = rangeP, size = 10)
prior <- dnorm(x = rangeP, mean = .5, sd = .1)
lines(rangeP, lik * prior, col = "green")
unstdPost <- lik * prior
stdPost <- unstdPost / sum(unstdPost)
lines(rangeP, stdPost, col = "blue")
legend("topleft", legend = c("Lik", "Prior", "Unstd Post", "Post"),
text.col = 1:4, bty = "n")
# Define real pars mu and sigma, sample 100x
trueMu <- 5
trueSig <- 2
set.seed(100)
randomSample <- rnorm(100, trueMu, trueSig)
# Grid approximation, mu %in% [0, 10] and sigma %in% [1, 3]
grid <- expand.grid(mu = seq(0, 10, length.out = 200),
sigma = seq(1, 3, length.out = 200))
(allTabs <- excel_sheets("data.xlsx")) # list tabs
# Read female reproductive output
fro <- read_xlsx("data.xlsx", sheet = allTabs[2])
# Assess missingness
sum(complete.cases(fro)) / nrow(fro)
# only 0.57 complete records; which vars have at least one NA?
names(which(apply(fro, 2, function(x){any(is.na(x))})))
eggsFMod <- map2stan(alist(
Eggs_fledged ~ dzipois(p, lambda),
logit(p) <- ap,
log(lambda) <- a + a_fem[female_id] + a_year[year_id] + a_group[group_id] +
Parasite*bP + Min_age_Z*bA + Group_size_Z*bGS + Mean_eggsize_Z*bES +
Parasite*Min_age_Z*bPA,
Group_size_Z ~ dnorm(0, 3),
Mean_eggsize_Z ~ dnorm(0, 3),
a_fem[female_id] ~ dnorm(0, sigma1),
a_year[year_id] ~ dnorm(0, sigma2),
# Sample posterior
post <- extract.samples(eggsFMod)
# PI of P(no clutch at all)
dens(logistic(post$ap), show.HPDI = T, xlab = "ZIP Bernoulli(p)")
# Run simulations w/ averages of all predictors, except parasite 0 / 1
lambdaNoP <- exp(post$a + 0*post$bP + 0*post$bA +
0*post$bGS + 0*post$bES + 0*0*post$bPA)
simFledgeNoPar <- rpois(n = length(lambdaNoP), lambda = lambdaNoP)
# Try Eggs_laid ~ dpois
froReduced <- slice(fro, which(!is.na(Eggs_laid))) %>%
as.data.frame()
# Re-do the variable scaling, otherwise the sampling throws an error
froReduced %<>% mutate(female_id = as.integer(factor(Female_ID_coded)),
year_id = as.integer(factor(Year)),
group_id = as.integer(factor(Group_ID_coded)),
Min_age_Z = scale(Min_age),
Group_size_Z = scale(Group_size),