Skip to content

Instantly share code, notes, and snippets.

@expersso
Created February 24, 2019 08:59
Show Gist options
  • Save expersso/804a1e9ffe5cee51dedfece5a2b5fe88 to your computer and use it in GitHub Desktop.
Save expersso/804a1e9ffe5cee51dedfece5a2b5fe88 to your computer and use it in GitHub Desktop.
Simulating DGP assumed by LDA
library(tidyverse)
library(gtools)
library(topicmodels)
library(tidytext)
rcat <- function(n, x, p) {
sample(x, size = n, replace = TRUE, prob = p)
}
generate_document <- function(z, k, w, beta) {
w <- map(seq_len(k), ~rcat(n = z[.], x = w, beta[[.]]))
flatten_chr(w)
}
generate_corpus <- function(M, k, lambda, alpha, w, beta) {
tibble(
d = seq_len(M),
N = rpois(M, lambda),
t = map(d, ~rdirichlet(1, alpha)[1, ]),
z = map2(N, t, ~rmultinom(1, .x, .y)[, 1]),
w = map(z, ~generate_document(., k, w, beta))
)
}
ps <- list(
M = 20,
k = 3,
lambda = 300,
alpha = c(.5, .1, .1),
beta = tribble(
~w, ~z1, ~z2, ~z3,
"1_HICP", .9, .0, .0,
"2_GDP", .1, .9, .0,
"3_NPLs", .0, .1, .9,
"4_bank", .0, .0, .1
)
)
df <- generate_corpus(ps$M, ps$k, ps$lambda, ps$alpha, ps$beta$w, ps$beta[, -1])
dtm <- df %>%
unnest(w) %>%
count(d, w) %>%
cast_dtm(d, w, n)
lda <- LDA(dtm, k)
tidy(lda, matrix = "gamma") %>% spread(topic, gamma)
tidy(lda, matrix = "gamma") %>% spread(topic, gamma)
tidy(lda, matrix = "gamma") %>%
group_by(topic) %>%
summarise(m = mean(gamma))
posterior(lda)
topics(lda, k)
terms(lda, k)
newdata <- tibble(d = 1, w = w, n = c(15, 0, 5, 1)) %>% cast_dtm(d, w, n)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment