Last active
April 21, 2019 03:57
-
-
Save eliardocosta/7c350eece9f09a7caa0f48852621063c to your computer and use it in GitHub Desktop.
Blocked gibbs sampling for Dirichlet process.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
n <- 50 | |
w <- 1 | |
set.seed(161017) | |
lam <- sample(c(5, 25, 50), n, replace = TRUE, prob = c(0.35, 0.3, 0.35)) # 3 clusters! | |
x <- rpois(n, w*lam) | |
hist(x, prob = TRUE, breaks = 15) | |
source("https://gist.githubusercontent.com/eliardocosta/7c350eece9f09a7caa0f48852621063c/raw/dd6eae6d9bb3101bdeeff0d9d087db91f9d7e74d/ppostPoiDP.R") | |
foo <- ppostPoiDP(x = x, w = w, phi = sqrt(mean(x)/var(x)), lam0 = mean(x), alpha = 1, nburn = 500, nsam = 1E3) | |
hist(foo$nclu) # histograma num. de clusters por iteracao | |
plot(foo$lam, foo$fdist) # distribuicao acumulada a posteriori |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ppostPoiDP.R | |
# | |
# INFERENCIA USANDO O ALGORITMO BLOCKED-GIBBS SAMPLING PARA O PROCESSO | |
# DIRICHLET (Ishwaran & Zarepour, 2000). | |
# | |
# Output - uma lista com 3 elementos: 'lam', 'fdist' e 'nclu' | |
# 1) lam: vetor com os valores do gride para o espaco parametrico, o espacamento do gride | |
# é definido pelo argumento 'cgrid' | |
# 2) fdist: probabilidade acumulada a posteriori para cada valor do gride no vetor 'lam' | |
# 3) nclu: vetor com o numero de clusters em cada iteracao. | |
ppostPoiDP <- function (x, w, phi, lam0, alpha, nburn, nsam, cgrid = 0.1, eps = 1E-1) { | |
nx <- length(x) # tamanho da amostra | |
N <- min(nx, ceiling(1 - alpha*log(eps/(4*nx)))) # num. de termos da representacao em soma do DP (stick-breaking) | |
lam <- rgamma(N, shape = phi, rate = phi/lam0) # valores iniciais para lambda | |
p <- rep(1/N, N) # prob's iniciais p/ cada termo da soma truncada | |
K <- numeric() # vetor indexador de cluster | |
pstar <- matrix(NA, nx, N) | |
nclu <- numeric() # vetor para armazenar num. de cluster em cada iteracao | |
for (r in 1:nburn) { # inicio do loop do burn-in | |
for(i in 1:nx) { | |
pstar[i, ] <- p*dpois(x[i], w*lam) | |
} | |
for(i in 1:nx) { | |
K[i] <- sample.int(N, size = 1, prob = pstar[i, ]) | |
} | |
Ktable <- as.data.frame(table(K), stringsAsFactors = FALSE) # tabela freq. dos K's | |
Kstar <- as.vector(Ktable[ ,1], mode = "numeric") # vetor com os K's unicos | |
nclu <- append(nclu, length(Kstar)) | |
m <- rep(0, N) | |
for (j in 1:length(Ktable[ ,2])) { | |
for (i in 1:N) { | |
if (i == Ktable[j, 1]) m[i] <- Ktable[j, 2] | |
} | |
} | |
D <- rgamma(length(m), shape = alpha/N + m, rate = 1) # gammas para gerar a Dirichlet | |
p <- D/sum(D) # atualizando vetor 'p' com uma Dirichlet | |
for (j in 1:length(Kstar)) { | |
set = numeric() | |
for (i in 1:nx) { | |
if (K[i] == Kstar[j]) set <- append(set, i) | |
} | |
lam[Kstar[j]] <- rgamma(1, shape = phi + sum(x[set]), | |
rate = length(set)*w + phi/lam0) | |
} | |
} # fim do loop do burn-in | |
# inicio da amostragem da posteroiri | |
pesos <- matrix(NA, nsam, N) # pesos a posteriori da soma truncada amostrados por iteracao | |
lam.post <- matrix(NA, nsam, N) # lambda's a posteriori amostrados por iteracao | |
for (r in 1:nsam) { # inicio do loop p/ amostras a posteriori | |
for(i in 1:nx) { | |
pstar[i, ] <- p*dpois(x[i], w*lam) | |
} | |
for(i in 1:nx) { | |
K[i] <- sample.int(N, size = 1, prob = pstar[i, ]) | |
} | |
Ktable <- as.data.frame(table(K), stringsAsFactors = FALSE) # tabela freq. dos K's | |
Kstar <- as.vector(Ktable[ ,1], mode = "numeric") # vetor com os K's unicos | |
m <- rep(0, N) | |
for (j in 1:length(Ktable[ ,2])) { | |
for (i in 1:N) { | |
if (i == Ktable[j, 1]) m[i] <- Ktable[j, 2] | |
} | |
} | |
D <- rgamma(length(m), shape = alpha/N + m, rate = 1) # gammas para gerar a Dirichlet | |
p <- D/sum(D) # atualizando vetor 'p' com uma Dirichlet | |
for (j in 1:length(Kstar)) { | |
set = numeric() | |
for (i in 1:nx) { | |
if (K[i] == Kstar[j]) set <- append(set, i) | |
} | |
lam[Kstar[j]] <- rgamma(1, shape = phi + sum(x[set]), | |
rate = length(set)*w + phi/lam0) | |
} | |
lam.post[r, ] <- lam | |
pesos[r, ] <- p | |
} # fim do loop p/ amostragem a posteriori | |
grid.val <- seq(0, ceiling(max(lam.post)), cgrid) # grid de valores que define a particao sobre possiveis valores de lambda | |
grid.val <- tail(grid.val, -1) | |
ppost.it <- matrix(NA, nsam, length(grid.val)) # prob. acumulada a posteriori em relacao aos valores do gride por iteracao | |
for (i in 1:nsam) { | |
for (j in 1:length(grid.val)) { | |
ppost.it[i, j] <- sum(p[which(lam.post[i, ] <= grid.val[j])]) | |
} | |
} | |
ppost <- apply(ppost.it, 2, mean) # estimativa da prob. acumulada a posterirori | |
return(list(lam = grid.val, fdist = ppost, nclu = nclu)) | |
} # FIM |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment