Skip to content

Instantly share code, notes, and snippets.

@twolodzko
Last active September 1, 2017 21:35
Show Gist options
  • Save twolodzko/4f6d7e8dbcabc8c9e67b061eff509c0d to your computer and use it in GitHub Desktop.
Save twolodzko/4f6d7e8dbcabc8c9e67b061eff509c0d to your computer and use it in GitHub Desktop.
library(extraDistr)
bins <- function(x, n = 512L) {
x <- x[!is.na(x)]
h <- diff(range(x))/(n-4)
mids <- seq(min(x)-2*h, max(x)+2*h, length.out = n)
breaks <- c(mids[1L] - h, mids + h)
counts <- unname(tapply(x, cut(x, breaks), length,
default = 0L))
list(
mids = mids,
breaks = breaks,
counts = counts
)
}
#' Bayesian kernel density estimation
#'
#' @param x numeric vector
#' @param bw the smoothing bandwidth to be used
#' @param m size of the grid used for calculating density
#' @param nsim number of simulated draws
#' @param n the number of equally spaced points at which the density is to be estimated
#' @param alpha confidence level of the interval
#' @param na.rm logical; if \code{TRUE}, missing values are removed from \code{x}.
#' If \code{FALSE} any missing values cause an error.
#'
#' @examples
#'
#' x <- c(rnorm(150, 20), rnorm(200, 15))
#'
#' (fit <- bayesDensity(x))
#' plot(fit)
#' lines(density(x), col = "red", lty = 2)
#' rug(x, lwd = 2)
#'
#' x <- mtcars$mpg
#'
#' plot(bayesDensity(x))
#' lines(density(x), col = "red", lty = 2)
#' rug(x, lwd = 2)
#'
#' @references
#' Sibisi, S., & Skilling, J. (1996).
#' Bayesian Density Estimation.
#' In Maximum Entropy and Bayesian Methods (pp. 189-198). Springer.
#'
#' @importFrom extraDistr rdirichlet
#' @importFrom stats dnorm
#'
#' @export
bayesDensity <- function(x, bw = bw.nrd(x), m = 512, nsim = 5000,
n = 512, alpha = 0.95, na.rm = TRUE) {
call <- match.call()
xnam <- as.character(deparse(substitute(x)))
N <- length(x)
x.na <- is.na(x)
if (any(x.na)) {
if (na.rm)
x <- x[!x.na]
else stop("'x' contains missing values")
}
x.finite <- is.finite(x)
if (any(!x.finite))
x <- x[x.finite]
bs <- bins(x, m)
grid <- seq(min(x)-4*bw, max(x)+4*bw, length.out = n)
K <- outer(grid, bs$mids, function(y, mu) dnorm(y, mu, bw))
phi <- rdirichlet(nsim, as.numeric(1/m + bs$counts))
sim <- phi %*% t(K)
quant <- apply(sim, 2, quantile, c((1-alpha)/2, 0.5, 1-(1-alpha)/2))
mean <- colMeans(sim)
sd <- apply(sim, 2, sd)
structure(
list(
x = grid,
y = mean,
low = quant[1, ],
high = quant[3, ],
median = quant[2, ],
mean = mean,
sd = sd,
m = m,
n = N,
bw = bw,
nsim = nsim,
alpha = alpha,
call = call,
data.name = xnam,
hist = bins,
has.na = FALSE
), class = c("bayesDensity", "density")
)
}
plot.bayesDensity <- function(x, main = NULL, xlab = NULL,
ylab = "Density", type = "l",
zero.line = TRUE, ...) {
if (is.null(xlab))
xlab <- paste("N =", x$n, " Bandwidth =", formatC(x$bw))
if (is.null(main))
main <- deparse(x$call)
plot.default(x, main = main, xlab = xlab, ylab = ylab, type = type,
ylim = c(0, max(x$high)), ...)
if (zero.line)
abline(h = 0, lwd = 0.1, col = "gray")
lines(x$x, x$low, col = "gray", lty = 2)
lines(x$x, x$high, col = "gray", lty = 2)
invisible(NULL)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment