Skip to content

Instantly share code, notes, and snippets.

@bbolker
Created December 11, 2021 23:29
Show Gist options
  • Save bbolker/945e684dc148239364ab0f99c9a1beed to your computer and use it in GitHub Desktop.
Save bbolker/945e684dc148239364ab0f99c9a1beed to your computer and use it in GitHub Desktop.
various machinery for binned plots
library(ggplot2); theme_set(theme_bw())
library(dplyr)
library(broom)
#' Summarize binary data à la arm::binnedplot
#' @param x continuous predictor variable
#' @param y 0/1 response variable
#' @param nclass number of categories for binning
#' @param cifun function for extracting confidence intervals
#' @return a data frame with columns "xbar" (centerpoint of bin),
#' "prop" (proportion of successes), "nval" (number of samples),
#' "lwr", "upr" (95 pct confidence intervals from \code{cifun})
bin_vals <- function(x,y,nclass=NULL,cifun=prop.test) {
n <- length(x)
if (is.null(nclass)) {
## following arm::binnedplot
nclass <- if (n<10) floor(n/2) else
if (n<100) 10 else
floor(sqrt(n))
}
sumfun <- function(d){
nval <- nrow(d)
npos <- sum(d$y)
prop <- mean(d$y)
xbar <- mean(d$x)
## cat(npos,nval,"\n")
bi <- cifun(x=npos,n=nval)$conf.int
data.frame(xbar, prop, nval, lwr = bi[1], upr = bi[2])
}
cc <- ggplot2::cut_number(x,nclass)
dd <- data.frame(x,y)
## the same as plyr::ddply (but avoid package dependence)
## could use tidyverse instead
ss <- do.call(rbind,
lapply(split(dd,cc),sumfun))
return(ss)
}
## can't decide whether binom.test or prop.test is a better default
prop_cl_binom <- function(x, ...) {
bb <- binom.test(sum(x), length(x))
data.frame(y = mean(x), ymin = bb$conf.int[1], ymax = bb$conf.int[2])
}
prop_size_binom <- function(x, ...) {
data.frame(y = mean(x), size = sum(x))
}
## example: generate data (boring)
set.seed(101)
x <- runif(1000)
y <- rbinom(1000,prob=0.3,size=1)
## get binned data
b <- bin_vals(x,y)
## plot raw data + smoothing line + binned data
dd <- data.frame(x,y)
ggplot(dd,aes(x,y)) +
geom_point() +
geom_smooth(method="gam",
method.args=list(family=binomial))+
stat_summary_bin(fun.data = prop_cl_binom)
if (requireNamespace("mlmRev")) {
data("Contraception", package = "mlmRev")
m <- glm(use ~ livch + age + urban, family = binomial, data = Contraception)
aa <- broom::augment(m)
gg1 <- ggplot(aa, aes(.fitted, .resid)) +
stat_summary_bin(fun.data = mean_cl_boot) +
geom_hline(yintercept = 0, lty = 2) +
geom_smooth()
print(gg1)
## re-bin by rank
gg1 + aes(x = rank(.fitted))
## with a little more effort, figure out how to make *uneven* bins?
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment