Created
December 11, 2021 23:29
-
-
Save bbolker/945e684dc148239364ab0f99c9a1beed to your computer and use it in GitHub Desktop.
various machinery for binned plots
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2); theme_set(theme_bw()) | |
library(dplyr) | |
library(broom) | |
#' Summarize binary data à la arm::binnedplot | |
#' @param x continuous predictor variable | |
#' @param y 0/1 response variable | |
#' @param nclass number of categories for binning | |
#' @param cifun function for extracting confidence intervals | |
#' @return a data frame with columns "xbar" (centerpoint of bin), | |
#' "prop" (proportion of successes), "nval" (number of samples), | |
#' "lwr", "upr" (95 pct confidence intervals from \code{cifun}) | |
bin_vals <- function(x,y,nclass=NULL,cifun=prop.test) { | |
n <- length(x) | |
if (is.null(nclass)) { | |
## following arm::binnedplot | |
nclass <- if (n<10) floor(n/2) else | |
if (n<100) 10 else | |
floor(sqrt(n)) | |
} | |
sumfun <- function(d){ | |
nval <- nrow(d) | |
npos <- sum(d$y) | |
prop <- mean(d$y) | |
xbar <- mean(d$x) | |
## cat(npos,nval,"\n") | |
bi <- cifun(x=npos,n=nval)$conf.int | |
data.frame(xbar, prop, nval, lwr = bi[1], upr = bi[2]) | |
} | |
cc <- ggplot2::cut_number(x,nclass) | |
dd <- data.frame(x,y) | |
## the same as plyr::ddply (but avoid package dependence) | |
## could use tidyverse instead | |
ss <- do.call(rbind, | |
lapply(split(dd,cc),sumfun)) | |
return(ss) | |
} | |
## can't decide whether binom.test or prop.test is a better default | |
prop_cl_binom <- function(x, ...) { | |
bb <- binom.test(sum(x), length(x)) | |
data.frame(y = mean(x), ymin = bb$conf.int[1], ymax = bb$conf.int[2]) | |
} | |
prop_size_binom <- function(x, ...) { | |
data.frame(y = mean(x), size = sum(x)) | |
} | |
## example: generate data (boring) | |
set.seed(101) | |
x <- runif(1000) | |
y <- rbinom(1000,prob=0.3,size=1) | |
## get binned data | |
b <- bin_vals(x,y) | |
## plot raw data + smoothing line + binned data | |
dd <- data.frame(x,y) | |
ggplot(dd,aes(x,y)) + | |
geom_point() + | |
geom_smooth(method="gam", | |
method.args=list(family=binomial))+ | |
stat_summary_bin(fun.data = prop_cl_binom) | |
if (requireNamespace("mlmRev")) { | |
data("Contraception", package = "mlmRev") | |
m <- glm(use ~ livch + age + urban, family = binomial, data = Contraception) | |
aa <- broom::augment(m) | |
gg1 <- ggplot(aa, aes(.fitted, .resid)) + | |
stat_summary_bin(fun.data = mean_cl_boot) + | |
geom_hline(yintercept = 0, lty = 2) + | |
geom_smooth() | |
print(gg1) | |
## re-bin by rank | |
gg1 + aes(x = rank(.fitted)) | |
## with a little more effort, figure out how to make *uneven* bins? | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment