Last active
April 7, 2023 14:50
-
-
Save rabutler/bd97a6f49db87860f987156842fd4ee5 to your computer and use it in GitHub Desktop.
Use custom percentiles for whiskers in `stat_boxplot`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# modified from https://github.com/tidyverse/ggplot2/blob/master/R/stat-boxplot.r | |
# now takes qs argument instead of coef to extend the whiskers to a specific | |
# percentile | |
library(ggplot2) | |
stat_boxplot_custom <- function(mapping = NULL, data = NULL, | |
geom = "boxplot", position = "dodge", | |
..., | |
qs = c(.05, .25, 0.5, 0.75, 0.95), | |
na.rm = FALSE, | |
show.legend = NA, | |
inherit.aes = TRUE) { | |
layer( | |
data = data, | |
mapping = mapping, | |
stat = StatBoxplotCustom, | |
geom = geom, | |
position = position, | |
show.legend = show.legend, | |
inherit.aes = inherit.aes, | |
params = list( | |
na.rm = na.rm, | |
qs = qs, | |
... | |
) | |
) | |
} | |
StatBoxplotCustom <- ggproto("StatBoxplotCustom", Stat, | |
required_aes = c("x", "y"), | |
non_missing_aes = "weight", | |
setup_params = function(data, params) { | |
params$width <- ggplot2:::"%||%"(params$width, (resolution(data$x) * 0.75)) | |
if (is.double(data$x) && !ggplot2:::has_groups(data) && any(data$x != data$x[1L])) { | |
warning( | |
"Continuous x aesthetic -- did you forget aes(group=...)?", | |
call. = FALSE) | |
} | |
params | |
}, | |
compute_group = function(data, scales, width = NULL, na.rm = FALSE, qs = c(.05, .25, 0.5, 0.75, 0.95)) { | |
if (!is.null(data$weight)) { | |
mod <- quantreg::rq(y ~ 1, weights = weight, data = data, tau = qs) | |
stats <- as.numeric(stats::coef(mod)) | |
} else { | |
stats <- as.numeric(stats::quantile(data$y, qs)) | |
} | |
names(stats) <- c("ymin", "lower", "middle", "upper", "ymax") | |
iqr <- diff(stats[c(2, 4)]) | |
outliers <- (data$y < stats[1]) | (data$y > stats[5]) | |
#if (any(outliers)) { | |
# stats[c(1, 5)] <- range(c(stats[2:4], data$y[!outliers]), na.rm = TRUE) | |
#} | |
if (length(unique(data$x)) > 1) | |
width <- diff(range(data$x)) * 0.9 | |
df <- as.data.frame(as.list(stats)) | |
df$outliers <- list(data$y[outliers]) | |
if (is.null(data$weight)) { | |
n <- sum(!is.na(data$y)) | |
} else { | |
# Sum up weights for non-NA positions of y and weight | |
n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)]) | |
} | |
df$notchupper <- df$middle + 1.58 * iqr / sqrt(n) | |
df$notchlower <- df$middle - 1.58 * iqr / sqrt(n) | |
df$x <- if (is.factor(data$x)) data$x[1] else mean(range(data$x)) | |
df$width <- width | |
df$relvarwidth <- sqrt(n) | |
df | |
} | |
) |
I'm sorry that I didn't see your comment until now..not sure why. Yes, you're correct. In you're example the whiskers would extend to the 1st and 99th percentiles.
Can stat_boxplot_custom be combined with geom_boxplot in a ggplot? I want to use it for grouped boxplots and in combination with geom_dotplot.
I think so. I've used stat_boxplot_custom()
in place of geom_boxplot()
and it works in most code. You could also try geom_boxplot(stat = 'boxplot_custom')
.
I'm noticing that I'm getting a warning while using it with ggplot v3.4.1. I haven't updated this gist in quite some time, so not 100% sure it will work with newer versions of ggplot2.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks. Most helpful! Am I right that changing the call to stat_boxplot_custom() to have alternative qs values will drive the new qs into compute_group, e.g. stat_boxplot_custom*(... qs = c(.01, .25, 0.5, 0.75, 0.99)) would yield correct calculations and plots,