Skip to content

Instantly share code, notes, and snippets.

@eliocamp
Last active July 24, 2023 13:50
Show Gist options
  • Save eliocamp/c73ab8d2c87fc9a668ea88d04ad8ca20 to your computer and use it in GitHub Desktop.
Save eliocamp/c73ab8d2c87fc9a668ea88d04ad8ca20 to your computer and use it in GitHub Desktop.
Percentogram (histogram with bins of equal number of observations)
# This is now available into ggpercentogram.
# https://github.com/eliocamp/ggpercentogram/
StatQuantileBin <- ggplot2::ggproto("StatQuantileBin", ggplot2::StatBin,
default_aes = ggplot2::aes(x = ggplot2::after_stat(density), y = ggplot2::after_stat(density), weight = 1),
compute_group = function(data, scales,
binwidth = NULL, bins = 30, breaks = NULL, trim = 0,
closed = c("right", "left"), pad = FALSE,
flipped_aes = FALSE,
# The following arguments are not used, but must
# be listed so parameters are computed correctly
origin = NULL, right = NULL, drop = NULL,
width = NULL) {
x <- ggplot2::flipped_names(flipped_aes)$x
if (is.null(breaks)) { # If breaks is not provided, we need to compute them
if (!is.null(binwidth)) { # Either with binwidth
trim <- trim/2
quantiles <- seq(trim, 1 - trim, binwidth)
} else { # or the number of bins
quantiles <- seq(trim, 1 - trim, length.out = bins)
}
} else {
quantiles <- breaks
}
breaks <- quantile(data[[x]], quantiles)
keep <- !duplicated(breaks)
# If quantiles is too close, sometimes you get duplicates
breaks <- breaks[keep]
quantiles <- quantiles[keep]
bins <- ggplot2:::bin_breaks(breaks, closed)
bins <- ggplot2:::bin_vector(data[[x]], bins, weight = data$weight, pad = pad)
bins$quantile <- quantiles[-length(quantiles)]
bins$flipped_aes <- flipped_aes
ggplot2::flip_data(bins, flipped_aes)
}
)
@eliocamp
Copy link
Author

library(ggplot2)
ggplot(data.frame(y = rnorm(1e5)), aes(y)) +
  geom_histogram(stat = StatQuantileBin, 
                 aes(fill = after_stat(quantile)))
#> `stat_quantile_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data.frame(y = rcauchy(1e5)), aes(y)) +
  geom_histogram(stat = StatQuantileBin, trim = 0.02, 
                 aes(fill = after_stat(quantile)))
#> `stat_quantile_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(diamonds, aes(carat)) +
  geom_histogram(stat = StatQuantileBin, trim = 0.02, 
                 aes(fill = after_stat(quantile)), color = "black")
#> `stat_quantile_bin()` using `bins = 30`. Pick better value with `binwidth`.

Created on 2023-04-13 with reprex v2.0.2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment