Created
August 11, 2022 16:43
-
-
Save kjhealy/446868cc4afa1e9a190c6c789660d790 to your computer and use it in GitHub Desktop.
Using position_nudge() to make the comparison to a background distribution a little nicer.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Using position_nudge() to make a background distribution
## a little easier to compare against, by pushing it very slightly
## to the right. Most of the work is getting the penguins
## data to the point where position_nudge() can be demonstrated.
library(tidyverse)
library(palmerpenguins)

# Overall distribution: assign every penguin's flipper length to a
# 10mm-wide bin, tally the penguins per bin, and express each tally
# as a share of the grand total. Rows containing a missing value are
# removed only at the very end, i.e. after the shares are computed,
# so any missing-value bin still counts toward the denominator.
df_all <- penguins |>
  mutate(
    flip_f = cut_width(flipper_length_mm, width = 10, boundary = 10)
  ) |>
  count(flip_f, name = "all_n") |>
  mutate(all_prop = all_n / sum(all_n)) |>
  drop_na()
# Per-species distribution: bin every penguin's flipper length into
# the same 10mm-wide intervals as above, but tally within species.
#
# count() returns an ungrouped table, so complete() can then add a
# zero-count row for every species/bin combination that never
# occurs. Proportions are computed within each species, and the
# result is ungrouped again so the grouping does not leak into the
# join and plot that follow.
df_species <- penguins |>
  mutate(flip_f = cut_width(flipper_length_mm,
                            width = 10,
                            boundary = 10)) |>
  count(species, flip_f, name = "species_n") |>
  complete(species, flip_f,
           fill = list(species_n = 0)) |>
  group_by(species) |>
  mutate(species_prop = species_n / sum(species_n)) |>
  ungroup()
# Attach the overall counts and proportions to each species-by-bin
# row, matching on the shared bin label.
df <- df_species |>
  left_join(df_all, by = "flip_f")

# The result holds the binned flipper length measure with
# proportions computed both within species and overall, on the same
# bins. Because the overall table is matched on the bin alone, its
# values are repeated once per species; when the plot below is
# faceted by species, the overall distribution is therefore drawn
# identically in every panel.
df
# Draw the overall distribution behind each species' own
# distribution, one panel per species. The overall bars are nudged
# slightly to the right so they peek out from behind the species
# bars. Rows with a missing value in any column are dropped first.
df |>
  drop_na() |>
  ggplot() +
  # Background layer: overall proportions, shifted with
  # position_nudge(). `linewidth` sets the bar outline width; using
  # `size` for this is deprecated as of ggplot2 3.4.0.
  geom_col(mapping = aes(x = flip_f, y = all_prop),
           color = "black", linewidth = 0.1, fill = "gray50",
           alpha = 0.7, position = position_nudge(x = 0.05)) +
  # Foreground layer: within-species proportions, filled by the same
  # variable the panels are faceted on.
  geom_col(mapping = aes(x = flip_f, y = species_prop,
                         fill = species),
           color = "black", linewidth = 0.1,
           alpha = 0.8) +
  ggokabeito::scale_fill_okabe_ito() +
  scale_y_continuous(labels = scales::label_percent()) +
  # The facet strips already name the species, so no fill legend.
  guides(fill = "none") +
  facet_wrap(~ species, ncol = 1) +
  # The binned variable is flipper_length_mm, so label it as length.
  labs(x = "Flipper length in mm", y = "Percent of Penguins",
       title = "Flipper Length Distribution by Species",
       subtitle = "Distribution for all penguins shown in grey",
       caption = "Data: palmerpenguins. Graph: Kieran Healy / @kjhealy.") +
  # Shrink the bin labels so they fit along the x-axis.
  theme(axis.text.x = element_text(size = rel(0.6)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment