## kjhealy/position_nudge.r

## position_nudge.r
## Using position_nudge() to make showing a background
## distribution a little nicer, by pushing it very slightly
## to the right. Most of the work is getting the penguins
## data to the point where I can demonstrate position_nudge()

library(tidyverse)
library(palmerpenguins)

# Bin each penguin's flipper length into 10mm-wide intervals
# (with a bin boundary anchored at 10), tally the penguins in
# every bin, and convert the tallies into shares of the total.
# The share is computed before drop_na(), so a bin of missing
# flipper lengths (if there is one) still counts toward the
# denominator even though its row is removed afterwards.
df_all <- penguins |>
  mutate(
    flip_f = cut_width(flipper_length_mm, width = 10, boundary = 10)
  ) |>
  count(flip_f, name = "all_n") |>
  mutate(all_prop = all_n / sum(all_n)) |>
  drop_na()

# Repeat the 10mm binning, but tally penguins per species and
# bin. count() returns an ungrouped table, so no `.groups`
# message is emitted and no separate ungroup() is needed before
# complete(), which restores species/bin combinations that had
# no penguins, giving them a count of 0. Proportions are then
# taken within species (a missing-length bin, if present, still
# counts toward the denominator). The grouping is dropped at the
# end so downstream code receives a plain, ungrouped tibble.
df_species <- penguins |>
  mutate(
    flip_f = cut_width(flipper_length_mm, width = 10, boundary = 10)
  ) |>
  count(species, flip_f, name = "species_n") |>
  complete(species, flip_f, fill = list(species_n = 0)) |>
  group_by(species) |>
  mutate(species_prop = species_n / sum(species_n)) |>
  ungroup()

# Attach the overall per-bin counts and proportions to the
# per-species table, matching rows on the shared bin column.
df <- df_species |>
  left_join(df_all, by = "flip_f")

# Each row now describes one species/bin pair and carries both
# the within-species proportion and the overall proportion for
# that bin. Because the overall values are matched on the bin
# alone, the same all_n and all_prop appear under every species,
# so the faceted plot below draws an identical background
# distribution in each panel.

df

# Draw the overall distribution as a grey background layer with
# the species-specific distribution on top, one facet per
# species. drop_na() removes any row with a missing value in any
# column before plotting.
df |>
  drop_na() |>
  ggplot() +
  # Background layer: overall proportions, shifted slightly to
  # the right with position_nudge() so the bars peek out from
  # behind the species bars. linewidth (not size) sets the bar
  # outline width; ggplot2 3.4.0 deprecated size for lines.
  geom_col(
    mapping = aes(x = flip_f, y = all_prop),
    color = "black", linewidth = 0.1, fill = "gray50",
    alpha = 0.7, position = position_nudge(x = 0.05)
  ) +
  # Foreground layer: within-species proportions, filled by the
  # same variable the panels are faceted on.
  geom_col(
    mapping = aes(x = flip_f, y = species_prop, fill = species),
    color = "black", linewidth = 0.1,
    alpha = 0.8
  ) +
  ggokabeito::scale_fill_okabe_ito() +
  scale_y_continuous(labels = scales::label_percent()) +
  # The facet strips already name each species, so hide the legend.
  guides(fill = "none") +
  facet_wrap(~ species, ncol = 1) +
  # The binned variable is flipper_length_mm, so the axis label
  # and title refer to flipper length.
  labs(
    x = "Flipper length in mm", y = "Percent of Penguins",
    title = "Flipper Length Distribution by Species",
    subtitle = "Distribution for all penguins shown in grey",
    caption = "Data: palmerpenguins. Graph: Kieran Healy / @kjhealy.",
    fill = "Species"
  ) +
  theme(axis.text.x = element_text(size = rel(0.6)))
	## Using position_nudge() to make showing a background
	## distribution a little nicer, by pushing it very slightly
	## to the right. Most of the work is getting the penguins
	## data to the point where I can demonstrate position_nudge()

	library(tidyverse)
	library(palmerpenguins)

	# Bin each penguin's flipper length into 10mm-wide intervals
	# (with a bin boundary anchored at 10), count the penguins in
	# every bin, and convert the counts into shares of the total.
	# The share is computed before drop_na(), so a bin of missing
	# flipper lengths (if there is one) still counts toward the
	# denominator even though its row is removed afterwards.
	df_all <- penguins |>
	  mutate(flip_f = cut_width(flipper_length_mm,
	                            width = 10,
	                            boundary = 10)) |>
	  group_by(flip_f) |>
	  summarize(all_n = n()) |>
	  mutate(all_prop = all_n / sum(all_n)) |>
	  drop_na()

	# Repeat the 10mm binning, but tally penguins per species and
	# bin. count() returns an ungrouped table, so no `.groups`
	# message is emitted and no separate ungroup() is needed before
	# complete(), which restores species/bin combinations that had
	# no penguins, giving them a count of 0. Proportions are then
	# taken within species (a missing-length bin, if present, still
	# counts toward the denominator). The grouping is dropped at the
	# end so downstream code receives a plain, ungrouped tibble.
	df_species <- penguins |>
	  mutate(
	    flip_f = cut_width(flipper_length_mm, width = 10, boundary = 10)
	  ) |>
	  count(species, flip_f, name = "species_n") |>
	  complete(species, flip_f, fill = list(species_n = 0)) |>
	  group_by(species) |>
	  mutate(species_prop = species_n / sum(species_n)) |>
	  ungroup()

	# Attach the overall per-bin counts and proportions to the
	# per-species table, matching rows on the shared bin column.
	df <- df_species |>
	  left_join(df_all, by = "flip_f")

	# Each row now describes one species/bin pair and carries both
	# the within-species proportion and the overall proportion for
	# that bin. Because the overall values are matched on the bin
	# alone, the same all_n and all_prop appear under every species,
	# so the faceted plot below draws an identical background
	# distribution in each panel.

	df

	# Draw the overall distribution as a grey background layer with
	# the species-specific distribution on top, one facet per
	# species. drop_na() removes any row with a missing value in any
	# column before plotting.
	df |>
	  drop_na() |>
	  ggplot() +
	  # Background layer: overall proportions, shifted slightly to
	  # the right with position_nudge() so the bars peek out from
	  # behind the species bars. linewidth (not size) sets the bar
	  # outline width; ggplot2 3.4.0 deprecated size for lines.
	  geom_col(
	    mapping = aes(x = flip_f, y = all_prop),
	    color = "black", linewidth = 0.1, fill = "gray50",
	    alpha = 0.7, position = position_nudge(x = 0.05)
	  ) +
	  # Foreground layer: within-species proportions, filled by the
	  # same variable the panels are faceted on.
	  geom_col(
	    mapping = aes(x = flip_f, y = species_prop, fill = species),
	    color = "black", linewidth = 0.1,
	    alpha = 0.8
	  ) +
	  ggokabeito::scale_fill_okabe_ito() +
	  scale_y_continuous(labels = scales::label_percent()) +
	  # The facet strips already name each species, so hide the legend.
	  guides(fill = "none") +
	  facet_wrap(~ species, ncol = 1) +
	  # The binned variable is flipper_length_mm, so the axis label
	  # and title refer to flipper length.
	  labs(
	    x = "Flipper length in mm", y = "Percent of Penguins",
	    title = "Flipper Length Distribution by Species",
	    subtitle = "Distribution for all penguins shown in grey",
	    caption = "Data: palmerpenguins. Graph: Kieran Healy / @kjhealy.",
	    fill = "Species"
	  ) +
	  theme(axis.text.x = element_text(size = rel(0.6)))