tjmahr/label-hints.md

## label-hints.md

      
    Raw
  

              label-hints.md
            
          
    library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.3.2
#> Warning: package 'dplyr' was built under R version 4.3.2
#> Warning: package 'stringr' was built under R version 4.3.2
#> Warning: package 'lubridate' was built under R version 4.3.2
Suppose we have a dataframe with labels and their locations
data <- tibble(
  group = c("control", "control", "test", "test"),
  x = c(1, 2, 2, 3),
  y = c(1, 2, 2.1, 3),
  hjust = 0,
  vjust = 1,
  nudge_x = 0,
  nudge_y = 0
)
And the label locations are not the best
ggplot(data) +
  aes(x = x, y = y) +
  geom_point() +
  geom_text(
    aes(
      label = group,
      x = x + nudge_x,
      y = y + nudge_y,
      hjust = hjust,
      vjust = vjust)
  )

We want to fix them by overwriting specific values. Call these specific values
“hints” for plotting label locations. We only want to provide just enough
information to identify the label and to overwrite the specific value.
# Hints provide just enough information and use NAs for values
# that don't need be adjusted
data_hints <- list(
  tibble(group = "test", x = 3, hjust = 1,            nudge_x = -.05),
  tibble(group = "test", x = 2,            vjust = 0, nudge_x = +.05)
) |>
  bind_rows()

data_hints
#> # A tibble: 2 × 5
#>   group     x hjust nudge_x vjust
#>   <chr> <dbl> <dbl>   <dbl> <dbl>
#> 1 test      3     1   -0.05    NA
#> 2 test      2    NA    0.05     0
Here we can apply the hints. We only have to tell it which columns
are used to uniquely identify each label.
apply_hints <- function(data_labs, data_hints, names_join) {
  n_rows <- nrow(data_labs)
  n_indices <- data_labs |> select(all_of(names_join)) |> distinct() |> nrow()
  stopifnot(
    "Columns in `names_join` do not uniquely identify rows" =
      n_rows == n_indices
  )

  data_labs <- data_labs |> tibble::rowid_to_column(".rowid")

  data_labs_index <- data_labs |>
    select(.rowid, all_of(names_join))

  data_hints_skeleton <- data_hints |>
    inner_join(data_labs_index, by = names_join) |>
    # Add missing columns but initialized to NA.
    bind_rows(data_labs[0, ])

    stopifnot(
      "each label row can only be updated by 1 hint row" = 
        length(data_hints_skeleton$.rowid) == 
          length(unique(data_hints_skeleton$.rowid))
    )

  data_labs |>
    # Remove rows that need to be updated
    anti_join(data_hints_skeleton, by = ".rowid") |>
    # Add partial data from the hints
    bind_rows(data_hints_skeleton) |>
    # Fill in the missing data in the hints with original values
    rows_patch(data_labs, by = ".rowid") |>
    arrange(.rowid) |>
    select(-.rowid)
}
And here is the plot with adjusted labels.
ggplot(data |> apply_hints(data_hints, c("group", "x"))) +
  aes(x = x, y = y) +
  geom_point() +
  geom_text(
    aes(
      label = group,
      x = x + nudge_x,
      y = y + nudge_y,
      hjust = hjust,
      vjust = vjust)
  )

^{Created on 2023-11-30 with reprex v2.0.2}