Last active
July 8, 2025 09:14
-
-
Save fkeck/21f743df14df519cfa6a94487e5b4203 to your computer and use it in GitHub Desktop.
Em dash ecology
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(tidyverse) | |
| library(ggtext) | |
| library(openalexR) | |
| library(httr2) | |
##### OpenAlex #####

#' Download a seeded random sample of OpenAlex article records for one year
#'
#' Builds an OpenAlex "works" query restricted to subfield 2303 (the ecology
#' subfield this analysis targets), English-language articles that have an
#' abstract and were published in `year`, then pages through a seeded random
#' sample and converts every page to a data frame with `works2df()`.
#'
#' @param year Publication year (e.g. `2021`); the query covers 1 January to
#'   31 December of that year.
#' @param sample_size Total number of works requested through the `sample`
#'   query parameter.
#' @param per_page Number of works requested per page.
#' @param seed Value of the `seed` query parameter; it keeps the sample the
#'   same from page to page and from run to run.
#' @return The row-bound result of `works2df()` over all pages.
fetch_ecology_sample <- function(year,
                                 sample_size = 10000L,
                                 per_page = 200L,
                                 seed = 123L) {
  # Integers are pasted without scientific notation (1e5 would become "1e+05").
  sample_size <- as.integer(sample_size)
  per_page <- as.integer(per_page)
  seed <- as.integer(seed)

  query_url <-
    oa_query(entity = "works",
             topics.subfield.id = "subfields/2303",
             has_abstract = TRUE,
             language = "en",
             from_publication_date = paste0(year, "-01-01"),
             to_publication_date = paste0(year, "-12-31"),
             type = "article") |>
    paste0("&mailto=XXXXX",
           "&per-page=", per_page,
           "&sample=", sample_size, "&seed=", seed)

  # Derive the page count from the two sizes so they cannot drift apart.
  n_pages <- ceiling(sample_size / per_page)
  pages <- vector("list", n_pages)

  for (page in seq_len(n_pages)) {
    cat("Page:", page, "\n")
    resp <- request(paste0(query_url, "&page=", page)) |>
      req_perform()
    # req_perform() already raises on HTTP 4xx/5xx by default; this guard
    # additionally rejects any other non-200 status.
    if (resp_status(resp) != 200L) {
      stop("Server error (HTTP ", resp_status(resp), ") on page ", page,
           call. = FALSE)
    }
    pages[[page]] <- resp_body_json(resp)$results |>
      works2df()
  }

  bind_rows(pages)
}

dat_2021 <- fetch_ecology_sample(2021)
dat_2025 <- fetch_ecology_sample(2025)
# Regular expression for each tracked character, keyed by the identifier that
# ends up in the `spchar` column. Regex metacharacters are escaped.
spchar_patterns <- c(
  n_em = "—",
  n_en = "–",
  n_min = "-",
  n_aster = "\\*",
  n_plus = "\\+",
  n_comma = ",",
  n_col = ":",
  n_semicol = ";",
  n_tilde = "~",
  n_dot = "\\.",
  n_par = "\\(",
  n_question = "\\?",
  n_slash = "/",
  n_eq = "=",
  n_perc = "%",
  n_amp = "&"
)

# Stack both years, keep abstracts longer than 600 and shorter than 3500
# characters, then expand each abstract into one row per tracked character
# holding its number of occurrences (`n_spchar`).
dat <- bind_rows(dat_2021, dat_2025) |>
  mutate(n_char = nchar(abstract)) |>
  filter(n_char > 600, n_char < 3500) |>
  mutate(
    count_chars = map(
      abstract,
      \(abstract_text) {
        # str_count() is vectorised over `pattern`: one count per character.
        tibble(
          spchar = names(spchar_patterns),
          n_spchar = unname(str_count(abstract_text, spchar_patterns))
        )
      },
      .progress = TRUE
    )
  ) |>
  unnest(count_chars)
# Lookup table pairing each character identifier (`spchar`) with the label
# shown for it on the chart (`spchar_lab`).
sp_labs <- tribble(
  ~spchar,      ~spchar_lab,
  "n_em",       "Em dash —",
  "n_en",       "En dash –",
  "n_min",      "Minus sign -",
  "n_aster",    "Asterisk *",
  "n_plus",     "Plus sign +",
  "n_comma",    "Comma ,",
  "n_col",      "Colon :",
  "n_semicol",  "Semicolon ;",
  "n_tilde",    "Tilde ~",
  "n_dot",      "Dot .",
  "n_par",      "Parenthesis (",
  "n_question", "Question mark ?",
  "n_slash",    "Slash /",
  "n_eq",       "Equal sign =",
  "n_perc",     "Percent %",
  "n_amp",      "Ampersand &"
)
# Chart: relative change in how often each tracked character is used in
# abstracts, 2025 versus 2021.
#
# `freq` is the number of occurrences of a character divided by the total
# number of abstract characters for that year; `ratio` is the relative change
# (`2025` - `2021`) / `2021`, so 1 means the frequency doubled.
dat |>
  group_by(publication_year, spchar) |>
  # Drop the grouping explicitly so later steps work on an ungrouped table
  # and summarise() does not emit its regrouping message.
  summarise(freq = sum(n_spchar) / sum(n_char), .groups = "drop") |>
  pivot_wider(names_from = publication_year, values_from = freq) |>
  mutate(ratio = (`2025` - `2021`) / `2021`) |>
  # Explicit key: no "Joining with `by = ...`" message, and no accidental
  # join on any other column the two tables might come to share.
  left_join(sp_labs, by = "spchar") |>
  ggplot() +
  # Constant reference line, set as a parameter and not a mapped aesthetic.
  geom_hline(yintercept = 0) +
  geom_col(aes(fct_reorder(spchar_lab, ratio), ratio)) +
  # Hand-placed annotation: a curve from the text to a point at (16, 0.9).
  # These coordinates are hard-coded, not derived from the data.
  geom_curve(
    aes(x = x, y = y, xend = xend, yend = yend),
    data = data.frame(x = 12.3, y = 0.75, xend = 16, yend = 0.9),
    curvature = 0.5,
    angle = 90
  ) +
  geom_point(
    aes(x, y),
    data = data.frame(x = 16, y = 0.9),
    color = "black"
  ) +
  geom_text(
    aes(x, y, label = "Use of Em dash\ndoubled over the period"),
    data = data.frame(x = 12.3, y = 0.75),
    hjust = 0,
    vjust = 1,
    nudge_y = -0.5,
    nudge_x = 0.25,
    size = 3
  ) +
  # Flip so the character labels read horizontally along the vertical axis.
  coord_flip() +
  scale_y_continuous(limits = c(-1.1, 1.1), labels = scales::percent) +
  labs(title = "The rise of Em dash in ecology article abstracts",
       subtitle = "Change in frequency of use of different characters in the abstracts of ecology articles between 2021 and 2025.",
       caption = "Abstract data: OpenAlex<br>Chart: @francoiskeck.bsky.social") +
  ylab("Usage in 2025 relative to 2021") +
  # NOTE(review): requests the "Lato" font family — confirm it is available
  # to the graphics device in use.
  theme_minimal(base_family = "Lato") +
  theme(axis.title.y = element_blank(),
        plot.title = element_text(
          color = "grey10",
          size = 16,
          face = "bold",
          margin = margin(t = 15)
        ),
        # ggtext elements: wrapped subtitle and markdown/HTML caption (<br>).
        plot.subtitle = element_textbox_simple(
          color = "grey30",
          size = 12,
          lineheight = 1.35,
          margin = margin(t = 15, b = 40)
        ),
        plot.caption = element_markdown(
          lineheight = 1.1,
          color = "grey30",
          size = 8,
          margin = margin(t = 20)
        ),
        plot.title.position = "plot",
        plot.margin = margin(15, 40, 15, 20),
        plot.background = element_rect(fill = "grey96", color = "grey96"),
        panel.background = element_rect(fill = "grey96", color = "grey96"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment