Skip to content

Instantly share code, notes, and snippets.

@fkeck
Last active July 8, 2025 09:14
Show Gist options
  • Select an option

  • Save fkeck/21f743df14df519cfa6a94487e5b4203 to your computer and use it in GitHub Desktop.

Select an option

Save fkeck/21f743df14df519cfa6a94487e5b4203 to your computer and use it in GitHub Desktop.
Em dash ecology
library(tidyverse)
library(ggtext)
library(openalexR)
library(httr2)
##### OpenAlex #####

# Download a seeded random sample of works for one publication year and
# return them as a single data frame.
#
# The query asks OpenAlex for English-language articles that have an abstract
# and belong to subfield 2303, published between 1 January and 31 December of
# `year`. The sample is walked page by page; each page of results is converted
# with works2df() and the pages are row-bound at the end.
#
# Arguments:
#   year     Publication year to sample (e.g. 2021).
#   n_sample Number of works requested through the `sample` query parameter.
#   per_page Number of works requested per page.
#   seed     Value of the `seed` query parameter; kept fixed so that every page
#            request refers to the same random sample.
#
# Returns the result of bind_rows() over the converted pages.
# Stops with an error if a response does not have HTTP status 200.
fetch_ecology_sample <- function(year,
                                 n_sample = 10000L,
                                 per_page = 200L,
                                 seed = 123L) {
  query_url <-
    oa_query(entity = "works",
             topics.subfield.id = "subfields/2303",
             has_abstract = TRUE,
             language = "en",
             from_publication_date = paste0(year, "-01-01"),
             to_publication_date = paste0(year, "-12-31"),
             type = "article") |>
    paste0("&mailto=XXXXX",
           "&per-page=", per_page,
           "&sample=", n_sample, "&seed=", seed)

  n_pages <- ceiling(n_sample / per_page)
  pages <- vector("list", n_pages)

  for (i in seq_len(n_pages)) {
    # Trailing newline so successive progress lines do not run together.
    cat("Page:", i, "\n")

    resp <- paste0(query_url, "&page=", i) |>
      request() |>
      req_perform()

    status <- resp_status(resp)
    if (status != 200L) {
      stop("Server error (HTTP ", status, ") on page ", i, call. = FALSE)
    }

    results <- resp_body_json(resp)$results
    # Fewer matching works than requested: nothing left to convert, so stop
    # paging. The remaining slots stay NULL and are ignored by bind_rows().
    if (length(results) == 0L) {
      break
    }

    pages[[i]] <- works2df(results)
  }

  bind_rows(pages)
}

dat_2021 <- fetch_ecology_sample(2021)
dat_2025 <- fetch_ecology_sample(2025)
# Stack both yearly samples and record the length of each abstract.
dat <- bind_rows(dat_2021, dat_2025)
dat <- mutate(dat, n_char = nchar(abstract))

# Keep abstracts strictly between 600 and 3500 characters long.
dat <- filter(dat, n_char > 600, n_char < 3500)

# For every abstract, count the occurrences of each tracked character and
# expand the result to one row per (abstract, character) pair, with the
# character code in `spchar` and its count in `n_spchar`.
dat <- dat |>
  mutate(count_chars = map(abstract, \(txt) {
    # Regular expressions keyed by character code; characters that are regex
    # metacharacters are escaped.
    patterns <- c(
      n_em = "—",
      n_en = "–",
      n_min = "-",
      n_aster = "\\*",
      n_plus = "\\+",
      n_comma = ",",
      n_col = ":",
      n_semicol = ";",
      n_tilde = "~",
      n_dot = "\\.",
      n_par = "\\(",
      n_question = "\\?",
      n_slash = "/",
      n_eq = "=",
      n_perc = "%",
      n_amp = "&"
    )
    # str_count() is vectorised over `pattern`, so one call yields all counts
    # in the same order as `patterns`.
    tibble(
      spchar = names(patterns),
      n_spchar = str_count(txt, patterns)
    )
  }, .progress = TRUE)) |>
  unnest(count_chars)
# Lookup table mapping each character code (`spchar`) to the label shown on
# the chart axis (`spchar_lab`).
sp_labs <- tribble(
  ~spchar,      ~spchar_lab,
  "n_em",       "Em dash —",
  "n_en",       "En dash –",
  "n_min",      "Minus sign -",
  "n_aster",    "Asterisk *",
  "n_plus",     "Plus sign +",
  "n_comma",    "Comma ,",
  "n_col",      "Colon :",
  "n_semicol",  "Semicolon ;",
  "n_tilde",    "Tilde ~",
  "n_dot",      "Dot .",
  "n_par",      "Parenthesis (",
  "n_question", "Question mark ?",
  "n_slash",    "Slash /",
  "n_eq",       "Equal sign =",
  "n_perc",     "Percent %",
  "n_amp",      "Ampersand &"
)
# Relative change in character frequency between 2021 and 2025.
#
# `freq` is the number of occurrences of a character divided by the total
# number of abstract characters for that year (each abstract contributes its
# `n_char` once per character code, so sum(n_char) within a year/character
# group is the year's total). `ratio` is (freq_2025 - freq_2021) / freq_2021.
char_change <- dat |>
  group_by(publication_year, spchar) |>
  # Drop the grouping explicitly so the reshaping below works on a plain
  # tibble and summarise() does not emit a regrouping message.
  summarise(freq = sum(n_spchar) / sum(n_char), .groups = "drop") |>
  pivot_wider(names_from = publication_year, values_from = freq) |>
  mutate(ratio = (`2025` - `2021`) / `2021`) |>
  # Explicit key: avoids the "Joining with..." message and accidental joins
  # on any other shared column name.
  left_join(sp_labs, by = "spchar")

# Horizontal bar chart of `ratio` per character, ordered by ratio.
# NOTE(review): the annotation coordinates below are hard-coded; x = 16 is the
# last of the 16 ordered bars and y = 0.9 presumably matched the Em dash value
# in the author's data - confirm after re-running the download.
ggplot(char_change) +
  geom_hline(yintercept = 0) +
  geom_col(aes(fct_reorder(spchar_lab, ratio), ratio)) +
  # Curved pointer from the text annotation to the tip of the top bar.
  geom_curve(
    aes(x = x, y = y, xend = xend, yend = yend),
    data = data.frame(x = 12.3, y = 0.75, xend = 16, yend = 0.9),
    curvature = 0.5,
    angle = 90
  ) +
  geom_point(
    aes(x, y),
    data = data.frame(x = 16, y = 0.9),
    color = "black"
  ) +
  geom_text(
    aes(x, y, label = "Use of Em dash\ndoubled over the period"),
    data = data.frame(x = 12.3, y = 0.75),
    hjust = 0,
    vjust = 1,
    nudge_y = -0.5,
    nudge_x = 0.25,
    size = 3
  ) +
  coord_flip() +
  # NOTE(review): with fixed limits, a bar whose ratio falls outside
  # [-1.1, 1.1] would presumably be dropped by the scale - verify if the
  # data change.
  scale_y_continuous(limits = c(-1.1, 1.1), labels = scales::percent) +
  labs(title = "The rise of Em dash in ecology article abstracts",
       subtitle = "Change in frequency of use of different characters in the abstracts of ecology articles between 2021 and 2025.",
       caption = "Abstract data: OpenAlex<br>Chart: @francoiskeck.bsky.social") +
  ylab("Usage in 2025 relative to 2021") +
  theme_minimal(base_family = "Lato") +
  theme(axis.title.y = element_blank(),
        plot.title = element_text(
          color = "grey10",
          size = 16,
          face = "bold",
          margin = margin(t = 15)
        ),
        # ggtext elements: the subtitle wraps to the plot width and the
        # caption renders its <br> as a line break.
        plot.subtitle = element_textbox_simple(
          color = "grey30",
          size = 12,
          lineheight = 1.35,
          margin = margin(t = 15, b = 40)
        ),
        plot.caption = element_markdown(
          lineheight = 1.1,
          color = "grey30",
          size = 8,
          margin = margin(t = 20)
        ),
        plot.title.position = "plot",
        plot.margin = margin(15, 40, 15, 20),
        plot.background = element_rect(fill = "grey96", color = "grey96"),
        panel.background = element_rect(fill = "grey96", color = "grey96"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment