Skip to content

Instantly share code, notes, and snippets.

@juliasilge
Last active November 28, 2019 23:27
Show Gist options
  • Save juliasilge/4b03b7c92316366358f53f5170685e45 to your computer and use it in GitHub Desktop.
Save juliasilge/4b03b7c92316366358f53f5170685e45 to your computer and use it in GitHub Desktop.
Beatles lyrics
library(tidyverse)
library(scales)
# Make silgelib's theme_plex() the default ggplot2 theme for every plot
# drawn after this point.
theme_set(silgelib::theme_plex())
library(geniusR)
library(tidytext)
# Albums to analyse: one row per album with the year attached to it.
# The artist is the same for every row, so it is added as a constant column
# and then moved to the front to give the column order artist, album, year.
albums <- tribble(
  ~album,                                  ~year,
  "Please Please Me",                      1963,
  "With the Beatles",                      1963,
  "A Hard Day's Night",                    1964,
  "Beatles for Sale",                      1964,
  "Help!",                                 1965,
  "Rubber Soul",                           1965,
  "Revolver",                              1966,
  "Sgt. Pepper's Lonely Hearts Club Band", 1967,
  "Magical Mystery Tour",                  1967,
  "The Beatles (The White Album)",         1968,
  "Yellow Submarine",                      1969,
  "Abbey Road",                            1969,
  "Let It Be",                             1970
) %>%
  mutate(artist = "The Beatles") %>%
  select(artist, album, year)

# Print the table as a quick sanity check.
albums
# Call genius_album() once per row with that row's artist and album, and keep
# whatever it returns in the `tracks` list-column.
album_lyrics <- mutate(
  albums,
  tracks = map2(artist, album, ~ genius_album(.x, .y))
)

# Print the result as a quick sanity check.
album_lyrics
# Expand the per-album `tracks` list-column into rows, then split the `lyric`
# text into tokens stored in a new `word` column (one row per token).
tidy_lyrics <- unnest_tokens(
  unnest(album_lyrics, tracks),
  output = word,
  input = lyric
)
# Count how often each non-stop-word occurs in each year.
#
# Resulting columns:
#   year, word  - one row for every year/word combination
#   count       - occurrences of `word` in `year` (0 when absent)
#   word_total  - occurrences of `word` summed over all years
#   year_total  - sum of `count` over the retained words within `year`
words_by_year <- tidy_lyrics %>%
  count(year, word) %>%
  # Name the join key explicitly instead of relying on whichever column
  # names happen to be shared with the stop-word table.
  anti_join(get_stopwords(), by = "word") %>%
  # Add explicit zero rows so that every word has an entry for every year.
  complete(year, word, fill = list(n = 0)) %>%
  group_by(word) %>%
  mutate(word_total = sum(n)) %>%
  ungroup() %>%
  # Keep only words that occur more than 70 times overall.
  filter(word_total > 70) %>%
  # NOTE(review): year_total is computed after the filter above, so it is the
  # total of the retained words only, not of every word in that year.
  group_by(year) %>%
  mutate(year_total = sum(n)) %>%
  ungroup() %>%
  rename(count = n)
library(broom)
# Fit one binomial regression per word, modelling how the word's share of
# the yearly total changes with `year`.
#
# glm() interprets a two-column binomial response as
# cbind(successes, failures), so the second column must be the remaining
# words of that year (year_total - count) rather than year_total itself.
#
# Result: one row per word with the list-columns `data` (that word's rows of
# words_by_year) and `models` (the fitted glm object).
nested_models <- words_by_year %>%
  nest(-word) %>%
  mutate(models = map(data, ~ glm(cbind(count, year_total - count) ~ year,
                                  data = .x,
                                  family = "binomial")))
# Extract the fitted `year` coefficient of every per-word model and keep the
# ones with p < 0.05, sorted from most negative to most positive slope.
#
# The tidied coefficient tables are stored in a real column first and then
# unnested by name: passing an expression such as map(models, tidy) directly
# to unnest() is not a column selection and is rejected by tidyr >= 1.0.
# transmute() also drops the `data` and `models` list-columns so the result
# has the same columns (word + broom's tidy() output) on every tidyr version.
slopes <- nested_models %>%
  transmute(word, tidied = map(models, tidy)) %>%
  unnest(tidied) %>%
  filter(term == "year") %>%
  arrange(estimate) %>%
  filter(p.value < 0.05)
library(ggrepel)
# Pick the words to plot: within each direction of change (positive vs.
# non-positive slope) keep the 5 largest absolute slopes, then attach the
# yearly counts and convert them to a frequency.
plot_words <- slopes %>%
  group_by(sign = estimate > 0) %>%
  top_n(5, abs(estimate)) %>%
  ungroup() %>%
  # Replace the logical flag with the label used for colouring and faceting.
  mutate(sign = ifelse(sign, "Increasing", "Decreasing")) %>%
  # Join on `word` explicitly so the key does not depend on which other
  # columns `slopes` happens to carry.
  inner_join(words_by_year, by = "word") %>%
  mutate(freq = count / year_total)
# One label row per word: decreasing words are labelled at their first year,
# increasing words at their last year. `yearselect` holds the chosen year.
plot_labels <- plot_words %>%
  group_by(word) %>%
  mutate(
    yearselect = ifelse(sign == "Decreasing", min(year), max(year))
  ) %>%
  # Still grouped by word here; keep only the row for the chosen year.
  filter(year == yearselect) %>%
  ungroup()
# Plot the yearly frequency of the selected words, one facet per direction
# of change. x, y, colour and the per-word grouping are set once here and
# inherited by every layer unless a layer overrides them.
ggplot(plot_words, aes(x = year, y = freq, color = sign, group = word)) +
  # Dashed linear trend per direction: grouped by `sign` instead of by word.
  geom_smooth(aes(group = sign),
              method = "lm", linetype = 2, alpha = 0.2, size = 0.8) +
  # One line per word.
  geom_line(size = 1.3, alpha = 0.8, show.legend = FALSE) +
  # Labels for decreasing words, nudged to the left of their label year.
  geom_text_repel(aes(label = word),
                  data = plot_labels %>% filter(sign == "Decreasing"),
                  family = "IBMPlexSans",
                  hjust = 0,
                  nudge_x = -0.5) +
  # Labels for increasing words, nudged to the right of their label year.
  geom_text_repel(aes(label = word),
                  data = plot_labels %>% filter(sign == "Increasing"),
                  family = "IBMPlexSans",
                  hjust = 1,
                  nudge_x = 0.5) +
  facet_wrap(~ sign, nrow = 2, scales = "free_y") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(legend.position = "none") +
  labs(title = "Words changing in frequency in Beatles' lyrics",
       subtitle = "The Beatles sang less about babies and more about getting back somewhere as time passed",
       x = NULL,
       y = "Word frequency")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment