Last active
November 28, 2019 23:27
-
-
Save juliasilge/4b03b7c92316366358f53f5170685e45 to your computer and use it in GitHub Desktop.
Beatles lyrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(scales) | |
# Make silgelib's theme_plex() the default ggplot2 theme for every plot
# drawn after this point.
ggplot2::theme_set(silgelib::theme_plex())
library(geniusR) | |
library(tidytext) | |
# Albums to analyse, one row per album: the artist, the album title, and the
# year associated with the album. Written row-wise so each title sits next to
# its year.
albums <- tribble(
  ~artist,       ~album,                                  ~year,
  "The Beatles", "Please Please Me",                      1963,
  "The Beatles", "With the Beatles",                      1963,
  "The Beatles", "A Hard Day's Night",                    1964,
  "The Beatles", "Beatles for Sale",                      1964,
  "The Beatles", "Help!",                                 1965,
  "The Beatles", "Rubber Soul",                           1965,
  "The Beatles", "Revolver",                              1966,
  "The Beatles", "Sgt. Pepper's Lonely Hearts Club Band", 1967,
  "The Beatles", "Magical Mystery Tour",                  1967,
  "The Beatles", "The Beatles (The White Album)",         1968,
  "The Beatles", "Yellow Submarine",                      1969,
  "The Beatles", "Abbey Road",                            1969,
  "The Beatles", "Let It Be",                             1970
)

# Print the table as a quick visual check.
albums
# Call genius_album() once per row with that row's artist and album title and
# keep whatever it returns in the `tracks` list-column.
# NOTE(review): genius_album() presumably fetches the lyrics over the network,
# so this step needs connectivity and may be slow -- confirm.
album_lyrics <- mutate(
  albums,
  tracks = map2(artist, album, ~ genius_album(.x, .y))
)

# Print the nested result as a quick visual check.
album_lyrics
# Expand the nested `tracks` column into regular columns, then split the
# `lyric` text into tokens so the result has one row per `word`.
tidy_lyrics <- unnest(album_lyrics, tracks) %>%
  unnest_tokens(output = word, input = lyric)
# Per-year word counts for the words used often enough to model.
#
# Resulting columns:
#   year, word  - one row for every (year, word) combination
#   count       - occurrences of `word` in `year` (0 when it never appears)
#   word_total  - occurrences of `word` summed over all years
#   year_total  - sum of `count` over the retained words in `year`
words_by_year <- tidy_lyrics %>%
  count(year, word) %>%
  # Drop stop words. The join key is spelled out so that a shared column name
  # other than `word` can never silently become part of the key.
  anti_join(get_stopwords(), by = "word") %>%
  # Make absent (year, word) pairs explicit zeros so every word has a value
  # for every year.
  complete(year, word, fill = list(n = 0)) %>%
  group_by(word) %>%
  mutate(word_total = sum(n)) %>%
  ungroup() %>%
  # Keep only words that occur more than 70 times overall.
  filter(word_total > 70) %>%
  # NOTE: computed after the filter above, so `year_total` counts only the
  # retained frequent words, not every word sung that year.
  group_by(year) %>%
  mutate(year_total = sum(n)) %>%
  ungroup() %>%
  rename(count = n)
library(broom) | |
# Fit one binomial regression per word, modelling how the word's share of
# the yearly total changes with `year`.
#
# Result: one row per word, with `data` (that word's rows of words_by_year)
# and `models` (the fitted glm object).
nested_models <- words_by_year %>%
  # group_by() + nest() gives one row per word with the remaining columns
  # nested in `data`; unlike the unnamed `nest(-word)` form it works on both
  # the old and the current tidyr interface.
  group_by(word) %>%
  nest() %>%
  ungroup() %>%
  # A two-column binomial response must be (successes, failures). The
  # failures are the occurrences of every *other* word that year, i.e.
  # year_total - count; passing year_total itself would misstate the odds.
  mutate(models = map(data, ~ glm(cbind(count, year_total - count) ~ year,
                                  data = .x, family = "binomial")))
# Extract the fitted `year` coefficient for every word and keep the words
# whose trend is significant at the 5% level, ordered from the most negative
# slope to the most positive.
slopes <- nested_models %>%
  # Build the tidied-coefficient list-column explicitly and unnest it by
  # name: passing an expression straight to unnest() only works with the
  # pre-1.0 tidyr interface. transmute() also drops the `data` and `models`
  # list-columns, so the result has the same shape on every tidyr version.
  transmute(word, tidied = map(models, tidy)) %>%
  unnest(tidied) %>%
  filter(term == "year", p.value < 0.05) %>%
  arrange(estimate)
library(ggrepel) | |
# Pick the words to plot: the 5 largest slopes (by absolute value) in each
# direction, joined back to their yearly counts.
#
# Adds `sign` ("Increasing" / "Decreasing") and `freq`, the word's share of
# `year_total` in each year.
plot_words <- slopes %>%
  group_by(sign = estimate > 0) %>%
  # top_n() keeps ties, so a group can contribute more than 5 words.
  top_n(5, abs(estimate)) %>%
  ungroup() %>%
  mutate(sign = ifelse(sign, "Increasing", "Decreasing")) %>%
  # Explicit key: the two tables are related through `word` only.
  inner_join(words_by_year, by = "word") %>%
  mutate(freq = count / year_total)
# One row per plotted word, used to position its text label: the earliest
# year for decreasing words and the latest year for all other words.
plot_labels <- plot_words %>%
  group_by(word) %>%
  mutate(yearselect = ifelse(sign == "Decreasing", min(year), max(year))) %>%
  filter(year == yearselect) %>%
  ungroup()
# Word frequency over time, one facet per trend direction. Every layer
# inherits x, y, colour and the per-word grouping from the base mapping
# unless it overrides them.
ggplot(plot_words, aes(x = year, y = freq, color = sign, group = word)) +
  # Dashed linear trend across all words in a facet (grouped by direction
  # rather than by word).
  geom_smooth(aes(group = sign),
              method = "lm", linetype = 2, alpha = 0.2, size = 0.8) +
  # One line per word.
  geom_line(size = 1.3, alpha = 0.8, show.legend = FALSE) +
  # Labels for decreasing words, nudged left of their anchor point.
  geom_text_repel(aes(label = word),
                  data = filter(plot_labels, sign == "Decreasing"),
                  nudge_x = -0.5, hjust = 0, family = "IBMPlexSans") +
  # Labels for increasing words, nudged right of their anchor point.
  geom_text_repel(aes(label = word),
                  data = filter(plot_labels, sign == "Increasing"),
                  nudge_x = 0.5, hjust = 1, family = "IBMPlexSans") +
  facet_wrap(~ sign, nrow = 2, scales = "free_y") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(legend.position = "none") +
  labs(
    x = NULL,
    y = "Word frequency",
    title = "Words changing in frequency in Beatles' lyrics",
    subtitle = "The Beatles sang less about babies and more about getting back somewhere as time passed"
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment