Skip to content

Instantly share code, notes, and snippets.

@mkiang
Created December 23, 2020 20:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mkiang/e9c2b292156c4dfbeb549ffcd40cd278 to your computer and use it in GitHub Desktop.
Save mkiang/e9c2b292156c4dfbeb549ffcd40cd278 to your computer and use it in GitHub Desktop.
Taylor Swift Profanity Rate
## Imports ----
library(tidyverse)
library(genius)
library(here)
library(fs)
data("stop_words")
source("https://raw.githubusercontent.com/mkiang/airline_testing_strategies/master/code/mk_nytimes.R")
## Create the output directory used by the steps below
dir_create(here("data"))

## Set up a skeleton df ----
## One row per album. The single artist value is recycled across all rows;
## release dates are stored as character strings at this point.
taytaframe <- tibble(
  artist = "Taylor Swift",
  album = c(
    "Taylor Swift",
    "Fearless",
    "Speak Now",
    "Red",
    "1989",
    "Reputation",
    "Lover",
    "Folklore",
    "Evermore"
  ),
  release_date = c(
    "2006-10-24",
    "2008-11-11",
    "2010-10-25",
    "2012-10-22",
    "2014-10-27",
    "2017-11-10",
    "2019-08-23",
    "2020-07-24",
    "2020-12-11"
  )
)

## Keep a plain-text copy of the discography table
write_csv(taytaframe, here("data", "disco_info.csv"))
## Add lyrics ----
## Lyrics are only fetched (via genius::add_genius) when no cached copy
## exists; otherwise the cached RDS file is reused as-is.
cache_path <- here("data", "working_data.RDS")

if (!file_exists(cache_path)) {
  swiftframe <- taytaframe %>%
    select(-release_date) %>%
    add_genius(artist, album, "album")

  ## Save them ----
  write_csv(swiftframe, here("data", "lyrics_by_line.csv"))
  saveRDS(
    list(taytaframe = taytaframe, swiftframe = swiftframe),
    cache_path
  )
}

## Read the cache once and pull both data frames out of the same object
working_data <- readRDS(cache_path)
swiftframe <- working_data$swiftframe

## Album metadata: parsed dates, ordered album factor, and axis labels ----
taytaframe <- working_data$taytaframe %>%
  mutate(
    release_date = lubridate::ymd(release_date),
    ## Give the ordered factor chronological levels; without explicit
    ## levels, factor() sorts them alphabetically.
    album_cat = factor(
      album,
      levels = unique(album[order(release_date)]),
      ordered = TRUE
    ),
    label = sprintf("%s (%s)", album, format(release_date, "%b %Y"))
  ) %>%
  ## Prefix the two 2020 albums' labels with newlines so they are drawn on
  ## lower lines (presumably to keep the closely spaced labels from
  ## overlapping -- confirm against the rendered plot).
  mutate(
    label = case_when(
      album == "Evermore" ~ paste0("\n\n", label),
      album == "Folklore" ~ paste0("\n", label),
      TRUE ~ label
    )
  )
## Tokenize lyrics and flag words of interest ----
## Produces one row per word. Each flag column is 1 when the word matches
## and 0 otherwise.
## NOTE(review): unnest_tokens() and stop_words look like they come from
## tidytext, which no library() call in this file attaches -- confirm it is
## available in the session.
words <- swiftframe %>%
  unnest_tokens(word, lyric) %>%
  mutate(
    ## Compare against the `word` column of stop_words. Testing `%in%`
    ## against the data frame itself matches on whole columns, so no
    ## individual word is ever flagged and the stop-word filter downstream
    ## silently does nothing.
    stopword = (word %in% stop_words$word) + 0,
    ## "\\<" anchors the pattern at the start of a word
    shit = grepl("\\<shit", word) + 0,
    fuck = grepl("\\<fuck", word) + 0,
    ## Unanchored on purpose so compounds such as "goddamn" also count
    damn = grepl("damn", word) + 0,
    crazy = grepl("\\<crazy", word) + 0
  )
## Per-album curse counts and rates ----
## Result has one row per album x curse word, with the number of
## non-stop-word tokens on the album (n_words), the number of matches
## (n_curse), and the rate per 5,000 words.
curses <- words %>%
  filter(stopword == 0) %>%
  group_by(artist, album) %>%
  summarize(
    n_words = n(),
    shit = sum(shit),
    fuck = sum(fuck),
    damn = sum(damn),
    crazy = sum(crazy)
  ) %>%
  ## Drop the leftover grouping before reshaping
  ungroup() %>%
  ## Long format: one row per (album, curse word)
  pivot_longer(shit:crazy, names_to = "curse", values_to = "n_curse") %>%
  ## Explicit keys so the join cannot silently pick up other shared columns
  left_join(taytaframe, by = c("artist", "album")) %>%
  mutate(
    rate_per_5k = n_curse / n_words * 5000,
    ## Display labels used for the plot legend
    curse_cat = factor(
      curse,
      levels = c("crazy", "damn", "fuck", "shit"),
      labels = c("crazy", "[god]damn", "fuck[ed]", "shit"),
      ordered = TRUE
    )
  ) %>%
  arrange(album_cat, curse_cat)
## Plot profanity rate over time ----
## One line per curse word, with a point at each album's release date.
p1 <- ggplot(
  curses,
  aes(
    x = release_date,
    y = rate_per_5k,
    color = curse_cat,
    group = curse_cat
  )
) +
  ## NOTE(review): newer ggplot2 releases prefer `linewidth` for lines;
  ## `size` is kept so the script still runs on older versions.
  geom_line(size = 1) +
  ## A larger white point underneath each colored point gives the markers
  ## a white halo where they sit on the lines.
  geom_point(color = "white", size = 3.5) +
  geom_point(size = 2) +
  ## mk_nytimes() is defined in the script sourced at the top of this file
  mk_nytimes() +
  scale_color_brewer(NULL, palette = "Set1") +
  ## Closing parenthesis added; the original label was left unbalanced
  scale_y_continuous("Profanity rate (curse word per 5,000 words)") +
  ## Put one tick at each album release, labelled with album and date
  scale_x_date(
    NULL,
    breaks = taytaframe$release_date,
    labels = taytaframe$label
  ) +
  labs(title = "Taylor Swift's Profanity Rate, 2006—2020")

## Written relative to the current working directory
ggsave("./shits_over_time.jpg", p1, width = 6, height = 3.5, scale = 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment