Skip to content

Instantly share code, notes, and snippets.

@josefslerka
Created November 28, 2018 13:39
Show Gist options
  • Save josefslerka/160088e187061285ffa04390e9a7ca1b to your computer and use it in GitHub Desktop.
Save josefslerka/160088e187061285ffa04390e9a7ca1b to your computer and use it in GitHub Desktop.
library(geniusR)
library(tidyverse)
library(tidytext)
# Fetch the lyrics of The Doors' self-titled album through geniusR.
# The steps below rely on the `lyric` and `track_title` columns of the result.
album_texty <- genius_album(
  artist = "The Doors",
  album = "The Doors"
)
# Split the `lyric` text into one word per row, then drop every word that is
# listed in tidytext's `stop_words` table.
# The join key is spelled out so dplyr does not have to guess it (and does not
# print a "Joining, by = ..." message on every run).
tidy_album <- album_texty %>%
  unnest_tokens(word, lyric) %>%
  anti_join(stop_words, by = "word")
# Tally every (track, word) pair into column `n`, most frequent pairs first.
# The explicit ungroup() guarantees the later steps receive an ungrouped table.
song_words <- ungroup(
  count(tidy_album, track_title, word, sort = TRUE)
)
# Compute tf-idf for every word, treating each track as one document and
# using the counts in `n`, then sort so the highest tf-idf rows come first.
song_words <- arrange(
  bind_tf_idf(song_words, word, track_title, n),
  desc(tf_idf)
)
# Keep the highest tf-idf words of every track.
#
# The ranking column is named explicitly: a bare top_n(5) ranks by the LAST
# column of the table, which here is the helper factor `text`, not `tf_idf`.
# Ranking by `text` orders a word by its best score in ANY track, so a word
# shared between tracks could displace words that are more characteristic of
# the current track.
#
# Note: top_n() keeps ties, so a track may contribute more than five words
# when several words share the same tf-idf value.
top <- song_words %>%
  arrange(desc(tf_idf)) %>%
  # `text` is retained so the columns of `top` stay the same as before.
  mutate(text = factor(word, levels = rev(unique(word)))) %>%
  group_by(track_title) %>%
  top_n(5, tf_idf) %>%
  ungroup()

# Draw one horizontal bar-chart panel per track.
# `top` is sorted by descending tf-idf, so reversing the order of first
# appearance makes the strongest words the last factor levels; after
# coord_flip() those are drawn towards the top of the panels.
# `top` is already limited to the top words per track, so no second
# group_by()/top_n() pass is needed here.
top %>%
  mutate(word = factor(word, levels = rev(unique(word)))) %>%
  ggplot(aes(word, tf_idf, fill = track_title)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  facet_wrap(~track_title, ncol = 2, scales = "free") +
  coord_flip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment