Skip to content

Instantly share code, notes, and snippets.

@jebyrnes
Last active October 25, 2019 18:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jebyrnes/d9d55665d7ff2777f4e13b80f45f4e7a to your computer and use it in GitHub Desktop.
Save jebyrnes/d9d55665d7ff2777f4e13b80f45f4e7a to your computer and use it in GitHub Desktop.
Some tidytext analysis of sea chanties
library(tidyverse)
library(tidytext)
library(rvest)
# The plotting package is called ggplot2. tidyverse attaches it as well, but
# it is listed explicitly because the plot at the end depends on it.
library(ggplot2)

# Get the initial info from the site: read the index page and collect the
# href of every anchor on it.
site <- "http://www.guscantieni.com/bindarrr/"
index <- read_html(paste0(site, "index.html"))

# Keep only hrefs that start with "songs", then drop the table of contents
# and one page that is known to be broken.
links <- html_nodes(index, "a") %>%
  html_attr("href") %>%
  keep(~ str_detect(.x, "^songs")) %>%
  discard(~ str_detect(.x, "table-of-contents")) %>%
  # Both "?" and "." are regex metacharacters, so both are escaped to match
  # the literal file name.
  discard(~ str_detect(.x, "songs/what-will-we-do\\?\\.html")) # broken
# Function to get the text of one chanty page.
#
# Arguments:
#   a_link   Path of the song page, relative to `base_url`.
#   base_url Root URL that `a_link` is appended to. Defaults to the
#            script-level `site`, so existing one-argument calls still work.
#
# Returns a tibble with one row per verse:
#   chanty  text of the page's first <h1> (NA if the page has none)
#   verse   position of the verse on the page, starting at 1
#   words   text of the verse
# A page with no <p> and no <td> elements yields a zero-row tibble.
keelhaul_one_page <- function(a_link, base_url = site) {
  # Progress indicator; message() writes to stderr, not stdout.
  message(a_link)
  a_chanty <- read_html(paste0(base_url, a_link))

  # Indexing with [1] always gives a length-one title: the first heading, or
  # NA when there is none. A length-one value recycles safely in tibble().
  title <- html_text(html_nodes(a_chanty, "h1"))[1]

  verses <- a_chanty %>%
    html_nodes("p") %>%
    html_text()

  # Fall back to table cells when the page has no <p> elements.
  if (length(verses) == 0) {
    verses <- a_chanty %>%
      html_nodes("td") %>%
      html_text()
  }

  # seq_along() gives integer(0) for an empty page, whereas 1:length(verses)
  # would give c(1, 0) and make tibble() fail on mismatched column sizes.
  tibble(chanty = title, verse = seq_along(verses), words = verses)
}
# Get the lyrics and tokenize: scrape every linked page into one tibble
# (one row per verse), then split the `words` column into one token per row
# in a new `word` column and remove stop words.
chanties <- map_df(links, keelhaul_one_page)
chanty_words <- chanties %>%
  unnest_tokens(word, words) %>%
  # Name the join key explicitly instead of relying on whichever column
  # names the two tables happen to share.
  anti_join(get_stopwords(), by = "word")
# Count words and ascribe sentiments across all chanties: tally each word
# (most frequent first) and keep only the words present in the "bing"
# sentiment lexicon, attaching that lexicon's columns.
bing <- get_sentiments("bing")
wordcount <- chanty_words %>%
  count(word, sort = TRUE) %>%
  # Name the join key explicitly instead of relying on whichever column
  # names the two tables happen to share.
  inner_join(bing, by = "word")
# Horizontal bar chart of words counted more than 10 times. Counts of
# negative words are negated, and words are ordered by the resulting signed
# count; bars are filled by sentiment.
wordcount %>%
  filter(n > 10) %>%
  mutate(
    n = ifelse(sentiment == "negative", -n, n),
    # Uses the signed n computed on the previous line.
    word = reorder(word, n)
  ) %>%
  ggplot(aes(x = word, y = n, fill = sentiment)) +
  geom_col() +
  coord_flip() +
  ggtitle("Word contribution to sentiment of chanties\nlisted on http://www.guscantieni.com/bindarrr/")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment