Skip to content

Instantly share code, notes, and snippets.

@Myfanwy
Created September 16, 2016 22:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Myfanwy/9d92ec34797651a31a5974091555b996 to your computer and use it in GitHub Desktop.
Save Myfanwy/9d92ec34797651a31a5974091555b996 to your computer and use it in GitHub Desktop.
Matt Lieber Is
library(rvest)
library(dplyr)
library(stringr)
# Pattern used to pick out the sentences of interest
toMatch <- "Matt Lieber is"

# Trial run against the first listing page ----
url <- "https://gimletmedia.com/show/reply-all/episodes/"
pgfull <- read_html(url)

# Collect the href of every node matched by the selector and resolve each one
# against the listing-page URL
texts <- html_attr(
  html_nodes(pgfull, ".feedItem~ .feedItem+ .feedItem .primary-color , .feedItem:nth-child(1) .primary-color"),
  "href"
)
texts <- xml2::url_absolute(texts, url)

# Fetch every linked page and keep only its text content
p1 <- as.character(
  unlist(lapply(texts, function(link) html_text(read_html(link))))
)

# Split the text at periods, then keep the pieces that contain the pattern
p1 <- unlist(strsplit(p1, split = "\\."))
p1 <- p1[grep(paste(toMatch, collapse = "|"), p1)]
p1
# Functionalize
# Scrape one numbered listing page and return every sentence, taken from the
# pages it links to, that contains the target phrase.
#
# Arguments:
#   pgno      Page number appended to the listing URL.
#   to_match  Character vector of regular-expression patterns; a sentence is
#             kept when it matches any of them. Defaults to "Matt Lieber is".
#
# Returns:
#   A character vector of matching sentences (text split at periods);
#   character(0) when nothing matches or no links could be extracted.
getallpages <- function(pgno, to_match = "Matt Lieber is") {
  url <- paste0("https://gimletmedia.com/show/reply-all/episodes/page/", pgno)
  pgfull <- read_html(url)

  # Link extraction stays best-effort, but a failure must yield an explicit
  # empty result. With a bare try() the local `texts` is never assigned and R
  # silently falls back to any global `texts`, re-scraping another page's links.
  texts <- tryCatch(
    pgfull %>%
      html_nodes(".feedItem~ .feedItem+ .feedItem .primary-color , .feedItem:nth-child(1) .primary-color") %>%
      html_attr("href") %>%
      xml2::url_absolute(url),
    error = function(e) {
      warning(
        "Could not extract links from ", url, ": ", conditionMessage(e),
        call. = FALSE
      )
      character(0)
    }
  )

  # Matched nodes without an href give NA, which read_html() cannot fetch
  texts <- texts[!is.na(texts)]

  page_text <- as.character(
    unlist(lapply(texts, function(link) html_text(read_html(link))))
  )

  # Split once and reuse the result for both matching and subsetting
  sentences <- unlist(strsplit(page_text, split = "\\."))
  if (length(sentences) == 0) {
    return(character(0))
  }
  sentences[grepl(paste(to_match, collapse = "|"), sentences)]
}
# Run the scraper over listing pages 2 through 8
pgnos <- 2:8
total <- lapply(pgnos, getallpages)

# Flatten the per-page results and append the first-page matches at the end
phrases <- c(unlist(total), p1)

# Anonymize list: remove the first newline and the first "Matt Lieber" from
# each phrase, then strip leading whitespace
blank <- phrases %>%
  str_replace("\n", "") %>%
  str_replace("Matt Lieber", "") %>%
  str_trim(side = "left")
blank

# One-column data frame of the anonymized phrases
blankdf <- data.frame(x = blank)
head(blankdf)

# becomes the template for the internal dataframe in the MattLiebeR package
save(blankdf, file = "blanks.Rda")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment