Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Scraping the Joseph Smith papers
library(rvest)
library(dplyr)
library(magrittr)
# Step 1: download the people index page and pull out, for every entry,
# the displayed name and the link target.
base <- "http://josephsmithpapers.org"
list_of_people <- "/reference/people#a::"

# read_html() is the current name for what older rvest releases called
# html(); html() was deprecated and later removed from rvest.
results <- paste0(base, list_of_people) %>%
  read_html() %>%
  html_nodes(".alphaItem")

# html_text() and html_attr() already return plain character vectors, so no
# unlist() is needed. Entries without an href attribute come back as NA.
names <- html_text(results)
path <- html_attr(results, "href")

# One row per matched ".alphaItem" node; columns are `names` and `path`.
# tibble() replaces the deprecated data_frame().
people <- tibble(names = names, path = path)
#' Scrape a single person page.
#'
#' @param url URL of the person page to download and parse.
#' @return A one-row tibble with columns `full_name`, `gender`, `bio`
#'   (character; `NA` when the corresponding node is not found) and
#'   `mentions`, a list-column holding a tibble with one row per link matched
#'   by "#paper-link a" (columns `text` and `href`).
get_person_data <- function(url) {
  # read_html() supersedes the html() function of older rvest releases.
  page <- read_html(url)

  # html_node() yields a missing node when nothing matches, which
  # html_text() turns into NA, so these two are always length one.
  full_name <- page %>%
    html_node(".metadata:nth-child(1) dd") %>%
    html_text()

  gender <- page %>%
    html_node(".metadata:nth-child(2) dd") %>%
    html_text()

  # The biography is taken from the third <p> on the page; guard against
  # pages with fewer paragraphs instead of indexing out of range.
  paragraphs <- html_nodes(page, "p")
  bio <- if (length(paragraphs) >= 3) {
    html_text(paragraphs[[3]])
  } else {
    NA_character_
  }

  # A person can have any number of linked papers. Raw xml nodes cannot be
  # bound as columns, so keep the link text and target in a nested tibble.
  # This keeps the result at exactly one row per person, which makes the
  # output of several calls safe to combine with bind_rows().
  mention_links <- html_nodes(page, "#paper-link a")
  mentions <- tibble(
    text = html_text(mention_links),
    href = html_attr(mention_links, "href")
  )

  tibble(
    full_name = full_name,
    gender = gender,
    bio = bio,
    mentions = list(mentions)
  )
}
# Try the scraper on the first person in the index.
# people[1, 2] would be a 1x1 data frame rather than a string; pull the
# scalar path out explicitly before building the URL.
first_path <- people$path[[1]]
temp <- paste0(base, first_path) %>%
  get_person_data()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.