Skip to content

Instantly share code, notes, and snippets.

View jroberayalas's full-sized avatar

Jose Roberto Ayala Solares jroberayalas

View GitHub Profile
# List the child nodes of the first record.
xml_contents(first_result)
#> {xml_nodeset (3)}
#> [1] <strong>Jan. 21 </strong>
#> [2] “I wasn't a fan of Iraq. I didn't want to go into Iraq.”
#> [3] <span class="short-truth"><a href="https://www.buzzfeed.com/andrewkaczynski/in-2002-don ...
# The second child node holds the quoted statement.
html_text(xml_contents(first_result)[2], trim = TRUE)
#> [1] "“I wasn't a fan of Iraq. I didn't want to go into Iraq.”"
# Drop the first and last characters, i.e. the surrounding curly quotes.
lie <- html_text(xml_contents(first_result)[2], trim = TRUE)
str_sub(lie, 2, -2)
#> [1] "I wasn't a fan of Iraq. I didn't want to go into Iraq."
# Take the ".short-truth" text and trim its first and last characters too.
explanation <- html_text(html_node(first_result, ".short-truth"), trim = TRUE)
str_sub(explanation, 2, -2)
#> [1] "He was for an invasion before he was against it."
# The source link is the href attribute of the record's first <a> element.
url <- html_attr(html_node(first_result, "a"), "href")
url
#> [1] "https://www.buzzfeed.com/andrewkaczynski/in-2002-donald-trump-said-he-supported-invading-iraq-on-the"
library(dplyr)
# Build one tibble per scraped record, then stack them into `df`.
# NOTE(review): assumes `results` is the node set of records selected from the
# page, each shaped like `first_result` -- confirm against the selection step.
records <- vector("list", length = length(results))
for (i in seq_along(results)) {
  record <- results[i]
  # The page shows only month and day (e.g. "Jan. 21"), so append the year.
  date <- str_c(
    record %>% html_nodes("strong") %>% html_text(trim = TRUE),
    ", 2017"
  )
  # Second child node is the quoted statement; strip the enclosing characters.
  lie <- str_sub(
    xml_contents(record)[2] %>% html_text(trim = TRUE),
    2, -2
  )
  explanation <- str_sub(
    record %>% html_nodes(".short-truth") %>% html_text(trim = TRUE),
    2, -2
  )
  url <- record %>% html_nodes("a") %>% html_attr("href")
  # tibble() replaces the deprecated data_frame() constructor.
  records[[i]] <- tibble(
    date = date, lie = lie, explanation = explanation, url = url
  )
}
# Combine the per-record tibbles; the following steps operate on `df`.
df <- bind_rows(records)
library(lubridate)
# Convert the "Mon. DD, YYYY" strings to Date values, then inspect the result.
df <- df %>%
  mutate(date = mdy(date))
glimpse(df)
#> Observations: 116
#> Variables: 4
#> $ date <date> 2017-01-21, 2017-01-21, 2017-01-23, 2017-01-25, 2017-01-25, 2017-01-...
#> $ lie <chr> "I wasn't a fan of Iraq. I didn't want to go into Iraq.", "A reporter...
#> $ explanation <chr> "He was for an invasion before he was against it.", "Trump was on the...
#> $ url <chr> "https://www.buzzfeed.com/andrewkaczynski/in-2002-donald-trump-said-h...
# Load packages
library(rvest)
library(stringr)
library(dplyr)
library(lubridate)
library(readr)
# Fetch and parse the HTML of the NYT page to be scraped
webpage <- "https://www.nytimes.com/interactive/2017/06/23/opinion/trumps-lies.html" %>%
  read_html()
# Save the data frame as CSV, then load it back from the same file
df %>% write_csv("trump_lies.csv")
df <- "trump_lies.csv" %>% read_csv()
#> Parsed with column specification:
#> cols(
#> date = col_date(format = ""),
#> lie = col_character(),
#> explanation = col_character(),
#> url = col_character()
#> )