Skip to content

Instantly share code, notes, and snippets.

Jose Roberto Ayala Solares jroberayalas

Block or report user

Report or block jroberayalas

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View web_scraping_import.R
# Import the previously saved CSV; readr prints the column types it guessed
df <- read_csv(file = "trump_lies.csv")
#> Parsed with column specification:
#> cols(
#> date = col_date(format = ""),
#> lie = col_character(),
#> explanation = col_character(),
#> url = col_character()
#> )
View web_scraping_final.R
# Load packages
library(rvest)
library(stringr)
library(dplyr)
library(lubridate)
library(readr)
# Parse the HTML document found at the article URL
webpage <- "https://www.nytimes.com/interactive/2017/06/23/opinion/trumps-lies.html" %>%
  read_html()
View web_scraping_dataset_2.R
library(lubridate)
# Convert the month-day-year strings in `date` to Date values
df <- mutate(df, date = mdy(date))
# Show a compact summary of every column
df %>% glimpse()
#> Observations: 116
#> Variables: 4
#> $ date <date> 2017-01-21, 2017-01-21, 2017-01-23, 2017-01-25, 2017-01-25, 2017-01-...
#> $ lie <chr> "I wasn't a fan of Iraq. I didn't want to go into Iraq.", "A reporter...
#> $ explanation <chr> "He was for an invasion before he was against it.", "Trump was on the...
#> $ url <chr> "https://www.buzzfeed.com/andrewkaczynski/in-2002-donald-trump-said-h...
View web_scraping_dataset_1.R
library(dplyr)
# Build one tibble per element of `results`. The list is preallocated so the
# loop fills existing slots instead of growing the list on each iteration.
records <- vector("list", length = length(results))
for (i in seq_along(results)) {
  # Text of the <strong> node, with the year appended
  date <- str_c(
    results[i] %>% html_nodes("strong") %>% html_text(trim = TRUE),
    ", 2017"
  )

  # The second child node holds the statement text; str_sub(., 2, -2) drops
  # its first and last character (the surrounding quotation marks)
  lie <- str_sub(
    xml_contents(results[i])[2] %>% html_text(trim = TRUE),
    2, -2
  )

  # Text of the .short-truth node, again without its first and last character
  explanation <- str_sub(
    results[i] %>% html_nodes(".short-truth") %>% html_text(trim = TRUE),
    2, -2
  )

  # Link target of the <a> node inside this result
  url <- results[i] %>% html_nodes("a") %>% html_attr("href")

  # tibble() replaces the deprecated data_frame(); dplyr re-exports both, so
  # no additional package needs to be attached
  records[[i]] <- tibble(
    date = date,
    lie = lie,
    explanation = explanation,
    url = url
  )
}
View web_scraping_url.R
# Take the first <a> element of the result and read its href attribute
link_node <- html_node(first_result, "a")
url <- html_attr(link_node, "href")
url
#> [1] "https://www.buzzfeed.com/andrewkaczynski/in-2002-donald-trump-said-he-supported-invading-iraq-on-the"
View web_scraping_explanation.R
# Trimmed text of the first .short-truth element in the result
explanation <- html_text(html_node(first_result, ".short-truth"), trim = TRUE)
# Drop the first and last character of the extracted text
str_sub(explanation, start = 2, end = -2)
#> [1] "He was for an invasion before he was against it."
View web_scraping_lie_3.R
# Trimmed text of the second child node of the result
lie <- first_result %>%
  xml_contents() %>%
  .[2] %>%
  html_text(trim = TRUE)
# Strip the opening and closing quotation marks
str_sub(lie, start = 2, end = -2)
#> [1] "I wasn't a fan of Iraq. I didn't want to go into Iraq."
View web_scraping_lie_2.R
# Text of the second child node, with surrounding whitespace trimmed
html_text(xml_contents(first_result)[2], trim = TRUE)
#> [1] "“I wasn't a fan of Iraq. I didn't want to go into Iraq.”"
View web_scraping_lie_1.R
# List every child node of the result, text nodes included
first_result %>% xml_contents()
#> {xml_nodeset (3)}
#> [1] <strong>Jan. 21 </strong>
#> [2] “I wasn't a fan of Iraq. I didn't want to go into Iraq.”
#> [3] <span class="short-truth"><a href="https://www.buzzfeed.com/andrewkaczynski/in-2002-don ...
You can’t perform that action at this time.