# Jose Roberto Ayala Solares (jroberayalas)

## web_scraping_lie_1.R
# Show the child nodes of the first scraped result.
first_result %>% xml_contents()
#> {xml_nodeset (3)}
#> [1] <strong>Jan. 21 </strong>
#> [2] “I wasn't a fan of Iraq. I didn't want to go into Iraq.”
#> [3] <span class="short-truth"><a href="https://www.buzzfeed.com/andrewkaczynski/in-2002-don ...

## web_scraping_lie_2.R
# Text of the second child node, with surrounding whitespace trimmed.
html_text(xml_contents(first_result)[2], trim = TRUE)
#> [1] "“I wasn't a fan of Iraq. I didn't want to go into Iraq.”"

## web_scraping_lie_3.R
# Keep the statement text, then drop its first and last character
# (the curly quotation marks in the output above).
lie <- html_text(xml_contents(first_result)[2], trim = TRUE)
lie %>% str_sub(2, -2)
#> [1] "I wasn't a fan of Iraq. I didn't want to go into Iraq."

## web_scraping_explanation.R
# Text of the first ".short-truth" node, then the same first/last-character trim.
explanation <- html_text(html_node(first_result, ".short-truth"), trim = TRUE)
explanation %>% str_sub(2, -2)
#> [1] "He was for an invasion before he was against it."

## web_scraping_url.R
# "href" attribute of the first <a> node inside the result.
url <- html_attr(html_node(first_result, "a"), "href")
url
#> [1] "https://www.buzzfeed.com/andrewkaczynski/in-2002-donald-trump-said-he-supported-invading-iraq-on-the"

## web_scraping_dataset_1.R
library(dplyr)
# Build one tibble per element of `results`, then stack them into `df`.
# The list is preallocated so it is not regrown on every iteration.
records <- vector("list", length = length(results))

for (i in seq_along(results)) {
    # Text of the <strong> node plus a year suffix, e.g. "Jan. 21, 2017".
    date <- str_c(
        results[i] %>% html_nodes("strong") %>% html_text(trim = TRUE),
        ", 2017"
    )
    # The second child node holds the statement; strip its first and last
    # character.
    lie <- str_sub(
        xml_contents(results[i])[2] %>% html_text(trim = TRUE),
        2, -2
    )
    # Same first/last-character trim for the ".short-truth" text.
    explanation <- str_sub(
        results[i] %>% html_nodes(".short-truth") %>% html_text(trim = TRUE),
        2, -2
    )
    url <- results[i] %>% html_nodes("a") %>% html_attr("href")
    # tibble() replaces the deprecated data_frame() constructor.
    records[[i]] <- tibble(
        date = date,
        lie = lie,
        explanation = explanation,
        url = url
    )
}

# Combine the per-result tibbles into the single data frame `df`.
df <- bind_rows(records)

## web_scraping_dataset_2.R
library(lubridate)
# Parse the month-day-year strings in `date` into Date values, then inspect `df`.
df[["date"]] <- mdy(df[["date"]])
df %>% glimpse()
#> Observations: 116
#> Variables: 4
#> $ date        <date> 2017-01-21, 2017-01-21, 2017-01-23, 2017-01-25, 2017-01-25, 2017-01-...
#> $ lie         <chr> "I wasn't a fan of Iraq. I didn't want to go into Iraq.", "A reporter...
#> $ explanation <chr> "He was for an invasion before he was against it.", "Trump was on the...
#> $ url         <chr> "https://www.buzzfeed.com/andrewkaczynski/in-2002-donald-trump-said-h...

## web_scraping_final.R
# Load packages
library(rvest)
library(stringr)
library(dplyr)
library(lubridate)
library(readr)

# Fetch and parse the HTML of the source article
webpage <- read_html(
  x = "https://www.nytimes.com/interactive/2017/06/23/opinion/trumps-lies.html"
)

## web_scraping_export.R
# Save the data frame as a CSV file in the working directory.
df %>% write_csv("trump_lies.csv")

## web_scraping_import.R
# Load the saved CSV back into `df`; readr reports the column types it guessed.
df <- read_csv(file = "trump_lies.csv")
#> Parsed with column specification:
#> cols(
#>   date = col_date(format = ""),
#>   lie = col_character(),
#>   explanation = col_character(),
#>   url = col_character()
#> )
	# Show the child nodes of the first scraped result.
	first_result %>% xml_contents()
	#> {xml_nodeset (3)}
	#> [1] <strong>Jan. 21 </strong>
	#> [2] “I wasn't a fan of Iraq. I didn't want to go into Iraq.”
	#> [3] <span class="short-truth"><a href="https://www.buzzfeed.com/andrewkaczynski/in-2002-don ...
	# Text of the second child node, with surrounding whitespace trimmed.
	html_text(xml_contents(first_result)[2], trim = TRUE)
	#> [1] "“I wasn't a fan of Iraq. I didn't want to go into Iraq.”"
	# Keep that text, then drop its first and last character (the curly quotes).
	lie <- html_text(xml_contents(first_result)[2], trim = TRUE)
	lie %>% str_sub(2, -2)
	#> [1] "I wasn't a fan of Iraq. I didn't want to go into Iraq."
	# Text of the first ".short-truth" node, trimmed the same way.
	explanation <- html_text(html_node(first_result, ".short-truth"), trim = TRUE)
	explanation %>% str_sub(2, -2)
	#> [1] "He was for an invasion before he was against it."
	# "href" attribute of the first <a> node inside the result.
	url <- html_attr(html_node(first_result, "a"), "href")
	url
	#> [1] "https://www.buzzfeed.com/andrewkaczynski/in-2002-donald-trump-said-he-supported-invading-iraq-on-the"
	library(dplyr)
	# Build one tibble per element of `results`, then stack them into `df`.
	# The list is preallocated so it is not regrown on every iteration.
	records <- vector("list", length = length(results))

	for (i in seq_along(results)) {
		# Text of the <strong> node plus a year suffix, e.g. "Jan. 21, 2017".
		date <- str_c(
			results[i] %>% html_nodes("strong") %>% html_text(trim = TRUE),
			", 2017"
		)
		# The second child node holds the statement; strip its first and last
		# character.
		lie <- str_sub(
			xml_contents(results[i])[2] %>% html_text(trim = TRUE),
			2, -2
		)
		# Same first/last-character trim for the ".short-truth" text.
		explanation <- str_sub(
			results[i] %>% html_nodes(".short-truth") %>% html_text(trim = TRUE),
			2, -2
		)
		url <- results[i] %>% html_nodes("a") %>% html_attr("href")
		# tibble() replaces the deprecated data_frame() constructor.
		records[[i]] <- tibble(
			date = date,
			lie = lie,
			explanation = explanation,
			url = url
		)
	}

	# Combine the per-result tibbles into the single data frame `df`.
	df <- bind_rows(records)
	library(lubridate)
	# Parse the month-day-year strings in `date` into Date values, then inspect `df`.
	df[["date"]] <- mdy(df[["date"]])
	df %>% glimpse()
	#> Observations: 116
	#> Variables: 4
	#> $ date <date> 2017-01-21, 2017-01-21, 2017-01-23, 2017-01-25, 2017-01-25, 2017-01-...
	#> $ lie <chr> "I wasn't a fan of Iraq. I didn't want to go into Iraq.", "A reporter...
	#> $ explanation <chr> "He was for an invasion before he was against it.", "Trump was on the...
	#> $ url <chr> "https://www.buzzfeed.com/andrewkaczynski/in-2002-donald-trump-said-h...
	# Load packages
	library(rvest)
	library(stringr)
	library(dplyr)
	library(lubridate)
	library(readr)

	# Fetch and parse the HTML of the source article
	webpage <- "https://www.nytimes.com/interactive/2017/06/23/opinion/trumps-lies.html" %>%
		read_html()
	# Load the saved CSV back into `df`; readr reports the column types it guessed.
	df <- read_csv(file = "trump_lies.csv")
	#> Parsed with column specification:
	#> cols(
	#> date = col_date(format = ""),
	#> lie = col_character(),
	#> explanation = col_character(),
	#> url = col_character()
	#> )