Skip to content

Instantly share code, notes, and snippets.

View jroberayalas's full-sized avatar

Jose Roberto Ayala Solares jroberayalas

View GitHub Profile
# Read the text of the ".short-truth" span inside the first result, then drop
# its first and last characters (presumably enclosing punctuation; the
# untrimmed string is not shown here).
explanation <- html_text(html_node(first_result, ".short-truth"), trim = TRUE)
str_sub(explanation, start = 2, end = -2)
#> [1] "He was for an invasion before he was against it."
# The quoted statement is the second child of the result node. Take its
# trimmed text and drop the first and last characters, i.e. the curly quotes
# wrapping it.
lie <- html_text(xml_contents(first_result)[2], trim = TRUE)
str_sub(lie, start = 2, end = -2)
#> [1] "I wasn't a fan of Iraq. I didn't want to go into Iraq."
# Trimmed text of the second child of the result node; the curly quotes around
# the statement are still part of the string at this point.
html_text(xml_contents(first_result)[2], trim = TRUE)
#> [1] "“I wasn't a fan of Iraq. I didn't want to go into Iraq.”"
# Show the children of the first result: the <strong> date, the bare text of
# the statement, and the ".short-truth" span.
first_result %>% xml_contents()
#> {xml_nodeset (3)}
#> [1] <strong>Jan. 21 </strong>
#> [2] “I wasn't a fan of Iraq. I didn't want to go into Iraq.”
#> [3] <span class="short-truth"><a href="https://www.buzzfeed.com/andrewkaczynski/in-2002-don ...
# Keep only the first scraped result and look up the <strong> element(s)
# inside it; here that is the one holding the date.
first_result <- results[1]
html_nodes(first_result, "strong")
#> {xml_nodeset (1)}
#> [1] <strong>Jan. 21 </strong>
library(stringr)

# The <strong> element only carries month and day, so read its trimmed text
# and append the year.
first_result <- results[1]
date <- html_text(html_nodes(first_result, "strong"), trim = TRUE)
str_c(date, ", 2017")
#> [1] "Jan. 21, 2017"
# Collect every node carrying the "short-desc" class from the parsed page and
# print the resulting node set.
results <- html_nodes(webpage, ".short-desc")
results
#> {xml_nodeset (116)}
#> [1] <span class="short-desc"><strong>Jan. 21 </strong>“I wasn't a fan of Iraq. I didn't want to go into Ir ...
#> [2] <span class="short-desc"><strong>Jan. 21 </strong>“A reporter for Time magazine — and I have been on t ...
#> [3] <span class="short-desc"><strong>Jan. 23 </strong>“Between 3 million and 5 million illegal votes cause ...
#> [4] <span class="short-desc"><strong>Jan. 25 </strong>“Now, the audience was the biggest ever. But this cr ...
#> [5] <span class="short-desc"><strong>Jan. 25 </strong>“Take a look at the Pew reports (which show voter fr ...
#> [6] <span class="short-desc"><strong>Jan. 25 </strong>“You had millions of people that now aren't insured ...
#> [7] <span class="short-desc"><strong>Jan. 25 </strong>“So, look, when President Obama was there two weeks ...
library(rvest)

# Download and parse the article, then print the parsed document to confirm
# that both <head> and <body> were read.
webpage <- "https://www.nytimes.com/interactive/2017/06/23/opinion/trumps-lies.html" %>%
  read_html()
webpage
#> {xml_document}
#> <html lang="en" class="no-js page-interactive section-opinion page-theme-standard tone-opinion page-interactive-default limit-small layout-xlarge app-interactive" itemid="https://www.nytimes.com/interactive/2017/06/23/opinion/trumps-lies.html" itemtype="http://schema.org/NewsArticle" itemscope="" xmlns:og="http://opengraphprotocol.org/schema/">
#> [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n<title>President Trump’s L ...
#> [2] <body>\n \n <style>\n .lt-ie10 .messenger.suggestions {\n display: block !important;\n ...
# Final plot: observed training points, the forecast line with its
# yhat_lower/yhat_upper ribbon (blue), and the validation (green) and test
# (red) observations drawn on top.
p <- ggplot() +
  geom_point(data = train, aes(x = ds, y = y), size = 0.5) +
  geom_line(data = forecast, aes(x = ds, y = yhat), color = "#0072B2") +
  geom_ribbon(
    data = forecast,
    aes(x = ds, ymin = yhat_lower, ymax = yhat_upper),
    fill = "#0072B2",
    alpha = 0.3
  ) +
  geom_point(data = valid, aes(x = ds, y = y), size = 0.5, color = "#4daf4a") +
  geom_point(data = test, aes(x = ds, y = y), size = 0.5, color = "red")
p
# Refit on the training and validation rows combined, using the selected
# hyper-parameters held in `best_params`.
retrain <- bind_rows(train, valid)
retrain$cap <- best_params$capacity

m <- prophet(
  df = retrain,
  growth = best_params$growth,
  holidays = holidays,
  seasonality.prior.scale = best_params$seasonality_prior_scale,
  changepoint.prior.scale = best_params$changepoint_prior_scale,
  holidays.prior.scale = best_params$holidays_prior_scale
)

# Build the frame to forecast on, extended by 184 periods, and give it the
# same `cap` column as the training data.
future <- make_future_dataframe(m, periods = 184)
future$cap <- best_params$capacity