Skip to content

Instantly share code, notes, and snippets.

View jroberayalas's full-sized avatar

Jose Roberto Ayala Solares jroberayalas

View GitHub Profile
# Libraries
library(tidyverse)
library(lubridate)
library(prophet)
library(forecast)
# Read data ----
# Load the bike records and replace the `datetime` column with the value
# returned by lubridate::date() for each entry.
bikes <- read_csv('bikes.csv') %>%
  mutate(datetime = date(datetime))

# Holiday names, attached by position to the distinct holiday dates below.
# NOTE(review): the names and their order are hard-coded; presumably they were
# matched by hand against the holiday dates in bikes.csv in chronological
# order -- confirm whenever the data file changes.
holiday_names <- c(
  'Martin Luther King', 'Emancipation Day', 'Independence Day',
  'Labor Day', 'Columbus Day', 'Veterans Day', 'New Year',
  'Martin Luther King', 'Emancipation Day', 'Independence Day',
  'Labor Day', 'Columbus Day', 'Veterans Day'
)

# Distinct dates flagged with holiday == 1. They are sorted explicitly so the
# positional match with `holiday_names` does not depend on the row order of
# the input file.
holidays <- bikes %>%
  filter(holiday == 1) %>%
  select(datetime) %>%
  distinct() %>%
  arrange(datetime)

# Stop with a readable message when the number of dates and names disagree,
# instead of relying on the size error raised by the column assignment.
if (nrow(holidays) != length(holiday_names)) {
  stop(
    'Found ', nrow(holidays), ' distinct holiday dates but ',
    length(holiday_names), ' holiday names are defined.',
    call. = FALSE
  )
}

# Attach the labels and rename the columns to `ds` / `holiday`, the names used
# when this table is later passed as `holidays =` to prophet().
holidays$holiday <- holiday_names
names(holidays) <- c('ds', 'holiday')
# Search grid ----
# Every combination of the candidate hyper-parameters, one combination per row
# (3 x 3 x 3 x 4 x 1 = 108 rows).
# stringsAsFactors = FALSE keeps `growth` as a character column:
# expand.grid() converts strings to factors by default, and this value is
# later passed on as the `growth` argument of prophet().
prophetGrid <- expand.grid(
  changepoint_prior_scale = c(0.05, 0.5, 0.001),
  seasonality_prior_scale = c(100, 10, 1),
  holidays_prior_scale = c(100, 10, 1),
  capacity = c(6043, 6500, 7000, 8000),
  growth = 'logistic',
  stringsAsFactors = FALSE
)

# Preallocated numeric vector (all zeros) with one slot per grid row.
results <- numeric(nrow(prophetGrid))
# Search best parameters
# Walks the grid one row at a time and calls prophet() on `train` with the
# settings stored in that row.
# NOTE(review): this loop is incomplete in this file -- the prophet() call
# below has no closing parenthesis and the loop has no closing brace, so the
# script does not parse as-is. The statements that would score each fit and
# store it in `results` are not visible here; restore them from the original
# source rather than guessing.
# NOTE(review): `train` is not created anywhere in the visible code --
# presumably a training split of the bike data; confirm.
for (i in seq_len(nrow(prophetGrid))) {
# Hyper-parameter combination for this iteration (a one-row data frame).
parameters <- prophetGrid[i, ]
# For logistic growth, write the candidate capacity into train$cap before
# fitting.
if (parameters$growth == 'logistic') {train$cap <- parameters$capacity}
# Call prophet() with this row's growth type and prior scales plus the
# holiday table.
m <- prophet(train, growth = parameters$growth, holidays = holidays,
seasonality.prior.scale = parameters$seasonality_prior_scale,
changepoint.prior.scale = parameters$changepoint_prior_scale,
# Refit on the combined training and validation rows ----
# NOTE(review): `train`, `valid` and `best_params` are not created in the
# visible code -- confirm they exist before this section runs.

# Stack both splits row-wise and set `cap` to the selected capacity.
retrain <- train %>% bind_rows(valid)
retrain$cap <- best_params$capacity

# Call prophet() on the stacked data with the selected growth type and prior
# scales.
m <- prophet(
  retrain,
  growth = best_params$growth,
  holidays = holidays,
  changepoint.prior.scale = best_params$changepoint_prior_scale,
  seasonality.prior.scale = best_params$seasonality_prior_scale,
  holidays.prior.scale = best_params$holidays_prior_scale
)

# Build the future data frame with 184 additional periods and give it the same
# `cap` value that was set on `retrain`.
future <- m %>% make_future_dataframe(periods = 184)
future$cap <- best_params$capacity
# Final plot ----
# One chart with five layers: points for `train` (default colour), the `yhat`
# line and the yhat_lower/yhat_upper ribbon from `forecast` (blue), points for
# `valid` (green) and points for `test` (red).
# NOTE(review): `forecast`, `train`, `valid` and `test` are not created in the
# visible code; `forecast` is presumably the prediction data frame for
# `future` -- confirm it is assigned before this section runs.
p <- ggplot() +
  geom_point(data = train, aes(x = ds, y = y), size = 0.5) +
  geom_line(data = forecast, aes(x = ds, y = yhat), color = "#0072B2") +
  geom_ribbon(
    data = forecast,
    aes(x = ds, ymin = yhat_lower, ymax = yhat_upper),
    fill = "#0072B2",
    alpha = 0.3
  ) +
  geom_point(data = valid, aes(x = ds, y = y), size = 0.5, color = '#4daf4a') +
  geom_point(data = test, aes(x = ds, y = y), size = 0.5, color = 'red')

# Print the finished plot.
p
library(rvest)

# Fetch the article URL and parse it with read_html(); the bare `webpage`
# line prints the parsed document.
webpage <-
  "https://www.nytimes.com/interactive/2017/06/23/opinion/trumps-lies.html" %>%
  read_html()
webpage
#> {xml_document}
#> <html lang="en" class="no-js page-interactive section-opinion page-theme-standard tone-opinion page-interactive-default limit-small layout-xlarge app-interactive" itemid="https://www.nytimes.com/interactive/2017/06/23/opinion/trumps-lies.html" itemtype="http://schema.org/NewsArticle" itemscope="" xmlns:og="http://opengraphprotocol.org/schema/">
#> [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n<title>President Trump’s L ...
#> [2] <body>\n \n <style>\n .lt-ie10 .messenger.suggestions {\n display: block !important;\n ...
# Select every node in `webpage` matching the CSS selector ".short-desc",
# then print the resulting node set.
results <- html_nodes(webpage, ".short-desc")
results
#> {xml_nodeset (116)}
#> [1] <span class="short-desc"><strong>Jan. 21 </strong>“I wasn't a fan of Iraq. I didn't want to go into Ir ...
#> [2] <span class="short-desc"><strong>Jan. 21 </strong>“A reporter for Time magazine — and I have been on t ...
#> [3] <span class="short-desc"><strong>Jan. 23 </strong>“Between 3 million and 5 million illegal votes cause ...
#> [4] <span class="short-desc"><strong>Jan. 25 </strong>“Now, the audience was the biggest ever. But this cr ...
#> [5] <span class="short-desc"><strong>Jan. 25 </strong>“Take a look at the Pew reports (which show voter fr ...
#> [6] <span class="short-desc"><strong>Jan. 25 </strong>“You had millions of people that now aren't insured ...
#> [7] <span class="short-desc"><strong>Jan. 25 </strong>“So, look, when President Obama was there two weeks ...
library(stringr)

# Keep only the first entry of `results`.
first_result <- results[1]

# Text of the <strong> node(s) inside that entry, with surrounding whitespace
# trimmed.
date <- html_text(html_nodes(first_result, "strong"), trim = TRUE)

# Append the literal ', 2017' to the extracted text and print the result.
str_c(date, ', 2017')
#> [1] "Jan. 21, 2017"
# Keep only the first entry of `results`, then print the <strong> node(s)
# found inside it.
first_result <- results[1]
html_nodes(first_result, "strong")
#> {xml_nodeset (1)}
#> [1] <strong>Jan. 21 </strong>