Skip to content

Instantly share code, notes, and snippets.

@EmilHvitfeldt
Created January 2, 2018 02:03
Show Gist options
  • Save EmilHvitfeldt/53e5e33a0ebc5e084dcbcdefacb8ed9a to your computer and use it in GitHub Desktop.
Save EmilHvitfeldt/53e5e33a0ebc5e084dcbcdefacb8ed9a to your computer and use it in GitHub Desktop.
library(rvest)
library(tidyverse)
library(lubridate)
library(glue)
#library(ehlib) # devtools::install_github("EmilHvitfeldt/ehlib")
# Extract the text that sits between `start` and `end` in `string`.
#
# `start` and `end` are used as regular expressions. The shortest span
# "start ... end" is located first, then the first match of `start` and the
# first match of `end` are stripped from it. Elements of `string` with no
# match yield NA.
str_between <- function(string, start, end) {
  # Lazy group so the match stops at the first possible `end`.
  span_pattern <- stringr::str_c(start, "(.*?)", end, collapse = "")
  span <- stringr::str_extract(string, span_pattern)
  # Peel off the leading delimiter, then the trailing one.
  without_start <- stringr::str_replace(span, start, "")
  stringr::str_replace(without_start, end, "")
}
# Return the part of `string` that precedes the first match of `pattern`.
#
# `pattern` is a regular expression placed inside a look-ahead, so it is not
# part of the result. At least one character must precede the match;
# otherwise (or when `pattern` does not occur) the result is NA.
str_before <- function(string, pattern) {
  lookahead <- stringr::str_c(".+?(?=", pattern, ")")
  stringr::str_extract(string, lookahead)
}
# Build the sequence of days in `year`, truncated at today's date.
#
# year: a calendar year (number or string) that is interpolated into
#   "{year}0101" / "{year}1231" and parsed with lubridate::ymd().
#
# Returns a Date vector running from 1 January of `year` to 31 December of
# `year`, or to today when `year` is the current year.
# Stops with an error when 1 January of `year` lies in the future.
all_dates_in <- function(year) {
  # Sys.Date() is the current date in the local time zone. The previous
  # as.Date(Sys.time()) converted in UTC and could be a day ahead or behind.
  # Reading it once also keeps the checks below consistent with each other.
  today <- Sys.Date()

  start <- lubridate::ymd(glue::glue("{year}0101"))
  if (start > today) {
    stop("Please select a past or current year.")
  }

  # Never run past today for a year that is still in progress.
  end <- min(lubridate::ymd(glue::glue("{year}1231")), today)

  seq(start, end, by = "day")
}
# Station code passed to every weather_data_html() request.
airport_code <- "KCQT"

# Download one page per date.
# `dates` and `weather_data_html()` are not defined in this part of the
# script. NOTE(review): the `$error` / `$result` accesses below and later on
# suggest `weather_data_html` is wrapped in purrr::safely() -- confirm.
full_data <- map(dates, ~{
  page <- weather_data_html(.x, airport_code)
  # Pause between requests.
  Sys.sleep(5)
  # Progress output for the date that was just fetched (month/day).
  cat(month(.x), "/", mday(.x), "\n", sep = "")
  # Return the download itself. Without this the lambda's value is the NULL
  # returned by cat(), and every element of `full_data` would be NULL.
  page
})

# TRUE for each date whose element carries no `error`.
map_lgl(full_data, ~ is.null(.x$error))
# Convert clock strings such as "6:45 AM" into minutes after midnight.
#
# x: character vector of the form "H:MM AM" / "H:MM PM".
# Returns a numeric vector: minutes + hours * 60, plus 12 hours when the
# string contains "PM". An hour of "12" counts as 0, so "12:30 AM" is 30
# and "12:30 PM" is 750.
ampm_minutes <- function(x) {
  # Digits between the colon and the following space.
  minute_part <- as.numeric(str_between(x, ":", " "))
  # Text before the colon, with "12" mapped to "0".
  hour_part <- as.numeric(str_replace(str_before(x, ":"), "12", "0"))
  afternoon <- str_detect(x, "PM")

  minute_part + hour_part * 60 + 60 * 12 * afternoon
}
# Combine one day's observation table with that day's sunrise/sunset times.
#
# html_url: parsed page that is queried with html_nodes().
# date:     the day the page describes; stored in the `date` column and
#           used for `yday`.
#
# Returns the first 'table[id="obsTable"]' table with its time column renamed
# to "Time" and these columns added: sun_set, sun_rise, date, yday,
# day_minutes, set_minutes, rise_minutes and sun_up.
data_wrangling <- function(html_url, date) {
  # Text of the first table inside the "astronomy-mod" div; both sun times
  # are cut out of this one string.
  astronomy_text <- html_text(
    html_nodes(html_url, 'div[id="astronomy-mod"] table')
  )[1]

  # Sun rise time
  sun_rise <- str_between(astronomy_text, "Time\n\t\t", "\n\t\t")
  # Sun set time
  sun_set <- str_between(astronomy_text, "\n\t\t", "\n\t\tCivil")

  # Table
  observations <- html_table(
    html_nodes(html_url, 'table[id="obsTable"]')
  )[[1]]

  # Time column standardization: the header carries a zone suffix. A
  # daylight-saving suffix means every clock time is shifted back by one hour.
  header <- names(observations)
  is_daylight <- any(
    header == "Time (PDT)" | header == "Time (MDT)" |
      header == "Time (CDT)" | header == "Time (EDT)"
  )
  time_names <- str_c("Time", c(" (PDT)", " (MDT)", " (CDT)", " (EDT)",
                                " (PST)", " (MST)", " (CST)", " (EST)"))
  names(observations)[header %in% time_names] <- "Time"

  dst_shift <- is_daylight * 60

  mutate(
    observations,
    sun_set = sun_set,
    sun_rise = sun_rise,
    date = date,
    yday = yday(date),
    day_minutes = ampm_minutes(Time) - dst_shift,
    set_minutes = ampm_minutes(sun_set) - dst_shift,
    rise_minutes = ampm_minutes(sun_rise) - dst_shift,
    # Counted as "sun up" from 90 minutes after sunrise until 30 minutes
    # before sunset.
    sun_up = day_minutes > (rise_minutes + 90) &
      day_minutes < (set_minutes - 30)
  )
}
# Wrangle every downloaded page and row-bind the per-day tables.
# Element 303 is left out of both inputs.
# NOTE(review): presumably the day whose page could not be used -- confirm.
full_data2 <- map2_df(
  full_data[-303],
  dates[-303],
  function(download, day) data_wrangling(download$result, day)
)
# x breaks: cumulative day count at the end of each month present in `dates`,
# named with the month abbreviation.
# NOTE(review): the names are presumably meant to serve as the axis labels;
# confirm against the ggplot2 version in use.
month_counts <- table(month(dates))
x_axis <- cumsum(month_counts)
# Label by the months actually present, so a partial year (fewer than 12
# months) no longer fails on a length mismatch.
names(x_axis) <- month.abb[as.integer(names(month_counts))]

# y breaks: one per hour, in minutes after midnight. The labels below run
# 12AM, 1AM, ..., 11AM, 12PM, ..., 11PM (hours 0-23), so the breaks must
# start at 0. With 1:24 * 60 every label sat one hour too high (noon was
# labelled "11AM").
y_axis <- 0:23 * 60
names(y_axis) <- str_c(c(12, rep(1:12, 2, length.out = 23)),
                       rep(c("AM", "PM"), each = 12))

full_data2 %>%
  # `con` flags observations whose condition is "Clear"; `day_minutes2` is
  # the previous row's time and forms the other edge of each rectangle.
  mutate(con = Conditions == "Clear",
         day_minutes2 = lag(day_minutes)) %>%
  filter(sun_up) %>%
  ggplot(aes(fill = con)) +
  geom_rect(aes(xmin = yday, xmax = yday + 1,
                ymin = day_minutes, ymax = day_minutes2)) +
  # Sunset and sunrise curves.
  geom_line(aes(yday, set_minutes)) +
  geom_line(aes(yday, rise_minutes)) +
  # Non-clear observations are drawn grey; clear ones get no fill.
  scale_fill_manual(values = c("grey40", NA)) +
  theme_minimal() +
  guides(fill = "none") +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.text.x.bottom = element_text(hjust = 1.7)
  ) +
  scale_x_continuous(breaks = x_axis, position = "right") +
  scale_y_continuous(breaks = y_axis, limits = c(0, 24 * 60)) +
  labs(x = NULL, y = NULL, title = "Sunshine report of Los Angeles 2017")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment