Created
September 17, 2017 21:14
-
-
Save ehbick01/3c619b3b6ae3bc14f604cc8653dee8b4 to your computer and use it in GitHub Desktop.
Pulling NOAA weather station info using rvest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -----------------------------------
# Pulling Historical Buoy Information
# October 2016
# -----------------------------------
## Load Packages | |
library(tidyverse) | |
library(xml2) | |
library(rvest) | |
library(RCurl) | |
library(lubridate) | |
library(ggthemes) | |
library(ggplot2) | |
library(extrafont) | |
## Load Theme
# Install the default ggplot2 theme used by every plot in this script:
# theme_bw() in 12pt Trebuchet MS, with white backgrounds and borders, bold
# title/strip/axis text, and the listed grid, legend-title, legend-key,
# axis-title, and axis-tick elements blanked out.
theme_set(
  theme_bw(base_size = 12, base_family = "Trebuchet MS") +
    theme(
      # Text
      text = element_text(colour = "black"),
      plot.title = element_text(face = "bold", hjust = 0),
      strip.text = element_text(face = "bold", size = 10),

      # Backgrounds and borders
      plot.background = element_rect(fill = "white"),
      panel.background = element_rect(fill = "white"),
      panel.border = element_rect(colour = "white"),
      strip.background = element_rect(fill = "#f0f2f3", colour = "white"),

      # Grid lines
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.y = element_blank(),

      # Legend
      legend.background = element_rect(fill = "white"),
      legend.title = element_blank(),
      legend.key = element_blank(),
      legend.position = "right",
      legend.direction = "vertical",

      # Axes
      axis.text.x = element_text(face = "bold", size = 9),
      axis.text.y = element_text(face = "bold", size = 9),
      axis.title = element_blank(),
      axis.ticks = element_blank()
    )
)
## Load Data
# -- Direction data is measured in degrees from true north

# Define sequence of stations
# Upper bound on how many station links are kept from the index page.
# NOTE(review): 365 is carried over from the original script, which describes
# it as the number of National Data Buoy Center stations on the page --
# confirm it still matches the page layout.
max.stations <- 365

# Define URL, download the station index page, and parse it.
# `url` ends up holding the parsed HTML document.
station.index.url <- "http://www.ndbc.noaa.gov/to_station.shtml"
url <- read_html(getURL(station.index.url))

# Pull hyperlinks only (anchors without an href come back as NA)
hrefs <- url %>%
  html_nodes("a") %>%
  html_attr("href")

# Keep only links that point at a station page
station.hrefs <- hrefs[!is.na(hrefs) & grepl("station_page", hrefs)]

# head() keeps at most max.stations links; unlike indexing with 1:365 it does
# not pad the result with NA entries when the page lists fewer links.
station.hrefs <- head(station.hrefs, max.stations)

if (length(station.hrefs) == 0) {
  stop("No station links found at ", station.index.url, call. = FALSE)
}

# Split each link at its first "=": the text before it is the page part, the
# text after it is the station id. Using sub() keeps one value per link even
# if a link contains more than one "=".
links <- data.frame(
  station.page = sub("=.*$", "", station.hrefs),
  station.id = sub("^[^=]*=", "", station.hrefs),
  stringsAsFactors = FALSE
)

# Convert station id's to list
links.list <- as.list(links$station.id)
# Pull Data
# Loop through every buoy and try to read its realtime2 text file.
# A station whose file cannot be read (for example, no file is published for
# that id) yields NULL instead of aborting the whole run; those entries are
# dropped below.
station.list <- lapply(links.list, function(station.id) {
  tryCatch(
    read.table(
      paste0("http://www.ndbc.noaa.gov/data/realtime2/", station.id, ".txt"),
      header = FALSE,
      # NOTE(review): NDBC files are understood to mark missing readings with
      # "MM"; declaring it here reads them as NA -- confirm against the NDBC
      # format description.
      na.strings = c("NA", "MM"),
      stringsAsFactors = FALSE
    ),
    error = function(e) NULL
  )
})

# Remove NULL station data.
# Filter() also works when station.list is empty, where negating the result
# of sapply() would fail.
station.list <- Filter(Negate(is.null), station.list)
# Stack every station's table into one data frame, then label its columns
stations.df <- do.call(rbind, station.list)
names(stations.df) <- c(
  # Timestamp parts
  "year",
  "month",
  "day",
  "hr",
  "min",
  # Wind
  "wind.direction.degT",
  "wind.speed.mps",
  "gust.speed.mps",
  # Waves
  "wave.height.m",
  "dom.wave.per.sec",
  "avg.wave.per.sec",
  "mean.wave.dir.degT",
  # Pressure and temperatures
  "pressure.hpa",
  "air.temp.degC",
  "water.temp.degC",
  "dewpoint.temp.degC",
  # Remaining columns
  "visibility.mi",
  "ptdy",
  "tide.ft"
)
## Data Manipulation
# Convert values to numeric.
# Assigning into stations.df[] converts column by column and keeps the data
# frame shape; sapply() would collapse a one-row frame into a plain vector.
# Entries that are not numbers become NA (with a coercion warning).
stations.df[] <- lapply(stations.df, as.numeric)

# Create a Date column from the 'year', 'month', 'day' columns.
# make_date() builds the date from the numeric parts directly and returns NA
# where a part is missing, instead of failing to parse a pasted "NA-NA-NA".
stations.df$date <- make_date(
  year = stations.df$year,
  month = stations.df$month,
  day = stations.df$day
)
## Visualization
# First day shown in the plots. Kept as a real Date so the comparison with
# the `date` column does not rely on implicit string-to-Date coercion.
plot.start <- as.Date("2016-09-01")

# Plot boxplot of various metrics
# One box per day: distribution of gust speed over all station readings
stations.df %>%
  filter(date >= plot.start) %>%
  ggplot(aes(x = date, y = gust.speed.mps, group = date)) +
  geom_boxplot(colour = "#f0f2f3", outlier.colour = "#fd7d47") +
  labs(
    title = "Gust Speed Peaks Across All NOAA Weather Station Buoys",
    subtitle = paste0(
      "Gusts are measured in meters per second. ",
      "The average speed is pretty consistent, \n",
      "but gusts of 20+ m/s (45+ mph) have been more common ",
      "with Hurricane Matthew"
    )
  )

# One box per day: distribution of wave height over all station readings
# NOTE(review): this subtitle names "Michael" while the gust plot names
# Hurricane Matthew -- confirm which storm is intended.
stations.df %>%
  filter(date >= plot.start) %>%
  ggplot(aes(x = date, y = wave.height.m, group = date)) +
  geom_boxplot(colour = "#f0f2f3", outlier.colour = "#fd7d47") +
  labs(
    title = "Wave Heights Across All NOAA Weather Station Buoys",
    subtitle = paste0(
      "Wave heights (measured in meters) have pushed significantly higher ",
      "with Michael, \n",
      "with peaks seen above 10 meters (32 feet)."
    )
  )
# Summarise wind speed, gust speed, wave height, and pressure by date.
# Missing readings are ignored within each day (na.rm = TRUE).
stations.days <- summarise(
  group_by(stations.df, date),
  # Wind speed
  avg.speed = mean(wind.speed.mps, na.rm = TRUE),
  max.speed = max(wind.speed.mps, na.rm = TRUE),
  # Gust speed
  avg.gust.speed = mean(gust.speed.mps, na.rm = TRUE),
  max.gust.speed = max(gust.speed.mps, na.rm = TRUE),
  # Wave height
  avg.wave.height = mean(wave.height.m, na.rm = TRUE),
  max.wave.height = max(wave.height.m, na.rm = TRUE),
  # Pressure
  avg.pressure = mean(pressure.hpa, na.rm = TRUE),
  max.pressure = max(pressure.hpa, na.rm = TRUE),
  min.pressure = min(pressure.hpa, na.rm = TRUE)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment