Skip to content

Instantly share code, notes, and snippets.

@ehbick01
Created September 17, 2017 21:14
Show Gist options
  • Save ehbick01/3c619b3b6ae3bc14f604cc8653dee8b4 to your computer and use it in GitHub Desktop.
Save ehbick01/3c619b3b6ae3bc14f604cc8653dee8b4 to your computer and use it in GitHub Desktop.
Pulling NOAA weather station info using rvest
# -----------------------------------
# Pulling Historical Buoy Information
# October 2016
# -----------------------------------
## Load Packages
library(tidyverse)
library(xml2)
library(rvest)
library(RCurl)
library(lubridate)
library(ggthemes)
library(ggplot2)
library(extrafont)
## Load Theme
# Install a session-wide ggplot2 default theme: theme_bw() in 12pt
# 'Trebuchet MS' with white backgrounds, bold titles/axis text, and no axis
# titles, axis ticks, legend title, or legend keys. Major grid lines and the
# y-axis minor grid lines are blanked; x-axis minor grid lines are left as
# theme_bw() defines them.
theme_set(
  theme_bw(base_size = 12, base_family = 'Trebuchet MS') +
    theme(
      # Text
      text = element_text(colour = 'black'),
      plot.title = element_text(hjust = 0, face = 'bold'),
      strip.text = element_text(size = 10, face = 'bold'),
      # Backgrounds and borders
      plot.background = element_rect(fill = 'white'),
      panel.background = element_rect(fill = 'white'),
      panel.border = element_rect(colour = 'white'),
      strip.background = element_rect(fill = '#f0f2f3', colour = 'white'),
      # Grid lines
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.y = element_blank(),
      # Legend
      legend.position = 'right',
      legend.direction = 'vertical',
      legend.background = element_rect(fill = 'white'),
      legend.title = element_blank(),
      legend.key = element_blank(),
      # Axes
      axis.text.x = element_text(size = 9, face = 'bold'),
      axis.text.y = element_text(size = 9, face = 'bold'),
      axis.title = element_blank(),
      axis.ticks = element_blank()
    )
)
## Load Data
# -- Direction data is measured in degrees from true north
# Build the list of station IDs by scraping the NDBC station index page.
# Results:
#   links      - data frame with columns 'station.page' and 'station.id'
#   links.list - list of station IDs, one element per station
# Maximum number of station links to keep. The original script kept the first
# 365 links as the National Data Buoy Center stations.
# NOTE(review): confirm this count still matches the page layout.
max.stations <- 365
# Define URL
url <- "http://www.ndbc.noaa.gov/to_station.shtml"
# libcurl does not follow HTTP redirects unless asked to, so request it
# explicitly; otherwise a redirected page yields no station links at all.
url <- getURL(url, followlocation = TRUE)
url <- read_html(url)
# Pull hyperlink targets only
hrefs <- url %>%
  html_nodes('a') %>%
  html_attr('href')
# Keep only links that point at a station page (anchors without an href come
# back as NA and are dropped here too)
hrefs <- hrefs[!is.na(hrefs) & grepl('station_page', hrefs, fixed = TRUE)]
# head() returns at most max.stations entries and never pads with NA when
# fewer links are available
hrefs <- head(hrefs, max.stations)
# Split each link on '=' to isolate the station ID: the text before the first
# '=' is the page part, the text after it is the ID
href.parts <- strsplit(hrefs, '=', fixed = TRUE)
links <- data.frame(
  station.page = vapply(href.parts, function(p) p[1], character(1)),
  station.id = vapply(href.parts, function(p) p[2], character(1)),
  stringsAsFactors = FALSE
)
# Convert station id's to list
links.list <- as.list(links$station.id)
# Pull Data
# Download the realtime observation file for every buoy station in links.list.
# Results:
#   station.list - list of per-station data frames (unreadable stations removed)
#   stations.df  - all station tables stacked row-wise, with descriptive names
# Column names applied to the combined table, in file column order
station.columns <- c(
  'year', 'month',
  'day', 'hr',
  'min', 'wind.direction.degT',
  'wind.speed.mps', 'gust.speed.mps',
  'wave.height.m', 'dom.wave.per.sec',
  'avg.wave.per.sec', 'mean.wave.dir.degT',
  'pressure.hpa', 'air.temp.degC',
  'water.temp.degC', 'dewpoint.temp.degC',
  'visibility.mi', 'ptdy', 'tide.ft'
)
# Loop through every buoy; a station whose file cannot be read is skipped
# (NULL) rather than aborting the whole run, but the failure is reported
station.list <- lapply(links.list, function(x) {
  station.url <- paste0('http://www.ndbc.noaa.gov/data/realtime2/', x, '.txt')
  tryCatch(
    read.table(
      station.url,
      header = FALSE,
      # NDBC realtime files mark missing measurements with 'MM'
      na.strings = c('NA', 'MM'),
      stringsAsFactors = FALSE
    ),
    error = function(e) {
      message('Skipping station ', x, ': ', conditionMessage(e))
      NULL
    }
  )
})
# Remove NULL station data
station.list <- Filter(Negate(is.null), station.list)
# rbind() fails if any table has a different number of columns, so drop
# tables that do not match the expected layout and say so
has.expected.cols <- vapply(
  station.list,
  function(d) ncol(d) == length(station.columns),
  logical(1)
)
if (!all(has.expected.cols)) {
  warning(
    sum(!has.expected.cols),
    ' station file(s) dropped: unexpected number of columns',
    call. = FALSE
  )
}
station.list <- station.list[has.expected.cols]
# Without any data, do.call('rbind', ...) returns NULL and renaming fails
# with an obscure error; stop with a clear one instead
if (length(station.list) == 0) {
  stop('No station data could be downloaded.', call. = FALSE)
}
# Convert to dataframe and rename columns
stations.df <- do.call('rbind', station.list)
names(stations.df) <- station.columns
## Data Manipulation
# Convert values to numeric
# lapply() always returns one element per column, so the result stays a
# data frame with the same columns even when there are zero or one rows
# (sapply() would collapse those cases to a list or a plain vector).
# Text that is not a number becomes NA, with a coercion warning.
stations.df <- as.data.frame(lapply(stations.df, as.numeric))
# Create a Date column from the 'year', 'month', 'day' columns.
# Giving as.Date() an explicit format makes unparseable rows NA instead of
# raising an error when the first row happens to be incomplete.
stations.df$date <- as.Date(
  paste(stations.df$year, stations.df$month, stations.df$day, sep = '-'),
  format = '%Y-%m-%d'
)
## Visualization
# Plot boxplots of various metrics: one box per day, pooling the readings of
# all stations, for observations dated 1 September 2016 or later.
# Gust speed per day
stations.df %>%
  filter(date >= as.Date('2016-09-01')) %>%
  ggplot(aes(x = date, y = gust.speed.mps, group = date)) +
  geom_boxplot(colour = '#f0f2f3', outlier.colour = '#fd7d47') +
  labs(
    title = 'Gust Speed Peaks Across All NOAA Weather Station Buoys',
    subtitle = 'Gusts are measured in meters per second. The average speed is pretty consistent, \nbut gusts of 20+ m/s (45+ mph) have been more common with Hurricane Matthew'
  )
# Wave height per day
stations.df %>%
  filter(date >= as.Date('2016-09-01')) %>%
  ggplot(aes(x = date, y = wave.height.m, group = date)) +
  geom_boxplot(colour = '#f0f2f3', outlier.colour = '#fd7d47') +
  labs(
    title = 'Wave Heights Across All NOAA Weather Station Buoys',
    subtitle = 'Wave heights (measured in meters) have pushed significantly higher with Matthew, \nwith peaks seen above 10 meters (32 feet).'
  )
# Summarise wind speed, gust speed, wave height and pressure by day.
# stations.days has one row per distinct date, pooling all stations; missing
# readings are ignored within each day.
stations.days <- summarise(
  group_by(stations.df, date),
  # Sustained wind speed
  avg.speed = mean(wind.speed.mps, na.rm = TRUE),
  max.speed = max(wind.speed.mps, na.rm = TRUE),
  # Gust speed
  avg.gust.speed = mean(gust.speed.mps, na.rm = TRUE),
  max.gust.speed = max(gust.speed.mps, na.rm = TRUE),
  # Wave height
  avg.wave.height = mean(wave.height.m, na.rm = TRUE),
  max.wave.height = max(wave.height.m, na.rm = TRUE),
  # Pressure
  avg.pressure = mean(pressure.hpa, na.rm = TRUE),
  max.pressure = max(pressure.hpa, na.rm = TRUE),
  min.pressure = min(pressure.hpa, na.rm = TRUE)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment