Skip to content

Instantly share code, notes, and snippets.

@srvanderplas
Last active June 14, 2016 14:18
Show Gist options
  • Save srvanderplas/f9f43c9855117eebc7dddc466184abda to your computer and use it in GitHub Desktop.
Save srvanderplas/f9f43c9855117eebc7dddc466184abda to your computer and use it in GitHub Desktop.
Demonstrating that 100 days of weather can be used to predict reasonable weather in 3 days
# --- Packages -----------------------------------------------------------------
library(ggplot2) # Plots
library(magrittr) # Pipes
library(dplyr) # Split-Apply-Combine
library(tidyr) # Reshape data
library(stringr) # String manipulation
library(lubridate) # Dates and times
library(readr) # Read in csv/tsv/fwf data
library(readxl) # Read in excel data
# ------------------------------------------------------------------------------
# Load data: Weather around Minneapolis from 3/1/2016 - 6/11/2016
weather_data <- read_csv("../Downloads/756608.csv", na = "-9999") %>%
mutate(DATE = ymd(DATE),
ddate = decimal_date(DATE)) %>%
mutate(predict.data = DATE <= ymd("2016-06-01")) %>%
select(which(colSums(!is.na(.)) > 0)) %>%
select(DATE, ddate, predict.data, STATION, ELEVATION, LONGITUDE, LATITUDE, PRCP, TMAX, TMIN, TOBS)
# Summarize - average all stations in the area
weather_summary <- weather_data %>%
select(-starts_with("STATION")) %>%
group_by(DATE, ddate, predict.data) %>%
summarize_each(funs(mean(., na.rm = T)))
# Result: 103 observations of 9 variables
# Plot temperature data
qplot(x = DATE, y = TMAX, data = weather_summary) + ylab("Max Temperature")
qplot(x = DATE, y = TMIN, data = weather_summary) + ylab("Min Temperature")
# Create sinusoidal functions and verify they are appropriately periodic:
qplot(x = ddate, y = sin(ddate*2*pi), data = weather_summary)
qplot(x = ddate, y = cos(ddate*2*pi), data = weather_summary)
qplot(x = ddate, y = sin(ddate*pi), data = weather_summary)
qplot(x = ddate, y = cos(ddate*pi), data = weather_summary)
# Temperature model
max_temp_model <- lm(TMAX ~ sin(ddate*2*pi) + cos(ddate*2*pi) + sin(ddate*pi) + cos(ddate*pi), data = filter(weather_data, predict.data))
# Predict for summary data
weather_summary$TMAX.pred <- predict(max_temp_model, newdata = weather_summary)
qplot(x = DATE, y = TMAX, color = predict.data, data = weather_summary) + ylab("Max Temperature") +
geom_smooth(aes(x = DATE, y = TMAX.pred), inherit.aes = F) +
scale_color_manual("Data\nused\nto fit\nmodel", values = c("grey40", "black")) + xlab("Date") +
ggtitle("Maximum Temperatures: Minneapolis")
# Temperature model
min_temp_model <- lm(TMIN ~ sin(ddate*2*pi) + cos(ddate*2*pi) + sin(ddate*pi) + cos(ddate*pi), data = filter(weather_data, predict.data))
# Predict for summary data
weather_summary$TMIN.pred <- predict(min_temp_model, newdata = weather_summary)
qplot(x = DATE, y = TMIN, color = predict.data, data = weather_summary) + ylab("Min Temperature") +
geom_smooth(aes(x = DATE, y = TMIN.pred), inherit.aes = F) +
scale_color_manual("Data\nused\nto fit\nmodel", values = c("grey40", "black")) + xlab("Date") +
ggtitle("Minimum Temperatures: Minneapolis")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment