Skip to content

Instantly share code, notes, and snippets.

@MattSandy
Created March 12, 2020 16:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MattSandy/8c189ed3357b5be8f7a000513cad0408 to your computer and use it in GitHub Desktop.
Save MattSandy/8c189ed3357b5be8f7a000513cad0408 to your computer and use it in GitHub Desktop.
Plots a 7 day forecast for the Coronavirus
library(tidyverse)
library(lubridate)
library(ggthemes)
library(forecast)
library(xts)
library(timetk)
future <- 7
confirmed <- read_csv("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv")
confirmed_A <- confirmed %>% select(`Province/State`:`3/9/20`)
confirmed_B <- confirmed %>% select(!`1/22/20`:`3/9/20`)
# Split data by Count Level Reporting Change ------------------------------
US <- list()
US$Confirmed_A <- confirmed_A %>%
filter(`Country/Region`=="US") %>%
select(-(`Province/State`:Long),`Country/Region`) %>%
summarise_if(is.numeric, sum)
US$Confirmed_B <- confirmed_B %>%
filter(`Country/Region`=="US" & !`Province/State` %in% state.name) %>%
select(-(`Province/State`:Long),`Country/Region`) %>%
summarise_if(is.numeric, sum)
# Bind into single dataframe ----------------------------------------------
US$Confirmed <- US$Confirmed_A %>% cbind(US$Confirmed_B)
# Future ------------------------------------------------------------------
US$melted <- US$Confirmed %>%
gather %>%
mutate(date = key %>% mdy) %>%
rename(counts = value)
US$xts <- xts(US$melted %>% select(counts),US$melted$date)
d.arima <- auto.arima(US$xts)
d.forecast <- forecast(d.arima, level = c(95), h = future)
# Build that model out ----------------------------------------------------
ts_future <- cbind(y = d.forecast$mean,
y.lo = d.forecast$lower,
y.hi = d.forecast$upper) %>%
xts(US$melted$date %>% tk_make_future_timeseries(future))
#> y y.lo y.hi
#> 2012-12-20 148 70.33991 225.6601
#> 2012-12-22 148 70.33991 225.6601
#> 2012-12-23 148 70.33991 225.6601
# Format original xts object
ts_final <- cbind(y = US$xts,
y.lo = NA,
y.hi = NA) %>% rbind(ts_future) %>% tk_tbl
# Plot forecast - Note ggplot uses data frames, tk_tbl() converts to df
ts_final$Status <- "Confirmed Case Count"
ts_final$Status[which(!is.na(ts_final$y.lo))] <- "Predicted Case Count"
ts_final %>%
ggplot(aes(x = index, y = counts, fill = Status)) +
geom_bar(stat="identity") +
geom_ribbon(aes(ymin = y.lo, ymax = y.hi), alpha = 0.6, fill = "red") +
theme_fivethirtyeight() +
scale_fill_manual(values = c("#666666","#B00000")) +
theme(axis.title = element_text()) +
scale_x_date(date_breaks = "7 days", date_labels = "%b %d") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Confirmed Cases of the Corona Virus in the United States",
subtitle = "ARIMA model used for future predictions",
caption = "\n@appupio") +
ylab("Number of Confirmed Cases\n") + xlab("\nDate")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment