Skip to content

Instantly share code, notes, and snippets.

@MattSandy
Created March 17, 2020 01:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MattSandy/6fa5c0c689e01b18bfab50bd5324e5b6 to your computer and use it in GitHub Desktop.
Save MattSandy/6fa5c0c689e01b18bfab50bd5324e5b6 to your computer and use it in GitHub Desktop.
Plot confirmed COVID-19 cases in SK
library(tidyverse)
library(lubridate)
library(ggthemes)
library(forecast)
library(xts)
library(timetk)
future <- 7
confirmed <- read_csv("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv")
South_Korea <- list()
# Bind into single dataframe ----------------------------------------------
South_Korea$Confirmed <- confirmed %>%
filter(`Country/Region`=="Korea, South") %>%
select(-(`Province/State`:Long),`Country/Region`) %>%
summarise_if(is.numeric, sum)
# Future ------------------------------------------------------------------
South_Korea$melted <- South_Korea$Confirmed %>%
gather %>%
mutate(date = key %>% mdy) %>%
rename(counts = value)
South_Korea$xts <- xts(South_Korea$melted %>% select(counts),South_Korea$melted$date)
d.arima <- auto.arima(South_Korea$xts)
d.forecast <- forecast(d.arima, level = c(95), h = future)
# Build that model out ----------------------------------------------------
ts_future <- cbind(y = d.forecast$mean,
y.lo = d.forecast$lower,
y.hi = d.forecast$upper) %>%
xts(South_Korea$melted$date %>% tk_make_future_timeseries(future))
# Format original xts object
ts_final <- cbind(y = South_Korea$xts,
y.lo = NA,
y.hi = NA) %>% rbind(ts_future) %>%
tk_tbl %>% mutate(counts = round(counts))
# Plot forecast - Note ggplot South_Korea data frames, tk_tbl() converts to df
ts_final$StatSouth_Korea <- "Confirmed Case Count"
ts_final$StatSouth_Korea[which(!is.na(ts_final$y.lo))] <- "Predicted Case Count"
# +(x)% -------------------------------------------------------------------
ts_final$diff <- 1:nrow(ts_final) %>% sapply(function(index){
if(index==1) { return(0) }
(((ts_final$counts[index] - ts_final$counts[index-1]) / ts_final$counts[index-1]) * 100) %>%
round(digits = 1) %>% return
})
ts_final$label <- paste0(scales::comma(ts_final$counts,big.mark=","), '\n(+',ts_final$diff,'%)')
ts_final$label[-seq(1,length(ts_final$label),3)] <- NA
ts_final %>% filter(index >= ymd("2020-02-01")) %>%
ggplot(aes(x = index, y = counts, fill = StatSouth_Korea, label = label)) +
geom_bar(stat="identity") +
geom_label(color = "#ffffff", nudge_y = 500) +
theme_fivethirtyeight() +
#theme_blue() +
scale_fill_manual(values = c("#E16036","#D6A99A")) +
theme(axis.title = element_text()) +
scale_y_continuous(expand = c(0, 0),limits = c(0,max(ts_final$counts)*1.2)) +
scale_x_date(date_breaks = "1 days", date_labels = "%b %d") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.direction = "horizontal",
legend.title = element_blank()) +
labs(title = "Confirmed Cases of COVID-19 in South Korea",
subtitle = "ARIMA model South Korea for future predictions. Percentages are based on increase from previous day.",
caption = "\n@appupio") +
ylab("Number of Confirmed Cases\n") + xlab("\n\nDate")
ggsave("last.png",width = 16,height = 8,units = "in")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment