Skip to content

Instantly share code, notes, and snippets.

@johnjdavisiv
Last active March 25, 2020 23:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnjdavisiv/d14aaf11315949517545c6f7b6efa30f to your computer and use it in GitHub Desktop.
Save johnjdavisiv/d14aaf11315949517545c6f7b6efa30f to your computer and use it in GitHub Desktop.
Plot the total number of COVID-19 cases for each US state
library(tidyverse)
library(ggplot2)
library(shadowtext)
#John J Davis IV
#Biomechanics PhD Student
#Indiana University School of Public Health
#Twitter: @JDRuns
#Email: jjd1@iu.edu
#25 March 2020
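#NOTE: Lat/Long are summed across counties when county rows are pooled by state,
#so the coordinates in covid_us are not meaningful for those rows.
#It's an easy fix with an inner_join() against the state-level coordinates.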
#This code is public domain
#Adapted in part from code by John Burn-Murdoch
# Load the confirmed-cases time series straight from the Johns Hopkins CSSE
# GitHub repository. The URL is assembled from its path components only to
# keep the line length readable; the resulting string is unchanged.
# NOTE(review): upstream file layout may have changed since this was written;
# confirm this path still resolves before relying on it.
covid_df <- read_csv(
  paste0(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
    "csse_covid_19_data/csse_covid_19_time_series/",
    "time_series_19-covid-Confirmed.csv"
  )
)
# State-level rows, in long format (one row per state per date column).
#
# Keeps US rows only, then drops:
#   * any entry whose name contains a comma with a character on each side
#     (the "<place>, <ST>" style rows handled separately below), and
#   * the cruise ships, the national "US" row and the
#     "United States Virgin Islands" entry.
# The first four columns are kept as identifiers; every remaining column is
# stacked into `date` / `cases`.
covid_state_data <- covid_df %>%
  rename(state = `Province/State`) %>%
  filter(
    `Country/Region` == "US",
    !grepl(".,.", state),
    !(state %in% c("Diamond Princess",
                   "Grand Princess",
                   "US", "United States Virgin Islands"))
  ) %>%
  pivot_longer(cols = -(1:4), names_to = "date", values_to = "cases")
# Gather county-wide data (collected up until 3/9) and pool with state-data.
#
# County-level rows are named "<place>, <ST>". They are summed per state,
# mapped from the two-letter code to the full state name, reshaped to long
# format and stacked on top of `covid_state_data`. The result has one row per
# source (county-pooled or state-level) per state per date, sorted by state
# and date, with `date` converted to a Date.
covid_us <- covid_df %>%
  filter(`Country/Region` == "US") %>%
  # Require ", <ST>" at the very end of the name. The previous pattern,
  # ".AL|AK|...|WY.", applied the dots only to the first and last alternatives
  # and was unanchored, so any name merely containing a state abbreviation
  # matched; names with extra text after the code then produced a `state`
  # value that match() could not resolve, i.e. an NA state group.
  filter(grepl(paste0(",\\s(", paste(state.abb, collapse = "|"), ")$"),
               `Province/State`)) %>%
  separate(`Province/State`, into = c("city", "state"), sep = ",\\s") %>%
  # Sum all numeric columns within each state. This also sums Lat and Long,
  # so those two columns are not real coordinates for the pooled rows.
  group_by(state) %>%
  summarise_if(is.numeric, sum) %>%
  # Restore the column layout of covid_state_data (state, Country/Region,
  # Lat, Long, then dates) so the positional pivot below lines up.
  mutate(`Country/Region` = "US") %>%
  select(state, `Country/Region`, everything()) %>%
  mutate(state = state.name[match(state, state.abb)]) %>%
  pivot_longer(-(1:4), names_to = "date", values_to = "cases") %>%
  bind_rows(covid_state_data) %>%
  # Column headers are dates written as month/day/two-digit-year.
  mutate(date = as.Date(date, "%m/%d/%y")) %>%
  arrange(state, date)
# Plot total cases as a function of time, on log scale.

# Right edge of the x-axis: one week past today, as an "mm/dd/yy" string.
display_day <- format(Sys.Date() + 7, format = "%m/%d/%y")

# Parallel lookup vectors (same order) used to turn full state/territory
# names into two-letter labels.
state_terr_abb <- c(state.abb, "DC", "VI", "GU", "PR")
state_terr_name <- c(state.name, "District of Columbia", "Virgin Islands",
                     "Guam", "Puerto Rico")

covid_us %>%
  # Only plot observations above 10 cases.
  filter(cases > 10) %>%
  ggplot(aes(x = date, y = cases, color = state)) +
  geom_line(lwd = 1) +
  geom_point(size = 2, pch = 21, fill = "white", alpha = 0.8) +
  # Label the rows that fall on the latest date in the plotted data. The
  # layer's data function receives the already-filtered plot data, so no
  # additional case-count filter is needed here.
  geom_shadowtext(
    data = . %>% top_n(1, date),
    aes(label = state_terr_abb[match(state, state_terr_name)]),
    bg.color = "white",
    size = 5,
    # Jitter is random, so label positions vary between runs.
    position = position_jitter(height = 0.05, width = 0.8)
  ) +
  scale_y_log10(
    limits = c(10, 50000),
    breaks = c(10, 100, 1000, 10000, 50000)
  ) +
  scale_x_date(
    # Parse each limit with the format that matches its own string: the start
    # date uses a four-digit year (%Y), display_day a two-digit year (%y).
    # Parsing "2/25/2020" with %y only worked because the leading "20" was
    # read as the year and the rest was ignored.
    limits = c(as.Date("2/25/2020", "%m/%d/%Y"),
               as.Date(display_day, "%m/%d/%y")),
    date_breaks = "1 month",
    date_labels = "%b %d"
  ) +
  labs(x = "Date", subtitle = "Total cases (after 10th case) by US state") +
  theme(
    legend.position = "none",
    text = element_text(size = 16),
    strip.text = element_text(size = 12)
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment