Last active
March 25, 2020 23:42
-
-
Save johnjdavisiv/d14aaf11315949517545c6f7b6efa30f to your computer and use it in GitHub Desktop.
Plot the total number of COVID-19 cases for each US state
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used below. tidyverse attaches readr, dplyr, tidyr and ggplot2
# (so the explicit ggplot2 call is redundant but harmless); shadowtext
# provides geom_shadowtext() for the outlined state labels.
library(tidyverse)
library(ggplot2)
library(shadowtext)
# John J Davis IV
# Biomechanics PhD Student
# Indiana University School of Public Health
# Twitter: @JDRuns
# Email: jjd1@iu.edu
# 25 March 2020
# NOTE: This script still breaks the Lat/Long values for some rows.
# It's an easy fix with an inner_join().
# This code is public domain.
# Adapted in part from code by John Burn-Murdoch.
# Download the confirmed-case time series straight from the Johns Hopkins
# CSSE GitHub repository.
# NOTE(review): the rest of the script assumes the first four columns are
# Province/State, Country/Region, Lat, Long and that every later column is a
# report date -- confirm the file at this URL still has that layout.
covid_url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
covid_df <- read_csv(covid_url)
# State-level rows for the US, reshaped to long format (one row per state and
# report date).
# Province/State values that are not states: cruise ships, the national
# aggregate, and one territory label.
non_state_rows <- c(
  "Diamond Princess",
  "Grand Princess",
  "US",
  "United States Virgin Islands"
)

covid_state_data <- covid_df %>%
  filter(
    `Country/Region` == "US",
    # Entries with a comma ("<county>, <ST>") are county-level rows; skip them.
    !grepl(".,.", `Province/State`),
    !(`Province/State` %in% non_state_rows)
  ) %>%
  rename(state = `Province/State`) %>%
  # Keep the first four columns as identifiers; every other column is a date.
  pivot_longer(cols = -(1:4), names_to = "date", values_to = "cases")
# Pool the county-level rows ("<county>, <ST>", collected up until 3/9) into
# per-state totals, then stack them with the state-level rows.
#
# The pattern is anchored to ", <ST>" at the end of the string. The previous
# pattern (".AL|AK|...|WY.") matched any entry that merely contained a
# two-letter state code somewhere; any such entry with text after the
# abbreviation would end up with state = NA after the abbreviation lookup.
county_pattern <- paste0(",\\s(", paste(state.abb, collapse = "|"), ")$")

covid_us <- covid_df %>%
  filter(`Country/Region` == "US") %>%
  filter(grepl(county_pattern, `Province/State`)) %>%
  separate(`Province/State`, into = c("city", "state"), sep = ",\\s") %>%
  group_by(state) %>%
  # Sums every numeric column per state: the daily counts, but also Lat/Long
  # if they are numeric, so coordinates on these pooled rows are not meaningful.
  summarise_if(is.numeric, sum) %>%
  mutate(`Country/Region` = "US") %>%
  # Restore the column order of covid_state_data so the first four columns are
  # identifiers again before reshaping.
  select(state, `Country/Region`, everything()) %>%
  # Two-letter abbreviation -> full state name, matching the state-level rows.
  mutate(state = state.name[match(state, state.abb)]) %>%
  pivot_longer(cols = -(1:4), names_to = "date", values_to = "cases") %>%
  bind_rows(covid_state_data) %>%
  # Date column headers are parsed as month/day/two-digit-year.
  mutate(date = as.Date(date, "%m/%d/%y")) %>%
  arrange(state, date)
# Plot total cases per state as a function of time, on a log10 y-axis, with
# each state's abbreviation drawn at the most recent date.

# Right edge of the x-axis: one week past today, kept as an m/d/yy string.
display_day <- format(Sys.Date() + 7, format = "%m/%d/%y")

# Parse each limit with a format that matches its text. The start date used to
# be written "2/25/2020" but parsed with "%m/%d/%y", which only gave the right
# year because %y read the leading "20" of "2020".
x_limits <- c(
  as.Date("2020-02-25"),
  as.Date(display_day, format = "%m/%d/%y")
)

# Lookup vectors: full state/territory name -> abbreviation used as the label.
state_terr_abb <- c(state.abb, "DC", "VI", "GU", "PR")
state_terr_name <- c(
  state.name,
  "District of Columbia", "Virgin Islands", "Guam", "Puerto Rico"
)

covid_us %>%
  filter(cases > 10) %>%
  ggplot(aes(x = date, y = cases, color = state)) +
  geom_line(lwd = 1) +
  geom_point(size = 2, pch = 21, fill = "white", alpha = 0.8) +
  geom_shadowtext(
    # Label only the rows at the latest date in the plotted data. The plot
    # data is already restricted to cases > 10, so no extra case filter is
    # needed here.
    data = . %>% top_n(1, date),
    aes(label = state_terr_abb[match(state, state_terr_name)]),
    bg.color = "white",
    size = 5,
    # Jitter the labels so states with similar counts overlap less.
    position = position_jitter(height = 0.05, width = 0.8)
  ) +
  # With ggplot2's default out-of-bounds handling, values outside these limits
  # are dropped from the plot.
  scale_y_log10(
    limits = c(10, 50000),
    breaks = c(10, 100, 1000, 10000, 50000)
  ) +
  scale_x_date(
    limits = x_limits,
    date_breaks = "1 month",
    date_labels = "%b %d"
  ) +
  labs(x = "Date", subtitle = "Total cases (after 10th case) by US state") +
  theme(
    legend.position = "none",
    text = element_text(size = 16),
    strip.text = element_text(size = 12)
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment