Created
February 25, 2022 04:24
-
-
Save ericpgreen/d2b8eb6e8f6d86a5bebb25d81e0bf9b5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Durham Public Schools publishes a COVID-19 "dashboard" | |
# https://docs.google.com/spreadsheets/d/1kQ35JKpk3vNaM7ZfFK2DFb1B9kQIQ4IODr7lOM394nQ/edit#gid=769268401 | |
# Code by @ericpgreen | |
# load | |
library(tidyverse) | |
library(googlesheets4) | |
gs4_deauth() | |
# import | |
raw <- read_sheet("https://docs.google.com/spreadsheets/d/1kQ35JKpk3vNaM7ZfFK2DFb1B9kQIQ4IODr7lOM394nQ/edit#gid=769268401") %>% | |
select(-c(3:5)) | |
# create vector of column names called new_names | |
start_date <- raw %>% | |
slice(2) %>% | |
unlist(use.names = FALSE) %>% | |
as_tibble() %>% | |
mutate(value = str_remove(value, "Positive COVID-19 Cases\n")) %>% | |
mutate(value = str_trim(value, "both")) %>% | |
slice(3:n()) %>% | |
mutate(value = str_replace(value, "\\,.*", "")) %>% | |
mutate(value = str_replace(value, "\\-.*", "")) | |
dates <- rep(start_date$value, each = 3) | |
labels <- rep(c("students", "staff", "clusters"), nrow(start_date)) | |
new_names <- c("code", "site_name", paste(dates, labels, sep = "_")) | |
# create helpers | |
school_levels <- c("Central Services Total", | |
"Elementary Total", | |
"Middle Total", | |
"High/Secondary Total", | |
"GRAND TOTAL") | |
elem <- c("320304", "320308", "320374", "320318", | |
"320319", "320363", "320313", "320310", | |
"320315", "320344", "320332", "320347", | |
"320320", "320324", "320328", "320327", | |
"320339", "320340", "320348", "320352", | |
"320354", "320360", "320362", "320364", | |
"320367", "320369", "320372", "320376", | |
"320388", "320400", "320289") | |
middle <- c("320306", "320316", "320338", "320342", | |
"320346", "320343", "320355", "320370", | |
"320366") | |
high <- c("320312", "320317", "320323", "320322", | |
"320309", "320325", "320701", "320341", | |
"320353", "320356", "320365", "320368", | |
"320314", "320401") | |
# tidy(ish) | |
dps <- raw %>% | |
setNames(new_names) %>% | |
slice(5:n()) %>% | |
mutate(across(everything(), as.character)) %>% | |
filter(!is.na(site_name)) %>% | |
filter(!(site_name %in% school_levels)) %>% | |
mutate(level = case_when( | |
code %in% "320LEA" ~ "Central Services", | |
code %in% elem ~ "Elementary", | |
code %in% middle ~ "Middle", | |
code %in% high ~ "High", | |
TRUE ~ "Hmmm, what are you?" | |
)) %>% | |
mutate(across(everything(), | |
~ case_when(. == "NULL" ~ "0", | |
. == "N/A" ~ NA_character_, | |
TRUE ~ .))) %>% | |
mutate(across(-c(code, site_name, level), | |
~ as.numeric(.))) %>% | |
pivot_longer(cols = -c(code, site_name, level), | |
names_to = c("start_date", "type"), | |
values_to = "value", | |
names_sep = "_") %>% | |
pivot_wider(id_cols = c(code, site_name, level, start_date), | |
names_from = type, | |
values_from = value) %>% | |
mutate(year = case_when( | |
grepl("August", start_date) ~ 2021, | |
grepl("September", start_date) ~ 2021, | |
grepl("October", start_date) ~ 2021, | |
grepl("November", start_date) ~ 2021, | |
grepl("December", start_date) ~ 2021, | |
TRUE ~ 2022), | |
start_date = paste(start_date, year), | |
start_date = lubridate::parse_date_time(start_date, | |
"%b d y"), | |
start_date = lubridate::date(start_date)) %>% | |
select(-year) | |
# plot | |
# median line | |
dps_median <- dps %>% | |
filter(level!="Central Services") %>% | |
group_by(level, start_date) %>% | |
summarize(students = median(students)) | |
# annotate n | |
dps_n <- dps %>% | |
filter(level!="Central Services") %>% | |
distinct(site_name, .keep_all = TRUE) %>% | |
count(level) %>% | |
mutate(date = lubridate::ymd("2021-08-01")) %>% | |
mutate(label = paste0(n, " schools")) | |
# annotate cases | |
dps_cases <- dps %>% | |
filter(level!="Central Services") %>% | |
group_by(level) %>% | |
summarize(total_students = sum(students)) %>% | |
mutate(date = lubridate::ymd("2021-08-01")) %>% | |
mutate(label = paste0(total_students, " total cases")) | |
# subtitle date min | |
date_min <- dps %>% | |
summarize(min = min(start_date)) %>% | |
mutate(month = lubridate::month(min, label = TRUE, abbr = FALSE), | |
day = lubridate::day(min), | |
year = lubridate::year(min), | |
date = paste0(month, " ", day, ", ", year)) %>% | |
pull(date) | |
# subtitle date max | |
date_max <- dps %>% | |
summarize(max = max(start_date)) %>% | |
mutate(month = lubridate::month(max, label = TRUE, abbr = FALSE), | |
day = lubridate::day(max), | |
year = lubridate::year(max), | |
date = paste0(month, " ", day, ", ", year)) %>% | |
pull(date) | |
dps %>% | |
filter(level!="Central Services") %>% | |
ggplot(aes(x=start_date, y=students)) + | |
geom_smooth(color="grey", aes(group=site_name), se = FALSE, | |
alpha = 0.3, size = .4) + | |
geom_smooth(data = dps_median, aes(x=start_date, y=students), se = FALSE, | |
alpha = 1, size = 1) + | |
geom_jitter(data = dps %>% filter(students > 10), | |
alpha = 0.3, shape = 21) + | |
geom_text(data = dps_n, aes(x=date, y = 50, label=label), | |
hjust=0) + | |
geom_text(data = dps_cases, aes(x=date, y = 43, label=label), | |
hjust=0) + | |
facet_wrap(~ level, ncol = 1) + | |
theme_bw() + | |
theme(panel.grid.major.x = element_blank(), | |
panel.grid.minor = element_blank(), | |
plot.caption = element_text(margin=margin(15,0,0,0)), | |
plot.title = element_text(face="bold")) + | |
labs(x=NULL, y=NULL, | |
title = str_wrap("Fewer than 10% of students at Durham Public Schools have tested positive for COVID-19 during Delta and Omicron waves", 60), | |
subtitle = paste0("Weekly reported cases ", | |
"(", date_min, "-", date_max, ")"), | |
caption = "~32,000 students attending school on-site. Median cases show in blue.\nData Source: Durham Public Schools, https://tinyurl.com/mr32wdzs\n@ericpgreen") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment