Skip to content

Instantly share code, notes, and snippets.

@ericpgreen
Created February 25, 2022 04:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericpgreen/d2b8eb6e8f6d86a5bebb25d81e0bf9b5 to your computer and use it in GitHub Desktop.
Save ericpgreen/d2b8eb6e8f6d86a5bebb25d81e0bf9b5 to your computer and use it in GitHub Desktop.
# Durham Public Schools publishes a COVID-19 "dashboard"
# https://docs.google.com/spreadsheets/d/1kQ35JKpk3vNaM7ZfFK2DFb1B9kQIQ4IODr7lOM394nQ/edit#gid=769268401
# Code by @ericpgreen
# load
library(tidyverse)
library(googlesheets4)
gs4_deauth()
# import
raw <- read_sheet("https://docs.google.com/spreadsheets/d/1kQ35JKpk3vNaM7ZfFK2DFb1B9kQIQ4IODr7lOM394nQ/edit#gid=769268401") %>%
select(-c(3:5))
# create vector of column names called new_names
start_date <- raw %>%
slice(2) %>%
unlist(use.names = FALSE) %>%
as_tibble() %>%
mutate(value = str_remove(value, "Positive COVID-19 Cases\n")) %>%
mutate(value = str_trim(value, "both")) %>%
slice(3:n()) %>%
mutate(value = str_replace(value, "\\,.*", "")) %>%
mutate(value = str_replace(value, "\\-.*", ""))
dates <- rep(start_date$value, each = 3)
labels <- rep(c("students", "staff", "clusters"), nrow(start_date))
new_names <- c("code", "site_name", paste(dates, labels, sep = "_"))
# create helpers
school_levels <- c("Central Services Total",
"Elementary Total",
"Middle Total",
"High/Secondary Total",
"GRAND TOTAL")
elem <- c("320304", "320308", "320374", "320318",
"320319", "320363", "320313", "320310",
"320315", "320344", "320332", "320347",
"320320", "320324", "320328", "320327",
"320339", "320340", "320348", "320352",
"320354", "320360", "320362", "320364",
"320367", "320369", "320372", "320376",
"320388", "320400", "320289")
middle <- c("320306", "320316", "320338", "320342",
"320346", "320343", "320355", "320370",
"320366")
high <- c("320312", "320317", "320323", "320322",
"320309", "320325", "320701", "320341",
"320353", "320356", "320365", "320368",
"320314", "320401")
# tidy(ish)
dps <- raw %>%
setNames(new_names) %>%
slice(5:n()) %>%
mutate(across(everything(), as.character)) %>%
filter(!is.na(site_name)) %>%
filter(!(site_name %in% school_levels)) %>%
mutate(level = case_when(
code %in% "320LEA" ~ "Central Services",
code %in% elem ~ "Elementary",
code %in% middle ~ "Middle",
code %in% high ~ "High",
TRUE ~ "Hmmm, what are you?"
)) %>%
mutate(across(everything(),
~ case_when(. == "NULL" ~ "0",
. == "N/A" ~ NA_character_,
TRUE ~ .))) %>%
mutate(across(-c(code, site_name, level),
~ as.numeric(.))) %>%
pivot_longer(cols = -c(code, site_name, level),
names_to = c("start_date", "type"),
values_to = "value",
names_sep = "_") %>%
pivot_wider(id_cols = c(code, site_name, level, start_date),
names_from = type,
values_from = value) %>%
mutate(year = case_when(
grepl("August", start_date) ~ 2021,
grepl("September", start_date) ~ 2021,
grepl("October", start_date) ~ 2021,
grepl("November", start_date) ~ 2021,
grepl("December", start_date) ~ 2021,
TRUE ~ 2022),
start_date = paste(start_date, year),
start_date = lubridate::parse_date_time(start_date,
"%b d y"),
start_date = lubridate::date(start_date)) %>%
select(-year)
# plot
# median line
dps_median <- dps %>%
filter(level!="Central Services") %>%
group_by(level, start_date) %>%
summarize(students = median(students))
# annotate n
dps_n <- dps %>%
filter(level!="Central Services") %>%
distinct(site_name, .keep_all = TRUE) %>%
count(level) %>%
mutate(date = lubridate::ymd("2021-08-01")) %>%
mutate(label = paste0(n, " schools"))
# annotate cases
dps_cases <- dps %>%
filter(level!="Central Services") %>%
group_by(level) %>%
summarize(total_students = sum(students)) %>%
mutate(date = lubridate::ymd("2021-08-01")) %>%
mutate(label = paste0(total_students, " total cases"))
# subtitle date min
date_min <- dps %>%
summarize(min = min(start_date)) %>%
mutate(month = lubridate::month(min, label = TRUE, abbr = FALSE),
day = lubridate::day(min),
year = lubridate::year(min),
date = paste0(month, " ", day, ", ", year)) %>%
pull(date)
# subtitle date max
date_max <- dps %>%
summarize(max = max(start_date)) %>%
mutate(month = lubridate::month(max, label = TRUE, abbr = FALSE),
day = lubridate::day(max),
year = lubridate::year(max),
date = paste0(month, " ", day, ", ", year)) %>%
pull(date)
dps %>%
filter(level!="Central Services") %>%
ggplot(aes(x=start_date, y=students)) +
geom_smooth(color="grey", aes(group=site_name), se = FALSE,
alpha = 0.3, size = .4) +
geom_smooth(data = dps_median, aes(x=start_date, y=students), se = FALSE,
alpha = 1, size = 1) +
geom_jitter(data = dps %>% filter(students > 10),
alpha = 0.3, shape = 21) +
geom_text(data = dps_n, aes(x=date, y = 50, label=label),
hjust=0) +
geom_text(data = dps_cases, aes(x=date, y = 43, label=label),
hjust=0) +
facet_wrap(~ level, ncol = 1) +
theme_bw() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
plot.caption = element_text(margin=margin(15,0,0,0)),
plot.title = element_text(face="bold")) +
labs(x=NULL, y=NULL,
title = str_wrap("Fewer than 10% of students at Durham Public Schools have tested positive for COVID-19 during Delta and Omicron waves", 60),
subtitle = paste0("Weekly reported cases ",
"(", date_min, "-", date_max, ")"),
caption = "~32,000 students attending school on-site. Median cases show in blue.\nData Source: Durham Public Schools, https://tinyurl.com/mr32wdzs\n@ericpgreen")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment