Last active
February 14, 2022 19:00
-
-
Save ctesta01/0b4952ebf456e777eee47e8fb1a00138 to your computer and use it in GitHub Desktop.
Plot the weekday variation in reported COVID-19 deaths in the United States
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(ggdist) | |
library(tidyverse) | |
library(magrittr) | |
library(cowplot) | |
library(ISOweek) | |
df <- readr::read_csv("https://github.com/nytimes/covid-19-data/raw/master/rolling-averages/us.csv") | |
df %<>% mutate(wday = lubridate::wday(lubridate::ymd(date))) | |
weekdays <- c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday') | |
# write a function to help us render similar plot layouts for different variables | |
make_plot <- function(y_variable, y_title) { | |
df %>% | |
filter(date >= lubridate::ymd('2020-03-01')) %>% | |
ggplot(aes(x = factor(wday, labels = weekdays), y = {{ y_variable }}, fill = factor(wday))) + | |
stat_dots(side = 'right', color = NA) + | |
stat_halfeye( | |
aes( | |
fill = factor(wday), | |
fill_ramp = stat(cut_cdf_qi( | |
cdf, | |
.width = c(.5, .8, .95), | |
labels = scales::percent_format() | |
)) | |
), | |
alpha = 0.6, | |
interval_color = NA, | |
position = "dodgejust", | |
side = 'left' | |
)+ | |
scale_fill_viridis_d(end = 0.8) + | |
scale_fill_ramp_discrete(range = c(1, 0.2), na.translate = FALSE) + | |
labs( | |
x = "Day of the Week", | |
y = y_title | |
) + | |
guides(fill = 'none', fill_ramp = 'none') + | |
labs( | |
title = "Day of Week Effect on Daily COVID-19 Deaths in the United States", | |
subtitle = "Colored dots show the distribution, filled areas show 50%, 80%, and 95% intervals, black dots show weekday averages", | |
caption = stringr::str_c( | |
"@_christiantesta\n", | |
"data period: 2020-03-01 through ", max(df$date), "\n", | |
"https://github.com/nytimes/covid-19-data/blob/master/rolling-averages/us.csv" | |
) | |
) + | |
cowplot::theme_cowplot() + | |
scale_y_continuous(labels = scales::comma_format(), breaks = c(0, 1e3, 2e3, 3e3, 4e3, 5e3)) + | |
theme(axis.text.x = element_text(angle = 75, hjust = 1)) | |
} | |
# plot daily number of deaths by weekday | |
plt <- make_plot(deaths, "Daily Number of Reported Deaths") | |
ggsave(plot = plt, "weekday_effect_on_COVID_deaths.png", width = 10, height = 8) | |
# plot daily number of deaths by weekday on log scale | |
plt <- plt + | |
scale_y_continuous(trans = 'log', labels = scales::comma_format(), breaks = c(1, 10, 1e2, 1e3, 2e3, 3e3, 4e3, 5e3)) + | |
labs(y = "Daily Number of Reported Deaths, Log Scale") | |
ggsave(plot = plt, "weekday_effect_on_COVID_deaths_logscale.png", width = 10, height = 8) | |
# calculate comparisons of daily death counts to each weekly average – | |
# note that this is a weekly average for each calendar week, not a 7-day moving average | |
df %<>% mutate(ISOweek = ISOweek(date)) | |
df %<>% group_by(ISOweek) %>% | |
mutate(difference_from_weekly_mean = deaths - mean(deaths), | |
pct_from_weekly_mean = (deaths - mean(deaths))/mean(deaths)) | |
# plot the difference in daily deaths from each week's average daily deaths | |
plt <- | |
make_plot(difference_from_weekly_mean, 'Daily Deaths Beyond the Weekly Average of Daily Deaths') + | |
scale_y_continuous(labels = scales::comma_format(), breaks = c(-2e3, -1e3, 0, 1e3, 2e3)) | |
ggsave(plot = plt, "weekday_effect_on_COVID_deaths_difference.png", width = 10, height = 8) | |
# plot the percent difference in daily deaths from each week's average daily deaths | |
plt <- | |
make_plot(pct_from_weekly_mean, 'Percent of Deaths Over the Weekly Average of Daily Deaths') + | |
scale_y_continuous(labels = scales::percent_format()) + | |
background_grid(major = 'y', minor = 'y') | |
ggsave(plot = plt, "weekday_effect_on_COVID_deaths_percent.png", width = 10, height = 8) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The effect the day-of-the-week or the weekend have on reported COVID-19 statistics has been discussed before in places such as 1234.
↑ This is originally from 1, also reported on in 23, showing roughly March - November 2020. What I like about this is that by separating the weekday from weekend in this time-series style, we can see the weekend effect relative to that week's death data.
↑ These are from 4, and while I think they're definitely helpful visualizations, I enjoy that my visualizations also depict the variation within the data.
Footnotes
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7990432/ (Accessed February 14 2022) ↩ ↩2
https://consultqd.clevelandclinic.org/the-weekend-effect-and-covid-19-mortality/ (Accessed February 12 2022) ↩ ↩2
https://covidtracking.com/analysis-updates/how-day-of-week-effects-impact-covid-19-data (Accessed February 12 2022) ↩ ↩2
https://dearpandemic.org/covid-19-numbers-lower-on-the-weekends/ (Accessed February 14 2022) ↩ ↩2