-
-
Save ctesta01/0b4952ebf456e777eee47e8fb1a00138 to your computer and use it in GitHub Desktop.
library(readr) | |
library(ggdist) | |
library(tidyverse) | |
library(magrittr) | |
library(cowplot) | |
library(ISOweek) | |
df <- readr::read_csv("https://github.com/nytimes/covid-19-data/raw/master/rolling-averages/us.csv") | |
df %<>% mutate(wday = lubridate::wday(lubridate::ymd(date))) | |
weekdays <- c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday') | |
# write a function to help us render similar plot layouts for different variables | |
make_plot <- function(y_variable, y_title) { | |
df %>% | |
filter(date >= lubridate::ymd('2020-03-01')) %>% | |
ggplot(aes(x = factor(wday, labels = weekdays), y = {{ y_variable }}, fill = factor(wday))) + | |
stat_dots(side = 'right', color = NA) + | |
stat_halfeye( | |
aes( | |
fill = factor(wday), | |
fill_ramp = stat(cut_cdf_qi( | |
cdf, | |
.width = c(.5, .8, .95), | |
labels = scales::percent_format() | |
)) | |
), | |
alpha = 0.6, | |
interval_color = NA, | |
position = "dodgejust", | |
side = 'left' | |
)+ | |
scale_fill_viridis_d(end = 0.8) + | |
scale_fill_ramp_discrete(range = c(1, 0.2), na.translate = FALSE) + | |
labs( | |
x = "Day of the Week", | |
y = y_title | |
) + | |
guides(fill = 'none', fill_ramp = 'none') + | |
labs( | |
title = "Day of Week Effect on Daily COVID-19 Deaths in the United States", | |
subtitle = "Colored dots show the distribution, filled areas show 50%, 80%, and 95% intervals, black dots show weekday averages", | |
caption = stringr::str_c( | |
"@_christiantesta\n", | |
"data period: 2020-03-01 through ", max(df$date), "\n", | |
"https://github.com/nytimes/covid-19-data/blob/master/rolling-averages/us.csv" | |
) | |
) + | |
cowplot::theme_cowplot() + | |
scale_y_continuous(labels = scales::comma_format(), breaks = c(0, 1e3, 2e3, 3e3, 4e3, 5e3)) + | |
theme(axis.text.x = element_text(angle = 75, hjust = 1)) | |
} | |
# plot daily number of deaths by weekday | |
plt <- make_plot(deaths, "Daily Number of Reported Deaths") | |
ggsave(plot = plt, "weekday_effect_on_COVID_deaths.png", width = 10, height = 8) | |
# plot daily number of deaths by weekday on log scale | |
plt <- plt + | |
scale_y_continuous(trans = 'log', labels = scales::comma_format(), breaks = c(1, 10, 1e2, 1e3, 2e3, 3e3, 4e3, 5e3)) + | |
labs(y = "Daily Number of Reported Deaths, Log Scale") | |
ggsave(plot = plt, "weekday_effect_on_COVID_deaths_logscale.png", width = 10, height = 8) | |
# calculate comparisons of daily death counts to each weekly average – | |
# note that this is a weekly average for each calendar week, not a 7-day moving average | |
df %<>% mutate(ISOweek = ISOweek(date)) | |
df %<>% group_by(ISOweek) %>% | |
mutate(difference_from_weekly_mean = deaths - mean(deaths), | |
pct_from_weekly_mean = (deaths - mean(deaths))/mean(deaths)) | |
# plot the difference in daily deaths from each week's average daily deaths | |
plt <- | |
make_plot(difference_from_weekly_mean, 'Daily Deaths Beyond the Weekly Average of Daily Deaths') + | |
scale_y_continuous(labels = scales::comma_format(), breaks = c(-2e3, -1e3, 0, 1e3, 2e3)) | |
ggsave(plot = plt, "weekday_effect_on_COVID_deaths_difference.png", width = 10, height = 8) | |
# plot the percent difference in daily deaths from each week's average daily deaths | |
plt <- | |
make_plot(pct_from_weekly_mean, 'Percent of Deaths Over the Weekly Average of Daily Deaths') + | |
scale_y_continuous(labels = scales::percent_format()) + | |
background_grid(major = 'y', minor = 'y') | |
ggsave(plot = plt, "weekday_effect_on_COVID_deaths_percent.png", width = 10, height = 8) |
The effect the day-of-the-week or the weekend have on reported COVID-19 statistics has been discussed before in places such as 1234.
↑ This is originally from 1, also reported on in 23, showing roughly March - November 2020. What I like about this is that by separating the weekday from weekend in this time-series style, we can see the weekend effect relative to that week's death data.
↑ These are from 4, and while I think they're definitely helpful visualizations, I enjoy that my visualizations also depict the variation within the data.
Footnotes
-
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7990432/ (Accessed February 14 2022) ↩ ↩2
-
https://consultqd.clevelandclinic.org/the-weekend-effect-and-covid-19-mortality/ (Accessed February 12 2022) ↩ ↩2
-
https://covidtracking.com/analysis-updates/how-day-of-week-effects-impact-covid-19-data (Accessed February 12 2022) ↩ ↩2
-
https://dearpandemic.org/covid-19-numbers-lower-on-the-weekends/ (Accessed February 14 2022) ↩ ↩2
These figures show the tendency for reported COVID-19 deaths to be lower on the weekend, and within the weekend, especially on Sunday. Of each weekday, Wednesday appears to have the highest average number of daily deaths both overall and after adjusting for each calendar week's (e.g. each Sunday to Saturday period) average of daily deaths.