-
-
Save cddesja/5a03b4727be9131beb55621ff3ed4923 to your computer and use it in GitHub Desktop.
# Pull the CDC weekly death-count table directly from data.cdc.gov as CSV.
covid <- read.csv(
  file = "https://data.cdc.gov/api/views/y5bj-9g5w/rows.csv?accessType=DOWNLOAD&bom=true&format=true%20target="
)
# Overwrite the first column name: on some systems it is read with stray
# leading characters (e.g. "ï..Jurisdiction"), which breaks the filters below.
colnames(covid)[1] <- "Jurisdiction"
library(dplyr) | |
library(ggplot2) | |
# Total deaths per (Week, Year), summed over the individual jurisdictions.
# Only rows with Type == "Unweighted" are kept; the "United States" row is
# excluded (presumably the national aggregate, which would otherwise be
# counted on top of the jurisdictions -- confirm against the CDC data notes).
ct <- covid %>%
  # Filter before grouping: same rows, but nothing is grouped needlessly.
  filter(Type == "Unweighted", Jurisdiction != "United States") %>%
  group_by(Week, Year) %>%
  # na.rm = TRUE: without it a single missing count makes the whole weekly
  # total NA and leaves a hole in the plotted line. The trade-off is that
  # weeks with missing cells are slightly under-counted.
  summarize(totl = sum(Number.of.Deaths, na.rm = TRUE)) %>%
  # summarize() only peels off the last grouping level; drop the rest so
  # later steps do not silently inherit a grouping by Week.
  ungroup()
# Hand-placed year annotations for the raw-count plots: one row per year,
# giving the x (Week) and y (totl) position at which the text `lab` is drawn.
labels <- data.frame(
  Week = c(54, 54, 54, 54, 54, 45),
  totl = c(53023, 58328, 61719, 56648, 58994, 57728),
  lab = 2015:2020
)
# Heavily penalise scientific notation so numbers print in fixed notation.
options(scipen = 999)
# ggplot's default
# One line per year of weekly totals, with the y-axis range left to ggplot.
ggplot(data = ct) +
  geom_line(
    mapping = aes(
      x = Week,
      y = totl,
      colour = as.factor(Year),
      group = as.factor(Year)
    )
  ) +
  # Year annotations come from the separate `labels` data frame.
  geom_text(data = labels, mapping = aes(x = Week, y = totl, label = lab)) +
  # Six colours in total (five gray, then red), so this assumes exactly six
  # distinct years in `ct`, with the last one highlighted.
  scale_colour_manual(name = "", values = c(rep("gray", 5), "red")) +
  labs(
    x = "Week of the Year",
    y = "All weekly deaths in the U.S. any cause"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank()
  )
# with 0
# Same figure as the default one, but the visible y-range is pinned to
# 0-80000 so the axis starts at zero.
ggplot(data = ct) +
  geom_line(
    mapping = aes(
      x = Week,
      y = totl,
      colour = as.factor(Year),
      group = as.factor(Year)
    )
  ) +
  # Year annotations come from the separate `labels` data frame.
  geom_text(data = labels, mapping = aes(x = Week, y = totl, label = lab)) +
  # coord_cartesian() zooms the view without discarding any data points.
  coord_cartesian(ylim = c(0, 80000)) +
  # Six colours in total (five gray, then red), so this assumes exactly six
  # distinct years in `ct`, with the last one highlighted.
  scale_colour_manual(name = "", values = c(rep("gray", 5), "red")) +
  labs(
    x = "Week of the Year",
    y = "All weekly deaths in the U.S. any cause"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank()
  )
# deviation from 2015 - 2019 weekly average
# Baseline: for each week of the year, the mean weekly total across 2015-2019.
# The years are selected explicitly rather than with `Year != 2020`, so the
# baseline stays the stated 2015-2019 window even if the downloaded data
# contains years after 2020.
ct.year <- ct %>%
  filter(Year %in% 2015:2019) %>%
  group_by(Week) %>%
  summarize(M = mean(totl))
# Attach the baseline to every (Week, Year) total. The join key is spelled out
# so the match cannot change if the two tables ever share another column.
ct.diff <- left_join(ct.year, ct, by = "Week")
# Deviation of each weekly total from that week's baseline mean.
ct.diff$diff <- ct.diff$totl - ct.diff$M
# Hand-placed year annotations for the deviation plot; `totl` holds the
# y position of each label on the deviation scale.
labels <- data.frame(
  Week = c(54, 54, 54, 54, 54, 45),
  totl = c(-4719, 586, 3977, -1094, 1252, 4717),
  lab = 2015:2020
)
# One line per year showing how far each weekly total sits from the baseline.
ggplot(data = ct.diff) +
  geom_line(
    mapping = aes(
      x = Week,
      y = diff,
      colour = as.factor(Year),
      group = as.factor(Year)
    )
  ) +
  geom_text(data = labels, mapping = aes(x = Week, y = totl, label = lab)) +
  # Six colours in total (five gray, then red), so this assumes exactly six
  # distinct years in `ct.diff`, with the last one highlighted.
  scale_colour_manual(name = "", values = c(rep("gray", 5), "red")) +
  labs(
    x = "Week of the Year",
    y = "Deviations of death from the weekly average of deaths in 2015 - 2019"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank()
  )
Thanks for the comments @bklingen. I don't use readr::read_csv() because I have had bad experiences with tibbles breaking code and was only using dplyr here to quickly create a script for illustration. I would typically use by().
The NAs are for some weeks in 2020, so don't really affect the final figure. I appreciate your feedback and your points are certainly noted!
But when I run the code using read.csv, I get:
names(covid)
[1] "ï..Jurisdiction" "Week.Ending.Date" "State.Abbreviation" "Year" "Week"
[6] "Age.Group" "Number.of.Deaths" "Time.Period" "Type" "Suppress"
[11] "Note"
making the following code not reproducible because "Jurisdiction" is scrambled.
What's your OS? I get this
covid <- read.csv("https://data.cdc.gov/api/views/y5bj-9g5w/rows.csv?accessType=DOWNLOAD&bom=true&format=true%20target=")
names(covid)
> names(covid)
[1] "Jurisdiction" "Week.Ending.Date" "State.Abbreviation" "Year"
[5] "Week" "Age.Group" "Number.of.Deaths" "Time.Period"
[9] "Type" "Suppress" "Note"
Could also add
names(covid)[1] <- "Jurisdiction"
But I'd be curious why it's adding those characters for you.
Yeah, might be OS. I'm running Windows 10.
(And when I just download the file and open it in Excel, I do get "Jurisdiction".) Strange.
Thanks for letting me know! My fix should take care of it. Adding readr::read_csv()
certainly would work, too.
You might want to consider read_csv(). When I use your code, the Jurisdiction variable has some character in front.
Also, some deaths seem to be NA, so I used na.rm=TRUE in the sum.
Something along these lines for the first few lines:
library(tidyverse)
covid <- read_csv("https://data.cdc.gov/api/views/y5bj-9g5w/rows.csv?accessType=DOWNLOAD&bom=true&format=true%20target=")
ct <- covid %>%
group_by(Week, Year) %>%
filter(Type == "Unweighted", Jurisdiction != "United States")%>%
select(Deaths='Number of Deaths') %>%
summarize(totl = sum(Deaths, na.rm=TRUE))