Skip to content

Instantly share code, notes, and snippets.

@lashlee
Created May 19, 2020 05:58
Show Gist options
  • Save lashlee/cd432838604fdaaff35319e8154d28f4 to your computer and use it in GitHub Desktop.
Save lashlee/cd432838604fdaaff35319e8154d28f4 to your computer and use it in GitHub Desktop.
Use the NYT's data to plot Santa Clara County's COVID-19 cases.
library(ggplot2)
# Download the New York Times county-level COVID-19 table and keep only the
# rows for Santa Clara County, California, ordered from oldest to newest.
url <- "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
dat <- read.csv(file = url, header = TRUE, stringsAsFactors = FALSE)
scc <- dat[with(dat, state == "California" & county == "Santa Clara"), ]
# Turn the date column into Date values so rows can be sorted by day.
scc[["date"]] <- as.Date(scc[["date"]])
scc <- scc[order(scc[["date"]]), ]
# County advises that the five most recent days may change.
# So remove those days from analysis (today and the four days before it).
provisional_days <- 5L
provisional_dates <- seq(
  Sys.Date() - (provisional_days - 1L),
  Sys.Date(),
  by = "day"
)
scc <- scc[!(scc$date %in% provisional_dates), ]

# Day-over-day change of a cumulative series.
#
# cumulative: numeric vector of running totals, one entry per row.
# Returns a numeric vector of the same length. The first entry has no
# predecessor and is reported as 0; an empty input gives an empty result,
# so this also works on a data frame with no rows.
daily_increments <- function(cumulative) {
  if (length(cumulative) == 0L) {
    return(numeric(0))
  }
  c(0, diff(cumulative))
}

# Trailing moving average over the last `window` entries (current included).
#
# x:      numeric vector.
# window: number of consecutive entries averaged (default 7).
# Returns a numeric vector the same length as `x`. Positions that do not yet
# have `window` entries behind them are left at 0.
trailing_mean <- function(x, window = 7L) {
  averaged <- numeric(length(x))
  if (length(x) >= window) {
    ends <- window:length(x)
    averaged[ends] <- vapply(
      ends,
      function(end) mean(x[(end - window + 1L):end]),
      numeric(1)
    )
  }
  averaged
}

scc$incremental_cases <- daily_increments(scc$cases)
scc$incremental_deaths <- daily_increments(scc$deaths)
# "sdma" columns: seven-day moving average of the daily increments.
scc$sdma_incremental_cases <- trailing_mean(scc$incremental_cases)
scc$sdma_incremental_deaths <- trailing_mean(scc$incremental_deaths)
# Base plots
# Write a line chart of `values` against `dates` to a PNG file.
#
# file:   output path of the PNG.
# dates:  x values (here the Date column of scc).
# values: y values, same length as `dates`.
# title:  main title drawn above the chart.
# Returns `file` invisibly.
#
# Exactly one high-level plot is drawn per file: the file name has no page
# number format, so a second high-level plot call on the same device would
# start a new page and overwrite the first chart.
save_trend_png <- function(file, dates, values, title) {
  png(file, width = 6, height = 4.5, units = "in", res = 400)
  # Close the device even if plotting fails, so no device is left open.
  on.exit(dev.off(), add = TRUE)
  par(mar = c(2.5, 2.5, 2, 1))
  plot(dates, values, type = "l", xlab = "", ylab = "", main = title)
  invisible(file)
}

save_trend_png(
  "scc-sars-cov-2-cases.png",
  scc$date,
  scc$sdma_incremental_cases,
  "Santa Clara County, 7 Day Moving Average of New Cases"
)
save_trend_png(
  "scc-sars-cov-2-deaths.png",
  scc$date,
  scc$sdma_incremental_deaths,
  "Santa Clara County, 7 Day Moving Average of New Deaths"
)
# ggplots
# Daily new cases (translucent bars) with their 7-day moving average (line),
# from 1 March 2020 onward.
# print() is called explicitly so the chart is also drawn when this file is
# run through source(), where top-level values are not auto-printed.
print(
  ggplot(scc[scc$date >= as.Date("2020-03-01"), ]) +
    geom_col(
      aes(date, incremental_cases),
      color = NA, fill = "red", alpha = .35
    ) +
    geom_line(aes(date, sdma_incremental_cases), size = 1.1, color = "red2") +
    scale_y_continuous(
      name = "",
      # Keep the 0-200 by 40 ticks while the data fits that range; once the
      # axis extends past 200, fall back to pretty() so the upper part of
      # the axis still gets labels and grid lines.
      breaks = function(limits) {
        top <- limits[2]
        if (is.na(top) || top <= 200) {
          seq(0, 200, 40)
        } else {
          pretty(range(0, limits))
        }
      }
    ) +
    scale_x_date(name = "") +
    ggtitle("New reported COVID-19 cases by day in Santa Clara County") +
    theme(
      text = element_text(family = "Helvetica"),
      plot.title.position = "plot",
      plot.title = element_text(
        face = "bold", size = 22, color = "gray10", margin = margin(b = 0)
      ),
      # Horizontal dashed guides only; no vertical grid or panel fill.
      panel.grid.major.x = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.y = element_line(color = "gray70", linetype = "dashed"),
      panel.grid.minor.y = element_line(color = "gray70", linetype = "dashed"),
      panel.background = element_blank(),
      # Negative margins pull the axis labels in toward the panel.
      axis.text.y = element_text(
        size = 18, colour = "grey30", margin = margin(l = -14)
      ),
      axis.text.x = element_text(size = 18, margin = margin(t = -10)),
      axis.ticks = element_blank(),
      plot.margin = margin(t = 10, r = 14, b = -4, l = 10)
    )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment