Last active
March 12, 2020 14:17
-
-
Save retrography/4d2ba50ef39cecc6d833e899d7ed2efa to your computer and use it in GitHub Desktop.
Plotting COVID-19 spread in The Netherlands
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(tidyverse) | |
# Set timezone to Central European Time | |
Sys.setenv(TZ='CET') | |
# The prefix to load the data from the web | |
stub <- "https://www.volksgezondheidenzorg.info/sites/default/files/map/detail_data/klik_corona" | |
# Day statistics are normally published before 4:30 | |
upTime <- | |
make_datetime( | |
year = year(now()), | |
month = month(now()), | |
day = day(now()), | |
hour = 16, | |
min = 30, | |
tz = "CET" | |
) | |
lastPub <- date(if (now() >= upTime) now() else now() - days(1)) | |
# First publication by the Dutch authorities (RIVM) | |
firstPub <- ymd("2020-03-03") | |
# All dates to be downloaded | |
allDates <- as.Date((firstPub:lastPub), origin = "1970-01-01") | |
# Fetch the data online | |
df <- | |
lapply(allDates, | |
function(d) { | |
url <- paste0(stub, format(d, "%d%m%Y"), ".csv") | |
read_delim( | |
url, | |
delim = ";", | |
locale = locale(decimal_mark = ","), | |
col_types = cols( | |
id = "d", | |
Gemeente = "c", | |
Indicator = "_", | |
Aantal = "d" | |
) | |
) %>% | |
transmute( | |
date = d, | |
id = as.integer(id), | |
town = Gemeente, | |
count = as.integer(replace_na(Aantal, 0)) | |
) | |
}) %>% | |
bind_rows | |
# Save the data locally | |
write_rds(df, "corona.RDS") | |
# Calculate overall Netherlands cases | |
dfa <- | |
df %>% | |
group_by(date) %>% | |
summarise(cases = sum(count)) | |
# RIVM started pubishing the data on March 3rd, on the 6th day of the outbreak | |
# I fetched the data for the five first dats from ECDC (https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide) | |
dfa <- bind_rows( | |
tibble( | |
date = c(ymd("2020-02-27"), ymd("2020-02-28"), ymd("2020-02-29"), ymd("2020-03-01"), ymd("2020-03-02")), | |
cases = c(1, 2, 7, 13, 18) | |
), | |
dfa | |
) | |
# Calculate the closest contagion multiplier | |
multiplier <- | |
round((dfa[[nrow(dfa), 2]] / dfa[[1, 2]]) ^ (1 / (nrow(dfa) - 1)), 2) | |
dfa$mult <- | |
dfa[[1, 2]] * (multiplier ^ (0:(nrow(dfa) - 1))) | |
dfa$int <- | |
dfa[[1, 2]] * (1.33 ^ (0:(nrow(dfa) - 1))) | |
# Format the data for ggplot | |
dfplot <- dfa | |
colnames(dfplot) <- | |
c("Date", "Netherlands", paste0((multiplier - 1) * 100, "% Day-on-day\nIncrease"), "33% Day-on-day\nIncrease") | |
dfplot <- | |
pivot_longer(dfplot,-Date, names_to = "Legend", values_to = "Count") | |
# Some extrapolation | |
dfplot <- bind_rows( | |
dfplot, | |
tibble( | |
Date = allDates[[length(allDates)]] + (1:10), | |
Legend = paste0((multiplier - 1) * 100, "% Day-on-day\nIncrease"), | |
Count = dfa[[1, 2]] * (multiplier ^ ((nrow(dfa):(nrow(dfa) + 9)))) | |
), | |
tibble( | |
Date = allDates[[length(allDates)]] + (1:10), | |
Legend = "33% Day-on-day\nIncrease", | |
Count = dfa[[1, 2]] * (1.33 ^ ((nrow(dfa):(nrow(dfa) + 9)))) | |
) | |
) | |
dfplot$DateStr <- format(dfplot$Date, "%b %d") | |
# Plot | |
ggplot(dfplot, | |
aes( | |
x = DateStr, | |
y = Count, | |
group = Legend, | |
label = round(Count) | |
) | |
) + | |
geom_line(aes(color = Legend)) + | |
geom_point(aes(color = Legend)) + | |
geom_label(data = subset( | |
dfplot, | |
( | |
Date == allDates[[length(allDates)]] | Date == allDates[[length(allDates)]] + 10 | |
) & | |
Legend != "Netherlands" | |
), | |
nudge_y = 0.25, | |
nudge_x = -0.3, | |
size = 2 | |
) + | |
labs(title="COVID-19 Positively Tested Cases in the Netherlands") + | |
scale_y_log10(labels = function(x) format(x, scientific = FALSE)) + | |
theme(axis.text.x = element_text(angle = 45, hjust = 1)) | |
# Save the plot | |
ggsave( | |
filename = paste0("Corona_", lastPub, ".png"), | |
device = "png", | |
width = 6.4, | |
height = 3.6, | |
dpi = 300, | |
units = "in" | |
) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment