Skip to content

Instantly share code, notes, and snippets.

@retrography
Last active March 12, 2020 14:17
Show Gist options
  • Save retrography/4d2ba50ef39cecc6d833e899d7ed2efa to your computer and use it in GitHub Desktop.
Save retrography/4d2ba50ef39cecc6d833e899d7ed2efa to your computer and use it in GitHub Desktop.
Plotting COVID-19 spread in The Netherlands
require(tidyverse)
# Set timezone to Central European Time
Sys.setenv(TZ='CET')
# The prefix to load the data from the web
stub <- "https://www.volksgezondheidenzorg.info/sites/default/files/map/detail_data/klik_corona"
# Day statistics are normally published before 4:30
upTime <-
make_datetime(
year = year(now()),
month = month(now()),
day = day(now()),
hour = 16,
min = 30,
tz = "CET"
)
lastPub <- date(if (now() >= upTime) now() else now() - days(1))
# First publication by the Dutch authorities (RIVM)
firstPub <- ymd("2020-03-03")
# All dates to be downloaded
allDates <- as.Date((firstPub:lastPub), origin = "1970-01-01")
# Fetch the data online
df <-
lapply(allDates,
function(d) {
url <- paste0(stub, format(d, "%d%m%Y"), ".csv")
read_delim(
url,
delim = ";",
locale = locale(decimal_mark = ","),
col_types = cols(
id = "d",
Gemeente = "c",
Indicator = "_",
Aantal = "d"
)
) %>%
transmute(
date = d,
id = as.integer(id),
town = Gemeente,
count = as.integer(replace_na(Aantal, 0))
)
}) %>%
bind_rows
# Save the data locally
write_rds(df, "corona.RDS")
# Calculate overall Netherlands cases
dfa <-
df %>%
group_by(date) %>%
summarise(cases = sum(count))
# RIVM started pubishing the data on March 3rd, on the 6th day of the outbreak
# I fetched the data for the five first dats from ECDC (https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
dfa <- bind_rows(
tibble(
date = c(ymd("2020-02-27"), ymd("2020-02-28"), ymd("2020-02-29"), ymd("2020-03-01"), ymd("2020-03-02")),
cases = c(1, 2, 7, 13, 18)
),
dfa
)
# Calculate the closest contagion multiplier
multiplier <-
round((dfa[[nrow(dfa), 2]] / dfa[[1, 2]]) ^ (1 / (nrow(dfa) - 1)), 2)
dfa$mult <-
dfa[[1, 2]] * (multiplier ^ (0:(nrow(dfa) - 1)))
dfa$int <-
dfa[[1, 2]] * (1.33 ^ (0:(nrow(dfa) - 1)))
# Format the data for ggplot
dfplot <- dfa
colnames(dfplot) <-
c("Date", "Netherlands", paste0((multiplier - 1) * 100, "% Day-on-day\nIncrease"), "33% Day-on-day\nIncrease")
dfplot <-
pivot_longer(dfplot,-Date, names_to = "Legend", values_to = "Count")
# Some extrapolation
dfplot <- bind_rows(
dfplot,
tibble(
Date = allDates[[length(allDates)]] + (1:10),
Legend = paste0((multiplier - 1) * 100, "% Day-on-day\nIncrease"),
Count = dfa[[1, 2]] * (multiplier ^ ((nrow(dfa):(nrow(dfa) + 9))))
),
tibble(
Date = allDates[[length(allDates)]] + (1:10),
Legend = "33% Day-on-day\nIncrease",
Count = dfa[[1, 2]] * (1.33 ^ ((nrow(dfa):(nrow(dfa) + 9))))
)
)
dfplot$DateStr <- format(dfplot$Date, "%b %d")
# Plot
ggplot(dfplot,
aes(
x = DateStr,
y = Count,
group = Legend,
label = round(Count)
)
) +
geom_line(aes(color = Legend)) +
geom_point(aes(color = Legend)) +
geom_label(data = subset(
dfplot,
(
Date == allDates[[length(allDates)]] | Date == allDates[[length(allDates)]] + 10
) &
Legend != "Netherlands"
),
nudge_y = 0.25,
nudge_x = -0.3,
size = 2
) +
labs(title="COVID-19 Positively Tested Cases in the Netherlands") +
scale_y_log10(labels = function(x) format(x, scientific = FALSE)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Save the plot
ggsave(
filename = paste0("Corona_", lastPub, ".png"),
device = "png",
width = 6.4,
height = 3.6,
dpi = 300,
units = "in"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment