Skip to content

Instantly share code, notes, and snippets.

@stephenc
Created August 15, 2020 22:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stephenc/1300c2f0ebd4f21a803de12a2e338948 to your computer and use it in GitHub Desktop.
Save stephenc/1300c2f0ebd4f21a803de12a2e338948 to your computer and use it in GitHub Desktop.
Correllation vs Causation
library(tidyverse)
library(grid)
library(zoo)
all = read.csv("owid-covid-data.csv", header = TRUE)
irl <- all %>%
filter(iso_code == "IRL") %>%
mutate(date=as.Date(date, format="%Y-%m-%d")) %>%
mutate(cma_cases = rollmean(new_cases, k=5, fill = NA)) %>%
mutate(cma_deaths = rollmean(new_deaths, k=5, fill = NA))
png("death_vs_cases.png",width=1200,height=628)
p1 <- ggplot(irl, aes(x=date,y=new_cases)) +
scale_x_date() +
ylab("New Cases") +
scale_y_continuous(expand = c(0, 0)) +
geom_col(colour=rgb(.8,.8,0),fill=rgb(.7,.7,0)) + theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
p2 <- ggplot(irl, aes(x=date,y=new_deaths)) +
xlab("Date") +
scale_x_date() +
ylab("New Deaths") +
scale_y_continuous(trans = "reverse", expand = c(0, 0)) +
geom_col(colour=rgb(.0,.8,.8),fill=rgb(0,.7,.7))
g1 <- ggplotGrob(p1)
g2 <- ggplotGrob(p2)
g <- rbind(g1, g2, size="first")
grid.newpage()
grid.draw(g)
dev.off()
irl_cor <- c()
for (off in 0:40) {
irl_cor <- rbind(irl_cor,c(off,cor(x=irl$cma_cases[60:180],y=irl$cma_deaths[(60+off):(180+off)])))
}
irl_cor <- data.frame(offset=irl_cor[,1],correlation=irl_cor[,2])
png("death_vs_cases_correlation.png",width=1200,height=628)
ggplot(irl_cor,aes(x=offset,y=correlation)) +
geom_line() +
ggtitle("Correllation of new deaths after new cases (up to 1st July 2020)") +
xlab("Offset (days)") +
ylab("Correlation coefficient")
dev.off()
png("deaths_vs_cases_correlation.png",width=1200,height=628)
hist(irl$cma_deaths[73:190]/irl$cma_cases[63:180], xlab="New deaths / New cases", main="New deaths compared to new cases 10 days prior")
dev.off()
mean(irl$cma_deaths[73:190]/irl$cma_cases[63:180])
irl_pred <- data.frame(date=irl$date[73:226],new_cases=irl$new_cases[73:226],pred_deaths=irl$cma_cases[63:216]*0.08263967,new_deaths=irl$new_deaths[73:226])
png("pred_deaths.png",width=1200,height=628)
ggplot(irl_pred, aes(x=date,y=new_deaths)) +
ggtitle("Actual deaths vs 8.2% of 5 day moving average of new cases offset by -10 days") +
xlab("Date") +
scale_x_date() +
ylab("New Deaths") +
coord_cartesian(ylim=c(0,75)) +
geom_col(colour=rgb(.0,.8,.8),fill=rgb(0,.7,.7)) +
geom_line(y=irl_pred$pred_deaths)
dev.off()
png("pred_deaths_zoom.png",width=1200,height=628)
ggplot(irl_pred, aes(x=date,y=new_deaths)) +
ggtitle("Actual deaths vs 8.2% of 5 day moving average of new cases offset by -10 days") +
xlab("Date") +
scale_x_date() +
ylab("New Deaths") +
coord_cartesian(ylim=c(0,10), xlim=c(as.Date("2020-07-01",format="%Y-%m-%d"),as.Date("2020-08-15",format="%Y-%m-%d"))) +
geom_col(colour=rgb(.0,.8,.8),fill=rgb(0,.7,.7)) +
geom_line(y=irl_pred$pred_deaths)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment