Last active
March 6, 2020 21:27
-
-
Save simoncozens/0bd19949634b9e3cf942dc1998c6f949 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(tidyr) | |
logTicks <- function(n = 5, base = 10){ | |
# Divisors of the logarithm base. E.g. for base 10: 1, 2, 5, 10. | |
divisors <- which((base / seq_len(base)) %% 1 == 0) | |
mkTcks <- function(min, max, base, divisor){ | |
f <- seq(divisor, base, by = divisor) | |
return(unique(c(base^min, as.vector(outer(f, base^(min:max), `*`))))) | |
} | |
function(x) { | |
rng <- range(x, na.rm = TRUE) | |
lrng <- log(rng, base = base) | |
min <- floor(lrng[1]) | |
max <- ceiling(lrng[2]) | |
tck <- function(divisor){ | |
t <- mkTcks(min, max, base, divisor) | |
t[t >= rng[1] & t <= rng[2]] | |
} | |
# For all possible divisors, produce a set of ticks and count how many ticks | |
# result | |
tcks <- lapply(divisors, function(d) tck(d)) | |
l <- vapply(tcks, length, numeric(1)) | |
# Take the set of ticks which is nearest to the desired number of ticks | |
i <- which.min(abs(n - l)) | |
if(l[i] < 2){ | |
# The data range is too small to show more than 1 logarithm tick, fall | |
# back to linear interpolation | |
ticks <- pretty(x, n = n, min.n = 2) | |
}else{ | |
ticks <- tcks[[i]] | |
} | |
return(ticks) | |
} | |
} | |
covid <- read_csv(url("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv")) %>% gather(ends_with("/20"), key="Date", value="Cases") %>% filter(!(`Country/Region` == 'Mainland China')) %>% group_by(Date) %>% summarise(Cases=sum(Cases)) | |
covid$Date <- parse_date(covid$Date,"%m/%d/%y") | |
model <- lm(log(Cases) ~ Date,data=covid,na.action=na.exclude) | |
newdata <- data.frame(Date=seq(as.Date("2020/02/15"),by="day",length.out=40)) | |
newdata$Cases <- exp(predict(model,data=tail(covid,25), newdata=newdata)) | |
ggplot(covid,aes(x=Date,y=Cases)) + theme_grey(base_size=20) +geom_point() + geom_line(data=newdata,color='red') + scale_x_date(date_breaks="7 days") + theme(axis.text.x = element_text(angle = 70,vjust=0.5)) + scale_y_log10(breaks = logTicks(n = 10), minor_breaks = logTicks(n = 40),labels=scales::comma) + theme(plot.margin=unit(c(10,30,10,10),"pt"),axis.title.x=element_blank()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment