Skip to content

Instantly share code, notes, and snippets.

@arnavs
Last active March 31, 2020 17:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arnavs/b43ddc4e6e4228a9336acf1e4ebff13e to your computer and use it in GitHub Desktop.
Save arnavs/b43ddc4e6e4228a9336acf1e4ebff13e to your computer and use it in GitHub Desktop.
COVID Analysis
# data source is: https://pomber.github.io/covid19/timeseries.json
# data is updated thrice daily from Johns Hopkins numbers
using Pkg
pkg"activate .."
using JSON, Statistics, Plots, Dates, LsqFit, TimeSeries
data = JSON.parsefile("../data/timeseries_03_31_20.json");
# Aggregate statistics
function unpack_country(country)
country_data = data[country]
confirmed = [datum["confirmed"] for datum in country_data]
dates = Date.([datum["date"] for datum in country_data])
diffs = diff(confirmed)
return country_data, confirmed, dates, diffs
end
function plot_country_aggregates(country)
country_data, confirmed, dates, diffs = unpack_country(country)
p = plot(dates[2:end], confirmed[2:end], lw = 3, legend = :topleft, title = "$country Aggregate", label = "Confirmed Cases")
plot!(p, dates[2:end], diffs, lw = 3, label = "Day-to-Day Difference", xticks = dates[2:10:end])
savefig(p, "~/Desktop/$(country)_aggregate.jpeg")
end
plot_country_aggregates("Belgium")
# plot_country_aggregates("Canada")
# plot_country_aggregates("France") # because this gives us a longer view
# Regression modeling
function fit_country(country, model, p_guess)
country_data, confirmed, dates, diffs = unpack_country(country)
t = 1:length(dates)
fit = curve_fit(model, t, confirmed, p_guess) # return this
end
# Exponential growth (y(t) = Ae^(kt))
@. model(t, p) = p[1]*exp(p[2]*t)
fr_fit = fit_country("France", model, [0.5, 0.5])
@show coef(fr_fit) # ends up being y(t) = 3.64e^(0.13t)
@show stderror(fr_fit) # stderror(fr_fit) = [0.5807248710582464, 0.002419476241731794]
be_fit = fit_country("Belgium", model, [0.5, 0.5])
@show coef(be_fit) # ends up being y(t) = 0.07*e^(0.17t)
@show stderror(be_fit) # stderror(be_fit) = [0.009881708341035871, 0.0018586282749802605]
prc_fit = fit_country("China", model, [0.5, 0.5])
@show coef(prc_fit) # ends up being y(t) = 30700e^0.017t... so, apparently, an order of magnitude better than Belgium and France? seems dubious
@show stderror(prc_fit) # stderror(prc_fit) = [3123.3555378369188, 0.0020020903719526564]
# Logistic modeling
# The logistic growth equation is (Carrying Capacity)/(1 + e^(=growth rate*(t - t_peak)))
# Let's try filling in the gaps with stuff we know
L = 0.7*11.4*1e6 # millions
be_rate = 0.17 # from above
@. model(t, p) = L/(1 + exp(-be_rate*(t - p[1])))
be_data, be_conf, be_dates, diffs = unpack_country("Belgium")
t = 1:length(be_dates)
be_logistic_fit = curve_fit(model, t, be_conf, [80.])
@show coef(be_logistic_fit) # 107.08398211493504
@show stderror(be_logistic_fit) # stderror(be_logistic_fit) = [0.03196080987817913]
# Plot regression
p = coef(be_logistic_fit)[1]
restricted_data = model.(1:length(be_dates), Ref(p))
pl = plot(be_dates, restricted_data, lw = 3, label = "Model", title = "Belgium So Far", legend = :topleft)
plot!(be_dates, be_conf, lw = 3, label = "Data", xticks = be_dates[1:10:end])
savefig(pl, "~/Desktop/Belgium_restricted.jpeg")
# Belgium Overall
full_data = model.(1:200, Ref(p))
dates = collect(range(be_dates[1], step = Day(1), length = 200))
pl = plot(dates, full_data, lw = 3, label = "Model", legend = :topleft, title = "Belgium Model Full Timescale", xticks = dates[1:50:end])
peak = dates[1] + Day(floor(p))
vline!(pl, [peak], label = "$peak", lw = 3)
savefig(pl, "~/Desktop/Belgium_model.jpeg")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment