Skip to content

Instantly share code, notes, and snippets.

@jstults
Last active December 2, 2020 10:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jstults/ca3dd6160aa443188f7b1d3c750c303a to your computer and use it in GitHub Desktop.
Save jstults/ca3dd6160aa443188f7b1d3c750c303a to your computer and use it in GitHub Desktop.
Fit a vector autoregression moving average model to COVID-19 data
#
# filename: varcovid.py
# author: josh stults
# date: 30 Nov 2020
#
# fit some vector auto regression models to deaths and cases
#
# data from:
# https://covid.ourworldindata.org/data/owid-covid-data.csv
#
# inspiration from:
# https://www.theatlantic.com/science/archive/2020/11/coronavirus-death-rate-third-surge/617150/
# one of the researchers quoted in that article found a lag between
# deaths and cases that maximized linear correlation between the time
# series; pretty crude (poor practice to fit to averaged/smoothed
# data), but interesting
#
# documentation of the vector auto regression model:
# https://www.statsmodels.org/dev/generated/statsmodels.tsa.vector_ar.var_model.VAR.html#statsmodels.tsa.vector_ar.var_model.VAR
#
# numerical tools
import numpy as np
# data management tools
import pandas as pd
# statistical models
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from statsmodels.tsa.statespace.varmax import VARMAX
# visualization tools
from matplotlib import pyplot as plt
import seaborn as sns
# XXX codes = pd.read_csv("owid/owid-covid-codebook.csv") XXX #
data = pd.read_csv("owid/owid-covid-data.csv")
usdata = data.loc[data["location"]=="United States"][["new_tests","new_cases","new_deaths"]]
usdata.index = pd.DatetimeIndex(data.loc[data["location"]=="United States"]["date"])
recent = usdata['20200630':'20201124'] # limit model to more recent data
drecent = recent.diff().dropna() # make the time series more stationary
p = 14
q = 3
model = VARMAX(drecent, order=(p,q))
results = model.fit(maxiter=1000, maxfun=10000000)
pdays = 30 # predict 30 days ahead
predict = results.forecast(steps=pdays)
# undifference
predict = recent[-1:].values + predict.cumsum(axis=0)
predict = pd.DataFrame(predict, columns=["new_tests","new_cases","new_deaths"], index=pd.date_range('2020-11-25', periods=pdays, freq='D'))
# visualizations
plt.figure()
plt.plot(usdata.index.to_pydatetime(),
usdata["new_tests"], 'o', label="New Tests")
plt.plot(predict.index.to_pydatetime(), predict["new_tests"], '-', label="VARMA(%d,%d) Forecast, fit 07-01 to 11-24" % (p,q))
plt.legend(loc=0)
plt.title("Daily New U.S. Covid-19 Tests")
plt.text('20200301',0,"https://covid.ourworldindata.org/data/owid-covid-data.csv")
plt.savefig("us-new-tests.png", bbox_inches="tight")
plt.figure()
plt.plot(usdata.index.to_pydatetime(),
usdata["new_cases"], 'o', label="New Cases")
plt.plot(predict.index.to_pydatetime(), predict["new_cases"], '-', label="VARMA(%d,%d) Forecast, fit 07-01 to 11-24" % (p,q))
plt.legend(loc=0)
plt.title("Daily New U.S. Covid-19 Cases")
plt.text('20200101',0,"https://covid.ourworldindata.org/data/owid-covid-data.csv")
plt.savefig("us-new-cases.png", bbox_inches="tight")
plt.figure()
plt.plot(usdata.index.to_pydatetime(),
usdata["new_deaths"], 'o', label="New Deaths")
plt.plot(predict.index.to_pydatetime(), predict["new_deaths"], '-', label="VARMA(%d,%d) Forecast, fit 07-01 to 11-24" % (p,q))
plt.legend(loc=0)
plt.title("Daily New U.S. Covid-19 Deaths")
plt.text('20200101',0,"https://covid.ourworldindata.org/data/owid-covid-data.csv")
plt.savefig("us-new-deaths.png", bbox_inches="tight")
plt.figure()
plt.plot(drecent.index.to_pydatetime(), drecent["new_tests"], 'o')
plt.savefig("differenced-tests.png", bbox_inches="tight")
plt.figure()
plt.plot(drecent.index.to_pydatetime(), drecent["new_cases"], 'o')
plt.savefig("differenced-cases.png", bbox_inches="tight")
plt.figure()
plt.plot(drecent.index.to_pydatetime(), drecent["new_deaths"], 'o')
plt.savefig("differenced-deaths.png", bbox_inches="tight")
@jstults
Copy link
Author

jstults commented Nov 21, 2020

us-new-tests

@jstults
Copy link
Author

jstults commented Nov 21, 2020

us-new-cases

@jstults
Copy link
Author

jstults commented Nov 21, 2020

us-new-deaths

@jstults
Copy link
Author

jstults commented Dec 2, 2020

us-new-tests

@jstults
Copy link
Author

jstults commented Dec 2, 2020

us-new-cases

@jstults
Copy link
Author

jstults commented Dec 2, 2020

us-new-deaths

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment