Skip to content

Instantly share code, notes, and snippets.

@campagnola
Created April 24, 2020 19:28
Show Gist options
  • Save campagnola/8aa2f30dd752bf44b6c8fa2257f42978 to your computer and use it in GitHub Desktop.
Save campagnola/8aa2f30dd752bf44b6c8fa2257f42978 to your computer and use it in GitHub Desktop.
A week-by-week comparison of deaths caused by influenza and COVID-19 in the United States.
"""
A week-by-week comparison of deaths caused by influenza and COVID-19 in the United States.
CDC influenza / pneumonia data:
https://www.cdc.gov/flu/weekly/#S2
COVID19 data:
http://www.healthdata.org/covid/data-downloads
"""
import io
import zipfile
import urllib.request
from datetime import datetime
import numpy as np
import pyqtgraph as pg
import pandas
# Download flu data from CDC and parse csv file.
flu_url = 'https://www.cdc.gov/flu/weekly/weeklyarchives2019-2020/data/NCHSData15.csv'
# Use the response as a context manager so the HTTP connection is released
# as soon as pandas has finished reading it.
with urllib.request.urlopen(flu_url) as flu_fh:
    flu = pandas.read_csv(flu_fh)
# Download COVID-19 data from IHME, extract csv from zip file, and parse.
cov_url = 'https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip'
# Read the whole archive into memory first: ZipFile needs a seekable file
# object, which the HTTP response is not.
with urllib.request.urlopen(cov_url) as cov_fh:
    cov_zip_bytes = cov_fh.read()
with zipfile.ZipFile(io.BytesIO(cov_zip_bytes)) as cov_zf:
    # The first .csv member of the archive is taken as the data file.
    csv_fn = [f for f in cov_zf.namelist() if f.endswith('.csv')][0]
    with cov_zf.open(csv_fn) as cov_fh:
        cov = pandas.read_csv(cov_fh)
cov['date'] = pandas.to_datetime(cov['date'])  # convert date string to timestamp
# Add a column with the ISO week number. `Series.dt.week` was removed in
# pandas 2.0; `dt.isocalendar().week` is its replacement. It returns an
# unsigned nullable dtype, so cast to a plain signed int to keep later
# arithmetic (which subtracts an offset) from wrapping around.
# NOTE(review): the cast assumes every row has a parseable date (no NaT).
cov['week'] = cov['date'].dt.isocalendar().week.astype(int)
# Rotate the week axis by 25 weeks (modulo a 52-week year) so that the peak
# of flu season lands in the middle of the plot rather than being split
# between its left and right edges.
week_offset = 25
flu['Week'] = flu['Week'].add(week_offset).mod(52).sub(week_offset)
cov['week'] = cov['week'].add(week_offset).mod(52).sub(week_offset)
# Flu data goes back to 2014, so for each week, find the best and worst cases
# across all years.
# Select the 'Influenza Deaths' column *before* aggregating so that only the
# column we need is reduced (instead of every column in the frame), and share
# one groupby between the min and max computations.
flu_deaths_by_week = flu.groupby('Week')['Influenza Deaths']
min_flu_deaths = flu_deaths_by_week.min().iloc[:52]
max_flu_deaths = flu_deaths_by_week.max().iloc[:52]
# Select US data from covid19 dataset
cov_us = cov[cov['location_name'] == 'United States of America']
# Covid19 data is tracked per day, so add up death counts for each week.
# Pick the column before summing: summing the whole frame would also try to
# sum the datetime 'date' column, which raises TypeError on pandas >= 2.0
# (and needlessly processes every other column on older versions).
cov_us_by_week = cov_us.groupby('week')
cov_us_deaths_upper = cov_us_by_week['deaths_upper'].sum()
cov_us_deaths_lower = cov_us_by_week['deaths_lower'].sum()
# Remove future predictions from the plot; only show historical data.
# Weeks are kept only where the summed upper and lower bounds are equal.
cov_us_deaths = cov_us_deaths_upper[cov_us_deaths_upper == cov_us_deaths_lower]
# Set up a plot window with a white background and black foreground
pg.setConfigOption('background', 'w')
pg.setConfigOption('foreground', 'k')
plt = pg.plot(labels={'left': 'US Deaths Per Week'})
# Label the bottom axis with month names: each tick sits at the (fractional)
# week in which that month starts in 2020, shifted by the same week offset
# that is applied to the data.
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_start_days = [datetime(2020, m, 1).timetuple().tm_yday for m in range(1, 13)]
month_vals = np.array(month_start_days) / 7
month_vals = ((month_vals + week_offset) % 52) - week_offset
month_ticks = [(val, name) for val, name in zip(month_vals, month_names)]
plt.getAxis('bottom').setTicks([month_ticks])
# Draw the worst-case (per-week maximum) flu deaths as a filled curve
flu_weeks = max_flu_deaths.index.to_numpy()
flu_counts = max_flu_deaths.to_numpy()
c2 = plt.plot(
    flu_weeks,
    flu_counts,
    antialias=True,
    fillLevel=0,
    fillBrush=(100, 100, 0, 200),
    pen='k',
)
# Draw the weekly covid-19 deaths on top, using a different fill colour
cov_weeks = cov_us_deaths.index.to_numpy()
cov_counts = cov_us_deaths.to_numpy()
c1 = plt.plot(
    cov_weeks,
    cov_counts,
    antialias=True,
    fillLevel=0,
    fillBrush=(200, 50, 0, 200),
    pen='k',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment