Created
April 24, 2020 19:28
-
-
Save campagnola/8aa2f30dd752bf44b6c8fa2257f42978 to your computer and use it in GitHub Desktop.
A week-by-week comparison of deaths caused by influenza and COVID-19 in the United States.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A week-by-week comparison of deaths caused by influenza and COVID-19 in the United States.
CDC influenza / pneumonia data:
https://www.cdc.gov/flu/weekly/#S2
COVID19 data:
http://www.healthdata.org/covid/data-downloads
""" | |
import io | |
import zipfile | |
import urllib.request | |
from datetime import datetime | |
import numpy as np | |
import pyqtgraph as pg | |
import pandas | |
# Download flu data from CDC and parse csv file.
flu_url = 'https://www.cdc.gov/flu/weekly/weeklyarchives2019-2020/data/NCHSData15.csv'
# Use the response as a context manager so the HTTP connection is released
# as soon as the csv has been parsed (it was previously never closed).
with urllib.request.urlopen(flu_url) as flu_fh:
    flu = pandas.read_csv(flu_fh)

# Download COVID-19 data from IHME, extract csv from zip file, and parse.
cov_url = 'https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip'
with urllib.request.urlopen(cov_url) as cov_fh:
    # Read the whole archive into memory so ZipFile gets a seekable buffer.
    cov_zip_bytes = io.BytesIO(cov_fh.read())
with zipfile.ZipFile(cov_zip_bytes) as cov_zf:
    # Use the first .csv member found in the archive.
    csv_fn = [f for f in cov_zf.namelist() if f.endswith('.csv')][0]
    with cov_zf.open(csv_fn) as cov_fh:
        cov = pandas.read_csv(cov_fh)

cov['date'] = pandas.to_datetime(cov['date'])  # convert date string to timestamp

# Add a column with the ISO week number.
# Series.dt.week was deprecated in pandas 1.1 and removed in pandas 2.0;
# dt.isocalendar().week is its replacement. Fall back to the old accessor
# only on pandas versions that predate isocalendar().
if hasattr(cov['date'].dt, 'isocalendar'):
    # isocalendar() yields an unsigned nullable dtype; cast to a plain signed
    # int so that subtracting the week offset later cannot wrap around.
    # NOTE(review): astype(int) assumes no row has a missing date -- confirm
    # against the downloaded data.
    cov['week'] = cov['date'].dt.isocalendar().week.astype(int)
else:
    cov['week'] = cov['date'].dt.week  # pandas < 1.1
# Shift by 25 weeks so the peak of flu season is in the center of the plot
# (otherwise it's split at opposite ends of the plot).
# After the shift, week numbers fall in the range [-25, 26].
week_offset = 25
flu['Week'] = ((flu['Week'] + week_offset) % 52) - week_offset
cov['week'] = ((cov['week'] + week_offset) % 52) - week_offset

# Flu data goes back to 2014, so for each week, find the best and worst cases
# across all years.
# Select the column *before* aggregating: reducing the whole frame first is
# wasteful and can fail on columns that do not support the reduction.
flu_deaths_by_week = flu.groupby('Week')['Influenza Deaths']
min_flu_deaths = flu_deaths_by_week.min().iloc[:52]
max_flu_deaths = flu_deaths_by_week.max().iloc[:52]

# Select US data from covid19 dataset
cov_us = cov[cov['location_name'] == 'United States of America']

# Covid19 data is tracked per day, so add up death counts for each week.
# Summing only the needed columns avoids pandas >= 2.0 raising TypeError when
# it tries to sum the datetime 'date' column (numeric_only now defaults to
# False for groupby reductions).
cov_us_by_week = cov_us.groupby('week')
cov_us_deaths_upper = cov_us_by_week['deaths_upper'].sum()
cov_us_deaths_lower = cov_us_by_week['deaths_lower'].sum()

# Remove future predictions from the plot; only show historical data.
# Only weeks whose upper and lower totals are exactly equal are kept
# (presumably reported data carries no uncertainty interval -- verify
# against the IHME data dictionary).
cov_us_deaths = cov_us_deaths_upper[cov_us_deaths_upper == cov_us_deaths_lower]
# Global pyqtgraph look: white background with black foreground.
# These options are set before the plot window is created.
for option, color in (('background', 'w'), ('foreground', 'k')):
    pg.setConfigOption(option, color)

# Create the plot window.
# NOTE(review): no Qt event loop is started in this script, so it presumably
# is meant to be run from an interactive session -- confirm.
plt = pg.plot(labels={'left': 'US Deaths Per Week'})

# Label the bottom axis with month names instead of week numbers.
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# Position of the first day of each month of 2020, expressed in weeks
# (day-of-year / 7), then shifted the same way as the week columns.
first_of_month = [datetime(year=2020, month=m, day=1) for m in range(1, 13)]
month_vals = np.array([day.timetuple().tm_yday / 7 for day in first_of_month])
month_vals = ((month_vals + week_offset) % 52) - week_offset
month_ticks = list(zip(month_vals, month_names))
plt.getAxis('bottom').setTicks([month_ticks])

# Both curves are drawn antialiased with a black outline and filled to zero.
curve_style = dict(antialias=True, fillLevel=0, pen='k')

# Worst-case flu deaths per week.
flu_weeks = max_flu_deaths.index.to_numpy()
flu_counts = max_flu_deaths.to_numpy()
c2 = plt.plot(flu_weeks, flu_counts, fillBrush=(100, 100, 0, 200), **curve_style)

# COVID-19 deaths per week, added after (and so drawn over) the flu curve.
cov_weeks = cov_us_deaths.index.to_numpy()
cov_counts = cov_us_deaths.to_numpy()
c1 = plt.plot(cov_weeks, cov_counts, fillBrush=(200, 50, 0, 200), **curve_style)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment