Created
April 22, 2020 23:28
-
-
Save graham-thomson/08791e9faf705d6bbdf0188b9c0a9c3b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
# ## Plotting COVID19 Data in the US
# In[ ]:
# Shell script run through the notebook's bash cell magic: clone the NYT
# covid-19-data repository when the directory is missing, otherwise pull
# the latest commits into the existing checkout.
_refresh_data_script = (
    '\n'
    'COVID_DATA_DIR=./covid-19-data/\n'
    '\n'
    'if [ ! -d ${COVID_DATA_DIR} ]; then\n'
    ' git clone https://github.com/nytimes/covid-19-data.git\n'
    'else\n'
    ' cd ${COVID_DATA_DIR} && git pull\n'
    'fi'
)
get_ipython().run_cell_magic('bash', '', _refresh_data_script)
# In[ ]:
get_ipython().run_line_magic('matplotlib', 'inline')
import datetime as dt
import os
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import requests
import seaborn as sns
from IPython.display import Markdown
sns.set()
# Timestamp of this run; shown later in the summary as the "updated on" value.
today = dt.datetime.now()


def _read_dated_csv(path):
    """Read a CSV whose first row is the header, parsing its ``date`` column."""
    return pd.read_csv(path, header=0, parse_dates=["date"])


states = _read_dated_csv("./covid-19-data/us-states.csv")
# Rename "state" to "state_name" so county rows share a key with the lookup tables.
counties = _read_dated_csv("./covid-19-data/us-counties.csv").rename(
    columns={"state": "state_name"}
)
# ### Summary Metrics
# In[ ]:
# Take the largest cases/deaths value recorded for each state, then add the
# per-state figures together for the US-wide totals.
total_cases_deaths = states.groupby("state").agg({"cases": "max", "deaths": "max"})
us_cases = total_cases_deaths["cases"].sum()
us_deaths = total_cases_deaths["deaths"].sum()
latest_data_date = states["date"].max()
# Leading and trailing empty entries reproduce the newline at each end of the text.
summary_lines = [
    "",
    f"## TOTAL US CASES: {us_cases:,}",
    f"## TOTAL US DEATHS: {us_deaths:,}",
    f"_latest data date: {latest_data_date}_",
    f"_updated on: {today}_",
    "",
]
Markdown(data="\n".join(summary_lines))
# ### Time Series Cases/Deaths
# In[ ]:
def _plot_state_series(metric):
    """Build a log-scale line chart of one ``states`` column over time.

    One spline line is drawn per state. The title names the metric and the
    first and last dates present in ``states``.

    Args:
        metric: Column of ``states`` to plot on the y axis ("cases" or "deaths").

    Returns:
        The Plotly figure, not yet shown.
    """
    first_day = states["date"].min().date()
    last_day = states["date"].max().date()
    figure = px.line(
        data_frame=states,
        x="date",
        y=metric,
        color="state",
        line_group="state",
        hover_name="state",
        line_shape="spline",
        render_mode="svg",
        log_y=True,
    )
    figure.update_layout(
        title=f"COVID-19 {metric.capitalize()} by US State ({first_day} - {last_day})"
    )
    return figure


cases = _plot_state_series("cases")
cases.show()
# In[ ]:
deaths = _plot_state_series("deaths")
deaths.show()
# ### Mapping Cases by County
# In[ ]:
# State-name/abbreviation table; its columns are renamed so the full name is
# "state_name" and the abbreviation is "state", matching the join keys used
# elsewhere in this notebook.
state_lookup_url = "https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv"
state_lookup = pd.read_csv(state_lookup_url)
state_lookup = state_lookup.rename(
    columns={"State": "state_name", "Abbreviation": "state"}
)
state_lookup.head(1)
# In[ ]:
# County reference table downloaded as CSV from data.healthcare.gov.
county_lookup_response = requests.get(
    "https://data.healthcare.gov/api/views/52wv-g36k/rows.csv?accessType=DOWNLOAD&sorting=true",
    # Without a timeout an unresponsive server would block this cell forever.
    timeout=30,
)
# Fail loudly on an HTTP error instead of handing an error page to the CSV parser.
county_lookup_response.raise_for_status()
county_lookup_data = StringIO(county_lookup_response.content.decode())
county_lookup = pd.read_csv(county_lookup_data)
county_lookup.head(1)
# In[ ]:
# An inner join keeps only rows whose "state" value appears in state_lookup,
# so US territories etc. that are absent from that table are dropped.
county_lookup = pd.merge(county_lookup, state_lookup, how="inner", on="state")
county_lookup.head(1)
# In[ ]:
# Both lookups below average the same two coordinate columns.
_coordinate_means = {"latitude": "mean", "longitude": "mean"}
# average lat/longs by state
state_lookup_mean = (
    county_lookup
    .groupby(["state_name"])
    .agg(_coordinate_means)
    .reset_index()
)
# In[ ]:
# average lat/longs by county/state
county_lookup_mean = (
    county_lookup
    .groupby(["county", "state_name"])
    .agg(_coordinate_means)
    .reset_index()
)
# In[ ]:
# Largest case count recorded for each (county, state) pair; the notebook
# treats this maximum as the most recent total.
counties_total_cases = (
    counties
    .groupby(["county", "state_name"])[["cases"]]
    .max()
    .reset_index()
)
# In[ ]:
# join to our county metadata (left join: counties without a coordinate match
# keep their case counts and get NaN latitude/longitude)
lat_long_cases = counties_total_cases.merge(
    county_lookup_mean,
    on=["county", "state_name"],
    how="left",
).sort_values("state_name")

# Rows whose county is "Unknown" and that have no latitude fall back to the
# mean coordinates of their state. States missing from state_lookup_mean are
# left untouched. This is done with one masked assignment per column rather
# than a row-by-row loop, which also avoids the deprecated float(Series) call.
state_centroids = state_lookup_mean.set_index("state_name")
needs_state_fallback = (
    (lat_long_cases["county"] == "Unknown")
    & lat_long_cases["latitude"].isna()
    & lat_long_cases["state_name"].isin(state_centroids.index)
)
fallback_states = lat_long_cases.loc[needs_state_fallback, "state_name"]
for coordinate in ("latitude", "longitude"):
    # Series.map looks each state name up in the centroid column; the result
    # keeps the row labels of fallback_states, so .loc aligns it row by row.
    lat_long_cases.loc[needs_state_fallback, coordinate] = fallback_states.map(
        state_centroids[coordinate]
    )
def set_lat_long(county, state_name, lat, long, frame=None):
    """Overwrite the coordinates of every row matching a county and state.

    Args:
        county: Value to match in the "county" column.
        state_name: Value to match in the "state_name" column.
        lat: Latitude written to the matching rows.
        long: Longitude written to the matching rows.
        frame: DataFrame to modify in place. Defaults to the module-level
            ``lat_long_cases`` so existing four-argument calls are unchanged.

    Returns:
        None. The frame is modified in place; if no row matches, nothing changes.
    """
    target = lat_long_cases if frame is None else frame
    # Build the row mask once and reuse it for both coordinate columns.
    matches = (target["county"] == county) & (target["state_name"] == state_name)
    target.loc[matches, "latitude"] = lat
    target.loc[matches, "longitude"] = long
# set some by hand: (county, state_name, latitude, longitude), applied in order
manual_coordinates = (
    ("New York City", "New York", 40.7128072, -74.0056247),
    ("Orleans", "Louisiana", 30.0326996, -90.1627533),
    ("Jefferson", "Louisiana", 29.9589877, -90.203087),
    ("DuPage", "Illinois", 41.8398502, -88.2288021),
    ("DeKalb", "Georgia", 33.7929946, -84.3270541),
    ("East Baton Rouge", "Louisiana", 30.5161109, -91.2204589),
    ("Baltimore city", "Maryland", 39.2848183, -76.6905369),
    ("Caddo", "Louisiana", 32.6074573, -94.3130353),
    ("Unknown", "Puerto Rico", 18.2078371, -67.7072376),
    ("St. Tammany", "Louisiana", 30.4257161, -90.1605642),
    ("St. Louis city", "Missouri", 38.6532851, -90.383547),
)
for county_name, state, latitude, longitude in manual_coordinates:
    set_lat_long(county_name, state, latitude, longitude)
lat_long_cases.head(1)
# In[ ]:
# plot
# Read the Mapbox token from the MAPBOX_ACCESS_TOKEN environment variable so
# that a real credential does not have to be stored in the notebook source.
# The original placeholder value remains the fallback when it is unset.
mapbox_token = os.environ.get("MAPBOX_ACCESS_TOKEN", "abc123")
px.set_mapbox_access_token(mapbox_token)
# One marker per row of lat_long_cases, sized by case count and coloured by state.
county_map_plot = px.scatter_mapbox(
    data_frame=lat_long_cases,
    lat="latitude",
    lon="longitude",
    size="cases",
    color="state_name",
    hover_data=lat_long_cases.columns,
    color_continuous_scale=px.colors.cyclical.Twilight,
    size_max=40,
    zoom=3,
)
latest_day = states["date"].max().date()
county_map_plot.update_layout(
    title=f"Total COVID-19 Cases by US County (as of {latest_day})"
)
county_map_plot.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment