Skip to content

Instantly share code, notes, and snippets.

@graham-thomson
Created April 22, 2020 23:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save graham-thomson/08791e9faf705d6bbdf0188b9c0a9c3b to your computer and use it in GitHub Desktop.
Save graham-thomson/08791e9faf705d6bbdf0188b9c0a9c3b to your computer and use it in GitHub Desktop.
# coding: utf-8
# ## Plotting COVID-19 Data in the US
# In[ ]:
# Fetch the NYT covid-19-data repository with a bash cell: clone it when the
# local directory does not exist yet, otherwise pull inside it.
get_ipython().run_cell_magic(
    "bash",
    "",
    "\n"
    "COVID_DATA_DIR=./covid-19-data/\n"
    "\n"
    "if [ ! -d ${COVID_DATA_DIR} ]; then\n"
    " git clone https://github.com/nytimes/covid-19-data.git\n"
    "else\n"
    " cd ${COVID_DATA_DIR} && git pull\n"
    "fi",
)
# In[ ]:
# Equivalent of the `%matplotlib inline` notebook magic.
get_ipython().run_line_magic("matplotlib", "inline")
import plotly.express as px
import pandas as pd
import numpy as np
import plotly.io as pio
import seaborn as sns
import datetime as dt
import matplotlib.pyplot as plt
import requests
from io import StringIO
from IPython.display import Markdown
# Timestamp of this run (naive local time); shown as "updated on" in the summary.
today = dt.datetime.now()

# Switch matplotlib over to seaborn's default theme.
sns.set()

# State-level table from the cloned repository, with "date" parsed to datetime.
states = pd.read_csv("./covid-19-data/us-states.csv", header=0, parse_dates=["date"])

# County-level table. Its "state" column is renamed to "state_name" because the
# later merges join on ["county", "state_name"].
counties = pd.read_csv("./covid-19-data/us-counties.csv", header=0, parse_dates=["date"])
counties = counties.rename(columns={"state": "state_name"})
# ### Summary Metrics
# In[ ]:
# Largest "cases" and "deaths" value recorded for each state (indexed by state).
total_cases_deaths = states.groupby("state")[["cases", "deaths"]].max()

# Headline numbers: the national totals are the sums of the per-state maxima.
Markdown(data=f"""
## TOTAL US CASES: {total_cases_deaths['cases'].sum():,}
## TOTAL US DEATHS: {total_cases_deaths['deaths'].sum():,}
_latest data date: {states['date'].max()}_
_updated on: {today}_
""")
# ### Time Series Cases/Deaths
# In[ ]:
# Line chart of "cases" over "date": one spline per state, log-scaled y axis,
# rendered as SVG. update_layout returns the same figure, so it is chained.
cases = px.line(
    states,
    x="date",
    y="cases",
    color="state",
    line_group="state",
    hover_name="state",
    log_y=True,
    line_shape="spline",
    render_mode="svg",
).update_layout(
    title=f"COVID-19 Cases by US State ({states['date'].min().date()} - {states['date'].max().date()})"
)
cases.show()
# In[ ]:
# Line chart of "deaths" over "date": one spline per state, log-scaled y axis,
# rendered as SVG. update_layout returns the same figure, so it is chained.
deaths = px.line(
    states,
    x="date",
    y="deaths",
    color="state",
    line_group="state",
    hover_name="state",
    log_y=True,
    line_shape="spline",
    render_mode="svg",
).update_layout(
    title=f"COVID-19 Deaths by US State ({states['date'].min().date()} - {states['date'].max().date()})"
)
deaths.show()
# ### Mapping Cases by County
# In[ ]:
# Table mapping US state names to their abbreviations
# (from the jasonong/List-of-US-States repository).
state_lookup = pd.read_csv(
    "https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv"
)
# After the rename, "state_name" holds the full name and "state" the
# abbreviation, which is the key used to join onto the county metadata.
state_lookup = state_lookup.rename(
    columns={"State": "state_name", "Abbreviation": "state"}
)
state_lookup.head(1)
# In[ ]:
# County metadata CSV served by data.healthcare.gov. Later cells read its
# "state", "county", "latitude" and "longitude" columns.
county_lookup_response = requests.get(
    "https://data.healthcare.gov/api/views/52wv-g36k/rows.csv?accessType=DOWNLOAD&sorting=true",
    # Without a timeout requests can wait forever on a stalled connection.
    timeout=60,
)
# Fail loudly on an HTTP error instead of feeding an error page to read_csv.
county_lookup_response.raise_for_status()
county_lookup_data = StringIO(county_lookup_response.content.decode())
county_lookup = pd.read_csv(county_lookup_data)
county_lookup.head(1)
# In[ ]:
# Add the full state name to every county row by joining on the "state"
# abbreviation. Being an inner join, rows whose abbreviation is absent from
# state_lookup are dropped (per the original note: US territories etc.).
county_lookup = pd.merge(county_lookup, state_lookup, on="state", how="inner")
county_lookup.head(1)
# In[ ]:
# Mean latitude/longitude of the county rows in each state: one row per
# state_name, with state_name kept as an ordinary column.
state_lookup_mean = (
    county_lookup
    .groupby("state_name", as_index=False)[["latitude", "longitude"]]
    .mean()
)
# In[ ]:
# Mean latitude/longitude per (county, state_name) pair, with both keys kept
# as ordinary columns so the result can be merged on them.
county_lookup_mean = (
    county_lookup
    .groupby(["county", "state_name"], as_index=False)[["latitude", "longitude"]]
    .mean()
)
# In[ ]:
# Largest "cases" value seen for each (county, state_name) pair. The original
# note treats this maximum as the most recent number.
# NOTE(review): that holds only if the counts never decrease over time — confirm.
counties_total_cases = (
    counties
    .groupby(["county", "state_name"], as_index=False)[["cases"]]
    .max()
)
# In[ ]:
# Attach mean county coordinates to the per-county case totals. The left join
# keeps every county from the case data; counties with no match in the
# metadata end up with NaN latitude/longitude.
lat_long_cases = counties_total_cases.merge(
    county_lookup_mean,
    on=["county", "state_name"],
    how="left",
).sort_values("state_name")

# "Unknown" county rows without coordinates fall back to their state's mean
# position. Boolean masks replace the earlier range(len(...)) loop, which
# addressed rows by label on a frame whose index had just been shuffled by
# sort_values and relied on float(<one-element Series>), a conversion that
# recent pandas versions deprecate.
state_centroids = state_lookup_mean.set_index("state_name")
needs_state_fallback = (
    (lat_long_cases["county"] == "Unknown")
    & lat_long_cases["latitude"].isna()
    # States missing from the lookup are left untouched, exactly as before.
    & lat_long_cases["state_name"].isin(state_centroids.index)
)
if needs_state_fallback.any():
    fallback_states = lat_long_cases.loc[needs_state_fallback, "state_name"]
    for coordinate in ("latitude", "longitude"):
        # map() looks each state name up in the centroid table; the result
        # keeps the row labels, so the assignment aligns row by row.
        lat_long_cases.loc[needs_state_fallback, coordinate] = fallback_states.map(
            state_centroids[coordinate]
        )
def set_lat_long(county, state_name, lat, long):
    """Overwrite the coordinates of one county in ``lat_long_cases``.

    Every row of the module-level ``lat_long_cases`` frame whose ``county``
    and ``state_name`` both equal the given values gets ``lat`` written to
    its ``latitude`` column and ``long`` to its ``longitude`` column, in
    place. When no row matches, nothing changes.
    """
    is_target = (lat_long_cases["county"] == county) & (
        lat_long_cases["state_name"] == state_name
    )
    for column, value in (("latitude", lat), ("longitude", long)):
        lat_long_cases.loc[is_target, column] = value
# Coordinates entered by hand, as (county, state_name, latitude, longitude).
# NOTE(review): presumably these rows found no match in the county metadata —
# not verified here.
manual_coordinates = [
    ("New York City", "New York", 40.7128072, -74.0056247),
    ("Orleans", "Louisiana", 30.0326996, -90.1627533),
    ("Jefferson", "Louisiana", 29.9589877, -90.203087),
    ("DuPage", "Illinois", 41.8398502, -88.2288021),
    ("DeKalb", "Georgia", 33.7929946, -84.3270541),
    ("East Baton Rouge", "Louisiana", 30.5161109, -91.2204589),
    ("Baltimore city", "Maryland", 39.2848183, -76.6905369),
    ("Caddo", "Louisiana", 32.6074573, -94.3130353),
    ("Unknown", "Puerto Rico", 18.2078371, -67.7072376),
    ("St. Tammany", "Louisiana", 30.4257161, -90.1605642),
    ("St. Louis city", "Missouri", 38.6532851, -90.383547),
]
for manual_entry in manual_coordinates:
    set_lat_long(*manual_entry)
lat_long_cases.head(1)
# In[ ]:
# Bubble map of total cases per county: one marker per row of lat_long_cases,
# sized by "cases" and coloured by "state_name".
import os

# Read the Mapbox token from the environment so a real credential never has to
# be written into the notebook. "abc123" is the value the notebook shipped
# with (it appears to be a placeholder) and remains the fallback.
mapbox_token = os.environ.get("MAPBOX_ACCESS_TOKEN", "abc123")
px.set_mapbox_access_token(mapbox_token)
county_map_plot = px.scatter_mapbox(
    data_frame=lat_long_cases,
    lat="latitude",
    lon="longitude",
    size="cases",
    color="state_name",
    # Show every column of the frame in the hover tooltip.
    hover_data=lat_long_cases.columns,
    # NOTE(review): a continuous colour scale normally applies only to numeric
    # colour columns; state_name looks categorical, so this may be a no-op.
    color_continuous_scale=px.colors.cyclical.Twilight,
    size_max=40,
    zoom=3
)
county_map_plot.update_layout(
    title=f"Total COVID-19 Cases by US County (as of {states['date'].max().date()})"
)
county_map_plot.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment