Skip to content

Instantly share code, notes, and snippets.

@clamytoe
Last active June 3, 2020 10:23
Show Gist options
  • Save clamytoe/1ce5baa3260920933ff8853210d1e810 to your computer and use it in GitHub Desktop.
Save clamytoe/1ce5baa3260920933ff8853210d1e810 to your computer and use it in GitHub Desktop.
Daily COVID plot generator.
#!/usr/bin/env python3
from urllib.error import URLError
from pathlib import Path
import click
import matplotlib.pyplot as plt # type: ignore
import pandas as pd # type: ignore
import seaborn as sns # type: ignore
from us_state_abbrev import us_state_abbrev
# Version string reported by the ``--version`` command line option.
__version__: str = "0.1.1"

# Directory that receives the generated plot images.
IMG_DIR: Path = Path.home().joinpath("Pictures", "covid")

# CSV with the daily per-state figures (COVID19Tracking/covid-tracking-data).
URL: str = (
    "https://github.com/COVID19Tracking/covid-tracking-data/"
    "raw/master/data/states_daily_4pm_et.csv"
)

# Reverse lookup: two-letter abbreviation -> full state name.
US_STATES = dict(zip(us_state_abbrev.values(), us_state_abbrev.keys()))
@click.command()
@click.option(
    "--state",
    "-s",
    default="tx",
    help="State abbreviation to generate plot for",
    show_default=True,
)
@click.version_option(version=__version__)
def main(state: str) -> None:
    """Entry point into the program.
    Arguments:
    state {str} -- Two letter identifier for the US State.
    """
    # NOTE: click renders the docstring above as the ``--help`` text, so its
    # wording is part of the command line interface.

    # The lookup table is keyed by upper-case abbreviations; normalise once.
    code = state.upper()

    # Guard clause: refuse anything that is not a known abbreviation.
    if code not in US_STATES:
        raise ValueError(f"{state} is not a valid US State abbreviation!")

    # The data lookup uses the normalised code, while the plot receives the
    # abbreviation exactly as the user typed it.
    stats = gather_stats(code)
    generate_plot(stats, state)
def gather_stats(state: str) -> pd.DataFrame:
    """Gather the data from online resource.

    Reads the CSV located at ``URL``, keeps the rows whose ``state`` column
    equals ``state``, drops the columns that are not needed for plotting and
    indexes the result by ``date``. Two derived columns are added:
    ``hospitalizedIncrease`` and ``recoveredIncrease``, the date-ordered
    differences of ``hospitalizedCurrently`` and ``recovered``.

    Arguments:
        state {str} -- Two letter identifier for the US State. The value is
            compared verbatim against the ``state`` column (``main`` passes
            it upper-cased, e.g. ``TX`` for Texas).

    Returns:
        pd.DataFrame -- Pandas DataFrame with the parsed and formatted data

    Raises:
        SystemExit -- If the CSV cannot be downloaded. The message is written
            to stderr and the process exits with a non-zero status.
    """
    try:
        df = pd.read_csv(URL, parse_dates=["date", "dateChecked"])
    except URLError as err:
        # SystemExit with a message exits with status 1 and prints to stderr;
        # the bare ``exit()`` helper signalled success and may be unavailable
        # when the ``site`` module is not loaded.
        raise SystemExit(f"Unable to download {URL}: {err}") from err

    unwanted = [
        "pending",
        "hospitalized",
        "hospitalizedCumulative",
        "inIcuCurrently",
        "inIcuCumulative",
        "onVentilatorCurrently",
        "onVentilatorCumulative",
        "hash",
        "dateChecked",
        "totalTestResults",
        "posNeg",
        "fips",
        "negativeIncrease",
        "totalTestResultsIncrease",
        "state",
        "negative",
        "total",
    ]

    # Copy the selection so later column assignments never touch ``df``.
    target = df[df["state"] == state].copy()
    # errors="ignore": a column missing from the CSV is simply skipped instead
    # of aborting with a KeyError.
    target = target.drop(columns=unwanted, errors="ignore")
    target = target.set_index(["date"])

    # diff() must run over date-ordered rows; the assignments below align the
    # results back onto ``target`` by index, so its row order is unchanged.
    chronological = target.sort_index()
    target["hospitalizedIncrease"] = chronological["hospitalizedCurrently"].diff()
    target["recoveredIncrease"] = chronological["recovered"].diff()
    return target
def generate_plot(df: pd.DataFrame, state: str) -> None:
    """Generate the plot.

    Draws the ``positiveIncrease``, ``recoveredIncrease`` and
    ``deathIncrease`` columns of ``df`` as a line plot and saves it as a PNG
    inside ``IMG_DIR`` (created if it does not exist yet). The legend labels
    carry the ``positive``, ``recovered`` and ``death`` values of the first
    row of ``df``.

    Arguments:
        df {pd.DataFrame} -- Pandas DataFrame with the data to plot
        state {str} -- Two letter identifier for the US States

    Raises:
        ValueError -- If ``df`` has no rows.
    """
    if df.empty:
        # Without this guard the first-row lookup below fails with an opaque
        # IndexError.
        raise ValueError(f"No data available for {state}")

    data = df[["positiveIncrease", "recoveredIncrease", "deathIncrease"]].copy()

    # The first row supplies the totals shown in the legend.
    # NOTE(review): the naming suggests the rows are newest-first -- confirm.
    latest = list(df.head(1).itertuples(index=False))[0]
    latest_pos = int(latest.positive)
    latest_dod = int(latest.death)
    try:
        latest_rec = int(latest.recovered)
    except ValueError:
        # int() raises ValueError for NaN, i.e. when no value is present.
        latest_rec = 0

    recovered_label = f"recovered ({latest_rec:,})"
    data.columns = [
        f"positive ({latest_pos:,})",
        recovered_label,
        f"death ({latest_dod:,})",
    ]
    if latest_rec == 0:
        # Leave the recovered series out when its latest total is zero.
        data = data.drop(columns=recovered_label)

    # The number of rows doubles as the "day" counter in title and file name.
    day = data.shape[0]
    dims = (20, 10)
    fig, ax = plt.subplots(figsize=dims)
    # NOTE(review): context/style are set after the axes already exist, so
    # they may not fully apply to this figure -- confirm this is intended.
    sns.set_context("poster")
    sns.set_style("white")
    sns.despine()
    sns.lineplot(ax=ax, data=data)
    ax.set_title(
        f"{US_STATES[state.upper()]} COVID-19: Day {day}", loc="left", fontsize=32
    )
    ax.set(xlabel="", ylabel="change +/-")
    plt.legend(loc="upper left")
    plt.xticks(rotation=45)

    # savefig does not create missing directories, so make sure the target
    # directory exists before writing the image.
    IMG_DIR.mkdir(parents=True, exist_ok=True)
    image = IMG_DIR.joinpath(f"{state}-covid_{day}.png")
    plt.savefig(str(image), dpi=300)
    click.echo(f"Image saved to: {image}")
# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
# Conda environment definition named "dataviz".
name: dataviz
channels:
  - conda-forge
  - defaults
dependencies:
  - bokeh
  # click is imported by the plot generator's command line interface.
  - click
  - ipykernel
  - jupyterlab
  - line_profiler
  - matplotlib
  - nb_conda_kernels
  - numpy
  - pandas
  - pip
  - pyglet
  - python>=3.8
  - seaborn
  - squarify
  # Entries nested under "pip:" are installed with pip instead of conda.
  - pip:
    - geoplotlib
# United States of America Python Dictionary to translate States,
# Districts & Territories to Two-Letter codes and vice versa.
#
# https://gist.github.com/rogerallen/1583593
#
# Dedicated to the public domain. To the extent possible under law,
# Roger Allen has waived all copyright and related or neighboring
# rights to this code.
# Full name -> two-letter code for the 50 states, DC and 5 territories.
us_state_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "American Samoa": "AS",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "District of Columbia": "DC",
    "Florida": "FL",
    "Georgia": "GA",
    "Guam": "GU",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Northern Mariana Islands": "MP",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Puerto Rico": "PR",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virgin Islands": "VI",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
}

# Inverse mapping: two-letter code -> full name.
# thank you to @kinghelix and @trevormarburger for this idea
abbrev_us_state = {code: name for name, code in us_state_abbrev.items()}
# Simple test examples: printed only when this module is run directly.
if __name__ == '__main__':
    # Each line prints a label followed by True when the check holds.
    print("Wisconsin --> WI?", us_state_abbrev['Wisconsin'] == 'WI')
    print("WI --> Wisconsin?", abbrev_us_state['WI'] == 'Wisconsin')
    print("Number of entries (50 states, DC, 5 Territories) == 56? ", 56 == len(us_state_abbrev))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment