Skip to content

Instantly share code, notes, and snippets.

@mtavkhelidze
Created April 12, 2020 10:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mtavkhelidze/4cb4396b1337b47af717bdefe5098efc to your computer and use it in GitHub Desktop.
Save mtavkhelidze/4cb4396b1337b47af717bdefe5098efc to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame
from pandas.io.common import urlopen
import numpy as np
# Endpoint that get_data() downloads when no other url is given.
URL = "https://covidtracking.com/api/v1/states/daily.json"
# Default smoothing factor for the exponentially weighted means; it is also
# embedded in the generated "<column>_ema_<alpha>" column names.
ALPHA = 0.1
# Passed as ``log=`` to the bar chart; when True the title and y-label get a
# " (logarithmic)" marker and the saved file name a "-log" suffix.
LOGARITHMIC = False
def fetch_data(*, url: str) -> DataFrame:
    """Download the JSON document at *url* and parse it into a DataFrame.

    The "date" and "dateChecked" columns are offered to pandas for date
    conversion through ``convert_dates``.

    Args:
        url: Location of the JSON payload to open.

    Returns:
        The parsed payload as a DataFrame.

    Raises:
        Any error raised while opening the URL or parsing the JSON is
        propagated unchanged.
    """
    # The context manager closes the response even if parsing fails.
    # read_json is called without chunking, so the body is fully consumed
    # before the handle is released.
    with urlopen(url) as response:
        return pd.read_json(response, convert_dates=["date", "dateChecked"])
def calc_ema(*, col_name: str, d: DataFrame, alpha: float = 0.5) -> None:
    """Add an exponentially weighted mean of one column to *d* in place.

    The result is stored in a new column named ``"<col_name>_ema_<alpha>"``.

    Args:
        col_name: Name of the column to smooth.
        d: Frame that holds the column and receives the new one.
        alpha: Smoothing factor forwarded to ``ewm``.

    Returns:
        None; *d* is modified in place.
    """
    smoothed = d[col_name].ewm(alpha=alpha, adjust=True).mean()
    target_column = f"{col_name}_ema_{alpha}"
    d[target_column] = smoothed
# noinspection PyShadowingNames
def get_data(*, url=URL, alpha=ALPHA) -> DataFrame:
    """Fetch the daily records, total them per date and add EMA columns.

    Rows sharing the same "date" are summed, so the returned frame is indexed
    by date. For each of the death, positive, negative, recovered,
    hospitalized and inIcuCurrently columns an extra
    ``"<column>_ema_<alpha>"`` column is added by ``calc_ema``.

    Args:
        url: Location of the JSON payload to download.
        alpha: Smoothing factor used for every EMA column.

    Returns:
        The aggregated frame, indexed by date.
    """
    df = fetch_data(url=url)
    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
    # Only numeric columns can be totalled meaningfully. Being explicit keeps
    # the aggregation from failing on (or concatenating) text and datetime
    # columns on pandas versions that no longer drop them silently.
    df = df.groupby(by="date").sum(numeric_only=True)
    ema_columns = (
        "death",
        "positive",
        "negative",
        "recovered",
        "hospitalized",
        "inIcuCurrently",
    )
    for col_name in ema_columns:
        calc_ema(col_name=col_name, d=df, alpha=alpha)
    return df
# noinspection PyShadowingNames
def create_fig(*, df: DataFrame, log=LOGARITHMIC, alpha=ALPHA):
    """Draw the test-result bars and the EMA curves on the current figure.

    Args:
        df: Date-indexed frame containing "totalTestResults" and the
            ``"<column>_ema_<alpha>"`` columns for the plotted series.
        log: Forwarded as ``log=`` to the bar chart; also adds a
            " (logarithmic)" marker to the title and the y-axis label.
        alpha: Smoothing factor used to look up the EMA columns and shown
            in the legend labels.
    """
    log_note = " (logarithmic)" if log else ""

    plt.grid(True)
    plt.bar(
        df.index,
        df["totalTestResults"],
        label="Total Test Results",
        color="tab:grey",
        log=log,
    )

    # (column prefix, legend caption, line colour), drawn in this order.
    # The recovered EMA series is not plotted.
    ema_lines = (
        ("positive", "Positive", "blue"),
        ("negative", "Negative", "green"),
        ("death", "Deaths", "red"),
        ("hospitalized", "In Hospitals", "yellow"),
        ("inIcuCurrently", "In ICU", "purple"),
    )
    for column, caption, colour in ema_lines:
        plt.plot(
            df[f"{column}_ema_{alpha}"],
            label=f"{caption} EMA α={alpha}",
            color=colour,
            lw=3,
        )

    # The title carries the last date present in the index.
    upto = df.index[-1].strftime("%d %b, %Y")
    plt.suptitle(f"The U.S. COVID-19 Stats as of {upto}{log_note}")

    axes = plt.gca()
    axes.set_ylabel(f"Number of people{log_note}")
    axes.set_xlabel("Days")

    plt.annotate(
        "Source: covidtracking.com",
        xy=(1, 0),
        xycoords=("axes fraction", "figure fraction"),
        xytext=(0, 10),
        textcoords="offset points",
        ha="right",
        va="bottom",
    )
    plt.legend(loc="upper left")

    figure = plt.gcf()
    figure.set_size_inches([12.8, 9.6])
    plt.tight_layout(rect=[0, 0.03, 1, 0.97])
def calc_cors(*, d: DataFrame, method="pearson"):
    """Compute a fixed set of pairwise column correlations.

    Args:
        d: Frame with "positive", "death", "negative" and
            "totalTestResults" columns.
        method: Correlation method forwarded to ``Series.corr``.

    Returns:
        A dict mapping a "<Left>/<Right>" label to the correlation of the
        two columns it names.
    """
    labelled_pairs = {
        "Positive/Death": ("positive", "death"),
        "Positive/Negative": ("positive", "negative"),
        "Total/Negative": ("totalTestResults", "negative"),
        "Total/Positive": ("totalTestResults", "positive"),
    }
    return {
        label: d[left].corr(d[right], method=method)
        for label, (left, right) in labelled_pairs.items()
    }
# Script body: load the aggregated data, print the headline columns, report
# the correlations, then render the chart and save it to disk.
df = get_data()

report_columns = [
    "totalTestResults",
    "positive",
    "negative",
    "death",
    "hospitalized",
    "inIcuCurrently",
    "recovered",
]
print(df[report_columns])

corr_method = "pearson"
print(f"Correlations ({corr_method.capitalize()})")
correlations = calc_cors(d=df, method=corr_method)
for name, value in correlations.items():
    print(f" {name}: {np.round(value, 4)}")

create_fig(df=df, log=LOGARITHMIC)
# The output name gets a "-log" suffix when the logarithmic chart is drawn.
file_suffix = "-log" if LOGARITHMIC else ""
plt.savefig(f"us-stats{file_suffix}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment