# mtavkhelidze/us_covid_19_stats.py

## us_covid_19_stats.py
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame
from pandas.io.common import urlopen
import numpy as np

# Endpoint used by default in get_data(); fetch_data() parses its response as JSON.
URL = "https://covidtracking.com/api/v1/states/daily.json"
# Default smoothing factor handed to Series.ewm(alpha=...) for every EMA column.
ALPHA = 0.1
# When True the bar chart is drawn with log=True and the saved file name gets a "-log" suffix.
LOGARITHMIC = False


def fetch_data(*, url: str) -> DataFrame:
    """Download the JSON document at ``url`` and parse it into a DataFrame.

    The ``date`` and ``dateChecked`` columns are passed to pandas as
    date-conversion candidates.

    :param url: location of the JSON payload; anything ``urlopen`` accepts.
    :return: the parsed records as produced by ``pd.read_json``.
    """
    # Close the response as soon as parsing is done instead of leaking it.
    with urlopen(url) as response:
        return pd.read_json(response, convert_dates=["date", "dateChecked"])


def calc_ema(*, col_name: str, d: DataFrame, alpha: float = 0.5) -> None:
    """Add an exponentially weighted moving average of one column to ``d``.

    The result is written in place to a new column named
    ``"<col_name>_ema_<alpha>"``; nothing is returned.

    :param col_name: name of the existing column to smooth.
    :param d: frame that is both read from and written to.
    :param alpha: smoothing factor forwarded to ``Series.ewm``.
    """
    source = d[col_name]
    smoothed = source.ewm(alpha=alpha, adjust=True).mean()
    d[f"{col_name}_ema_{alpha}"] = smoothed


# noinspection PyShadowingNames
def get_data(*, url=URL, alpha=ALPHA) -> DataFrame:
    """Fetch the daily records and aggregate them into one row per date.

    Rows are grouped by ``date`` and summed, then an EMA column is appended
    for each tracked metric via ``calc_ema``.

    :param url: endpoint handed to ``fetch_data``.
    :param alpha: smoothing factor used for every EMA column.
    :return: frame indexed by date with ``<metric>_ema_<alpha>`` columns added.
    """
    df = fetch_data(url=url)
    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
    # Sum numeric columns only: the payload may also carry non-numeric columns
    # (fetch_data asks pandas to parse ``dateChecked`` as dates, for example),
    # and pandas >= 2.0 no longer drops those silently when summing.
    df = df.groupby(by="date").sum(numeric_only=True)

    for col_name in (
        "death",
        "positive",
        "negative",
        "recovered",
        "hospitalized",
        "inIcuCurrently",
    ):
        calc_ema(col_name=col_name, d=df, alpha=alpha)
    return df


# noinspection PyShadowingNames
def create_fig(*, df: DataFrame, log=LOGARITHMIC, alpha=ALPHA):
    """Draw the summary chart on the current pyplot figure.

    Total test results are drawn as bars with the EMA series as lines on
    top; the title, axis labels, source note, legend and figure size are set
    afterwards. Nothing is returned or saved here.

    :param df: frame holding ``totalTestResults`` and the
        ``<metric>_ema_<alpha>`` columns; its last index entry must support
        ``strftime``.
    :param log: when true the bars are drawn with ``log=True`` and
        " (logarithmic)" is appended to the title and the y label.
    :param alpha: smoothing factor used to locate and label the EMA columns.
    """
    # (column prefix, legend prefix, colour), in drawing order.
    # The recovered EMA is not drawn.
    ema_lines = (
        ("positive", "Positive", "blue"),
        ("negative", "Negative", "green"),
        ("death", "Deaths", "red"),
        ("hospitalized", "In Hospitals", "yellow"),
        ("inIcuCurrently", "In ICU", "purple"),
    )

    plt.grid(True)
    plt.bar(
        df.index,
        df["totalTestResults"],
        label="Total Test Results",
        color="tab:grey",
        log=log,
    )
    for column, title, colour in ema_lines:
        plt.plot(
            df[f"{column}_ema_{alpha}"],
            label=f"{title} EMA α={alpha}",
            color=colour,
            lw=3,
        )

    last_day = df.index[-1].strftime("%d %b, %Y")
    log_note = " (logarithmic)" if log else ""
    plt.suptitle(f"The U.S. COVID-19 Stats as of {last_day}{log_note}")
    plt.gca().set_ylabel(f"Number of people{log_note}")
    plt.gca().set_xlabel("Days")

    # Credit line anchored to the bottom-right corner, nudged up by 10 points.
    plt.annotate(
        "Source: covidtracking.com",
        xy=(1, 0),
        xycoords=("axes fraction", "figure fraction"),
        xytext=(0, 10),
        textcoords="offset points",
        ha="right",
        va="bottom",
    )
    plt.legend(loc="upper left")
    plt.gcf().set_size_inches([12.8, 9.6])
    plt.tight_layout(rect=[0, 0.03, 1, 0.97])


def calc_cors(*, d: DataFrame, method="pearson"):
    """Compute the four column-pair correlations reported by the script.

    :param d: frame with ``positive``, ``death``, ``negative`` and
        ``totalTestResults`` columns.
    :param method: correlation method forwarded to ``Series.corr``.
    :return: dict mapping ``"<A>/<B>"`` labels to correlation coefficients,
        in a fixed order.
    """
    pairs = (
        ("Positive/Death", "positive", "death"),
        ("Positive/Negative", "positive", "negative"),
        ("Total/Negative", "totalTestResults", "negative"),
        ("Total/Positive", "totalTestResults", "positive"),
    )
    return {
        label: d[left].corr(d[right], method=method)
        for label, left, right in pairs
    }


# Load and aggregate the data, then dump the per-date totals.
df = get_data()
shown_columns = [
    "totalTestResults",
    "positive",
    "negative",
    "death",
    "hospitalized",
    "inIcuCurrently",
    "recovered",
]
print(df[shown_columns])

# Report the correlations between the headline series.
corr_method = "pearson"
print(f"Correlations ({corr_method.capitalize()})")
correlations = calc_cors(d=df, method=corr_method)
for label, coefficient in correlations.items():
    print(f"  {label}: {np.round(coefficient, 4)}")

# Render the chart and save it; no extension is given in the file name.
create_fig(df=df, log=LOGARITHMIC)
output_name = f"us-stats{'-log' if LOGARITHMIC else ''}"
plt.savefig(output_name)
	import matplotlib.pyplot as plt
	import pandas as pd
	from pandas import DataFrame
	from pandas.io.common import urlopen
	import numpy as np

	# Endpoint used by default in get_data(); fetch_data() parses its response as JSON.
	URL = "https://covidtracking.com/api/v1/states/daily.json"
	# Default smoothing factor handed to Series.ewm(alpha=...) for every EMA column.
	ALPHA = 0.1
	# When True the bar chart is drawn with log=True and the saved file name gets a "-log" suffix.
	LOGARITHMIC = False


	def fetch_data(*, url: str) -> DataFrame:
	    """Download the JSON document at ``url`` and parse it into a DataFrame.

	    The ``date`` and ``dateChecked`` columns are passed to pandas as
	    date-conversion candidates.

	    :param url: location of the JSON payload; anything ``urlopen`` accepts.
	    :return: the parsed records as produced by ``pd.read_json``.
	    """
	    # Close the response as soon as parsing is done instead of leaking it.
	    with urlopen(url) as response:
	        return pd.read_json(response, convert_dates=["date", "dateChecked"])


	def calc_ema(*, col_name: str, d: DataFrame, alpha: float = 0.5) -> None:
	    """Add an exponentially weighted moving average of one column to ``d``.

	    The result is written in place to a new column named
	    ``"<col_name>_ema_<alpha>"``; nothing is returned.

	    :param col_name: name of the existing column to smooth.
	    :param d: frame that is both read from and written to.
	    :param alpha: smoothing factor forwarded to ``Series.ewm``.
	    """
	    ema_name = f"{col_name}_ema_{alpha}"
	    d[ema_name] = d[col_name].ewm(alpha=alpha, adjust=True).mean()


	# noinspection PyShadowingNames
	def get_data(*, url=URL, alpha=ALPHA) -> DataFrame:
	    """Fetch the daily records and aggregate them into one row per date.

	    Rows are grouped by ``date`` and summed, then an EMA column is appended
	    for each tracked metric via ``calc_ema``.

	    :param url: endpoint handed to ``fetch_data``.
	    :param alpha: smoothing factor used for every EMA column.
	    :return: frame indexed by date with ``<metric>_ema_<alpha>`` columns added.
	    """
	    df = fetch_data(url=url)
	    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
	    # Sum numeric columns only: the payload may also carry non-numeric columns
	    # (fetch_data asks pandas to parse ``dateChecked`` as dates, for example),
	    # and pandas >= 2.0 no longer drops those silently when summing.
	    df = df.groupby(by="date").sum(numeric_only=True)

	    for col_name in (
	        "death",
	        "positive",
	        "negative",
	        "recovered",
	        "hospitalized",
	        "inIcuCurrently",
	    ):
	        calc_ema(col_name=col_name, d=df, alpha=alpha)
	    return df


	# noinspection PyShadowingNames
	def create_fig(*, df: DataFrame, log=LOGARITHMIC, alpha=ALPHA):
	    """Draw the summary chart on the current pyplot figure.

	    Total test results are drawn as bars with the EMA series as lines on
	    top; the title, axis labels, source note, legend and figure size are set
	    afterwards. Nothing is returned or saved here.

	    :param df: frame holding ``totalTestResults`` and the
	        ``<metric>_ema_<alpha>`` columns; its last index entry must support
	        ``strftime``.
	    :param log: when true the bars are drawn with ``log=True`` and
	        " (logarithmic)" is appended to the title and the y label.
	    :param alpha: smoothing factor used to locate and label the EMA columns.
	    """
	    # (column prefix, legend prefix, colour), in drawing order.
	    # The recovered EMA is not drawn.
	    ema_lines = (
	        ("positive", "Positive", "blue"),
	        ("negative", "Negative", "green"),
	        ("death", "Deaths", "red"),
	        ("hospitalized", "In Hospitals", "yellow"),
	        ("inIcuCurrently", "In ICU", "purple"),
	    )

	    plt.grid(True)
	    plt.bar(
	        df.index,
	        df["totalTestResults"],
	        label="Total Test Results",
	        color="tab:grey",
	        log=log,
	    )
	    for column, title, colour in ema_lines:
	        plt.plot(
	            df[f"{column}_ema_{alpha}"],
	            label=f"{title} EMA α={alpha}",
	            color=colour,
	            lw=3,
	        )

	    upto = df.index[-1].strftime("%d %b, %Y")
	    log_note = " (logarithmic)" if log else ""
	    plt.suptitle(f"The U.S. COVID-19 Stats as of {upto}{log_note}")
	    plt.gca().set_ylabel(f"Number of people{log_note}")
	    plt.gca().set_xlabel("Days")

	    # Credit line anchored to the bottom-right corner, nudged up by 10 points.
	    plt.annotate(
	        "Source: covidtracking.com",
	        xy=(1, 0),
	        xycoords=("axes fraction", "figure fraction"),
	        xytext=(0, 10),
	        textcoords="offset points",
	        ha="right",
	        va="bottom",
	    )
	    plt.legend(loc="upper left")
	    plt.gcf().set_size_inches([12.8, 9.6])
	    plt.tight_layout(rect=[0, 0.03, 1, 0.97])


	def calc_cors(*, d: DataFrame, method="pearson"):
	    """Compute the four column-pair correlations reported by the script.

	    :param d: frame with ``positive``, ``death``, ``negative`` and
	        ``totalTestResults`` columns.
	    :param method: correlation method forwarded to ``Series.corr``.
	    :return: dict mapping ``"<A>/<B>"`` labels to correlation coefficients,
	        in a fixed order.
	    """
	    return {
	        "Positive/Death": d["positive"].corr(d["death"], method=method),
	        "Positive/Negative": d["positive"].corr(d["negative"], method=method),
	        "Total/Negative": d["totalTestResults"].corr(d["negative"], method=method),
	        "Total/Positive": d["totalTestResults"].corr(d["positive"], method=method),
	    }


	# Load and aggregate the data, then dump the per-date totals.
	df = get_data()
	print(
	    df[
	        [
	            "totalTestResults",
	            "positive",
	            "negative",
	            "death",
	            "hospitalized",
	            "inIcuCurrently",
	            "recovered",
	        ]
	    ]
	)

	# Report the correlations between the headline series.
	corr_method = "pearson"
	print(f"Correlations ({corr_method.capitalize()})")
	for name, value in calc_cors(d=df, method=corr_method).items():
	    print(f" {name}: {np.round(value, 4)}")

	# Render the chart and save it; no extension is given in the file name.
	create_fig(df=df, log=LOGARITHMIC)
	plt.savefig(f"us-stats{'-log' if LOGARITHMIC else ''}")