Skip to content

Instantly share code, notes, and snippets.

@joelanders
Created February 9, 2021 11:54
Show Gist options
  • Save joelanders/ecd80e77a57a99c559c4e252862b4607 to your computer and use it in GitHub Desktop.
Save joelanders/ecd80e77a57a99c559c4e252862b4607 to your computer and use it in GitHub Desktop.
from datetime import datetime
from datetime import timedelta
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import statsmodels.api as sm
import math
# df = pd.read_csv('owid-covid-data.csv')
df = pd.read_csv('vaccinations.csv')
df = df[df.date >= "2021-01-01 00:00:00.000"]
df = df[df.date <= "2021-02-08 00:00:00.000"]
df['day_of_year'] = [(datetime.strptime(d, "%Y-%m-%d") - datetime(2021,1,1)).days for d in df['date']]
df['daily_vaccinations_per_hundred'] = (1.0 / 10_000) * df['daily_vaccinations_per_million']
# XXX sloppy way to set the x scale
x_days = 250
x_max_date = datetime(2021,1,1) + timedelta(days=x_days)
fig = go.Figure(
layout_yaxis_range=[0,4],
layout_xaxis_range=["2021-01-01 00:00:00.0000", x_max_date],
layout_yaxis_title_text="daily vaccinations per hundred (7-day smoothed)",
layout_title_text="time to X% of population vaccinated",
layout_title_x=0.5,
)
# for country_index, country in enumerate(['United Kingdom', 'United States', 'Italy', 'France', 'Germany']):
for country_index, country in enumerate(['United Kingdom', 'United States', 'Germany']):
country_df = df[df.location == country]
country_df = country_df.dropna(subset=['daily_vaccinations_per_hundred'])
x = sm.add_constant(country_df['day_of_year'])
model = sm.OLS(country_df['daily_vaccinations_per_hundred'], x)
results = model.fit()
print(results.params)
b = results.params[0]
m = results.params[1]
# Area of triangle is H * W / 2
# y = mx + b
# A(x) = x * (mx + b) / 2
# solve for x via quadratic formula
# i'm assuming everyone starts at ~0 vaccinations before the line starts...
for p in [50, 100, 200]:
x_p = (((-1.0 * b) / 2) + math.sqrt(((b * b) / 4) + (2 * m * p)) / m)
date_p = datetime(2021,1,1) + timedelta(days=x_p)
print(f"time to hit {p}%: {x_p} days")
# vertical line marker
fig.add_shape(
type="line",
x0=date_p,
y0=0,
x1=date_p,
y1=((m * x_p) + b),
line_color=px.colors.qualitative.Dark2[country_index],
)
# text label for vertical line
fig.add_trace(
go.Scatter(
x=[date_p],
y=[((m * x_p) + b)],
text=[f"{country} {p}%"],
textfont_color=px.colors.qualitative.Dark2[country_index],
textfont_size=10,
mode="text",
showlegend=False,
)
)
# scatter plot for data
fig.add_trace(
go.Scatter(
x=country_df['date'],
y=country_df['daily_vaccinations_per_hundred'],
mode="markers",
marker_color=px.colors.qualitative.Dark2[country_index],
name=country,
showlegend=False,
)
)
# extrapolated line
fig.add_trace(
go.Scatter(
x=["2021-01-01 00:00:00.0000", x_max_date],
y=[results.params[0], (m*x_days + b)],
mode="lines",
marker_color=px.colors.qualitative.Dark2[country_index],
name=country,
showlegend=True,
)
)
fig.show()
@joelanders
Copy link
Author

Screenshot 2021-02-09 at 12 53 17

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment