Skip to content

Instantly share code, notes, and snippets.

@mcohen01
Last active June 13, 2020 17:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mcohen01/355c99dc7934df95957eb25d950568a9 to your computer and use it in GitHub Desktop.
Save mcohen01/355c99dc7934df95957eb25d950568a9 to your computer and use it in GitHub Desktop.
import math
import os
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import us as us_states
# Only observations strictly after this date are kept in the county series.
START_DATE = pd.to_datetime('2020-02-22')
# NOTE: despite the name, this is a path into a local checkout, not a remote URL.
url = './COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
ts = pd.read_csv(url)
# Keep rows whose FIPS is strictly between 1000 and 80000. Rows with a missing
# FIPS fail both comparisons and are dropped as well. The explicit copy makes
# `ts` an independent frame, so the column assignments below do not operate on
# a filtered slice (which triggers pandas' chained-assignment warning).
ts = ts[(ts.FIPS > 1000) & (ts.FIPS < 80000)].copy()
ts['FIPS'] = ts['FIPS'].astype(int)
cols_to_drop = ['UID', 'iso2', 'iso3', 'code3', 'Admin2', 'Province_State', 'Lat', 'Long_', 'Country_Region', 'Combined_Key']
# Reshape wide -> long: one row per (FIPS, original column header) pair, with
# the header text in 'variable' and the cell value in 'Cases'.
ts = ts.drop(cols_to_drop, axis=1).melt(
    id_vars=['FIPS'], var_name='variable', value_name='Cases'
)
# convert to a DateTime object so we can do math on dates
ts['Date'] = pd.to_datetime(ts['variable'])
ts.index = ts.Date
ts.index.name = None
ts = ts[ts.Date > START_DATE].copy()
# Whole days elapsed since START_DATE.
ts['Day'] = (ts.Date - START_DATE).dt.days
# Resulting column order is FIPS, Cases, Date, Day.
ts = ts.drop(columns=['variable'])
def difference(fip, df):
    """Convert the cumulative ``Cases`` column of *df* to daily new cases, in place.

    Each value becomes the increase over the previous row; decreases are
    clamped to 0 and the first row (which has no predecessor) is set to 0.
    The column is replaced with an integer-typed column.

    Args:
        fip: Unused; kept so existing callers keep working.
        df: DataFrame with a numeric ``Cases`` column, rows already in the
            order the differences should be taken. Modified in place.

    Returns:
        None.

    Raises:
        ValueError: If a difference other than the first is NaN (the integer
            conversion cannot represent it).
    """
    if df.empty:
        # Nothing to difference; avoid indexing the first row of an empty frame.
        return
    daily = df['Cases'].diff().clip(lower=0)
    # diff() leaves NaN in the first slot; only that slot is zeroed so that
    # unexpected NaNs elsewhere still surface in the integer conversion.
    daily.iloc[0] = 0
    # Replace the column by name: a `.loc[:, 'Cases'] = ...` assignment writes
    # into the existing (float) column and would keep the float dtype, and a
    # positional `.iloc[0, 1]` would depend on the column order.
    df['Cases'] = daily.astype(int)
# Build the per-county daily-new-cases table. Frames are collected in a list
# and concatenated once: concatenating inside the loop re-copies the growing
# result on every iteration and seeds it with an empty DataFrame.
county_frames = []
for fip in ts.FIPS.unique():
    # Work on a copy so difference() can rewrite the Cases column in place
    # without touching `ts`.
    df = ts[ts.FIPS == fip].copy()
    difference(fip, df)
    county_frames.append(df)
all_counties = pd.concat(county_frames) if county_frames else pd.DataFrame()
# The Date column is dropped from the output and the index is not written.
all_counties.drop(['Date'], axis=1).to_csv('fips_cases.csv', index=False)
def good_states(x):
    """Return True when *x* is a region name that should be kept.

    A name is rejected when its last space-separated word is one of
    'Evacuee', 'Islands', 'Recovered', 'Princess', 'Guam' or 'Samoa'.
    Anything that is not a string (``None``, or a NaN from a missing CSV
    cell) is rejected as well instead of raising.

    Args:
        x: Candidate region name, or a missing value.

    Returns:
        bool: True if the name passes the filter, False otherwise.
    """
    non_states = ('Evacuee', 'Islands', 'Recovered', 'Princess', 'Guam', 'Samoa')
    if not isinstance(x, str):
        return False
    return x.split(' ')[-1] not in non_states
def lookup_state(x):
    """Resolve the trailing, comma-separated part of *x* to a state name.

    The text after the last comma in *x* (or all of *x* when it contains no
    comma) is stripped of surrounding whitespace and passed to
    ``us_states.states.lookup``.

    Args:
        x: A string such as a "place, region" label.

    Returns:
        The ``name`` attribute of the lookup result, or None when the lookup
        returns None.
    """
    region = x.split(',')[-1].strip()
    match = us_states.states.lookup(region)
    return match.name if match is not None else None
def parse_daily_reports(path):
    """Load every daily-report CSV under *path* and return the kept US rows.

    Files are visited in sorted filename order. Each file's name minus its
    last four characters (the ``.csv`` suffix) is parsed as the report date
    and stored in a ``Date`` column. Two schemas are handled:

    * files with a ``Country_Region`` column are filtered on it and their
      existing ``Province_State`` column is used as-is;
    * files without it are filtered on ``Country/Region`` and get a
      ``Province_State`` column derived from ``Province/State`` via
      ``lookup_state``.

    Rows whose ``Province_State`` fails ``good_states`` are discarded.

    Args:
        path: Directory containing the CSV files. A trailing separator is
            optional.

    Returns:
        A DataFrame with the rows from all files concatenated, or None when
        the directory contains no file ending in ``csv``.
    """
    frames = []
    for f in sorted(os.listdir(path)):
        if not f.endswith('csv'):
            continue
        df = pd.read_csv(os.path.join(path, f))
        df['Date'] = pd.to_datetime(f[0:-4])
        # Detect the schema explicitly rather than catching whatever the
        # attribute access happens to raise.
        if 'Country_Region' in df.columns:
            us = df[df['Country_Region'] == 'US']
        else:
            # Copy before adding a column so the assignment does not target
            # a filtered slice of `df`.
            us = df[df['Country/Region'] == 'US'].copy()
            us['Province_State'] = us['Province/State'].apply(lookup_state)
        # Force a boolean mask: with zero US rows, apply() yields an empty
        # non-boolean Series, which would be read as a list of column labels.
        keep = us['Province_State'].apply(good_states).astype(bool)
        frames.append(us[keep])
    if not frames:
        return None
    # One concat at the end instead of re-copying the growing result per file.
    return pd.concat(frames)
def daily_new_cases(state):
    """Compute daily new confirmed cases and their recent trend.

    Reads the module-level ``all_states_df``. Rows dated after 2020-02-22 are
    grouped by ``Date``, ``Confirmed`` is summed per date, and consecutive
    sums are differenced with negative changes clamped to 0 (the first entry
    stays NaN because it has no predecessor). The trend is the slope of a
    linear regression over the last 20 values of the 10-row rolling mean.

    Args:
        state: ``Province_State`` value to restrict to; any falsy value
            (e.g. None) uses all rows.

    Returns:
        A ``(slope, diff, state)`` tuple: the regression slope, the Series of
        daily new cases indexed by date, and the *state* argument unchanged.
    """
    cutoff = pd.to_datetime('2020-02-22')
    selected = all_states_df
    if state:
        selected = selected[selected.Province_State == state]
    recent = selected[selected.Date > cutoff]
    totals_by_date = recent.groupby(by='Date').Confirmed.sum()
    diff = totals_by_date.diff().apply(lambda delta: max(delta, 0))
    smoothed_tail = diff.rolling(10).mean().tail(20)
    fit = stats.linregress(range(len(smoothed_tail)), smoothed_tail)
    return fit.slope, diff, state
# Combined table of kept US rows from every local daily-report CSV.
all_states_df = parse_daily_reports('./COVID-19/csse_covid_19_data/csse_covid_19_daily_reports/')
# Element 1 of the returned tuple is the per-date series of new cases; passing
# None applies no state filter.
us = daily_new_cases(None)[1]
# The first entry comes from diff() and has no previous date to compare with,
# so it is zeroed. Assign through .iloc rather than `us.values[0]`: the array
# returned by `.values` is read-only when pandas Copy-on-Write is active.
us.iloc[0] = 0
pd.DataFrame({
    'cumulative': us.cumsum(),
    'daily': us
}).to_csv('us_cases.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment