Skip to content

Instantly share code, notes, and snippets.

@jasonrahm
Created June 23, 2020 17:50
Show Gist options
  • Save jasonrahm/3e1d6966a46fc62d1d25c94d1f4006f3 to your computer and use it in GitHub Desktop.
Save jasonrahm/3e1d6966a46fc62d1d25c94d1f4006f3 to your computer and use it in GitHub Desktop.
Covid Stats analysis
import csv
from datetime import datetime
from datetime import timedelta
__author__ = 'Jason Rahm'
# Assumes this script is in a <scripts> folder inside a cloned version of https://github.com/CSSEGISandData/COVID-19
# Total Kansas City Metro Area Stats
# Population: 2646933
# Cases: 5159 (per capita: 194.90482003133437 per 100,000 people)
# Deaths: 191 (per capita: 7.215898551266692 per 100,000 people)
# Death Rate by Known Cases (percentage): 3.702267881372359
#
#
# Total St Louis Metro Area Stats
# Population: 2803228
# Cases: 10017 (per capita: 357.3380402878396 per 100,000 people)
# Deaths: 752 (per capita: 26.82621606233956 per 100,000 people)
# Death Rate by Known Cases (percentage): 7.507237695916941
#
#
# Total Combined Kansas City & St Louis Metro Area Stats
# Population: 5450161
# Cases: 15176 (per capita: 278.4504898112184 per 100,000 people)
# Deaths: 943 (per capita: 17.30224116315096 per 100,000 people)
# Death Rate by Known Cases (percentage): 6.21375856615709
# St Louis Metro Statistical Area Counties
# UID, Name
# Illinois Counties
# 84017005, Bond
# 84017013, Calhoun
# 84017027, Clinton
# 84017083, Jersey
# 84017117, Macoupin
# 84017119, Madison
# 84017133, Monroe
# 84017163, St. Clair
# Missouri Counties
# 84029071, Franklin
# 84029099, Jefferson
# 84029113, Lincoln
# 84029183, St. Charles
# 84029189, St. Louis
# 84029510, St. Louis City
# 84029219, Warren
# Kansas City Metro Statistical Area Counties
# UID, Name
# Kansas Counties
# 84020091, Johnson
# 84020103, Leavenworth
# 84020107, Linn
# 84020121, Miami
# 84020209, Wyandotte
# Missouri Counties
# 84029013, Bates
# 84029025, Caldwell
# 84029037, Cass
# 84029047, Clay
# 84029049, Clinton
# 84029107, Lafayette
# 84029095, Jackson
# 84070003, Kansas City
# 84029165, Platte
# 84029177, Ray
# UIDs of the counties that make up the St Louis metro statistical area.
stl_msa = [
    # Illinois
    84017005,  # Bond
    84017013,  # Calhoun
    84017027,  # Clinton
    84017083,  # Jersey
    84017117,  # Macoupin
    84017119,  # Madison
    84017133,  # Monroe
    84017163,  # St. Clair
    # Missouri
    84029071,  # Franklin
    84029099,  # Jefferson
    84029113,  # Lincoln
    84029183,  # St. Charles
    84029189,  # St. Louis
    84029510,  # St. Louis City
    84029219,  # Warren
]

# UIDs of the counties that make up the Kansas City metro statistical area.
kc_msa = [
    # Kansas
    84020091,  # Johnson
    84020103,  # Leavenworth
    84020107,  # Linn
    84020121,  # Miami
    84020209,  # Wyandotte
    # Missouri
    84029013,  # Bates
    84029025,  # Caldwell
    84029037,  # Cass
    84029047,  # Clay
    84029049,  # Clinton
    84029107,  # Lafayette
    84029095,  # Jackson
    84029165,  # Platte
    84029177,  # Ray
    84070003,  # Kansas City
]

# Set views of the two metro areas, plus their union, for membership tests.
s_stl, s_kc = set(stl_msa), set(kc_msa)
all_counties = s_stl | s_kc
# Get keys for today/yesterday.
# The keys use non-zero-padded month and day with a two-digit year (the
# '%-m/%-d/%y' shape). They are assembled from the date attributes because
# the '-' strftime flag is a platform extension and is not portable.
today = datetime.now()
yesterday = today - timedelta(days=1)
today = f'{today.month}/{today.day}/{today:%y}'
yesterday = f'{yesterday.month}/{yesterday.day}/{yesterday:%y}'
# TESTING: need to exclude non-date keys from the sort
# for x in combined_data:
# print(sorted(x.items(), key=lambda x:datetime.strptime(x[0], '%m/%d/%y') ))
def merge(d1, d2):
    """Yield ``(key, value)`` pairs combining two row dictionaries.

    Keys are produced in a stable order: every key of ``d1`` in its own
    order, followed by the keys that appear only in ``d2``.

    * A key present in only one dictionary keeps that dictionary's value.
    * A key present in both yields ``[d1_value, d2_value]``, except that the
      descriptive keys listed below collapse to the single shared value when
      both sides agree.

    Any other shared key (including ``'UID'``) is always paired, even when
    the two values are equal.
    """
    # Descriptive columns that are expected to be identical in both inputs.
    shared_fields = ('FIPS', 'Combined_Key', 'Lat', 'Long_', 'Population')
    # Iterating the merged dict (rather than a set of its keys) keeps the
    # output order deterministic from one run to the next.
    for k in {**d1, **d2}:
        if k not in d1:
            yield k, d2[k]
        elif k not in d2:
            yield k, d1[k]
        else:
            v1, v2 = d1[k], d2[k]
            if k in shared_fields and v1 == v2:
                yield k, v1
            else:
                yield k, [v1, v2]
def parse_covid_data(csv_file):
    """Read a time-series CSV and return the rows for the tracked counties.

    Only the first column (the UID) and the columns from index 8 onward are
    kept; each matching row becomes a dict keyed by the header names of those
    columns, with every value left as the string read from the file.

    Reading stops after the row whose UID equals the largest tracked UID, so
    the file is assumed to list that row after all other tracked counties.

    :param csv_file: path of the CSV file to read.
    :return: list of dicts, one per row whose UID is in ``all_counties``,
        in file order. An empty file yields an empty list.
    """
    # Loop invariant: the UID (as text) that marks the last row of interest.
    last_uid = str(max(all_counties)) if all_counties else None
    ldata = []
    # newline='' lets the csv module handle line endings itself.
    with open(csv_file, newline='') as f:
        rdr = csv.reader(f, delimiter=',')
        header = next(rdr, None)
        if header is None:
            return ldata
        colnames = [header[0]] + header[8:]
        for row in rdr:
            if not row:
                # Skip blank lines instead of failing on row[0].
                continue
            if int(row[0]) in all_counties:
                row_data = [row[0]] + row[8:]
                ldata.append(dict(zip(colnames, row_data)))
            if row[0] == last_uid:
                break
    return ldata
def print_aggregate_stats(population, cases, deaths):
    """Print population, case and death figures with derived rates.

    Four tab-indented lines are written to standard output: the population,
    the cases and deaths (each with a per-100,000-people rate), and the
    deaths as a percentage of known cases.

    A rate whose denominator is zero is printed as ``n/a`` instead of
    raising ``ZeroDivisionError`` (for example, a county with no cases yet).

    :param population: number of residents.
    :param cases: number of confirmed cases.
    :param deaths: number of deaths.
    """
    def _rate(numerator, denominator, scale):
        # Scaled ratio, or a placeholder when the ratio is undefined.
        return scale * numerator / denominator if denominator else 'n/a'

    cases_per_capita = _rate(cases, population, 100000)
    deaths_per_capita = _rate(deaths, population, 100000)
    death_rate = _rate(deaths, cases, 100)

    print(f'\tPopulation: {population}')
    print(f'\tCases: {cases} (per capita: {cases_per_capita} per 100,000 people)')
    print(f'\tDeaths: {deaths} (per capita: {deaths_per_capita} per 100,000 people)')
    print(f'\tDeath Rate by Known Cases (percentage): {death_rate}')
# Load the confirmed-case and death time series for the tracked counties.
cases_data = parse_covid_data('../csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv')
deaths_data = parse_covid_data('../csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv')
# NOTE(review): this dumps every parsed deaths row before the report; it
# looks like leftover debug output -- confirm before removing.
print(deaths_data)
# Pair rows positionally: both parses are assumed to return the same counties
# in the same order. After merging, each date key holds [cases, deaths] and
# 'UID' holds [cases_uid, deaths_uid].
combined_data = [dict(merge(d1, d2)) for d1, d2 in zip(cases_data, deaths_data)]

# Running totals: combined, Kansas City metro, and St Louis metro.
total_cases = 0
total_kc_cases = 0
total_stl_cases = 0
total_deaths = 0
total_kc_deaths = 0
total_stl_deaths = 0
total_population = 0
total_kc_population = 0
total_stl_population = 0

print('\n\n#################')
print('# Today\'s Stats #')
print('#################')
for county in combined_data:
    # todo Trend Data for All Dates
    # Aggregate Data for Current Date
    population = int(county["Population"])
    # Use today's column when it exists, otherwise fall back to yesterday's.
    stats = county.get(today)
    if stats is None:
        stats = county[yesterday]
    cases = int(stats[0])
    deaths = int(stats[1])
    total_cases += cases
    total_deaths += deaths
    total_population += population
    # Both UID entries describe the same county; use the one from the cases row.
    uid = int(county["UID"][0])
    if uid in s_stl:
        print(f'\n\nStats for {county["Combined_Key"]} (STL Metro)')
        total_stl_cases += cases
        total_stl_deaths += deaths
        total_stl_population += population
    elif uid in s_kc:
        print(f'\n\nStats for {county["Combined_Key"]} (KC Metro)')
        total_kc_cases += cases
        total_kc_deaths += deaths
        total_kc_population += population
    print_aggregate_stats(population, cases, deaths)

print('\n\nTotal Kansas City Metro Area Stats')
print_aggregate_stats(total_kc_population, total_kc_cases, total_kc_deaths)
print('\n\nTotal St Louis Metro Area Stats')
print_aggregate_stats(total_stl_population, total_stl_cases, total_stl_deaths)
print('\n\nTotal Combined Kansas City & St Louis Metro Area Stats')
print_aggregate_stats(total_population, total_cases, total_deaths)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment