Created
June 23, 2020 17:50
-
-
Save jasonrahm/3e1d6966a46fc62d1d25c94d1f4006f3 to your computer and use it in GitHub Desktop.
Covid Stats analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
from datetime import datetime | |
from datetime import timedelta | |
__author__ = 'Jason Rahm' | |
# Assumes this script is in a <scripts> folder inside a cloned version of https://github.com/CSSEGISandData/COVID-19 | |
# Total Kansas City Metro Area Stats | |
# Population: 2646933 | |
# Cases: 5159 (per capita: 194.90482003133437 per 100,000 people) | |
# Deaths: 191 (per capita: 7.215898551266692 per 100,000 people) | |
# Death Rate by Known Cases (percentage): 3.702267881372359 | |
# | |
# | |
# Total St Louis Metro Area Stats | |
# Population: 2803228 | |
# Cases: 10017 (per capita: 357.3380402878396 per 100,000 people) | |
# Deaths: 752 (per capita: 26.82621606233956 per 100,000 people) | |
# Death Rate by Known Cases (percentage): 7.507237695916941 | |
# | |
# | |
# Total Combined Kansas City & St Louis Metro Area Stats | |
# Population: 5450161 | |
# Cases: 15176 (per capita: 278.4504898112184 per 100,000 people) | |
# Deaths: 943 (per capita: 17.30224116315096 per 100,000 people) | |
# Death Rate by Known Cases (percentage): 6.21375856615709 | |
# St Louis Metro Statistical Area Counties | |
# UID, Name | |
# Illinois Counties | |
# 84017005, Bond | |
# 84017013, Calhoun | |
# 84017027, Clinton | |
# 84017083, Jersey | |
# 84017117, Macoupin | |
# 84017119, Madison | |
# 84017133, Monroe | |
# 84017163, St. Clair | |
# Missouri Counties | |
# 84029071, Franklin | |
# 84029099, Jefferson | |
# 84029113, Lincoln | |
# 84029183, St. Charles | |
# 84029189, St. Louis | |
# 84029510, St. Louis City | |
# 84029219, Warren | |
# Kansas City Metro Statistical Area Counties | |
# UID, Name
# Kansas Counties | |
# 84020091, Johnson | |
# 84020103, Leavenworth | |
# 84020107, Linn | |
# 84020121, Miami | |
# 84020209, Wyandotte | |
# Missouri Counties | |
# 84029013, Bates | |
# 84029025, Caldwell | |
# 84029037, Cass | |
# 84029047, Clay | |
# 84029049, Clinton | |
# 84029107, Lafayette | |
# 84029095, Jackson | |
# 84070003, Kansas City | |
# 84029165, Platte | |
# 84029177, Ray | |
# CSSE UIDs of the counties in the St Louis Metro Statistical Area.
stl_msa = [
    84017005,  # Bond, IL
    84017013,  # Calhoun, IL
    84017027,  # Clinton, IL
    84017083,  # Jersey, IL
    84017117,  # Macoupin, IL
    84017119,  # Madison, IL
    84017133,  # Monroe, IL
    84017163,  # St. Clair, IL
    84029071,  # Franklin, MO
    84029099,  # Jefferson, MO
    84029113,  # Lincoln, MO
    84029183,  # St. Charles, MO
    84029189,  # St. Louis, MO
    84029510,  # St. Louis City, MO
    84029219,  # Warren, MO
]

# CSSE UIDs of the counties in the Kansas City Metro Statistical Area.
kc_msa = [
    84020091,  # Johnson, KS
    84020103,  # Leavenworth, KS
    84020107,  # Linn, KS
    84020121,  # Miami, KS
    84020209,  # Wyandotte, KS
    84029013,  # Bates, MO
    84029025,  # Caldwell, MO
    84029037,  # Cass, MO
    84029047,  # Clay, MO
    84029049,  # Clinton, MO
    84029107,  # Lafayette, MO
    84029095,  # Jackson, MO
    84029165,  # Platte, MO
    84029177,  # Ray, MO
    84070003,  # Kansas City, MO
]

# Set views of the two metro areas, plus their union for filtering CSV rows.
s_stl = {*stl_msa}
s_kc = {*kc_msa}
all_counties = s_stl | s_kc
# Get keys for today/yesterday.
# The time-series date columns are looked up as M/D/YY without zero padding
# (e.g. '6/3/20').  The '%-m' / '%-d' strftime flags are a platform-specific
# extension that is not available on Windows, so the month and day are taken
# from the datetime attributes instead; '%y' is portable.
_now = datetime.now()
_previous_day = _now - timedelta(days=1)
today = f'{_now.month}/{_now.day}/{_now:%y}'
yesterday = f'{_previous_day.month}/{_previous_day.day}/{_previous_day:%y}'
# TESTING: need to exclude non-date keys from the sort
# for x in combined_data: | |
# print(sorted(x.items(), key=lambda x:datetime.strptime(x[0], '%m/%d/%y') )) | |
def merge(d1, d2):
    """Yield ``(key, value)`` pairs combining two row dictionaries.

    Keys are produced in first-seen order (all of ``d1``'s keys, then any
    keys that only ``d2`` has), so ``dict(merge(d1, d2))`` keeps the original
    column order instead of an arbitrary set order.

    Args:
        d1: First mapping (for example a confirmed-cases row).
        d2: Second mapping (for example a deaths row).

    Yields:
        ``(key, value)`` tuples where ``value`` is:
        * the single available value when the key is in only one mapping;
        * the shared value when the key is one of the descriptive fields
          listed below and both mappings agree;
        * ``[d1[key], d2[key]]`` otherwise.
    """
    # Descriptive columns that should collapse to one value when they match.
    descriptive_fields = {'FIPS', 'Combined_Key', 'Lat', 'Long_', 'Population'}

    # A dict built from both inputs iterates in insertion order, unlike a set.
    for key in {**d1, **d2}:
        if key not in d2:
            yield key, d1[key]
        elif key not in d1:
            yield key, d2[key]
        else:
            left, right = d1[key], d2[key]
            if key in descriptive_fields and left == right:
                yield key, left
            else:
                yield key, [left, right]
def parse_covid_data(csv_file, counties=None):
    """Read the rows for the selected counties from a time-series CSV file.

    For each kept row only column 0 and columns 8 onward are retained; the
    same selection is applied to the header row to name the values.

    Args:
        csv_file: Path of the CSV file to read.
        counties: Optional iterable of integer UIDs to keep.  Defaults to the
            module-level ``all_counties`` set.

    Returns:
        A list with one ``{column name: string value}`` dict per matching
        row, in file order.  An empty file or an empty selection yields
        ``[]``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first column of a non-blank row is not an integer.
    """
    wanted = set(all_counties if counties is None else counties)
    ldata = []
    if not wanted:
        return ldata

    # Track which UIDs are still missing so reading can stop as soon as all
    # of them have been seen, without assuming the file is sorted by UID.
    remaining = set(wanted)

    # newline='' is what the csv module asks for when opening files.
    with open(csv_file, newline='') as f:
        rdr = csv.reader(f, delimiter=',')
        header = next(rdr, None)
        if header is None:  # completely empty file
            return ldata
        colnames = header[:1] + header[8:]

        for row in rdr:
            if not row:  # csv.reader yields [] for blank lines
                continue
            uid = int(row[0])
            if uid not in wanted:
                continue
            # zip() stops at the shorter side, so a short row keeps only the
            # columns it actually has.
            ldata.append(dict(zip(colnames, row[:1] + row[8:])))
            remaining.discard(uid)
            if not remaining:
                break
    return ldata
def print_aggregate_stats(population, cases, deaths):
    """Print population, case and death figures with derived rates.

    Args:
        population: Number of residents in the area.
        cases: Number of confirmed cases.
        deaths: Number of deaths.

    Each rate is printed as ``n/a`` when its denominator is zero (for
    example the death rate of an area with no cases) instead of raising
    ``ZeroDivisionError``.
    """
    def scaled_ratio(numerator, denominator, scale):
        # Guard the division; a zero denominator has no meaningful rate.
        return scale * numerator / denominator if denominator else 'n/a'

    cases_per_100k = scaled_ratio(cases, population, 100000)
    deaths_per_100k = scaled_ratio(deaths, population, 100000)
    death_rate_pct = scaled_ratio(deaths, cases, 100)

    print(f'\tPopulation: {population}')
    print(f'\tCases: {cases} (per capita: {cases_per_100k} per 100,000 people)')
    print(f'\tDeaths: {deaths} (per capita: {deaths_per_100k} per 100,000 people)')
    print(f'\tDeath Rate by Known Cases (percentage): {death_rate_pct}')
# Load the confirmed-case and death time series for the tracked counties and
# pair them up row by row.  zip() pairs by position, so this relies on both
# files listing the counties in the same order.
cases_data = parse_covid_data('../csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv')
deaths_data = parse_covid_data('../csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv')
# NOTE(review): this dumps the raw deaths rows before the report and looks
# like leftover debug output -- confirm before removing.
print(deaths_data)
combined_data = [dict(merge(d1, d2)) for d1, d2 in zip(cases_data, deaths_data)]


def _latest_date_key(record):
    """Return the most recent M/D/YY key of *record*, ignoring other keys."""
    dated = []
    for key in record:
        try:
            dated.append((datetime.strptime(key, '%m/%d/%y'), key))
        except ValueError:
            continue  # not a date column (e.g. UID, Combined_Key)
    if not dated:
        raise KeyError('no date columns found in county record')
    return max(dated)[1]


# Running totals: overall, and per metro area.
total_cases = 0
total_kc_cases = 0
total_stl_cases = 0
total_deaths = 0
total_kc_deaths = 0
total_stl_deaths = 0
total_population = 0
total_kc_population = 0
total_stl_population = 0

print('\n\n#################')
print('# Today\'s Stats #')
print('#################')

for county in combined_data:
    # todo Trend Data for All Dates
    # Aggregate Data for Current Date
    population = int(county["Population"])

    # Prefer today's column, then yesterday's; if the data checkout is older
    # than that, fall back to the newest date column instead of a KeyError.
    if today in county:
        stats = county[today]
    elif yesterday in county:
        stats = county[yesterday]
    else:
        stats = county[_latest_date_key(county)]

    # After merge() a date column holds [cases value, deaths value].
    cases = int(stats[0])
    deaths = int(stats[1])

    total_cases += cases
    total_deaths += deaths
    total_population += population

    # UID is present in both inputs, so merge() stored it as a two-item list.
    uid = int(county["UID"][0])
    if uid in s_stl:
        print(f'\n\nStats for {county["Combined_Key"]} (STL Metro)')
        total_stl_cases += cases
        total_stl_deaths += deaths
        total_stl_population += population
    elif uid in s_kc:
        print(f'\n\nStats for {county["Combined_Key"]} (KC Metro)')
        total_kc_cases += cases
        total_kc_deaths += deaths
        total_kc_population += population

    print_aggregate_stats(population, cases, deaths)

print('\n\nTotal Kansas City Metro Area Stats')
print_aggregate_stats(total_kc_population, total_kc_cases, total_kc_deaths)
print('\n\nTotal St Louis Metro Area Stats')
print_aggregate_stats(total_stl_population, total_stl_cases, total_stl_deaths)
print('\n\nTotal Combined Kansas City & St Louis Metro Area Stats')
print_aggregate_stats(total_population, total_cases, total_deaths)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment