Erotemic/napkin_math_hospotializations_by_age_2021.py

## napkin_math_hospotializations_by_age_2021.py
# https://gis.cdc.gov/grasp/covidnet/COVID19_5.html
# 2020-21
import ubelt as ub

# Fraction of the US population covered by the sample (about 10%, per the
# notes that accompany the source chart).  It is defined once so that the
# sample-size estimate and the extrapolation below cannot drift apart.
frac_of_population = 0.1

US_POP_APPROX = 330e6
SAMPLE_SIZE = US_POP_APPROX * frac_of_population
print(SAMPLE_SIZE)

# NOTE: These are lab-verified COVID numbers.  The counts below add up to
# 190_509 and were observed IN THE SAMPLE SET, which represents about 10% of
# the US population -- they are not national totals.  Google says total
# deaths is 612_000, let's see how accounting for sample size holds up.

# Observed counts in the sample, keyed by age bracket.
groups = {
    '0-4'    : 1237,
    '5-17'   : 2062,
    '18-49'  : 51317,
    '50-64'  : 53054,
    '65+'    : 82839,
}

total = sum(groups.values())
print('total = {!r}'.format(total))

print('groups = {}'.format(ub.repr2(groups, nl=1)))

# Share of the observed total contributed by each age bracket.
for key, count in groups.items():
    frac = count / total
    print(f'{key} frac = {frac * 100:07.04f}% = {count} / {total}')


# Extrapolate the data, based on sample size: dividing each sampled count by
# the sampled fraction scales it up to a whole-population estimate.
est_groups = ub.map_vals(lambda x: x / frac_of_population, groups)

est_total = sum(est_groups.values())
print('est_groups = {}'.format(ub.repr2(est_groups, nl=2, align=':')))
print('est_total = {}'.format(ub.repr2(est_total, nl=2)))

# Over 1_905_090 hospitalizations leading to 612_000 deaths. So that is
# consistent.  I suppose it's always important to read the notes about the
# data collection when reading notes about raw data.


# Expected kid (ages 0-4) hospitalizations: 12370.0
	# https://gis.cdc.gov/grasp/covidnet/COVID19_5.html
	# 2020-21
	import ubelt as ub

	# Fraction of the US population covered by the sample (about 10%, per the
	# notes that accompany the source chart).  It is defined once so that the
	# sample-size estimate and the extrapolation below stay in agreement.
	frac_of_population = 0.1

	US_POP_APPROX = 330e6
	SAMPLE_SIZE = US_POP_APPROX * frac_of_population
	print(SAMPLE_SIZE)

	# NOTE: These are lab-verified COVID numbers.  The counts below add up to
	# 190_509 and were observed IN THE SAMPLE SET, which represents about 10%
	# of the US population -- they are not national totals.  Google says total
	# deaths is 612_000, let's see how accounting for sample size holds up.

	# Observed counts in the sample, keyed by age bracket.
	groups = {
	    '0-4' : 1237,
	    '5-17' : 2062,
	    '18-49' : 51317,
	    '50-64' : 53054,
	    '65+' : 82839,
	}

	total = sum(groups.values())
	print('total = {!r}'.format(total))

	print('groups = {}'.format(ub.repr2(groups, nl=1)))

	# Share of the observed total contributed by each age bracket.  The two
	# statements below form the loop body and must be nested under the `for`.
	for key, count in groups.items():
	    frac = count / total
	    print(f'{key} frac = {frac * 100:07.04f}% = {count} / {total}')


	# Extrapolate the data, based on sample size: dividing each sampled count
	# by the sampled fraction scales it up to a whole-population estimate.
	est_groups = ub.map_vals(lambda x: x / frac_of_population, groups)

	est_total = sum(est_groups.values())
	print('est_groups = {}'.format(ub.repr2(est_groups, nl=2, align=':')))
	print('est_total = {}'.format(ub.repr2(est_total, nl=2)))

	# Over 1_905_090 hospitalizations leading to 612_000 deaths. So that is
	# consistent. I suppose it's always important to read the notes about the
	# data collection when reading notes about raw data.


	# Expected kid (ages 0-4) hospitalizations: 12370.0