{{ message }}

Instantly share code, notes, and snippets.

# aflaxman/2019_09_18a_pr_hh_actb_case.ipynb

Created Sep 26, 2019
 { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wed Sep 18 12:54:05 PDT 2019\r\n" ] } ], "source": [ "import numpy as np, matplotlib.pyplot as plt, pandas as pd\n", "pd.set_option('display.max_rows', 8)\n", "!date\n", "\n", "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# How to calculate the probability an individual lives in a household with an Active TB case\n", "\n", "(for a given year and location, say 2017 South Africa)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "
age sex hh_id
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
241 72.145127 female 1
4435 23.869853 male 1
624 54.463637 female 1
9979 6.070569 female 2
... ... ... ...
4546 81.487253 male 2999
8625 79.135925 male 2999
266 72.139917 male 2999
8846 34.395619 female 2999
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "

10000 rows × 3 columns

\n", "" ], "text/plain": [ " age sex hh_id\n", "241 72.145127 female 1\n", "4435 23.869853 male 1\n", "624 54.463637 female 1\n", "9979 6.070569 female 2\n", "... ... ... ...\n", "4546 81.487253 male 2999\n", "8625 79.135925 male 2999\n", "266 72.139917 male 2999\n", "8846 34.395619 female 2999\n", "\n", "[10000 rows x 3 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# first load data on individuals, including their age, sex, and household ID\n", "\n", "# I'll just simulate it for now\n", "N = 10_000\n", "\n", "# set random seed for reproducibility\n", "np.random.seed(12345)\n", "\n", "# simulate data (to be replaced with real data, e.g. from DHS, eventually)\n", "df = pd.DataFrame(index=range(N))\n", "df['age'] = np.random.uniform(0, 100, size=N)\n", "df['sex'] = np.random.choice(['male', 'female'], size=N)\n", "df['hh_id'] = np.random.choice(range(3_000), size=N)\n", "df.sort_values('hh_id')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# then for a given age_group/sex combo\n", "age_start = 1\n", "age_end = 5\n", "sex = 'male'\n", "\n", "# find all the households with such a person\n", "hh_with = df.query(f'age >= {age_start} and age < {age_end} and sex == \"{sex}\"').hh_id.unique()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "________________________________________________________________________________\n", "[Memory] Calling vivarium_gbd_access.gbd.get_incidence_prevalence...\n", "get_incidence_prevalence(entity_id=cid(954), location_id=196, entity_type='cause')\n", "_______________________________________get_incidence_prevalence - 177.5s, 3.0min\n" ] } ], "source": [ "# then for each of those households, compute the\n", "# probability that there is at least one person with active tb\n", "\n", "import vivarium_inputs, gbd_mapping\n", "prev_ltbi = 
vivarium_inputs.interface.get_measure(\n", " gbd_mapping.causes.latent_tuberculosis_infection, 'prevalence', 'South Africa')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "________________________________________________________________________________\n", "[Memory] Calling vivarium_gbd_access.gbd.get_incidence_prevalence...\n", "get_incidence_prevalence(entity_id=cid(297), location_id=196, entity_type='cause')\n", "_______________________________________get_incidence_prevalence - 103.1s, 1.7min\n" ] } ], "source": [ "prev_any_tb = vivarium_inputs.interface.get_measure(\n", " gbd_mapping.causes.tuberculosis, 'prevalence', 'South Africa')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "
value
draw location sex age_group_start age_group_end year_start year_end
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0 South Africa Female 0.0 0.019178 1990 1991 0.000047
1991 1992 0.000038
1992 1993 0.000030
1993 1994 0.000022
... ... ... ... ... ... ... ...
999 South Africa Male 95.0 125.000000 2014 2015 0.009573
2015 2016 0.009185
2016 2017 0.008744
2017 2018 0.008260
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "

1288000 rows × 1 columns

\n", "" ], "text/plain": [ " value\n", "draw location sex age_group_start age_group_end year_start year_end \n", "0 South Africa Female 0.0 0.019178 1990 1991 0.000047\n", " 1991 1992 0.000038\n", " 1992 1993 0.000030\n", " 1993 1994 0.000022\n", "... ...\n", "999 South Africa Male 95.0 125.000000 2014 2015 0.009573\n", " 2015 2016 0.009185\n", " 2016 2017 0.008744\n", " 2017 2018 0.008260\n", "\n", "[1288000 rows x 1 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "index_cols = ['draw', 'location', 'sex', 'age_group_start', 'age_group_end', 'year_start', 'year_end']\n", "prev_active_tb = prev_any_tb.set_index(index_cols) - prev_ltbi.set_index(index_cols)\n", "prev_active_tb" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# there is probably a way to use vivarium to add a column to df\n", "# that includes the probability of active tb for each simulant!\n", "\n", "# I'll just simulate this for now, too\n", "df['pr_active_tb'] = np.random.uniform(0, .01, size=N)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.009567952684606862" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def pr_ac_tb_in_hh(df_hh):\n", " pr_no_tb = 1 - df_hh.pr_active_tb\n", " pr_no_tb_in_hh = np.prod(pr_no_tb)\n", " return 1 - pr_no_tb_in_hh\n", "pr_ac_tb_in_hh(df[df.hh_id == 1])" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "
age sex hh_id pr_active_tb
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
241 72.145127 female 1 0.001116
624 54.463637 female 1 0.004184
4435 23.869853 male 1 0.004295
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "" ], "text/plain": [ " age sex hh_id pr_active_tb\n", "241 72.145127 female 1 0.001116\n", "624 54.463637 female 1 0.004184\n", "4435 23.869853 male 1 0.004295" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.hh_id==1]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "hh_id\n", "5 0.003730\n", "58 0.028011\n", "68 0.010399\n", "72 0.011235\n", " ... \n", "2939 0.014072\n", "2953 0.015080\n", "2958 0.028687\n", "2963 0.022994\n", "Length: 175, dtype: float64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pr_ac_tb = df[df.hh_id.isin(hh_with)].groupby('hh_id').apply(pr_ac_tb_in_hh)\n", "pr_ac_tb" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.01976982850644556" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pr_ac_tb_age_sex = pr_ac_tb.mean()\n", "pr_ac_tb_age_sex" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Now repeat that for each age and sex group, and you\n", "# will get the probability of an active TB case in the household\n", "# as a function of age and sex for the location and year\n", "\n", "age_bins = [0, 1, 5, 10] + list(range(15, 101, 5))\n", "for i, age_start in enumerate(age_bins[:-1]):\n", " age_end = age_bins[i+1]\n", " for sex in ['male', 'female']:\n", " # find and store pr_ac_tb_age_sex\n", " pr_ac_tb_age_sex" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "# and do that for each draw to propagate through the uncertainty\n", "# probably good to do a bootstrap resampling of the person data, too" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "vivarium_conic_sqlns", "language": "python", "name": "vivarium_conic_sqlns" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }