Skip to content

Instantly share code, notes, and snippets.

@ditsuke
Created August 7, 2021 23:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ditsuke/5db8f8a9b641f32aa2a453e8c10c1ace to your computer and use it in GitHub Desktop.
Save ditsuke/5db8f8a9b641f32aa2a453e8c10c1ace to your computer and use it in GitHub Desktop.
VAXX_ANALYSIS_VAERS.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "VAXX_ANALYSIS_VAERS.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true,
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/ditsuke/5db8f8a9b641f32aa2a453e8c10c1ace/vaxx_analysis_vaers.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "41bpAVVy2BG5"
},
"source": [
"# ML/DL Intern Project\n",
"## Predicting Adverse Effects of COVID-19 Vaccines"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "D9-SEVlEIQ26"
},
"source": [
"### Environment and Data Prep"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3Vnw-Q61gf2e",
"cellView": "form",
"outputId": "41e439ab-f242-4990-b4c2-abda510f27fb"
},
"source": [
"#@title Mount Storage\n",
"from google.colab import drive\n",
"drive.mount('/content/drive', force_remount=True)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Mounted at /content/drive\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "lt8Q1oRm07pw"
},
"source": [
"#@title Import Libraries, Declare Utility Functions \n",
"\n",
"import random\n",
"\n",
"# Imports :: base\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Imports :: ML\n",
"from sklearn.decomposition import SparsePCA \n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
"from sklearn.metrics import roc_auc_score\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import balanced_accuracy_score\n",
"\n",
"# Imports :: utility\n",
"from os.path import exists\n",
"from IPython.display import Markdown, display\n",
"\n",
"# Declarations :: utlity\n",
"def transform_md(obj):\n",
" if isinstance(obj, str):\n",
" return Markdown(obj)\n",
" return obj\n",
"\n",
"def printmd(*objs) -> None:\n",
" # print(type(strings))\n",
" d_args = tuple(map(transform_md, objs))\n",
" display(*d_args)\n",
" return\n",
"\n",
"def get_nan_columns(df: pd.DataFrame, threshold_fraction: float = 1.0): \n",
" threshold = threshold_fraction * len(df.index)\n",
" return [c for c in df.columns if sum(df[c].isnull()) >= threshold]\n",
"\n",
"def is_nan(x):\n",
" return (x != x)\n",
"\n",
"def get_accuracy(hostday_pred, hostday_true):\n",
" num_pred = hostday_pred.shape[0]\n",
" zero_hostday = hostday_true[hostday_true == 0]\n",
" num_zero_day = zero_hostday.shape[0]\n",
" total_norm = np.linalg.norm(hostday_pred - hostday_true) \n",
" return [total_norm / (num_pred - num_zero_day), total_norm / num_pred]\n",
"\n",
" def predict_hostday(symptom, theta):\n",
" # symptom - list of array of symptom index\n",
" # theta - linear regression parameter\n",
" n_pred = len(symptom)\n",
" n_dim = theta.shape[0]\n",
"\n",
" # prediction date\n",
" num_hostday = np.zeros([n_pred, ])\n",
"\n",
" # compute each num_hostday\n",
" for i in range(n_pred):\n",
" t_symptom = symptom[i]\n",
" symptom_array = (np.array(t_symptom)).astype('int')\n",
" test_x = np.zeros([n_dim, ])\n",
" test_x[symptom_array] = 1\n",
" test_x[-1] = 1\n",
"\n",
" # predict\n",
" num_hostday[i] = theta.T.dot(test_x)\n",
" \n",
" return num_hostday"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "B_IUsSTe1vmg",
"outputId": "df390892-14f0-4b62-8f2b-234d772d85d9"
},
"source": [
"!unzip /content/drive/MyDrive/cv19/2021VAERSData.zip -d ."
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Archive: /content/drive/MyDrive/cv19/2021VAERSData.zip\n",
" inflating: ./2021VAERSDATA.csv \n",
" inflating: ./2021VAERSSYMPTOMS.csv \n",
" inflating: ./2021VAERSVAX.csv \n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aglEXA39FuTR",
"outputId": "b4f06bfa-fa86-491e-f38f-976914ee4957"
},
"source": [
"exists(f\"{base_path}/2021VAERSDATA.csv\")"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {
"tags": []
},
"execution_count": 15
}
]
},
{
"cell_type": "code",
"metadata": {
"cellView": "form",
"id": "jORHCDtcCFLD"
},
"source": [
"#@title Read in data from the latest VAERS datasets\n",
"base_path = \".\"\n",
"df_patients = pd.read_csv(f\"{base_path}/2021VAERSDATA.csv\", engine=\"python\")\n",
"df_symptoms = pd.read_csv(f\"{base_path}/2021VAERSSYMPTOMS.csv\")\n",
"df_vax = pd.read_csv(f\"{base_path}/2021VAERSVAX.csv\", engine=\"python\")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "IPhAf-CyIa0Z"
},
"source": [
"### Describe and Analyse the Data"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"cellView": "form",
"id": "OUX3f-nrR_qz",
"outputId": "0620f448-e04a-4436-ecf7-c31eb77121be"
},
"source": [
"#@title Dataframe Shapes\n",
"#@markdown This gives an idea about entries and column counts \n",
"print(\"Patients DF : \", df_patients.shape)\n",
"print(\"Symptoms DF : \", df_symptoms.shape)\n",
"print(\"Vax DF : \", df_vax.shape)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Patients DF : (418781, 35)\n",
"Symptoms DF : (561558, 11)\n",
"Vax DF : (434899, 8)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"cellView": "form",
"id": "8A1wwWitxn9d",
"outputId": "c542966d-1671-45b1-d4d8-2ec302a6a81f"
},
"source": [
"#@title Verify Patient Count\n",
"#@markdown **Observation:** All DFs reference **418781** unique VAERS IDs \n",
"\n",
"#@markdown **Inference:** DFs are consistent with each other\n",
"print(\"IDs in patients : \", df_base['VAERS_ID'].nunique())\n",
"print(\"IDs in symptoms : \", df_patients['VAERS_ID'].nunique())\n",
"print(\"IDs in vax : \", df_vax['VAERS_ID'].nunique())"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"IDs in patients : 418781\n",
"IDs in symptoms : 418781\n",
"IDs in vax : 418781\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 426
},
"cellView": "form",
"id": "GXfz5JxJTyOp",
"outputId": "13ccc357-82af-4855-d7d5-26c55d2a8f15"
},
"source": [
"#@title Columns in each DF\n",
"printmd(\"**Patients DF columns:**\")\n",
"print(df_patients.columns, \"\\n\")\n",
"printmd(\"**Symptoms DF columns:**\")\n",
"print(df_symptoms.columns, \"\\n\")\n",
"printmd(\"**Vax DF columns:**\")\n",
"print(df_vax.columns, \"\\n\")"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/markdown": "**Patients DF columns:**",
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"Index(['VAERS_ID', 'RECVDATE', 'STATE', 'AGE_YRS', 'CAGE_YR', 'CAGE_MO', 'SEX',\n",
" 'RPT_DATE', 'SYMPTOM_TEXT', 'DIED', 'DATEDIED', 'L_THREAT', 'ER_VISIT',\n",
" 'HOSPITAL', 'HOSPDAYS', 'X_STAY', 'DISABLE', 'RECOVD', 'VAX_DATE',\n",
" 'ONSET_DATE', 'NUMDAYS', 'LAB_DATA', 'V_ADMINBY', 'V_FUNDBY',\n",
" 'OTHER_MEDS', 'CUR_ILL', 'HISTORY', 'PRIOR_VAX', 'SPLTTYPE',\n",
" 'FORM_VERS', 'TODAYS_DATE', 'BIRTH_DEFECT', 'OFC_VISIT', 'ER_ED_VISIT',\n",
" 'ALLERGIES'],\n",
" dtype='object') \n",
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"text/markdown": "**Symptoms DF columns:**",
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"Index(['VAERS_ID', 'SYMPTOM1', 'SYMPTOMVERSION1', 'SYMPTOM2',\n",
" 'SYMPTOMVERSION2', 'SYMPTOM3', 'SYMPTOMVERSION3', 'SYMPTOM4',\n",
" 'SYMPTOMVERSION4', 'SYMPTOM5', 'SYMPTOMVERSION5'],\n",
" dtype='object') \n",
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"text/markdown": "**Vax DF columns:**",
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"Index(['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES',\n",
" 'VAX_ROUTE', 'VAX_SITE', 'VAX_NAME'],\n",
" dtype='object') \n",
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BGdOtuyCxHKi"
},
"source": [
"#### Dataframe Heads"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 573
},
"id": "G0s0vGc2G8f7",
"outputId": "e3605ab2-0839-4f52-84e0-70207533e600"
},
"source": [
"#@title The Patients Dataframe\n",
"df_patients.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>VAERS_ID</th>\n",
" <th>RECVDATE</th>\n",
" <th>STATE</th>\n",
" <th>AGE_YRS</th>\n",
" <th>CAGE_YR</th>\n",
" <th>CAGE_MO</th>\n",
" <th>SEX</th>\n",
" <th>RPT_DATE</th>\n",
" <th>SYMPTOM_TEXT</th>\n",
" <th>DIED</th>\n",
" <th>DATEDIED</th>\n",
" <th>L_THREAT</th>\n",
" <th>ER_VISIT</th>\n",
" <th>HOSPITAL</th>\n",
" <th>HOSPDAYS</th>\n",
" <th>X_STAY</th>\n",
" <th>DISABLE</th>\n",
" <th>RECOVD</th>\n",
" <th>VAX_DATE</th>\n",
" <th>ONSET_DATE</th>\n",
" <th>NUMDAYS</th>\n",
" <th>LAB_DATA</th>\n",
" <th>V_ADMINBY</th>\n",
" <th>V_FUNDBY</th>\n",
" <th>OTHER_MEDS</th>\n",
" <th>CUR_ILL</th>\n",
" <th>HISTORY</th>\n",
" <th>PRIOR_VAX</th>\n",
" <th>SPLTTYPE</th>\n",
" <th>FORM_VERS</th>\n",
" <th>TODAYS_DATE</th>\n",
" <th>BIRTH_DEFECT</th>\n",
" <th>OFC_VISIT</th>\n",
" <th>ER_ED_VISIT</th>\n",
" <th>ALLERGIES</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>916600</td>\n",
" <td>01/01/2021</td>\n",
" <td>TX</td>\n",
" <td>33.0</td>\n",
" <td>33.0</td>\n",
" <td>NaN</td>\n",
" <td>F</td>\n",
" <td>NaN</td>\n",
" <td>Right side of epiglottis swelled up and hinder...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" <td>12/28/2020</td>\n",
" <td>12/30/2020</td>\n",
" <td>2.0</td>\n",
" <td>None</td>\n",
" <td>PVT</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>01/01/2021</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" <td>NaN</td>\n",
" <td>Pcn and bee venom</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>916601</td>\n",
" <td>01/01/2021</td>\n",
" <td>CA</td>\n",
" <td>73.0</td>\n",
" <td>73.0</td>\n",
" <td>NaN</td>\n",
" <td>F</td>\n",
" <td>NaN</td>\n",
" <td>Approximately 30 min post vaccination administ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" <td>12/31/2020</td>\n",
" <td>12/31/2020</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>SEN</td>\n",
" <td>NaN</td>\n",
" <td>Patient residing at nursing facility. See pati...</td>\n",
" <td>Patient residing at nursing facility. See pati...</td>\n",
" <td>Patient residing at nursing facility. See pati...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>01/01/2021</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" <td>NaN</td>\n",
" <td>\"Dairy\"</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>916602</td>\n",
" <td>01/01/2021</td>\n",
" <td>WA</td>\n",
" <td>23.0</td>\n",
" <td>23.0</td>\n",
" <td>NaN</td>\n",
" <td>F</td>\n",
" <td>NaN</td>\n",
" <td>About 15 minutes after receiving the vaccine, ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>U</td>\n",
" <td>12/31/2020</td>\n",
" <td>12/31/2020</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>SEN</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>01/01/2021</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" <td>Shellfish</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>916603</td>\n",
" <td>01/01/2021</td>\n",
" <td>WA</td>\n",
" <td>58.0</td>\n",
" <td>58.0</td>\n",
" <td>NaN</td>\n",
" <td>F</td>\n",
" <td>NaN</td>\n",
" <td>extreme fatigue, dizziness,. could not lift my...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" <td>12/23/2020</td>\n",
" <td>12/23/2020</td>\n",
" <td>0.0</td>\n",
" <td>none</td>\n",
" <td>WRK</td>\n",
" <td>NaN</td>\n",
" <td>none</td>\n",
" <td>kidney infection</td>\n",
" <td>diverticulitis, mitral valve prolapse, osteoar...</td>\n",
" <td>got measles from measel shot, mums from mumps ...</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>01/01/2021</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Diclofenac, novacaine, lidocaine, pickles, tom...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>916604</td>\n",
" <td>01/01/2021</td>\n",
" <td>TX</td>\n",
" <td>47.0</td>\n",
" <td>47.0</td>\n",
" <td>NaN</td>\n",
" <td>F</td>\n",
" <td>NaN</td>\n",
" <td>Injection site swelling, redness, warm to the ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>N</td>\n",
" <td>12/22/2020</td>\n",
" <td>12/29/2020</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" <td>PUB</td>\n",
" <td>NaN</td>\n",
" <td>Na</td>\n",
" <td>Na</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>01/01/2021</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Na</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" VAERS_ID ... ALLERGIES\n",
"0 916600 ... Pcn and bee venom\n",
"1 916601 ... \"Dairy\"\n",
"2 916602 ... Shellfish\n",
"3 916603 ... Diclofenac, novacaine, lidocaine, pickles, tom...\n",
"4 916604 ... Na\n",
"\n",
"[5 rows x 35 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"cellView": "form",
"id": "DKELY5nQIlfP",
"outputId": "984128f2-645c-416a-ab71-f0c8117c35f3"
},
"source": [
"#@title The Symptoms Dataframe\n",
"df_symptoms.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>VAERS_ID</th>\n",
" <th>SYMPTOM1</th>\n",
" <th>SYMPTOMVERSION1</th>\n",
" <th>SYMPTOM2</th>\n",
" <th>SYMPTOMVERSION2</th>\n",
" <th>SYMPTOM3</th>\n",
" <th>SYMPTOMVERSION3</th>\n",
" <th>SYMPTOM4</th>\n",
" <th>SYMPTOMVERSION4</th>\n",
" <th>SYMPTOM5</th>\n",
" <th>SYMPTOMVERSION5</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>916600</td>\n",
" <td>Dysphagia</td>\n",
" <td>23.1</td>\n",
" <td>Epiglottitis</td>\n",
" <td>23.1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>916601</td>\n",
" <td>Anxiety</td>\n",
" <td>23.1</td>\n",
" <td>Dyspnoea</td>\n",
" <td>23.1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>916602</td>\n",
" <td>Chest discomfort</td>\n",
" <td>23.1</td>\n",
" <td>Dysphagia</td>\n",
" <td>23.1</td>\n",
" <td>Pain in extremity</td>\n",
" <td>23.1</td>\n",
" <td>Visual impairment</td>\n",
" <td>23.1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>916603</td>\n",
" <td>Dizziness</td>\n",
" <td>23.1</td>\n",
" <td>Fatigue</td>\n",
" <td>23.1</td>\n",
" <td>Mobility decreased</td>\n",
" <td>23.1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>916604</td>\n",
" <td>Injection site erythema</td>\n",
" <td>23.1</td>\n",
" <td>Injection site pruritus</td>\n",
" <td>23.1</td>\n",
" <td>Injection site swelling</td>\n",
" <td>23.1</td>\n",
" <td>Injection site warmth</td>\n",
" <td>23.1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" VAERS_ID SYMPTOM1 ... SYMPTOM5 SYMPTOMVERSION5\n",
"0 916600 Dysphagia ... NaN NaN\n",
"1 916601 Anxiety ... NaN NaN\n",
"2 916602 Chest discomfort ... NaN NaN\n",
"3 916603 Dizziness ... NaN NaN\n",
"4 916604 Injection site erythema ... NaN NaN\n",
"\n",
"[5 rows x 11 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 223
},
"cellView": "form",
"id": "rhRVbYlmRGdd",
"outputId": "20310730-ff6e-4681-beac-0ea0ea47ff3d"
},
"source": [
"#@title The Vax Dataframe\n",
"df_vax.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>VAERS_ID</th>\n",
" <th>VAX_TYPE</th>\n",
" <th>VAX_MANU</th>\n",
" <th>VAX_LOT</th>\n",
" <th>VAX_DOSE_SERIES</th>\n",
" <th>VAX_ROUTE</th>\n",
" <th>VAX_SITE</th>\n",
" <th>VAX_NAME</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>916600</td>\n",
" <td>COVID19</td>\n",
" <td>MODERNA</td>\n",
" <td>037K20A</td>\n",
" <td>1</td>\n",
" <td>IM</td>\n",
" <td>LA</td>\n",
" <td>COVID19 (COVID19 (MODERNA))</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>916601</td>\n",
" <td>COVID19</td>\n",
" <td>MODERNA</td>\n",
" <td>025L20A</td>\n",
" <td>1</td>\n",
" <td>IM</td>\n",
" <td>RA</td>\n",
" <td>COVID19 (COVID19 (MODERNA))</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>916602</td>\n",
" <td>COVID19</td>\n",
" <td>PFIZER\\BIONTECH</td>\n",
" <td>EL1284</td>\n",
" <td>1</td>\n",
" <td>IM</td>\n",
" <td>LA</td>\n",
" <td>COVID19 (COVID19 (PFIZER-BIONTECH))</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>916603</td>\n",
" <td>COVID19</td>\n",
" <td>MODERNA</td>\n",
" <td>unknown</td>\n",
" <td>UNK</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>COVID19 (COVID19 (MODERNA))</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>916604</td>\n",
" <td>COVID19</td>\n",
" <td>MODERNA</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>IM</td>\n",
" <td>LA</td>\n",
" <td>COVID19 (COVID19 (MODERNA))</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" VAERS_ID VAX_TYPE ... VAX_SITE VAX_NAME\n",
"0 916600 COVID19 ... LA COVID19 (COVID19 (MODERNA))\n",
"1 916601 COVID19 ... RA COVID19 (COVID19 (MODERNA))\n",
"2 916602 COVID19 ... LA COVID19 (COVID19 (PFIZER-BIONTECH))\n",
"3 916603 COVID19 ... NaN COVID19 (COVID19 (MODERNA))\n",
"4 916604 COVID19 ... LA COVID19 (COVID19 (MODERNA))\n",
"\n",
"[5 rows x 8 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 41
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "U8fFrSFqChWe"
},
"source": [
"## Data Cleanup"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 159
},
"id": "jTiUrEqvCpU8",
"outputId": "d0df4d02-d432-4d33-c677-b8d097ce40fc"
},
"source": [
"#@title Check for NaN Columns\n",
"printmd(\"**NaN Columns, Patients DF :** \", get_nan_columns(df_patients))\n",
"printmd(\"**NaN Columns, Symptoms DF :** \", get_nan_columns(df_symptoms))\n",
"printmd(\"**NaN Columns, Vax DF :** \", get_nan_columns(df_vax))\n",
"#@markdown **Observation:** None of the columns are 100% NaN"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/markdown": "**NaN Columns, Patients DF :** ",
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"[]"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "display_data",
"data": {
"text/markdown": "**NaN Columns, Symptoms DF :** ",
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"[]"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "display_data",
"data": {
"text/markdown": "**NaN Columns, Vax DF :** ",
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"[]"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 479
},
"cellView": "form",
"id": "BZ_7SIWkalPb",
"outputId": "f05dde08-957a-49a9-91d0-3b859c25b190"
},
"source": [
"#@title Check for >= 50% NaN Columns\n",
"printmd(\"**>= 50% NaN Columns, Patients DF :** \", get_nan_columns(df_patients, 0.5))\n",
"print()\n",
"printmd(\"**>= 50% NaN Columns, Symptoms DF :** \", get_nan_columns(df_symptoms, 0.5))\n",
"print()\n",
"printmd(\"**>= 50% NaN Columns, Vax DF :** \", get_nan_columns(df_vax, 0.5))\n",
"\n",
"#@markdown **Observation:** Some columns in the Base DF and Patients DF match this level of sparsity"
],
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/markdown": "**>= 50% NaN Columns, Patients DF :** ",
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"['CAGE_MO',\n",
" 'RPT_DATE',\n",
" 'DIED',\n",
" 'DATEDIED',\n",
" 'L_THREAT',\n",
" 'ER_VISIT',\n",
" 'HOSPITAL',\n",
" 'HOSPDAYS',\n",
" 'X_STAY',\n",
" 'DISABLE',\n",
" 'LAB_DATA',\n",
" 'V_FUNDBY',\n",
" 'PRIOR_VAX',\n",
" 'SPLTTYPE',\n",
" 'BIRTH_DEFECT',\n",
" 'OFC_VISIT',\n",
" 'ER_ED_VISIT']"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"text/markdown": "**>= 50% NaN Columns, Symptoms DF :** ",
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"['SYMPTOM4', 'SYMPTOMVERSION4', 'SYMPTOM5', 'SYMPTOMVERSION5']"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"text/markdown": "**>= 50% NaN Columns, Vax DF :** ",
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"[]"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"cellView": "form",
"id": "soCZ6cfi5ljX",
"outputId": "48f84058-a5ab-4c2f-8384-78eff4064959"
},
"source": [
"#@title Check for higher (greq. 99%) sparsity columns in Patients DF\n",
"get_nan_columns(df_patients, 0.99)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['CAGE_MO', 'RPT_DATE', 'ER_VISIT', 'X_STAY', 'V_FUNDBY', 'BIRTH_DEFECT']"
]
},
"metadata": {
"tags": []
},
"execution_count": 30
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ywNZWZJ8-HWy"
},
"source": [
"**Observations:** We see that the _CAGE\\_MO_, _RPT\\_DATE_, _ER\\_VISIT_, _X\\_STAY_, _V\\_FUNDBY_ and _BIRTH\\_DEFECT_ columns have very high sparsity (at least 99% NaN). \n",
"However, the **X_STAY** and **BIRTH_DEFECT** columns can be logically relevant to the data and warrant further study."
]
},
{
"cell_type": "code",
"metadata": {
"cellView": "form",
"id": "vv4QJrIvAy6o"
},
"source": [
"#@title **Drop Irrelevant, High-sparcity Columns**\n",
"\n",
"#@markdown Drop CAGE_MO. This is the month component of age only available for infants less than 2 years of age\n",
"df_patients.drop(columns=\"CAGE_MO\", inplace=True)\n",
"#@markdown Drop RPT_DATE. High sparcity and low relevance\n",
"df_patients.drop(columns=\"RPT_DATE\", inplace=True)\n",
"#@markdown Drop ER_VISIT. Replaced by ER_ED_VISIT, which has low sparcity too.\n",
"df_patients.drop(columns=\"ER_VISIT\", inplace=True)\n",
"#@markdown Drop V_FUNDBY. High sparcity and very low relevance to any statistical study.\n",
"df_patients.drop(columns=\"V_FUNDBY\", inplace=True)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"cellView": "form",
"id": "jCzgwSxmFRVi"
},
"source": [
"#@title Binary Transforms for Applicable Columns\n",
"# df_patients.head()\n",
"\n",
"#@markdown Store applicable columns in a list\n",
"b_applicable = ['DIED', 'L_THREAT', 'HOSPITAL', 'X_STAY',\n",
" 'DISABLE', 'BIRTH_DEFECT', 'OFC_VISIT',\n",
" 'ER_ED_VISIT']\n",
"\n",
"#@markdown Iterate over list, transform columns\n",
"for column in b_applicable:\n",
" df_patients[column] = np.where(is_nan(df_patients[column]), 0, 1)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "vl4E4AO1Mgf2",
"cellView": "form"
},
"source": [
"#@title **Normalise Data**\n",
"#@markdown ----\n",
"\n",
"#@markdown ##### **Replace N/A Indicators with Numpy's NaN** \n",
"#@markdown 1. In **CUR_ILL** (Current Illnesses), a handful of values converge to NaN \n",
"df_patients[\"CUR_ILL\"].replace(\n",
" [\"None\", \"No\", \"NONE\", \"unknown\", \"Unknown\", \"none\", \"no\", \"None known\", \"none known\", \n",
" \"None reported\", \"none reported\", \"UNKNOWN\", \"N/a\", \"None stated/Denied\",\n",
" \"No other illness prior to vaccination or within the month prior\", \"NKDA\", \n",
" \"Individual was healthy prior to vaccination.\", \"None.\", \"UNK\", \"As noted above\", \"unsure\", \n",
" \"See item 12\", \"no acute illnesses\", \"No symptoms after COVID vaccinations\"\n",
" ], \n",
" np.nan, inplace=True\n",
")\n",
"\n",
"#@markdown 2. In **ALLERGIES**, we see a similar trend. Several phrases convergent with NaN\n",
"df_patients[\"ALLERGIES\"].replace(\n",
" [\"None\", \"none\", \"NKDA\", \"NKA\", \"No known allergies\", \"unknown\", \n",
" \"No\", \"Unknown\", \"no\", \"NONE\", \"No Known Allergies\", \"no known allergies\",\n",
" \"nka\", \"None known\", \"NKA to medications\", \"No known allergies to drugs or food\"\n",
" ],\n",
" np.nan, inplace=True\n",
")\n",
"\n",
"#@markdown 3. In **HISTORY**, multiple values convergent to NaN are again found\n",
"df_patients[\"HISTORY\"].replace([\"None\", \"none\", \"unknown\", \"unsure\", \"Unknown\", \"no\", \n",
" \"Unsure\", \"No\", \"NONE\", \"UNKNOWN\", \"N/a\", \"None known\",\n",
" \"None reported\", \"none reported\", \"None stated/Denied\", \n",
" \"none known\", \"Medical History/Concurrent Conditions: No adverse event (No reported medical history.)\",\n",
" \"Medical History/Concurrent Conditions: No adverse event (No reported medical history)\", \"None disclosed\"\n",
" ], np.nan, inplace=True)\n",
"\n",
"df_patients[\"OTHER_MEDS\"].replace(\n",
" [\"None\", \"none\", \"unknown\", \"Unknown\", \"no\",\n",
" \"NONE\", \"UNKNOWN\", \"No\"\n",
" ],\n",
" np.nan, inplace=True\n",
")\n",
"\n",
"#@markdown ----\n",
"#@markdown * For **HOSPDAYS**, safe to replace NaN with 0\n",
"df_patients[\"HOSPDAYS\"].replace(np.nan, 0, inplace=True)\n",
"#@markdown * ##### Remove Redundancy for Target Illness (`s/Covid 19/COVID_19/g`)\n",
"df_patients[\"CUR_ILL\"].replace(\"Covid 19\", \"COVID_19\", inplace=True)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Y7-SNLONwysn",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8935f492-77b4-45b9-f032-b24cc9c72b86"
},
"source": [
"#@title Make a dictionary of symptoms\n",
"\n",
"# Import processed data\n",
"data_path = \"/content/drive/My Drive/vaers.csv\"\n",
"\n",
"vaers_data = pd.read_csv(data_path, low_memory=False)\n",
"\n",
"\"\"\"\n",
"[\n",
" 'VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES',\n",
" 'VAX_ROUTE', 'VAX_SITE', 'VAX_NAME', 'RECVDATE', 'STATE',\n",
" 'AGE_YRS', 'CAGE_YR', 'CAGE_MO', 'SEX', 'RPT_DATE', \n",
" 'SYMPTOM_TEXT', 'DIED', 'DATEDIED', 'L_THREAT', 'ER_VISIT',\n",
" 'HOSPITAL', 'HOSPDAYS', 'X_STAY', 'DISABLE', 'RECOVD',\n",
" 'VAX_DATE', 'ONSET_DATE', 'NUMDAYS', 'LAB_DATA', 'V_ADMINBY', \n",
" 'V_FUNDBY', 'OTHER_MEDS', 'CUR_ILL', 'HISTORY', 'PRIOR_VAX',\n",
" 'SPLTTYPE', 'FORM_VERS', 'TODAYS_DATE', 'BIRTH_DEFECT', 'OFC_VISIT',\n",
" 'ER_ED_VISIT', 'ALLERGIES', 'SYMPTOM1', 'SYMPTOMVERSION1','SYMPTOM2',\n",
" 'SYMPTOMVERSION2', 'SYMPTOM3', 'SYMPTOMVERSION3', 'SYMPTOM4', 'SYMPTOMVERSION4', \n",
" 'SYMPTOM5', 'SYMPTOMVERSION5', 'date', 'date.vax', 'dur', \n",
" 'Allergic_his', 'diabetes', 'hypertension', 'arthritis', 'Asthma', \n",
" 'Migraine', 'copd', 'Anxiety', 'obesity', 'depression', \n",
" 'Thyroid', 'Anemia', 'Dementia', 'Cancer', 'Kidney',\n",
" 'Hyperlipidemia', 'CVD', 'AF', 'othermeds', 'currill',\n",
" 'allergies','sex', 'disable', 'manu'\n",
"]\n",
"\"\"\"\n",
"\n",
"#@markdown Make a list of symptoms\n",
"symptom_list = []\n",
"symptom_list.extend(vaers_data['SYMPTOM1'].to_list())\n",
"symptom_list.extend(vaers_data['SYMPTOM2'].to_list())\n",
"symptom_list.extend(vaers_data['SYMPTOM3'].to_list())\n",
"symptom_list.extend(vaers_data['SYMPTOM4'].to_list())\n",
"symptom_list.extend(vaers_data['SYMPTOM5'].to_list())\n",
"\n",
"print('Symptom count : ', len(symptom_list))\n",
"\n",
"# A `unique` list of symptoms\n",
"symptom_list_u = list(set(symptom_list))\n",
"symptom_list_u = [x for x in u_symptom_list if x == x]\n",
"\n",
"print('Unique symptom count: ', len(symptom_list_u))\n",
"\n",
"#@markdown Make the dictionary with unique symptoms\n",
"symptom_dict = {symptom_list_u[i]: i for i in range(len(symptom_list_u))}\n",
"\n",
"num_symptom = len(symptom_list_u)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Symptom count : 751975\n",
"Unique symptom count: 5487\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "dKsbcrP4wypT",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6440fb2f-26de-4cd0-80d1-c34a2ab5fc0d"
},
"source": [
"# A list of VAERS IDs\n",
"v_id = vaers_data['VAERS_ID'].to_list()\n",
"unique_id = list(set(v_id))\n",
"\n",
"# Get a list of Days Hospitalised\n",
"Vaers_hostday = vaers_data['HOSPDAYS'].to_list()\n",
"\n",
"# Get Symptoms - Segregated by Heat\n",
"symptom_1 = vaers_data['SYMPTOM1'].to_list()\n",
"symptom_2 = vaers_data['SYMPTOM2'].to_list()\n",
"symptom_3 = vaers_data['SYMPTOM3'].to_list()\n",
"symptom_4 = vaers_data['SYMPTOM4'].to_list()\n",
"symptom_5 = vaers_data['SYMPTOM5'].to_list()\n",
"\n",
"\n",
"# List of hospitalized days\n",
"hospitalized_status = []\n",
"# List of symptom index for each patient - according to symptom dictionary\n",
"symptom_status = []\n",
"for i in range(len(unique_id)):\n",
" print(i)\n",
" id = unique_id[i]\n",
" # Find index of v_id corresponding to `id`\n",
" ind_id = [i for i, x in enumerate(v_id) if x == id]\n",
" # Find symptom\n",
" t_symptom = []\n",
" # Hospital days\n",
" hospital_day = 0\n",
" for j in range(len(ind_id)):\n",
" # Symptoms\n",
" t_symptom.append(symptom_1[ind_id[j]])\n",
" t_symptom.append(symptom_2[ind_id[j]])\n",
" t_symptom.append(symptom_3[ind_id[j]])\n",
" t_symptom.append(symptom_4[ind_id[j]])\n",
" t_symptom.append(symptom_5[ind_id[j]])\n",
" t_symptom = [x for x in t_symptom if x == x]\n",
" # Days in hospital\n",
" t_hostday = Vaers_hostday[ind_id[j]]\n",
" if t_hostday == 'nan':\n",
" hospital_day = max(hospital_day ,0)\n",
" else:\n",
" hospital_day = max(hospital_day ,t_hostday)\n",
" # Number of days hospitalized\n",
" hospitalized_status.append(hospital_day)\n",
" # Symptom list\n",
" unique_symptom = list(set(t_symptom))\n",
" symptom_idx = np.zeros([len(unique_symptom), ])\n",
" for k in range(len(unique_symptom)):\n",
" symptom_idx[k] = symptom_dict.get(unique_symptom[k])\n",
" symptom_status.append(symptom_idx)\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n",
"102806\n",
"102807\n",
"102808\n",
"102809\n",
"102810\n",
"102811\n",
"102812\n",
"102813\n",
"102814\n",
"102815\n",
"102816\n",
"102817\n",
"102818\n",
"102819\n",
"102820\n",
"102821\n",
"102822\n",
"102823\n",
"102824\n",
"102825\n",
"102826\n",
"102827\n",
"102828\n",
"102829\n",
"102830\n",
"102831\n",
"102832\n",
"102833\n",
"102834\n",
"102835\n",
"102836\n",
"102837\n",
"102838\n",
"102839\n",
"102840\n",
"102841\n",
"102842\n",
"102843\n",
"102844\n",
"102845\n",
"102846\n",
"102847\n",
"102848\n",
"102849\n",
"102850\n",
"102851\n",
"102852\n",
"102853\n",
"102854\n",
"102855\n",
"102856\n",
"102857\n",
"102858\n",
"102859\n",
"102860\n",
"102861\n",
"102862\n",
"102863\n",
"102864\n",
"102865\n",
"102866\n",
"102867\n",
"102868\n",
"102869\n",
"102870\n",
"102871\n",
"102872\n",
"102873\n",
"102874\n",
"102875\n",
"102876\n",
"102877\n",
"102878\n",
"102879\n",
"102880\n",
"102881\n",
"102882\n",
"102883\n",
"102884\n",
"102885\n",
"102886\n",
"102887\n",
"102888\n",
"102889\n",
"102890\n",
"102891\n",
"102892\n",
"102893\n",
"102894\n",
"102895\n",
"102896\n",
"102897\n",
"102898\n",
"102899\n",
"102900\n",
"102901\n",
"102902\n",
"102903\n",
"102904\n",
"102905\n",
"102906\n",
"102907\n",
"102908\n",
"102909\n",
"102910\n",
"102911\n",
"102912\n",
"102913\n",
"102914\n",
"102915\n",
"102916\n",
"102917\n",
"102918\n",
"102919\n",
"102920\n",
"102921\n",
"102922\n",
"102923\n",
"102924\n",
"102925\n",
"102926\n",
"102927\n",
"102928\n",
"102929\n",
"102930\n",
"102931\n",
"102932\n",
"102933\n",
"102934\n",
"102935\n",
"102936\n",
"102937\n",
"102938\n",
"102939\n",
"102940\n",
"102941\n",
"102942\n",
"102943\n",
"102944\n",
"102945\n",
"102946\n",
"102947\n",
"102948\n",
"102949\n",
"102950\n",
"102951\n",
"102952\n",
"102953\n",
"102954\n",
"102955\n",
"102956\n",
"102957\n",
"102958\n",
"102959\n",
"102960\n",
"102961\n",
"102962\n",
"102963\n",
"102964\n",
"102965\n",
"102966\n",
"102967\n",
"102968\n",
"102969\n",
"102970\n",
"102971\n",
"102972\n",
"102973\n",
"102974\n",
"102975\n",
"102976\n",
"102977\n",
"102978\n",
"102979\n",
"102980\n",
"102981\n",
"102982\n",
"102983\n",
"102984\n",
"102985\n",
"102986\n",
"102987\n",
"102988\n",
"102989\n",
"102990\n",
"102991\n",
"102992\n",
"102993\n",
"102994\n",
"102995\n",
"102996\n",
"102997\n",
"102998\n",
"102999\n",
"103000\n",
"103001\n",
"103002\n",
"103003\n",
"103004\n",
"103005\n",
"103006\n",
"103007\n",
"103008\n",
"103009\n",
"103010\n",
"103011\n",
"103012\n",
"103013\n",
"103014\n",
"103015\n",
"103016\n",
"103017\n",
"103018\n",
"103019\n",
"103020\n",
"103021\n",
"103022\n",
"103023\n",
"103024\n",
"103025\n",
"103026\n",
"103027\n",
"103028\n",
"103029\n",
"103030\n",
"103031\n",
"103032\n",
"103033\n",
"103034\n",
"103035\n",
"103036\n",
"103037\n",
"103038\n",
"103039\n",
"103040\n",
"103041\n",
"103042\n",
"103043\n",
"103044\n",
"103045\n",
"103046\n",
"103047\n",
"103048\n",
"103049\n",
"103050\n",
"103051\n",
"103052\n",
"103053\n",
"103054\n",
"103055\n",
"103056\n",
"103057\n",
"103058\n",
"103059\n",
"103060\n",
"103061\n",
"103062\n",
"103063\n",
"103064\n",
"103065\n",
"103066\n",
"103067\n",
"103068\n",
"103069\n",
"103070\n",
"103071\n",
"103072\n",
"103073\n",
"103074\n",
"103075\n",
"103076\n",
"103077\n",
"103078\n",
"103079\n",
"103080\n",
"103081\n",
"103082\n",
"103083\n",
"103084\n",
"103085\n",
"103086\n",
"103087\n",
"103088\n",
"103089\n",
"103090\n",
"103091\n",
"103092\n",
"103093\n",
"103094\n",
"103095\n",
"103096\n",
"103097\n",
"103098\n",
"103099\n",
"103100\n",
"103101\n",
"103102\n",
"103103\n",
"103104\n",
"103105\n",
"103106\n",
"103107\n",
"103108\n",
"103109\n",
"103110\n",
"103111\n",
"103112\n",
"103113\n",
"103114\n",
"103115\n",
"103116\n",
"103117\n",
"103118\n",
"103119\n",
"103120\n",
"103121\n",
"103122\n",
"103123\n",
"103124\n",
"103125\n",
"103126\n",
"103127\n",
"103128\n",
"103129\n",
"103130\n",
"103131\n",
"103132\n",
"103133\n",
"103134\n",
"103135\n",
"103136\n",
"103137\n",
"103138\n",
"103139\n",
"103140\n",
"103141\n",
"103142\n",
"103143\n",
"103144\n",
"103145\n",
"103146\n",
"103147\n",
"103148\n",
"103149\n",
"103150\n",
"103151\n",
"103152\n",
"103153\n",
"103154\n",
"103155\n",
"103156\n",
"103157\n",
"103158\n",
"103159\n",
"103160\n",
"103161\n",
"103162\n",
"103163\n",
"103164\n",
"103165\n",
"103166\n",
"103167\n",
"103168\n",
"103169\n",
"103170\n",
"103171\n",
"103172\n",
"103173\n",
"103174\n",
"103175\n",
"103176\n",
"103177\n",
"103178\n",
"103179\n",
"103180\n",
"103181\n",
"103182\n",
"103183\n",
"103184\n",
"103185\n",
"103186\n",
"103187\n",
"103188\n",
"103189\n",
"103190\n",
"103191\n",
"103192\n",
"103193\n",
"103194\n",
"103195\n",
"103196\n",
"103197\n",
"103198\n",
"103199\n",
"103200\n",
"103201\n",
"103202\n",
"103203\n",
"103204\n",
"103205\n",
"103206\n",
"103207\n",
"103208\n",
"103209\n",
"103210\n",
"103211\n",
"103212\n",
"103213\n",
"103214\n",
"103215\n",
"103216\n",
"103217\n",
"103218\n",
"103219\n",
"103220\n",
"103221\n",
"103222\n",
"103223\n",
"103224\n",
"103225\n",
"103226\n",
"103227\n",
"103228\n",
"103229\n",
"103230\n",
"103231\n",
"103232\n",
"103233\n",
"103234\n",
"103235\n",
"103236\n",
"103237\n",
"103238\n",
"103239\n",
"103240\n",
"103241\n",
"103242\n",
"103243\n",
"103244\n",
"103245\n",
"103246\n",
"103247\n",
"103248\n",
"103249\n",
"103250\n",
"103251\n",
"103252\n",
"103253\n",
"103254\n",
"103255\n",
"103256\n",
"103257\n",
"103258\n",
"103259\n",
"103260\n",
"103261\n",
"103262\n",
"103263\n",
"103264\n",
"103265\n",
"103266\n",
"103267\n",
"103268\n",
"103269\n",
"103270\n",
"103271\n",
"103272\n",
"103273\n",
"103274\n",
"103275\n",
"103276\n",
"103277\n",
"103278\n",
"103279\n",
"103280\n",
"103281\n",
"103282\n",
"103283\n",
"103284\n",
"103285\n",
"103286\n",
"103287\n",
"103288\n",
"103289\n",
"103290\n",
"103291\n",
"103292\n",
"103293\n",
"103294\n",
"103295\n",
"103296\n",
"103297\n",
"103298\n",
"103299\n",
"103300\n",
"103301\n",
"103302\n",
"103303\n",
"103304\n",
"103305\n",
"103306\n",
"103307\n",
"103308\n",
"103309\n",
"103310\n",
"103311\n",
"103312\n",
"103313\n",
"103314\n",
"103315\n",
"103316\n",
"103317\n",
"103318\n",
"103319\n",
"103320\n",
"103321\n",
"103322\n",
"103323\n",
"103324\n",
"103325\n",
"103326\n",
"103327\n",
"103328\n",
"103329\n",
"103330\n",
"103331\n",
"103332\n",
"103333\n",
"103334\n",
"103335\n",
"103336\n",
"103337\n",
"103338\n",
"103339\n",
"103340\n",
"103341\n",
"103342\n",
"103343\n",
"103344\n",
"103345\n",
"103346\n",
"103347\n",
"103348\n",
"103349\n",
"103350\n",
"103351\n",
"103352\n",
"103353\n",
"103354\n",
"103355\n",
"103356\n",
"103357\n",
"103358\n",
"103359\n",
"103360\n",
"103361\n",
"103362\n",
"103363\n",
"103364\n",
"103365\n",
"103366\n",
"103367\n",
"103368\n",
"103369\n",
"103370\n",
"103371\n",
"103372\n",
"103373\n",
"103374\n",
"103375\n",
"103376\n",
"103377\n",
"103378\n",
"103379\n",
"103380\n",
"103381\n",
"103382\n",
"103383\n",
"103384\n",
"103385\n",
"103386\n",
"103387\n",
"103388\n",
"103389\n",
"103390\n",
"103391\n",
"103392\n",
"103393\n",
"103394\n",
"103395\n",
"103396\n",
"103397\n",
"103398\n",
"103399\n",
"103400\n",
"103401\n",
"103402\n",
"103403\n",
"103404\n",
"103405\n",
"103406\n",
"103407\n",
"103408\n",
"103409\n",
"103410\n",
"103411\n",
"103412\n",
"103413\n",
"103414\n",
"103415\n",
"103416\n",
"103417\n",
"103418\n",
"103419\n",
"103420\n",
"103421\n",
"103422\n",
"103423\n",
"103424\n",
"103425\n",
"103426\n",
"103427\n",
"103428\n",
"103429\n",
"103430\n",
"103431\n",
"103432\n",
"103433\n",
"103434\n",
"103435\n",
"103436\n",
"103437\n",
"103438\n",
"103439\n",
"103440\n",
"103441\n",
"103442\n",
"103443\n",
"103444\n",
"103445\n",
"103446\n",
"103447\n",
"103448\n",
"103449\n",
"103450\n",
"103451\n",
"103452\n",
"103453\n",
"103454\n",
"103455\n",
"103456\n",
"103457\n",
"103458\n",
"103459\n",
"103460\n",
"103461\n",
"103462\n",
"103463\n",
"103464\n",
"103465\n",
"103466\n",
"103467\n",
"103468\n",
"103469\n",
"103470\n",
"103471\n",
"103472\n",
"103473\n",
"103474\n",
"103475\n",
"103476\n",
"103477\n",
"103478\n",
"103479\n",
"103480\n",
"103481\n",
"103482\n",
"103483\n",
"103484\n",
"103485\n",
"103486\n",
"103487\n",
"103488\n",
"103489\n",
"103490\n",
"103491\n",
"103492\n",
"103493\n",
"103494\n",
"103495\n",
"103496\n",
"103497\n",
"103498\n",
"103499\n",
"103500\n",
"103501\n",
"103502\n",
"103503\n",
"103504\n",
"103505\n",
"103506\n",
"103507\n",
"103508\n",
"103509\n",
"103510\n",
"103511\n",
"103512\n",
"103513\n",
"103514\n",
"103515\n",
"103516\n",
"103517\n",
"103518\n",
"103519\n",
"103520\n",
"103521\n",
"103522\n",
"103523\n",
"103524\n",
"103525\n",
"103526\n",
"103527\n",
"103528\n",
"103529\n",
"103530\n",
"103531\n",
"103532\n",
"103533\n",
"103534\n",
"103535\n",
"103536\n",
"103537\n",
"103538\n",
"103539\n",
"103540\n",
"103541\n",
"103542\n",
"103543\n",
"103544\n",
"103545\n",
"103546\n",
"103547\n",
"103548\n",
"103549\n",
"103550\n",
"103551\n",
"103552\n",
"103553\n",
"103554\n",
"103555\n",
"103556\n",
"103557\n",
"103558\n",
"103559\n",
"103560\n",
"103561\n",
"103562\n",
"103563\n",
"103564\n",
"103565\n",
"103566\n",
"103567\n",
"103568\n",
"103569\n",
"103570\n",
"103571\n",
"103572\n",
"103573\n",
"103574\n",
"103575\n",
"103576\n",
"103577\n",
"103578\n",
"103579\n",
"103580\n",
"103581\n",
"103582\n",
"103583\n",
"103584\n",
"103585\n",
"103586\n",
"103587\n",
"103588\n",
"103589\n",
"103590\n",
"103591\n",
"103592\n",
"103593\n",
"103594\n",
"103595\n",
"103596\n",
"103597\n",
"103598\n",
"103599\n",
"103600\n",
"103601\n",
"103602\n",
"103603\n",
"103604\n",
"103605\n",
"103606\n",
"103607\n",
"103608\n",
"103609\n",
"103610\n",
"103611\n",
"103612\n",
"103613\n",
"103614\n",
"103615\n",
"103616\n",
"103617\n",
"103618\n",
"103619\n",
"103620\n",
"103621\n",
"103622\n",
"103623\n",
"103624\n",
"103625\n",
"103626\n",
"103627\n",
"103628\n",
"103629\n",
"103630\n",
"103631\n",
"103632\n",
"103633\n",
"103634\n",
"103635\n",
"103636\n",
"103637\n",
"103638\n",
"103639\n",
"103640\n",
"103641\n",
"103642\n",
"103643\n",
"103644\n",
"103645\n",
"103646\n",
"103647\n",
"103648\n",
"103649\n",
"103650\n",
"103651\n",
"103652\n",
"103653\n",
"103654\n",
"103655\n",
"103656\n",
"103657\n",
"103658\n",
"103659\n",
"103660\n",
"103661\n",
"103662\n",
"103663\n",
"103664\n",
"103665\n",
"103666\n",
"103667\n",
"103668\n",
"103669\n",
"103670\n",
"103671\n",
"103672\n",
"103673\n",
"103674\n",
"103675\n",
"103676\n",
"103677\n",
"103678\n",
"103679\n",
"103680\n",
"103681\n",
"103682\n",
"103683\n",
"103684\n",
"103685\n",
"103686\n",
"103687\n",
"103688\n",
"103689\n",
"103690\n",
"103691\n",
"103692\n",
"103693\n",
"103694\n",
"103695\n",
"103696\n",
"103697\n",
"103698\n",
"103699\n",
"103700\n",
"103701\n",
"103702\n",
"103703\n",
"103704\n",
"103705\n",
"103706\n",
"103707\n",
"103708\n",
"103709\n",
"103710\n",
"103711\n",
"103712\n",
"103713\n",
"103714\n",
"103715\n",
"103716\n",
"103717\n",
"103718\n",
"103719\n",
"103720\n",
"103721\n",
"103722\n",
"103723\n",
"103724\n",
"103725\n",
"103726\n",
"103727\n",
"103728\n",
"103729\n",
"103730\n",
"103731\n",
"103732\n",
"103733\n",
"103734\n",
"103735\n",
"103736\n",
"103737\n",
"103738\n",
"103739\n",
"103740\n",
"103741\n",
"103742\n",
"103743\n",
"103744\n",
"103745\n",
"103746\n",
"103747\n",
"103748\n",
"103749\n",
"103750\n",
"103751\n",
"103752\n",
"103753\n",
"103754\n",
"103755\n",
"103756\n",
"103757\n",
"103758\n",
"103759\n",
"103760\n",
"103761\n",
"103762\n",
"103763\n",
"103764\n",
"103765\n",
"103766\n",
"103767\n",
"103768\n",
"103769\n",
"103770\n",
"103771\n",
"103772\n",
"103773\n",
"103774\n",
"103775\n",
"103776\n",
"103777\n",
"103778\n",
"103779\n",
"103780\n",
"103781\n",
"103782\n",
"103783\n",
"103784\n",
"103785\n",
"103786\n",
"103787\n",
"103788\n",
"103789\n",
"103790\n",
"103791\n",
"103792\n",
"103793\n",
"103794\n",
"103795\n",
"103796\n",
"103797\n",
"103798\n",
"103799\n",
"103800\n",
"103801\n",
"103802\n",
"103803\n",
"103804\n",
"103805\n",
"103806\n",
"103807\n",
"103808\n",
"103809\n",
"103810\n",
"103811\n",
"103812\n",
"103813\n",
"103814\n",
"103815\n",
"103816\n",
"103817\n",
"103818\n",
"103819\n",
"103820\n",
"103821\n",
"103822\n",
"103823\n",
"103824\n",
"103825\n",
"103826\n",
"103827\n",
"103828\n",
"103829\n",
"103830\n",
"103831\n",
"103832\n",
"103833\n",
"103834\n",
"103835\n",
"103836\n",
"103837\n",
"103838\n",
"103839\n",
"103840\n",
"103841\n",
"103842\n",
"103843\n",
"103844\n",
"103845\n",
"103846\n",
"103847\n",
"103848\n",
"103849\n",
"103850\n",
"103851\n",
"103852\n",
"103853\n",
"103854\n",
"103855\n",
"103856\n",
"103857\n",
"103858\n",
"103859\n",
"103860\n",
"103861\n",
"103862\n",
"103863\n",
"103864\n",
"103865\n",
"103866\n",
"103867\n",
"103868\n",
"103869\n",
"103870\n",
"103871\n",
"103872\n",
"103873\n",
"103874\n",
"103875\n",
"103876\n",
"103877\n",
"103878\n",
"103879\n",
"103880\n",
"103881\n",
"103882\n",
"103883\n",
"103884\n",
"103885\n",
"103886\n",
"103887\n",
"103888\n",
"103889\n",
"103890\n",
"103891\n",
"103892\n",
"103893\n",
"103894\n",
"103895\n",
"103896\n",
"103897\n",
"103898\n",
"103899\n",
"103900\n",
"103901\n",
"103902\n",
"103903\n",
"103904\n",
"103905\n",
"103906\n",
"103907\n",
"103908\n",
"103909\n",
"103910\n",
"103911\n",
"103912\n",
"103913\n",
"103914\n",
"103915\n",
"103916\n",
"103917\n",
"103918\n",
"103919\n",
"103920\n",
"103921\n",
"103922\n",
"103923\n",
"103924\n",
"103925\n",
"103926\n",
"103927\n",
"103928\n",
"103929\n",
"103930\n",
"103931\n",
"103932\n",
"103933\n",
"103934\n",
"103935\n",
"103936\n",
"103937\n",
"103938\n",
"103939\n",
"103940\n",
"103941\n",
"103942\n",
"103943\n",
"103944\n",
"103945\n",
"103946\n",
"103947\n",
"103948\n",
"103949\n",
"103950\n",
"103951\n",
"103952\n",
"103953\n",
"103954\n",
"103955\n",
"103956\n",
"103957\n",
"103958\n",
"103959\n",
"103960\n",
"103961\n",
"103962\n",
"103963\n",
"103964\n",
"103965\n",
"103966\n",
"103967\n",
"103968\n",
"103969\n",
"103970\n",
"103971\n",
"103972\n",
"103973\n",
"103974\n",
"103975\n",
"103976\n",
"103977\n",
"103978\n",
"103979\n",
"103980\n",
"103981\n",
"103982\n",
"103983\n",
"103984\n",
"103985\n",
"103986\n",
"103987\n",
"103988\n",
"103989\n",
"103990\n",
"103991\n",
"103992\n",
"103993\n",
"103994\n",
"103995\n",
"103996\n",
"103997\n",
"103998\n",
"103999\n",
"104000\n",
"104001\n",
"104002\n",
"104003\n",
"104004\n",
"104005\n",
"104006\n",
"104007\n",
"104008\n",
"104009\n",
"104010\n",
"104011\n",
"104012\n",
"104013\n",
"104014\n",
"104015\n",
"104016\n",
"104017\n",
"104018\n",
"104019\n",
"104020\n",
"104021\n",
"104022\n",
"104023\n",
"104024\n",
"104025\n",
"104026\n",
"104027\n",
"104028\n",
"104029\n",
"104030\n",
"104031\n",
"104032\n",
"104033\n",
"104034\n",
"104035\n",
"104036\n",
"104037\n",
"104038\n",
"104039\n",
"104040\n",
"104041\n",
"104042\n",
"104043\n",
"104044\n",
"104045\n",
"104046\n",
"104047\n",
"104048\n",
"104049\n",
"104050\n",
"104051\n",
"104052\n",
"104053\n",
"104054\n",
"104055\n",
"104056\n",
"104057\n",
"104058\n",
"104059\n",
"104060\n",
"104061\n",
"104062\n",
"104063\n",
"104064\n",
"104065\n",
"104066\n",
"104067\n",
"104068\n",
"104069\n",
"104070\n",
"104071\n",
"104072\n",
"104073\n",
"104074\n",
"104075\n",
"104076\n",
"104077\n",
"104078\n",
"104079\n",
"104080\n",
"104081\n",
"104082\n",
"104083\n",
"104084\n",
"104085\n",
"104086\n",
"104087\n",
"104088\n",
"104089\n",
"104090\n",
"104091\n",
"104092\n",
"104093\n",
"104094\n",
"104095\n",
"104096\n",
"104097\n",
"104098\n",
"104099\n",
"104100\n",
"104101\n",
"104102\n",
"104103\n",
"104104\n",
"104105\n",
"104106\n",
"104107\n",
"104108\n",
"104109\n",
"104110\n",
"104111\n",
"104112\n",
"104113\n",
"104114\n",
"104115\n",
"104116\n",
"104117\n",
"104118\n",
"104119\n",
"104120\n",
"104121\n",
"104122\n",
"104123\n",
"104124\n",
"104125\n",
"104126\n",
"104127\n",
"104128\n",
"104129\n",
"104130\n",
"104131\n",
"104132\n",
"104133\n",
"104134\n",
"104135\n",
"104136\n",
"104137\n",
"104138\n",
"104139\n",
"104140\n",
"104141\n",
"104142\n",
"104143\n",
"104144\n",
"104145\n",
"104146\n",
"104147\n",
"104148\n",
"104149\n",
"104150\n",
"104151\n",
"104152\n",
"104153\n",
"104154\n",
"104155\n",
"104156\n",
"104157\n",
"104158\n",
"104159\n",
"104160\n",
"104161\n",
"104162\n",
"104163\n",
"104164\n",
"104165\n",
"104166\n",
"104167\n",
"104168\n",
"104169\n",
"104170\n",
"104171\n",
"104172\n",
"104173\n",
"104174\n",
"104175\n",
"104176\n",
"104177\n",
"104178\n",
"104179\n",
"104180\n",
"104181\n",
"104182\n",
"104183\n",
"104184\n",
"104185\n",
"104186\n",
"104187\n",
"104188\n",
"104189\n",
"104190\n",
"104191\n",
"104192\n",
"104193\n",
"104194\n",
"104195\n",
"104196\n",
"104197\n",
"104198\n",
"104199\n",
"104200\n",
"104201\n",
"104202\n",
"104203\n",
"104204\n",
"104205\n",
"104206\n",
"104207\n",
"104208\n",
"104209\n",
"104210\n",
"104211\n",
"104212\n",
"104213\n",
"104214\n",
"104215\n",
"104216\n",
"104217\n",
"104218\n",
"104219\n",
"104220\n",
"104221\n",
"104222\n",
"104223\n",
"104224\n",
"104225\n",
"104226\n",
"104227\n",
"104228\n",
"104229\n",
"104230\n",
"104231\n",
"104232\n",
"104233\n",
"104234\n",
"104235\n",
"104236\n",
"104237\n",
"104238\n",
"104239\n",
"104240\n",
"104241\n",
"104242\n",
"104243\n",
"104244\n",
"104245\n",
"104246\n",
"104247\n",
"104248\n",
"104249\n",
"104250\n",
"104251\n",
"104252\n",
"104253\n",
"104254\n",
"104255\n",
"104256\n",
"104257\n",
"104258\n",
"104259\n",
"104260\n",
"104261\n",
"104262\n",
"104263\n",
"104264\n",
"104265\n",
"104266\n",
"104267\n",
"104268\n",
"104269\n",
"104270\n",
"104271\n",
"104272\n",
"104273\n",
"104274\n",
"104275\n",
"104276\n",
"104277\n",
"104278\n",
"104279\n",
"104280\n",
"104281\n",
"104282\n",
"104283\n",
"104284\n",
"104285\n",
"104286\n",
"104287\n",
"104288\n",
"104289\n",
"104290\n",
"104291\n",
"104292\n",
"104293\n",
"104294\n",
"104295\n",
"104296\n",
"104297\n",
"104298\n",
"104299\n",
"104300\n",
"104301\n",
"104302\n",
"104303\n",
"104304\n",
"104305\n",
"104306\n",
"104307\n",
"104308\n",
"104309\n",
"104310\n",
"104311\n",
"104312\n",
"104313\n",
"104314\n",
"104315\n",
"104316\n",
"104317\n",
"104318\n",
"104319\n",
"104320\n",
"104321\n",
"104322\n",
"104323\n",
"104324\n",
"104325\n",
"104326\n",
"104327\n",
"104328\n",
"104329\n",
"104330\n",
"104331\n",
"104332\n",
"104333\n",
"104334\n",
"104335\n",
"104336\n",
"104337\n",
"104338\n",
"104339\n",
"104340\n",
"104341\n",
"104342\n",
"104343\n",
"104344\n",
"104345\n",
"104346\n",
"104347\n",
"104348\n",
"104349\n",
"104350\n",
"104351\n",
"104352\n",
"104353\n",
"104354\n",
"104355\n",
"104356\n",
"104357\n",
"104358\n",
"104359\n",
"104360\n",
"104361\n",
"104362\n",
"104363\n",
"104364\n",
"104365\n",
"104366\n",
"104367\n",
"104368\n",
"104369\n",
"104370\n",
"104371\n",
"104372\n",
"104373\n",
"104374\n",
"104375\n",
"104376\n",
"104377\n",
"104378\n",
"104379\n",
"104380\n",
"104381\n",
"104382\n",
"104383\n",
"104384\n",
"104385\n",
"104386\n",
"104387\n",
"104388\n",
"104389\n",
"104390\n",
"104391\n",
"104392\n",
"104393\n",
"104394\n",
"104395\n",
"104396\n",
"104397\n",
"104398\n",
"104399\n",
"104400\n",
"104401\n",
"104402\n",
"104403\n",
"104404\n",
"104405\n",
"104406\n",
"104407\n",
"104408\n",
"104409\n",
"104410\n",
"104411\n",
"104412\n",
"104413\n",
"104414\n",
"104415\n",
"104416\n",
"104417\n",
"104418\n",
"104419\n",
"104420\n",
"104421\n",
"104422\n",
"104423\n",
"104424\n",
"104425\n",
"104426\n",
"104427\n",
"104428\n",
"104429\n",
"104430\n",
"104431\n",
"104432\n",
"104433\n",
"104434\n",
"104435\n",
"104436\n",
"104437\n",
"104438\n",
"104439\n",
"104440\n",
"104441\n",
"104442\n",
"104443\n",
"104444\n",
"104445\n",
"104446\n",
"104447\n",
"104448\n",
"104449\n",
"104450\n",
"104451\n",
"104452\n",
"104453\n",
"104454\n",
"104455\n",
"104456\n",
"104457\n",
"104458\n",
"104459\n",
"104460\n",
"104461\n",
"104462\n",
"104463\n",
"104464\n",
"104465\n",
"104466\n",
"104467\n",
"104468\n",
"104469\n",
"104470\n",
"104471\n",
"104472\n",
"104473\n",
"104474\n",
"104475\n",
"104476\n",
"104477\n",
"104478\n",
"104479\n",
"104480\n",
"104481\n",
"104482\n",
"104483\n",
"104484\n",
"104485\n",
"104486\n",
"104487\n",
"104488\n",
"104489\n",
"104490\n",
"104491\n",
"104492\n",
"104493\n",
"104494\n",
"104495\n",
"104496\n",
"104497\n",
"104498\n",
"104499\n",
"104500\n",
"104501\n",
"104502\n",
"104503\n",
"104504\n",
"104505\n",
"104506\n",
"104507\n",
"104508\n",
"104509\n",
"104510\n",
"104511\n",
"104512\n",
"104513\n",
"104514\n",
"104515\n",
"104516\n",
"104517\n",
"104518\n",
"104519\n",
"104520\n",
"104521\n",
"104522\n",
"104523\n",
"104524\n",
"104525\n",
"104526\n",
"104527\n",
"104528\n",
"104529\n",
"104530\n",
"104531\n",
"104532\n",
"104533\n",
"104534\n",
"104535\n",
"104536\n",
"104537\n",
"104538\n",
"104539\n",
"104540\n",
"104541\n",
"104542\n",
"104543\n",
"104544\n",
"104545\n",
"104546\n",
"104547\n",
"104548\n",
"104549\n",
"104550\n",
"104551\n",
"104552\n",
"104553\n",
"104554\n",
"104555\n",
"104556\n",
"104557\n",
"104558\n",
"104559\n",
"104560\n",
"104561\n",
"104562\n",
"104563\n",
"104564\n",
"104565\n",
"104566\n",
"104567\n",
"104568\n",
"104569\n",
"104570\n",
"104571\n",
"104572\n",
"104573\n",
"104574\n",
"104575\n",
"104576\n",
"104577\n",
"104578\n",
"104579\n",
"104580\n",
"104581\n",
"104582\n",
"104583\n",
"104584\n",
"104585\n",
"104586\n",
"104587\n",
"104588\n",
"104589\n",
"104590\n",
"104591\n",
"104592\n",
"104593\n",
"104594\n",
"104595\n",
"104596\n",
"104597\n",
"104598\n",
"104599\n",
"104600\n",
"104601\n",
"104602\n",
"104603\n",
"104604\n",
"104605\n",
"104606\n",
"104607\n",
"104608\n",
"104609\n",
"104610\n",
"104611\n",
"104612\n",
"104613\n",
"104614\n",
"104615\n",
"104616\n",
"104617\n",
"104618\n",
"104619\n",
"104620\n",
"104621\n",
"104622\n",
"104623\n",
"104624\n",
"104625\n",
"104626\n",
"104627\n",
"104628\n",
"104629\n",
"104630\n",
"104631\n",
"104632\n",
"104633\n",
"104634\n",
"104635\n",
"104636\n",
"104637\n",
"104638\n",
"104639\n",
"104640\n",
"104641\n",
"104642\n",
"104643\n",
"104644\n",
"104645\n",
"104646\n",
"104647\n",
"104648\n",
"104649\n",
"104650\n",
"104651\n",
"104652\n",
"104653\n",
"104654\n",
"104655\n",
"104656\n",
"104657\n",
"104658\n",
"104659\n",
"104660\n",
"104661\n",
"104662\n",
"104663\n",
"104664\n",
"104665\n",
"104666\n",
"104667\n",
"104668\n",
"104669\n",
"104670\n",
"104671\n",
"104672\n",
"104673\n",
"104674\n",
"104675\n",
"104676\n",
"104677\n",
"104678\n",
"104679\n",
"104680\n",
"104681\n",
"104682\n",
"104683\n",
"104684\n",
"104685\n",
"104686\n",
"104687\n",
"104688\n",
"104689\n",
"104690\n",
"104691\n",
"104692\n",
"104693\n",
"104694\n",
"104695\n",
"104696\n",
"104697\n",
"104698\n",
"104699\n",
"104700\n",
"104701\n",
"104702\n",
"104703\n",
"104704\n",
"104705\n",
"104706\n",
"104707\n",
"104708\n",
"104709\n",
"104710\n",
"104711\n",
"104712\n",
"104713\n",
"104714\n",
"104715\n",
"104716\n",
"104717\n",
"104718\n",
"104719\n",
"104720\n",
"104721\n",
"104722\n",
"104723\n",
"104724\n",
"104725\n",
"104726\n",
"104727\n",
"104728\n",
"104729\n",
"104730\n",
"104731\n",
"104732\n",
"104733\n",
"104734\n",
"104735\n",
"104736\n",
"104737\n",
"104738\n",
"104739\n",
"104740\n",
"104741\n",
"104742\n",
"104743\n",
"104744\n",
"104745\n",
"104746\n",
"104747\n",
"104748\n",
"104749\n",
"104750\n",
"104751\n",
"104752\n",
"104753\n",
"104754\n",
"104755\n",
"104756\n",
"104757\n",
"104758\n",
"104759\n",
"104760\n",
"104761\n",
"104762\n",
"104763\n",
"104764\n",
"104765\n",
"104766\n",
"104767\n",
"104768\n",
"104769\n",
"104770\n",
"104771\n",
"104772\n",
"104773\n",
"104774\n",
"104775\n",
"104776\n",
"104777\n",
"104778\n",
"104779\n",
"104780\n",
"104781\n",
"104782\n",
"104783\n",
"104784\n",
"104785\n",
"104786\n",
"104787\n",
"104788\n",
"104789\n",
"104790\n",
"104791\n",
"104792\n",
"104793\n",
"104794\n",
"104795\n",
"104796\n",
"104797\n",
"104798\n",
"104799\n",
"104800\n",
"104801\n",
"104802\n",
"104803\n",
"104804\n",
"104805\n",
"104806\n",
"104807\n",
"104808\n",
"104809\n",
"104810\n",
"104811\n",
"104812\n",
"104813\n",
"104814\n",
"104815\n",
"104816\n",
"104817\n",
"104818\n",
"104819\n",
"104820\n",
"104821\n",
"104822\n",
"104823\n",
"104824\n",
"104825\n",
"104826\n",
"104827\n",
"104828\n",
"104829\n",
"104830\n",
"104831\n",
"104832\n",
"104833\n",
"104834\n",
"104835\n",
"104836\n",
"104837\n",
"104838\n",
"104839\n",
"104840\n",
"104841\n",
"104842\n",
"104843\n",
"104844\n",
"104845\n",
"104846\n",
"104847\n",
"104848\n",
"104849\n",
"104850\n",
"104851\n",
"104852\n",
"104853\n",
"104854\n",
"104855\n",
"104856\n",
"104857\n",
"104858\n",
"104859\n",
"104860\n",
"104861\n",
"104862\n",
"104863\n",
"104864\n",
"104865\n",
"104866\n",
"104867\n",
"104868\n",
"104869\n",
"104870\n",
"104871\n",
"104872\n",
"104873\n",
"104874\n",
"104875\n",
"104876\n",
"104877\n",
"104878\n",
"104879\n",
"104880\n",
"104881\n",
"104882\n",
"104883\n",
"104884\n",
"104885\n",
"104886\n",
"104887\n",
"104888\n",
"104889\n",
"104890\n",
"104891\n",
"104892\n",
"104893\n",
"104894\n",
"104895\n",
"104896\n",
"104897\n",
"104898\n",
"104899\n",
"104900\n",
"104901\n",
"104902\n",
"104903\n",
"104904\n",
"104905\n",
"104906\n",
"104907\n",
"104908\n",
"104909\n",
"104910\n",
"104911\n",
"104912\n",
"104913\n",
"104914\n",
"104915\n",
"104916\n",
"104917\n",
"104918\n",
"104919\n",
"104920\n",
"104921\n",
"104922\n",
"104923\n",
"104924\n",
"104925\n",
"104926\n",
"104927\n",
"104928\n",
"104929\n",
"104930\n",
"104931\n",
"104932\n",
"104933\n",
"104934\n",
"104935\n",
"104936\n",
"104937\n",
"104938\n",
"104939\n",
"104940\n",
"104941\n",
"104942\n",
"104943\n",
"104944\n",
"104945\n",
"104946\n",
"104947\n",
"104948\n",
"104949\n",
"104950\n",
"104951\n",
"104952\n",
"104953\n",
"104954\n",
"104955\n",
"104956\n",
"104957\n",
"104958\n",
"104959\n",
"104960\n",
"104961\n",
"104962\n",
"104963\n",
"104964\n",
"104965\n",
"104966\n",
"104967\n",
"104968\n",
"104969\n",
"104970\n",
"104971\n",
"104972\n",
"104973\n",
"104974\n",
"104975\n",
"104976\n",
"104977\n",
"104978\n",
"104979\n",
"104980\n",
"104981\n",
"104982\n",
"104983\n",
"104984\n",
"104985\n",
"104986\n",
"104987\n",
"104988\n",
"104989\n",
"104990\n",
"104991\n",
"104992\n",
"104993\n",
"104994\n",
"104995\n",
"104996\n",
"104997\n",
"104998\n",
"104999\n",
"105000\n",
"105001\n",
"105002\n",
"105003\n",
"105004\n",
"105005\n",
"105006\n",
"105007\n",
"105008\n",
"105009\n",
"105010\n",
"105011\n",
"105012\n",
"105013\n",
"105014\n",
"105015\n",
"105016\n",
"105017\n",
"105018\n",
"105019\n",
"105020\n",
"105021\n",
"105022\n",
"105023\n",
"105024\n",
"105025\n",
"105026\n",
"105027\n",
"105028\n",
"105029\n",
"105030\n",
"105031\n",
"105032\n",
"105033\n",
"105034\n",
"105035\n",
"105036\n",
"105037\n",
"105038\n",
"105039\n",
"105040\n",
"105041\n",
"105042\n",
"105043\n",
"105044\n",
"105045\n",
"105046\n",
"105047\n",
"105048\n",
"105049\n",
"105050\n",
"105051\n",
"105052\n",
"105053\n",
"105054\n",
"105055\n",
"105056\n",
"105057\n",
"105058\n",
"105059\n",
"105060\n",
"105061\n",
"105062\n",
"105063\n",
"105064\n",
"105065\n",
"105066\n",
"105067\n",
"105068\n",
"105069\n",
"105070\n",
"105071\n",
"105072\n",
"105073\n",
"105074\n",
"105075\n",
"105076\n",
"105077\n",
"105078\n",
"105079\n",
"105080\n",
"105081\n",
"105082\n",
"105083\n",
"105084\n",
"105085\n",
"105086\n",
"105087\n",
"105088\n",
"105089\n",
"105090\n",
"105091\n",
"105092\n",
"105093\n",
"105094\n",
"105095\n",
"105096\n",
"105097\n",
"105098\n",
"105099\n",
"105100\n",
"105101\n",
"105102\n",
"105103\n",
"105104\n",
"105105\n",
"105106\n",
"105107\n",
"105108\n",
"105109\n",
"105110\n",
"105111\n",
"105112\n",
"105113\n",
"105114\n",
"105115\n",
"105116\n",
"105117\n",
"105118\n",
"105119\n",
"105120\n",
"105121\n",
"105122\n",
"105123\n",
"105124\n",
"105125\n",
"105126\n",
"105127\n",
"105128\n",
"105129\n",
"105130\n",
"105131\n",
"105132\n",
"105133\n",
"105134\n",
"105135\n",
"105136\n",
"105137\n",
"105138\n",
"105139\n",
"105140\n",
"105141\n",
"105142\n",
"105143\n",
"105144\n",
"105145\n",
"105146\n",
"105147\n",
"105148\n",
"105149\n",
"105150\n",
"105151\n",
"105152\n",
"105153\n",
"105154\n",
"105155\n",
"105156\n",
"105157\n",
"105158\n",
"105159\n",
"105160\n",
"105161\n",
"105162\n",
"105163\n",
"105164\n",
"105165\n",
"105166\n",
"105167\n",
"105168\n",
"105169\n",
"105170\n",
"105171\n",
"105172\n",
"105173\n",
"105174\n",
"105175\n",
"105176\n",
"105177\n",
"105178\n",
"105179\n",
"105180\n",
"105181\n",
"105182\n",
"105183\n",
"105184\n",
"105185\n",
"105186\n",
"105187\n",
"105188\n",
"105189\n",
"105190\n",
"105191\n",
"105192\n",
"105193\n",
"105194\n",
"105195\n",
"105196\n",
"105197\n",
"105198\n",
"105199\n",
"105200\n",
"105201\n",
"105202\n",
"105203\n",
"105204\n",
"105205\n",
"105206\n",
"105207\n",
"105208\n",
"105209\n",
"105210\n",
"105211\n",
"105212\n",
"105213\n",
"105214\n",
"105215\n",
"105216\n",
"105217\n",
"105218\n",
"105219\n",
"105220\n",
"105221\n",
"105222\n",
"105223\n",
"105224\n",
"105225\n",
"105226\n",
"105227\n",
"105228\n",
"105229\n",
"105230\n",
"105231\n",
"105232\n",
"105233\n",
"105234\n",
"105235\n",
"105236\n",
"105237\n",
"105238\n",
"105239\n",
"105240\n",
"105241\n",
"105242\n",
"105243\n",
"105244\n",
"105245\n",
"105246\n",
"105247\n",
"105248\n",
"105249\n",
"105250\n",
"105251\n",
"105252\n",
"105253\n",
"105254\n",
"105255\n",
"105256\n",
"105257\n",
"105258\n",
"105259\n",
"105260\n",
"105261\n",
"105262\n",
"105263\n",
"105264\n",
"105265\n",
"105266\n",
"105267\n",
"105268\n",
"105269\n",
"105270\n",
"105271\n",
"105272\n",
"105273\n",
"105274\n",
"105275\n",
"105276\n",
"105277\n",
"105278\n",
"105279\n",
"105280\n",
"105281\n",
"105282\n",
"105283\n",
"105284\n",
"105285\n",
"105286\n",
"105287\n",
"105288\n",
"105289\n",
"105290\n",
"105291\n",
"105292\n",
"105293\n",
"105294\n",
"105295\n",
"105296\n",
"105297\n",
"105298\n",
"105299\n",
"105300\n",
"105301\n",
"105302\n",
"105303\n",
"105304\n",
"105305\n",
"105306\n",
"105307\n",
"105308\n",
"105309\n",
"105310\n",
"105311\n",
"105312\n",
"105313\n",
"105314\n",
"105315\n",
"105316\n",
"105317\n",
"105318\n",
"105319\n",
"105320\n",
"105321\n",
"105322\n",
"105323\n",
"105324\n",
"105325\n",
"105326\n",
"105327\n",
"105328\n",
"105329\n",
"105330\n",
"105331\n",
"105332\n",
"105333\n",
"105334\n",
"105335\n",
"105336\n",
"105337\n",
"105338\n",
"105339\n",
"105340\n",
"105341\n",
"105342\n",
"105343\n",
"105344\n",
"105345\n",
"105346\n",
"105347\n",
"105348\n",
"105349\n",
"105350\n",
"105351\n",
"105352\n",
"105353\n",
"105354\n",
"105355\n",
"105356\n",
"105357\n",
"105358\n",
"105359\n",
"105360\n",
"105361\n",
"105362\n",
"105363\n",
"105364\n",
"105365\n",
"105366\n",
"105367\n",
"105368\n",
"105369\n",
"105370\n",
"105371\n",
"105372\n",
"105373\n",
"105374\n",
"105375\n",
"105376\n",
"105377\n",
"105378\n",
"105379\n",
"105380\n",
"105381\n",
"105382\n",
"105383\n",
"105384\n",
"105385\n",
"105386\n",
"105387\n",
"105388\n",
"105389\n",
"105390\n",
"105391\n",
"105392\n",
"105393\n",
"105394\n",
"105395\n",
"105396\n",
"105397\n",
"105398\n",
"105399\n",
"105400\n",
"105401\n",
"105402\n",
"105403\n",
"105404\n",
"105405\n",
"105406\n",
"105407\n",
"105408\n",
"105409\n",
"105410\n",
"105411\n",
"105412\n",
"105413\n",
"105414\n",
"105415\n",
"105416\n",
"105417\n",
"105418\n",
"105419\n",
"105420\n",
"105421\n",
"105422\n",
"105423\n",
"105424\n",
"105425\n",
"105426\n",
"105427\n",
"105428\n",
"105429\n",
"105430\n",
"105431\n",
"105432\n",
"105433\n",
"105434\n",
"105435\n",
"105436\n",
"105437\n",
"105438\n",
"105439\n",
"105440\n",
"105441\n",
"105442\n",
"105443\n",
"105444\n",
"105445\n",
"105446\n",
"105447\n",
"105448\n",
"105449\n",
"105450\n",
"105451\n",
"105452\n",
"105453\n",
"105454\n",
"105455\n",
"105456\n",
"105457\n",
"105458\n",
"105459\n",
"105460\n",
"105461\n",
"105462\n",
"105463\n",
"105464\n",
"105465\n",
"105466\n",
"105467\n",
"105468\n",
"105469\n",
"105470\n",
"105471\n",
"105472\n",
"105473\n",
"105474\n",
"105475\n",
"105476\n",
"105477\n",
"105478\n",
"105479\n",
"105480\n",
"105481\n",
"105482\n",
"105483\n",
"105484\n",
"105485\n",
"105486\n",
"105487\n",
"105488\n",
"105489\n",
"105490\n",
"105491\n",
"105492\n",
"105493\n",
"105494\n",
"105495\n",
"105496\n",
"105497\n",
"105498\n",
"105499\n",
"105500\n",
"105501\n",
"105502\n",
"105503\n",
"105504\n",
"105505\n",
"105506\n",
"105507\n",
"105508\n",
"105509\n",
"105510\n",
"105511\n",
"105512\n",
"105513\n",
"105514\n",
"105515\n",
"105516\n",
"105517\n",
"105518\n",
"105519\n",
"105520\n",
"105521\n",
"105522\n",
"105523\n",
"105524\n",
"105525\n",
"105526\n",
"105527\n",
"105528\n",
"105529\n",
"105530\n",
"105531\n",
"105532\n",
"105533\n",
"105534\n",
"105535\n",
"105536\n",
"105537\n",
"105538\n",
"105539\n",
"105540\n",
"105541\n",
"105542\n",
"105543\n",
"105544\n",
"105545\n",
"105546\n",
"105547\n",
"105548\n",
"105549\n",
"105550\n",
"105551\n",
"105552\n",
"105553\n",
"105554\n",
"105555\n",
"105556\n",
"105557\n",
"105558\n",
"105559\n",
"105560\n",
"105561\n",
"105562\n",
"105563\n",
"105564\n",
"105565\n",
"105566\n",
"105567\n",
"105568\n",
"105569\n",
"105570\n",
"105571\n",
"105572\n",
"105573\n",
"105574\n",
"105575\n",
"105576\n",
"105577\n",
"105578\n",
"105579\n",
"105580\n",
"105581\n",
"105582\n",
"105583\n",
"105584\n",
"105585\n",
"105586\n",
"105587\n",
"105588\n",
"105589\n",
"105590\n",
"105591\n",
"105592\n",
"105593\n",
"105594\n",
"105595\n",
"105596\n",
"105597\n",
"105598\n",
"105599\n",
"105600\n",
"105601\n",
"105602\n",
"105603\n",
"105604\n",
"105605\n",
"105606\n",
"105607\n",
"105608\n",
"105609\n",
"105610\n",
"105611\n",
"105612\n",
"105613\n",
"105614\n",
"105615\n",
"105616\n",
"105617\n",
"105618\n",
"105619\n",
"105620\n",
"105621\n",
"105622\n",
"105623\n",
"105624\n",
"105625\n",
"105626\n",
"105627\n",
"105628\n",
"105629\n",
"105630\n",
"105631\n",
"105632\n",
"105633\n",
"105634\n",
"105635\n",
"105636\n",
"105637\n",
"105638\n",
"105639\n",
"105640\n",
"105641\n",
"105642\n",
"105643\n",
"105644\n",
"105645\n",
"105646\n",
"105647\n",
"105648\n",
"105649\n",
"105650\n",
"105651\n",
"105652\n",
"105653\n",
"105654\n",
"105655\n",
"105656\n",
"105657\n",
"105658\n",
"105659\n",
"105660\n",
"105661\n",
"105662\n",
"105663\n",
"105664\n",
"105665\n",
"105666\n",
"105667\n",
"105668\n",
"105669\n",
"105670\n",
"105671\n",
"105672\n",
"105673\n",
"105674\n",
"105675\n",
"105676\n",
"105677\n",
"105678\n",
"105679\n",
"105680\n",
"105681\n",
"105682\n",
"105683\n",
"105684\n",
"105685\n",
"105686\n",
"105687\n",
"105688\n",
"105689\n",
"105690\n",
"105691\n",
"105692\n",
"105693\n",
"105694\n",
"105695\n",
"105696\n",
"105697\n",
"105698\n",
"105699\n",
"105700\n",
"105701\n",
"105702\n",
"105703\n",
"105704\n",
"105705\n",
"105706\n",
"105707\n",
"105708\n",
"105709\n",
"105710\n",
"105711\n",
"105712\n",
"105713\n",
"105714\n",
"105715\n",
"105716\n",
"105717\n",
"105718\n",
"105719\n",
"105720\n",
"105721\n",
"105722\n",
"105723\n",
"105724\n",
"105725\n",
"105726\n",
"105727\n",
"105728\n",
"105729\n",
"105730\n",
"105731\n",
"105732\n",
"105733\n",
"105734\n",
"105735\n",
"105736\n",
"105737\n",
"105738\n",
"105739\n",
"105740\n",
"105741\n",
"105742\n",
"105743\n",
"105744\n",
"105745\n",
"105746\n",
"105747\n",
"105748\n",
"105749\n",
"105750\n",
"105751\n",
"105752\n",
"105753\n",
"105754\n",
"105755\n",
"105756\n",
"105757\n",
"105758\n",
"105759\n",
"105760\n",
"105761\n",
"105762\n",
"105763\n",
"105764\n",
"105765\n",
"105766\n",
"105767\n",
"105768\n",
"105769\n",
"105770\n",
"105771\n",
"105772\n",
"105773\n",
"105774\n",
"105775\n",
"105776\n",
"105777\n",
"105778\n",
"105779\n",
"105780\n",
"105781\n",
"105782\n",
"105783\n",
"105784\n",
"105785\n",
"105786\n",
"105787\n",
"105788\n",
"105789\n",
"105790\n",
"105791\n",
"105792\n",
"105793\n",
"105794\n",
"105795\n",
"105796\n",
"105797\n",
"105798\n",
"105799\n",
"105800\n",
"105801\n",
"105802\n",
"105803\n",
"105804\n",
"105805\n",
"105806\n",
"105807\n",
"105808\n",
"105809\n",
"105810\n",
"105811\n",
"105812\n",
"105813\n",
"105814\n",
"105815\n",
"105816\n",
"105817\n",
"105818\n",
"105819\n",
"105820\n",
"105821\n",
"105822\n",
"105823\n",
"105824\n",
"105825\n",
"105826\n",
"105827\n",
"105828\n",
"105829\n",
"105830\n",
"105831\n",
"105832\n",
"105833\n",
"105834\n",
"105835\n",
"105836\n",
"105837\n",
"105838\n",
"105839\n",
"105840\n",
"105841\n",
"105842\n",
"105843\n",
"105844\n",
"105845\n",
"105846\n",
"105847\n",
"105848\n",
"105849\n",
"105850\n",
"105851\n",
"105852\n",
"105853\n",
"105854\n",
"105855\n",
"105856\n",
"105857\n",
"105858\n",
"105859\n",
"105860\n",
"105861\n",
"105862\n",
"105863\n",
"105864\n",
"105865\n",
"105866\n",
"105867\n",
"105868\n",
"105869\n",
"105870\n",
"105871\n",
"105872\n",
"105873\n",
"105874\n",
"105875\n",
"105876\n",
"105877\n",
"105878\n",
"105879\n",
"105880\n",
"105881\n",
"105882\n",
"105883\n",
"105884\n",
"105885\n",
"105886\n",
"105887\n",
"105888\n",
"105889\n",
"105890\n",
"105891\n",
"105892\n",
"105893\n",
"105894\n",
"105895\n",
"105896\n",
"105897\n",
"105898\n",
"105899\n",
"105900\n",
"105901\n",
"105902\n",
"105903\n",
"105904\n",
"105905\n",
"105906\n",
"105907\n",
"105908\n",
"105909\n",
"105910\n",
"105911\n",
"105912\n",
"105913\n",
"105914\n",
"105915\n",
"105916\n",
"105917\n",
"105918\n",
"105919\n",
"105920\n",
"105921\n",
"105922\n",
"105923\n",
"105924\n",
"105925\n",
"105926\n",
"105927\n",
"105928\n",
"105929\n",
"105930\n",
"105931\n",
"105932\n",
"105933\n",
"105934\n",
"105935\n",
"105936\n",
"105937\n",
"105938\n",
"105939\n",
"105940\n",
"105941\n",
"105942\n",
"105943\n",
"105944\n",
"105945\n",
"105946\n",
"105947\n",
"105948\n",
"105949\n",
"105950\n",
"105951\n",
"105952\n",
"105953\n",
"105954\n",
"105955\n",
"105956\n",
"105957\n",
"105958\n",
"105959\n",
"105960\n",
"105961\n",
"105962\n",
"105963\n",
"105964\n",
"105965\n",
"105966\n",
"105967\n",
"105968\n",
"105969\n",
"105970\n",
"105971\n",
"105972\n",
"105973\n",
"105974\n",
"105975\n",
"105976\n",
"105977\n",
"105978\n",
"105979\n",
"105980\n",
"105981\n",
"105982\n",
"105983\n",
"105984\n",
"105985\n",
"105986\n",
"105987\n",
"105988\n",
"105989\n",
"105990\n",
"105991\n",
"105992\n",
"105993\n",
"105994\n",
"105995\n",
"105996\n",
"105997\n",
"105998\n",
"105999\n",
"106000\n",
"106001\n",
"106002\n",
"106003\n",
"106004\n",
"106005\n",
"106006\n",
"106007\n",
"106008\n",
"106009\n",
"106010\n",
"106011\n",
"106012\n",
"106013\n",
"106014\n",
"106015\n",
"106016\n",
"106017\n",
"106018\n",
"106019\n",
"106020\n",
"106021\n",
"106022\n",
"106023\n",
"106024\n",
"106025\n",
"106026\n",
"106027\n",
"106028\n",
"106029\n",
"106030\n",
"106031\n",
"106032\n",
"106033\n",
"106034\n",
"106035\n",
"106036\n",
"106037\n",
"106038\n",
"106039\n",
"106040\n",
"106041\n",
"106042\n",
"106043\n",
"106044\n",
"106045\n",
"106046\n",
"106047\n",
"106048\n",
"106049\n",
"106050\n",
"106051\n",
"106052\n",
"106053\n",
"106054\n",
"106055\n",
"106056\n",
"106057\n",
"106058\n",
"106059\n",
"106060\n",
"106061\n",
"106062\n",
"106063\n",
"106064\n",
"106065\n",
"106066\n",
"106067\n",
"106068\n",
"106069\n",
"106070\n",
"106071\n",
"106072\n",
"106073\n",
"106074\n",
"106075\n",
"106076\n",
"106077\n",
"106078\n",
"106079\n",
"106080\n",
"106081\n",
"106082\n",
"106083\n",
"106084\n",
"106085\n",
"106086\n",
"106087\n",
"106088\n",
"106089\n",
"106090\n",
"106091\n",
"106092\n",
"106093\n",
"106094\n",
"106095\n",
"106096\n",
"106097\n",
"106098\n",
"106099\n",
"106100\n",
"106101\n",
"106102\n",
"106103\n",
"106104\n",
"106105\n",
"106106\n",
"106107\n",
"106108\n",
"106109\n",
"106110\n",
"106111\n",
"106112\n",
"106113\n",
"106114\n",
"106115\n",
"106116\n",
"106117\n",
"106118\n",
"106119\n",
"106120\n",
"106121\n",
"106122\n",
"106123\n",
"106124\n",
"106125\n",
"106126\n",
"106127\n",
"106128\n",
"106129\n",
"106130\n",
"106131\n",
"106132\n",
"106133\n",
"106134\n",
"106135\n",
"106136\n",
"106137\n",
"106138\n",
"106139\n",
"106140\n",
"106141\n",
"106142\n",
"106143\n",
"106144\n",
"106145\n",
"106146\n",
"106147\n",
"106148\n",
"106149\n",
"106150\n",
"106151\n",
"106152\n",
"106153\n",
"106154\n",
"106155\n",
"106156\n",
"106157\n",
"106158\n",
"106159\n",
"106160\n",
"106161\n",
"106162\n",
"106163\n",
"106164\n",
"106165\n",
"106166\n",
"106167\n",
"106168\n",
"106169\n",
"106170\n",
"106171\n",
"106172\n",
"106173\n",
"106174\n",
"106175\n",
"106176\n",
"106177\n",
"106178\n",
"106179\n",
"106180\n",
"106181\n",
"106182\n",
"106183\n",
"106184\n",
"106185\n",
"106186\n",
"106187\n",
"106188\n",
"106189\n",
"106190\n",
"106191\n",
"106192\n",
"106193\n",
"106194\n",
"106195\n",
"106196\n",
"106197\n",
"106198\n",
"106199\n",
"106200\n",
"106201\n",
"106202\n",
"106203\n",
"106204\n",
"106205\n",
"106206\n",
"106207\n",
"106208\n",
"106209\n",
"106210\n",
"106211\n",
"106212\n",
"106213\n",
"106214\n",
"106215\n",
"106216\n",
"106217\n",
"106218\n",
"106219\n",
"106220\n",
"106221\n",
"106222\n",
"106223\n",
"106224\n",
"106225\n",
"106226\n",
"106227\n",
"106228\n",
"106229\n",
"106230\n",
"106231\n",
"106232\n",
"106233\n",
"106234\n",
"106235\n",
"106236\n",
"106237\n",
"106238\n",
"106239\n",
"106240\n",
"106241\n",
"106242\n",
"106243\n",
"106244\n",
"106245\n",
"106246\n",
"106247\n",
"106248\n",
"106249\n",
"106250\n",
"106251\n",
"106252\n",
"106253\n",
"106254\n",
"106255\n",
"106256\n",
"106257\n",
"106258\n",
"106259\n",
"106260\n",
"106261\n",
"106262\n",
"106263\n",
"106264\n",
"106265\n",
"106266\n",
"106267\n",
"106268\n",
"106269\n",
"106270\n",
"106271\n",
"106272\n",
"106273\n",
"106274\n",
"106275\n",
"106276\n",
"106277\n",
"106278\n",
"106279\n",
"106280\n",
"106281\n",
"106282\n",
"106283\n",
"106284\n",
"106285\n",
"106286\n",
"106287\n",
"106288\n",
"106289\n",
"106290\n",
"106291\n",
"106292\n",
"106293\n",
"106294\n",
"106295\n",
"106296\n",
"106297\n",
"106298\n",
"106299\n",
"106300\n",
"106301\n",
"106302\n",
"106303\n",
"106304\n",
"106305\n",
"106306\n",
"106307\n",
"106308\n",
"106309\n",
"106310\n",
"106311\n",
"106312\n",
"106313\n",
"106314\n",
"106315\n",
"106316\n",
"106317\n",
"106318\n",
"106319\n",
"106320\n",
"106321\n",
"106322\n",
"106323\n",
"106324\n",
"106325\n",
"106326\n",
"106327\n",
"106328\n",
"106329\n",
"106330\n",
"106331\n",
"106332\n",
"106333\n",
"106334\n",
"106335\n",
"106336\n",
"106337\n",
"106338\n",
"106339\n",
"106340\n",
"106341\n",
"106342\n",
"106343\n",
"106344\n",
"106345\n",
"106346\n",
"106347\n",
"106348\n",
"106349\n",
"106350\n",
"106351\n",
"106352\n",
"106353\n",
"106354\n",
"106355\n",
"106356\n",
"106357\n",
"106358\n",
"106359\n",
"106360\n",
"106361\n",
"106362\n",
"106363\n",
"106364\n",
"106365\n",
"106366\n",
"106367\n",
"106368\n",
"106369\n",
"106370\n",
"106371\n",
"106372\n",
"106373\n",
"106374\n",
"106375\n",
"106376\n",
"106377\n",
"106378\n",
"106379\n",
"106380\n",
"106381\n",
"106382\n",
"106383\n",
"106384\n",
"106385\n",
"106386\n",
"106387\n",
"106388\n",
"106389\n",
"106390\n",
"106391\n",
"106392\n",
"106393\n",
"106394\n",
"106395\n",
"106396\n",
"106397\n",
"106398\n",
"106399\n",
"106400\n",
"106401\n",
"106402\n",
"106403\n",
"106404\n",
"106405\n",
"106406\n",
"106407\n",
"106408\n",
"106409\n",
"106410\n",
"106411\n",
"106412\n",
"106413\n",
"106414\n",
"106415\n",
"106416\n",
"106417\n",
"106418\n",
"106419\n",
"106420\n",
"106421\n",
"106422\n",
"106423\n",
"106424\n",
"106425\n",
"106426\n",
"106427\n",
"106428\n",
"106429\n",
"106430\n",
"106431\n",
"106432\n",
"106433\n",
"106434\n",
"106435\n",
"106436\n",
"106437\n",
"106438\n",
"106439\n",
"106440\n",
"106441\n",
"106442\n",
"106443\n",
"106444\n",
"106445\n",
"106446\n",
"106447\n",
"106448\n",
"106449\n",
"106450\n",
"106451\n",
"106452\n",
"106453\n",
"106454\n",
"106455\n",
"106456\n",
"106457\n",
"106458\n",
"106459\n",
"106460\n",
"106461\n",
"106462\n",
"106463\n",
"106464\n",
"106465\n",
"106466\n",
"106467\n",
"106468\n",
"106469\n",
"106470\n",
"106471\n",
"106472\n",
"106473\n",
"106474\n",
"106475\n",
"106476\n",
"106477\n",
"106478\n",
"106479\n",
"106480\n",
"106481\n",
"106482\n",
"106483\n",
"106484\n",
"106485\n",
"106486\n",
"106487\n",
"106488\n",
"106489\n",
"106490\n",
"106491\n",
"106492\n",
"106493\n",
"106494\n",
"106495\n",
"106496\n",
"106497\n",
"106498\n",
"106499\n",
"106500\n",
"106501\n",
"106502\n",
"106503\n",
"106504\n",
"106505\n",
"106506\n",
"106507\n",
"106508\n",
"106509\n",
"106510\n",
"106511\n",
"106512\n",
"106513\n",
"106514\n",
"106515\n",
"106516\n",
"106517\n",
"106518\n",
"106519\n",
"106520\n",
"106521\n",
"106522\n",
"106523\n",
"106524\n",
"106525\n",
"106526\n",
"106527\n",
"106528\n",
"106529\n",
"106530\n",
"106531\n",
"106532\n",
"106533\n",
"106534\n",
"106535\n",
"106536\n",
"106537\n",
"106538\n",
"106539\n",
"106540\n",
"106541\n",
"106542\n",
"106543\n",
"106544\n",
"106545\n",
"106546\n",
"106547\n",
"106548\n",
"106549\n",
"106550\n",
"106551\n",
"106552\n",
"106553\n",
"106554\n",
"106555\n",
"106556\n",
"106557\n",
"106558\n",
"106559\n",
"106560\n",
"106561\n",
"106562\n",
"106563\n",
"106564\n",
"106565\n",
"106566\n",
"106567\n",
"106568\n",
"106569\n",
"106570\n",
"106571\n",
"106572\n",
"106573\n",
"106574\n",
"106575\n",
"106576\n",
"106577\n",
"106578\n",
"106579\n",
"106580\n",
"106581\n",
"106582\n",
"106583\n",
"106584\n",
"106585\n",
"106586\n",
"106587\n",
"106588\n",
"106589\n",
"106590\n",
"106591\n",
"106592\n",
"106593\n",
"106594\n",
"106595\n",
"106596\n",
"106597\n",
"106598\n",
"106599\n",
"106600\n",
"106601\n",
"106602\n",
"106603\n",
"106604\n",
"106605\n",
"106606\n",
"106607\n",
"106608\n",
"106609\n",
"106610\n",
"106611\n",
"106612\n",
"106613\n",
"106614\n",
"106615\n",
"106616\n",
"106617\n",
"106618\n",
"106619\n",
"106620\n",
"106621\n",
"106622\n",
"106623\n",
"106624\n",
"106625\n",
"106626\n",
"106627\n",
"106628\n",
"106629\n",
"106630\n",
"106631\n",
"106632\n",
"106633\n",
"106634\n",
"106635\n",
"106636\n",
"106637\n",
"106638\n",
"106639\n",
"106640\n",
"106641\n",
"106642\n",
"106643\n",
"106644\n",
"106645\n",
"106646\n",
"106647\n",
"106648\n",
"106649\n",
"106650\n",
"106651\n",
"106652\n",
"106653\n",
"106654\n",
"106655\n",
"106656\n",
"106657\n",
"106658\n",
"106659\n",
"106660\n",
"106661\n",
"106662\n",
"106663\n",
"106664\n",
"106665\n",
"106666\n",
"106667\n",
"106668\n",
"106669\n",
"106670\n",
"106671\n",
"106672\n",
"106673\n",
"106674\n",
"106675\n",
"106676\n",
"106677\n",
"106678\n",
"106679\n",
"106680\n",
"106681\n",
"106682\n",
"106683\n",
"106684\n",
"106685\n",
"106686\n",
"106687\n",
"106688\n",
"106689\n",
"106690\n",
"106691\n",
"106692\n",
"106693\n",
"106694\n",
"106695\n",
"106696\n",
"106697\n",
"106698\n",
"106699\n",
"106700\n",
"106701\n",
"106702\n",
"106703\n",
"106704\n",
"106705\n",
"106706\n",
"106707\n",
"106708\n",
"106709\n",
"106710\n",
"106711\n",
"106712\n",
"106713\n",
"106714\n",
"106715\n",
"106716\n",
"106717\n",
"106718\n",
"106719\n",
"106720\n",
"106721\n",
"106722\n",
"106723\n",
"106724\n",
"106725\n",
"106726\n",
"106727\n",
"106728\n",
"106729\n",
"106730\n",
"106731\n",
"106732\n",
"106733\n",
"106734\n",
"106735\n",
"106736\n",
"106737\n",
"106738\n",
"106739\n",
"106740\n",
"106741\n",
"106742\n",
"106743\n",
"106744\n",
"106745\n",
"106746\n",
"106747\n",
"106748\n",
"106749\n",
"106750\n",
"106751\n",
"106752\n",
"106753\n",
"106754\n",
"106755\n",
"106756\n",
"106757\n",
"106758\n",
"106759\n",
"106760\n",
"106761\n",
"106762\n",
"106763\n",
"106764\n",
"106765\n",
"106766\n",
"106767\n",
"106768\n",
"106769\n",
"106770\n",
"106771\n",
"106772\n",
"106773\n",
"106774\n",
"106775\n",
"106776\n",
"106777\n",
"106778\n",
"106779\n",
"106780\n",
"106781\n",
"106782\n",
"106783\n",
"106784\n",
"106785\n",
"106786\n",
"106787\n",
"106788\n",
"106789\n",
"106790\n",
"106791\n",
"106792\n",
"106793\n",
"106794\n",
"106795\n",
"106796\n",
"106797\n",
"106798\n",
"106799\n",
"106800\n",
"106801\n",
"106802\n",
"106803\n",
"106804\n",
"106805\n",
"106806\n",
"106807\n",
"106808\n",
"106809\n",
"106810\n",
"106811\n",
"106812\n",
"106813\n",
"106814\n",
"106815\n",
"106816\n",
"106817\n",
"106818\n",
"106819\n",
"106820\n",
"106821\n",
"106822\n",
"106823\n",
"106824\n",
"106825\n",
"106826\n",
"106827\n",
"106828\n",
"106829\n",
"106830\n",
"106831\n",
"106832\n",
"106833\n",
"106834\n",
"106835\n",
"106836\n",
"106837\n",
"106838\n",
"106839\n",
"106840\n",
"106841\n",
"106842\n",
"106843\n",
"106844\n",
"106845\n",
"106846\n",
"106847\n",
"106848\n",
"106849\n",
"106850\n",
"106851\n",
"106852\n",
"106853\n",
"106854\n",
"106855\n",
"106856\n",
"106857\n",
"106858\n",
"106859\n",
"106860\n",
"106861\n",
"106862\n",
"106863\n",
"106864\n",
"106865\n",
"106866\n",
"106867\n",
"106868\n",
"106869\n",
"106870\n",
"106871\n",
"106872\n",
"106873\n",
"106874\n",
"106875\n",
"106876\n",
"106877\n",
"106878\n",
"106879\n",
"106880\n",
"106881\n",
"106882\n",
"106883\n",
"106884\n",
"106885\n",
"106886\n",
"106887\n",
"106888\n",
"106889\n",
"106890\n",
"106891\n",
"106892\n",
"106893\n",
"106894\n",
"106895\n",
"106896\n",
"106897\n",
"106898\n",
"106899\n",
"106900\n",
"106901\n",
"106902\n",
"106903\n",
"106904\n",
"106905\n",
"106906\n",
"106907\n",
"106908\n",
"106909\n",
"106910\n",
"106911\n",
"106912\n",
"106913\n",
"106914\n",
"106915\n",
"106916\n",
"106917\n",
"106918\n",
"106919\n",
"106920\n",
"106921\n",
"106922\n",
"106923\n",
"106924\n",
"106925\n",
"106926\n",
"106927\n",
"106928\n",
"106929\n",
"106930\n",
"106931\n",
"106932\n",
"106933\n",
"106934\n",
"106935\n",
"106936\n",
"106937\n",
"106938\n",
"106939\n",
"106940\n",
"106941\n",
"106942\n",
"106943\n",
"106944\n",
"106945\n",
"106946\n",
"106947\n",
"106948\n",
"106949\n",
"106950\n",
"106951\n",
"106952\n",
"106953\n",
"106954\n",
"106955\n",
"106956\n",
"106957\n",
"106958\n",
"106959\n",
"106960\n",
"106961\n",
"106962\n",
"106963\n",
"106964\n",
"106965\n",
"106966\n",
"106967\n",
"106968\n",
"106969\n",
"106970\n",
"106971\n",
"106972\n",
"106973\n",
"106974\n",
"106975\n",
"106976\n",
"106977\n",
"106978\n",
"106979\n",
"106980\n",
"106981\n",
"106982\n",
"106983\n",
"106984\n",
"106985\n",
"106986\n",
"106987\n",
"106988\n",
"106989\n",
"106990\n",
"106991\n",
"106992\n",
"106993\n",
"106994\n",
"106995\n",
"106996\n",
"106997\n",
"106998\n",
"106999\n",
"107000\n",
"107001\n",
"107002\n",
"107003\n",
"107004\n",
"107005\n",
"107006\n",
"107007\n",
"107008\n",
"107009\n",
"107010\n",
"107011\n",
"107012\n",
"107013\n",
"107014\n",
"107015\n",
"107016\n",
"107017\n",
"107018\n",
"107019\n",
"107020\n",
"107021\n",
"107022\n",
"107023\n",
"107024\n",
"107025\n",
"107026\n",
"107027\n",
"107028\n",
"107029\n",
"107030\n",
"107031\n",
"107032\n",
"107033\n",
"107034\n",
"107035\n",
"107036\n",
"107037\n",
"107038\n",
"107039\n",
"107040\n",
"107041\n",
"107042\n",
"107043\n",
"107044\n",
"107045\n",
"107046\n",
"107047\n",
"107048\n",
"107049\n",
"107050\n",
"107051\n",
"107052\n",
"107053\n",
"107054\n",
"107055\n",
"107056\n",
"107057\n",
"107058\n",
"107059\n",
"107060\n",
"107061\n",
"107062\n",
"107063\n",
"107064\n",
"107065\n",
"107066\n",
"107067\n",
"107068\n",
"107069\n",
"107070\n",
"107071\n",
"107072\n",
"107073\n",
"107074\n",
"107075\n",
"107076\n",
"107077\n",
"107078\n",
"107079\n",
"107080\n",
"107081\n",
"107082\n",
"107083\n",
"107084\n",
"107085\n",
"107086\n",
"107087\n",
"107088\n",
"107089\n",
"107090\n",
"107091\n",
"107092\n",
"107093\n",
"107094\n",
"107095\n",
"107096\n",
"107097\n",
"107098\n",
"107099\n",
"107100\n",
"107101\n",
"107102\n",
"107103\n",
"107104\n",
"107105\n",
"107106\n",
"107107\n",
"107108\n",
"107109\n",
"107110\n",
"107111\n",
"107112\n",
"107113\n",
"107114\n",
"107115\n",
"107116\n",
"107117\n",
"107118\n",
"107119\n",
"107120\n",
"107121\n",
"107122\n",
"107123\n",
"107124\n",
"107125\n",
"107126\n",
"107127\n",
"107128\n",
"107129\n",
"107130\n",
"107131\n",
"107132\n",
"107133\n",
"107134\n",
"107135\n",
"107136\n",
"107137\n",
"107138\n",
"107139\n",
"107140\n",
"107141\n",
"107142\n",
"107143\n",
"107144\n",
"107145\n",
"107146\n",
"107147\n",
"107148\n",
"107149\n",
"107150\n",
"107151\n",
"107152\n",
"107153\n",
"107154\n",
"107155\n",
"107156\n",
"107157\n",
"107158\n",
"107159\n",
"107160\n",
"107161\n",
"107162\n",
"107163\n",
"107164\n",
"107165\n",
"107166\n",
"107167\n",
"107168\n",
"107169\n",
"107170\n",
"107171\n",
"107172\n",
"107173\n",
"107174\n",
"107175\n",
"107176\n",
"107177\n",
"107178\n",
"107179\n",
"107180\n",
"107181\n",
"107182\n",
"107183\n",
"107184\n",
"107185\n",
"107186\n",
"107187\n",
"107188\n",
"107189\n",
"107190\n",
"107191\n",
"107192\n",
"107193\n",
"107194\n",
"107195\n",
"107196\n",
"107197\n",
"107198\n",
"107199\n",
"107200\n",
"107201\n",
"107202\n",
"107203\n",
"107204\n",
"107205\n",
"107206\n",
"107207\n",
"107208\n",
"107209\n",
"107210\n",
"107211\n",
"107212\n",
"107213\n",
"107214\n",
"107215\n",
"107216\n",
"107217\n",
"107218\n",
"107219\n",
"107220\n",
"107221\n",
"107222\n",
"107223\n",
"107224\n",
"107225\n",
"107226\n",
"107227\n",
"107228\n",
"107229\n",
"107230\n",
"107231\n",
"107232\n",
"107233\n",
"107234\n",
"107235\n",
"107236\n",
"107237\n",
"107238\n",
"107239\n",
"107240\n",
"107241\n",
"107242\n",
"107243\n",
"107244\n",
"107245\n",
"107246\n",
"107247\n",
"107248\n",
"107249\n",
"107250\n",
"107251\n",
"107252\n",
"107253\n",
"107254\n",
"107255\n",
"107256\n",
"107257\n",
"107258\n",
"107259\n",
"107260\n",
"107261\n",
"107262\n",
"107263\n",
"107264\n",
"107265\n",
"107266\n",
"107267\n",
"107268\n",
"107269\n",
"107270\n",
"107271\n",
"107272\n",
"107273\n",
"107274\n",
"107275\n",
"107276\n",
"107277\n",
"107278\n",
"107279\n",
"107280\n",
"107281\n",
"107282\n",
"107283\n",
"107284\n",
"107285\n",
"107286\n",
"107287\n",
"107288\n",
"107289\n",
"107290\n",
"107291\n",
"107292\n",
"107293\n",
"107294\n",
"107295\n",
"107296\n",
"107297\n",
"107298\n",
"107299\n",
"107300\n",
"107301\n",
"107302\n",
"107303\n",
"107304\n",
"107305\n",
"107306\n",
"107307\n",
"107308\n",
"107309\n",
"107310\n",
"107311\n",
"107312\n",
"107313\n",
"107314\n",
"107315\n",
"107316\n",
"107317\n",
"107318\n",
"107319\n",
"107320\n",
"107321\n",
"107322\n",
"107323\n",
"107324\n",
"107325\n",
"107326\n",
"107327\n",
"107328\n",
"107329\n",
"107330\n",
"107331\n",
"107332\n",
"107333\n",
"107334\n",
"107335\n",
"107336\n",
"107337\n",
"107338\n",
"107339\n",
"107340\n",
"107341\n",
"107342\n",
"107343\n",
"107344\n",
"107345\n",
"107346\n",
"107347\n",
"107348\n",
"107349\n",
"107350\n",
"107351\n",
"107352\n",
"107353\n",
"107354\n",
"107355\n",
"107356\n",
"107357\n",
"107358\n",
"107359\n",
"107360\n",
"107361\n",
"107362\n",
"107363\n",
"107364\n",
"107365\n",
"107366\n",
"107367\n",
"107368\n",
"107369\n",
"107370\n",
"107371\n",
"107372\n",
"107373\n",
"107374\n",
"107375\n",
"107376\n",
"107377\n",
"107378\n",
"107379\n",
"107380\n",
"107381\n",
"107382\n",
"107383\n",
"107384\n",
"107385\n",
"107386\n",
"107387\n",
"107388\n",
"107389\n",
"107390\n",
"107391\n",
"107392\n",
"107393\n",
"107394\n",
"107395\n",
"107396\n",
"107397\n",
"107398\n",
"107399\n",
"107400\n",
"107401\n",
"107402\n",
"107403\n",
"107404\n",
"107405\n",
"107406\n",
"107407\n",
"107408\n",
"107409\n",
"107410\n",
"107411\n",
"107412\n",
"107413\n",
"107414\n",
"107415\n",
"107416\n",
"107417\n",
"107418\n",
"107419\n",
"107420\n",
"107421\n",
"107422\n",
"107423\n",
"107424\n",
"107425\n",
"107426\n",
"107427\n",
"107428\n",
"107429\n",
"107430\n",
"107431\n",
"107432\n",
"107433\n",
"107434\n",
"107435\n",
"107436\n",
"107437\n",
"107438\n",
"107439\n",
"107440\n",
"107441\n",
"107442\n",
"107443\n",
"107444\n",
"107445\n",
"107446\n",
"107447\n",
"107448\n",
"107449\n",
"107450\n",
"107451\n",
"107452\n",
"107453\n",
"107454\n",
"107455\n",
"107456\n",
"107457\n",
"107458\n",
"107459\n",
"107460\n",
"107461\n",
"107462\n",
"107463\n",
"107464\n",
"107465\n",
"107466\n",
"107467\n",
"107468\n",
"107469\n",
"107470\n",
"107471\n",
"107472\n",
"107473\n",
"107474\n",
"107475\n",
"107476\n",
"107477\n",
"107478\n",
"107479\n",
"107480\n",
"107481\n",
"107482\n",
"107483\n",
"107484\n",
"107485\n",
"107486\n",
"107487\n",
"107488\n",
"107489\n",
"107490\n",
"107491\n",
"107492\n",
"107493\n",
"107494\n",
"107495\n",
"107496\n",
"107497\n",
"107498\n",
"107499\n",
"107500\n",
"107501\n",
"107502\n",
"107503\n",
"107504\n",
"107505\n",
"107506\n",
"107507\n",
"107508\n",
"107509\n",
"107510\n",
"107511\n",
"107512\n",
"107513\n",
"107514\n",
"107515\n",
"107516\n",
"107517\n",
"107518\n",
"107519\n",
"107520\n",
"107521\n",
"107522\n",
"107523\n",
"107524\n",
"107525\n",
"107526\n",
"107527\n",
"107528\n",
"107529\n",
"107530\n",
"107531\n",
"107532\n",
"107533\n",
"107534\n",
"107535\n",
"107536\n",
"107537\n",
"107538\n",
"107539\n",
"107540\n",
"107541\n",
"107542\n",
"107543\n",
"107544\n",
"107545\n",
"107546\n",
"107547\n",
"107548\n",
"107549\n",
"107550\n",
"107551\n",
"107552\n",
"107553\n",
"107554\n",
"107555\n",
"107556\n",
"107557\n",
"107558\n",
"107559\n",
"107560\n",
"107561\n",
"107562\n",
"107563\n",
"107564\n",
"107565\n",
"107566\n",
"107567\n",
"107568\n",
"107569\n",
"107570\n",
"107571\n",
"107572\n",
"107573\n",
"107574\n",
"107575\n",
"107576\n",
"107577\n",
"107578\n",
"107579\n",
"107580\n",
"107581\n",
"107582\n",
"107583\n",
"107584\n",
"107585\n",
"107586\n",
"107587\n",
"107588\n",
"107589\n",
"107590\n",
"107591\n",
"107592\n",
"107593\n",
"107594\n",
"107595\n",
"107596\n",
"107597\n",
"107598\n",
"107599\n",
"107600\n",
"107601\n",
"107602\n",
"107603\n",
"107604\n",
"107605\n",
"107606\n",
"107607\n",
"107608\n",
"107609\n",
"107610\n",
"107611\n",
"107612\n",
"107613\n",
"107614\n",
"107615\n",
"107616\n",
"107617\n",
"107618\n",
"107619\n",
"107620\n",
"107621\n",
"107622\n",
"107623\n",
"107624\n",
"107625\n",
"107626\n",
"107627\n",
"107628\n",
"107629\n",
"107630\n",
"107631\n",
"107632\n",
"107633\n",
"107634\n",
"107635\n",
"107636\n",
"107637\n",
"107638\n",
"107639\n",
"107640\n",
"107641\n",
"107642\n",
"107643\n",
"107644\n",
"107645\n",
"107646\n",
"107647\n",
"107648\n",
"107649\n",
"107650\n",
"107651\n",
"107652\n",
"107653\n",
"107654\n",
"107655\n",
"107656\n",
"107657\n",
"107658\n",
"107659\n",
"107660\n",
"107661\n",
"107662\n",
"107663\n",
"107664\n",
"107665\n",
"107666\n",
"107667\n",
"107668\n",
"107669\n",
"107670\n",
"107671\n",
"107672\n",
"107673\n",
"107674\n",
"107675\n",
"107676\n",
"107677\n",
"107678\n",
"107679\n",
"107680\n",
"107681\n",
"107682\n",
"107683\n",
"107684\n",
"107685\n",
"107686\n",
"107687\n",
"107688\n",
"107689\n",
"107690\n",
"107691\n",
"107692\n",
"107693\n",
"107694\n",
"107695\n",
"107696\n",
"107697\n",
"107698\n",
"107699\n",
"107700\n",
"107701\n",
"107702\n",
"107703\n",
"107704\n",
"107705\n",
"107706\n",
"107707\n",
"107708\n",
"107709\n",
"107710\n",
"107711\n",
"107712\n",
"107713\n",
"107714\n",
"107715\n",
"107716\n",
"107717\n",
"107718\n",
"107719\n",
"107720\n",
"107721\n",
"107722\n",
"107723\n",
"107724\n",
"107725\n",
"107726\n",
"107727\n",
"107728\n",
"107729\n",
"107730\n",
"107731\n",
"107732\n",
"107733\n",
"107734\n",
"107735\n",
"107736\n",
"107737\n",
"107738\n",
"107739\n",
"107740\n",
"107741\n",
"107742\n",
"107743\n",
"107744\n",
"107745\n",
"107746\n",
"107747\n",
"107748\n",
"107749\n",
"107750\n",
"107751\n",
"107752\n",
"107753\n",
"107754\n",
"107755\n",
"107756\n",
"107757\n",
"107758\n",
"107759\n",
"107760\n",
"107761\n",
"107762\n",
"107763\n",
"107764\n",
"107765\n",
"107766\n",
"107767\n",
"107768\n",
"107769\n",
"107770\n",
"107771\n",
"107772\n",
"107773\n",
"107774\n",
"107775\n",
"107776\n",
"107777\n",
"107778\n",
"107779\n",
"107780\n",
"107781\n",
"107782\n",
"107783\n",
"107784\n",
"107785\n",
"107786\n",
"107787\n",
"107788\n",
"107789\n",
"107790\n",
"107791\n",
"107792\n",
"107793\n",
"107794\n",
"107795\n",
"107796\n",
"107797\n",
"107798\n",
"107799\n",
"107800\n",
"107801\n",
"107802\n",
"107803\n",
"107804\n",
"107805\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "5Z6ZKG2Xwymb"
},
"source": [
"# Fixed seed so the shuffled train/test split below is reproducible.\n",
"random.seed(3)\n",
"\n",
"# Datapoint count\n",
"count_total = len(unique_id)\n",
"\n",
"# Theta dimensionality: one entry per unique symptom, plus the intercept.\n",
"theta_dim = len(u_symptom_list) + 1\n",
"\n",
"# Test:Train split\n",
"#   train data: 80000\n",
"#   test data:  27805\n",
"# NOTE(review): the sizes are hard-coded; confirm that\n",
"# train_count + test_count still matches count_total when the data changes.\n",
"train_count = 80000\n",
"test_count = 27805\n",
"\n",
"# Shuffle the first (train_count + test_count) indices, then cut the\n",
"# permutation into a train part and a disjoint test part.\n",
"ind_rand = np.array(range(train_count + test_count))\n",
"random.shuffle(ind_rand)\n",
"train_idx = ind_rand[:train_count]\n",
"test_idx = ind_rand[train_count:train_count + test_count]\n",
"\n",
"# hospitalized_status as an np array, split into train and test.\n",
"hosp_day_count = np.array(hospitalized_status)\n",
"train_hosp_day_count = hosp_day_count[train_idx]\n",
"test_hosp_day_count = hosp_day_count[test_idx]\n",
"\n",
"# Symptom entries split with the same indices, so train_symptoms[k]\n",
"# lines up with train_hosp_day_count[k] (likewise for test).\n",
"train_symptoms = [symptom_status[idx] for idx in train_idx]\n",
"test_symptoms = [symptom_status[idx] for idx in test_idx]"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "jT1mB5b5xd0u"
},
"source": [
"## **Sparse PCA**"
]
},
{
"cell_type": "code",
"metadata": {
"id": "3i6dpU-UwygX",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "0074d76e-5c8e-47ba-9731-079652affe4c"
},
"source": [
"# Distinct hospital day counts seen in the training split.\n",
"num_host_day = np.unique(train_hosp_day_count)\n",
"# 1.0 where the training case has a hospital day count > 0, else 0.0.\n",
"class_host = (train_hosp_day_count > 0).astype(float)\n",
"# Positions (within the training split) of the flagged cases.\n",
"a = np.flatnonzero(class_host)\n",
"print(a.shape)  # a 1-D shape is a one-element tuple, hence the trailing comma"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"(3952,)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Xm9KYZU5wydk"
},
"source": [
"# Build x_symptom with shape (len(a), num_symptom): one multi-hot row per\n",
"# case selected in a, with a 1 in every column listed for that case.\n",
"x_symptom = np.zeros((len(a), num_symptom))\n",
"for row, case_idx in enumerate(a):\n",
"    # The stored symptom indices are cast to int before being used as columns.\n",
"    x_symptom[row, train_symptoms[case_idx].astype('int')] = 1"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "G5pgAWVUwyar"
},
"source": [
"# Number of sparse principal components to learn.\n",
"main_symptoms_count = 5\n",
"# Fix random_state so the learned components (and everything derived from\n",
"# them below) are reproducible across kernel restarts.\n",
"spca = SparsePCA(n_components=main_symptoms_count, random_state=0)\n",
"z_symptom = spca.fit_transform(x_symptom)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "TbczWXa4wyXW"
},
"source": [
"# Push each one-hot symptom vector (the rows of an identity matrix) through\n",
"# the fitted SparsePCA to obtain one weight per symptom and component.\n",
"E_mat = np.eye(num_symptom)\n",
"Weights_symptom = spca.transform(E_mat)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "a1_WR6J6wyUf",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6ea74b34-fa83-4ad6-9bf8-3ac14656bbc3"
},
"source": [
"#@title Find Top _K_-Symptoms\n",
"k_symptoms = 5#@param {type:\"integer\"}\n",
"\n",
"# Reverse lookup built once (symptom index -> symptom names) instead of\n",
"# rescanning symptom_dict for every selected index.\n",
"index_to_symptoms = {}\n",
"for symptom, ind_symptom in symptom_dict.items():\n",
"    index_to_symptoms.setdefault(ind_symptom, []).append(symptom)\n",
"\n",
"determinant_symptoms = []\n",
"for i in range(main_symptoms_count):\n",
"    # Indices of the k symptoms with the largest absolute weight on component i\n",
"    # (argpartition does not order the entries inside the top k).\n",
"    ind_k = np.argpartition(-np.abs(Weights_symptom[:, i]), k_symptoms)[:k_symptoms]\n",
"    for idx in ind_k:\n",
"        determinant_symptoms.extend(index_to_symptoms.get(int(idx), []))\n",
"\n",
"# Unique, keeping first-seen order so the printed list is stable from run to\n",
"# run (iteration order of a set of strings can change between kernels).\n",
"determinant_symptoms = list(dict.fromkeys(determinant_symptoms))\n",
"print(determinant_symptoms)\n",
"print(len(determinant_symptoms))\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"['Chills', 'Death', 'Blood test', 'Vomiting', 'Pain', 'Blood sodium normal', 'Nausea', 'Platelet count normal', 'Blood potassium normal', 'Haemoglobin decreased', 'Cerebrovascular accident', 'Blood glucose normal', 'Pyrexia', 'Computerised tomogram', 'Magnetic resonance imaging', 'Dyspnoea']\n",
"16\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lODITRVjy5pg"
},
"source": [
"Use the sparse-PCA features in a linear regression to predict hospitalization duration."
]
},
{
"cell_type": "code",
"metadata": {
"id": "BXGfRj2dwyOH",
"cellView": "form"
},
"source": [
"#@title A linear sparse train function\n",
"\n",
"def lin_sparse_train(symptom, hostday, num_dim, batch_size = 40, reg_lambda = 0.01, learning_rate = 0.00001, max_iter = 1000, verbose = False):\n",
"    \"\"\"\n",
"    Fit an L1-regularised linear model with mini-batch (sub)gradient descent.\n",
"\n",
"    symptom: 2-D array (n_train, num_dim - 1) of PCA symptom features\n",
"    hostday: 1-D array (n_train,) with the number of hospitalized days\n",
"    num_dim: theta dimension (number of feature columns plus one intercept)\n",
"    batch_size: size of each mini-batch (capped at the number of samples)\n",
"    reg_lambda: L1-norm regularization weight for the sparsity constraint\n",
"    learning_rate: gradient step size\n",
"    max_iter: maximum number of passes (epochs) over the training data\n",
"    verbose: when True, print the epoch number and theta after every epoch\n",
"\n",
"    Returns theta with shape (num_dim,); its last entry is the intercept.\n",
"    Raises ValueError when the inputs are empty or their shapes disagree.\n",
"    \"\"\"\n",
"    symptom = np.asarray(symptom, dtype=float)\n",
"    hostday = np.asarray(hostday, dtype=float).reshape(-1)\n",
"\n",
"    if symptom.ndim != 2 or symptom.shape[0] == 0:\n",
"        raise ValueError(\"symptom must be a non-empty 2-D array\")\n",
"    # The sample count comes from the data instead of an undefined n_train name.\n",
"    n_train = symptom.shape[0]\n",
"    if hostday.shape[0] != n_train:\n",
"        raise ValueError(\"symptom and hostday must have the same number of rows\")\n",
"    if symptom.shape[1] + 1 != num_dim:\n",
"        raise ValueError(\"num_dim must equal the number of symptom columns plus one (intercept)\")\n",
"\n",
"    # Design matrix with a trailing column of ones for the intercept.\n",
"    design = np.hstack([symptom, np.ones([n_train, 1])])\n",
"\n",
"    batch_size = min(batch_size, n_train)\n",
"    n_batch = n_train // batch_size\n",
"\n",
"    # Random normal initialisation of theta.\n",
"    theta = np.random.normal(size=[num_dim, ])\n",
"\n",
"    for epoch in range(1, max_iter + 1):\n",
"        # Draw a fresh random batch assignment for every epoch.\n",
"        ind_train = list(range(n_train))\n",
"        random.shuffle(ind_train)\n",
"\n",
"        old_theta = theta\n",
"        for j in range(n_batch):\n",
"            batch = ind_train[j*batch_size:(j+1)*batch_size]\n",
"            train_x = design[batch, :]\n",
"            train_y = hostday[batch]\n",
"            # Squared-error gradient X^T (X theta - y) plus the L1 subgradient.\n",
"            grad = train_x.T.dot(train_x.dot(theta) - train_y) + reg_lambda * np.sign(theta)\n",
"            theta = theta - learning_rate * (grad/batch_size)\n",
"\n",
"        step = np.linalg.norm(old_theta - theta)\n",
"        if verbose:\n",
"            print(epoch, theta)\n",
"        # Stop as soon as a full epoch barely moves theta.\n",
"        if step < 1e-6:\n",
"            if verbose:\n",
"                print(step)\n",
"            return theta\n",
"    return theta"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "e5QyB8gYzaOu"
},
"source": [
"\n",
"\n",
"---\n",
"\n",
"\n",
"handle imbalanced data with undersampling"
]
},
{
"cell_type": "code",
"metadata": {
"id": "jYRfjLuwwyKz"
},
"source": [
"from sklearn.linear_model import LogisticRegression"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "fKuyThctwyH3"
},
"source": [
"# Positions (within the training split) of the two classes.\n",
"# Hospitalized: non-zero hospitalization value.\n",
"ind_1 = np.flatnonzero(train_hosp_day_count)\n",
"# Not hospitalized: value exactly zero.\n",
"ind_0 = np.flatnonzero(train_hosp_day_count == 0)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "zcOr-rHtwyE4"
},
"source": [
"# Logistic-regression classifier (not a linear regression). With\n",
"# warm_start=True each later fit() starts from the previous fit's solution.\n",
"lr = LogisticRegression(random_state=0, warm_start=True)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "DtCd3kWFwyCD"
},
"source": [
"# Undersampling of ind_0 [nohosp]\n",
"# np.random.shuffle draws from NumPy's global RNG, which random.seed() does\n",
"# not touch, so NumPy is seeded as well to make the undersampling reproducible.\n",
"random.seed(4)\n",
"np.random.seed(4)\n",
"num_hospitalized = ind_1.shape[0]\n",
"# Shuffle a copy so ind_0 itself keeps its original order.\n",
"temp_ind_0 = ind_0.copy()\n",
"np.random.shuffle(temp_ind_0)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "GQ7LeIJQwx-P"
},
"source": [
"# Number of disjoint non-hospitalized chunks, each as large as the hospitalized class.\n",
"n_set = int(temp_ind_0.shape[0]/num_hospitalized)\n",
"\n",
"for i in range(n_set):\n",
"    # Balanced index set: every hospitalized row plus the i-th chunk of\n",
"    # non-hospitalized rows, in random order.\n",
"    train_ind = np.concatenate([ind_1, temp_ind_0[i*num_hospitalized:(i+1)*num_hospitalized]])\n",
"    np.random.shuffle(train_ind)\n",
"\n",
"    # Multi-hot symptom matrix. Row j must describe sample train_ind[j], so the\n",
"    # inner loop indexes with j; indexing with the outer i would fill a single\n",
"    # row only and leave the features misaligned with the labels.\n",
"    x_symptom = np.zeros([2*num_hospitalized, num_symptom])\n",
"    for j in range(2*num_hospitalized):\n",
"        x_symptom[j, train_symptoms[train_ind[j]].astype('int')] = 1\n",
"\n",
"    # PCA\n",
"    z_symptom = spca.transform(x_symptom)\n",
"\n",
"    # Binary labels; fancy indexing returns a copy, so the source array is not modified.\n",
"    class_host = train_hosp_day_count[train_ind]\n",
"    class_host[class_host > 0] = 1\n",
"\n",
"    # Fit to LogisticRegression inst\n",
"    lr.fit(z_symptom, class_host)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "onUdI4tywx7U",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d572236b-4e57-4da9-ed0f-3277e428b378"
},
"source": [
"# Get weight: combine the logistic-regression coefficients with the\n",
"# per-symptom SPCA weights to get one score per symptom.\n",
"weight_host = lr.coef_[0, :].reshape(1, -1)\n",
"weight_host = np.sum(weight_host * Weights_symptom, axis = 1)\n",
"\n",
"# Find top k-symptoms\n",
"k_symptoms = 20\n",
"\n",
"# Reverse lookup built once (symptom index -> symptom names) instead of\n",
"# rescanning symptom_dict for every selected index.\n",
"index_to_symptoms = {}\n",
"for symptom, ind_symptom in symptom_dict.items():\n",
"    index_to_symptoms.setdefault(ind_symptom, []).append(symptom)\n",
"\n",
"# Indices of the k largest absolute scores (not ordered inside the top k).\n",
"ind_k = np.argpartition(-np.abs(weight_host), k_symptoms)[:k_symptoms]\n",
"determinant_symptoms = []\n",
"for idx in ind_k:\n",
"    determinant_symptoms.extend(index_to_symptoms.get(int(idx), []))\n",
"\n",
"print(determinant_symptoms)\n",
"print(len(determinant_symptoms))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"['Haemodialysis', 'Activated partial thromboplastin time prolonged', 'Echocardiogram', 'Hypoaesthesia', 'Urine analysis', 'Blood test', 'Nausea', 'Computerised tomogram', 'Ultrasound scan', 'Electrocardiogram', 'Laboratory test', 'Cerebrovascular accident', 'Chest X-ray', 'X-ray', 'Vomiting', 'Scan with contrast', 'Magnetic resonance imaging', 'Unevaluable event', 'Cholecystectomy', 'Hypercalcaemia']\n",
"20\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UokhNLwx0GnI"
},
"source": [
"Prediction on the test set\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ph-NqlL0wx3y"
},
"source": [
"# Multi-hot encode the test split: one row per test entry, one column per symptom.\n",
"X_test_symptom = np.zeros([test_count, num_symptom])\n",
"for row in range(test_count):\n",
"    X_test_symptom[row, test_symptoms[row].astype('int')] = 1\n",
"\n",
"# Transform to SPCA\n",
"z_test_symptom = spca.transform(X_test_symptom)\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "C1EArjpr0WaD"
},
"source": [
"# Predicted labels from the fitted classifier.\n",
"class_pred = lr.predict(z_test_symptom)\n",
"\n",
"# Ground-truth labels: 1.0 where the hospitalization value is positive, else 0.0.\n",
"class_test = (test_hosp_day_count > 0).astype(float)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "6McjXRQI0WWt",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "856ccce3-4fda-4534-e322-f77510820888"
},
"source": [
"# Confusion-matrix counts on the test set.\n",
"# precision = TP / (TP + FP); recall = TP / (TP + FN)\n",
"num_true_pos = np.sum(np.logical_and((class_pred==1), (class_test==1)))\n",
"num_false_pos = np.sum(np.logical_and((class_pred==1),(class_test==0)))\n",
"num_false_neg = np.sum(np.logical_and((class_pred==0),(class_test==1)))\n",
"num_true_neg = np.sum(np.logical_and((class_pred==0),(class_test==0)))\n",
"\n",
"print('Hospitalization Precision: ', num_true_pos/(num_true_pos + num_false_pos))\n",
"print('Hospitalization Recall: ', num_true_pos/(num_true_pos + num_false_neg))\n",
"# Raw counts, disabled:\n",
"# print(num_true_pos)\n",
"# print(num_false_pos)\n",
"# print(num_false_neg)\n",
"# print(num_true_neg)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Hospitalization Precision: 0.17303683113273105\n",
"Hospitalization Recall: 0.5570469798657718\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "_E9VSXhS0WTW",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "19022ac1-7003-4018-b4d5-db744bd57213"
},
"source": [
"# sensitivity = TP / (TP + FN) (same formula as recall); specificity = TN / (TN + FP).\n",
"# Reads the num_* counts currently in the kernel, so run the counting cell first.\n",
"print('Hospitalization Sensitivity: ', num_true_pos/(num_true_pos + num_false_neg))\n",
"print('Hospitalization Specificity: ', num_true_neg/(num_true_neg + num_false_pos))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Hospitalization Sensitivity: 0.04832995951417004\n",
"Hospitalization Specificity: 0.9980144119503471\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fiVEX05c0uuJ"
},
"source": [
"For imbalanced data, we use balanced class_weight built into LogisticRegression"
]
},
{
"cell_type": "code",
"metadata": {
"id": "aJ0F1vBG0WRk"
},
"source": [
"from sklearn.linear_model import LogisticRegression"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "vQUccjDWoMva"
},
"source": [
"## Logistic Regression (balanced class weights)"
]
},
{
"cell_type": "code",
"metadata": {
"id": "r8Dno7Ut0WM1"
},
"source": [
"# Fresh logistic-regression classifier; class_weight='balanced' reweights the\n",
"# classes inversely to their frequency instead of undersampling the data.\n",
"lr = LogisticRegression(random_state=0,class_weight='balanced', max_iter=1000, penalty='l2', warm_start=True)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "gyfuyJYm0WKU"
},
"source": [
"# Multi-hot symptom matrix for the whole training split.\n",
"x_symptom = np.zeros([train_count, num_symptom])\n",
"for row in range(train_count):\n",
"    x_symptom[row, train_symptoms[row].astype('int')] = 1\n",
"\n",
"# Project onto the previously fitted sparse-PCA components.\n",
"z_symptom = spca.transform(x_symptom)\n",
"\n",
"# Binary labels: 1.0 where the hospitalization value is positive, else 0.0.\n",
"class_host = (train_hosp_day_count > 0).astype(float)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "l20ipu5r0WGs",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "11bcea28-960b-4829-8a97-26abff10278d"
},
"source": [
"# Fit the class-weighted classifier on the current z_symptom features and class_host labels.\n",
"lr.fit(z_symptom, class_host)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"LogisticRegression(C=1.0, class_weight='balanced', dual=False,\n",
" fit_intercept=True, intercept_scaling=1, l1_ratio=None,\n",
" max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2',\n",
" random_state=0, solver='lbfgs', tol=0.0001, verbose=0,\n",
" warm_start=True)"
]
},
"metadata": {
"tags": []
},
"execution_count": 30
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "tmbBFxfX0WEb",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "65dfd923-5626-4074-e11f-832b5b80cc14"
},
"source": [
"# get weight: combine the logistic-regression coefficients with the\n",
"# per-symptom SPCA weights to get one score per symptom.\n",
"weight_host = lr.coef_[0, :].reshape(1, -1)\n",
"weight_host = np.sum(weight_host * Weights_symptom, axis = 1)\n",
"\n",
"# find top k-symptoms\n",
"k_symptoms = 20\n",
"\n",
"# Reverse lookup built once (symptom index -> symptom names) instead of\n",
"# rescanning symptom_dict for every selected index.\n",
"index_to_symptoms = {}\n",
"for symptom, ind_symptom in symptom_dict.items():\n",
"    index_to_symptoms.setdefault(ind_symptom, []).append(symptom)\n",
"\n",
"# Indices of the k largest absolute scores (not ordered inside the top k).\n",
"ind_k = np.argpartition(-np.abs(weight_host), k_symptoms)[:k_symptoms]\n",
"determinant_symptoms = []\n",
"for idx in ind_k:\n",
"    determinant_symptoms.extend(index_to_symptoms.get(int(idx), []))\n",
"\n",
"print(determinant_symptoms)\n",
"print(len(determinant_symptoms))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"['Chills', 'Nausea', 'Pain', 'Pyrexia', 'Vomiting', 'Fatigue', 'Asthenia', 'Diarrhoea', 'Pain in extremity', 'Hyperhidrosis', 'Dizziness', 'Death', 'Abdominal pain', 'Decreased appetite', 'Myalgia', 'Dehydration', 'Malaise', 'Tachycardia', 'Arthralgia', 'Loss of consciousness']\n",
"20\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VEgUb_SY10g_"
},
"source": [
"Compute the optimal probability threshold from the training ROC curve"
]
},
{
"cell_type": "code",
"metadata": {
"id": "SZP5rUfc0WAH"
},
"source": [
"# Hard label predictions and class probabilities on the training features.\n",
"# Column 1 of prob_pred_train is read as the positive-class score in later cells.\n",
"train_pred = lr.predict(z_symptom)\n",
"prob_pred_train = lr.predict_proba(z_symptom)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "pXqS4NR50V9p",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 313
},
"outputId": "e36162ab-9c7d-4aec-a509-b31f0078dd1c"
},
"source": [
"#@title Compute and plot ROC curve\n",
"# Candidate probability cut-offs 0.00, 0.01, ..., 0.99.\n",
"cut_off_thresh = np.arange(100) / 100.0\n",
"\n",
"val_sensitivity = np.zeros([100, ])\n",
"val_specifity = np.zeros([100, ])\n",
"\n",
"for i in range(100):\n",
"    # Predict positive when the class-1 probability exceeds the current cut-off.\n",
"    temp_pred_class = np.zeros([train_count, ])\n",
"    temp_pred_class[prob_pred_train[:, 1] > cut_off_thresh[i]] = 1\n",
"\n",
"    # Confusion-matrix counts against the training labels.\n",
"    num_true_pos = np.sum(np.logical_and((temp_pred_class==1), (class_host==1)))\n",
"    num_false_pos = np.sum(np.logical_and((temp_pred_class==1),(class_host==0)))\n",
"    num_false_neg = np.sum(np.logical_and((temp_pred_class==0),(class_host==1)))\n",
"    num_true_neg = np.sum(np.logical_and((temp_pred_class==0),(class_host==0)))\n",
"\n",
"    # sensitivity = TP / (TP + FN); specificity = TN / (TN + FP)\n",
"    val_sensitivity[i] = num_true_pos/(num_true_pos + num_false_neg)\n",
"    val_specifity[i] = num_true_neg/(num_true_neg + num_false_pos)\n",
"\n",
"#@markdown Plot ROC curve\n",
"print('-- ROC Curve --')\n",
"plt.plot(1-val_specifity, val_sensitivity)\n",
"plt.xlabel('1 - specificity')\n",
"plt.ylabel('sensitivity')"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"-- ROC Curve --\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0, 0.5, 'sensitivity')"
]
},
"metadata": {
"tags": []
},
"execution_count": 121
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxVd53/8deHkAAhCYGEJRBC2LeyNtBdra1dbdEuCtpO66h1dOo4OuP8OqM/deoszrg8tFqX2qod29JWWy1Wukw3q7UFQiktUJZAISskQPaQ9X5+f9xLfwEDuZTcnNyc9/Px4JF7zzk353OA5P095/s932PujoiIhNeQoAsQEZFgKQhEREJOQSAiEnIKAhGRkFMQiIiE3NCgCzhVubm5XlhYGHQZIiJJZePGjQfdfWxP65IuCAoLCykuLg66DBGRpGJm+060TpeGRERCTkEgIhJyCgIRkZBTEIiIhJyCQEQk5BIWBGb2MzOrNrMtJ1hvZnaHmZWY2etmtjRRtYiIyIkl8ozgF8BlJ1l/OTAz9ucW4EcJrEVERE4gYfcRuPuLZlZ4kk1WAP/j0XmwXzGzbDPLc/eqRNUkIpIM2jq7qG5oY3dNEwcaWqlt6aClrZOL5o5n0eTsPt9fkDeUTQLKur0vjy37iyAws1uInjVQUFDQL8WJiPSlzq4INU1tHGhoY9+hZl7dV0tVfSutnRFaO7po64zQ2t5FTVMbh5vbe/we47KGD7ogiJu73wXcBVBUVKQn6YjIgOHuVNQdoaS6ierGNg42tXGoqf2Yrweb2jnU3Eb354Clp6VQMCadYakpDB86hOwRqQzLHMaZhaOZkDWc8VnDKMwZyaTRI8hOT2NkWgpmlpBjCDIIKoDJ3d7nx5aJiAwYnV0R9je0UlF7hIq6I1TWHaG6MfpLvrzuCCUHGmlu7zrmM+lpKeRmDCMnI43JY9JZUpDN2MzoL/fxmcPJyx7OrPGZpKYMjIGbQQbBGuBWM3sQOAuoV/+AiASpoyvCm1UNbNxXy8Z9tWwur6OyrpWuyLEXIkaNSCUnI428UcO5vmgyM8dnMHNcJnmjhpOTkUZ6WlJcbHlbwqo1s9XAe4BcMysHvgqkArj7j4G1wBVACdACfCxRtYiI9KSlvZMd+xv58+5D/HFXDa+V1dHaEQFg4qjhLCkYzQcWj2Ri9ggmZY9g0ujo1+GpKQFX3rcSOWpoVS/rHfjbRO1fRKS72uZ2Xi2t5Y2KerZXNbJ9fwP7Dre8fd1+bl4Wq5YXcOaU0SwtGM3E7BHBFtyPkuv8RUQkDu7OnoPNbNxbS/G+w2zcV8vummYAzGBqzkjm5mXxwSX5zJ6QydIp2YzLHB5w1cFREIhIUjvc3M72qgZ2HGhkd00TJdVN7DzQ9PYQzFEjUjlzymiuWZpP0ZTRLMzPZkTa4Lq0c7oUBCKSdLZU1PPA+lKeffMABxra3l6eNXwoM8ZlcPHccSwpGE3RlNFMH5vBkCGJGXY5WCgIRCQpNLV1sua1SlavL+WNinqGpw7h4rnjWZSfzdy8LGZPyCQ3Iy1hY+0HMwWBiAxY1Y2tvLLnMH/cWcPaN6pobu9izoRMbl8xnxWLJzFqRGrQJQ4KCgIRGRBaO7ooqW5i+/5GXiur5eXdh97u4M0cNpQrFuTxkbMKWDw5W63+PqYgEJHAvFnVwCMby3l+RzVvHWzm6H1bGcOGsqxwNB8qmsw503OYP3EUKbrOnzAKAhHpV9UNrazZXMmjr1awraqB1BTjvBm5XLkgj9kTspiTl0lhzkj94u9HCgIRSbj2zgi/f6OSRzZW8OfdB4k4LMwfxb9ePZ+rFk1kzMi0oEsMNQWBiCSEu7P3UAtPbKni3j/v5UBDGwVj0rn1whmsWDKJ6WMzgi5RYhQEItJnqhtb+XPJIV4qOchLJQeprG8F4PwZuXzzukVcMDNXHb0DkIJARN4xd2dbVQO/21zFc9
sPsPNAEwDZ6amcMy2Hz1yYy/kzcinMHRlwpXIyCgIROSXuzs4DTTy5ZT9rNlewu6aZoUOMs6flcM3SfM6bnsu8iVnq7E0iCgIR6VVl3ZG3L/e8tPsQNY1tmMHywjH89flTufyMPHX4JjEFgYj0yN15fkc13356J1srGwDIzUjj3Om5nDcjh3fPGseEUeGdsXMwURCIyDFa2jv5066D3POnt1j31mEKc9L58pVzOX9mLrPHZ6qzdxBSEIgI5bUtPL+9mme3V/Pn3Ydo74yQm5HG7Svms2p5wYB5tq4khoJAJIS6Is5rZXU8++YBnttezfb9jQAU5qRz49lTuGjOOJZNHaMACAkFgUhItHV28fz2ap7edoAXdtRwuLmdlCHGssLRfOmKuVw0dxzTdJNXKCkIRAYxd2dzeT2PbCxnzeZK6o90kJ2eyoWzx/HeOeN416yxmspZFAQig1FHV4R7/7yX1etL2V3TzLChQ7h0/gSuOzOfc6fnMFSXfKQbBYHIILO7ponPP/Qar5fXc+aU0fznNdO4cmEeWcPV8peeKQhEBgl35/51pfzb77cxPDWFH350KVcsyAu6LEkCCgKRJBeJOE9u3c/3nyvhzaoGLpiZy7euX8T4LN3sJfFREIgkqa6I8/jrldz5fAk7DzQxLXck375+ER9cMokhmudHToGCQCTJdHZFWLO5kh88X8KemmZmjsvgeysX8/6FEzXRm7wjCgKRJNHRFeE3r1Zw5wsl7DvUwpwJmfzwo0u5bP4EnQHIaVEQiAxwnV0RfrOpgu8/V0Lp4RbOmJTFXTeeycVzxysApE8oCEQGqM6uCI+9Vsn3n9vF3kPRALj7r4q4aO44TfwmfUpBIDLAdEWc322u5I5nd7HnYDPz8qJnAO+bN14BIAmhIBAZII6OArrj2V3srmlmzoRMfnzDmVwyT5eAJLEUBCIBi0SctVuq+N4zu9hV3cSs8RnqBJZ+pSAQCcjRG8G+98wudhxoZOa4DH7wkSVccUaeAkD6VUKDwMwuA74HpAB3u/s3jltfANwLZMe2uc3d1yayJpGgRSLO09v2891ndrF9fyPTx47kjlVLuHJBnu4DkEAkLAjMLAW4E3gfUA5sMLM17r6t22ZfBh529x+Z2TxgLVCYqJpEgvbHXTX859rtbKtqYFruSL774cVctUg3gkmwEnlGsBwocfc9AGb2ILAC6B4EDmTFXo8CKhNYj0hgOrsifPt/d/KjF3YzJSed73xoEVcvmqjpoGVASGQQTALKur0vB846bpuvAU+b2WeBkcDFPX0jM7sFuAWgoKCgzwsVSaQDDa189oFNrN97mFXLC/jqVfMYnpoSdFkibwu6s3gV8At3/7aZnQP80szOcPdI943c/S7gLoCioiIPoE6RU1bf0sEvX9nLPX96i7bOCN/98GI+sGRS0GWJ/IVEBkEFMLnb+/zYsu4+DlwG4O4vm9lwIBeoTmBdIglVWXeEe/70FqvXl9LS3sW7Z43l/75/LjPGZQZdmkiPEhkEG4CZZjaVaACsBD5y3DalwEXAL8xsLjAcqElgTSIJs2N/Iz95cTdrXqvEgasW5vGpd09nbl5Wr58VCVLCgsDdO83sVuApokNDf+buW83sdqDY3dcA/wD81Mw+T7Tj+GZ316UfSRqtHV08uWU/D6wvZf1bhxmRmsINZ0/hExdMJX90etDlicTFku33blFRkRcXFwddhoTcrgONrF5fxiOvllN/pIMpOemsXFbAymWTGT0yLejyRP6CmW1096Ke1gXdWSySNFo7ulj7RhWr15eyYW8tqSnGJfMn8JHlBZwzLUd3A0vSUhCI9GLngUYeWFfKo6+W09DaSWFOOv98+RyuPTOf3IxhQZcnctoUBCI9ONLexe9jrf+N+2pJSxnCpWdMYNXyyZwzLUfTQcugoiAQ6Wb7/gZWryvl0U0VNLZ2Mi13JF+6Yi7XLJ1Ejlr/MkgpCCT0Wto7efz1aOt/U2kdaSlDuHzBBFYtL+
CsqWPU+pdBT0EgobWtsoHV60v57aYKGts6mT52JF++ci7XLs3XyB8JFQWBhEpzWyePv17JA+vL2FxWR9rQIVy5II9VywtYVjharX8JJQWBhMLWyvpY67+SprZOZozL4Cvvn8c1SyeRna7Wv4SbgkAGrea2Tn63uZLV60vZXF7PsKFDuHJhHh9ZXsCZU9T6FzlKQSCDzpaKeh5YX8pjmypobu9i1vgMvnbVPD64JJ9R6alBlycy4CgIZFBoautkzWvR1v8bFfUMTx3C+xdOZNXyApYWZKv1L3ISCgJJWu7OGxXRa/+PvVZJS3sXcyZkcvuK+axYPIlRI9T6F4mHgkCSTmNrB4/FWv9bKxsYkZrCVYuiI38WT1brX+RUKQgkKbg7m8vrWb2ulDWbKznSEW39f33FfFYsmUTWcLX+Rd4pBYEMaA2tHTy2qYIH1pfxZlW09X/1oomsOquARfmj1PoX6QMKAhlw3J1NZXWsXlfK469XcaSji3l5WfzbB85gxeKJZKr1L9KnFAQyYNQf6eC3mypYvb6U7fsbSU9L4QNLoiN/FkxS618kURQEEih359XSWh5YV8bv36iktSPCgkmj+I8PLuDqxRPJGKb/oiKJpp8yCUR9SwePbipn9fpSdh5oYmRaCtcszWfVsgIW5I8KujyRUFEQSL9xd4r31bJ6XSm/f6OKts4Ii/JH8Y1rFnDVoomMVOtfJBD6yZOEq2tp59FXo9f+d1U3kTFsKNcX5bNyWQFnTFLrXyRoCgJJCHdnw95aVq+Ptv7bOyMsmpzNf1+7kCsX5qn1LzKA6KdR+lRtczuPvBq99r+7ppnMYUNZuWwyK5cVMG9iVtDliUgPFARy2tyddW8dZvX6Up54Yz/tXRGWFGTzzeuirf/0NP03ExnI9BMq79jh5nYe2Rht/e852Ezm8KGsWj6ZlcsLmJun1r9IslAQyClxd17ec4jV68t4aku09X/mlNF868IZXLkgjxFpKUGXKCKnSEEgcTnY1MYjG8t5cEMZbx1sJmv4UD5yVgGrlhcwe0Jm0OWJyGlQEMgJRSLR1v8D60t5eut+OrqcZYWj+ex7Z3DFgjyGp6r1LzIYxBUEZvYocA/whLtHEluSBK2msY1fbyznwQ2l7DvUwqgRqdx4diGrlk9m5ni1/kUGm3jPCH4IfAy4w8x+Bfzc3Xckrizpb5GI89Lug6xeX8rTWw/QGXGWTx3D5y+exWVnTFDrX2QQiysI3P0Z4BkzGwWsir0uA34K3OfuHQmsURKourGVXxVHW/9lh4+QnZ7KzecWsnJ5ATPGZQRdnoj0g7j7CMwsB7gBuBHYBNwPnA/cBLwnEcVJYkQizh9LDrJ6XSnPvBlt/Z89bQz/eMlsLp2v1r9I2MTbR/AbYDbwS+Aqd6+KrXrIzIpP8rnLgO8BKcDd7v6NHrb5EPA1wIHN7v6RUzoCiVt1QysPF5fx4IYyymuPMDo9lY+dF239Tx+r1r9IWMV7RvBTd1/bfYGZDXP3Nncv6ukDZpYC3Am8DygHNpjZGnff1m2bmcA/A+e5e62ZjXtHRyEn1BVx/rirhtXrS3nmzWq6Is6503P4p8vmcOn88Qwbqta/SNjFGwT/Bqw9btnLwNKTfGY5UOLuewDM7EFgBbCt2zafBO5091oAd6+Osx7pxYGGVh7eEG39V9QdYczIND5x/lRWLi9gau7IoMsTkQHkpEFgZhOAScAIM1sCHH1WYBaQ3sv3ngSUdXtfDpx13DazYvt5iejlo6+5+5M91HELcAtAQUFBL7sNr66I8+LOGh5YX8pz26Ot//Nm5PDPV8zhffPU+heRnvV2RnApcDOQD3yn2/JG4F/6aP8ziXY25wMvmtkCd6/rvpG73wXcBVBUVOR9sN9Bpar+CA9vKOehDaVU1reSm5HGJy+YxsplkylU619EenHSIHD3e4F7zexad3/kFL93BTC52/v82LLuyoF1seGnb5nZTqLBsOEU9xU6XRHnhR3VrI61/i
MOF8zM5cvvn8fFc8eTNnRI0CWKSJLo7dLQDe5+H1BoZl84fr27f6eHjx21AZhpZlOJBsBK4PgRQb8lel/Cz80sl+iloj2nUH/oVNYd4aENZTxcXEZVfSu5GcP4m3dPZ+WyAgpyertaJyLyl3q7NHT0usIpjy10904zuxV4iuj1/5+5+1Yzux0odvc1sXWXmNk2oAv4orsfOtV9DXadXRGe3xEd+fPCjmocuGDmWL561Twumjue1BS1/kXknTP33i+5m9lYd6/ph3p6VVRU5MXFJ7x1YVApr23h4Q1lPFRcxoGGNsZmDuPDRZP58LLJTB6j1r+IxM/MNp5ouH+8w0dfMrO9wEPAo0eHe0rf6+iK8Nz26LX/P+yMZu+7Z43l9hUFvHfOOLX+RaTPxTvX0CwzW070Ov+XYpdyHoz1H0gfKDvc8va1/+rGNsZnDeOzF87gQ8smkz9arX8RSZy45xpy9/XAejP7D6JDSe8FFASnyd2549kSvvvsTgAunD2OVcsLuHD2WIaq9S8i/SDeuYaygA8SPSOYDvyG6J3DchraOyP8y2/e4Ncby/nA4ol88bI5TMoeEXRZIhIy8Z4RbCY61PN2d385gfWERkNrB5++byMvlRzi7y+eyecumomZ9f5BEZE+Fm8QTPN4hhdJXCrqjvCxn69nT00z37p+EdedmR90SSISYr3dUPZdd/97YI2Z/UUQuPvVCatskNp1oJGP3r2OI+1d3PvXyzlvRm7QJYlIyPV2RvDL2NdvJbqQsPjPJ7bT0RXh158+l9kT9PxfEQneSYeluPvG2MvF7v6H7n+AxYkvb3DZd6iZ53dUc+PZUxQCIjJgxDs+8aYelt3ch3WEwn2v7CPFjI+ePSXoUkRE3tZbH8EqohPFTTWzNd1WZQKHE1nYYHOkvYuHNpRx6RkTGJ81POhyRETe1lsfwZ+BKiAX+Ha35Y3A64kqajB67LUKGlo7uemcwqBLERE5Rm/PI9gH7APO6Z9yBid3596X9zFnQibLCkcHXY6IyDFO2kdgZn+KfW00s4ZufxrNrKF/Skx+xftqebOqgZvOLdRNYyIy4PR2RnB+7KuGuLxD7s4Pnisha/hQViyeGHQ5IiJ/Ia5RQ2Y23cyGxV6/x8z+zsyyE1va4PDYa5X8YWcNn7t4Fulpcc/xJyLSb+IdPvoI0GVmM4g+RH4y8EDCqhokDjW18a+/28riydncfG5h0OWIiPQo3iCIuHsn0RlIv+/uXwTyElfW4PCvv9tGU1sn/33dQlKGqG9ARAameIOgI3ZPwU3A47FlqYkpaXB4bvsB1myu5G8vnMGs8epiEZGBK94g+BjRIaT/7u5vmdlU/v88RNKD7z2zi2ljR/KZ98wIuhQRkZOK91GV24C/6/b+LeC/ElVUsiuvbWFzeT23XT6HtKF6ypiIDGzxPqHsPOBrwJTYZwxwd5+WuNKS15Nb9gNw+RkTAq5ERKR38Y5nvAf4PLAR6EpcOcmvsyvCbzZVMC8viyk5I4MuR0SkV/EGQb27P5HQSgYBd+era7aytbKBb1+/KOhyRETiEm8QPG9m3wQeBdqOLnT3VxNSVZK675V93L+ulE+/ZzrX6vGTIpIk4g2Cs2Jfi7otc+C9fVtOcrt/XSlLCrL54iWzgy5FRCRu8Y4aujDRhSS7PTVNbN/fyG2Xz2GIbh4TkSQS71xD483sHjN7IvZ+npl9PLGlJZcfvbCbYUOHcO1SXRISkeQS7yD3XwBPAUenz9wJ/H0iCkpG9S0d/Pa1Cj68bDJjM4cFXY6IyCmJNwhy3f1hIAIQm3dIw0hjfvd6JR1dzjU6GxCRJBRvEDSbWQ7RDmLM7GygPmFVJZlfbSxnXl4Wi/JHBV2KiMgpi3fU0BeANcB0M3sJGAtcl7Cqkkh5bQuby+r4x0tm6eljIpKU4j0jmA5cDpxLtK9gF/GHyKD28u5DAFwyX9NJiEhyijcI/q+7NwCjgQuBHwI/6u1DZnaZme0wsxIzu+0k21
1rZm5mRSfaZqB6tbSWrOFDmTE2I+hSRETekXiD4GjH8JXAT93990DayT5gZinAnUTPJOYBq8xsXg/bZQKfA9bFW/RAsmFvLUsKRuveARFJWvEGQYWZ/QT4MLA29vzi3j67HChx9z3u3g48CKzoYbuvE53SujXOWgaM8toWSqqbuGBmbtCliIi8Y/EGwYeI9g1c6u51wBjgi718ZhJQ1u19eWzZ28xsKTA5doZxQmZ2i5kVm1lxTU1NnCUn3i9f2ccQg/fNGx90KSIi71i8U0y0EJ1w7uj7KqDqdHZsZkOA7wA3x7H/u4C7AIqKivx09tuXXtx5kHOn52q6aRFJaol8fFYFMLnb+/zYsqMygTOAF8xsL3A2sCZZOowPN7ez80AjSwuygy5FROS0JDIINgAzzWyqmaUBK4neiwCAu9e7e667F7p7IfAKcLW7Fyewpj7z5Jb9dEVcw0ZFJOklLAhi01DcSrRv4U3gYXffama3m9nVidpvf2jt6OLHf9jN3Lws5k/MCrocEZHTktCbwtx9LbD2uGVfOcG270lkLX3pv57cTunhFu7/xFm6m1hEkl4iLw0NSr8qLuPnL+3lpnOmcN4MDRsVkeSnIDgFL+8+xG2PvsEFM3O57fK5QZcjItInFARx6oo4tz36OlPGpPOjG85kRFpK0CWJiPQJBUGcnt66n32HWviny2aTMUzz7YnI4KEgiNPdf3qLKTnpvG+ehouKyOCiIIjD6+V1bNxXy03nFJKiyeVEZJBREMThly/vIz0theuK9ChKERl8FAS9ONLexRNb9vP+hXlkDU8NuhwRkT6nIOjFk1uraGrr5ANLJvW+sYhIElIQ9OL+V0qZmjuSs6fmBF2KiEhCKAhO4sktVRTvq+XGs6foCWQiMmgpCE6gobWDL/92K/MnZnHjOVOCLkdEJGF0Z9QJPL65ioNNbfzkxjNJTVFeisjgpd9wPWjvjHDfK/uYPnakHjwjIoOegqAHtz++lW1VDfzDJbM1zbSIDHoKguM8XFzGfa+U8ql3TeOKBXlBlyMiknAKgm7eKK/ny7/dwnkzcvjipbODLkdEpF8oCGION7fzN/dtZGzGMO5YuYSh6iAWkZDQqKGYO57dxYGGVh79zLnkZAwLuhwRkX6jZi/Rs4EHN5TywSWTWJivUUIiEi4KAqLPIW7tiPDJd00LuhQRkX6nICA6UmhZ4Whmjc8MuhQRkX4X+iDYX9/K7ppmLp2vJ4+JSDiFPgg27D0MwPKpYwKuREQkGKEPgo37ahmRmsK8vKygSxERCUTog2B3TRMzx2fovgERCa1Q//brijhbKurVSSwioRbqINhW2UBtSwfnz8gNuhQRkcCEOgjeqKgH4MwpowOuREQkOKEOgtLDLaSlDGFS9oigSxERCUyog2BPTRP5Y0boecQiEmqhDoKS6iZmq6NYREIuoUFgZpeZ2Q4zKzGz23pY/wUz22Zmr5vZs2bWb0+J7+yKUHq4ham5I/trlyIiA1LCgsDMUoA7gcuBecAqM5t33GabgCJ3Xwj8GvjvRNVzvL2HWuiMODPGZfTXLkVEBqREnhEsB0rcfY+7twMPAiu6b+Duz7t7S+ztK0B+Aus5Rkl1IwAzx+nSkIiEWyKDYBJQ1u19eWzZiXwceKKnFWZ2i5kVm1lxTU1NnxS360ATANPH6dKQiITbgOgsNrMbgCLgmz2td/e73L3I3YvGjh3bJ/vcWd3E5DEjSE/TQ9pEJNwS+VuwApjc7X1+bNkxzOxi4EvAu929LYH1HOPNqgZm6bKQiEhCzwg2ADPNbKqZpQErgTXdNzCzJcBPgKvdvTqBtRyj/kgHJdVNLJ6sx1KKiCQsCNy9E7gVeAp4E3jY3bea2e1mdnVss28CGcCvzOw1M1tzgm/Xp14rqwM0tYSICCT20hDuvhZYe9yyr3R7fXEi938ir5fVYQYLdUYgIjIwOov72/YDjRSMSSdjmDqKRURCGQS7DjQyUzeSiYgAIQyCroiz91
AL08YqCEREIIRBsPdQM+2dEU0tISISE7ogOHpH8dwJeli9iAiEMAiq6o8AMDF7eMCViIgMDKELgvLaI4xITWHMyLSgSxERGRBCFwRlh1vIHz0CMz2VTEQEQhgEFXVHmDRazygWETkqdEFQWXeEfAWBiMjbQhUELe2d1LZ0MDFbQSAiclSogqCyLjZiaJSCQETkqFAFQUVdK4D6CEREuglVELx9RqBLQyIibwtdEAwxGJ85LOhSREQGjFAFQUXdEcZnDWdoSqgOW0TkpEL1G/FQUzvjdDYgInKMUAVBbUs72emaWkJEpLvQBcHo9NSgyxARGVBCFQR1zR2M1mRzIiLHCE0QtHV20djWyRhdGhIROUZogqCupQOAMRkKAhGR7kITBIeb2wF0RiAicpzwBYH6CEREjhGaIDikIBAR6VF4gqCpDVAQiIgcLzRBMCl7BO+bN143lImIHGdo0AX0l0vmT+CS+ROCLkNEZMAJzRmBiIj0TEEgIhJyCgIRkZBTEIiIhJyCQEQk5BQEIiIhpyAQEQk5BYGISMiZuwddwykxsxpg3zv8eC5wsA/LSQY65nDQMYfD6RzzFHcf29OKpAuC02Fmxe5eFHQd/UnHHA465nBI1DHr0pCISMgpCEREQi5sQXBX0AUEQMccDjrmcEjIMYeqj0BERP5S2M4IRETkOAoCEZGQG3RBYGaXmdkOMysxs9t6WD/MzB6KrV9nZoX9X2XfiuOYv2Bm28zsdTN71symBFFnX+rtmLttd62ZuZkl/TDDeI7ZzD4U+7feamYP9HeNfS2O/9sFZva8mW2K/f++Iog6+5KZ/czMqs1sywnWm5ndEfs7ed3Mlp72Tt190PwBUoDdwDQgDdgMzDtum88AP469Xgk8FHTd/XDMFwLpsdefDsMxx7bLBF4EXgGKgq67H/6dZwKbgNGx9+OCrrsfjvku4NOx1/OAvUHX3QfH/S5gKbDlBOuvAJ4ADDgbWHe6+xxsZwTLgRJ33+Pu7cCDwIrjtlkB3Bt7/WvgIjOzfqyxr/V6zO7+vLu3xN6+AuT3c419LZ5/Z4CvA/8FtPZncQkSzzF/ErjT3WsB3L26n2vsa/EcswNZsdejgMp+rC8h3P1F4PBJNlkB/I9HvRek610AAATxSURBVAJkm1ne6exzsAXBJKCs2/vy2LIet3H3TqAeyOmX6hIjnmPu7uNEWxPJrNdjjp0uT3b33/dnYQkUz7/zLGCWmb1kZq+Y2WX9Vl1ixHPMXwNuMLNyYC3w2f4pLVCn+jPfq9A8vF7AzG4AioB3B11LIpnZEOA7wM0Bl9LfhhK9PPQeomd9L5rZAnevC7SqxFoF/MLdv21m5wC/NLMz3D0SdGHJZLCdEVQAk7u9z48t63EbMxtK9HTyUL9UlxjxHDNmdjHwJeBqd2/rp9oSpbdjzgTOAF4ws71Er6OuSfIO43j+ncuBNe7e4e5vATuJBkOyiueYPw48DODuLwPDiU7MNpjF9TN/KgZbEGwAZprZVDNLI9oZvOa4bdYAN8VeXwc857EemCTV6zGb2RLgJ0RDINmvG0Mvx+zu9e6e6+6F7l5ItF/kancvDqbcPhHP/+3fEj0bwMxyiV4q2tOfRfaxeI65FLgIwMzmEg2Cmn6tsv+tAf4qNnrobKDe3atO5xsOqktD7t5pZrcCTxEdcfAzd99qZrcDxe6+BriH6OljCdEOmZXBVXz64jzmbwIZwK9i/eKl7n51YEWfpjiPeVCJ85ifAi4xs21AF/BFd0/as904j/kfgJ+a2eeJdhzfnOQNO8xsNdFAz431fXwVSAVw9x8T7Qu5AigBWoCPnfY+k/zvTERETtNguzQkIiKnSEEgIhJyCgIRkZBTEIiIhJyCQEQk5BQEMmj0NmtjkMzs9thNfZjZBbHZQV8zs0lm9utePnu3mc2Lvf6X/qhXwkXDR2XQMLN3AU1EJ+Q6I+h6TsTMfgz8yd3vewefbXL3jASUJSGmMwIZNOKYtfGkzOwb3Z
7b8K3Ysl+Y2Y/NrNjMdprZ+2PLU8zsm2a2Ibb9p7p9n/9jZm+Y2WYz+0a373OdmX0C+BDwdTO738wKj57BxL7nt8xsS+x7fja2/AUzK4p9rxGxM4n7Y2cZf99tv/9uZp97p8cv4TWo7iwWeafMLAf4IDDH3d3MsrutLiQ6JfJ04HkzmwH8FdFb+5eZ2TDgJTN7GphDdJrgs9y9xczGdN+Pu99tZucDj7v7r+3YByPdEtvX4thdtcd/9jYzu9XdF8dqLgQeBb4bm2hvZaxOkVOiIBCJqif63IJ7zOxx4PFu6x6OzWa5y8z2EP1lfwmw0Myui20ziugEbxcDPz/6/Ad3P5UzlIuJPjSpM57PuvteMzsUm0tqPLApmaeUkOAoCCQ0zCwF2Bh7u8bdv3J0XawFvpzoBGbXAbcC7z26+rhv5USfDvVZd3/quH1cmojaT+JuotNtTwB+1s/7lkFCfQQSGu7e5e6LY3++0n2dmWUAo9x9LfB5YFG31deb2RAzm070sYk7iE6E9mkzS419fpaZjQT+F/iYmaXHlh9zeacX/wt8KjY9+ok+23F0nzG/AS4DlsVqEjllOiOQQaOnWRvd/Z44P54JPGZmw4m29r/QbV0psJ7oIxH/xt1bzexuotfzX7XolK41wAfc/UkzWwwUm1k70Zki4x3yeTfRqaNfN7MO4KfAD47b5q7Y+lfd/aPu3m5mzwN17t4V535EjqHhoyInYWa/INaxG3QtPYl1Er8KXO/uu4KuR5KTLg2JJKnYTWYlwLMKATkdOiMQEQk5nRGIiIScgkBEJOQUBCIiIacgEBEJOQWBiEjI/T8k60zwwuAOIgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "NreD2D5Q0V3B",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e9411c95-8864-470a-e8e4-6fdb6e692678"
},
"source": [
"# Youden's J statistic (sensitivity + specificity - 1) for every cut-off;\n",
"# the cell displays the cut-off that maximises it.\n",
"temp = val_sensitivity + val_specifity - 1\n",
"cut_off_thresh[np.argmax(temp)]"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.46"
]
},
"metadata": {
"tags": []
},
"execution_count": 35
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "xuB6Aqqo0VzG",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "dacd822c-b456-4034-af36-8c5b119454f3"
},
"source": [
"# Area under the ROC curve on the training data, from the class-1 probabilities.\n",
"from sklearn.metrics import roc_auc_score\n",
"roc_auc_score(class_host, prob_pred_train[:, 1])"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.7625002389019593"
]
},
"metadata": {
"tags": []
},
"execution_count": 36
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "N9XxwyM02EVR"
},
"source": [
"train sensitivity\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "MasD32UOSZS3"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "mL-w9iiz0Vvo"
},
"source": [
"# Use the cut-off that maximises sensitivity + specificity - 1 over the ROC\n",
"# sweep instead of a hard-coded copy of that value, so the threshold cannot\n",
"# go stale when the model or the data changes.\n",
"best_thresh = cut_off_thresh[np.argmax(val_sensitivity + val_specifity - 1)]\n",
"train_pred = np.zeros([train_count, ])\n",
"train_pred[prob_pred_train[:,1] > best_thresh] = 1"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "vhJTGxA90VsO",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "49963b74-334b-458b-ec95-9929a6b122c8"
},
"source": [
"# Confusion-matrix counts on the training set at the chosen threshold.\n",
"# precision = TP / (TP + FP); recall = TP / (TP + FN)\n",
"num_true_pos = np.sum(np.logical_and((train_pred==1), (class_host==1)))\n",
"num_false_pos = np.sum(np.logical_and((train_pred==1),(class_host==0)))\n",
"num_false_neg = np.sum(np.logical_and((train_pred==0),(class_host==1)))\n",
"num_true_neg = np.sum(np.logical_and((train_pred==0),(class_host==0)))\n",
"\n",
"print('precision for hospitalization: ', num_true_pos/(num_true_pos + num_false_pos))\n",
"print('recall for hospitalization: ', num_true_pos/(num_true_pos + num_false_neg))\n",
"# Raw counts, in order: TP, FP, FN, TN.\n",
"print(num_true_pos)\n",
"print(num_false_pos)\n",
"print(num_false_neg)\n",
"print(num_true_neg)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"precision for hospitalization: 0.1826367090698827\n",
"recall for hospitalization: 0.5594635627530364\n",
"2211\n",
"9895\n",
"1741\n",
"66153\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "bS3R2ONU0Von",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "69e9119d-1c3a-4fe0-ea7b-6b92456b23e7"
},
"source": [
"# sensitivity = TP / (TP + FN) (same formula as recall); specificity = TN / (TN + FP).\n",
"# Reads the num_* counts currently in the kernel, so run the counting cell first.\n",
"print('sensitivity for hospitalization: ', num_true_pos/(num_true_pos + num_false_neg))\n",
"print('specificity for hospitalization: ', num_true_neg/(num_true_neg + num_false_pos))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"sensitivity for hospitalization: 0.5594635627530364\n",
"specificity for hospitalization: 0.8698848095939407\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UQR8Jdiz2t6O"
},
"source": [
"Prediction on the test set"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ft-iBJHR0Vld"
},
"source": [
"# Multi-hot encode the test split: one row per test entry, one column per symptom.\n",
"X_test_symptom = np.zeros([test_count, num_symptom])\n",
"for row in range(test_count):\n",
"    X_test_symptom[row, test_symptoms[row].astype('int')] = 1\n",
"\n",
"# convert to pca\n",
"z_test_symptom = spca.transform(X_test_symptom)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "zkhEOAu10ViH"
},
"source": [
"# Hard labels and class probabilities for the test features.\n",
"class_pred = lr.predict(z_test_symptom)\n",
"prob_pred = lr.predict_proba(z_test_symptom)\n",
"# Ground-truth labels: 1.0 where the hospitalization value is positive, else 0.0.\n",
"class_test = (test_hosp_day_count > 0).astype(float)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "3AFEDQDp0Ve5",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "ea38cf6f-34cd-4156-e349-8937b26610c6"
},
"source": [
"#@title Compute and Plot ROC Curve\n",
"# Candidate probability cut-offs 0.00, 0.01, ..., 0.99.\n",
"cut_off_thresh = np.arange(100) / 100.0\n",
"\n",
"val_sensitivity = np.zeros([100, ])\n",
"val_specifity = np.zeros([100, ])\n",
"\n",
"for i in range(100):\n",
"    # Predict positive when the class-1 probability exceeds the current cut-off.\n",
"    temp_pred_class = np.zeros([test_count, ])\n",
"    temp_pred_class[prob_pred[:, 1] > cut_off_thresh[i]] = 1\n",
"\n",
"    # Confusion-matrix counts against the test labels.\n",
"    num_true_pos = np.sum(np.logical_and((temp_pred_class==1), (class_test==1)))\n",
"    num_false_pos = np.sum(np.logical_and((temp_pred_class==1),(class_test==0)))\n",
"    num_false_neg = np.sum(np.logical_and((temp_pred_class==0),(class_test==1)))\n",
"    num_true_neg = np.sum(np.logical_and((temp_pred_class==0),(class_test==0)))\n",
"\n",
"    # sensitivity = TP / (TP + FN); specificity = TN / (TN + FP)\n",
"    val_sensitivity[i] = num_true_pos/(num_true_pos + num_false_neg)\n",
"    val_specifity[i] = num_true_neg/(num_true_neg + num_false_pos)\n",
"\n",
"# Plot\n",
"plt.plot(1-val_specifity, val_sensitivity)\n",
"plt.xlabel('1 - specificity')\n",
"plt.ylabel('sensitivity')"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0, 0.5, 'sensitivity')"
]
},
"metadata": {
"tags": []
},
"execution_count": 124
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxV9Z3/8deHQBIgIQkkrEkIqxpQQSNuuLQuIHb016lra12qdX7OT7vYX38/W6fWnzP9TafbOJ1xahFculjr1ilVrFVrFVAQEEEWgZAACQRIgCyQPfnMH/diLwjkArm5ufe8n49HHtx7zrn3fA6E+77nc875HnN3REQkuPrEuwAREYkvBYGISMApCEREAk5BICIScAoCEZGA6xvvAo5Vbm6uFxUVxbsMEZGEsnz58hp3zzvcvIQLgqKiIpYtWxbvMkREEoqZbTnSPLWGREQCTkEgIhJwCgIRkYBTEIiIBJyCQEQk4GIWBGb2uJntMrPVR5hvZvZTMys1s1VmdkasahERkSOL5R7Bk8DMo8y/ApgQ/rkT+FkMaxERkSOIWRC4+9vAnqMscjXwCw9ZDGSb2YhY1SMikoj2tbTzyodV/ORP61lZURuTdcTzgrJRQEXE88rwtKpDFzSzOwntNVBYWNgjxYmIxJq7U9fURlVdM1V1TVTVNVPb2EZ9cxs76ppZsbWWrXsaP15+6KB0Ti/I7vY6EuLKYnefDcwGKCkp0Z10RKTX6ux0apva2L2vhZp9rdTsa2H3vhb2NLbR2NJOU1sHTa0dbK9rYvW2eva1tH/iPVJT+pCbkcrpBdlcV5JP8chBXDRxKCl9LCY1xzMItgEFEc/zw9NERBLCzvpmVlbUsrKyllWVdazf0cDu/a10dH7y+6oZ9O+XEvpJTWFIRhqfnTqK0UMGMCKrPyOy0xmRlU7OgFTS+6X06HbEMwjmAXeb2TPA2UCdu3+iLSQiEg/NbR2U7tpHWc1+6hpbqW9up765jfqmdmr2tbB6Wx1Vdc0ApPQxThqWyYUT8xg2KI3cjDSGZKSRm5EaejwwlZwBqfSJ0Tf6ExWzIDCz3wAXA7lmVgl8F+gH4O6PAvOBWUAp0AjcFqtaREQO1dHp7N7fwq76FqobWtizv5VttU2s39HARzvq2by78RPf7FNT+jCof1+yB6RyVtFgTi/IZkpBFsUjsuif2rPf4rtTzILA3W/sYr4D/ytW6xcRiVSxp5GXVlXx+rqdVO5tpGbf4Vs4hYMHcNLwTGadOoKThmcyYWgmOQP7MSi9X4+3bHpKQhwsFhE5HhV7Gpn/YRXzV+/4+NTL0/KzuGhiHkMz0xk2KI28zHTyMkNtnLzMNAakBu9jMXhbLCJJpbaxlcVle1hctpud9c00tnbQ1NbB3v2tbNy1Dwh9+N93xclceeoICgYPiHPFvY+CQEQS0vIte/nRq+tZXL4bdxiQmkJ+Tn/690shvV8KhYMHcM2Z+czSh3+XFAQiklA2Ve/jB3/8iFfX7CQvM42vfHoC0yfkcnp+Nql9NY7m8VAQiEhC2NXQzL+9vpFnllaQ3rcP9142kdunj2Fgmj7GTpT+BkWk19rX0s6bH+3ij2t28Ma6nbR3ODedXcg9l0wgNyMt3uUlDQWBiPQa7R2dbKrez4qte3lt7U4WlNbQ2t5JbkYqf3tGPndeMJai3IHxLjPpKAhEJK727G/lqXc2s6i0hjXb62lq6wBgVHZ/vnjOaGZOHs4ZhTkxG2dHFAQiEicVexp5YtFmfvPeVprbOzijMIcbphVwWn4Wp47KZlzeQMz04d8TFAQiElMdnc7WPY3s2d9KfVMbKypqeW3tTtZV1ZPSx7h6ykj+/uJxjB+aGe9SA0tBICLdprmtgw07G1izvZ412+tYs72ej6oaPm73APQxOHN0Dt+edTKzTh1Bfo7O8Y83BYGInJD9Le28snoHLyyvZOnmPbSHx+/JTOvLKS
MHccO0Ak4ZMYihmWkM6t+PoiEDGTwwNc5VSyQFgYgcs+a2DhaX7eYPK6t4ZXUVja0dFA4ewB0XjOW0/CwmjRxEQc6AXjvsshxMQSAiXXJ3NlXv560N1by1oZolZbtpae8kM60vV50+ks+dmU/J6Bwd3E1QCgIR+YTOTmdbbRMfVNSycGMNC0tr2FbbBMDYvIF8/uxCLpqYxzljhyTt0MxBoiAQCbADH/gbdzWwYec+Nu7cx8ZdDZTu2kdja+gA76D0vpw3Lpe7Lh7HRRPzNIBbElIQiARAZ6dTubfp4w/5DYf5wAcYmpnGxGGZXH9WAROGZlI8chCnjsrSxVxJTkEgkqTaOzp5+r2t/HZpBZuq99Hc1vnxvAMf+NeVFDBxWCYTh2UwYWgmWQP6xbFiiRcFgUgSWlK2m+/OW8NHOxqYWpjNTWePZsKwDMYPzWB8nj7w5WAKApEkUrGnkX/540e8tKqKUdn9+dkXzmDm5OE6m0eOSkEgkgTqmtr4zzdLeWLRZvr0ga9cMoG7LhpH/1Sd0SNdUxCIJLC2jk6eXrKVh1/fQG1TG387NZ//PWMiI7L6x7s0SSAKApEE5O68vm4X//zKOsqq93Pu2CHcf+UpTB6VFe/SJAEpCEQSRGen89GOBhaWVvPqmp0s37KXsXkDmXNzCZecMlTHAeS4KQhEerEddc0s2FjNwtIaFpXWULOvFYDxQzN46OpJ3DitkH4pumG7nBgFgUgvsr+lnSXlu1mwsYaFG2vYuGsfALkZqZw/Ppfp43OZPiFXxwCkWykIROKoo9P5cFsdCzZUs6C0hhVb99LW4aT17cO0MYO5tiSf6ePzOHl4pkbylJhREIjEwfIte5mzoIx3Nu2mrqkNgEkjB/Gl6WO4cEIeZ47O0WBu0mMUBCI9qLmtg399bQOPLShj8MA0ZkwaxvQJeZw/bghDMtLiXZ4ElIJApIes3V7PV55ZQemufdw4rZBvzzqZzHQN9SDxpyAQ6QF/WLmdbz6/kkHp/XjqS9O4aGJevEsS+ZiCQCSGOjqdH766nkff2sSZo3P42U1nMDQzPd5liRwkpicgm9lMM1tvZqVmdt9h5hea2ZtmtsLMVpnZrFjWI9KT6hrbuO3JpTz61iY+f3Yhv/nyOQoB6ZVitkdgZinAI8BlQCWw1MzmufvaiMX+AXjW3X9mZsXAfKAoVjWJ9JQNOxu48xfL2FbbxPc+O5kvnD063iWJHFEsW0PTgFJ3LwMws2eAq4HIIHBgUPhxFrA9hvWIxFzFnkaeemczT7+3lQGpfXn6y+dwVtHgeJclclSxDIJRQEXE80rg7EOWeRD4k5ndAwwELo1hPSIx8/7WvcxdUM4rq6swM648dQTfmnWyrgCWhBDvg8U3Ak+6+4/N7Fzgl2Y22d07IxcyszuBOwEKCwvjUKbIJ7V3dPKntTuZs6CM97fWkpnely9fOJZbzi1iZLYCQBJHLINgG1AQ8Tw/PC3S7cBMAHd/18zSgVxgV+RC7j4bmA1QUlLisSpYJBoNzW08u6ySJxaVU7m3icLBA3jwb4q5tqSAgWnx/m4lcuxi+Vu7FJhgZmMIBcANwOcPWWYrcAnwpJmdAqQD1TGsSeS4Ve5t5MlFm/nt0goaWto5qyiHf7iymMuKh5GicYAkgcUsCNy93czuBl4FUoDH3X2NmT0ELHP3ecA3gMfM7OuEDhzf6u76xi+9yoqte5mzsJw/rt4BwJWnjuD26WM4vSA7zpWJdI+Y7se6+3xCp4RGTnsg4vFa4PxY1iByPDo6nT+t2cGcheUs37KXzPS+3DF9DLecp/6/JB81NEUiHOj/P/lOORV7/tr/v6akgAz1/yVJ6TdbhFD//6l3NvPMe3/t/98/S/1/CQYFgQTaBxW1zFlQxivh/v+scP9/ivr/EiAKAgmcT/T/0/pye7j/P0r9fwkgBYEExr6Wdp5dWsET4f5/weD+fDd8/r/6/xJk+u2XpLettomn3tnMb5ZspaGlnZLROd
w/6xQuKx6u/r8ICgJJYof2/6+YPJzbp49hamFOnCsT6V0UBJJUOjqd19buYM6Ccpap/y8SFQWBJIV9Le08t6yCxxeF+v/5Of154DPFXHeW+v8iXdH/EEloH/f/39tKQ3M7Z47O4dtXnMLlk9T/F4mWgkAS0sqKWuYsLGf+h1WA+v8iJ0JBIAkj1P/fydyFZSzdHOr/f+n8Im45r4j8nAHxLk8kYSkIpNc70P9/YtFmtu5pJD+nP9/5TDHXleSTmd4v3uWJJDwFgfRa28P9/6cj+v/fuuJkLiseRt+UPvEuTyRpKAik11lVWcucBeW8/GEV7s4V4fF/zlD/XyQmFATSKxzo/z++sJz3Nu8hM60vt50X6v8XDFb/XySWFAQSV/s/Pv8/1P8fla3+v0hPUxBIXFTVNfHkO5t5ekmo/39GYTb3XXEyl6v/L9LjFATSo1ZV1jJ3YTkvr6qi050rJo/gS9PHcOZo9f9F4kVBIDHX0em8vm4ncxeE+v8ZaX25Vf1/kV5DQSAxs7+lneeXV/L4onK27A71///hylO4/qwC9f9FehEFgXS7A/3/3yzZSn1zO1MLs/k/M05mxiT1/0V6IwWBdJsPK+uYs7BM/X+RBKMgkBPS0em8sW4ncxaW8155qP9/y3lF3Kr+v0jCUBDIcTnQ/39iUTmbI/r/151VwCD1/0USioJAjklVXRNPvbOFp5dsob65nSkF2Tyi/r9IQlMQSFQ+rKxj7sIyXgr3/2dOHs7t08eq/y+SBBQEckSdnc4bH+1izoIylpTvYWBqCjefW8Rt56v/L5JMFATyCY2t4fP/F6r/LxIEUQWBmb0IzAVecffO2JYk8bKjrpmn3g2N/1PX1MbpBdn8x4yTmDlpuPr/Ikks2j2C/wRuA35qZs8BT7j7+tiVJT1p9bY65i4s5w8rt9PpzoxJw7njgtD4/2a6AbxIsosqCNz9deB1M8sCbgw/rgAeA37l7m0xrFFiQP1/ETkg6mMEZjYEuAn4IrAC+DUwHbgFuDgWxUn3a2xt54XllTy+aDPlNfsZmZXO/bNO4fpp6v+LBFW0xwh+B5wE/BL4G3evCs/6rZktO8rrZgL/BqQAc9z9+4dZ5jrgQcCBle7++WPaAonKjrpmfvHuZn4d0f//9xuncsVk9f9Fgi7aPYLH3H1+5AQzS3P3FncvOdwLzCwFeAS4DKgElprZPHdfG7HMBOBbwPnuvtfMhh7XVsgRqf8vIl2JNgj+CZh/yLR3gTOO8pppQKm7lwGY2TPA1cDaiGW+DDzi7nsB3H1XlPXIUXR2On/+aBdzFpaxuCzU///iuaO57bwxFA5R/19EDnbUIDCz4cAooL+ZTQUOfIUcBHT1iTIKqIh4XgmcfcgyE8PrWUSoffSgu//xMHXcCdwJUFhY2MVqg6uxtZ0X3t/G4wvLP+7/f3vWyVx/ViFZ/dX/F5HD62qPYAZwK5AP/CRiegPw7W5a/wRCB5vzgbfN7FR3r41cyN1nA7MBSkpKvBvWm1R21jfz1DsR/f/8LH4a7v/3U/9fRLpw1CBw96eAp8zsc+7+wjG+9zagIOJ5fnhapEpgSfj003Iz20AoGJYe47oCafW2Oh5fWM4fVm2nvdOZURzq/585Wv1/EYleV62hm9z9V0CRmd176Hx3/8lhXnbAUmCCmY0hFAA3AIeeEfRfhK5LeMLMcgm1isqOof7AOdD/n7uwnHfLdjMgNYUvnD2a284vYvSQgfEuT0QSUFetoQOfLBnH+sbu3m5mdwOvEur/P+7ua8zsIWCZu88Lz7vczNYCHcA33X33sa4rCJpaO3j+/UqeWFhOWc1+RmSl860rTuaGaer/i8iJMfeuW+5mlufu1T1QT5dKSkp82bIjXrqQdHbW//X8/9rGNk7Lz+KOC8aq/y8ix8TMlh/pdP9oTx9dZGabgd8CLx443VNiZ832v57/397pXF48jDsuGEuJ+v8i0s2iHWtooplNI9Tnvz/cynkmfPxAuklnp/
Pm+lD//51N6v+LSM+Ieqwhd38PeM/M/j+hU0mfAhQE3WTN9jq+8exKPtrRoP6/iPSoaMcaGgR8ltAewTjgd4SuHJYT1NnpPLagjB/9aT05A1J5+PopXHnaCPX/RaTHRLtHsJLQqZ4Pufu7MawnULbXNnHvsx+wuGwPMyYN45//9jQGD0yNd1kiEjDRBsFYj+b0IonaH1Zu5/7ffUh7p/ODz53GtSX5OggsInHR1QVlD7v714B5ZvaJIHD3q2JWWRJ79K1NfP+Vj5hamM3D10/RgWARiauu9gh+Gf7zR7EuJCha2zt57O0yLpiQyxO3nqV7AYhI3HU11tDy8MMp7v5vkfPM7KvAW7EqLFm9sW4nu/e38qXzxygERKRXiPaT6JbDTLu1G+sIjGeWVjB8UDoXTsyLdykiIkDXxwhuJDRQ3BgzmxcxKxPYE8vCktG22ibe3ljNPZ8aT0ofHRgWkd6hq2ME7wBVQC7w44jpDcCqWBWVrJ5bFrpPz7UlBV0sKSLSc7o6RrAF2AKc2zPlJK+OTue5ZZVMH59LwWDdLlJEeo+jHiMws4XhPxvMrD7ip8HM6numxOSwsLSGbbVN3HCWbrUpIr1LV3sE08N/ZvZMOcmpvaOTn7+1icEDU7m0eGi8yxEROUhUZw2Z2TgzSws/vtjMvmJm2bEtLTl0djr3vfgh72zazb2XTSStb0q8SxIROUi0p4++AHSY2XhCN5EvAJ6OWVVJwt35p5fX8fzySr526QRuOmd0vEsSEfmEaIOg093bCY1A+u/u/k1gROzKSg7/8edSHl9Uzq3nFfHVSybEuxwRkcOKNgjawtcU3AK8FJ6mgfKPoqquiR+/toGrTh/JA58p1oByItJrRRsEtxE6hfR77l5uZmP46zhEchgLNtQA8PefGkcfXTwmIr1YtLeqXAt8JeJ5OfAvsSoqGby9sZq8zDROGqYTrkSkd4v2DmXnAw8Co8OvMcDdfWzsSktcHZ3OwtIaPn3yULWERKTXi/bGNHOBrwPLgY7YlZMcFmyspraxjQsnaGA5Een9og2COnd/JaaVJIm6pja+/eKHjM0dyIxJw+NdjohIl6INgjfN7IfAi0DLgYnu/n5Mqkpg/2/eGnY2tPDCXefRP1UXj4lI7xdtEJwd/rMkYpoDn+7echJb6a4GXlyxjbs/NZ4pBbrwWkQSQ7RnDX0q1oUkg3krq+hjcPO5uoJYRBJHtGMNDTOzuWb2Svh5sZndHtvSEou7818rtnHuuCEMHZQe73JERKIW7QVlTwKvAiPDzzcAX4tFQYlq6ea9bN3TyGen5se7FBGRYxJtEOS6+7NAJ0B43CGdRhrW1NrBQy+tIat/P648VUMwiUhiifZg8X4zG0LoADFmdg5QF7OqEsyra3awels9D18/RWcKiUjCiTYI7gXmAePMbBGQB1wTs6oSzKLSGrIH9OOq00d2vbCISC8TbWtoHHAFcB6hYwUbiSJEzGymma03s1Izu+8oy33OzNzMSo60TG/24bY6Ts/P1uByIpKQog2C77h7PZADfAr4T+BnR3uBmaUAjxAKkGLgRjMrPsxymcBXgSXHUHev0dLeQemufUwaOSjepYiIHJdog+DAgeErgcfc/WUgtYvXTANK3b3M3VuBZ4CrD7PcPxIaybQ5ylp6lfU7GmjvdIoVBCKSoKINgm1m9nPgemB++P7FXb12FFAR8bwyPO1jZnYGUBAOliMyszvNbJmZLauuro6y5J7xl/XVmMHZY4bEuxQRkeMSbRBcR+jYwAx3rwUGA988kRWbWR/gJ8A3ulrW3We7e4m7l+Tl9Z4RPXfUNfP0kq1MLcgmLzMt3uWIiByXaIeYaCQ04NyB51VAVRcv20boJvcH5IenHZAJTAb+Eh6zfzgwz8yucvdl0dQVb4++tYmafS3MvvnMeJciInLcot0jOB5LgQlmNsbMUoEbCJ2CCoC717l7rrsXuXsRsBhImBBobe/kpVXbuax4GKfla4A5EUlcMQ
uC8NXHdxNqKa0DnnX3NWb2kJldFav19pRfLd5Czb5WbphWGO9SREROSLQXlB0Xd58PzD9k2gNHWPbiWNbSnZ5dWsE/vbyWCyfmceGE3HiXIyJyQmLZGkpKW3bv5zu/X82544bwyOen6p7EIpLwFATH6MF5a+jbx/jxtVPITO8X73JERE6YguAYvLtpN2+ur+arl05geJbuOSAiyUFBcAz+9fUNDM1M4+Zzi+JdiohIt1EQRGnF1r28V76Hv7toHOn9NNS0iCQPBUGU5i4sJzO9L9efVdD1wiIiCURBEIVttU28snoHn59WSEZaTM+4FRHpcQqCKPzy3S24O188d3S8SxER6XYKgi40trbzzNKtXFY8jPycAfEuR0Sk2ykIuvDC+9uobWzjjgvGxrsUEZGYUBAcRV1TGz97s5TTC7IpGZ0T73JERGJCQXAUD/x+NTsbWnjoqkkaSkJEkpaC4AiWb9nL7z/Yzj2fHs/pBRpmWkSSl4LgMNydh1/fQGZ6X76sYwMikuQUBIfxy8VbWLCxhm/OOImBum5ARJKcguAQpbsa+N7L67j4pDy+eI6uGxCR5KcgiNDa3slXn/mAgWl9+cE1p+kAsYgEgvoeEX7y2gbWbK/nsZtLGJqpYaZFJBi0RxC2bPMefv72Jm6cVshlxcPiXY6ISI9REIT9+59Lyc1I4zufOSXepYiI9CgFAaH7EL+1oZovnjOaAanqlolIsCgIgDc/2gXAZ6eOinMlIiI9T0EAvFu2m/yc/hQM1uiiIhI8gQ+ClvYO3tm0m/PGDYl3KSIicRH4IPjzul00NLdz5Wkj412KiEhcBD4IXv6wityMNM7XHoGIBFTgg2D5lr2cN24IfVMC/1chIgEV6E+//S3tVNU1c9LwzHiXIiISN4EOgq17GgEYPURnC4lIcAU6CHY1tAAwIkvjColIcAU6CMqr9wFogDkRCbRAB8G6qgZyM9LIz+kf71JEROImpkFgZjPNbL2ZlZrZfYeZf6+ZrTWzVWb2hpn16J1gqve1kJeZpvsOiEigxSwIzCwFeAS4AigGbjSz4kMWWwGUuPtpwPPAD2JVz+Gs3V7PhKEZPblKEZFeJ5Z7BNOAUncvc/dW4Bng6sgF3P1Nd28MP10M5MewnoPUNbaxo76ZSSMH9dQqRUR6pVgGwSigIuJ5ZXjakdwOvBLDeg5SsTeUP4UaaE5EAq5XDL5vZjcBJcBFR5h/J3AnQGFhYbessyJ8DYFGHBWRoIvlHsE2oCDieX542kHM7FLgfuAqd2853Bu5+2x3L3H3kry8vG4p7sAeQUGOgkBEgi2WQbAUmGBmY8wsFbgBmBe5gJlNBX5OKAR2xbCWTyir3k/OgH5kDejXk6sVEel1YhYE7t4O3A28CqwDnnX3NWb2kJldFV7sh0AG8JyZfWBm847wdt1ubVU9p4zQgWIRkZgeI3D3+cD8Q6Y9EPH40liu/0jqm9tYs72euy4aF4/Vi4j0KoG8svjdTbvp6HSmT8iNdykiInEXyCD4y/pdDEhN4YzCnHiXIiISd4ELgrqmNp5bVsmMScNJ7Ru4zRcR+YTAfRJu2b2f9k5n5uTh8S5FRKRXCFwQVO5tAnT9gIjIAQEMgtCFZKM09LSICBDIIGgiM70vWf11IZmICAQ0CPLVFhIR+VjggmBzzX6KdLN6EZGPBSoI3J1ttU26NaWISIRABUF9Uzst7Z0MG6Sb1YuIHBCoINjV0AxAXmZanCsREek9AhUEu/e3ApCboSAQETkgUEGwNxwE2boHgYjIx4IVBI1tAOQMSI1zJSIivUeggmBnfTNmag2JiEQKVBBU1TWRm5GmUUdFRCIE6hNxb2MbQwaqLSQiEilQQbC/pZ2MtJjenVNEJOEELggGKghERA4SqCCob24nM11BICISKTBB4O7srG9maKaGlxARiRSYIGhoaaextYPhWTp1VEQkUmCCYFd9aJwhDTgnInKwwARBdUNoeIk8XUwmInKQwATB7v0tAA
xREIiIHCQwQVDTEAqC3AxdUCYiEikwQTAyuz+XFw8jWwPOiYgcJDAn1V8+aTiXTxoe7zJERHqdwOwRiIjI4SkIREQCTkEgIhJwCgIRkYCLaRCY2UwzW29mpWZ232Hmp5nZb8Pzl5hZUSzrERGRT4pZEJhZCvAIcAVQDNxoZsWHLHY7sNfdxwP/CvxLrOoREZHDi+UewTSg1N3L3L0VeAa4+pBlrgaeCj9+HrjEzCyGNYmIyCFiGQSjgIqI55XhaYddxt3bgTpgyKFvZGZ3mtkyM1tWXV0do3JFRIIpIS4oc/fZwGwAM6s2sy3H+Va5QE23FZYYtM3BoG0OhhPZ5tFHmhHLINgGFEQ8zw9PO9wylWbWF8gCdh/tTd0973gLMrNl7l5yvK9PRNrmYNA2B0OstjmWraGlwAQzG2NmqcANwLxDlpkH3BJ+fA3wZ3f3GNYkIiKHiNkegbu3m9ndwKtACvC4u68xs4eAZe4+D5gL/NLMSoE9hMJCRER6UEyPEbj7fGD+IdMeiHjcDFwbyxoOMbsH19VbaJuDQdscDDHZZlMnRkQk2DTEhIhIwCkIREQCLumCIIjjG0Wxzfea2VozW2Vmb5jZEc8nThRdbXPEcp8zMzezhD/NMJptNrPrwv/Wa8zs6Z6usbtF8btdaGZvmtmK8O/3rHjU2Z3M7HEz22Vmq48w38zsp+G/k1VmdsYJr9Tdk+aH0NlJm4CxQCqwEig+ZJm/Bx4NP74B+G286+6Bbf4UMCD8+K4gbHN4uUzgbWAxUBLvunvg33kCsALICT8fGu+6e2CbZwN3hR8XA5vjXXc3bPeFwBnA6iPMnwW8AhhwDrDkRNeZbHsEQRzfqMttdvc33b0x/HQxoYv7Elk0/84A/0hoIMPmniwuRqLZ5i8Dj7j7XgB339XDNXa3aLbZgUHhx1nA9h6sLybc/W1Cp9MfydXALzxkMZBtZiNOZJ3JFgTdNr5RAolmmyPdTujbRCLrcpvDu8sF7v5yTxYWQ9H8O08EJprZIjNbbGYze6y62Ihmmx8EbjKzSkKnqt/TM6XF1bH+n+9SQow1JN3DzG4CSoCL4l1LLJlZH+AnwK1xLqWn9SXUHrqY0F7f22Z2qrvXxrWq2LoReNLdf8QxmHwAAAQRSURBVGxm5xK6QHWyu3fGu7BEkmx7BMcyvhHRjm/Uy0WzzZjZpcD9wFXu3tJDtcVKV9ucCUwG/mJmmwn1Uecl+AHjaP6dK4F57t7m7uXABkLBkKii2ebbgWcB3P1dIJ3QwGzJLKr/88ci2YIgiOMbdbnNZjYV+DmhEEj0vjF0sc3uXufuue5e5O5FhI6LXOXuy+JTbreI5nf7vwjtDWBmuYRaRWU9WWQ3i2abtwKXAJjZKYSCINnHqp8H3Bw+e+gcoM7dq07kDZOqNeQBHN8oym3+IZABPBc+Lr7V3a+KW9EnKMptTipRbvOrwOVmthboAL7p7gm7txvlNn8DeMzMvk7owPGtCf7FDjP7DaFAzw0f+/gu0A/A3R8ldCxkFlAKNAK3nfA6E/zvTERETlCytYZEROQYKQhERAJOQSAiEnAKAhGRgFMQiIgEnIJAkkZXozbGk5k9FL6oDzO7IDw66AdmNsrMnu/itXPMrDj8+Ns9Ua8Ei04flaRhZhcC+wgNyDU53vUciZk9Cix0918dx2v3uXtGDMqSANMegSSNKEZtPCoz+37EfRt+FJ72pJk9ambLzGyDmX0mPD3FzH5oZkvDy/9dxPv8XzP70MxWmtn3I97nGjO7A7gO+Ecz+7WZFR3Ygwm/54/MbHX4Pe8JT/+LmZWE36t/eE/i1+G9jK9FrPd7ZvbV491+Ca6kurJY5HiZ2RDgs8DJ7u5mlh0xu4jQkMjjgDfNbDxwM6FL+88yszRgkZn9CTiZ0DDBZ7t7o5kNjlyPu88xs+nAS+7+vB18Y6Q7w+uaEr6q9tDX3mdmd7v7lHDNRcCLwMPhgfZuCNcpckwUBCIhdYTuWzDXzF4CXo
qY92x4NMuNZlZG6MP+cuA0M7smvEwWoQHeLgWeOHD/B3c/lj2USwndNKk9mte6+2Yz2x0eS2oYsCKRh5SQ+FEQSGCYWQqwPPx0nrs/cGBe+Bv4NEIDmF0D3A18+sDsQ97KCd0d6h53f/WQdcyIRe1HMYfQcNvDgcd7eN2SJHSMQALD3TvcfUr454HIeWaWAWS5+3zg68DpEbOvNbM+ZjaO0G0T1xMaCO0uM+sXfv1EMxsIvAbcZmYDwtMPau904TXg78LDox/ptW0H1hn2O2AmcFa4JpFjpj0CSRqHG7XR3edG+fJM4Pdmlk7o2/69EfO2Au8RuiXi/3T3ZjObQ6if/76FhnStBv6Hu//RzKYAy8ysldBIkdGe8jmH0NDRq8ysDXgM+I9Dlpkdnv++u3/B3VvN7E2g1t07olyPyEF0+qjIUZjZk4QP7Ma7lsMJHyR+H7jW3TfGux5JTGoNiSSo8EVmpcAbCgE5EdojEBEJOO0RiIgEnIJARCTgFAQiIgGnIBARCTgFgYhIwP03Oxp0qqwxNxYAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "m-KGyTJY0VX9",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d4ae7586-d1bd-45e6-82fe-4e6dcf3118ad"
},
"source": [
"temp = val_sensitivity + val_specifity - 1\n",
"cut_off_thresh[np.argmax(temp)]"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.46"
]
},
"metadata": {
"tags": []
},
"execution_count": 48
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "lebclLaM0VVA"
},
"source": [
"from sklearn.metrics import roc_auc_score"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yQ_mE0-Z0VRN",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "645eb60a-9d1e-4cd9-f1af-8bf70820e6d3"
},
"source": [
"roc_auc_score(class_test, prob_pred[:, 1])"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.7660188066892274"
]
},
"metadata": {
"tags": []
},
"execution_count": 52
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "rTaiq-ci0VO3"
},
"source": [
"class_pred = np.zeros([test_count, ])\n",
"class_pred[prob_pred[:,1] > 0.46] = 1"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ec3_1AY80VKg",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "08bbd1a8-0ea3-4bf7-c9f2-d6f8a3ac4aaf"
},
"source": [
"# confusion-matrix counts; precision = TP / (TP + FP), recall = TP / (TP + FN)\n",
"num_true_pos = np.sum(np.logical_and((class_pred==1), (class_test==1)))\n",
"num_false_pos = np.sum(np.logical_and((class_pred==1),(class_test==0)))\n",
"num_false_neg = np.sum(np.logical_and((class_pred==0),(class_test==1)))\n",
"num_true_neg = np.sum(np.logical_and((class_pred==0),(class_test==0)))\n",
"\n",
"print('precision for hospitalization: ', num_true_pos/(num_true_pos + num_false_pos))\n",
"print('recall for hospitalization: ', num_true_pos/(num_true_pos + num_false_neg))\n",
"print(num_true_pos)\n",
"print(num_false_pos)\n",
"print(num_false_neg)\n",
"print(num_true_neg)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"precision for hospitalization: 0.20369149637442321\n",
"recall for hospitalization: 0.4608501118568233\n",
"618\n",
"2416\n",
"723\n",
"24048\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "2vqAeLmH0VGa",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "53d919c9-2b85-42b7-b127-c2fa9e3f9267"
},
"source": [
"print('sensitivity for hospitalization: ', num_true_pos/(num_true_pos + num_false_neg))\n",
"print('specificity for hospitalization: ', num_true_neg/(num_true_neg + num_false_pos))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"sensitivity for hospitalization: 0.4608501118568233\n",
"specificity for hospitalization: 0.9087061668681983\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_RPIhsactcCZ"
},
"source": [
"## Sparse Naive Bayes"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4eiylzR637QR"
},
"source": [
"### Extract Important Features"
]
},
{
"cell_type": "code",
"metadata": {
"id": "MermHDTb0VDp"
},
"source": [
"# generate index of hospitalized / nonhospitalized\n",
"# index: hosp\n",
"ind_1 = np.argwhere(train_hosp_day_count).reshape(-1)\n",
"# index: nohosp\n",
"ind_0 = np.argwhere(train_hosp_day_count == 0).reshape(-1)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "BgURN2k00U-P"
},
"source": [
"# Gen hosp features\n",
"# equation page 2, data setup \n",
"func_1 = np.zeros([num_symptom, ])\n",
"func_0 = np.zeros([num_symptom, ])\n",
"\n",
"for i in ind_0:\n",
" func_0[train_symptoms[i].astype('int')] += 1\n",
"\n",
"for i in ind_1:\n",
" func_1[train_symptoms[i].astype('int')] += 1"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "rNRbOjX-0U8H"
},
"source": [
"#@title Naive Bayes\n",
"# Laplace (add-one) smoothing: every probability is estimated as (count + 1) / (n + 2). TODO: ask a mentor to review this derivation\n",
"v = (func_0 + func_1) * np.log((func_0 + func_1 + 1)/(train_count + 2)) + (train_count - func_0 - func_1) * np.log(1 - (func_0 + func_1 + 1)/(train_count+2))\n",
"num_1 = ind_1.shape[0]\n",
"num_0 = ind_0.shape[0]\n",
"w_1 = func_1 * np.log((func_1+1) / (num_1+2)) + (num_1 - func_1) * np.log(1 - (func_1+1) / (num_1+2))\n",
"w_0 = func_0 * np.log((func_0+1) / (num_0+2)) + (num_0 - func_0) * np.log(1 - (func_0+1) / (num_0+2))\n",
"w = w_0 + w_1\n",
"\n",
"det_wv = w - v"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "o0fGxjAt0U4I"
},
"source": [
"# find the num_symptom - main_symptom smallest index\n",
"k_symptoms = 10\n",
"# index of not important symptoms\n",
"ind_nk = np.argpartition(det_wv, num_symptom - k_symptoms)\n",
"ind_nk = ind_nk[:(num_symptom - k_symptoms)]\n",
"# index of important symptoms\n",
"ind_k = np.argpartition(-det_wv, k_symptoms)\n",
"ind_k = ind_k[:k_symptoms]\n",
"\n",
"# weight of hospitalization - give hospitalized, the probability that a specific symptom exists in patient\n",
"theta_1 = np.zeros([num_symptom, ])\n",
"theta_1[ind_k] = (func_1[ind_k]+1) / (num_1+2)\n",
"theta_1[ind_nk] = (func_1[ind_nk] + func_0[ind_nk] + 1) / (train_count+2)\n",
"\n",
"# weight of not hospitalization\n",
"theta_0 = np.zeros([num_symptom, ])\n",
"theta_0[ind_k] = (func_0[ind_k]+1) / (num_0+2)\n",
"theta_0[ind_nk] = (func_1[ind_nk] + func_0[ind_nk]+1) / (train_count+2)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "YBQQ_SEP0U0m",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "bd562b2f-7531-47d5-ba9c-8165eaca4482"
},
"source": [
"# find the top 20 symptoms that leads to hospitalization\n",
"main_symptoms_count = 20\n",
"# index of top weights\n",
"ind_m = np.argpartition(-np.abs(theta_0), main_symptoms_count)\n",
"ind_m = ind_m[:main_symptoms_count]\n",
"\n",
"determinant_symptoms = []\n",
"for i in range(main_symptoms_count):\n",
" for symptom, ind_symptom in symptom_dict.items():\n",
" if ind_symptom == ind_m[i]:\n",
" # print(symptom)\n",
" determinant_symptoms.append(symptom)\n",
"print(determinant_symptoms)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"['Pain', 'Pain in extremity', 'Dizziness', 'Nausea', 'Pyrexia', 'Fatigue', 'Chills', 'Headache', 'Injection site pain', 'Injection site swelling', 'Arthralgia', 'Myalgia', 'Injection site erythema', 'Pruritus', 'Dyspnoea', 'Vomiting', 'Rash', 'Asthenia', 'Injection site pruritus', 'Paraesthesia']\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "FDcuQWBDuaio"
},
"source": [
"Test-Train"
]
},
{
"cell_type": "code",
"metadata": {
"id": "nQpthZ8k0Uxq"
},
"source": [
"#@title Train\n",
"pred_class = np.zeros([train_count, ])\n",
"pred_prob = np.zeros([train_count, ]) # probability of hospitalization\n",
"p_y1 = num_1 / train_count\n",
"\n",
"for i in range(test_count):\n",
" ind_1 = train_symptoms[i].astype('int')\n",
" ind_0 = np.array(range(num_symptom))\n",
" ind_0 = np.delete(ind_0, ind_1)\n",
" py1 = np.exp(np.sum(np.log(theta_1[ind_1])) + np.sum(np.log(1 - theta_1[ind_0]))) * p_y1\n",
" py0 = np.exp(np.sum(np.log(theta_0[ind_1])) + np.sum(np.log(1 - theta_0[ind_0]))) * (1 - p_y1)\n",
" pred_prob[i] = py1/(py0+py1)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "djtGY4HU0Uuk"
},
"source": [
"#@title Build Training Labels\n",
"train_class = np.zeros([train_count, ])\n",
"train_class[train_hosp_day_count > 0] = 1"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yK0IenUr0Urd",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "2b974990-09c4-4248-b4f0-eaa0a4945807"
},
"source": [
"# compute ROC curve\n",
"cut_off_thresh = np.array(range(0, 100))\n",
"cut_off_thresh.astype('float')\n",
"cut_off_thresh = cut_off_thresh / 100.0\n",
"\n",
"val_sensitivity = np.zeros([100, ])\n",
"val_specifity = np.zeros([100, ])\n",
"\n",
"for i in range(100):\n",
" temp_thresh = cut_off_thresh[i]\n",
" temp_pred_class = np.zeros([train_count, ])\n",
" temp_pred_class[pred_prob > cut_off_thresh[i]] = 1\n",
"\n",
" # compute recall (true positive / (true positive + false positive))\n",
" num_true_pos = np.sum(np.logical_and((temp_pred_class==1), (train_class==1)))\n",
" num_false_pos = np.sum(np.logical_and((temp_pred_class==1),(train_class==0)))\n",
" num_false_pos = np.sum(np.logical_and((temp_pred_class==1),(train_class==0)))\n",
" num_false_neg = np.sum(np.logical_and((temp_pred_class==0),(train_class==1)))\n",
" num_true_neg = np.sum(np.logical_and((temp_pred_class==0),(train_class==0)))\n",
"\n",
" val_sensitivity[i] = num_true_pos/(num_true_pos + num_false_neg)\n",
" val_specifity[i] = num_true_neg/(num_true_neg + num_false_pos)\n",
"\n",
"# plot ROC curve\n",
"plt.plot(1-val_specifity, val_sensitivity)\n",
"plt.xlabel('1 - specificity')\n",
"plt.ylabel('sensitivity')"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0, 0.5, 'sensitivity')"
]
},
"metadata": {
"tags": []
},
"execution_count": 133
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxU93nv8c8jCUkgEKtYpWGzMAZjwBZSs3jHNokXktiOQcmNm7p1nWs3i3t7m8VNet2m101yc29ur9PEcdwmKQK8JSaOE+LEdnZrYTEGbMxiM5JYJBAgNi2jee4fc6CDMsAAGs1I+r5fL72YOef8Zr4ew3n0nN/Mb8zdERER6S4r3QFERCQzqUCIiEhCKhAiIpKQCoSIiCSkAiEiIgnlpDtATxkzZoxPmTIl3TFERPqUNWvW7HP3okT7+k2BmDJlCnV1demOISLSp5jZztPt0yUmERFJSAVCREQSUoEQEZGEVCBERCQhFQgREUlIBUJERBJSgRARkYRUIERE+qj2SBfPrW+kqjqcksdP6QflzGwR8A0gG3jc3R/ptv8+4H6gCzgC3Ovum81sCvAGsCU49FV3vy+VWUVE+ortzUdYURPm6TUNHDjWyfzQCJaWl2BmPfo8KSsQZpYNPArcADQAtWa2yt03xx1W5e7fCo6/Dfg6sCjYt93d56Uqn4hIX9Ie6WL1pr1UVe/k1R0t5GQZN84eR2X5ZN49fXSPFwdIbQdRDmxz9x0AZrYCWAycLBDu3hp3fAGgr7cTEYnzzr6jLK8N83RdA/uPdlA8cjB/c9PF3FlWzNhh+Sl97lQWiElAfdz9BqCi+0Fmdj/wIJALXBe3a6qZrQNagYfc/TcJxt4L3AsQCoV6LrmISBp1dkV5cfNeqqrD/HbbPrKzjIWXjKWyYjJXXjSGrKye7xYSSftife7+KPComVUCDwF3A7uBkLvvN7MrgB+Z2exuHQfu/hjwGEBZWZm6DxHp0+pbjrG8JsyTdQ3sO9LOpBGD+esbZvDhBSWMK0xtt5BIKgtEI1ASd7842HY6K4B/BXD3dqA9uL3GzLYDMwAt1yoi/UpnV5RfvtHEsuqd/HbbPgy4buY4PlIR4qoZRWT3UreQSCoLRC1QamZTiRWGJUBl/AFmVuruW4O7NwNbg+1FQIu7d5nZNKAU2JHCrCIivarhwDFW1tazsraepsPtTBiez6euL+WuBSVMGD443fGAFBYId4+Y2QPAamJvc33C3TeZ2cNAnbuvAh4ws4VAJ3CA2OUlgKuAh82sE4gC97l7S6qyioj0hkhXlJfebGJ5TZhX3moG4NqLx1JZHuKai4vIyc6sj6aZe/+4dF9WVub6wiARyUS7Dh5nRW09T9bWs6e1jXGFedxVVsJd5SEmjUhvt2Bma9y9LNG+tE9Si4j0R11R51dvNVFVHealN5tw4KrSIv7H4tlcP3NsxnULiahAiIj0oL2tbSfnFhoPHmfM0Dw+cc10liwIUTJqSLrjnRMVCBGRC9QVdX6ztZmq6jC/fLOJrqhzZekYHrr5EhbOGsegPtAtJKICISJynppa23iyrp7lNbFuYXRBLn9x5TSWlpcweXRBuuNdMBUIEZFzEI06v9u+j6rqMC9u3ksk6rx7+mg+9/6Z3DhrPLk5fbNbSEQFQkQkCc2H23l6TQPLa8KEW44xcsgg/uy9U1myoIRpRUPTHS8lVCBERE4jGnX+sGM/VdVhfr55D51dTsXUUfz1jTNYdOl48nKy0x0xpVQgRES6aTnawdNr6qmqDvPO/mOMGDKIj71rCkvLQ1w0tn92C4moQIiIAO5O9dstVFWH+dnGPXR0RVkwZSSfWljK+y6dQP6g/t0tJKICISID2oGjHTyzNja3sL35KIX5OVRWhKisCDFj3LB0x0srFQgRGXDcndp3DlBVvZMXNu6hIxLl8tAIvnbnXG6eM4HBuQOvW0hEBUJEBoxDxzp5dl
0DVdVhtjYdYVheDksWlLC0PMQlEwrTHS/jqECISL/m7qwNH6Cqup7nN+yiPRJlbskIvnL7ZdwydwJDcnUaPB29MiLSL7W2dfLDtY0srwnz5p7DFORmc8cVxVRWhJg9cXi64/UJKhAi0m+4O+vrD1JVHebHG3bR1hllzqTh/M8PzeG2uRMpyNMp71zo1RKRPu9wWyfPrd9FVXWYzbtbGZKbzQfnT6KyfDJzitUtnC8VCBHpszY0HGR5TZjn1u/iWEcXsyYU8o8fuJTF8yYyLH9QuuP1eSoQItKnHG2PxLqFmp1sbGwlf1AWt82dSGXFZOYWD8fM0h2x31CBEJE+YWPjIapqwjy3rpGjHV3MHD+MhxfP5gPzJ1GobiElVCBEJGMd64jw/Gu7WVYT5rX6g+TlZHHLZROprAhxeWiEuoUUS2mBMLNFwDeAbOBxd3+k2/77gPuBLuAIcK+7bw72fQ64J9j3SXdfncqsIpI53tjdyvKaMD9c28jh9gilY4fypVtn8aH5xQwfom6ht6SsQJhZNvAocAPQANSa2aoTBSBQ5e7fCo6/Dfg6sMjMZgFLgNnAROAXZjbD3btSlVdE0ut4RxfPb9hFVU2YdeGD5OZkcfOcCVRWhCibPFLdQhqksoMoB7a5+w4AM1sBLAZOFgh3b407vgDw4PZiYIW7twNvm9m24PH+kMK8IpIGW/ceZll1mGfXNtDaFmFaUQEP3XwJt19ezMiC3HTHG9BSWSAmAfVx9xuAiu4Hmdn9wINALnBd3NhXu42dlGDsvcC9AKFQqEdCi0jqtXV28cLru1leE6b2nQPkZmex6NLxVFaEqJg6St1Chkj7JLW7Pwo8amaVwEPA3ecw9jHgMYCysjI/y+Eikmbbmo5QVR3mmbUNHDreydQxBXz+/TO5/fJiRg/NS3c86SaVBaIRKIm7XxxsO50VwL+e51gRyVDtkS5+tnEPy6rD1LzdwqBs48bZ4/lIeYg/mTaarCx1C5kqlQWiFig1s6nETu5LgMr4A8ys1N23BndvBk7cXgVUmdnXiU1SlwI1KcwqIj3s7X1HWV4T5uk1DbQc7SA0agj/fdHF3HlFCUXD1C30BSkrEO4eMbMHgNXE3ub6hLtvMrOHgTp3XwU8YGYLgU7gAMHlpeC4J4lNaEeA+/UOJpHM1xGJsnrTHpbXhPn99v3kZBk3zBpHZUWI90wfo26hjzH3/nHpvqyszOvq6tIdQ2RA2rn/KFU1YZ6ua2D/0Q6KRw5maXmIO68oZmxhfrrjyRmY2Rp3L0u0L+2T1CLSN3V2RfnF5r0sqw7z2237yM4yrp85lsqKEFeWFpGtbqHPU4EQkXNS33KM5TVhnqxrYN+RdiYOz+czC2dw14ISxg9Xt9CfqECIyFlFuqL84o0mqmrC/GZrMwZcF3QLV88Yq26hn1KBEJHTajx4nBU1YVbW1tN0uJ3xhfl88rpS7lpQwsQRg9MdT1JMBUJEThHpivLKlmaqasK8vKUJgKtnFPHlislce3EROdlZaU4ovUUFQkQA2H3oOCtq6nmyrp7dh9ooGpbH/ddcxF0LSigZNSTd8SQNVCBEBrCuqPOrt5qoqg7z0ptNOHBlaRFfunUW118yjkHqFgY0FQiRAWhvaxsra+tZWVtP48HjjBmax31XT2dpeUjdgpykAiEyQESjzq+3NlNVHeaXbzbRFXXec9FoPv/+S7hh1jhyc9QtyKlUIET6uabDbTxV18DymjANB44zqiCXP3/vVJaWh5gypiDd8SSDqUCI9EPRqPO77fuoqg7z4ua9RKLOu6aN5r8vmslNs8eRl5Od7ojSB6hAiPQj+46081RdAytqw+zcf4wRQwbx8fdMYWl5iGlFQ9MdT/oYFQiRPs7d+cP2/SyrCfPzTXvo7HLKp4ziMwtnsOjS8eQPUrcg50cFQqSPajnawdNr6lleU8/b+45SmJ/DR/9kMpXlIUrHDUt3POkHVCBE+hB3p/rtFqqqw/xs4x46uqKUTR
7JA9dexM2XTVC3ID1KBUKkDzh4rIOn18TeibS9+SjD8nOorAixtDzExePVLUhqqECIZCh3p27nAaqqw/zk9d10RKLMKxnBV+64jFsvm8jgXHULkloqECIZ5tCxTp5dF+sW3tp7hKF5OXy4rJjK8snMmliY7ngygKhAiGQAd2dt+GDQLeyirTPK3OLhPPKhOdw6dyIFefqnKr1Pf+tE0qi1rZMfrWukqjrMm3sOU5CbzQfnF/ORihCXThqe7ngywKW0QJjZIuAbQDbwuLs/0m3/g8CfAxGgGfgzd98Z7OsCXg8ODbv7banMKtJb3J3XGg5RVb2TH7+2m+OdXcyeWMiXP3gpi+dNYqi6BckQKfubaGbZwKPADUADUGtmq9x9c9xh64Aydz9mZp8AvgLcFew77u7zUpVPpLcdaY+c7BY2725l8KBsbps7kcqKEJcVD8dMX9spmSWVv6qUA9vcfQeAma0AFgMnC4S7vxx3/KvAR1OYRyQtXm84RFXNTp5bv4tjHV3MHD+Mf1g8m8XzJ1GYPyjd8UROK5UFYhJQH3e/Aag4w/H3AD+Nu59vZnXELj894u4/6j7AzO4F7gUIhUIXHFikpxxtj7DqtV1UVYd5vfEQ+YOyuOWyWLcwv2SEugXpEzLiYqeZfRQoA66O2zzZ3RvNbBrwkpm97u7b48e5+2PAYwBlZWXea4FFTmPTrkNUVYd5bv0ujrRHmDFuKH9/6yw+eHkxwwerW5C+JZUFohEoibtfHGw7hZktBL4AXO3u7Se2u3tj8OcOM3sFmA9s7z5eJN2OdUR4/rXdLKsJ81r9QXJzsrhlzgQqK0JcMXmkugXps1JZIGqBUjObSqwwLAEq4w8ws/nAt4FF7t4Ut30kcMzd281sDPAeYhPYIhnjzT2tVFWH+eG6Rg63RZheVMDf3TKL2y+fxIghuemOJ3LBUlYg3D1iZg8Aq4m9zfUJd99kZg8Dde6+CvgqMBR4Kvgt68TbWS8Bvm1mUSCL2BzE5oRPJNKL2jq7eH7Dbqqqd7I2fJDc7CzeN2c8leUhyqeOUrcg/Yq5949L92VlZV5XV5fuGNJPbd17mGXVYZ5d20BrW4RpYwpYWh7i9iuKGVWgbkH6LjNb4+5lifZlxCS1SCZq6+zipxt3U1UdpvadAwzKNm6aPZ7KihDvmjZa3YL0eyoQIt1sazrC8powz6xt4OCxTiaPHsJn3zeTO64oZszQvHTHE+k1KhAiQHuki59t3ENVdZjqt1vIyTJunD2OyvLJvHv6aLKy1C3IwKMCIQPa2/uOsrwmzNNrGmg52kHJqMH8zU0Xc2dZMWOH5ac7nkhaqUDIgNMRifLzzbFu4ffb95OdZSy8ZCyVFZO58qIx6hZEAioQMmCE9x+jqibM02vq2Xekg0kjBvPXN8zgwwtKGFeobkGkOxUI6dc6u6L8YvNeqmrC/GbrPrIMrps5jo9UhLhqRhHZ6hZETksFQvql+pZjrKgN82RdA82H25kwPJ9PLyzlrgUlTBg+ON3xRPoEFQjpNyJdUX75ZhNV1WF+vbUZgGsvHktleYhrLi4iJzsrzQlF+pakCoSZPQt8F/ipu0dTG0nk3DQePM7KmjAr6+rZ29rOuMI8/urai7irPMSkEeoWRM5Xsh3EN4GPA//XzJ4C/s3dt6QulsiZdUWdl99soqomzCtbmnDgqtIiHl4c4vqZY9UtiPSApAqEu/8C+IWZDQeWBrfrge8A/+HunSnMKHLS7kPHWVlbz8raenYfaqNoWB6fuGY6SxaEKBk1JN3xRPqVpOcgzGw0sa8E/S/Evkt6GfBe4G7gmlSEE4FYt/Drt5pZVh3mpTf3EnW4snQMX7xlFgtnjWOQugWRlEh2DuKHwMXAD4Bb3X13sGtl8LWgIj2uqbWNlbX1rKitp/HgccYMzeXeq6aztLyEyaML0h1PpN9LtoP4jru/EL/BzPLcvf10y8SKnI9o1PnNtn1UVe/kF2800RV13j19NJ97/0xunD
We3Bx1CyK9JdkC8Y/AC922/QG4vGfjyEDVfLidJ+vqWVEbpr7lOKMKcrnnvVNZWh5i6hh1CyLpcMYCYWbjgUnA4ODrQU987LQQ0IygXJBo1Pn99v1U1ezk55v2Eok6FVNH8d9uvJhFl44nLyc73RFFBrSzdRA3AX8KFANfj9t+GPh8ijJJP7f/SDtPrWlgRU2Yd/YfY8SQQdz97iksLQ9x0dih6Y4nIoEzFgh3/x7wPTO73d2f6aVM0g+5O3/YsZ+q6jCrN+2hs8tZMGUkn1pYyvsunUD+IHULIpnmbJeYPuru/wFMMbMHu+93968nGCZyUsvRDp5Z08DymjA79h2lMD+Hj1RMprIixIxxw9IdT0TO4GyXmE7MDp5X329mi4BvANnA4+7+SLf9DwJ/DkSAZuDP3H1nsO9u4KHg0H8MuhnpA9ydmrdbqKoJ89PX99DRFeXy0Ai+dudcbp4zgcG56hZE+oKzXWL6dnDzm+7efC4PbGbZwKPADUADUGtmq9x9c9xh64Aydz9mZp8AvgLcZWajgC8BZYADa4KxB84lg/Sug8c6eGZtI8trwmxrOsKwvByWlJdQWRFi5vjCdMcTkXOU7Ntcf2dm7wArgWeTPFGXA9vcfQeAma0AFgMnC4S7vxx3/KvEPqkNscnxF929JRj7IrAIWJ5kXukl7s6anQeoqg7zk9d30x6JMrdkBF+5/TJumTuBIblaMFikr0p2LaYZZlYOLAG+YGabgRXB/MTpTALq4+43ABVnOP4e4KdnGDup+wAzuxe4FyAUCp3tP0N60KHjnfxwbQPLa+rZsvcwQ/NyuOOKYiorQsyeODzd8USkByT965271wA1ZvZPxN7y+j3gTAUiaWb2UWKXk64+l3Hu/hjwGEBZWZn3RBY5PXdnXf1BqqrDPL9hF22dUeZMGs7//NAcbps7kYI8dQsi/UmyazEVAh8k1kFMB35I7BLSmTQCJXH3i4Nt3R97IfAF4Gp3b48be023sa8kk1V6XmtbJ8+ta2RZdZg39xxmSG42H5w/icryycwpVrcg0l8l+yvfa8CPgIfd/Q9JjqkFSs1sKrET/hKgMv6A4NPZ3wYWuXtT3K7VwD+Z2cjg/o3A55J8XukB7s6GhkNUVYdZ9doujnd2MWtCIf/4gUtZPG8iw/IHpTuiiKRYsgVimruf0yUcd4+Y2QPETvbZwBPuvsnMHgbq3H0V8FVib6F9yswAwu5+m7u3mNk/ECsyECtMLefy/HJ+jrRHeG59I1XVYTbtamXwoGxunTuByorJzC0eTvD/SUQGADvTed/M/o+7f9rMfkzs7aancPfbUhnuXJSVlXldnVYeP18bGw+xrDrMqvWNHO3oYub4YVRWhPjA/EkUqlsQ6bfMbM3pVuU+Wwfxg+DPr/VsJMkER9sj/Pi1XVTVhNnQcIi8nCxuuWwilRUhLg+NULcgMsCd7YNya4Kb89z9G/H7zOxTwK9SFUxSZ/OuVqpqdvKjdbs40h6hdOxQvnTrLD40v5jhQ9QtiEhMsnMQdxNbMiPenybYJhnqeEcXP96wi6rqMOvrD5Kbk8XNcyZQWRGibPJIdQsi8kfOtljfUmLvPJpqZqvidg0DNGncB2zZc5iq6p08u66Rw20RphUV8NDNl3D75cWMLMhNdzwRyWBn6yB+D+wGxgD/K277YWBDqkLJhWnr7OInG3ZTVRNmzc4D5GZnsejS8VRWhKiYOkrdgogk5WxzEDuBncC7eieOXIhtTYdZVh3m2bWNHDreydQxBXz+/TO544oSRqlbEJFzdLZLTL919/ea2WFOfZurAe7uWqIzzdo6u/jZxj1UVYepeaeFQdnGjbPH85HyEO+aPlrdgoict7N1EO8N/tQ3u2SY7c1HWF4d5pm1DRw41klo1BD+dtFM7iwrZszQvHTHE5F+INm1mKYDDe7ebmbXAJcB33f3g6kMJ6dqj3SxetNeqqp38uqOFnKyjBtmjaOyIsR7po8hK0vdgoj0nGTf5v
oMUGZmFxFbPfU5oAp4f6qCyX96Z99RlteEeWpNAy1HOygeOZi/ueli7iwrZuyw/HTHE5F+KtkCEQ3WVvog8C/u/i9mti6VwQa6jkiUFzfvpapmJ7/btp/sLOP6mWOprAhxVWmRugURSblkC0Rn8JmIu4Fbg236yG0KhPcfY3ltmKfq6tl3pIOJw/N58IYZfLishPHD1S2ISO9JtkB8HLgP+LK7vx0s4f2Ds4yRJHV2RfnlG3tZVh3mN1v3kWVwXdAtXD1jLNnqFkQkDZL9ytHNwCfj7r8N/HOqQg0UDQeOsaKmnifr6mk63M74wnw+dX0pdy0oYeKIwemOJyIDXLLvYnoP8PfA5GDMic9BTEtdtP4p0hXlpTebqKoJ86u3mgG4ZkYRX66YzLUXF5GTnZXmhCIiMcleYvou8BlgDdCVujj9166Dx1lRW8+TtfXsaW1j7LA8Hrj2Iu5aUELxyCHpjici8keSLRCH3P2nKU3SD3VFnVe2NFFVHeblLU04cGVpEX9/22yuv2Qsg9QtiEgGS7ZAvGxmXwWeBdpPbHT3tSlJ1Q+89OZeHvrhRnYdamPM0Dzuu3o6S8tDlIxStyAifUOyBaIi+DP+a+kcuK5n4/QPGxsP8V+XrWXK6AIeumUWCy8ZR26OugUR6VuSfRfTtakO0l80tbbxF9+vY9SQXH5wTwVFw7Qukoj0TUn9Wmtm48zsu2b20+D+LDO7J4lxi8xsi5ltM7PPJth/lZmtNbOImd3RbV+Xma0PflZ1H5uJ2jq7uPcHazh4rJPv3F2m4iAifVqy1z3+HVgNTAzuvwV8+kwDzCwbeBR4HzALWGpms7odFib21aVVCR7iuLvPC35uSzJn2rg7f/vMBtbXH+R/3zWX2ROHpzuSiMgFSbZAjHH3J4EogLtHOPvbXcuBbe6+w907gBXA4vgD3P0dd99w4nH7sm++sp3n1u/iv904g0WXTkh3HBGRC5ZsgThqZqMJvjTIzP4EOHSWMZOA+rj7DcG2ZOWbWZ2ZvWpmH0h0gJndGxxT19zcfA4P3bNWb9rDV1dv4ba5E7n/2ovSlkNEpCcl+y6mB4FVwHQz+x1QBNxx5iEXbLK7N5rZNOAlM3vd3bfHH+DujxFbfpyysjJP9CCptnlXK59ZuZ65xcP5yh2X6RvcRKTfSLaDmE5sLuHdxOYitnL24tIIlMTdLw62JcXdG4M/dwCvAPOTHdtbmg+38xffr6MwfxDf+VgZ+YOy0x1JRKTHJFsg/s7dW4GRwLXAN4F/PcuYWqDUzKaaWS6whFgXclZmNtLM8oLbY4D3AJuTzNor2iNd3Pcfa9h/tJ3vfKyMsYVailtE+pdkC8SJCembge+4+0+A3DMNCCayHyDWcbwBPOnum8zsYTO7DcDMFphZA3An8G0z2xQMvwSoM7PXgJeBR4IVZTOCu/P5ZzeyZucB/ted85hTrHcsiUj/k+wcRKOZfRu4Afjn4Lf7sxYXd38BeKHbti/G3a4ldump+7jfA3OSzNbrHvv1Dp5Z28Cnri/l5sv0jiUR6Z+S7SA+TKwTuMndDwKjgL9JWaoM9ss39vLIz97k5jkT+NT1pemOIyKSMskutXGM2EJ9J+7vBnanKlSm2rLnMJ9cvo7ZEwv52p1z9b3QItKvaQW5JO0/0s4936ulIC+H73ysjMG5eseSiPRvyc5BDGgdkSifWLaW5sPtrPzLdzFhuL4OVET6PxWIs3B3/u5HG6l5u4VvLJnHvJIR6Y4kItIrdInpLJ743TusrKvngWsvYvG8c1kpRESkb1OBOINXtjTx5Z9s5qbZ43jwhhnpjiMi0qtUIE6jsyvKp1eu5+LxhXz9w/P0jiURGXBUIE5je/MRDh7r5C+vmkZBnqZqRGTgUYE4jTd2twIwa2JhmpOIiKSHCsRpvLH7MLk5WUwbU5DuKCIiaaECcRqbd7UyY9xQcrL1EonIwKSzXwLuzhu7W5k1QZeXRGTgUoFIoPlwO/uPdnCJCoSIDG
AqEAlsDiaoVSBEZCBTgUhg0y4VCBERFYgENjQcZMroIQwfPCjdUURE0kYFIoENDYe4rFiL8onIwKYC0U3z4XZ2H2pjziR9z7SIDGwqEN1sbDwEwJxiFQgRGdhSWiDMbJGZbTGzbWb22QT7rzKztWYWMbM7uu2728y2Bj93pzJnvBMFYraW2BCRAS5lBcLMsoFHgfcBs4ClZjar22Fh4E+Bqm5jRwFfAiqAcuBLZjYyVVnj7Tp0nDFD8xiWrwlqERnYUtlBlAPb3H2Hu3cAK4DF8Qe4+zvuvgGIdht7E/Ciu7e4+wHgRWBRCrOetO9IB2OG5vbGU4mIZLRUFohJQH3c/YZgW4+NNbN7zazOzOqam5vPO2i8fUfaGTM0r0ceS0SkL+vTk9Tu/pi7l7l7WVFRUY885v4jHYxWByEiktIC0QiUxN0vDraleuwFaTnawegCdRAiIqksELVAqZlNNbNcYAmwKsmxq4EbzWxkMDl9Y7AtpdojXRxpjzCqQBPUIiIpKxDuHgEeIHZifwN40t03mdnDZnYbgJktMLMG4E7g22a2KRjbAvwDsSJTCzwcbEupg8c6ARhZoEtMIiIp/bJld38BeKHbti/G3a4ldvko0dgngCdSma+7lqMdAIwaogIhItKnJ6l72oETBUIdhIiICkS8/SoQIiInqUDEOXAsViA0ByEiogJxiubD7WRnGSP0PRAiIioQ8XYfamPssDxysvWyiIjoTBhnz6E2xhXmpzuGiEhGUIGIs7e1jXGF+hS1iAioQJzi0PFORuozECIigArEKVrbOinUBLWICKACcVJ7pIu2ziiF+Sn9cLmISJ+hAhE43BYB0DfJiYgEVCACrcdjC/UN1yUmERFABeKk1qCDKBysS0wiIqACcdKJDqJQl5hERAAViJMOn+wgVCBEREAF4qTWNnUQIiLxVCACJy8xaQ5CRARQgTipta2T7Cxj8KDsdEcREckIKhCB4x1RhuRmY2bpjiIikhFUIAJtkS7yctQ9iIickNICYWaLzGyLmW0zs88m2J9nZiuD/dVmNiXYPsXMjpvZ+uDnW6nMCdDeGSUvR/VSROSElM3Imlk28ChwA9AA1JrZKnffHHfYPcABd7/IzJYA/wzcFezb7oMs4i0AAAo9SURBVO7zUpWvu827WxlZoHcwiYickMpfmcuBbe6+w907gBXA4m7HLAa+F9x+Grje0jQJ4O64p+OZRUQyUyoLxCSgPu5+Q7At4THuHgEOAaODfVPNbJ2Z/crMrkz0BGZ2r5nVmVldc3PzBYU92hFhxrhhF/QYIiL9SaZedN8NhNx9PvAgUGVmhd0PcvfH3L3M3cuKioou6AmPd3QxOFeT1CIiJ6SyQDQCJXH3i4NtCY8xsxxgOLDf3dvdfT+Au68BtgMzUpiV4x1dDNFnIERETkplgagFSs1sqpnlAkuAVd2OWQXcHdy+A3jJ3d3MioJJbsxsGlAK7EhV0GjUOdrRxZA8fYpaROSElJ0R3T1iZg8Aq4Fs4Al332RmDwN17r4K+C7wAzPbBrQQKyIAVwEPm1knEAXuc/eWVGU9sVCfvgtCROQ/pfRXZnd/AXih27Yvxt1uA+5MMO4Z4JlUZot38HgHACNUIERETsrUSepe1XjwOADjCvPTnEREJHOoQADbm44AcNHYoWlOIiKSOVQggI2NrYwYMohxhXnpjiIikjFUIID9RzuYMHywVnIVEYmjAgG0R7rIH6SXQkQkns6KaCVXEZFEdFZE3wUhIpKICgSxD8oNy9enqEVE4qlAAIfbOinUh+RERE6hAgG0RzQHISLSnc6KQEckSq4KhIjIKQb8WTHSFaU9EqUgV3MQIiLxBnyB6OiKAqiDEBHpZsCfFSPR2BdR52TpU9QiIvFUILpUIEREEhnwBSI7y7h5zgSmjClIdxQRkYwy4Gdmhw8exKMfuTzdMUREMs6A7yBERCQxFQgREUlIBUJERBJKaYEws0VmtsXMtpnZZxPszzOzlcH+aj
ObErfvc8H2LWZ2UypziojIH0tZgTCzbOBR4H3ALGCpmc3qdtg9wAF3vwj438A/B2NnAUuA2cAi4JvB44mISC9JZQdRDmxz9x3u3gGsABZ3O2Yx8L3g9tPA9Rb73s/FwAp3b3f3t4FtweOJiEgvSWWBmATUx91vCLYlPMbdI8AhYHSSYzGze82szszqmpubezC6iIj06Ulqd3/M3cvcvayoqCjdcURE+pVUflCuESiJu18cbEt0TIOZ5QDDgf1Jjj3FmjVr9pnZzgvIOwbYdwHje1NfygrKm2rKm1p9Ke/5ZJ18uh2pLBC1QKmZTSV2cl8CVHY7ZhVwN/AH4A7gJXd3M1sFVJnZ14GJQClQc6Ync/cLaiHMrM7dyy7kMXpLX8oKyptqyptafSlvT2dNWYFw94iZPQCsBrKBJ9x9k5k9DNS5+yrgu8APzGwb0EKsiBAc9ySwGYgA97t7V6qyiojIH0vpWkzu/gLwQrdtX4y73QbceZqxXwa+nMp8IiJyen16krqHPZbuAOegL2UF5U015U2tvpS3R7Oau/fk44mISD+hDkJERBJSgRARkYT6fYHoawsGnm9eM5tiZsfNbH3w860MyXuVma01s4iZ3dFt391mtjX4ubsP5O2Ke31XZUDWB81ss5ltMLNfmtnkuH2Z+NqeKW+vvrZJ5r3PzF4PMv02fi25DD03JMx7QecGd++3P8TeXrsdmAbkAq8Bs7od81+BbwW3lwArg9uzguPzgKnB42RncN4pwMYMfH2nAJcB3wfuiNs+CtgR/DkyuD0yU/MG+45k2Gt7LTAkuP2JuL8LmfraJszb26/tOeQtjLt9G/Cz4HamnhtOl/e8zw39vYPoawsGXkjedDhrXnd/x903ANFuY28CXnT3Fnc/ALxIbOXeTM3b25LJ+rK7HwvuvkpsxQHI3Nf2dHnTIZm8rXF3C4AT7+jJyHPDGfKet/5eIFK+YGAPu5C8AFPNbJ2Z/crMrkxx1lOyBM7lNcrU1/dM8i22OOSrZvaBno32R8416z3AT89zbE+4kLzQu68tJL8g6P1mth34CvDJcxnbwy4kL5znuSGlH5STXrUbCLn7fjO7AviRmc3u9luFXJjJ7t5oZtOAl8zsdXffnu5QZvZRoAy4Ot1ZknGavBn52rr7o8CjZlYJPERsaaCMdZq8531u6O8dxLksGIhd4IKBPeC88wbt7n4Ad19D7HrljAzIm4qx5+uCntPdG4M/dwCvAPN7Mlw3SWU1s4XAF4Db3L39XMb2sAvJ29uvLZz7a7QCONHZZOzrG+dk3gs6N6RyYiXdP8Q6pB3EJpJOTOzM7nbM/Zw66ftkcHs2p05E7SD1E1EXkrfoRD5iE1mNwKh054079t/540nqt4lNoo4Mbmdy3pFAXnB7DLCVbpOEafi7MD/4x17abXtGvrZnyNurr+055C2Nu30rsTXkMvnccLq8531uSNl/UKb8AO8H3gr+Yn4h2PYwsd9gAPKBp4hNNNUA0+LGfiEYtwV4XybnBW4HNgHrgbXArRmSdwGx66VHiXVmm+LG/lnw37EN+Hgm5wXeDbwe/MN8HbgnA7L+Atgb/D9fD6zK8Nc2Yd50vLZJ5v1G3L+pl4k7IWfouSFh3gs5N2ipDRERSai/z0GIiMh5UoEQEZGEVCBERCQhFQgREUlIBUJERBJSgZB+z8yeMLMmM9uY7izdmdnDwYfHMLMrzWxTsOLmJDN7+ixjH49bsfPzvZFXBha9zVX6PTO7CjgCfN/dL013ntMJlmH+rbv/x3mMPeLuQ1MQSwYwdRDS77n7r4GW8x1vZo/EfY/B14Jt/25m3woWmHvLzG4Jtmeb2VfNrDY4/i/jHudvg/X6XzOzR+Ie5w4z+3Pgw8A/mNmyYA3/jXGP+TUz2xg85l8F218xs7LgsQYHnceyoCv5dNzzftnMPnW+//0ycGmxPpEzMLPRwAeBme7uZjYibvcUYsswTwdeNrOLgI8Bh9x9gZnlAb8zs58DM4
ktz1zh7sfMbFT887j742b2XuB5d3/a4r64Crg3eK557h5JMPazZvaAu88LMk8BngX+j5llEVuSJdXLUUs/pAIhcmaHgDbgu2b2PPB83L4n3T0KbDWzHcSKwI3AZfaf30Y3HCgFFgL/5sH3Ibj7uXQ0C4mtvxVJZqy7v2Nm+81sPjAOWOfBYm0i50IFQgY8M8sG1gR3V7n7F0/sC35jLweuB+4AHgCuO7G720M5YMBfufvqbs/RK19LGedx4E+B8cATvfzc0k9oDkIGPHfvcvd5wc8X4/eZ2VBguLu/AHwGmBu3+04zyzKz6cRWydwCrAY+YWaDgvEzzKyA2Le6fdzMhgTbT7lMdBYvAn8ZLO9+urGdJ54z8ENi3yK3IMgkcs7UQUi/Z2bLgWuAMWbWAHzJ3b+b5PBhwHNmlk+sO3gwbl+Y2Iq6hcB97t5mZo8Tmy9Ya2YGNAMfcPefmdk8oM7MOoAXgGTfmvo4sfX7N5hZJ/Ad4P91O+axYP9ad/+Iu3eY2cvAQXfvSvJ5RE6ht7mKnAcz+3eCCeV0Z0kkmJxeC9zp7lvTnUf6Jl1iEulngg/PbQN+qeIgF0IdhIiIJKQOQkREElKBEBGRhFQgREQkIRUIERFJSAVCREQS+v/TYUqzy87cSwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "O1ZEcH_V0Uk1",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "654a188f-0797-4aef-8fad-a99712994772"
},
"source": [
"# Youden's J statistic (sensitivity + specificity - 1) at every cut-off;\n",
"# the value of this cell is the cut-off at which J is largest.\n",
"temp = val_specifity + val_sensitivity - 1\n",
"cut_off_thresh[temp.argmax()]"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.03"
]
},
"metadata": {
"tags": []
},
"execution_count": 134
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "FW1gwEhR0Uhl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6e4dce49-bc99-46f7-ee40-43843034b98c"
},
"source": [
"from sklearn.metrics import roc_auc_score\n",
"\n",
"# Area under the ROC curve against the training labels.\n",
"# NOTE(review): assumes pred_prob currently holds the training-set probabilities - confirm.\n",
"roc_auc_score(y_true=train_class, y_score=pred_prob)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.5317844117043914"
]
},
"metadata": {
"tags": []
},
"execution_count": 135
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "9haYQVcJ0Ueo"
},
"source": [
"# Classify the training reports: predict hospitalization (1) when the posterior\n",
"# P(y=1 | symptoms) exceeds the 0.03 cut-off, otherwise keep the default 0.\n",
"pred_class = np.zeros([train_count, ])\n",
"# Prior P(y=1); presumably num_1 is the number of hospitalized training reports - confirm.\n",
"p_y1 = num_1 / train_count\n",
"\n",
"# Loop over train_count rows (not test_count): pred_class has train_count entries\n",
"# and the rows are read from train_symptoms, so stopping at test_count would leave\n",
"# every remaining training row at its default prediction of 0.\n",
"for i in range(train_count):\n",
"    ind_1 = train_symptoms[i].astype('int')  # symptom indices listed for report i\n",
"    ind_0 = np.array(range(num_symptom))\n",
"    ind_0 = np.delete(ind_0, ind_1)  # all remaining symptom indices\n",
"    # Unnormalized class scores: prior * prod(theta[ind_1]) * prod(1 - theta[ind_0]),\n",
"    # accumulated as a sum of logs before exponentiating.\n",
"    py1 = np.exp(np.sum(np.log(theta_1[ind_1])) + np.sum(np.log(1 - theta_1[ind_0]))) * p_y1\n",
"    py0 = np.exp(np.sum(np.log(theta_0[ind_1])) + np.sum(np.log(1 - theta_0[ind_0]))) * (1 - p_y1)\n",
"\n",
"    if py1/(py0+py1) > 0.03:\n",
"        pred_class[i] = 1"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "YnC3tU-S0Ub1",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b321c8f7-ab69-4f2a-886c-99f46a340201"
},
"source": [
"# compute precision (TP / (TP + FP)) and recall (TP / (TP + FN)) against the training labels\n",
"# Confusion-matrix counts of pred_class against train_class.\n",
"num_true_pos = ((pred_class == 1) & (train_class == 1)).sum()\n",
"num_false_pos = ((pred_class == 1) & (train_class == 0)).sum()\n",
"num_false_neg = ((pred_class == 0) & (train_class == 1)).sum()\n",
"num_true_neg = ((pred_class == 0) & (train_class == 0)).sum()\n",
"\n",
"print('precision for hospitalization: ', num_true_pos/(num_true_pos + num_false_pos))\n",
"print('recall for hospitalization: ', num_true_pos/(num_true_pos + num_false_neg))\n",
"# Raw counts, one per line: TP, FP, FN, TN.\n",
"print(num_true_pos, num_false_pos, num_false_neg, num_true_neg, sep='\\n')"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"precision for hospitalization: 0.24940143655227454\n",
"recall for hospitalization: 0.15814777327935223\n",
"625\n",
"1881\n",
"3327\n",
"74167\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "HXzf_r0C0UYd",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e8450057-0fb0-4406-f71d-52a664b943de"
},
"source": [
"# sensitivity = TP / (TP + FN); specificity = TN / (TN + FP)\n",
"hosp_sensitivity = num_true_pos / (num_true_pos + num_false_neg)\n",
"hosp_specificity = num_true_neg / (num_true_neg + num_false_pos)\n",
"print('sensitivity for hospitalization: ', hosp_sensitivity)\n",
"print('specificity for hospitalization: ', hosp_specificity)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"sensitivity for hospitalization: 0.15814777327935223\n",
"specificity for hospitalization: 0.9752656217126026\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Q3h4AYby0UVE"
},
"source": [
"# Score the test reports: pred_prob[i] is the normalized class-1 score for report i.\n",
"pred_class = np.zeros([test_count, ])\n",
"pred_prob = np.zeros([test_count, ])  # probability of hospitalization\n",
"p_y1 = num_1 / train_count\n",
"\n",
"for i in range(test_count):\n",
"    ind_1 = test_symptoms[i].astype('int')\n",
"    ind_0 = np.delete(np.arange(num_symptom), ind_1)\n",
"    # Log of prod(theta[ind_1]) * prod(1 - theta[ind_0]) for each class.\n",
"    loglik_1 = np.log(theta_1[ind_1]).sum() + np.log(1 - theta_1[ind_0]).sum()\n",
"    loglik_0 = np.log(theta_0[ind_1]).sum() + np.log(1 - theta_0[ind_0]).sum()\n",
"    py1 = np.exp(loglik_1) * p_y1\n",
"    py0 = np.exp(loglik_0) * (1 - p_y1)\n",
"    pred_prob[i] = py1/(py0+py1)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "PpvVEMxH0UR8"
},
"source": [
"# test labels: 1.0 where test_hosp_day_count is positive, 0.0 elsewhere\n",
"# (assumes test_hosp_day_count has exactly test_count entries - confirm)\n",
"test_class = np.where(test_hosp_day_count > 0, 1.0, 0.0)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Ypt1hVUn0UO-"
},
"source": [
"# Compute the ROC curve against test_class: sweep the cut-off from 0.00 to 0.99\n",
"# in steps of 0.01 and record sensitivity and specificity at each cut-off.\n",
"cut_off_thresh = np.arange(100) / 100.0  # true division already yields floats\n",
"\n",
"val_sensitivity = np.zeros(len(cut_off_thresh))\n",
"val_specifity = np.zeros(len(cut_off_thresh))\n",
"\n",
"for i, temp_thresh in enumerate(cut_off_thresh):\n",
"    # Predict class 1 wherever the predicted probability exceeds the cut-off.\n",
"    temp_pred_class = np.zeros([test_count, ])\n",
"    temp_pred_class[pred_prob > temp_thresh] = 1\n",
"\n",
"    # Confusion-matrix counts at this cut-off.\n",
"    num_true_pos = np.sum(np.logical_and((temp_pred_class==1), (test_class==1)))\n",
"    num_false_pos = np.sum(np.logical_and((temp_pred_class==1),(test_class==0)))\n",
"    num_false_neg = np.sum(np.logical_and((temp_pred_class==0),(test_class==1)))\n",
"    num_true_neg = np.sum(np.logical_and((temp_pred_class==0),(test_class==0)))\n",
"\n",
"    # sensitivity (recall) = TP / (TP + FN); specificity = TN / (TN + FP)\n",
"    val_sensitivity[i] = num_true_pos/(num_true_pos + num_false_neg)\n",
"    val_specifity[i] = num_true_neg/(num_true_neg + num_false_pos)\n",
"\n",
"# plot ROC curve\n",
"plt.plot(1-val_specifity, val_sensitivity)\n",
"plt.title('ROC curve (test set)')\n",
"plt.xlabel('1 - specificity')\n",
"plt.ylabel('sensitivity')"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "WTKDINeP0UIs",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "477c26a4-f5e1-4c48-b230-cae1bc77d149"
},
"source": [
"# Youden's J statistic (sensitivity + specificity - 1) at every cut-off;\n",
"# the value of this cell is the cut-off at which J is largest.\n",
"temp = val_specifity + val_sensitivity - 1\n",
"cut_off_thresh[temp.argmax()]"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.03"
]
},
"metadata": {
"tags": []
},
"execution_count": 74
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "2w0u3yhowxxv",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d2da13da-37b9-4f6e-c32f-46b344e29a49"
},
"source": [
"# Area under the ROC curve of pred_prob against the test labels.\n",
"roc_auc_score(y_true=test_class, y_score=pred_prob)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.7241817313822186"
]
},
"metadata": {
"tags": []
},
"execution_count": 76
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1Ujjkqcp8oya"
},
"source": [
"### Result: test-set performance at the 0.03 cut-off"
]
},
{
"cell_type": "code",
"metadata": {
"id": "DLYqkhlSwxu1"
},
"source": [
"# Final test predictions: mark a report as class 1 when its normalized class-1\n",
"# score exceeds the 0.03 cut-off.\n",
"pred_class = np.zeros([test_count, ])\n",
"p_y1 = num_1 / train_count\n",
"\n",
"for i in range(test_count):\n",
"    ind_1 = test_symptoms[i].astype('int')\n",
"    ind_0 = np.delete(np.arange(num_symptom), ind_1)\n",
"    py1 = p_y1 * np.exp(np.log(theta_1[ind_1]).sum() + np.log(1 - theta_1[ind_0]).sum())\n",
"    py0 = (1 - p_y1) * np.exp(np.log(theta_0[ind_1]).sum() + np.log(1 - theta_0[ind_0]).sum())\n",
"    pred_class[i] = float(py1/(py0+py1) > 0.03)\n",
"\n",
"# test labels: 1 where test_hosp_day_count is positive, 0 elsewhere\n",
"test_class = np.zeros([test_count, ])\n",
"test_class[test_hosp_day_count > 0] = 1"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "mKpouS6Ewxrr",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "386ad561-4f8c-41ba-9c66-abc3bb40d73f"
},
"source": [
"# Compute precision (TP / (TP + FP)) and recall (TP / (TP + FN)) against the test labels\n",
"\n",
"# Confusion-matrix counts of pred_class against test_class.\n",
"num_true_pos = ((pred_class == 1) & (test_class == 1)).sum()\n",
"num_false_pos = ((pred_class == 1) & (test_class == 0)).sum()\n",
"num_false_neg = ((pred_class == 0) & (test_class == 1)).sum()\n",
"num_true_neg = ((pred_class == 0) & (test_class == 0)).sum()\n",
"\n",
"print('Hospitalization Precision: ', num_true_pos/(num_true_pos + num_false_pos))\n",
"print('Hospitalization Recall: ', num_true_pos/(num_true_pos + num_false_neg))\n",
"# print(num_true_pos)\n",
"# print(num_false_pos)\n",
"# print(num_false_neg)\n",
"# print(num_true_neg)"
],
"execution_count": 150,
"outputs": [
{
"output_type": "stream",
"text": [
"Hospitalization Precision: 0.24840255591054314\n",
"Hospitalization Recall: 0.4638329604772558\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ehayaBKDwxoU",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8ddbfa50-77b9-41b6-93ee-4b85b7e02a51"
},
"source": [
"# sensitivity = TP / (TP + FN); specificity = TN / (TN + FP)\n",
"hosp_sensitivity = num_true_pos / (num_true_pos + num_false_neg)\n",
"hosp_specificity = num_true_neg / (num_true_neg + num_false_pos)\n",
"print('Hospitalization Sensitivity: ', hosp_sensitivity)\n",
"print('Hospitalization Specificity: ', hosp_specificity)"
],
"execution_count": 151,
"outputs": [
{
"output_type": "stream",
"text": [
"Hospitalization Sensitivity: 0.4638329604772558\n",
"Hospitalization Specificity: 0.9288845223700121\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_v0UyjONydLD"
},
"source": [
"## ------\n",
"------\n",
"## ------"
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment