Skip to content

Instantly share code, notes, and snippets.

@darthgera123
Created March 27, 2020 11:13
Show Gist options
  • Save darthgera123/416d21ea2de094e8460b07779560eb9d to your computer and use it in GitHub Desktop.
Save darthgera123/416d21ea2de094e8460b07779560eb9d to your computer and use it in GitHub Desktop.
Baseline for DBSRA
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Baseline submission for the challenge DBSRA"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split \n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn import metrics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_data = pd.read_csv('../data/public/train.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clean and Analyse Data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>race</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>weight</th>\n",
" <th>admission_type_id</th>\n",
" <th>discharge_disposition_id</th>\n",
" <th>admission_source_id</th>\n",
" <th>time_in_hospital</th>\n",
" <th>payer_code</th>\n",
" <th>medical_specialty</th>\n",
" <th>...</th>\n",
" <th>citoglipton</th>\n",
" <th>insulin</th>\n",
" <th>glyburide-metformin</th>\n",
" <th>glipizide-metformin</th>\n",
" <th>glimepiride-pioglitazone</th>\n",
" <th>metformin-rosiglitazone</th>\n",
" <th>metformin-pioglitazone</th>\n",
" <th>change</th>\n",
" <th>diabetesMed</th>\n",
" <th>readmitted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AfricanAmerican</td>\n",
" <td>Female</td>\n",
" <td>[70-80)</td>\n",
" <td>?</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>?</td>\n",
" <td>InternalMedicine</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>Steady</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Caucasian</td>\n",
" <td>Female</td>\n",
" <td>[90-100)</td>\n",
" <td>?</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>SP</td>\n",
" <td>Pulmonology</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>Down</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Ch</td>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Caucasian</td>\n",
" <td>Female</td>\n",
" <td>[80-90)</td>\n",
" <td>?</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>MC</td>\n",
" <td>Osteopath</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>Steady</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Caucasian</td>\n",
" <td>Male</td>\n",
" <td>[60-70)</td>\n",
" <td>?</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>MC</td>\n",
" <td>Radiologist</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>Steady</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Ch</td>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>?</td>\n",
" <td>Female</td>\n",
" <td>[70-80)</td>\n",
" <td>?</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>UN</td>\n",
" <td>InternalMedicine</td>\n",
" <td>...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 48 columns</p>\n",
"</div>"
],
"text/plain": [
" race gender age weight admission_type_id \\\n",
"0 AfricanAmerican Female [70-80) ? 1 \n",
"1 Caucasian Female [90-100) ? 3 \n",
"2 Caucasian Female [80-90) ? 1 \n",
"3 Caucasian Male [60-70) ? 3 \n",
"4 ? Female [70-80) ? 1 \n",
"\n",
" discharge_disposition_id admission_source_id time_in_hospital payer_code \\\n",
"0 1 7 2 ? \n",
"1 1 1 8 SP \n",
"2 2 7 1 MC \n",
"3 1 6 6 MC \n",
"4 3 6 3 UN \n",
"\n",
" medical_specialty ... citoglipton insulin glyburide-metformin \\\n",
"0 InternalMedicine ... No Steady No \n",
"1 Pulmonology ... No Down No \n",
"2 Osteopath ... No Steady No \n",
"3 Radiologist ... No Steady No \n",
"4 InternalMedicine ... No No No \n",
"\n",
" glipizide-metformin glimepiride-pioglitazone metformin-rosiglitazone \\\n",
"0 No No No \n",
"1 No No No \n",
"2 No No No \n",
"3 No No No \n",
"4 No No No \n",
"\n",
" metformin-pioglitazone change diabetesMed readmitted \n",
"0 No No Yes 1 \n",
"1 No Ch Yes 1 \n",
"2 No No Yes 0 \n",
"3 No Ch Yes 0 \n",
"4 No No No 0 \n",
"\n",
"[5 rows x 48 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_data = train_data.drop('encounter_id',1)\n",
"train_data = train_data.drop('patient_nbr',1)\n",
"train_data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since most of the columns have categorical columns we have to convert it into integers. The most basic way is to do an Ordinal Mapping. Note: Here we have not replaced question marks with some other data and they are also accounted into ordinal mapping."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>race</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>weight</th>\n",
" <th>admission_type_id</th>\n",
" <th>discharge_disposition_id</th>\n",
" <th>admission_source_id</th>\n",
" <th>time_in_hospital</th>\n",
" <th>payer_code</th>\n",
" <th>medical_specialty</th>\n",
" <th>...</th>\n",
" <th>citoglipton</th>\n",
" <th>insulin</th>\n",
" <th>glyburide-metformin</th>\n",
" <th>glipizide-metformin</th>\n",
" <th>glimepiride-pioglitazone</th>\n",
" <th>metformin-rosiglitazone</th>\n",
" <th>metformin-pioglitazone</th>\n",
" <th>change</th>\n",
" <th>diabetesMed</th>\n",
" <th>readmitted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>19</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>15</td>\n",
" <td>51</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>30</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>52</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>16</td>\n",
" <td>19</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 48 columns</p>\n",
"</div>"
],
"text/plain": [
" race gender age weight admission_type_id discharge_disposition_id \\\n",
"0 1 0 7 1 1 1 \n",
"1 3 0 9 1 3 1 \n",
"2 3 0 8 1 1 2 \n",
"3 3 1 6 1 3 1 \n",
"4 0 0 7 1 1 3 \n",
"\n",
" admission_source_id time_in_hospital payer_code medical_specialty ... \\\n",
"0 7 2 0 19 ... \n",
"1 1 8 15 51 ... \n",
"2 7 1 8 30 ... \n",
"3 6 6 8 52 ... \n",
"4 6 3 16 19 ... \n",
"\n",
" citoglipton insulin glyburide-metformin glipizide-metformin \\\n",
"0 0 2 1 0 \n",
"1 0 0 1 0 \n",
"2 0 2 1 0 \n",
"3 0 2 1 0 \n",
"4 0 1 1 0 \n",
"\n",
" glimepiride-pioglitazone metformin-rosiglitazone metformin-pioglitazone \\\n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"\n",
" change diabetesMed readmitted \n",
"0 1 1 1 \n",
"1 0 1 1 \n",
"2 1 1 0 \n",
"3 0 1 0 \n",
"4 1 0 0 \n",
"\n",
"[5 rows x 48 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"labelencoder = LabelEncoder()\n",
"n_train_data = train_data\n",
"for col in train_data.columns:\n",
" s = train_data[col]\n",
" if s.dtype == 'O':\n",
" s = labelencoder.fit_transform(s)\n",
" n_train_data[col] = s\n",
"n_train_data.head()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Split Data into Train and Validation"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"X = n_train_data.drop('readmitted',1)\n",
"y = n_train_data['readmitted']\n",
"# Validation testing\n",
"X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Define the Classifier and Train"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/gera/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/home/gera/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n"
]
},
{
"data": {
"text/plain": [
"LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
" intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
" multi_class='warn', n_jobs=None, penalty='l2',\n",
" random_state=None, solver='warn', tol=0.0001, verbose=0,\n",
" warm_start=False)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"classifier = LogisticRegression()\n",
"classifier.fit(X_train,y_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predict on Validation"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_pred = classifier.predict(X_val)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Actual</th>\n",
" <th>Predicted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>26342</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59142</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57537</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58128</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29821</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62897</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43572</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62329</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44309</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20882</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49075</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20668</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76856</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32858</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74292</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80549</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8588</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57768</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10658</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51569</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59914</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32874</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54656</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77456</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35300</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Actual Predicted\n",
"26342 1 0\n",
"59142 1 0\n",
"57537 1 0\n",
"58128 0 0\n",
"29821 1 0\n",
"62897 0 0\n",
"43572 0 0\n",
"62329 2 0\n",
"44309 0 0\n",
"20882 0 0\n",
"49075 0 0\n",
"20668 0 0\n",
"76856 1 0\n",
"32858 1 1\n",
"74292 1 0\n",
"80549 1 0\n",
"8588 1 0\n",
"57768 1 0\n",
"10658 1 0\n",
"51569 0 0\n",
"59914 1 0\n",
"32874 0 0\n",
"54656 1 0\n",
"77456 0 0\n",
"35300 0 0"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({'Actual': y_val, 'Predicted': y_pred})\n",
"df1 = df.head(25)\n",
"df1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Evaluate the Performance"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"F1 score Score: 0.5688110513843131\n"
]
}
],
"source": [
"print('F1 score Score:', metrics.f1_score(y_val, y_pred,average='micro')) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Test Set"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"test_data = pd.read_csv('../data/public/test.csv')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>race</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>weight</th>\n",
" <th>admission_type_id</th>\n",
" <th>discharge_disposition_id</th>\n",
" <th>admission_source_id</th>\n",
" <th>time_in_hospital</th>\n",
" <th>payer_code</th>\n",
" <th>medical_specialty</th>\n",
" <th>...</th>\n",
" <th>examide</th>\n",
" <th>citoglipton</th>\n",
" <th>insulin</th>\n",
" <th>glyburide-metformin</th>\n",
" <th>glipizide-metformin</th>\n",
" <th>glimepiride-pioglitazone</th>\n",
" <th>metformin-rosiglitazone</th>\n",
" <th>metformin-pioglitazone</th>\n",
" <th>change</th>\n",
" <th>diabetesMed</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>11</td>\n",
" <td>15</td>\n",
" <td>16</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>12</td>\n",
" <td>4</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 47 columns</p>\n",
"</div>"
],
"text/plain": [
" race gender age weight admission_type_id discharge_disposition_id \\\n",
"0 3 0 7 1 1 1 \n",
"1 3 1 5 1 1 1 \n",
"2 3 0 6 1 3 6 \n",
"3 3 1 3 1 2 1 \n",
"4 1 0 6 1 1 2 \n",
"\n",
" admission_source_id time_in_hospital payer_code medical_specialty ... \\\n",
"0 6 11 15 16 ... \n",
"1 1 1 6 0 ... \n",
"2 1 4 6 0 ... \n",
"3 1 12 4 10 ... \n",
"4 7 1 0 0 ... \n",
"\n",
" examide citoglipton insulin glyburide-metformin glipizide-metformin \\\n",
"0 0 0 2 1 0 \n",
"1 0 0 1 1 0 \n",
"2 0 0 1 1 0 \n",
"3 0 0 1 1 0 \n",
"4 0 0 1 1 0 \n",
"\n",
" glimepiride-pioglitazone metformin-rosiglitazone metformin-pioglitazone \\\n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"\n",
" change diabetesMed \n",
"0 1 1 \n",
"1 1 1 \n",
"2 1 1 \n",
"3 1 1 \n",
"4 1 1 \n",
"\n",
"[5 rows x 47 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_data = test_data.drop('encounter_id',1)\n",
"test_data = test_data.drop('patient_nbr',1)\n",
"n_test_data = test_data\n",
"for col in test_data.columns:\n",
" s = test_data[col]\n",
" if s.dtype == 'O':\n",
" s = labelencoder.fit_transform(s)\n",
" n_test_data[col] = s\n",
"n_test_data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predict Test Set"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_test = classifier.predict(test_data)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame(y_test,columns=['readmitted'])\n",
"df.to_csv('../data/public/submission.csv',index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"To participate in the challenge click [here](https://www.aicrowd.com/challenges/dbsra-diabettes-readmission-prediction/)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment