Created
July 20, 2018 16:00
-
-
Save BabaCafe/a3149d8c09f438a5aba7d631d5a3e28e to your computer and use it in GitHub Desktop.
McKinsey Hackathon (Healthcare Analytics)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\sklearn\\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", | |
" \"This module will be removed in 0.20.\", DeprecationWarning)\n" | |
] | |
} | |
], | |
"source": [ | |
"import operator\n", | |
"import xgboost as xgb\n", | |
"from xgboost.sklearn import XGBClassifier\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import random as rnd\n", | |
"from sklearn import preprocessing\n", | |
"\n", | |
"\n", | |
"from sklearn.cross_validation import train_test_split , StratifiedKFold\n", | |
"from sklearn.feature_selection import RFECV\n", | |
"\n", | |
"import seaborn as sns\n", | |
"\n", | |
"\n", | |
"import matplotlib as mpl\n", | |
"import matplotlib.pyplot as plt\n", | |
"import matplotlib.pylab as pylab" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"label=preprocessing.LabelEncoder()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def describe_more( df ):\n", | |
"    \"\"\"Summarise every column of ``df`` by distinct-value count and dtype.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    df : pandas.DataFrame\n", | |
"        Frame whose columns are inspected.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    pandas.DataFrame\n", | |
"        One row per column of ``df`` holding 'Variable' (the column label),\n", | |
"        'Levels' (number of distinct non-null values) and 'Datatype' (the\n", | |
"        column dtype), sorted by ascending 'Levels'. Row labels keep each\n", | |
"        column's original position in ``df``.\n", | |
"    \"\"\"\n", | |
"    columns = list( df )\n", | |
"    levels = pd.DataFrame( {\n", | |
"        'Variable' : columns ,\n", | |
"        # nunique() ignores NaN, matching what len( pd.value_counts( ... ) ) returned;\n", | |
"        # the top-level pd.value_counts is deprecated in recent pandas.\n", | |
"        'Levels' : [ df[ name ].nunique() for name in columns ] ,\n", | |
"        'Datatype' : [ df[ name ].dtypes for name in columns ] ,\n", | |
"    } )\n", | |
"    return levels.sort_values( by = 'Levels' )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train=pd.read_csv('train.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>gender</th>\n", | |
" <th>age</th>\n", | |
" <th>hypertension</th>\n", | |
" <th>heart_disease</th>\n", | |
" <th>ever_married</th>\n", | |
" <th>work_type</th>\n", | |
" <th>Residence_type</th>\n", | |
" <th>avg_glucose_level</th>\n", | |
" <th>bmi</th>\n", | |
" <th>smoking_status</th>\n", | |
" <th>stroke</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>30669</td>\n", | |
" <td>Male</td>\n", | |
" <td>3.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>No</td>\n", | |
" <td>children</td>\n", | |
" <td>Rural</td>\n", | |
" <td>95.12</td>\n", | |
" <td>18.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>30468</td>\n", | |
" <td>Male</td>\n", | |
" <td>58.0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Private</td>\n", | |
" <td>Urban</td>\n", | |
" <td>87.96</td>\n", | |
" <td>39.2</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>16523</td>\n", | |
" <td>Female</td>\n", | |
" <td>8.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>No</td>\n", | |
" <td>Private</td>\n", | |
" <td>Urban</td>\n", | |
" <td>110.89</td>\n", | |
" <td>17.6</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>56543</td>\n", | |
" <td>Female</td>\n", | |
" <td>70.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Private</td>\n", | |
" <td>Rural</td>\n", | |
" <td>69.04</td>\n", | |
" <td>35.9</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>46136</td>\n", | |
" <td>Male</td>\n", | |
" <td>14.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>No</td>\n", | |
" <td>Never_worked</td>\n", | |
" <td>Rural</td>\n", | |
" <td>161.28</td>\n", | |
" <td>19.1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id gender age hypertension heart_disease ever_married \\\n", | |
"0 30669 Male 3.0 0 0 No \n", | |
"1 30468 Male 58.0 1 0 Yes \n", | |
"2 16523 Female 8.0 0 0 No \n", | |
"3 56543 Female 70.0 0 0 Yes \n", | |
"4 46136 Male 14.0 0 0 No \n", | |
"\n", | |
" work_type Residence_type avg_glucose_level bmi smoking_status \\\n", | |
"0 children Rural 95.12 18.0 NaN \n", | |
"1 Private Urban 87.96 39.2 never smoked \n", | |
"2 Private Urban 110.89 17.6 NaN \n", | |
"3 Private Rural 69.04 35.9 formerly smoked \n", | |
"4 Never_worked Rural 161.28 19.1 NaN \n", | |
"\n", | |
" stroke \n", | |
"0 0 \n", | |
"1 0 \n", | |
"2 0 \n", | |
"3 0 \n", | |
"4 0 " | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 278, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"id 0\n", | |
"gender 0\n", | |
"age 0\n", | |
"hypertension 0\n", | |
"heart_disease 0\n", | |
"ever_married 0\n", | |
"work_type 0\n", | |
"Residence_type 0\n", | |
"avg_glucose_level 0\n", | |
"bmi 1462\n", | |
"smoking_status 13292\n", | |
"stroke 0\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 278, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.isnull().sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"test=pd.read_csv('test.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 280, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"id 0\n", | |
"gender 0\n", | |
"age 0\n", | |
"hypertension 0\n", | |
"heart_disease 0\n", | |
"ever_married 0\n", | |
"work_type 0\n", | |
"Residence_type 0\n", | |
"avg_glucose_level 0\n", | |
"bmi 591\n", | |
"smoking_status 5751\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 280, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"test.isnull().sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 281, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"43400" | |
] | |
}, | |
"execution_count": 281, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 282, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"18601" | |
] | |
}, | |
"execution_count": 282, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Stack train and test into one frame with a fresh 0..n-1 index.\n", | |
"# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.\n", | |
"# Rows coming from `test` have no 'stroke' column, so it is NaN for them.\n", | |
"Total = pd.concat( [ train , test ] , ignore_index = True )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 344, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Impute missing 'bmi_interval' entries with the most frequent value.\n", | |
"# Assign the result back instead of calling fillna(inplace=True) on the selected\n", | |
"# column: under pandas Copy-on-Write the in-place form only updates a temporary\n", | |
"# object and leaves `data` unchanged.\n", | |
"data['bmi_interval'] = data['bmi_interval'].fillna( data['bmi_interval'].mode()[0] )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 345, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Impute missing 'smoking_status' entries with the most frequent value.\n", | |
"# Assign the result back instead of calling fillna(inplace=True) on the selected\n", | |
"# column: under pandas Copy-on-Write the in-place form only updates a temporary\n", | |
"# object and leaves `data` unchanged.\n", | |
"data['smoking_status'] = data['smoking_status'].fillna( data['smoking_status'].mode()[0] )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 321, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Residence_type</th>\n", | |
" <th>age</th>\n", | |
" <th>age_interval</th>\n", | |
" <th>avg_glucose_level</th>\n", | |
" <th>bmi</th>\n", | |
" <th>bmi_interval</th>\n", | |
" <th>ever_married</th>\n", | |
" <th>gender</th>\n", | |
" <th>glucose_level</th>\n", | |
" <th>heart_disease</th>\n", | |
" <th>hypertension</th>\n", | |
" <th>id</th>\n", | |
" <th>smoking_status</th>\n", | |
" <th>stroke</th>\n", | |
" <th>work_type</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Rural</td>\n", | |
" <td>3.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>95.12</td>\n", | |
" <td>18.0</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>30669</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>children</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Urban</td>\n", | |
" <td>58.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>87.96</td>\n", | |
" <td>39.2</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>30468</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Urban</td>\n", | |
" <td>8.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>110.89</td>\n", | |
" <td>17.6</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>16523</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Rural</td>\n", | |
" <td>70.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>69.04</td>\n", | |
" <td>35.9</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>56543</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Rural</td>\n", | |
" <td>14.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>161.28</td>\n", | |
" <td>19.1</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>46136</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Never_worked</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>Urban</td>\n", | |
" <td>47.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>210.95</td>\n", | |
" <td>50.1</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>32257</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>Urban</td>\n", | |
" <td>52.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>77.59</td>\n", | |
" <td>17.7</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>52800</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>Rural</td>\n", | |
" <td>75.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>243.53</td>\n", | |
" <td>27.0</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>41413</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Self-employed</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>Rural</td>\n", | |
" <td>32.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>77.67</td>\n", | |
" <td>32.3</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>15266</td>\n", | |
" <td>smokes</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>Urban</td>\n", | |
" <td>74.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>205.84</td>\n", | |
" <td>54.6</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>28674</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Self-employed</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>Urban</td>\n", | |
" <td>79.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>77.08</td>\n", | |
" <td>35.0</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>10460</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Govt_job</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>Urban</td>\n", | |
" <td>79.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>57.08</td>\n", | |
" <td>22.0</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>64908</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>Rural</td>\n", | |
" <td>37.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>162.96</td>\n", | |
" <td>39.4</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>63884</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>Rural</td>\n", | |
" <td>37.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>73.50</td>\n", | |
" <td>26.1</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>37893</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>Rural</td>\n", | |
" <td>40.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>95.04</td>\n", | |
" <td>42.4</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>67855</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>Rural</td>\n", | |
" <td>35.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>85.37</td>\n", | |
" <td>33.0</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>25774</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>Urban</td>\n", | |
" <td>20.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>84.62</td>\n", | |
" <td>19.7</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>19584</td>\n", | |
" <td>smokes</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>Rural</td>\n", | |
" <td>42.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>82.67</td>\n", | |
" <td>22.5</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>24447</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>Urban</td>\n", | |
" <td>44.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>57.33</td>\n", | |
" <td>24.6</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>49589</td>\n", | |
" <td>smokes</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Govt_job</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>Urban</td>\n", | |
" <td>79.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>67.84</td>\n", | |
" <td>25.2</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>17986</td>\n", | |
" <td>smokes</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Self-employed</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>Rural</td>\n", | |
" <td>65.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>75.70</td>\n", | |
" <td>41.8</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>29217</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>Rural</td>\n", | |
" <td>57.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>129.54</td>\n", | |
" <td>60.9</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>72911</td>\n", | |
" <td>smokes</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>Rural</td>\n", | |
" <td>49.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>60.22</td>\n", | |
" <td>31.5</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>47175</td>\n", | |
" <td>smokes</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>Urban</td>\n", | |
" <td>71.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>198.21</td>\n", | |
" <td>27.3</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4057</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>Urban</td>\n", | |
" <td>59.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>109.82</td>\n", | |
" <td>23.7</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>48588</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>Urban</td>\n", | |
" <td>25.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>60.84</td>\n", | |
" <td>24.5</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>70336</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>Rural</td>\n", | |
" <td>67.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>94.61</td>\n", | |
" <td>28.4</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>66767</td>\n", | |
" <td>smokes</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Govt_job</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>Rural</td>\n", | |
" <td>38.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>97.49</td>\n", | |
" <td>26.9</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>45801</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>Rural</td>\n", | |
" <td>54.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>206.72</td>\n", | |
" <td>26.7</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>36275</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>Rural</td>\n", | |
" <td>70.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>214.45</td>\n", | |
" <td>31.2</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>11577</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Self-employed</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18571</th>\n", | |
" <td>Rural</td>\n", | |
" <td>76.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>84.49</td>\n", | |
" <td>23.7</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>30015</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Self-employed</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18572</th>\n", | |
" <td>Urban</td>\n", | |
" <td>29.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>85.78</td>\n", | |
" <td>22.1</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>24761</td>\n", | |
" <td>smokes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18573</th>\n", | |
" <td>Rural</td>\n", | |
" <td>16.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>80.14</td>\n", | |
" <td>22.1</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>27094</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18574</th>\n", | |
" <td>Rural</td>\n", | |
" <td>2.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>69.56</td>\n", | |
" <td>18.9</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>43495</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>children</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18575</th>\n", | |
" <td>Urban</td>\n", | |
" <td>51.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>97.49</td>\n", | |
" <td>26.2</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>56348</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18576</th>\n", | |
" <td>Urban</td>\n", | |
" <td>24.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>72.11</td>\n", | |
" <td>23.5</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>11876</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18577</th>\n", | |
" <td>Rural</td>\n", | |
" <td>45.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>75.37</td>\n", | |
" <td>NaN</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>585</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18578</th>\n", | |
" <td>Urban</td>\n", | |
" <td>23.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>84.13</td>\n", | |
" <td>29.6</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>43879</td>\n", | |
" <td>smokes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18579</th>\n", | |
" <td>Rural</td>\n", | |
" <td>24.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>76.93</td>\n", | |
" <td>23.7</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>38239</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18580</th>\n", | |
" <td>Urban</td>\n", | |
" <td>2.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>93.32</td>\n", | |
" <td>21.8</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>28943</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>children</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18581</th>\n", | |
" <td>Rural</td>\n", | |
" <td>22.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>81.87</td>\n", | |
" <td>27.9</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>17895</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18582</th>\n", | |
" <td>Rural</td>\n", | |
" <td>69.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>105.31</td>\n", | |
" <td>26.7</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>35147</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18583</th>\n", | |
" <td>Rural</td>\n", | |
" <td>39.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>76.09</td>\n", | |
" <td>32.7</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>72758</td>\n", | |
" <td>smokes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18584</th>\n", | |
" <td>Urban</td>\n", | |
" <td>9.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>98.39</td>\n", | |
" <td>24.6</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>28661</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>children</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18585</th>\n", | |
" <td>Rural</td>\n", | |
" <td>21.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>114.73</td>\n", | |
" <td>24.8</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>46853</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18586</th>\n", | |
" <td>Rural</td>\n", | |
" <td>25.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>178.14</td>\n", | |
" <td>27.8</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>69190</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18587</th>\n", | |
" <td>Urban</td>\n", | |
" <td>79.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>110.27</td>\n", | |
" <td>36.2</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>26705</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18588</th>\n", | |
" <td>Urban</td>\n", | |
" <td>82.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>180.24</td>\n", | |
" <td>40.4</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>36976</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18589</th>\n", | |
" <td>Urban</td>\n", | |
" <td>24.0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>97.05</td>\n", | |
" <td>27.6</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>58578</td>\n", | |
" <td>smokes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18590</th>\n", | |
" <td>Urban</td>\n", | |
" <td>52.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>85.57</td>\n", | |
" <td>21.4</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>61508</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18591</th>\n", | |
" <td>Urban</td>\n", | |
" <td>13.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>62.33</td>\n", | |
" <td>27.0</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>70296</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18592</th>\n", | |
" <td>Rural</td>\n", | |
" <td>11.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>83.25</td>\n", | |
" <td>15.9</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>35299</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>children</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18593</th>\n", | |
" <td>Rural</td>\n", | |
" <td>80.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>102.85</td>\n", | |
" <td>25.3</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>15023</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18594</th>\n", | |
" <td>Urban</td>\n", | |
" <td>40.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>62.75</td>\n", | |
" <td>29.8</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>56291</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18595</th>\n", | |
" <td>Rural</td>\n", | |
" <td>5.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>126.32</td>\n", | |
" <td>17.0</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>53431</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>children</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18596</th>\n", | |
" <td>Rural</td>\n", | |
" <td>20.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>74.43</td>\n", | |
" <td>18.4</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>67353</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18597</th>\n", | |
" <td>Rural</td>\n", | |
" <td>61.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>211.55</td>\n", | |
" <td>31.6</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>362</td>\n", | |
" <td>smokes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Govt_job</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18598</th>\n", | |
" <td>Rural</td>\n", | |
" <td>79.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>125.74</td>\n", | |
" <td>29.4</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>29839</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18599</th>\n", | |
" <td>Rural</td>\n", | |
" <td>55.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>69.46</td>\n", | |
" <td>33.8</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>6438</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Govt_job</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18600</th>\n", | |
" <td>Rural</td>\n", | |
" <td>38.0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>91.23</td>\n", | |
" <td>24.4</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>16770</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>62001 rows × 15 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Residence_type age age_interval avg_glucose_level bmi \\\n", | |
"0 Rural 3.0 (0.079, 20.0] 95.12 18.0 \n", | |
"1 Urban 58.0 (50.0, 64.0] 87.96 39.2 \n", | |
"2 Urban 8.0 (0.079, 20.0] 110.89 17.6 \n", | |
"3 Rural 70.0 (64.0, 82.0] 69.04 35.9 \n", | |
"4 Rural 14.0 (0.079, 20.0] 161.28 19.1 \n", | |
"5 Urban 47.0 (36.0, 50.0] 210.95 50.1 \n", | |
"6 Urban 52.0 (50.0, 64.0] 77.59 17.7 \n", | |
"7 Rural 75.0 (64.0, 82.0] 243.53 27.0 \n", | |
"8 Rural 32.0 (20.0, 36.0] 77.67 32.3 \n", | |
"9 Urban 74.0 (64.0, 82.0] 205.84 54.6 \n", | |
"10 Urban 79.0 (64.0, 82.0] 77.08 35.0 \n", | |
"11 Urban 79.0 (64.0, 82.0] 57.08 22.0 \n", | |
"12 Rural 37.0 (36.0, 50.0] 162.96 39.4 \n", | |
"13 Rural 37.0 (36.0, 50.0] 73.50 26.1 \n", | |
"14 Rural 40.0 (36.0, 50.0] 95.04 42.4 \n", | |
"15 Rural 35.0 (20.0, 36.0] 85.37 33.0 \n", | |
"16 Urban 20.0 (0.079, 20.0] 84.62 19.7 \n", | |
"17 Rural 42.0 (36.0, 50.0] 82.67 22.5 \n", | |
"18 Urban 44.0 (36.0, 50.0] 57.33 24.6 \n", | |
"19 Urban 79.0 (64.0, 82.0] 67.84 25.2 \n", | |
"20 Rural 65.0 (64.0, 82.0] 75.70 41.8 \n", | |
"21 Rural 57.0 (50.0, 64.0] 129.54 60.9 \n", | |
"22 Rural 49.0 (36.0, 50.0] 60.22 31.5 \n", | |
"23 Urban 71.0 (64.0, 82.0] 198.21 27.3 \n", | |
"24 Urban 59.0 (50.0, 64.0] 109.82 23.7 \n", | |
"25 Urban 25.0 (20.0, 36.0] 60.84 24.5 \n", | |
"26 Rural 67.0 (64.0, 82.0] 94.61 28.4 \n", | |
"27 Rural 38.0 (36.0, 50.0] 97.49 26.9 \n", | |
"28 Rural 54.0 (50.0, 64.0] 206.72 26.7 \n", | |
"29 Rural 70.0 (64.0, 82.0] 214.45 31.2 \n", | |
"... ... ... ... ... ... \n", | |
"18571 Rural 76.0 (64.0, 82.0] 84.49 23.7 \n", | |
"18572 Urban 29.0 (20.0, 36.0] 85.78 22.1 \n", | |
"18573 Rural 16.0 (0.079, 20.0] 80.14 22.1 \n", | |
"18574 Rural 2.0 (0.079, 20.0] 69.56 18.9 \n", | |
"18575 Urban 51.0 (50.0, 64.0] 97.49 26.2 \n", | |
"18576 Urban 24.0 (20.0, 36.0] 72.11 23.5 \n", | |
"18577 Rural 45.0 (36.0, 50.0] 75.37 NaN \n", | |
"18578 Urban 23.0 (20.0, 36.0] 84.13 29.6 \n", | |
"18579 Rural 24.0 (20.0, 36.0] 76.93 23.7 \n", | |
"18580 Urban 2.0 (0.079, 20.0] 93.32 21.8 \n", | |
"18581 Rural 22.0 (20.0, 36.0] 81.87 27.9 \n", | |
"18582 Rural 69.0 (64.0, 82.0] 105.31 26.7 \n", | |
"18583 Rural 39.0 (36.0, 50.0] 76.09 32.7 \n", | |
"18584 Urban 9.0 (0.079, 20.0] 98.39 24.6 \n", | |
"18585 Rural 21.0 (20.0, 36.0] 114.73 24.8 \n", | |
"18586 Rural 25.0 (20.0, 36.0] 178.14 27.8 \n", | |
"18587 Urban 79.0 (64.0, 82.0] 110.27 36.2 \n", | |
"18588 Urban 82.0 (64.0, 82.0] 180.24 40.4 \n", | |
"18589 Urban 24.0 (20.0, 36.0] 97.05 27.6 \n", | |
"18590 Urban 52.0 (50.0, 64.0] 85.57 21.4 \n", | |
"18591 Urban 13.0 (0.079, 20.0] 62.33 27.0 \n", | |
"18592 Rural 11.0 (0.079, 20.0] 83.25 15.9 \n", | |
"18593 Rural 80.0 (64.0, 82.0] 102.85 25.3 \n", | |
"18594 Urban 40.0 (36.0, 50.0] 62.75 29.8 \n", | |
"18595 Rural 5.0 (0.079, 20.0] 126.32 17.0 \n", | |
"18596 Rural 20.0 (0.079, 20.0] 74.43 18.4 \n", | |
"18597 Rural 61.0 (50.0, 64.0] 211.55 31.6 \n", | |
"18598 Rural 79.0 (64.0, 82.0] 125.74 29.4 \n", | |
"18599 Rural 55.0 (50.0, 64.0] 69.46 33.8 \n", | |
"18600 Rural 38.0 (36.0, 50.0] 91.23 24.4 \n", | |
"\n", | |
" bmi_interval ever_married gender glucose_level heart_disease \\\n", | |
"0 (10.099, 27.7] No Male (82.16, 103.3] 0 \n", | |
"1 (27.7, 97.6] Yes Male (82.16, 103.3] 0 \n", | |
"2 (10.099, 27.7] No Female (103.3, 291.05] 0 \n", | |
"3 (27.7, 97.6] Yes Female (54.999, 82.16] 0 \n", | |
"4 (10.099, 27.7] No Male (103.3, 291.05] 0 \n", | |
"5 (27.7, 97.6] Yes Female (103.3, 291.05] 0 \n", | |
"6 (10.099, 27.7] Yes Female (54.999, 82.16] 0 \n", | |
"7 (10.099, 27.7] Yes Female (103.3, 291.05] 1 \n", | |
"8 (27.7, 97.6] Yes Female (54.999, 82.16] 0 \n", | |
"9 (27.7, 97.6] Yes Female (103.3, 291.05] 0 \n", | |
"10 (27.7, 97.6] Yes Female (54.999, 82.16] 0 \n", | |
"11 (10.099, 27.7] Yes Male (54.999, 82.16] 1 \n", | |
"12 (27.7, 97.6] Yes Female (103.3, 291.05] 0 \n", | |
"13 (10.099, 27.7] Yes Female (54.999, 82.16] 0 \n", | |
"14 (27.7, 97.6] Yes Female (82.16, 103.3] 0 \n", | |
"15 (27.7, 97.6] No Male (82.16, 103.3] 0 \n", | |
"16 (10.099, 27.7] No Female (82.16, 103.3] 0 \n", | |
"17 (10.099, 27.7] Yes Female (82.16, 103.3] 0 \n", | |
"18 (10.099, 27.7] Yes Female (54.999, 82.16] 0 \n", | |
"19 (10.099, 27.7] Yes Female (54.999, 82.16] 1 \n", | |
"20 (27.7, 97.6] Yes Female (54.999, 82.16] 0 \n", | |
"21 (27.7, 97.6] Yes Female (103.3, 291.05] 0 \n", | |
"22 (27.7, 97.6] Yes Female (54.999, 82.16] 0 \n", | |
"23 (10.099, 27.7] Yes Male (103.3, 291.05] 0 \n", | |
"24 (10.099, 27.7] Yes Female (103.3, 291.05] 0 \n", | |
"25 (10.099, 27.7] Yes Female (54.999, 82.16] 0 \n", | |
"26 (27.7, 97.6] Yes Female (82.16, 103.3] 0 \n", | |
"27 (10.099, 27.7] No Female (82.16, 103.3] 0 \n", | |
"28 (10.099, 27.7] Yes Female (103.3, 291.05] 0 \n", | |
"29 (27.7, 97.6] Yes Female (103.3, 291.05] 0 \n", | |
"... ... ... ... ... ... \n", | |
"18571 (10.099, 27.7] Yes Male (82.16, 103.3] 0 \n", | |
"18572 (10.099, 27.7] No Female (82.16, 103.3] 0 \n", | |
"18573 (10.099, 27.7] No Female (54.999, 82.16] 0 \n", | |
"18574 (10.099, 27.7] No Female (54.999, 82.16] 0 \n", | |
"18575 (10.099, 27.7] Yes Female (82.16, 103.3] 0 \n", | |
"18576 (10.099, 27.7] No Female (54.999, 82.16] 0 \n", | |
"18577 (10.099, 27.7] Yes Male (54.999, 82.16] 0 \n", | |
"18578 (27.7, 97.6] No Male (82.16, 103.3] 0 \n", | |
"18579 (10.099, 27.7] No Female (54.999, 82.16] 0 \n", | |
"18580 (10.099, 27.7] No Male (82.16, 103.3] 0 \n", | |
"18581 (27.7, 97.6] No Female (54.999, 82.16] 0 \n", | |
"18582 (10.099, 27.7] Yes Male (103.3, 291.05] 1 \n", | |
"18583 (27.7, 97.6] No Female (54.999, 82.16] 0 \n", | |
"18584 (10.099, 27.7] No Female (82.16, 103.3] 0 \n", | |
"18585 (10.099, 27.7] No Female (103.3, 291.05] 0 \n", | |
"18586 (27.7, 97.6] No Male (103.3, 291.05] 0 \n", | |
"18587 (27.7, 97.6] No Male (103.3, 291.05] 1 \n", | |
"18588 (27.7, 97.6] Yes Female (103.3, 291.05] 0 \n", | |
"18589 (10.099, 27.7] No Male (82.16, 103.3] 0 \n", | |
"18590 (10.099, 27.7] Yes Female (82.16, 103.3] 0 \n", | |
"18591 (10.099, 27.7] No Female (54.999, 82.16] 0 \n", | |
"18592 (10.099, 27.7] No Male (82.16, 103.3] 0 \n", | |
"18593 (10.099, 27.7] Yes Male (82.16, 103.3] 0 \n", | |
"18594 (27.7, 97.6] Yes Male (54.999, 82.16] 0 \n", | |
"18595 (10.099, 27.7] No Male (103.3, 291.05] 0 \n", | |
"18596 (10.099, 27.7] No Male (54.999, 82.16] 0 \n", | |
"18597 (27.7, 97.6] Yes Male (103.3, 291.05] 0 \n", | |
"18598 (27.7, 97.6] Yes Female (103.3, 291.05] 0 \n", | |
"18599 (27.7, 97.6] Yes Male (54.999, 82.16] 0 \n", | |
"18600 (10.099, 27.7] No Female (82.16, 103.3] 0 \n", | |
"\n", | |
" hypertension id smoking_status stroke work_type \n", | |
"0 0 30669 never smoked 0.0 children \n", | |
"1 1 30468 never smoked 0.0 Private \n", | |
"2 0 16523 never smoked 0.0 Private \n", | |
"3 0 56543 formerly smoked 0.0 Private \n", | |
"4 0 46136 never smoked 0.0 Never_worked \n", | |
"5 0 32257 never smoked 0.0 Private \n", | |
"6 0 52800 formerly smoked 0.0 Private \n", | |
"7 0 41413 never smoked 0.0 Self-employed \n", | |
"8 0 15266 smokes 0.0 Private \n", | |
"9 1 28674 never smoked 0.0 Self-employed \n", | |
"10 0 10460 never smoked 0.0 Govt_job \n", | |
"11 0 64908 formerly smoked 0.0 Private \n", | |
"12 0 63884 never smoked 0.0 Private \n", | |
"13 0 37893 formerly smoked 0.0 Private \n", | |
"14 0 67855 never smoked 0.0 Private \n", | |
"15 0 25774 never smoked 0.0 Private \n", | |
"16 0 19584 smokes 0.0 Private \n", | |
"17 0 24447 never smoked 0.0 Private \n", | |
"18 0 49589 smokes 0.0 Govt_job \n", | |
"19 0 17986 smokes 0.0 Self-employed \n", | |
"20 1 29217 never smoked 0.0 Private \n", | |
"21 1 72911 smokes 0.0 Private \n", | |
"22 0 47175 smokes 0.0 Private \n", | |
"23 0 4057 formerly smoked 0.0 Private \n", | |
"24 0 48588 never smoked 0.0 Private \n", | |
"25 0 70336 never smoked 0.0 Private \n", | |
"26 0 66767 smokes 0.0 Govt_job \n", | |
"27 0 45801 never smoked 0.0 Private \n", | |
"28 0 36275 never smoked 0.0 Private \n", | |
"29 0 11577 never smoked 0.0 Self-employed \n", | |
"... ... ... ... ... ... \n", | |
"18571 0 30015 never smoked NaN Self-employed \n", | |
"18572 0 24761 smokes NaN Private \n", | |
"18573 0 27094 never smoked NaN Private \n", | |
"18574 0 43495 never smoked NaN children \n", | |
"18575 0 56348 never smoked NaN Private \n", | |
"18576 0 11876 formerly smoked NaN Private \n", | |
"18577 0 585 never smoked NaN Private \n", | |
"18578 0 43879 smokes NaN Private \n", | |
"18579 0 38239 never smoked NaN Private \n", | |
"18580 0 28943 never smoked NaN children \n", | |
"18581 0 17895 never smoked NaN Private \n", | |
"18582 0 35147 never smoked NaN Private \n", | |
"18583 0 72758 smokes NaN Private \n", | |
"18584 0 28661 never smoked NaN children \n", | |
"18585 0 46853 never smoked NaN Private \n", | |
"18586 0 69190 never smoked NaN Private \n", | |
"18587 0 26705 never smoked NaN Private \n", | |
"18588 1 36976 formerly smoked NaN Private \n", | |
"18589 0 58578 smokes NaN Private \n", | |
"18590 0 61508 never smoked NaN Private \n", | |
"18591 0 70296 never smoked NaN Private \n", | |
"18592 0 35299 never smoked NaN children \n", | |
"18593 0 15023 formerly smoked NaN Private \n", | |
"18594 1 56291 formerly smoked NaN Private \n", | |
"18595 0 53431 never smoked NaN children \n", | |
"18596 0 67353 formerly smoked NaN Private \n", | |
"18597 0 362 smokes NaN Govt_job \n", | |
"18598 0 29839 never smoked NaN Private \n", | |
"18599 0 6438 never smoked NaN Govt_job \n", | |
"18600 0 16770 never smoked NaN Private \n", | |
"\n", | |
"[62001 rows x 15 columns]" | |
] | |
}, | |
"execution_count": 321, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def order(data,output,on,by):\n", | |
" D=data[data[output]==on].groupby([output,by])[by].agg({'Frequency':'count'}).sort_values(by='Frequency',ascending=False)\n", | |
" return D" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def order1(data,output,by):\n", | |
" D=data.groupby([output,by],as_index=False)[by].agg({'Frequency':'count'})#.sort_values(by='Frequency',ascending=False)\n", | |
" return D" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def order2(data,output,bye):\n", | |
" D=data[[output,bye]].groupby([output],as_index=False).mean().sort_values(by=bye,ascending=False)\n", | |
" return D" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 343, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data=train.append(test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#data.drop('stroke',inplace=True, axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 300, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"62001" | |
] | |
}, | |
"execution_count": 300, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 328, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:2: FutureWarning: using a dict on a Series for aggregation\n", | |
"is deprecated and will be removed in a future version\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"G_s=order(train,'stroke',1,'gender')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 329, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>stroke</th>\n", | |
" <th>gender</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">1</th>\n", | |
" <th>Female</th>\n", | |
" <td>431</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Male</th>\n", | |
" <td>352</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Frequency\n", | |
"stroke gender \n", | |
"1 Female 431\n", | |
" Male 352" | |
] | |
}, | |
"execution_count": 329, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"G_s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 330, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"G_s1=order1(train,'stroke','gender')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 331, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>gender</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>Female</td>\n", | |
" <td>25234</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>Male</td>\n", | |
" <td>17372</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>Other</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>Female</td>\n", | |
" <td>431</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>Male</td>\n", | |
" <td>352</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke gender Frequency\n", | |
"0 0 Female 25234\n", | |
"1 0 Male 17372\n", | |
"2 0 Other 11\n", | |
"3 1 Female 431\n", | |
"4 1 Male 352" | |
] | |
}, | |
"execution_count": 331, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"G_s1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['gender']=='Female','gender']=0.017" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['gender']=='Male','gender']=0.02" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['gender']=='Other','gender']=0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train['age_interval'],bins=pd.qcut(train['age'],5,retbins=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"test['age_interval']=pd.cut(test['age'],bins=bins,include_lowest=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train['glucose_level'],bins=pd.qcut(train['avg_glucose_level'],3,retbins=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"test['glucose_level']=pd.cut(test['avg_glucose_level'],bins=bins,include_lowest=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train['bmi_interval'],bins=pd.qcut(train['bmi'],2,retbins=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"test['bmi_interval']=pd.cut(test['bmi'],bins=bins,include_lowest=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 338, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:2: FutureWarning: using a dict on a Series for aggregation\n", | |
"is deprecated and will be removed in a future version\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"B_s=order(train,'stroke',1,'bmi_interval')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 339, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"B_s1=order1(train,'stroke','bmi_interval')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 340, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>bmi_interval</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>20708</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>20587</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>(10.099, 27.7]</td>\n", | |
" <td>270</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>(27.7, 97.6]</td>\n", | |
" <td>373</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke bmi_interval Frequency\n", | |
"0 0 (10.099, 27.7] 20708\n", | |
"1 0 (27.7, 97.6] 20587\n", | |
"2 1 (10.099, 27.7] 270\n", | |
"3 1 (27.7, 97.6] 373" | |
] | |
}, | |
"execution_count": 340, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"B_s1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def encod(col): \n", | |
" label.fit(col)\n", | |
" col=label.transform(col)\n", | |
" return col" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def wts(dist):\n", | |
" n=len(dist)-1\n", | |
" m=(n+1)/2-1\n", | |
" wt=[]\n", | |
" while m>0 or m==0:\n", | |
" wt.append(dist.loc[n,'Frequency']/dist.loc[m,'Frequency'])\n", | |
" m=m-1\n", | |
" n=n-1\n", | |
" return wt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 304, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[0.018118229950939913, 0.013038439250531195]" | |
] | |
}, | |
"execution_count": 304, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wts(B_s1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 352, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data['bmi_interval']=encod(data['bmi_interval'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 354, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['bmi_interval']==1,'bmi_interval']=0.013" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 355, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['bmi_interval']==0,'bmi_interval']=0.018" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 182, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:2: FutureWarning: using a dict on a Series for aggregation\n", | |
"is deprecated and will be removed in a future version\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"Gl_s=order(train,'stroke',1,'glucose_level')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 183, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>stroke</th>\n", | |
" <th>glucose_level</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"3\" valign=\"top\">1</th>\n", | |
" <th>(103.3, 291.05]</th>\n", | |
" <td>396</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>(54.999, 82.16]</th>\n", | |
" <td>211</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>(82.16, 103.3]</th>\n", | |
" <td>176</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Frequency\n", | |
"stroke glucose_level \n", | |
"1 (103.3, 291.05] 396\n", | |
" (54.999, 82.16] 211\n", | |
" (82.16, 103.3] 176" | |
] | |
}, | |
"execution_count": 183, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Gl_s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 357, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Gl_s1=order1(train,'stroke','glucose_level')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 358, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>glucose_level</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>14260</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>14287</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>14070</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>(54.999, 82.16]</td>\n", | |
" <td>211</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>(82.16, 103.3]</td>\n", | |
" <td>176</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1</td>\n", | |
" <td>(103.3, 291.05]</td>\n", | |
" <td>396</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke glucose_level Frequency\n", | |
"0 0 (54.999, 82.16] 14260\n", | |
"1 0 (82.16, 103.3] 14287\n", | |
"2 0 (103.3, 291.05] 14070\n", | |
"3 1 (54.999, 82.16] 211\n", | |
"4 1 (82.16, 103.3] 176\n", | |
"5 1 (103.3, 291.05] 396" | |
] | |
}, | |
"execution_count": 358, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Gl_s1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 359, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[0.02814498933901919, 0.01231889129978302, 0.014796633941093968]" | |
] | |
}, | |
"execution_count": 359, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wts(Gl_s1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 360, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data['glucose_level']=encod(data['glucose_level'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 361, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Residence_type</th>\n", | |
" <th>age</th>\n", | |
" <th>age_interval</th>\n", | |
" <th>avg_glucose_level</th>\n", | |
" <th>bmi</th>\n", | |
" <th>bmi_interval</th>\n", | |
" <th>ever_married</th>\n", | |
" <th>gender</th>\n", | |
" <th>glucose_level</th>\n", | |
" <th>heart_disease</th>\n", | |
" <th>hypertension</th>\n", | |
" <th>id</th>\n", | |
" <th>smoking_status</th>\n", | |
" <th>stroke</th>\n", | |
" <th>work_type</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Rural</td>\n", | |
" <td>3.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>95.12</td>\n", | |
" <td>18.0</td>\n", | |
" <td>0.018</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>30669</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>children</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Urban</td>\n", | |
" <td>58.0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>87.96</td>\n", | |
" <td>39.2</td>\n", | |
" <td>0.013</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Male</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>30468</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Urban</td>\n", | |
" <td>8.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>110.89</td>\n", | |
" <td>17.6</td>\n", | |
" <td>0.018</td>\n", | |
" <td>No</td>\n", | |
" <td>Female</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>16523</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Rural</td>\n", | |
" <td>70.0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>69.04</td>\n", | |
" <td>35.9</td>\n", | |
" <td>0.013</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Female</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>56543</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Private</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Rural</td>\n", | |
" <td>14.0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>161.28</td>\n", | |
" <td>19.1</td>\n", | |
" <td>0.018</td>\n", | |
" <td>No</td>\n", | |
" <td>Male</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>46136</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Never_worked</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Residence_type age age_interval avg_glucose_level bmi bmi_interval \\\n", | |
"0 Rural 3.0 (0.079, 20.0] 95.12 18.0 0.018 \n", | |
"1 Urban 58.0 (50.0, 64.0] 87.96 39.2 0.013 \n", | |
"2 Urban 8.0 (0.079, 20.0] 110.89 17.6 0.018 \n", | |
"3 Rural 70.0 (64.0, 82.0] 69.04 35.9 0.013 \n", | |
"4 Rural 14.0 (0.079, 20.0] 161.28 19.1 0.018 \n", | |
"\n", | |
" ever_married gender glucose_level heart_disease hypertension id \\\n", | |
"0 No Male 1 0 0 30669 \n", | |
"1 Yes Male 1 0 1 30468 \n", | |
"2 No Female 2 0 0 16523 \n", | |
"3 Yes Female 0 0 0 56543 \n", | |
"4 No Male 2 0 0 46136 \n", | |
"\n", | |
" smoking_status stroke work_type \n", | |
"0 never smoked 0.0 children \n", | |
"1 never smoked 0.0 Private \n", | |
"2 never smoked 0.0 Private \n", | |
"3 formerly smoked 0.0 Private \n", | |
"4 never smoked 0.0 Never_worked " | |
] | |
}, | |
"execution_count": 361, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 362, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['glucose_level']==2,'glucose_level']=0.028" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 363, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['glucose_level']==1,'glucose_level']=0.0123" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 364, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['glucose_level']==0,'glucose_level']=0.0147" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:2: FutureWarning: using a dict on a Series for aggregation\n", | |
"is deprecated and will be removed in a future version\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"A_s=order(train,'stroke',1,'age_interval')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>stroke</th>\n", | |
" <th>age_interval</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">1</th>\n", | |
" <th>(64.0, 82.0]</th>\n", | |
" <td>528</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>(50.0, 64.0]</th>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>(36.0, 50.0]</th>\n", | |
" <td>63</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>(20.0, 36.0]</th>\n", | |
" <td>10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>(0.079, 20.0]</th>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Frequency\n", | |
"stroke age_interval \n", | |
"1 (64.0, 82.0] 528\n", | |
" (50.0, 64.0] 180\n", | |
" (36.0, 50.0] 63\n", | |
" (20.0, 36.0] 10\n", | |
" (0.079, 20.0] 2" | |
] | |
}, | |
"execution_count": 45, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"A_s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"A_s1=order1(train,'stroke','age_interval')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>age_interval</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>9013</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>8416</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>8694</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>8863</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>7631</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1</td>\n", | |
" <td>(0.079, 20.0]</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>1</td>\n", | |
" <td>(20.0, 36.0]</td>\n", | |
" <td>10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1</td>\n", | |
" <td>(36.0, 50.0]</td>\n", | |
" <td>63</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1</td>\n", | |
" <td>(50.0, 64.0]</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>1</td>\n", | |
" <td>(64.0, 82.0]</td>\n", | |
" <td>528</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke age_interval Frequency\n", | |
"0 0 (0.079, 20.0] 9013\n", | |
"1 0 (20.0, 36.0] 8416\n", | |
"2 0 (36.0, 50.0] 8694\n", | |
"3 0 (50.0, 64.0] 8863\n", | |
"4 0 (64.0, 82.0] 7631\n", | |
"5 1 (0.079, 20.0] 2\n", | |
"6 1 (20.0, 36.0] 10\n", | |
"7 1 (36.0, 50.0] 63\n", | |
"8 1 (50.0, 64.0] 180\n", | |
"9 1 (64.0, 82.0] 528" | |
] | |
}, | |
"execution_count": 47, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"A_s1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[0.069191455903551302,\n", | |
" 0.020309150400541577,\n", | |
" 0.007246376811594203,\n", | |
" 0.001188212927756654,\n", | |
" 0.00022190169754798624]" | |
] | |
}, | |
"execution_count": 48, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wts(A_s1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 97, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"Data=train.append(test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 98, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data['age_interval']=encod(Data['age_interval'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 366, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data['age_interval']=encod(data['age_interval'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['age_interval']==0.069,'age_interval']=4 #0.069" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['age_interval']==0.020,'age_interval']=3 #0.020" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['age_interval']==0.007,'age_interval']=2 #0.007" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 95, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Series([], Name: age_interval, dtype: float64)" | |
] | |
}, | |
"execution_count": 95, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.loc[data['age_interval']==0.007,'age_interval']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 88, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['age_interval']==0.0011,'age_interval']=1 #0.0011" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 89, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['age_interval']==0.0002,'age_interval']=0 #0.0002" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:2: FutureWarning: using a dict on a Series for aggregation\n", | |
"is deprecated and will be removed in a future version\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"H_s=order(train,'stroke',1,'hypertension')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>stroke</th>\n", | |
" <th>hypertension</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">1</th>\n", | |
" <th>0</th>\n", | |
" <td>583</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>200</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Frequency\n", | |
"stroke hypertension \n", | |
"1 0 583\n", | |
" 1 200" | |
] | |
}, | |
"execution_count": 53, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"H_s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"H_s1=order1(train,'stroke','hypertension')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>hypertension</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>38756</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>3861</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>583</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>200</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke hypertension Frequency\n", | |
"0 0 0 38756\n", | |
"1 0 1 3861\n", | |
"2 1 0 583\n", | |
"3 1 1 200" | |
] | |
}, | |
"execution_count": 55, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"H_s1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 368, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[0.051800051800051802, 0.015042832077613789]" | |
] | |
}, | |
"execution_count": 368, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wts(H_s1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 369, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['hypertension']==1,'hypertension']=0.051" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 370, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['hypertension']==0,'hypertension']=0.015" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:2: FutureWarning: using a dict on a Series for aggregation\n", | |
"is deprecated and will be removed in a future version\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"Hd_s=order(train,'stroke',1,'heart_disease')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>stroke</th>\n", | |
" <th>heart_disease</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">1</th>\n", | |
" <th>0</th>\n", | |
" <td>606</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>177</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Frequency\n", | |
"stroke heart_disease \n", | |
"1 0 606\n", | |
" 1 177" | |
] | |
}, | |
"execution_count": 59, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Hd_s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"Hd_s1=order1(train,'stroke','heart_disease')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"Hd_s1=Hd_s1.astype(int)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 138, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>heart_disease</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40732</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1885</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>606</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>177</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke heart_disease Frequency\n", | |
"0 0 0 40732\n", | |
"1 0 1 1885\n", | |
"2 1 0 606\n", | |
"3 1 1 177" | |
] | |
}, | |
"execution_count": 138, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Hd_s1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 371, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[0.093899204244031836, 0.014877737405479721]" | |
] | |
}, | |
"execution_count": 371, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wts(Hd_s1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 372, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['heart_disease']==1,'heart_disease']=0.094" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 373, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['heart_disease']==0,'heart_disease']=0.014" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 144, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:2: FutureWarning: using a dict on a Series for aggregation\n", | |
"is deprecated and will be removed in a future version\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"W_s=order(train,'stroke',1,'work_type')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 145, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>stroke</th>\n", | |
" <th>work_type</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">1</th>\n", | |
" <th>Private</th>\n", | |
" <td>441</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Self-employed</th>\n", | |
" <td>251</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Govt_job</th>\n", | |
" <td>89</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>children</th>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Frequency\n", | |
"stroke work_type \n", | |
"1 Private 441\n", | |
" Self-employed 251\n", | |
" Govt_job 89\n", | |
" children 2" | |
] | |
}, | |
"execution_count": 145, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"W_s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 146, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"W_s1=order1(train,'stroke','work_type')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 147, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>work_type</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>Govt_job</td>\n", | |
" <td>5351</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>Never_worked</td>\n", | |
" <td>177</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>Private</td>\n", | |
" <td>24393</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>Self-employed</td>\n", | |
" <td>6542</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>children</td>\n", | |
" <td>6154</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1</td>\n", | |
" <td>Govt_job</td>\n", | |
" <td>89</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>1</td>\n", | |
" <td>Private</td>\n", | |
" <td>441</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1</td>\n", | |
" <td>Self-employed</td>\n", | |
" <td>251</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1</td>\n", | |
" <td>children</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke work_type Frequency\n", | |
"0 0 Govt_job 5351\n", | |
"1 0 Never_worked 177\n", | |
"2 0 Private 24393\n", | |
"3 0 Self-employed 6542\n", | |
"4 0 children 6154\n", | |
"5 1 Govt_job 89\n", | |
"6 1 Private 441\n", | |
"7 1 Self-employed 251\n", | |
"8 1 children 2" | |
] | |
}, | |
"execution_count": 147, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"W_s1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 376, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['work_type']=='Govt_job','work_type']=0.016" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['work_type']=='Self-employed','work_type']=0.038" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 404, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['work_type']=='Private','work_type']=0.018" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 405, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['work_type']=='children','work_type']=0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['work_type']=='Never_worked','work_type']=0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 149, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"R_s1=order1(train,'stroke','Residence_type')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 150, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>Residence_type</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>Rural</td>\n", | |
" <td>21260</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>Urban</td>\n", | |
" <td>21357</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>Rural</td>\n", | |
" <td>384</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>Urban</td>\n", | |
" <td>399</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke Residence_type Frequency\n", | |
"0 0 Rural 21260\n", | |
"1 0 Urban 21357\n", | |
"2 1 Rural 384\n", | |
"3 1 Urban 399" | |
] | |
}, | |
"execution_count": 150, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"R_s1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 381, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['Residence_type']=='Rural','Residence_type']=0.018" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 382, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['Residence_type']=='Urban','Residence_type']=0.018" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 153, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"S_s=order1(train,'stroke','smoking_status')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 154, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>smoking_status</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>7272</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>15769</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>smokes</td>\n", | |
" <td>6429</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>formerly smoked</td>\n", | |
" <td>221</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>never smoked</td>\n", | |
" <td>284</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1</td>\n", | |
" <td>smokes</td>\n", | |
" <td>133</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke smoking_status Frequency\n", | |
"0 0 formerly smoked 7272\n", | |
"1 0 never smoked 15769\n", | |
"2 0 smokes 6429\n", | |
"3 1 formerly smoked 221\n", | |
"4 1 never smoked 284\n", | |
"5 1 smokes 133" | |
] | |
}, | |
"execution_count": 154, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"S_s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 383, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['smoking_status']=='formerly smoked','smoking_status']=0.03" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 384, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['smoking_status']=='never smoked','smoking_status']=0.018" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 385, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['smoking_status']=='smokes','smoking_status']=0.02" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 155, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"M_s=order1(train,'stroke','ever_married')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 156, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>ever_married</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>No</td>\n", | |
" <td>15382</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>Yes</td>\n", | |
" <td>27235</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>No</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>Yes</td>\n", | |
" <td>703</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke ever_married Frequency\n", | |
"0 0 No 15382\n", | |
"1 0 Yes 27235\n", | |
"2 1 No 80\n", | |
"3 1 Yes 703" | |
] | |
}, | |
"execution_count": 156, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"M_s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 386, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['ever_married']=='Yes','ever_married']=0.0258" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 387, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['ever_married']=='No','ever_married']=0.0052" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data=pd.read_csv('data.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 101, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data=pd.get_dummies(data,columns=['age_interval','gender'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 118, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['Residence_type', 'bmi_interval', 'ever_married', 'glucose_level',\n", | |
" 'heart_disease', 'hypertension', 'smoking_status', 'stroke',\n", | |
" 'work_type', 'age_interval_0', 'age_interval_1', 'age_interval_2',\n", | |
" 'age_interval_3', 'age_interval_4', 'gender_0.0', 'gender_0.017',\n", | |
" 'gender_0.02'],\n", | |
" dtype='object')" | |
] | |
}, | |
"execution_count": 118, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 115, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.drop('weights',inplace=True, axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 117, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.to_csv('Datafinal.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data['wts']=data['bmi_interval']+data['hypertension']+data['heart_disease']+data['glucose_level']+data['smoking_status']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data['weights']=data['wts']+data['wts1']+data['age_interval']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.to_csv('data.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Residence_type</th>\n", | |
" <th>bmi_interval</th>\n", | |
" <th>ever_married</th>\n", | |
" <th>glucose_level</th>\n", | |
" <th>heart_disease</th>\n", | |
" <th>hypertension</th>\n", | |
" <th>smoking_status</th>\n", | |
" <th>stroke</th>\n", | |
" <th>work_type</th>\n", | |
" <th>age_interval_0</th>\n", | |
" <th>age_interval_1</th>\n", | |
" <th>age_interval_2</th>\n", | |
" <th>age_interval_3</th>\n", | |
" <th>age_interval_4</th>\n", | |
" <th>gender_0.0</th>\n", | |
" <th>gender_0.017</th>\n", | |
" <th>gender_0.02</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.018</td>\n", | |
" <td>0.018</td>\n", | |
" <td>0.0052</td>\n", | |
" <td>0.0123</td>\n", | |
" <td>0.014</td>\n", | |
" <td>0.015</td>\n", | |
" <td>0.018</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.018</td>\n", | |
" <td>0.013</td>\n", | |
" <td>0.0258</td>\n", | |
" <td>0.0123</td>\n", | |
" <td>0.014</td>\n", | |
" <td>0.051</td>\n", | |
" <td>0.018</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.018</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.018</td>\n", | |
" <td>0.018</td>\n", | |
" <td>0.0052</td>\n", | |
" <td>0.0280</td>\n", | |
" <td>0.014</td>\n", | |
" <td>0.015</td>\n", | |
" <td>0.018</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.018</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.018</td>\n", | |
" <td>0.013</td>\n", | |
" <td>0.0258</td>\n", | |
" <td>0.0147</td>\n", | |
" <td>0.014</td>\n", | |
" <td>0.015</td>\n", | |
" <td>0.030</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.018</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.018</td>\n", | |
" <td>0.018</td>\n", | |
" <td>0.0052</td>\n", | |
" <td>0.0280</td>\n", | |
" <td>0.014</td>\n", | |
" <td>0.015</td>\n", | |
" <td>0.018</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Residence_type bmi_interval ever_married glucose_level heart_disease \\\n", | |
"0 0.018 0.018 0.0052 0.0123 0.014 \n", | |
"1 0.018 0.013 0.0258 0.0123 0.014 \n", | |
"2 0.018 0.018 0.0052 0.0280 0.014 \n", | |
"3 0.018 0.013 0.0258 0.0147 0.014 \n", | |
"4 0.018 0.018 0.0052 0.0280 0.014 \n", | |
"\n", | |
" hypertension smoking_status stroke work_type age_interval_0 \\\n", | |
"0 0.015 0.018 0.0 0.000 1 \n", | |
"1 0.051 0.018 0.0 0.018 0 \n", | |
"2 0.015 0.018 0.0 0.018 1 \n", | |
"3 0.015 0.030 0.0 0.018 0 \n", | |
"4 0.015 0.018 0.0 0.000 1 \n", | |
"\n", | |
" age_interval_1 age_interval_2 age_interval_3 age_interval_4 gender_0.0 \\\n", | |
"0 0 0 0 0 0 \n", | |
"1 0 0 1 0 0 \n", | |
"2 0 0 0 0 0 \n", | |
"3 0 0 0 1 0 \n", | |
"4 0 0 0 0 0 \n", | |
"\n", | |
" gender_0.017 gender_0.02 \n", | |
"0 0 1 \n", | |
"1 0 1 \n", | |
"2 1 0 \n", | |
"3 1 0 \n", | |
"4 0 1 " | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data['Weights']=data['Residence_type'].astype(float)+data['ever_married'].astype(float)+data['work_type'].astype(float)+data['bmi_interval']+data['hypertension']+data['heart_disease']+data['glucose_level']+data['smoking_status']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Train=data[0:43400]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Test=data[43400:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:1: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame.\n", | |
"Try using .loc[row_indexer,col_indexer] = value instead\n", | |
"\n", | |
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", | |
" if __name__ == '__main__':\n" | |
] | |
} | |
], | |
"source": [ | |
"Train['Weights_interval'],bins=pd.qcut(Train['Weights'],4,retbins=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:1: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame.\n", | |
"Try using .loc[row_indexer,col_indexer] = value instead\n", | |
"\n", | |
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", | |
" if __name__ == '__main__':\n" | |
] | |
} | |
], | |
"source": [ | |
"Test['Weights_interval']=pd.cut(Test['Weights'],bins=bins,include_lowest=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"F:\\Anaconda\\lib\\site-packages\\ipykernel\\__main__.py:2: FutureWarning: using a dict on a Series for aggregation\n", | |
"is deprecated and will be removed in a future version\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"Wt_s=order(Train,'stroke',1,'Weights_interval')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>stroke</th>\n", | |
" <th>Weights_interval</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">1.0</th>\n", | |
" <th>(0.157, 0.303]</th>\n", | |
" <td>468</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>(0.14, 0.157]</th>\n", | |
" <td>203</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>(0.121, 0.14]</th>\n", | |
" <td>91</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>(0.0945, 0.121]</th>\n", | |
" <td>21</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Frequency\n", | |
"stroke Weights_interval \n", | |
"1.0 (0.157, 0.303] 468\n", | |
" (0.14, 0.157] 203\n", | |
" (0.121, 0.14] 91\n", | |
" (0.0945, 0.121] 21" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Wt_s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Wt_s1=order1(Train,'stroke','Weights_interval')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stroke</th>\n", | |
" <th>Weights_interval</th>\n", | |
" <th>Frequency</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.0</td>\n", | |
" <td>(0.0945, 0.121]</td>\n", | |
" <td>11026</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.0</td>\n", | |
" <td>(0.121, 0.14]</td>\n", | |
" <td>10732</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.0</td>\n", | |
" <td>(0.14, 0.157]</td>\n", | |
" <td>10744</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.0</td>\n", | |
" <td>(0.157, 0.303]</td>\n", | |
" <td>10115</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1.0</td>\n", | |
" <td>(0.0945, 0.121]</td>\n", | |
" <td>21</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1.0</td>\n", | |
" <td>(0.121, 0.14]</td>\n", | |
" <td>91</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>1.0</td>\n", | |
" <td>(0.14, 0.157]</td>\n", | |
" <td>203</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1.0</td>\n", | |
" <td>(0.157, 0.303]</td>\n", | |
" <td>468</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stroke Weights_interval Frequency\n", | |
"0 0.0 (0.0945, 0.121] 11026\n", | |
"1 0.0 (0.121, 0.14] 10732\n", | |
"2 0.0 (0.14, 0.157] 10744\n", | |
"3 0.0 (0.157, 0.303] 10115\n", | |
"4 1.0 (0.0945, 0.121] 21\n", | |
"5 1.0 (0.121, 0.14] 91\n", | |
"6 1.0 (0.14, 0.157] 203\n", | |
"7 1.0 (0.157, 0.303] 468" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Wt_s1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[0.046267918932278794,\n", | |
" 0.018894266567386447,\n", | |
" 0.008479314200521804,\n", | |
" 0.0019045891529113005]" | |
] | |
}, | |
"execution_count": 55, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wts(Wt_s1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data['Weights_interval']=Train['Weights_interval'].append(Test['Weights_interval'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data['Weights_interval']=encod(data['Weights_interval'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['Weights_interval']==3,'Weights_interval']=0.046" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['Weights_interval']==2,'Weights_interval']=0.019" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['Weights_interval']==1,'Weights_interval']=0.0084" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[data['Weights_interval']==0,'Weights_interval']=0.002" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['Residence_type', 'bmi_interval', 'ever_married', 'glucose_level',\n", | |
" 'heart_disease', 'hypertension', 'smoking_status', 'stroke',\n", | |
" 'work_type', 'age_interval_0', 'age_interval_1', 'age_interval_2',\n", | |
" 'age_interval_3', 'age_interval_4', 'gender_0.0', 'gender_0.017',\n", | |
" 'gender_0.02', 'Weights', 'Weights_interval'],\n", | |
" dtype='object')" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"54.427841634738186" | |
] | |
}, | |
"execution_count": 68, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(WW)/len(WWW)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.to_csv('DataFinal.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"62001" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2053" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Total['bmi'].isnull().sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data=pd.get_dummies(data,columns=['Residence_type','bmi_interval','ever_married','glucose_level','heart_disease','hypertension','smoking_status','work_type'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.drop('Unnamed: 0',inplace=True,axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.to_csv('Datafinal1.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Data=pd.read_csv('Data.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"62001" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(Total)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data.loc[Total['bmi'].isnull(),'Weights_interval']=np.nan" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data=pd.read_csv('Data_nan.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data=pd.read_csv('DataFinal.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['Unnamed: 0', 'Unnamed: 0.1', 'Residence_type', 'bmi_interval',\n", | |
" 'ever_married', 'glucose_level', 'heart_disease', 'hypertension',\n", | |
" 'smoking_status', 'stroke', 'work_type', 'age_interval_0',\n", | |
" 'age_interval_1', 'age_interval_2', 'age_interval_3', 'age_interval_4',\n", | |
" 'gender_0.0', 'gender_0.017', 'gender_0.02', 'Weights',\n", | |
" 'Weights_interval', 'gender', 'age_interval'],\n", | |
" dtype='object')" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"P=['age_interval','gender', 'Weights_interval']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"P=[ 'age_interval_0', 'age_interval_1',\n", | |
" 'age_interval_2', 'age_interval_3', 'age_interval_4', 'gender_0.0',\n", | |
" 'gender_0.017', 'gender_0.02','Weights_interval']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"P=['Residence_type', 'bmi_interval', 'ever_married', 'glucose_level',\n", | |
" 'heart_disease', 'hypertension', 'smoking_status',\n", | |
" 'work_type', 'age_interval_0', 'age_interval_1', 'age_interval_2',\n", | |
" 'age_interval_3', 'age_interval_4', 'gender_0.0', 'gender_0.017',\n", | |
" 'gender_0.02']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"P=[ 'age_interval', 'gender', 'Residence_type_0.018000000000000002', 'bmi_interval_0.013',\n", | |
" 'bmi_interval_0.018000000000000002', 'ever_married_0.0052',\n", | |
" 'ever_married_0.0258', 'glucose_level_0.0123', 'glucose_level_0.0147',\n", | |
" 'glucose_level_0.028', 'heart_disease_0.014', 'heart_disease_0.094',\n", | |
" 'hypertension_0.015', 'hypertension_0.051',\n", | |
" 'smoking_status_0.018000000000000002', 'smoking_status_0.02',\n", | |
" 'smoking_status_0.03', 'work_type_0.0', 'work_type_0.016',\n", | |
" 'work_type_0.018000000000000002', 'work_type_0.038']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"P=[ 'age_interval_0', 'age_interval_1',\n", | |
" 'age_interval_2', 'age_interval_3', 'age_interval_4', 'gender_0.0',\n", | |
" 'gender_0.017', 'gender_0.02', 'Residence_type_0.018000000000000002', 'bmi_interval_0.013',\n", | |
" 'bmi_interval_0.018000000000000002', 'ever_married_0.0052',\n", | |
" 'ever_married_0.0258', 'glucose_level_0.0123', 'glucose_level_0.0147',\n", | |
" 'glucose_level_0.028', 'heart_disease_0.014', 'heart_disease_0.094',\n", | |
" 'hypertension_0.015', 'hypertension_0.051',\n", | |
" 'smoking_status_0.018000000000000002', 'smoking_status_0.02',\n", | |
" 'smoking_status_0.03', 'work_type_0.0', 'work_type_0.016',\n", | |
" 'work_type_0.018000000000000002', 'work_type_0.038']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"Xtest=pd.DataFrame(data[P][0:43400])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"target=pd.DataFrame(data['stroke'][0:43400])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(43400, 1)" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"target.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(43400, 27)" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Xtest.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train_X, test_X, train_target, test_target= train_test_split(Xtest , target, train_size = 0.7,random_state=2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Train=data[0:43400]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train_X=Train[P]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(43400, 9)" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_X.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train_target=pd.DataFrame(Train['stroke'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(43400, 1)" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_target.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Test=data[43400:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"test_X=Test[P]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"xgtest= xgb.DMatrix(test_X.values)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"predict=model.predict(xgtest)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"predict=predict>0.5\n", | |
"predict=predict.astype(int)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([0, 0, 0, ..., 0, 0, 0])" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"predict" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"test_target=pd.DataFrame({'id':test['id'],'stroke':predict})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(18601, 2)" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"test_target.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"test_target.to_csv('submission2.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(test[test_target['stroke']==1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 417, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"P=['bmi_interval','hypertension','heart_disease','glucose_level','Residence_type','ever_married','smoking_status','work_type','gender']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[0]\ttrain-auc:0.841246\n", | |
"[1]\ttrain-auc:0.841246\n", | |
"[2]\ttrain-auc:0.841327\n", | |
"[3]\ttrain-auc:0.841327\n", | |
"[4]\ttrain-auc:0.841327\n", | |
"[5]\ttrain-auc:0.841334\n", | |
"[6]\ttrain-auc:0.841367\n", | |
"[7]\ttrain-auc:0.841367\n" | |
] | |
} | |
], | |
"source": [ | |
"params={'objective': 'binary:logistic','eval_metric': 'auc','scale_pos_weight':54,'silent':0} \n", | |
"xgtrain = xgb.DMatrix(train_X.values, label=train_target.values)\n", | |
"#xgtest= xgb.DMatrix(test_X.values,label=test_target.values) \n", | |
"\n", | |
"watchlist = [(xgtrain, 'train')]\n", | |
"num_round=8\n", | |
"\n", | |
"model=xgb.train(params,xgtrain,num_round,watchlist)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[0]\ttrain-auc:0.877561\teval-auc:0.845137\n", | |
"[1]\ttrain-auc:0.880451\teval-auc:0.847989\n", | |
"[2]\ttrain-auc:0.883226\teval-auc:0.850051\n", | |
"[3]\ttrain-auc:0.886788\teval-auc:0.851737\n", | |
"[4]\ttrain-auc:0.890359\teval-auc:0.851997\n", | |
"[5]\ttrain-auc:0.890853\teval-auc:0.851832\n", | |
"[6]\ttrain-auc:0.891175\teval-auc:0.85165\n", | |
"[7]\ttrain-auc:0.892022\teval-auc:0.851533\n", | |
"[8]\ttrain-auc:0.892413\teval-auc:0.852003\n", | |
"[9]\ttrain-auc:0.892878\teval-auc:0.851517\n", | |
"[10]\ttrain-auc:0.89464\teval-auc:0.851343\n", | |
"[11]\ttrain-auc:0.895186\teval-auc:0.850506\n", | |
"[12]\ttrain-auc:0.897878\teval-auc:0.849897\n", | |
"[13]\ttrain-auc:0.899182\teval-auc:0.850583\n", | |
"[14]\ttrain-auc:0.899595\teval-auc:0.84993\n", | |
"[15]\ttrain-auc:0.901118\teval-auc:0.850049\n", | |
"[16]\ttrain-auc:0.902494\teval-auc:0.849881\n" | |
] | |
} | |
], | |
"source": [ | |
"params={'objective': 'binary:logistic','eval_metric': 'auc', 'scale_pos_weight':54,'silent':0} \n", | |
"xgtrain = xgb.DMatrix(train_X.values, label=train_target.values)\n", | |
"xgtest= xgb.DMatrix(test_X.values,label=test_target.values) \n", | |
"\n", | |
"watchlist = [(xgtrain, 'train'),(xgtest,'eval')]\n", | |
"num_round=8\n", | |
"\n", | |
"model=xgb.train(params,xgtrain,num_round,watchlist)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[0]\ttrain-auc:0.86673\teval-auc:0.799802\n", | |
"[1]\ttrain-auc:0.871534\teval-auc:0.800611\n", | |
"[2]\ttrain-auc:0.873838\teval-auc:0.802685\n", | |
"[3]\ttrain-auc:0.876299\teval-auc:0.803171\n", | |
"[4]\ttrain-auc:0.876774\teval-auc:0.802577\n", | |
"[5]\ttrain-auc:0.878146\teval-auc:0.808125\n", | |
"[6]\ttrain-auc:0.879084\teval-auc:0.809344\n", | |
"[7]\ttrain-auc:0.87952\teval-auc:0.810456\n", | |
"[8]\ttrain-auc:0.880245\teval-auc:0.811217\n", | |
"[9]\ttrain-auc:0.880607\teval-auc:0.811298\n", | |
"[10]\ttrain-auc:0.881696\teval-auc:0.810341\n", | |
"[11]\ttrain-auc:0.882438\teval-auc:0.810638\n", | |
"[12]\ttrain-auc:0.883275\teval-auc:0.810752\n", | |
"[13]\ttrain-auc:0.884984\teval-auc:0.809\n", | |
"[14]\ttrain-auc:0.886189\teval-auc:0.807483\n", | |
"[15]\ttrain-auc:0.887482\teval-auc:0.806357\n", | |
"[16]\ttrain-auc:0.887948\teval-auc:0.806266\n", | |
"[17]\ttrain-auc:0.888362\teval-auc:0.805683\n", | |
"[18]\ttrain-auc:0.889922\teval-auc:0.803806\n", | |
"[19]\ttrain-auc:0.890519\teval-auc:0.803836\n" | |
] | |
} | |
], | |
"source": [ | |
"params={'objective': 'binary:logistic','eval_metric': 'auc', 'scale_pos_weight':54,'silent':0} \n", | |
"xgtrain = xgb.DMatrix(train_X.values, label=train_target.values)\n", | |
"xgtest= xgb.DMatrix(test_X.values,label=test_target.values) \n", | |
"\n", | |
"watchlist = [(xgtrain, 'train'),(xgtest,'eval')]\n", | |
"num_round=20\n", | |
"\n", | |
"model=xgb.train(params,xgtrain,num_round,watchlist)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[0]\ttrain-auc:0.860487\teval-auc:0.843272\n", | |
"[1]\ttrain-auc:0.864726\teval-auc:0.841243\n", | |
"[2]\ttrain-auc:0.868072\teval-auc:0.843214\n", | |
"[3]\ttrain-auc:0.869415\teval-auc:0.844786\n", | |
"[4]\ttrain-auc:0.871723\teval-auc:0.843371\n", | |
"[5]\ttrain-auc:0.87383\teval-auc:0.845329\n", | |
"[6]\ttrain-auc:0.874583\teval-auc:0.845435\n", | |
"[7]\ttrain-auc:0.875655\teval-auc:0.845041\n", | |
"[8]\ttrain-auc:0.875792\teval-auc:0.844825\n", | |
"[9]\ttrain-auc:0.87612\teval-auc:0.844958\n", | |
"[10]\ttrain-auc:0.876931\teval-auc:0.843441\n", | |
"[11]\ttrain-auc:0.877273\teval-auc:0.842603\n", | |
"[12]\ttrain-auc:0.877801\teval-auc:0.843569\n", | |
"[13]\ttrain-auc:0.87879\teval-auc:0.843148\n", | |
"[14]\ttrain-auc:0.879359\teval-auc:0.843363\n", | |
"[15]\ttrain-auc:0.880228\teval-auc:0.842602\n", | |
"[16]\ttrain-auc:0.881113\teval-auc:0.841346\n", | |
"[17]\ttrain-auc:0.882921\teval-auc:0.840363\n", | |
"[18]\ttrain-auc:0.883353\teval-auc:0.840222\n", | |
"[19]\ttrain-auc:0.883529\teval-auc:0.84016\n" | |
] | |
} | |
], | |
"source": [ | |
"params={'objective': 'binary:logistic','eval_metric': 'auc', 'scale_pos_weight':54,'silent':0} \n", | |
"xgtrain = xgb.DMatrix(train_X.values, label=train_target.values)\n", | |
"xgtest= xgb.DMatrix(test_X.values,label=test_target.values) \n", | |
"\n", | |
"watchlist = [(xgtrain, 'train'),(xgtest,'eval')]\n", | |
"num_round=20\n", | |
"\n", | |
"model=xgb.train(params,xgtrain,num_round,watchlist)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[0]\ttrain-auc:0.859473\teval-auc:0.849337\n", | |
"[1]\ttrain-auc:0.864813\teval-auc:0.849273\n", | |
"[2]\ttrain-auc:0.86703\teval-auc:0.848219\n", | |
"[3]\ttrain-auc:0.869633\teval-auc:0.846106\n", | |
"[4]\ttrain-auc:0.871101\teval-auc:0.844758\n", | |
"[5]\ttrain-auc:0.872499\teval-auc:0.845837\n", | |
"[6]\ttrain-auc:0.872957\teval-auc:0.846623\n", | |
"[7]\ttrain-auc:0.873198\teval-auc:0.847826\n", | |
"[8]\ttrain-auc:0.873717\teval-auc:0.847706\n", | |
"[9]\ttrain-auc:0.874418\teval-auc:0.846017\n", | |
"[10]\ttrain-auc:0.87503\teval-auc:0.84529\n", | |
"[11]\ttrain-auc:0.875315\teval-auc:0.844226\n", | |
"[12]\ttrain-auc:0.875688\teval-auc:0.844982\n", | |
"[13]\ttrain-auc:0.876293\teval-auc:0.844024\n", | |
"[14]\ttrain-auc:0.877974\teval-auc:0.845358\n", | |
"[15]\ttrain-auc:0.878252\teval-auc:0.844628\n", | |
"[16]\ttrain-auc:0.878822\teval-auc:0.844068\n", | |
"[17]\ttrain-auc:0.879803\teval-auc:0.843451\n", | |
"[18]\ttrain-auc:0.880926\teval-auc:0.84163\n", | |
"[19]\ttrain-auc:0.881054\teval-auc:0.841593\n" | |
] | |
} | |
], | |
"source": [ | |
"params={'objective': 'binary:logistic','eval_metric': 'auc', 'scale_pos_weight':54,'silent':0} \n", | |
"xgtrain = xgb.DMatrix(train_X.values, label=train_target.values)\n", | |
"xgtest= xgb.DMatrix(test_X.values,label=test_target.values) \n", | |
"\n", | |
"watchlist = [(xgtrain, 'train'),(xgtest,'eval')]\n", | |
"num_round=20\n", | |
"\n", | |
"model=xgb.train(params,xgtrain,num_round,watchlist)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"dtest_predictions = xgb1.predict(test_X) \n", | |
"dtest_predprob = xgb1.predict_proba(test_X)[:,1]\n", | |
"#Print model report:\n", | |
"print(\"\\nModel Report\")\n", | |
"print(\"Accuracy on Test set: %.4g\" % metrics.accuracy_score(test_target.values, dtest_predictions))\n", | |
"print(\"AUC Score on test: %f\" % metrics.roc_auc_score(test_target, dtest_predprob))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment