Last active
December 11, 2019 05:39
-
-
Save MohdAzamSayeed/001c0b6c5363db375c3e0dbe4ce929f0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Which factor (age, trestbps, thalach)\n", | |
"# has more influence on whether a patient has heart disease, using a decision tree" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"\n", | |
"dataset = pd.read_csv(\"heart1.csv\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>sex</th>\n", | |
" <th>cp</th>\n", | |
" <th>trestbps</th>\n", | |
" <th>chol</th>\n", | |
" <th>fbs</th>\n", | |
" <th>restecg</th>\n", | |
" <th>thalach</th>\n", | |
" <th>exang</th>\n", | |
" <th>oldpeak</th>\n", | |
" <th>slope</th>\n", | |
" <th>ca</th>\n", | |
" <th>target</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>63</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>145</td>\n", | |
" <td>233</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>2.3</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>37</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>130</td>\n", | |
" <td>250</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>187</td>\n", | |
" <td>0</td>\n", | |
" <td>3.5</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>41</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>130</td>\n", | |
" <td>204</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>172</td>\n", | |
" <td>0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>56</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>120</td>\n", | |
" <td>236</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>178</td>\n", | |
" <td>0</td>\n", | |
" <td>0.8</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>57</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>120</td>\n", | |
" <td>354</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>163</td>\n", | |
" <td>1</td>\n", | |
" <td>0.6</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", | |
"0 63 1 3 145 233 1 0 150 0 2.3 0 \n", | |
"1 37 1 2 130 250 0 1 187 0 3.5 0 \n", | |
"2 41 0 1 130 204 0 0 172 0 1.4 2 \n", | |
"3 56 1 1 120 236 0 1 178 0 0.8 2 \n", | |
"4 57 0 0 120 354 0 1 163 1 0.6 2 \n", | |
"\n", | |
" ca target \n", | |
"0 0 1 \n", | |
"1 0 1 \n", | |
"2 0 1 \n", | |
"3 0 1 \n", | |
"4 0 1 " | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1 165\n", | |
"0 138\n", | |
"Name: target, dtype: int64" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset.target.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"age = dataset[['age']]\n", | |
"trestbps = dataset[['trestbps']]\n", | |
"thalach = dataset[['thalach']]\n", | |
"target =dataset[['target']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 303 entries, 0 to 302\n", | |
"Data columns (total 13 columns):\n", | |
"age 303 non-null int64\n", | |
"sex 303 non-null int64\n", | |
"cp 303 non-null int64\n", | |
"trestbps 303 non-null int64\n", | |
"chol 303 non-null int64\n", | |
"fbs 303 non-null int64\n", | |
"restecg 303 non-null int64\n", | |
"thalach 303 non-null int64\n", | |
"exang 303 non-null int64\n", | |
"oldpeak 303 non-null float64\n", | |
"slope 303 non-null int64\n", | |
"ca 303 non-null int64\n", | |
"target 303 non-null int64\n", | |
"dtypes: float64(1), int64(12)\n", | |
"memory usage: 30.9 KB\n" | |
] | |
} | |
], | |
"source": [ | |
"dataset.info(null_counts=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"x_train,x_test, y_train,y_test = train_test_split(age,target,test_size=0.30)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", | |
" max_features=None, max_leaf_nodes=None,\n", | |
" min_impurity_decrease=0.0, min_impurity_split=None,\n", | |
" min_samples_leaf=1, min_samples_split=2,\n", | |
" min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", | |
" splitter='best')" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from sklearn.tree import DecisionTreeClassifier\n", | |
"classifier = DecisionTreeClassifier()\n", | |
"classifier.fit(x_train,y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>50</th>\n", | |
" <td>51</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31</th>\n", | |
" <td>65</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>88</th>\n", | |
" <td>54</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>72</th>\n", | |
" <td>29</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>51</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" age 0\n", | |
"50 51 1\n", | |
"31 65 0\n", | |
"88 54 1\n", | |
"72 29 0\n", | |
"27 51 1" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y_pred = classifier.predict(x_test)\n", | |
"z= pd.DataFrame(y_pred)\n", | |
"pp=x_test.merge(z,how='inner',left_index=True, right_index=True)\n", | |
"pp.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[17 16]\n", | |
" [27 31]]\n" | |
] | |
} | |
], | |
"source": [ | |
"from sklearn.metrics import confusion_matrix\n", | |
"print(confusion_matrix(y_test,y_pred))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.5274725274725275\n" | |
] | |
} | |
], | |
"source": [ | |
"from sklearn.metrics import accuracy_score\n", | |
"print(accuracy_score(y_test,y_pred))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[14 31]\n", | |
" [ 9 37]]\n", | |
"0.5604395604395604\n", | |
"[[23 12]\n", | |
" [26 30]]\n", | |
"0.5824175824175825\n" | |
] | |
} | |
], | |
"source": [ | |
"x_data=[]\n", | |
"for x_data in trestbps,thalach:\n", | |
" x_train,x_test, y_train,y_test = train_test_split(x_data,target,test_size=0.30)\n", | |
" classifier = DecisionTreeClassifier()\n", | |
" classifier.fit(x_train,y_train)\n", | |
" y_pred = classifier.predict(x_test)\n", | |
" print(confusion_matrix(y_test,y_pred))\n", | |
" print(accuracy_score(y_test,y_pred))\n", | |
" \n", | |
" \n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Which factor (cp, chol, ca)\n", | |
"# has more influence on a patient having heart disease, using naive Bayes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>sex</th>\n", | |
" <th>cp</th>\n", | |
" <th>trestbps</th>\n", | |
" <th>chol</th>\n", | |
" <th>fbs</th>\n", | |
" <th>restecg</th>\n", | |
" <th>thalach</th>\n", | |
" <th>exang</th>\n", | |
" <th>oldpeak</th>\n", | |
" <th>slope</th>\n", | |
" <th>ca</th>\n", | |
" <th>target</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>63</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>145</td>\n", | |
" <td>233</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>2.3</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>37</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>130</td>\n", | |
" <td>250</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>187</td>\n", | |
" <td>0</td>\n", | |
" <td>3.5</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>41</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>130</td>\n", | |
" <td>204</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>172</td>\n", | |
" <td>0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>56</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>120</td>\n", | |
" <td>236</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>178</td>\n", | |
" <td>0</td>\n", | |
" <td>0.8</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>57</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>120</td>\n", | |
" <td>354</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>163</td>\n", | |
" <td>1</td>\n", | |
" <td>0.6</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", | |
"0 63 1 3 145 233 1 0 150 0 2.3 0 \n", | |
"1 37 1 2 130 250 0 1 187 0 3.5 0 \n", | |
"2 41 0 1 130 204 0 0 172 0 1.4 2 \n", | |
"3 56 1 1 120 236 0 1 178 0 0.8 2 \n", | |
"4 57 0 0 120 354 0 1 163 1 0.6 2 \n", | |
"\n", | |
" ca target \n", | |
"0 0 1 \n", | |
"1 0 1 \n", | |
"2 0 1 \n", | |
"3 0 1 \n", | |
"4 0 1 " | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"\n", | |
"data =pd.read_csv('heart1.csv')\n", | |
"data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"cp int64\n", | |
"chol int64\n", | |
"ca int64\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.loc[:,['cp','chol','ca']].dtypes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[3 2 1 0]\n", | |
"[233 250 204 236 354 192 294 263 199 168 239 275 266 211 283 219 340 226\n", | |
" 247 234 243 302 212 175 417 197 198 177 273 213 304 232 269 360 308 245\n", | |
" 208 264 321 325 235 257 216 256 231 141 252 201 222 260 182 303 265 309\n", | |
" 186 203 183 220 209 258 227 261 221 205 240 318 298 564 277 214 248 255\n", | |
" 207 223 288 160 394 315 246 244 270 195 196 254 126 313 262 215 193 271\n", | |
" 268 267 210 295 306 178 242 180 228 149 278 253 342 157 286 229 284 224\n", | |
" 206 167 230 335 276 353 225 330 290 172 305 188 282 185 326 274 164 307\n", | |
" 249 341 407 217 174 281 289 322 299 300 293 184 409 259 200 327 237 218\n", | |
" 319 166 311 169 187 176 241 131]\n", | |
"[0 2 1 3 4]\n" | |
] | |
} | |
], | |
"source": [ | |
"print(data.cp.unique())\n", | |
"print(data.chol.unique())\n", | |
"print(data.ca.unique())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cp =data[['cp']]\n", | |
"chol =data[['chol']]\n", | |
"ca=data[['ca']]\n", | |
"Y=data[['target']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.7894736842105263\n", | |
"0.8026315789473685\n", | |
"0.7368421052631579\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", | |
" y = column_or_1d(y, warn=True)\n", | |
"D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", | |
" y = column_or_1d(y, warn=True)\n", | |
"D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", | |
" y = column_or_1d(y, warn=True)\n" | |
] | |
} | |
], | |
"source": [ | |
"from sklearn.naive_bayes import BernoulliNB \n", | |
"#using Bernoulli style of NB \n", | |
"from sklearn.model_selection import train_test_split\n", | |
"\n", | |
"x_data=[]\n", | |
"for x_data in cp,chol,ca:\n", | |
" X_train, X_test, Y_train, Y_test = train_test_split(cp, Y, test_size=0.25)\n", | |
"\n", | |
" bnb = BernoulliNB(binarize=0.0)\n", | |
" bnb.fit(X_train, Y_train)\n", | |
" print(bnb.score(X_test, Y_test))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[1]\n" | |
] | |
} | |
], | |
"source": [ | |
"#predicting target based on model build on y:target, x:ca\n", | |
"datas = np.array([4]).reshape(-1,1)\n", | |
"print(bnb.predict(datas))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment