Skip to content

Instantly share code, notes, and snippets.

@MohdAzamSayeed
Last active December 11, 2019 05:39
Show Gist options
  • Save MohdAzamSayeed/001c0b6c5363db375c3e0dbe4ce929f0 to your computer and use it in GitHub Desktop.
Save MohdAzamSayeed/001c0b6c5363db375c3e0dbe4ce929f0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Which factor (age, trestbps, thalach)\n",
"# best predicts whether a patient has heart disease, using a decision tree"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"dataset = pd.read_csv(\"heart1.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>cp</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>fbs</th>\n",
" <th>restecg</th>\n",
" <th>thalach</th>\n",
" <th>exang</th>\n",
" <th>oldpeak</th>\n",
" <th>slope</th>\n",
" <th>ca</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>145</td>\n",
" <td>233</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>150</td>\n",
" <td>0</td>\n",
" <td>2.3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>130</td>\n",
" <td>250</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>187</td>\n",
" <td>0</td>\n",
" <td>3.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>41</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>130</td>\n",
" <td>204</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>172</td>\n",
" <td>0</td>\n",
" <td>1.4</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>56</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>120</td>\n",
" <td>236</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>178</td>\n",
" <td>0</td>\n",
" <td>0.8</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>57</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>120</td>\n",
" <td>354</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>163</td>\n",
" <td>1</td>\n",
" <td>0.6</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
"0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
"1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
"2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
"3 56 1 1 120 236 0 1 178 0 0.8 2 \n",
"4 57 0 0 120 354 0 1 163 1 0.6 2 \n",
"\n",
" ca target \n",
"0 0 1 \n",
"1 0 1 \n",
"2 0 1 \n",
"3 0 1 \n",
"4 0 1 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 165\n",
"0 138\n",
"Name: target, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count how many rows fall into each target class.\n",
"dataset['target'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Keep every candidate feature (and the label) as a single-column DataFrame.\n",
"age = dataset.loc[:, ['age']]\n",
"trestbps = dataset.loc[:, ['trestbps']]\n",
"thalach = dataset.loc[:, ['thalach']]\n",
"target = dataset.loc[:, ['target']]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 303 entries, 0 to 302\n",
"Data columns (total 13 columns):\n",
"age 303 non-null int64\n",
"sex 303 non-null int64\n",
"cp 303 non-null int64\n",
"trestbps 303 non-null int64\n",
"chol 303 non-null int64\n",
"fbs 303 non-null int64\n",
"restecg 303 non-null int64\n",
"thalach 303 non-null int64\n",
"exang 303 non-null int64\n",
"oldpeak 303 non-null float64\n",
"slope 303 non-null int64\n",
"ca 303 non-null int64\n",
"target 303 non-null int64\n",
"dtypes: float64(1), int64(12)\n",
"memory usage: 30.9 KB\n"
]
}
],
"source": [
"# `null_counts` was deprecated in pandas 1.2 and removed in 2.0. The default\n",
"# call already reports non-null counts for a frame this small, so it works\n",
"# unchanged on old and new pandas alike.\n",
"dataset.info()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 30% of the rows for evaluation. The shuffle is seeded so that the\n",
"# split, and every score derived from it, is the same on each run.\n",
"x_train, x_test, y_train, y_test = train_test_split(\n",
"    age, target, test_size=0.30, random_state=42\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
" max_features=None, max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
" splitter='best')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"# Fit a default-configured tree on the training split; the fitted estimator\n",
"# is the last expression, so its repr is shown as the cell output.\n",
"classifier = DecisionTreeClassifier()\n",
"classifier.fit(X=x_train, y=y_train)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>51</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>65</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>54</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>51</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age 0\n",
"50 51 1\n",
"31 65 0\n",
"88 54 1\n",
"72 29 0\n",
"27 51 1"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_pred = classifier.predict(x_test)\n",
"# Give the predictions the same row labels as x_test. With a default 0..n-1\n",
"# index the index-based merge below would pair predictions with unrelated\n",
"# rows and silently drop every test row whose label is >= len(y_pred).\n",
"z = pd.DataFrame(y_pred, index=x_test.index)\n",
"pp = x_test.merge(z, how='inner', left_index=True, right_index=True)\n",
"pp.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[17 16]\n",
" [27 31]]\n"
]
}
],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"\n",
"# Rows are the true classes, columns the predicted classes.\n",
"cm = confusion_matrix(y_true=y_test, y_pred=y_pred)\n",
"print(cm)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.5274725274725275\n"
]
}
],
"source": [
"from sklearn.metrics import accuracy_score\n",
"\n",
"# Fraction of test rows whose predicted class matches the true class.\n",
"accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)\n",
"print(accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[14 31]\n",
" [ 9 37]]\n",
"0.5604395604395604\n",
"[[23 12]\n",
" [26 30]]\n",
"0.5824175824175825\n"
]
}
],
"source": [
"# Repeat the single-feature decision-tree experiment for the remaining\n",
"# candidate features, one model per feature.\n",
"for x_data in trestbps, thalach:\n",
"    # Seeded split: results are reproducible and both features are scored on\n",
"    # the same held-out rows.\n",
"    x_train, x_test, y_train, y_test = train_test_split(\n",
"        x_data, target, test_size=0.30, random_state=42\n",
"    )\n",
"    classifier = DecisionTreeClassifier()\n",
"    classifier.fit(x_train, y_train)\n",
"    y_pred = classifier.predict(x_test)\n",
"    # Label each result; otherwise the printed matrices and scores cannot be\n",
"    # told apart.\n",
"    print(x_data.columns[0])\n",
"    print(confusion_matrix(y_test, y_pred))\n",
"    print(accuracy_score(y_test, y_pred))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Which factor (cp, chol, ca)\n",
"# best predicts whether a patient has heart disease, using Naive Bayes"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>cp</th>\n",
" <th>trestbps</th>\n",
" <th>chol</th>\n",
" <th>fbs</th>\n",
" <th>restecg</th>\n",
" <th>thalach</th>\n",
" <th>exang</th>\n",
" <th>oldpeak</th>\n",
" <th>slope</th>\n",
" <th>ca</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>145</td>\n",
" <td>233</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>150</td>\n",
" <td>0</td>\n",
" <td>2.3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>130</td>\n",
" <td>250</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>187</td>\n",
" <td>0</td>\n",
" <td>3.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>41</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>130</td>\n",
" <td>204</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>172</td>\n",
" <td>0</td>\n",
" <td>1.4</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>56</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>120</td>\n",
" <td>236</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>178</td>\n",
" <td>0</td>\n",
" <td>0.8</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>57</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>120</td>\n",
" <td>354</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>163</td>\n",
" <td>1</td>\n",
" <td>0.6</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
"0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
"1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
"2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
"3 56 1 1 120 236 0 1 178 0 0.8 2 \n",
"4 57 0 0 120 354 0 1 163 1 0.6 2 \n",
"\n",
" ca target \n",
"0 0 1 \n",
"1 0 1 \n",
"2 0 1 \n",
"3 0 1 \n",
"4 0 1 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"data =pd.read_csv('heart1.csv')\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"cp int64\n",
"chol int64\n",
"ca int64\n",
"dtype: object"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check the dtypes of the three candidate features.\n",
"data[['cp','chol','ca']].dtypes"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[3 2 1 0]\n",
"[233 250 204 236 354 192 294 263 199 168 239 275 266 211 283 219 340 226\n",
" 247 234 243 302 212 175 417 197 198 177 273 213 304 232 269 360 308 245\n",
" 208 264 321 325 235 257 216 256 231 141 252 201 222 260 182 303 265 309\n",
" 186 203 183 220 209 258 227 261 221 205 240 318 298 564 277 214 248 255\n",
" 207 223 288 160 394 315 246 244 270 195 196 254 126 313 262 215 193 271\n",
" 268 267 210 295 306 178 242 180 228 149 278 253 342 157 286 229 284 224\n",
" 206 167 230 335 276 353 225 330 290 172 305 188 282 185 326 274 164 307\n",
" 249 341 407 217 174 281 289 322 299 300 293 184 409 259 200 327 237 218\n",
" 319 166 311 169 187 176 241 131]\n",
"[0 2 1 3 4]\n"
]
}
],
"source": [
"# Show the distinct values taken by each candidate feature.\n",
"for column in ('cp', 'chol', 'ca'):\n",
"    print(data[column].unique())"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# Single-column DataFrames for each candidate feature and for the label.\n",
"cp = data.loc[:, ['cp']]\n",
"chol = data.loc[:, ['chol']]\n",
"ca = data.loc[:, ['ca']]\n",
"Y = data.loc[:, ['target']]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7894736842105263\n",
"0.8026315789473685\n",
"0.7368421052631579\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n",
"D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n",
"D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
}
],
"source": [
"from sklearn.naive_bayes import BernoulliNB\n",
"# using Bernoulli style of NB\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Fit and score one Bernoulli Naive Bayes model per candidate feature.\n",
"for x_data in cp, chol, ca:\n",
"    # Split the feature currently being evaluated. This was hard-coded to\n",
"    # `cp`, so all three printed scores described the same feature.\n",
"    # `Y.values.ravel()` passes a 1-D label array, which avoids the\n",
"    # DataConversionWarning about a column-vector y.\n",
"    X_train, X_test, Y_train, Y_test = train_test_split(\n",
"        x_data, Y.values.ravel(), test_size=0.25, random_state=42\n",
"    )\n",
"\n",
"    # NOTE(review): binarize=0.0 maps every value > 0 to 1. All chol values\n",
"    # in this data are positive, so chol becomes a constant feature here;\n",
"    # consider another threshold or GaussianNB for that column.\n",
"    bnb = BernoulliNB(binarize=0.0)\n",
"    bnb.fit(X_train, Y_train)\n",
"    # Label the score so each line can be attributed to its feature.\n",
"    print(x_data.columns[0], bnb.score(X_test, Y_test))"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1]\n"
]
}
],
"source": [
"# Predict the target for one observation whose feature value is 4, using\n",
"# whichever model the preceding loop fitted last.\n",
"datas = np.array([[4]])\n",
"print(bnb.predict(datas))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment