MohdAzamSayeed/NaiveBayesAnd DecisionTreeDemo.ipynb

## NaiveBayesAnd DecisionTreeDemo.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# To find out which factor -> age,trestbps,thalach \n",
    "# influences more on patient having heart disease or not using DecisionTree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "dataset = pd.read_csv(\"heart1.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>cp</th>\n",
       "      <th>trestbps</th>\n",
       "      <th>chol</th>\n",
       "      <th>fbs</th>\n",
       "      <th>restecg</th>\n",
       "      <th>thalach</th>\n",
       "      <th>exang</th>\n",
       "      <th>oldpeak</th>\n",
       "      <th>slope</th>\n",
       "      <th>ca</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>63</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>145</td>\n",
       "      <td>233</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>150</td>\n",
       "      <td>0</td>\n",
       "      <td>2.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>37</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>130</td>\n",
       "      <td>250</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>187</td>\n",
       "      <td>0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>41</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>130</td>\n",
       "      <td>204</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>172</td>\n",
       "      <td>0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>120</td>\n",
       "      <td>236</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>178</td>\n",
       "      <td>0</td>\n",
       "      <td>0.8</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>57</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>120</td>\n",
       "      <td>354</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>163</td>\n",
       "      <td>1</td>\n",
       "      <td>0.6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \\\n",
       "0   63    1   3       145   233    1        0      150      0      2.3      0   \n",
       "1   37    1   2       130   250    0        1      187      0      3.5      0   \n",
       "2   41    0   1       130   204    0        0      172      0      1.4      2   \n",
       "3   56    1   1       120   236    0        1      178      0      0.8      2   \n",
       "4   57    0   0       120   354    0        1      163      1      0.6      2   \n",
       "\n",
       "   ca  target  \n",
       "0   0       1  \n",
       "1   0       1  \n",
       "2   0       1  \n",
       "3   0       1  \n",
       "4   0       1  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    165\n",
       "0    138\n",
       "Name: target, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.target.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "age = dataset[['age']]\n",
    "trestbps = dataset[['trestbps']]\n",
    "thalach = dataset[['thalach']]\n",
    "target =dataset[['target']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 303 entries, 0 to 302\n",
      "Data columns (total 13 columns):\n",
      "age         303 non-null int64\n",
      "sex         303 non-null int64\n",
      "cp          303 non-null int64\n",
      "trestbps    303 non-null int64\n",
      "chol        303 non-null int64\n",
      "fbs         303 non-null int64\n",
      "restecg     303 non-null int64\n",
      "thalach     303 non-null int64\n",
      "exang       303 non-null int64\n",
      "oldpeak     303 non-null float64\n",
      "slope       303 non-null int64\n",
      "ca          303 non-null int64\n",
      "target      303 non-null int64\n",
      "dtypes: float64(1), int64(12)\n",
      "memory usage: 30.9 KB\n"
     ]
    }
   ],
   "source": [
    "dataset.info(null_counts=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "x_train,x_test, y_train,y_test = train_test_split(age,target,test_size=0.30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
       "            max_features=None, max_leaf_nodes=None,\n",
       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "            min_samples_leaf=1, min_samples_split=2,\n",
       "            min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
       "            splitter='best')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "classifier = DecisionTreeClassifier()\n",
    "classifier.fit(x_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>51</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>65</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>51</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age  0\n",
       "50   51  1\n",
       "31   65  0\n",
       "88   54  1\n",
       "72   29  0\n",
       "27   51  1"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred = classifier.predict(x_test)\n",
    "z= pd.DataFrame(y_pred)\n",
    "pp=x_test.merge(z,how='inner',left_index=True, right_index=True)\n",
    "pp.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[17 16]\n",
      " [27 31]]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "print(confusion_matrix(y_test,y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.5274725274725275\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "print(accuracy_score(y_test,y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[14 31]\n",
      " [ 9 37]]\n",
      "0.5604395604395604\n",
      "[[23 12]\n",
      " [26 30]]\n",
      "0.5824175824175825\n"
     ]
    }
   ],
   "source": [
    "x_data=[]\n",
    "for x_data in trestbps,thalach:\n",
    "    x_train,x_test, y_train,y_test = train_test_split(x_data,target,test_size=0.30)\n",
    "    classifier = DecisionTreeClassifier()\n",
    "    classifier.fit(x_train,y_train)\n",
    "    y_pred = classifier.predict(x_test)\n",
    "    print(confusion_matrix(y_test,y_pred))\n",
    "    print(accuracy_score(y_test,y_pred))\n",
    "    \n",
    "    \n",
    " "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# To find out which factor -> cp,chol,ca \n",
    "# influences more on patient having heart disease using Naive Bayes "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>cp</th>\n",
       "      <th>trestbps</th>\n",
       "      <th>chol</th>\n",
       "      <th>fbs</th>\n",
       "      <th>restecg</th>\n",
       "      <th>thalach</th>\n",
       "      <th>exang</th>\n",
       "      <th>oldpeak</th>\n",
       "      <th>slope</th>\n",
       "      <th>ca</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>63</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>145</td>\n",
       "      <td>233</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>150</td>\n",
       "      <td>0</td>\n",
       "      <td>2.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>37</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>130</td>\n",
       "      <td>250</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>187</td>\n",
       "      <td>0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>41</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>130</td>\n",
       "      <td>204</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>172</td>\n",
       "      <td>0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>120</td>\n",
       "      <td>236</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>178</td>\n",
       "      <td>0</td>\n",
       "      <td>0.8</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>57</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>120</td>\n",
       "      <td>354</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>163</td>\n",
       "      <td>1</td>\n",
       "      <td>0.6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \\\n",
       "0   63    1   3       145   233    1        0      150      0      2.3      0   \n",
       "1   37    1   2       130   250    0        1      187      0      3.5      0   \n",
       "2   41    0   1       130   204    0        0      172      0      1.4      2   \n",
       "3   56    1   1       120   236    0        1      178      0      0.8      2   \n",
       "4   57    0   0       120   354    0        1      163      1      0.6      2   \n",
       "\n",
       "   ca  target  \n",
       "0   0       1  \n",
       "1   0       1  \n",
       "2   0       1  \n",
       "3   0       1  \n",
       "4   0       1  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "data =pd.read_csv('heart1.csv')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "cp      int64\n",
       "chol    int64\n",
       "ca      int64\n",
       "dtype: object"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.loc[:,['cp','chol','ca']].dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[3 2 1 0]\n",
      "[233 250 204 236 354 192 294 263 199 168 239 275 266 211 283 219 340 226\n",
      " 247 234 243 302 212 175 417 197 198 177 273 213 304 232 269 360 308 245\n",
      " 208 264 321 325 235 257 216 256 231 141 252 201 222 260 182 303 265 309\n",
      " 186 203 183 220 209 258 227 261 221 205 240 318 298 564 277 214 248 255\n",
      " 207 223 288 160 394 315 246 244 270 195 196 254 126 313 262 215 193 271\n",
      " 268 267 210 295 306 178 242 180 228 149 278 253 342 157 286 229 284 224\n",
      " 206 167 230 335 276 353 225 330 290 172 305 188 282 185 326 274 164 307\n",
      " 249 341 407 217 174 281 289 322 299 300 293 184 409 259 200 327 237 218\n",
      " 319 166 311 169 187 176 241 131]\n",
      "[0 2 1 3 4]\n"
     ]
    }
   ],
   "source": [
    "print(data.cp.unique())\n",
    "print(data.chol.unique())\n",
    "print(data.ca.unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "cp =data[['cp']]\n",
    "chol =data[['chol']]\n",
    "ca=data[['ca']]\n",
    "Y=data[['target']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7894736842105263\n",
      "0.8026315789473685\n",
      "0.7368421052631579\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.naive_bayes import BernoulliNB \n",
    "#using Bernoulli style of NB \n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "x_data=[]\n",
    "for x_data in cp,chol,ca:\n",
    "    X_train, X_test, Y_train, Y_test = train_test_split(cp, Y, test_size=0.25)\n",
    "\n",
    "    bnb = BernoulliNB(binarize=0.0)\n",
    "    bnb.fit(X_train, Y_train)\n",
    "    print(bnb.score(X_test, Y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1]\n"
     ]
    }
   ],
   "source": [
    "#predicting target based on model build on y:target, x:ca\n",
    "datas = np.array([4]).reshape(-1,1)\n",
    "print(bnb.predict(datas))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# To find out which factor -> age,trestbps,thalach \n",
	"# influences more on patient having heart disease or not using DecisionTree"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"import pandas as pd\n",
	"import matplotlib.pyplot as plt\n",
	"\n",
	"dataset = pd.read_csv(\"heart1.csv\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>age</th>\n",
	" <th>sex</th>\n",
	" <th>cp</th>\n",
	" <th>trestbps</th>\n",
	" <th>chol</th>\n",
	" <th>fbs</th>\n",
	" <th>restecg</th>\n",
	" <th>thalach</th>\n",
	" <th>exang</th>\n",
	" <th>oldpeak</th>\n",
	" <th>slope</th>\n",
	" <th>ca</th>\n",
	" <th>target</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>63</td>\n",
	" <td>1</td>\n",
	" <td>3</td>\n",
	" <td>145</td>\n",
	" <td>233</td>\n",
	" <td>1</td>\n",
	" <td>0</td>\n",
	" <td>150</td>\n",
	" <td>0</td>\n",
	" <td>2.3</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>37</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>130</td>\n",
	" <td>250</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" <td>187</td>\n",
	" <td>0</td>\n",
	" <td>3.5</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>41</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" <td>130</td>\n",
	" <td>204</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>172</td>\n",
	" <td>0</td>\n",
	" <td>1.4</td>\n",
	" <td>2</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>56</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>120</td>\n",
	" <td>236</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" <td>178</td>\n",
	" <td>0</td>\n",
	" <td>0.8</td>\n",
	" <td>2</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>57</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>120</td>\n",
	" <td>354</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" <td>163</td>\n",
	" <td>1</td>\n",
	" <td>0.6</td>\n",
	" <td>2</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
	"0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
	"1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
	"2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
	"3 56 1 1 120 236 0 1 178 0 0.8 2 \n",
	"4 57 0 0 120 354 0 1 163 1 0.6 2 \n",
	"\n",
	" ca target \n",
	"0 0 1 \n",
	"1 0 1 \n",
	"2 0 1 \n",
	"3 0 1 \n",
	"4 0 1 "
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dataset.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1 165\n",
	"0 138\n",
	"Name: target, dtype: int64"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dataset.target.value_counts()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"age = dataset[['age']]\n",
	"trestbps = dataset[['trestbps']]\n",
	"thalach = dataset[['thalach']]\n",
	"target =dataset[['target']]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"<class 'pandas.core.frame.DataFrame'>\n",
	"RangeIndex: 303 entries, 0 to 302\n",
	"Data columns (total 13 columns):\n",
	"age 303 non-null int64\n",
	"sex 303 non-null int64\n",
	"cp 303 non-null int64\n",
	"trestbps 303 non-null int64\n",
	"chol 303 non-null int64\n",
	"fbs 303 non-null int64\n",
	"restecg 303 non-null int64\n",
	"thalach 303 non-null int64\n",
	"exang 303 non-null int64\n",
	"oldpeak 303 non-null float64\n",
	"slope 303 non-null int64\n",
	"ca 303 non-null int64\n",
	"target 303 non-null int64\n",
	"dtypes: float64(1), int64(12)\n",
	"memory usage: 30.9 KB\n"
	]
	}
	],
	"source": [
	"dataset.info(null_counts=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"from sklearn.model_selection import train_test_split\n",
	"x_train,x_test, y_train,y_test = train_test_split(age,target,test_size=0.30)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
	" max_features=None, max_leaf_nodes=None,\n",
	" min_impurity_decrease=0.0, min_impurity_split=None,\n",
	" min_samples_leaf=1, min_samples_split=2,\n",
	" min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
	" splitter='best')"
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"from sklearn.tree import DecisionTreeClassifier\n",
	"classifier = DecisionTreeClassifier()\n",
	"classifier.fit(x_train,y_train)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>age</th>\n",
	" <th>0</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>50</th>\n",
	" <td>51</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>31</th>\n",
	" <td>65</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>88</th>\n",
	" <td>54</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>72</th>\n",
	" <td>29</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>27</th>\n",
	" <td>51</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" age 0\n",
	"50 51 1\n",
	"31 65 0\n",
	"88 54 1\n",
	"72 29 0\n",
	"27 51 1"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"y_pred = classifier.predict(x_test)\n",
	"z= pd.DataFrame(y_pred)\n",
	"pp=x_test.merge(z,how='inner',left_index=True, right_index=True)\n",
	"pp.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[17 16]\n",
	" [27 31]]\n"
	]
	}
	],
	"source": [
	"from sklearn.metrics import confusion_matrix\n",
	"print(confusion_matrix(y_test,y_pred))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"0.5274725274725275\n"
	]
	}
	],
	"source": [
	"from sklearn.metrics import accuracy_score\n",
	"print(accuracy_score(y_test,y_pred))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[14 31]\n",
	" [ 9 37]]\n",
	"0.5604395604395604\n",
	"[[23 12]\n",
	" [26 30]]\n",
	"0.5824175824175825\n"
	]
	}
	],
	"source": [
	"x_data=[]\n",
	"for x_data in trestbps,thalach:\n",
	" x_train,x_test, y_train,y_test = train_test_split(x_data,target,test_size=0.30)\n",
	" classifier = DecisionTreeClassifier()\n",
	" classifier.fit(x_train,y_train)\n",
	" y_pred = classifier.predict(x_test)\n",
	" print(confusion_matrix(y_test,y_pred))\n",
	" print(accuracy_score(y_test,y_pred))\n",
	" \n",
	" \n",
	" "
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# To find out which factor -> cp,chol,ca \n",
	"# influences more on patient having heart disease using Naive Bayes "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>age</th>\n",
	" <th>sex</th>\n",
	" <th>cp</th>\n",
	" <th>trestbps</th>\n",
	" <th>chol</th>\n",
	" <th>fbs</th>\n",
	" <th>restecg</th>\n",
	" <th>thalach</th>\n",
	" <th>exang</th>\n",
	" <th>oldpeak</th>\n",
	" <th>slope</th>\n",
	" <th>ca</th>\n",
	" <th>target</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>63</td>\n",
	" <td>1</td>\n",
	" <td>3</td>\n",
	" <td>145</td>\n",
	" <td>233</td>\n",
	" <td>1</td>\n",
	" <td>0</td>\n",
	" <td>150</td>\n",
	" <td>0</td>\n",
	" <td>2.3</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>37</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>130</td>\n",
	" <td>250</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" <td>187</td>\n",
	" <td>0</td>\n",
	" <td>3.5</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>41</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" <td>130</td>\n",
	" <td>204</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>172</td>\n",
	" <td>0</td>\n",
	" <td>1.4</td>\n",
	" <td>2</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>56</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>120</td>\n",
	" <td>236</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" <td>178</td>\n",
	" <td>0</td>\n",
	" <td>0.8</td>\n",
	" <td>2</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>57</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>120</td>\n",
	" <td>354</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" <td>163</td>\n",
	" <td>1</td>\n",
	" <td>0.6</td>\n",
	" <td>2</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
	"0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
	"1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
	"2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
	"3 56 1 1 120 236 0 1 178 0 0.8 2 \n",
	"4 57 0 0 120 354 0 1 163 1 0.6 2 \n",
	"\n",
	" ca target \n",
	"0 0 1 \n",
	"1 0 1 \n",
	"2 0 1 \n",
	"3 0 1 \n",
	"4 0 1 "
	]
	},
	"execution_count": 1,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"import pandas as pd\n",
	"import numpy as np\n",
	"\n",
	"data =pd.read_csv('heart1.csv')\n",
	"data.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"cp int64\n",
	"chol int64\n",
	"ca int64\n",
	"dtype: object"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"data.loc[:,['cp','chol','ca']].dtypes"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[3 2 1 0]\n",
	"[233 250 204 236 354 192 294 263 199 168 239 275 266 211 283 219 340 226\n",
	" 247 234 243 302 212 175 417 197 198 177 273 213 304 232 269 360 308 245\n",
	" 208 264 321 325 235 257 216 256 231 141 252 201 222 260 182 303 265 309\n",
	" 186 203 183 220 209 258 227 261 221 205 240 318 298 564 277 214 248 255\n",
	" 207 223 288 160 394 315 246 244 270 195 196 254 126 313 262 215 193 271\n",
	" 268 267 210 295 306 178 242 180 228 149 278 253 342 157 286 229 284 224\n",
	" 206 167 230 335 276 353 225 330 290 172 305 188 282 185 326 274 164 307\n",
	" 249 341 407 217 174 281 289 322 299 300 293 184 409 259 200 327 237 218\n",
	" 319 166 311 169 187 176 241 131]\n",
	"[0 2 1 3 4]\n"
	]
	}
	],
	"source": [
	"print(data.cp.unique())\n",
	"print(data.chol.unique())\n",
	"print(data.ca.unique())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 27,
	"metadata": {},
	"outputs": [],
	"source": [
	"cp =data[['cp']]\n",
	"chol =data[['chol']]\n",
	"ca=data[['ca']]\n",
	"Y=data[['target']]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 35,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"0.7894736842105263\n",
	"0.8026315789473685\n",
	"0.7368421052631579\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
	" y = column_or_1d(y, warn=True)\n",
	"D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
	" y = column_or_1d(y, warn=True)\n",
	"D:\\softwares\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
	" y = column_or_1d(y, warn=True)\n"
	]
	}
	],
	"source": [
	"from sklearn.naive_bayes import BernoulliNB \n",
	"#using Bernoulli style of NB \n",
	"from sklearn.model_selection import train_test_split\n",
	"\n",
	"x_data=[]\n",
	"for x_data in cp,chol,ca:\n",
	" X_train, X_test, Y_train, Y_test = train_test_split(cp, Y, test_size=0.25)\n",
	"\n",
	" bnb = BernoulliNB(binarize=0.0)\n",
	" bnb.fit(X_train, Y_train)\n",
	" print(bnb.score(X_test, Y_test))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 46,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[1]\n"
	]
	}
	],
	"source": [
	"#predicting target based on model build on y:target, x:ca\n",
	"datas = np.array([4]).reshape(-1,1)\n",
	"print(bnb.predict(datas))"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}