Aditii7/Assignment naive bayes.ipynb

## Assignment naive bayes.ipynb
{
  "cells": [
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import pandas as pd\nimport numpy as np",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df=pd.read_csv(\"SalaryData_Test.csv\")\ndf",
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "       age      workclass      education  educationno        maritalstatus  \\\n0       25        Private           11th            7        Never-married   \n1       38        Private        HS-grad            9   Married-civ-spouse   \n2       28      Local-gov     Assoc-acdm           12   Married-civ-spouse   \n3       44        Private   Some-college           10   Married-civ-spouse   \n4       34        Private           10th            6        Never-married   \n...    ...            ...            ...          ...                  ...   \n15055   33        Private      Bachelors           13        Never-married   \n15056   39        Private      Bachelors           13             Divorced   \n15057   38        Private      Bachelors           13   Married-civ-spouse   \n15058   44        Private      Bachelors           13             Divorced   \n15059   35   Self-emp-inc      Bachelors           13   Married-civ-spouse   \n\n               occupation    relationship                 race      sex  \\\n0       Machine-op-inspct       Own-child                Black     Male   \n1         Farming-fishing         Husband                White     Male   \n2         Protective-serv         Husband                White     Male   \n3       Machine-op-inspct         Husband                Black     Male   \n4           Other-service   Not-in-family                White     Male   \n...                   ...             ...                  ...      ...   
\n15055      Prof-specialty       Own-child                White     Male   \n15056      Prof-specialty   Not-in-family                White   Female   \n15057      Prof-specialty         Husband                White     Male   \n15058        Adm-clerical       Own-child   Asian-Pac-Islander     Male   \n15059     Exec-managerial         Husband                White     Male   \n\n       capitalgain  capitalloss  hoursperweek          native  Salary  \n0                0            0            40   United-States   <=50K  \n1                0            0            50   United-States   <=50K  \n2                0            0            40   United-States    >50K  \n3             7688            0            40   United-States    >50K  \n4                0            0            30   United-States   <=50K  \n...            ...          ...           ...             ...     ...  \n15055            0            0            40   United-States   <=50K  \n15056            0            0            36   United-States   <=50K  \n15057            0            0            50   United-States   <=50K  \n15058         5455            0            40   United-States   <=50K  \n15059            0            0            60   United-States    >50K  \n\n[15060 rows x 14 columns]",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>workclass</th>\n      <th>education</th>\n      <th>educationno</th>\n      <th>maritalstatus</th>\n      <th>occupation</th>\n      <th>relationship</th>\n      <th>race</th>\n      <th>sex</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n      <th>native</th>\n      <th>Salary</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>25</td>\n      <td>Private</td>\n      <td>11th</td>\n      <td>7</td>\n      <td>Never-married</td>\n      <td>Machine-op-inspct</td>\n      <td>Own-child</td>\n      <td>Black</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>38</td>\n      <td>Private</td>\n      <td>HS-grad</td>\n      <td>9</td>\n      <td>Married-civ-spouse</td>\n      <td>Farming-fishing</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>50</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>28</td>\n      <td>Local-gov</td>\n      <td>Assoc-acdm</td>\n      <td>12</td>\n      <td>Married-civ-spouse</td>\n      <td>Protective-serv</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&gt;50K</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>44</td>\n      <td>Private</td>\n      <td>Some-college</td>\n      
<td>10</td>\n      <td>Married-civ-spouse</td>\n      <td>Machine-op-inspct</td>\n      <td>Husband</td>\n      <td>Black</td>\n      <td>Male</td>\n      <td>7688</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&gt;50K</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>34</td>\n      <td>Private</td>\n      <td>10th</td>\n      <td>6</td>\n      <td>Never-married</td>\n      <td>Other-service</td>\n      <td>Not-in-family</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>30</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>15055</th>\n      <td>33</td>\n      <td>Private</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Never-married</td>\n      <td>Prof-specialty</td>\n      <td>Own-child</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>15056</th>\n      <td>39</td>\n      <td>Private</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Divorced</td>\n      <td>Prof-specialty</td>\n      <td>Not-in-family</td>\n      <td>White</td>\n      <td>Female</td>\n      <td>0</td>\n      <td>0</td>\n      <td>36</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>15057</th>\n      <td>38</td>\n      <td>Private</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Married-civ-spouse</td>\n      <td>Prof-specialty</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      
<td>50</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>15058</th>\n      <td>44</td>\n      <td>Private</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Divorced</td>\n      <td>Adm-clerical</td>\n      <td>Own-child</td>\n      <td>Asian-Pac-Islander</td>\n      <td>Male</td>\n      <td>5455</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>15059</th>\n      <td>35</td>\n      <td>Self-emp-inc</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Married-civ-spouse</td>\n      <td>Exec-managerial</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>60</td>\n      <td>United-States</td>\n      <td>&gt;50K</td>\n    </tr>\n  </tbody>\n</table>\n<p>15060 rows × 14 columns</p>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.info()",
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 15060 entries, 0 to 15059\nData columns (total 14 columns):\n #   Column         Non-Null Count  Dtype \n---  ------         --------------  ----- \n 0   age            15060 non-null  int64 \n 1   workclass      15060 non-null  object\n 2   education      15060 non-null  object\n 3   educationno    15060 non-null  int64 \n 4   maritalstatus  15060 non-null  object\n 5   occupation     15060 non-null  object\n 6   relationship   15060 non-null  object\n 7   race           15060 non-null  object\n 8   sex            15060 non-null  object\n 9   capitalgain    15060 non-null  int64 \n 10  capitalloss    15060 non-null  int64 \n 11  hoursperweek   15060 non-null  int64 \n 12  native         15060 non-null  object\n 13  Salary         15060 non-null  object\ndtypes: int64(5), object(9)\nmemory usage: 1.6+ MB\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.describe()",
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "                age   educationno   capitalgain   capitalloss  hoursperweek\ncount  15060.000000  15060.000000  15060.000000  15060.000000  15060.000000\nmean      38.768327     10.112749   1120.301594     89.041899     40.951594\nstd       13.380676      2.558727   7703.181842    406.283245     12.062831\nmin       17.000000      1.000000      0.000000      0.000000      1.000000\n25%       28.000000      9.000000      0.000000      0.000000     40.000000\n50%       37.000000     10.000000      0.000000      0.000000     40.000000\n75%       48.000000     13.000000      0.000000      0.000000     45.000000\nmax       90.000000     16.000000  99999.000000   3770.000000     99.000000",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>educationno</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>count</th>\n      <td>15060.000000</td>\n      <td>15060.000000</td>\n      <td>15060.000000</td>\n      <td>15060.000000</td>\n      <td>15060.000000</td>\n    </tr>\n    <tr>\n      <th>mean</th>\n      <td>38.768327</td>\n      <td>10.112749</td>\n      <td>1120.301594</td>\n      <td>89.041899</td>\n      <td>40.951594</td>\n    </tr>\n    <tr>\n      <th>std</th>\n      <td>13.380676</td>\n      <td>2.558727</td>\n      <td>7703.181842</td>\n      <td>406.283245</td>\n      <td>12.062831</td>\n    </tr>\n    <tr>\n      <th>min</th>\n      <td>17.000000</td>\n      <td>1.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>1.000000</td>\n    </tr>\n    <tr>\n      <th>25%</th>\n      <td>28.000000</td>\n      <td>9.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>40.000000</td>\n    </tr>\n    <tr>\n      <th>50%</th>\n      <td>37.000000</td>\n      <td>10.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>40.000000</td>\n    </tr>\n    <tr>\n      <th>75%</th>\n      <td>48.000000</td>\n      <td>13.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>45.000000</td>\n    </tr>\n    <tr>\n      <th>max</th>\n      <td>90.000000</td>\n      <td>16.000000</td>\n      <td>99999.000000</td>\n      <td>3770.000000</td>\n      <td>99.000000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.shape",
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 5,
          "data": {
            "text/plain": "(15060, 14)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.isna().sum()",
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 6,
          "data": {
            "text/plain": "age              0\nworkclass        0\neducation        0\neducationno      0\nmaritalstatus    0\noccupation       0\nrelationship     0\nrace             0\nsex              0\ncapitalgain      0\ncapitalloss      0\nhoursperweek     0\nnative           0\nSalary           0\ndtype: int64"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.columns",
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 7,
          "data": {
            "text/plain": "Index(['age', 'workclass', 'education', 'educationno', 'maritalstatus',\n       'occupation', 'relationship', 'race', 'sex', 'capitalgain',\n       'capitalloss', 'hoursperweek', 'native', 'Salary'],\n      dtype='object')"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "from sklearn.preprocessing import LabelEncoder\nle=LabelEncoder()\ndf['workclass']=le.fit_transform(df['workclass'])\ndf['education']=le.fit_transform(df['education'])\ndf['maritalstatus']=le.fit_transform(df['maritalstatus'])\ndf['occupation']=le.fit_transform(df['occupation'])\ndf['relationship']=le.fit_transform(df['relationship'])\ndf['race']=le.fit_transform(df['race'])\ndf['sex']=le.fit_transform(df['sex'])\ndf['native']=le.fit_transform(df['native'])\ndf.head()\n",
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 8,
          "data": {
            "text/plain": "   age  workclass  education  educationno  maritalstatus  occupation  \\\n0   25          2          1            7              4           6   \n1   38          2         11            9              2           4   \n2   28          1          7           12              2          10   \n3   44          2         15           10              2           6   \n4   34          2          0            6              4           7   \n\n   relationship  race  sex  capitalgain  capitalloss  hoursperweek  native  \\\n0             3     2    1            0            0            40      37   \n1             0     4    1            0            0            50      37   \n2             0     4    1            0            0            40      37   \n3             0     2    1         7688            0            40      37   \n4             1     4    1            0            0            30      37   \n\n   Salary  \n0   <=50K  \n1   <=50K  \n2    >50K  \n3    >50K  \n4   <=50K  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>workclass</th>\n      <th>education</th>\n      <th>educationno</th>\n      <th>maritalstatus</th>\n      <th>occupation</th>\n      <th>relationship</th>\n      <th>race</th>\n      <th>sex</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n      <th>native</th>\n      <th>Salary</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>25</td>\n      <td>2</td>\n      <td>1</td>\n      <td>7</td>\n      <td>4</td>\n      <td>6</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>38</td>\n      <td>2</td>\n      <td>11</td>\n      <td>9</td>\n      <td>2</td>\n      <td>4</td>\n      <td>0</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>50</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>28</td>\n      <td>1</td>\n      <td>7</td>\n      <td>12</td>\n      <td>2</td>\n      <td>10</td>\n      <td>0</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n      <td>&gt;50K</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>44</td>\n      <td>2</td>\n      <td>15</td>\n      <td>10</td>\n      <td>2</td>\n      <td>6</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>7688</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n      <td>&gt;50K</td>\n    </tr>\n    <tr>\n    
  <th>4</th>\n      <td>34</td>\n      <td>2</td>\n      <td>0</td>\n      <td>6</td>\n      <td>4</td>\n      <td>7</td>\n      <td>1</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>30</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x=df.iloc[:,:13]\ny=df.iloc[:,-1]",
      "execution_count": 9,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "from sklearn import preprocessing\n#normalize data\nx=preprocessing.StandardScaler().fit_transform(x)\nx[0:13]",
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 10,
          "data": {
            "text/plain": "array([[-1.02900513, -0.222347  , -2.42343884, -1.2165628 ,  0.93318745,\n         0.00272543,  1.00610775, -2.03438913,  0.69583225, -0.14543845,\n        -0.2191694 , -0.07888904,  0.26058067],\n       [-0.05742253, -0.222347  ,  0.19017232, -0.43489824, -0.39546327,\n        -0.49478949, -0.88198978,  0.38223023,  0.69583225, -0.14543845,\n        -0.2191694 ,  0.75013125,  0.26058067],\n       [-0.80479376, -1.25712632, -0.85527215,  0.73759862, -0.39546327,\n         0.99775528, -0.88198978,  0.38223023,  0.69583225, -0.14543845,\n        -0.2191694 , -0.07888904,  0.26058067],\n       [ 0.39100021, -0.222347  ,  1.23561678, -0.04406595, -0.39546327,\n         0.00272543, -0.88198978, -2.03438913,  0.69583225,  0.85262384,\n        -0.2191694 , -0.07888904,  0.26058067],\n       [-0.35637102, -0.222347  , -2.68479996, -1.60739509,  0.93318745,\n         0.25148289, -0.25262394,  0.38223023,  0.69583225, -0.14543845,\n        -0.2191694 , -0.90790934,  0.26058067],\n       [ 1.81100556,  1.84721163,  0.97425567,  1.91009547, -0.39546327,\n         0.74899782, -0.88198978,  0.38223023,  0.69583225,  0.25739549,\n        -0.2191694 , -0.74210528,  0.26058067],\n       [-1.10374225, -0.222347  ,  1.23561678, -0.04406595,  0.93318745,\n         0.25148289,  1.63547359,  0.38223023, -1.43712799, -0.14543845,\n        -0.2191694 , -0.07888904,  0.26058067],\n       [ 1.21310857, -0.222347  , -1.37799438, -2.38905966, -0.39546327,\n        -0.99230442, -0.88198978,  0.38223023,  0.69583225, -0.14543845,\n        -0.2191694 , -2.56594993,  0.26058067],\n       [ 1.9604798 , -0.222347  ,  0.19017232, -0.43489824, -0.39546327,\n         0.00272543, -0.88198978,  0.38223023,  0.69583225,  0.68775143,\n        -0.2191694 , -0.07888904,  0.26058067],\n       [-0.20689677, -2.29190564, -0.33254991,  1.1284309 , -0.39546327,\n        -1.48981934, -0.88198978,  0.38223023,  0.69583225, -0.14543845,\n        -0.2191694 , -0.07888904,  0.26058067],\n   
    [-0.95426801, -0.222347  ,  0.19017232, -0.43489824,  0.93318745,\n        -1.48981934, -0.25262394,  0.38223023, -1.43712799, -0.14543845,\n        -0.2191694 , -0.16179107,  0.26058067],\n       [ 0.68994871, -0.222347  ,  0.19017232, -0.43489824, -0.39546327,\n         0.00272543, -0.88198978,  0.38223023,  0.69583225,  0.25739549,\n        -0.2191694 ,  0.58432719,  0.26058067],\n       [ 0.31626309, -0.222347  ,  0.45153343,  1.51926319, -0.39546327,\n        -0.74354695, -0.88198978,  0.38223023,  0.69583225, -0.14543845,\n        -0.2191694 ,  0.75013125,  0.26058067]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "from sklearn.model_selection import train_test_split\nx_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=1)\ny_test.shape",
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 11,
          "data": {
            "text/plain": "(4518,)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import sklearn\nfrom sklearn.naive_bayes import BernoulliNB\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.naive_bayes import MultinomialNB\nfrom sklearn import metrics\nfrom sklearn.metrics import accuracy_score",
      "execution_count": 12,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "BernNB=BernoulliNB(binarize=True)\nBernNB.fit(x_train,y_train)\nprint(BernNB)\ny_expect=y_test\n\ny_pred=BernNB.predict(x_test)",
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "text": "BernoulliNB(binarize=True)\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y_pred",
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 14,
          "data": {
            "text/plain": "array([' <=50K', ' >50K', ' <=50K', ..., ' <=50K', ' >50K', ' <=50K'],\n      dtype='<U6')"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "accuracy_score(y_expect,y_pred)",
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 15,
          "data": {
            "text/plain": "0.7656042496679947"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "GausNB=GaussianNB()\nGausNB.fit(x_train,y_train)\ny_pred=GausNB.predict(x_test)\naccuracy_score(y_expect,y_pred)",
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 16,
          "data": {
            "text/plain": "0.8012394864984507"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y_pred",
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 17,
          "data": {
            "text/plain": "array([' <=50K', ' >50K', ' <=50K', ..., ' <=50K', ' <=50K', ' <=50K'],\n      dtype='<U6')"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "BernNB=BernoulliNB(binarize=0.1)\nBernNB.fit(x_train,y_train)\nprint(BernNB)\ny_expect=y_test\n\ny_pred=BernNB.predict(x_test)",
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "text": "BernoulliNB(binarize=0.1)\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "accuracy_score(y_expect,y_pred)",
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 19,
          "data": {
            "text/plain": "0.7841965471447543"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df2=pd.read_csv(\"SalaryData_Train.csv\")\ndf2",
      "execution_count": 20,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 20,
          "data": {
            "text/plain": "       age          workclass    education  educationno        maritalstatus  \\\n0       39          State-gov    Bachelors           13        Never-married   \n1       50   Self-emp-not-inc    Bachelors           13   Married-civ-spouse   \n2       38            Private      HS-grad            9             Divorced   \n3       53            Private         11th            7   Married-civ-spouse   \n4       28            Private    Bachelors           13   Married-civ-spouse   \n...    ...                ...          ...          ...                  ...   \n30156   27            Private   Assoc-acdm           12   Married-civ-spouse   \n30157   40            Private      HS-grad            9   Married-civ-spouse   \n30158   58            Private      HS-grad            9              Widowed   \n30159   22            Private      HS-grad            9        Never-married   \n30160   52       Self-emp-inc      HS-grad            9   Married-civ-spouse   \n\n               occupation    relationship    race      sex  capitalgain  \\\n0            Adm-clerical   Not-in-family   White     Male         2174   \n1         Exec-managerial         Husband   White     Male            0   \n2       Handlers-cleaners   Not-in-family   White     Male            0   \n3       Handlers-cleaners         Husband   Black     Male            0   \n4          Prof-specialty            Wife   Black   Female            0   \n...                   ...             ...     ...      ...          ...   
\n30156        Tech-support            Wife   White   Female            0   \n30157   Machine-op-inspct         Husband   White     Male            0   \n30158        Adm-clerical       Unmarried   White   Female            0   \n30159        Adm-clerical       Own-child   White     Male            0   \n30160     Exec-managerial            Wife   White   Female        15024   \n\n       capitalloss  hoursperweek          native  Salary  \n0                0            40   United-States   <=50K  \n1                0            13   United-States   <=50K  \n2                0            40   United-States   <=50K  \n3                0            40   United-States   <=50K  \n4                0            40            Cuba   <=50K  \n...            ...           ...             ...     ...  \n30156            0            38   United-States   <=50K  \n30157            0            40   United-States    >50K  \n30158            0            40   United-States   <=50K  \n30159            0            20   United-States   <=50K  \n30160            0            40   United-States    >50K  \n\n[30161 rows x 14 columns]",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>workclass</th>\n      <th>education</th>\n      <th>educationno</th>\n      <th>maritalstatus</th>\n      <th>occupation</th>\n      <th>relationship</th>\n      <th>race</th>\n      <th>sex</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n      <th>native</th>\n      <th>Salary</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>39</td>\n      <td>State-gov</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Never-married</td>\n      <td>Adm-clerical</td>\n      <td>Not-in-family</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>2174</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>50</td>\n      <td>Self-emp-not-inc</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Married-civ-spouse</td>\n      <td>Exec-managerial</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>13</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>38</td>\n      <td>Private</td>\n      <td>HS-grad</td>\n      <td>9</td>\n      <td>Divorced</td>\n      <td>Handlers-cleaners</td>\n      <td>Not-in-family</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>53</td>\n      <td>Private</td>\n      <td>11th</td>\n      
<td>7</td>\n      <td>Married-civ-spouse</td>\n      <td>Handlers-cleaners</td>\n      <td>Husband</td>\n      <td>Black</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>28</td>\n      <td>Private</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Married-civ-spouse</td>\n      <td>Prof-specialty</td>\n      <td>Wife</td>\n      <td>Black</td>\n      <td>Female</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>Cuba</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>30156</th>\n      <td>27</td>\n      <td>Private</td>\n      <td>Assoc-acdm</td>\n      <td>12</td>\n      <td>Married-civ-spouse</td>\n      <td>Tech-support</td>\n      <td>Wife</td>\n      <td>White</td>\n      <td>Female</td>\n      <td>0</td>\n      <td>0</td>\n      <td>38</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>30157</th>\n      <td>40</td>\n      <td>Private</td>\n      <td>HS-grad</td>\n      <td>9</td>\n      <td>Married-civ-spouse</td>\n      <td>Machine-op-inspct</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&gt;50K</td>\n    </tr>\n    <tr>\n      <th>30158</th>\n      <td>58</td>\n      <td>Private</td>\n      <td>HS-grad</td>\n      <td>9</td>\n      <td>Widowed</td>\n      <td>Adm-clerical</td>\n      <td>Unmarried</td>\n      <td>White</td>\n      <td>Female</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      
<td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>30159</th>\n      <td>22</td>\n      <td>Private</td>\n      <td>HS-grad</td>\n      <td>9</td>\n      <td>Never-married</td>\n      <td>Adm-clerical</td>\n      <td>Own-child</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>20</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>30160</th>\n      <td>52</td>\n      <td>Self-emp-inc</td>\n      <td>HS-grad</td>\n      <td>9</td>\n      <td>Married-civ-spouse</td>\n      <td>Exec-managerial</td>\n      <td>Wife</td>\n      <td>White</td>\n      <td>Female</td>\n      <td>15024</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&gt;50K</td>\n    </tr>\n  </tbody>\n</table>\n<p>30161 rows × 14 columns</p>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df2.info()",
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "stream",
          "text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 30161 entries, 0 to 30160\nData columns (total 14 columns):\n #   Column         Non-Null Count  Dtype \n---  ------         --------------  ----- \n 0   age            30161 non-null  int64 \n 1   workclass      30161 non-null  object\n 2   education      30161 non-null  object\n 3   educationno    30161 non-null  int64 \n 4   maritalstatus  30161 non-null  object\n 5   occupation     30161 non-null  object\n 6   relationship   30161 non-null  object\n 7   race           30161 non-null  object\n 8   sex            30161 non-null  object\n 9   capitalgain    30161 non-null  int64 \n 10  capitalloss    30161 non-null  int64 \n 11  hoursperweek   30161 non-null  int64 \n 12  native         30161 non-null  object\n 13  Salary         30161 non-null  object\ndtypes: int64(5), object(9)\nmemory usage: 3.2+ MB\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df2.describe()",
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 22,
          "data": {
            "text/plain": "                age   educationno   capitalgain   capitalloss  hoursperweek\ncount  30161.000000  30161.000000  30161.000000  30161.000000  30161.000000\nmean      38.438115     10.121316   1092.044064     88.302311     40.931269\nstd       13.134830      2.550037   7406.466611    404.121321     11.980182\nmin       17.000000      1.000000      0.000000      0.000000      1.000000\n25%       28.000000      9.000000      0.000000      0.000000     40.000000\n50%       37.000000     10.000000      0.000000      0.000000     40.000000\n75%       47.000000     13.000000      0.000000      0.000000     45.000000\nmax       90.000000     16.000000  99999.000000   4356.000000     99.000000",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>educationno</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>count</th>\n      <td>30161.000000</td>\n      <td>30161.000000</td>\n      <td>30161.000000</td>\n      <td>30161.000000</td>\n      <td>30161.000000</td>\n    </tr>\n    <tr>\n      <th>mean</th>\n      <td>38.438115</td>\n      <td>10.121316</td>\n      <td>1092.044064</td>\n      <td>88.302311</td>\n      <td>40.931269</td>\n    </tr>\n    <tr>\n      <th>std</th>\n      <td>13.134830</td>\n      <td>2.550037</td>\n      <td>7406.466611</td>\n      <td>404.121321</td>\n      <td>11.980182</td>\n    </tr>\n    <tr>\n      <th>min</th>\n      <td>17.000000</td>\n      <td>1.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>1.000000</td>\n    </tr>\n    <tr>\n      <th>25%</th>\n      <td>28.000000</td>\n      <td>9.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>40.000000</td>\n    </tr>\n    <tr>\n      <th>50%</th>\n      <td>37.000000</td>\n      <td>10.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>40.000000</td>\n    </tr>\n    <tr>\n      <th>75%</th>\n      <td>47.000000</td>\n      <td>13.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>45.000000</td>\n    </tr>\n    <tr>\n      <th>max</th>\n      <td>90.000000</td>\n      <td>16.000000</td>\n      <td>99999.000000</td>\n      <td>4356.000000</td>\n      <td>99.000000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df2.shape",
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 23,
          "data": {
            "text/plain": "(30161, 14)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df2.columns",
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 24,
          "data": {
            "text/plain": "Index(['age', 'workclass', 'education', 'educationno', 'maritalstatus',\n       'occupation', 'relationship', 'race', 'sex', 'capitalgain',\n       'capitalloss', 'hoursperweek', 'native', 'Salary'],\n      dtype='object')"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "from sklearn.preprocessing import LabelEncoder\n\n# Text-valued feature columns to integer-encode; the Salary target keeps its string labels.\ncategorical_cols = ['workclass', 'education', 'maritalstatus', 'occupation',\n                    'relationship', 'race', 'sex', 'native']\n\n# Fit a separate encoder per column so every fitted mapping (classes_) is kept\n# and can be reused for inverse_transform or for encoding another frame.\nencoders = {}\nfor col in categorical_cols:\n    encoders[col] = LabelEncoder()\n    df2[col] = encoders[col].fit_transform(df2[col])\n\n# Keep `le` bound to the encoder fitted on 'native', the last column encoded.\nle = encoders['native']\ndf2.head()\n",
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 25,
          "data": {
            "text/plain": "   age  workclass  education  educationno  maritalstatus  occupation  \\\n0   39          5          9           13              4           0   \n1   50          4          9           13              2           3   \n2   38          2         11            9              0           5   \n3   53          2          1            7              2           5   \n4   28          2          9           13              2           9   \n\n   relationship  race  sex  capitalgain  capitalloss  hoursperweek  native  \\\n0             1     4    1         2174            0            40      37   \n1             0     4    1            0            0            13      37   \n2             1     4    1            0            0            40      37   \n3             0     2    1            0            0            40      37   \n4             5     2    0            0            0            40       4   \n\n   Salary  \n0   <=50K  \n1   <=50K  \n2   <=50K  \n3   <=50K  \n4   <=50K  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>workclass</th>\n      <th>education</th>\n      <th>educationno</th>\n      <th>maritalstatus</th>\n      <th>occupation</th>\n      <th>relationship</th>\n      <th>race</th>\n      <th>sex</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n      <th>native</th>\n      <th>Salary</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>39</td>\n      <td>5</td>\n      <td>9</td>\n      <td>13</td>\n      <td>4</td>\n      <td>0</td>\n      <td>1</td>\n      <td>4</td>\n      <td>1</td>\n      <td>2174</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>50</td>\n      <td>4</td>\n      <td>9</td>\n      <td>13</td>\n      <td>2</td>\n      <td>3</td>\n      <td>0</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>13</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>38</td>\n      <td>2</td>\n      <td>11</td>\n      <td>9</td>\n      <td>0</td>\n      <td>5</td>\n      <td>1</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>53</td>\n      <td>2</td>\n      <td>1</td>\n      <td>7</td>\n      <td>2</td>\n      <td>5</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n    
  <th>4</th>\n      <td>28</td>\n      <td>2</td>\n      <td>9</td>\n      <td>13</td>\n      <td>2</td>\n      <td>9</td>\n      <td>5</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>4</td>\n      <td>&lt;=50K</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Select features and target by column name rather than by position,\n# so the split does not depend on the column count or on Salary being last.\nx = df2.drop(columns='Salary')\ny = df2['Salary']",
      "execution_count": 26,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x",
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 27,
          "data": {
            "text/plain": "       age  workclass  education  educationno  maritalstatus  occupation  \\\n0       39          5          9           13              4           0   \n1       50          4          9           13              2           3   \n2       38          2         11            9              0           5   \n3       53          2          1            7              2           5   \n4       28          2          9           13              2           9   \n...    ...        ...        ...          ...            ...         ...   \n30156   27          2          7           12              2          12   \n30157   40          2         11            9              2           6   \n30158   58          2         11            9              6           0   \n30159   22          2         11            9              4           0   \n30160   52          3         11            9              2           3   \n\n       relationship  race  sex  capitalgain  capitalloss  hoursperweek  native  \n0                 1     4    1         2174            0            40      37  \n1                 0     4    1            0            0            13      37  \n2                 1     4    1            0            0            40      37  \n3                 0     2    1            0            0            40      37  \n4                 5     2    0            0            0            40       4  \n...             ...   ...  ...          ...          ...           ...     ...  \n30156             5     4    0            0            0            38      37  \n30157             0     4    1            0            0            40      37  \n30158             4     4    0            0            0            40      37  \n30159             3     4    1            0            0            20      37  \n30160             5     4    0        15024            0            40      37  \n\n[30161 rows x 13 columns]",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>workclass</th>\n      <th>education</th>\n      <th>educationno</th>\n      <th>maritalstatus</th>\n      <th>occupation</th>\n      <th>relationship</th>\n      <th>race</th>\n      <th>sex</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n      <th>native</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>39</td>\n      <td>5</td>\n      <td>9</td>\n      <td>13</td>\n      <td>4</td>\n      <td>0</td>\n      <td>1</td>\n      <td>4</td>\n      <td>1</td>\n      <td>2174</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>50</td>\n      <td>4</td>\n      <td>9</td>\n      <td>13</td>\n      <td>2</td>\n      <td>3</td>\n      <td>0</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>13</td>\n      <td>37</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>38</td>\n      <td>2</td>\n      <td>11</td>\n      <td>9</td>\n      <td>0</td>\n      <td>5</td>\n      <td>1</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>53</td>\n      <td>2</td>\n      <td>1</td>\n      <td>7</td>\n      <td>2</td>\n      <td>5</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>28</td>\n      <td>2</td>\n      <td>9</td>\n      <td>13</td>\n      <td>2</td>\n      
<td>9</td>\n      <td>5</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>30156</th>\n      <td>27</td>\n      <td>2</td>\n      <td>7</td>\n      <td>12</td>\n      <td>2</td>\n      <td>12</td>\n      <td>5</td>\n      <td>4</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>38</td>\n      <td>37</td>\n    </tr>\n    <tr>\n      <th>30157</th>\n      <td>40</td>\n      <td>2</td>\n      <td>11</td>\n      <td>9</td>\n      <td>2</td>\n      <td>6</td>\n      <td>0</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n    </tr>\n    <tr>\n      <th>30158</th>\n      <td>58</td>\n      <td>2</td>\n      <td>11</td>\n      <td>9</td>\n      <td>6</td>\n      <td>0</td>\n      <td>4</td>\n      <td>4</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n    </tr>\n    <tr>\n      <th>30159</th>\n      <td>22</td>\n      <td>2</td>\n      <td>11</td>\n      <td>9</td>\n      <td>4</td>\n      <td>0</td>\n      <td>3</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>20</td>\n      <td>37</td>\n    </tr>\n    <tr>\n      <th>30160</th>\n      <td>52</td>\n      <td>3</td>\n      <td>11</td>\n      <td>9</td>\n      <td>2</td>\n      <td>3</td>\n      <td>5</td>\n      <td>4</td>\n      <td>0</td>\n      <td>15024</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n    </tr>\n  </tbody>\n</table>\n<p>30161 rows × 13 columns</p>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "from sklearn.model_selection import train_test_split\n\n# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.\nx_train, x_test, y_train, y_test = train_test_split(\n    x, y, test_size=0.3, random_state=1\n)\ny_test.shape",
      "execution_count": 28,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 28,
          "data": {
            "text/plain": "(9049,)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import sklearn\nfrom sklearn.naive_bayes import BernoulliNB\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.naive_bayes import MultinomialNB\nfrom sklearn import metrics\nfrom sklearn.metrics import accuracy_score",
      "execution_count": 29,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# binarize is a numeric threshold, not an on/off flag; a boolean True is coerced to 1.0,\n# so the threshold is spelled out. Feature values > 1.0 become 1, all others 0.\nBernNB = BernoulliNB(binarize=1.0)\nBernNB.fit(x_train, y_train)\nprint(BernNB)\n\ny_expect = y_test\ny_pred = BernNB.predict(x_test)",
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "stream",
          "text": "BernoulliNB(binarize=True)\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y_pred",
      "execution_count": 31,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 31,
          "data": {
            "text/plain": "array([' <=50K', ' <=50K', ' <=50K', ..., ' <=50K', ' <=50K', ' <=50K'],\n      dtype='<U6')"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "accuracy_score(y_expect,y_pred)",
      "execution_count": 32,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 32,
          "data": {
            "text/plain": "0.778649574538623"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "GausNB = GaussianNB()\nGausNB.fit(x_train, y_train)\ny_pred = GausNB.predict(x_test)\n\n# Score against y_test directly so this cell does not rely on the y_expect alias set in another cell.\naccuracy_score(y_test, y_pred)",
      "execution_count": 33,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 33,
          "data": {
            "text/plain": "0.7898110288429661"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Second Bernoulli model with a 0.1 binarization threshold; rebinds BernNB and y_pred.\nBernNB = BernoulliNB(binarize=0.1)\nBernNB.fit(x_train, y_train)\nprint(BernNB)\n\ny_expect = y_test\ny_pred = BernNB.predict(x_test)",
      "execution_count": 34,
      "outputs": [
        {
          "output_type": "stream",
          "text": "BernoulliNB(binarize=0.1)\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "accuracy_score(y_expect,y_pred)",
      "execution_count": 35,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 35,
          "data": {
            "text/plain": "0.7232843408111393"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.8.5",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "ASSi naive bayes.ipynb",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}