Created
April 25, 2021 05:52
-
-
Save Aditii7/da0da48c4ceae87f831f20f33a95a657 to your computer and use it in GitHub Desktop.
ASSi naive bayes.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import pandas as pd\nimport numpy as np", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df=pd.read_csv(\"SalaryData_Test.csv\")\ndf", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 2, | |
"data": { | |
"text/plain": " age workclass education educationno maritalstatus \\\n0 25 Private 11th 7 Never-married \n1 38 Private HS-grad 9 Married-civ-spouse \n2 28 Local-gov Assoc-acdm 12 Married-civ-spouse \n3 44 Private Some-college 10 Married-civ-spouse \n4 34 Private 10th 6 Never-married \n... ... ... ... ... ... \n15055 33 Private Bachelors 13 Never-married \n15056 39 Private Bachelors 13 Divorced \n15057 38 Private Bachelors 13 Married-civ-spouse \n15058 44 Private Bachelors 13 Divorced \n15059 35 Self-emp-inc Bachelors 13 Married-civ-spouse \n\n occupation relationship race sex \\\n0 Machine-op-inspct Own-child Black Male \n1 Farming-fishing Husband White Male \n2 Protective-serv Husband White Male \n3 Machine-op-inspct Husband Black Male \n4 Other-service Not-in-family White Male \n... ... ... ... ... \n15055 Prof-specialty Own-child White Male \n15056 Prof-specialty Not-in-family White Female \n15057 Prof-specialty Husband White Male \n15058 Adm-clerical Own-child Asian-Pac-Islander Male \n15059 Exec-managerial Husband White Male \n\n capitalgain capitalloss hoursperweek native Salary \n0 0 0 40 United-States <=50K \n1 0 0 50 United-States <=50K \n2 0 0 40 United-States >50K \n3 7688 0 40 United-States >50K \n4 0 0 30 United-States <=50K \n... ... ... ... ... ... \n15055 0 0 40 United-States <=50K \n15056 0 0 36 United-States <=50K \n15057 0 0 50 United-States <=50K \n15058 5455 0 40 United-States <=50K \n15059 0 0 60 United-States >50K \n\n[15060 rows x 14 columns]", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>25</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Never-married</td>\n <td>Machine-op-inspct</td>\n <td>Own-child</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Married-civ-spouse</td>\n <td>Farming-fishing</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>28</td>\n <td>Local-gov</td>\n <td>Assoc-acdm</td>\n <td>12</td>\n <td>Married-civ-spouse</td>\n <td>Protective-serv</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>44</td>\n <td>Private</td>\n <td>Some-college</td>\n <td>10</td>\n <td>Married-civ-spouse</td>\n <td>Machine-op-inspct</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>7688</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>34</td>\n <td>Private</td>\n <td>10th</td>\n <td>6</td>\n <td>Never-married</td>\n <td>Other-service</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n 
<td>0</td>\n <td>30</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>15055</th>\n <td>33</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Never-married</td>\n <td>Prof-specialty</td>\n <td>Own-child</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15056</th>\n <td>39</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Divorced</td>\n <td>Prof-specialty</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Female</td>\n <td>0</td>\n <td>0</td>\n <td>36</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15057</th>\n <td>38</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Prof-specialty</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15058</th>\n <td>44</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Divorced</td>\n <td>Adm-clerical</td>\n <td>Own-child</td>\n <td>Asian-Pac-Islander</td>\n <td>Male</td>\n <td>5455</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15059</th>\n <td>35</td>\n <td>Self-emp-inc</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Exec-managerial</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>60</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n </tbody>\n</table>\n<p>15060 rows × 14 columns</p>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df.info()", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 15060 entries, 0 to 15059\nData columns (total 14 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 age 15060 non-null int64 \n 1 workclass 15060 non-null object\n 2 education 15060 non-null object\n 3 educationno 15060 non-null int64 \n 4 maritalstatus 15060 non-null object\n 5 occupation 15060 non-null object\n 6 relationship 15060 non-null object\n 7 race 15060 non-null object\n 8 sex 15060 non-null object\n 9 capitalgain 15060 non-null int64 \n 10 capitalloss 15060 non-null int64 \n 11 hoursperweek 15060 non-null int64 \n 12 native 15060 non-null object\n 13 Salary 15060 non-null object\ndtypes: int64(5), object(9)\nmemory usage: 1.6+ MB\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df.describe()", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": " age educationno capitalgain capitalloss hoursperweek\ncount 15060.000000 15060.000000 15060.000000 15060.000000 15060.000000\nmean 38.768327 10.112749 1120.301594 89.041899 40.951594\nstd 13.380676 2.558727 7703.181842 406.283245 12.062831\nmin 17.000000 1.000000 0.000000 0.000000 1.000000\n25% 28.000000 9.000000 0.000000 0.000000 40.000000\n50% 37.000000 10.000000 0.000000 0.000000 40.000000\n75% 48.000000 13.000000 0.000000 0.000000 45.000000\nmax 90.000000 16.000000 99999.000000 3770.000000 99.000000", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>educationno</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>count</th>\n <td>15060.000000</td>\n <td>15060.000000</td>\n <td>15060.000000</td>\n <td>15060.000000</td>\n <td>15060.000000</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>38.768327</td>\n <td>10.112749</td>\n <td>1120.301594</td>\n <td>89.041899</td>\n <td>40.951594</td>\n </tr>\n <tr>\n <th>std</th>\n <td>13.380676</td>\n <td>2.558727</td>\n <td>7703.181842</td>\n <td>406.283245</td>\n <td>12.062831</td>\n </tr>\n <tr>\n <th>min</th>\n <td>17.000000</td>\n <td>1.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>1.000000</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>28.000000</td>\n <td>9.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>40.000000</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>37.000000</td>\n <td>10.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>40.000000</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>48.000000</td>\n <td>13.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>45.000000</td>\n </tr>\n <tr>\n <th>max</th>\n <td>90.000000</td>\n <td>16.000000</td>\n <td>99999.000000</td>\n <td>3770.000000</td>\n <td>99.000000</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df.shape", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": "(15060, 14)" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df.isna().sum()", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": "age 0\nworkclass 0\neducation 0\neducationno 0\nmaritalstatus 0\noccupation 0\nrelationship 0\nrace 0\nsex 0\ncapitalgain 0\ncapitalloss 0\nhoursperweek 0\nnative 0\nSalary 0\ndtype: int64" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df.columns", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 7, | |
"data": { | |
"text/plain": "Index(['age', 'workclass', 'education', 'educationno', 'maritalstatus',\n 'occupation', 'relationship', 'race', 'sex', 'capitalgain',\n 'capitalloss', 'hoursperweek', 'native', 'Salary'],\n dtype='object')" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.preprocessing import LabelEncoder\n# Encode every categorical column with its own encoder and keep the fitted encoders,\n# so the integer codes can later be mapped back to labels or reapplied to another frame.\ncategorical_cols=['workclass','education','maritalstatus','occupation','relationship','race','sex','native']\ntest_encoders={}\nfor col in categorical_cols:\n    test_encoders[col]=LabelEncoder()\n    df[col]=test_encoders[col].fit_transform(df[col])\n# The original cell left `le` fitted on 'native'; keep that binding for any later cell that uses it.\nle=test_encoders['native']\ndf.head()\n", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 8, | |
"data": { | |
"text/plain": " age workclass education educationno maritalstatus occupation \\\n0 25 2 1 7 4 6 \n1 38 2 11 9 2 4 \n2 28 1 7 12 2 10 \n3 44 2 15 10 2 6 \n4 34 2 0 6 4 7 \n\n relationship race sex capitalgain capitalloss hoursperweek native \\\n0 3 2 1 0 0 40 37 \n1 0 4 1 0 0 50 37 \n2 0 4 1 0 0 40 37 \n3 0 2 1 7688 0 40 37 \n4 1 4 1 0 0 30 37 \n\n Salary \n0 <=50K \n1 <=50K \n2 >50K \n3 >50K \n4 <=50K ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>25</td>\n <td>2</td>\n <td>1</td>\n <td>7</td>\n <td>4</td>\n <td>6</td>\n <td>3</td>\n <td>2</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>38</td>\n <td>2</td>\n <td>11</td>\n <td>9</td>\n <td>2</td>\n <td>4</td>\n <td>0</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>28</td>\n <td>1</td>\n <td>7</td>\n <td>12</td>\n <td>2</td>\n <td>10</td>\n <td>0</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>44</td>\n <td>2</td>\n <td>15</td>\n <td>10</td>\n <td>2</td>\n <td>6</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n <td>7688</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>34</td>\n <td>2</td>\n <td>0</td>\n <td>6</td>\n <td>4</td>\n <td>7</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>30</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Select features/target by column name rather than by position,\n# so a reordered or extended CSV cannot silently put Salary into x.\nx=df.drop(columns=['Salary'])\ny=df['Salary']", | |
"execution_count": 9, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn import preprocessing\n# Standardise each feature column to zero mean and unit variance.\n# NOTE: the scaler is fitted on every row of x here, before any train/test split.\nfeature_scaler=preprocessing.StandardScaler()\nx=feature_scaler.fit_transform(x)\nx[:13]", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 10, | |
"data": { | |
"text/plain": "array([[-1.02900513, -0.222347 , -2.42343884, -1.2165628 , 0.93318745,\n 0.00272543, 1.00610775, -2.03438913, 0.69583225, -0.14543845,\n -0.2191694 , -0.07888904, 0.26058067],\n [-0.05742253, -0.222347 , 0.19017232, -0.43489824, -0.39546327,\n -0.49478949, -0.88198978, 0.38223023, 0.69583225, -0.14543845,\n -0.2191694 , 0.75013125, 0.26058067],\n [-0.80479376, -1.25712632, -0.85527215, 0.73759862, -0.39546327,\n 0.99775528, -0.88198978, 0.38223023, 0.69583225, -0.14543845,\n -0.2191694 , -0.07888904, 0.26058067],\n [ 0.39100021, -0.222347 , 1.23561678, -0.04406595, -0.39546327,\n 0.00272543, -0.88198978, -2.03438913, 0.69583225, 0.85262384,\n -0.2191694 , -0.07888904, 0.26058067],\n [-0.35637102, -0.222347 , -2.68479996, -1.60739509, 0.93318745,\n 0.25148289, -0.25262394, 0.38223023, 0.69583225, -0.14543845,\n -0.2191694 , -0.90790934, 0.26058067],\n [ 1.81100556, 1.84721163, 0.97425567, 1.91009547, -0.39546327,\n 0.74899782, -0.88198978, 0.38223023, 0.69583225, 0.25739549,\n -0.2191694 , -0.74210528, 0.26058067],\n [-1.10374225, -0.222347 , 1.23561678, -0.04406595, 0.93318745,\n 0.25148289, 1.63547359, 0.38223023, -1.43712799, -0.14543845,\n -0.2191694 , -0.07888904, 0.26058067],\n [ 1.21310857, -0.222347 , -1.37799438, -2.38905966, -0.39546327,\n -0.99230442, -0.88198978, 0.38223023, 0.69583225, -0.14543845,\n -0.2191694 , -2.56594993, 0.26058067],\n [ 1.9604798 , -0.222347 , 0.19017232, -0.43489824, -0.39546327,\n 0.00272543, -0.88198978, 0.38223023, 0.69583225, 0.68775143,\n -0.2191694 , -0.07888904, 0.26058067],\n [-0.20689677, -2.29190564, -0.33254991, 1.1284309 , -0.39546327,\n -1.48981934, -0.88198978, 0.38223023, 0.69583225, -0.14543845,\n -0.2191694 , -0.07888904, 0.26058067],\n [-0.95426801, -0.222347 , 0.19017232, -0.43489824, 0.93318745,\n -1.48981934, -0.25262394, 0.38223023, -1.43712799, -0.14543845,\n -0.2191694 , -0.16179107, 0.26058067],\n [ 0.68994871, -0.222347 , 0.19017232, -0.43489824, -0.39546327,\n 0.00272543, -0.88198978, 
0.38223023, 0.69583225, 0.25739549,\n -0.2191694 , 0.58432719, 0.26058067],\n [ 0.31626309, -0.222347 , 0.45153343, 1.51926319, -0.39546327,\n -0.74354695, -0.88198978, 0.38223023, 0.69583225, -0.14543845,\n -0.2191694 , 0.75013125, 0.26058067]])" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\n# Split the unscaled, label-encoded features first, then fit the scaler on the training rows only,\n# so statistics of the held-out rows never influence preprocessing (avoids data leakage).\n# Same test_size/random_state as before, so the same rows end up in each partition.\nx_train_raw,x_test_raw,y_train,y_test=train_test_split(df.iloc[:,:13],df.iloc[:,-1],test_size=0.3,random_state=1)\nsplit_scaler=StandardScaler().fit(x_train_raw)\nx_train=split_scaler.transform(x_train_raw)\nx_test=split_scaler.transform(x_test_raw)\ny_test.shape", | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 11, | |
"data": { | |
"text/plain": "(4518,)" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import sklearn\nfrom sklearn.naive_bayes import BernoulliNB\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.naive_bayes import MultinomialNB\nfrom sklearn import metrics\nfrom sklearn.metrics import accuracy_score", | |
"execution_count": 12, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# binarize is a numeric threshold, not an on/off flag: True behaves as the number 1,\n# so write the threshold explicitly as a float.\nBernNB=BernoulliNB(binarize=1.0)\nBernNB.fit(x_train,y_train)\nprint(BernNB)\n# y_expect is the ground truth used by the accuracy cells further down\ny_expect=y_test\n\ny_pred=BernNB.predict(x_test)", | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "BernoulliNB(binarize=True)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "y_pred", | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 14, | |
"data": { | |
"text/plain": "array([' <=50K', ' >50K', ' <=50K', ..., ' <=50K', ' >50K', ' <=50K'],\n dtype='<U6')" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "accuracy_score(y_expect,y_pred)", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 15, | |
"data": { | |
"text/plain": "0.7656042496679947" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Gaussian NB on the same split; fit() returns the estimator, so build and train in one step\nGausNB=GaussianNB().fit(x_train,y_train)\n# overwrite y_pred with the Gaussian model's predictions, then score against the held-out labels\ny_pred=GausNB.predict(x_test)\naccuracy_score(y_expect,y_pred)", | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 16, | |
"data": { | |
"text/plain": "0.8012394864984507" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "y_pred", | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 17, | |
"data": { | |
"text/plain": "array([' <=50K', ' >50K', ' <=50K', ..., ' <=50K', ' <=50K', ' <=50K'],\n dtype='<U6')" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Bernoulli NB again, this time binarizing the features at a threshold of 0.1\ny_expect=y_test\nBernNB=BernoulliNB(binarize=0.1).fit(x_train,y_train)\nprint(BernNB)\n\ny_pred=BernNB.predict(x_test)", | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "BernoulliNB(binarize=0.1)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "accuracy_score(y_expect,y_pred)", | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 19, | |
"data": { | |
"text/plain": "0.7841965471447543" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df2=pd.read_csv(\"SalaryData_Train.csv\")\ndf2", | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 20, | |
"data": { | |
"text/plain": " age workclass education educationno maritalstatus \\\n0 39 State-gov Bachelors 13 Never-married \n1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse \n2 38 Private HS-grad 9 Divorced \n3 53 Private 11th 7 Married-civ-spouse \n4 28 Private Bachelors 13 Married-civ-spouse \n... ... ... ... ... ... \n30156 27 Private Assoc-acdm 12 Married-civ-spouse \n30157 40 Private HS-grad 9 Married-civ-spouse \n30158 58 Private HS-grad 9 Widowed \n30159 22 Private HS-grad 9 Never-married \n30160 52 Self-emp-inc HS-grad 9 Married-civ-spouse \n\n occupation relationship race sex capitalgain \\\n0 Adm-clerical Not-in-family White Male 2174 \n1 Exec-managerial Husband White Male 0 \n2 Handlers-cleaners Not-in-family White Male 0 \n3 Handlers-cleaners Husband Black Male 0 \n4 Prof-specialty Wife Black Female 0 \n... ... ... ... ... ... \n30156 Tech-support Wife White Female 0 \n30157 Machine-op-inspct Husband White Male 0 \n30158 Adm-clerical Unmarried White Female 0 \n30159 Adm-clerical Own-child White Male 0 \n30160 Exec-managerial Wife White Female 15024 \n\n capitalloss hoursperweek native Salary \n0 0 40 United-States <=50K \n1 0 13 United-States <=50K \n2 0 40 United-States <=50K \n3 0 40 United-States <=50K \n4 0 40 Cuba <=50K \n... ... ... ... ... \n30156 0 38 United-States <=50K \n30157 0 40 United-States >50K \n30158 0 40 United-States <=50K \n30159 0 20 United-States <=50K \n30160 0 40 United-States >50K \n\n[30161 rows x 14 columns]", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>39</td>\n <td>State-gov</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Never-married</td>\n <td>Adm-clerical</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>2174</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>50</td>\n <td>Self-emp-not-inc</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Exec-managerial</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>13</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Divorced</td>\n <td>Handlers-cleaners</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>53</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Married-civ-spouse</td>\n <td>Handlers-cleaners</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>28</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Prof-specialty</td>\n <td>Wife</td>\n <td>Black</td>\n <td>Female</td>\n 
<td>0</td>\n <td>0</td>\n <td>40</td>\n <td>Cuba</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>30156</th>\n <td>27</td>\n <td>Private</td>\n <td>Assoc-acdm</td>\n <td>12</td>\n <td>Married-civ-spouse</td>\n <td>Tech-support</td>\n <td>Wife</td>\n <td>White</td>\n <td>Female</td>\n <td>0</td>\n <td>0</td>\n <td>38</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>30157</th>\n <td>40</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Married-civ-spouse</td>\n <td>Machine-op-inspct</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>30158</th>\n <td>58</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Widowed</td>\n <td>Adm-clerical</td>\n <td>Unmarried</td>\n <td>White</td>\n <td>Female</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>30159</th>\n <td>22</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Never-married</td>\n <td>Adm-clerical</td>\n <td>Own-child</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>20</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>30160</th>\n <td>52</td>\n <td>Self-emp-inc</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Married-civ-spouse</td>\n <td>Exec-managerial</td>\n <td>Wife</td>\n <td>White</td>\n <td>Female</td>\n <td>15024</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n </tbody>\n</table>\n<p>30161 rows × 14 columns</p>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df2.info()", | |
"execution_count": 21, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 30161 entries, 0 to 30160\nData columns (total 14 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 age 30161 non-null int64 \n 1 workclass 30161 non-null object\n 2 education 30161 non-null object\n 3 educationno 30161 non-null int64 \n 4 maritalstatus 30161 non-null object\n 5 occupation 30161 non-null object\n 6 relationship 30161 non-null object\n 7 race 30161 non-null object\n 8 sex 30161 non-null object\n 9 capitalgain 30161 non-null int64 \n 10 capitalloss 30161 non-null int64 \n 11 hoursperweek 30161 non-null int64 \n 12 native 30161 non-null object\n 13 Salary 30161 non-null object\ndtypes: int64(5), object(9)\nmemory usage: 3.2+ MB\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df2.describe()", | |
"execution_count": 22, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 22, | |
"data": { | |
"text/plain": " age educationno capitalgain capitalloss hoursperweek\ncount 30161.000000 30161.000000 30161.000000 30161.000000 30161.000000\nmean 38.438115 10.121316 1092.044064 88.302311 40.931269\nstd 13.134830 2.550037 7406.466611 404.121321 11.980182\nmin 17.000000 1.000000 0.000000 0.000000 1.000000\n25% 28.000000 9.000000 0.000000 0.000000 40.000000\n50% 37.000000 10.000000 0.000000 0.000000 40.000000\n75% 47.000000 13.000000 0.000000 0.000000 45.000000\nmax 90.000000 16.000000 99999.000000 4356.000000 99.000000", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>educationno</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>count</th>\n <td>30161.000000</td>\n <td>30161.000000</td>\n <td>30161.000000</td>\n <td>30161.000000</td>\n <td>30161.000000</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>38.438115</td>\n <td>10.121316</td>\n <td>1092.044064</td>\n <td>88.302311</td>\n <td>40.931269</td>\n </tr>\n <tr>\n <th>std</th>\n <td>13.134830</td>\n <td>2.550037</td>\n <td>7406.466611</td>\n <td>404.121321</td>\n <td>11.980182</td>\n </tr>\n <tr>\n <th>min</th>\n <td>17.000000</td>\n <td>1.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>1.000000</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>28.000000</td>\n <td>9.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>40.000000</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>37.000000</td>\n <td>10.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>40.000000</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>47.000000</td>\n <td>13.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>45.000000</td>\n </tr>\n <tr>\n <th>max</th>\n <td>90.000000</td>\n <td>16.000000</td>\n <td>99999.000000</td>\n <td>4356.000000</td>\n <td>99.000000</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df2.shape", | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 23, | |
"data": { | |
"text/plain": "(30161, 14)" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df2.columns", | |
"execution_count": 24, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 24, | |
"data": { | |
"text/plain": "Index(['age', 'workclass', 'education', 'educationno', 'maritalstatus',\n 'occupation', 'relationship', 'race', 'sex', 'capitalgain',\n 'capitalloss', 'hoursperweek', 'native', 'Salary'],\n dtype='object')" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.preprocessing import LabelEncoder\n# Encode every categorical column of df2 with its own encoder and keep the fitted encoders,\n# so the integer codes can later be mapped back to labels or reapplied to another frame.\ncategorical_cols=['workclass','education','maritalstatus','occupation','relationship','race','sex','native']\ntrain_encoders={}\nfor col in categorical_cols:\n    train_encoders[col]=LabelEncoder()\n    df2[col]=train_encoders[col].fit_transform(df2[col])\n# The original cell left `le` fitted on 'native'; keep that binding for any later cell that uses it.\nle=train_encoders['native']\ndf2.head()\n", | |
"execution_count": 25, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 25, | |
"data": { | |
"text/plain": " age workclass education educationno maritalstatus occupation \\\n0 39 5 9 13 4 0 \n1 50 4 9 13 2 3 \n2 38 2 11 9 0 5 \n3 53 2 1 7 2 5 \n4 28 2 9 13 2 9 \n\n relationship race sex capitalgain capitalloss hoursperweek native \\\n0 1 4 1 2174 0 40 37 \n1 0 4 1 0 0 13 37 \n2 1 4 1 0 0 40 37 \n3 0 2 1 0 0 40 37 \n4 5 2 0 0 0 40 4 \n\n Salary \n0 <=50K \n1 <=50K \n2 <=50K \n3 <=50K \n4 <=50K ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>39</td>\n <td>5</td>\n <td>9</td>\n <td>13</td>\n <td>4</td>\n <td>0</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>2174</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>50</td>\n <td>4</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>3</td>\n <td>0</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>13</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>38</td>\n <td>2</td>\n <td>11</td>\n <td>9</td>\n <td>0</td>\n <td>5</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>53</td>\n <td>2</td>\n <td>1</td>\n <td>7</td>\n <td>2</td>\n <td>5</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>28</td>\n <td>2</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>9</td>\n <td>5</td>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>4</td>\n <td><=50K</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Split predictors from the target by column name instead of by a hard-coded\n# position, so the cell stays correct if columns are added or reordered.\nTARGET_COLUMN = \"Salary\"\nx = df2.drop(columns=TARGET_COLUMN)\ny = df2[TARGET_COLUMN]", | |
"execution_count": 26, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "x", | |
"execution_count": 27, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 27, | |
"data": { | |
"text/plain": " age workclass education educationno maritalstatus occupation \\\n0 39 5 9 13 4 0 \n1 50 4 9 13 2 3 \n2 38 2 11 9 0 5 \n3 53 2 1 7 2 5 \n4 28 2 9 13 2 9 \n... ... ... ... ... ... ... \n30156 27 2 7 12 2 12 \n30157 40 2 11 9 2 6 \n30158 58 2 11 9 6 0 \n30159 22 2 11 9 4 0 \n30160 52 3 11 9 2 3 \n\n relationship race sex capitalgain capitalloss hoursperweek native \n0 1 4 1 2174 0 40 37 \n1 0 4 1 0 0 13 37 \n2 1 4 1 0 0 40 37 \n3 0 2 1 0 0 40 37 \n4 5 2 0 0 0 40 4 \n... ... ... ... ... ... ... ... \n30156 5 4 0 0 0 38 37 \n30157 0 4 1 0 0 40 37 \n30158 4 4 0 0 0 40 37 \n30159 3 4 1 0 0 20 37 \n30160 5 4 0 15024 0 40 37 \n\n[30161 rows x 13 columns]", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>39</td>\n <td>5</td>\n <td>9</td>\n <td>13</td>\n <td>4</td>\n <td>0</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>2174</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n </tr>\n <tr>\n <th>1</th>\n <td>50</td>\n <td>4</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>3</td>\n <td>0</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>13</td>\n <td>37</td>\n </tr>\n <tr>\n <th>2</th>\n <td>38</td>\n <td>2</td>\n <td>11</td>\n <td>9</td>\n <td>0</td>\n <td>5</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n </tr>\n <tr>\n <th>3</th>\n <td>53</td>\n <td>2</td>\n <td>1</td>\n <td>7</td>\n <td>2</td>\n <td>5</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n </tr>\n <tr>\n <th>4</th>\n <td>28</td>\n <td>2</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>9</td>\n <td>5</td>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>4</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>30156</th>\n <td>27</td>\n <td>2</td>\n <td>7</td>\n <td>12</td>\n <td>2</td>\n <td>12</td>\n <td>5</td>\n <td>4</td>\n <td>0</td>\n 
<td>0</td>\n <td>0</td>\n <td>38</td>\n <td>37</td>\n </tr>\n <tr>\n <th>30157</th>\n <td>40</td>\n <td>2</td>\n <td>11</td>\n <td>9</td>\n <td>2</td>\n <td>6</td>\n <td>0</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n </tr>\n <tr>\n <th>30158</th>\n <td>58</td>\n <td>2</td>\n <td>11</td>\n <td>9</td>\n <td>6</td>\n <td>0</td>\n <td>4</td>\n <td>4</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n </tr>\n <tr>\n <th>30159</th>\n <td>22</td>\n <td>2</td>\n <td>11</td>\n <td>9</td>\n <td>4</td>\n <td>0</td>\n <td>3</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>20</td>\n <td>37</td>\n </tr>\n <tr>\n <th>30160</th>\n <td>52</td>\n <td>3</td>\n <td>11</td>\n <td>9</td>\n <td>2</td>\n <td>3</td>\n <td>5</td>\n <td>4</td>\n <td>0</td>\n <td>15024</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n </tr>\n </tbody>\n</table>\n<p>30161 rows × 13 columns</p>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.model_selection import train_test_split\n\n# Hold out 30% of the rows for evaluation; the fixed random_state makes the split repeatable.\nx_train, x_test, y_train, y_test = train_test_split(\n    x,\n    y,\n    test_size=0.3,\n    random_state=1,\n)\ny_test.shape", | |
"execution_count": 28, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 28, | |
"data": { | |
"text/plain": "(9049,)" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import sklearn\nfrom sklearn.naive_bayes import BernoulliNB\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.naive_bayes import MultinomialNB\nfrom sklearn import metrics\nfrom sklearn.metrics import accuracy_score", | |
"execution_count": 29, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# binarize is a numeric threshold: feature values above it become 1, the rest 0.\n# It was previously passed as True, which only worked because a bool compares as 1;\n# the threshold is now spelled out as the float it stands for.\nBernNB = BernoulliNB(binarize=1.0)\nBernNB.fit(x_train, y_train)\nprint(BernNB)\n\n# Keep the held-out labels under the name the scoring cells read.\ny_expect = y_test\ny_pred = BernNB.predict(x_test)", | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "BernoulliNB(binarize=1.0)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "y_pred", | |
"execution_count": 31, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 31, | |
"data": { | |
"text/plain": "array([' <=50K', ' <=50K', ' <=50K', ..., ' <=50K', ' <=50K', ' <=50K'],\n dtype='<U6')" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Fraction of held-out rows whose predicted label equals the true label.\naccuracy_score(y_true=y_expect, y_pred=y_pred)", | |
"execution_count": 32, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 32, | |
"data": { | |
"text/plain": "0.778649574538623" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Gaussian naive Bayes on the same split; y_pred is overwritten with this\n# model's predictions before scoring them against y_expect.\nGausNB = GaussianNB().fit(x_train, y_train)\ny_pred = GausNB.predict(x_test)\naccuracy_score(y_expect, y_pred)", | |
"execution_count": 33, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 33, | |
"data": { | |
"text/plain": "0.7898110288429661" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Refit the Bernoulli model with a binarization threshold of 0.1; BernNB and\n# y_pred are rebound to this model and its predictions.\nBernNB = BernoulliNB(binarize=0.1).fit(x_train, y_train)\nprint(BernNB)\n\ny_expect = y_test\ny_pred = BernNB.predict(x_test)", | |
"execution_count": 34, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "BernoulliNB(binarize=0.1)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Fraction of held-out rows whose predicted label equals the true label.\naccuracy_score(y_true=y_expect, y_pred=y_pred)", | |
"execution_count": 35, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 35, | |
"data": { | |
"text/plain": "0.7232843408111393" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.8.5", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "ASSi naive bayes.ipynb", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment