Created
April 6, 2021 14:24
-
-
Save Aditii7/5af26e3198b06c1e8c7b8df89f494a34 to your computer and use it in GitHub Desktop.
SVM (salary data).ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# 1) Prepare a classification model using SVM for salary data \n\nimport pandas as pd\nimport numpy as np", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_test=pd.read_csv(\"SalaryData_Test(1).csv\")", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_test", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>25</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Never-married</td>\n <td>Machine-op-inspct</td>\n <td>Own-child</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Married-civ-spouse</td>\n <td>Farming-fishing</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>28</td>\n <td>Local-gov</td>\n <td>Assoc-acdm</td>\n <td>12</td>\n <td>Married-civ-spouse</td>\n <td>Protective-serv</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>44</td>\n <td>Private</td>\n <td>Some-college</td>\n <td>10</td>\n <td>Married-civ-spouse</td>\n <td>Machine-op-inspct</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>7688</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>34</td>\n <td>Private</td>\n <td>10th</td>\n <td>6</td>\n <td>Never-married</td>\n <td>Other-service</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>30</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>15055</th>\n <td>33</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Never-married</td>\n <td>Prof-specialty</td>\n <td>Own-child</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15056</th>\n <td>39</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Divorced</td>\n <td>Prof-specialty</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Female</td>\n <td>0</td>\n <td>0</td>\n <td>36</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15057</th>\n <td>38</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Prof-specialty</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15058</th>\n <td>44</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Divorced</td>\n <td>Adm-clerical</td>\n <td>Own-child</td>\n <td>Asian-Pac-Islander</td>\n <td>Male</td>\n <td>5455</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15059</th>\n <td>35</td>\n <td>Self-emp-inc</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Exec-managerial</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>60</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n </tbody>\n</table>\n<p>15060 rows × 14 columns</p>\n</div>", | |
"text/plain": " age workclass education educationno maritalstatus \\\n0 25 Private 11th 7 Never-married \n1 38 Private HS-grad 9 Married-civ-spouse \n2 28 Local-gov Assoc-acdm 12 Married-civ-spouse \n3 44 Private Some-college 10 Married-civ-spouse \n4 34 Private 10th 6 Never-married \n... ... ... ... ... ... \n15055 33 Private Bachelors 13 Never-married \n15056 39 Private Bachelors 13 Divorced \n15057 38 Private Bachelors 13 Married-civ-spouse \n15058 44 Private Bachelors 13 Divorced \n15059 35 Self-emp-inc Bachelors 13 Married-civ-spouse \n\n occupation relationship race sex \\\n0 Machine-op-inspct Own-child Black Male \n1 Farming-fishing Husband White Male \n2 Protective-serv Husband White Male \n3 Machine-op-inspct Husband Black Male \n4 Other-service Not-in-family White Male \n... ... ... ... ... \n15055 Prof-specialty Own-child White Male \n15056 Prof-specialty Not-in-family White Female \n15057 Prof-specialty Husband White Male \n15058 Adm-clerical Own-child Asian-Pac-Islander Male \n15059 Exec-managerial Husband White Male \n\n capitalgain capitalloss hoursperweek native Salary \n0 0 0 40 United-States <=50K \n1 0 0 50 United-States <=50K \n2 0 0 40 United-States >50K \n3 7688 0 40 United-States >50K \n4 0 0 30 United-States <=50K \n... ... ... ... ... ... \n15055 0 0 40 United-States <=50K \n15056 0 0 36 United-States <=50K \n15057 0 0 50 United-States <=50K \n15058 5455 0 40 United-States <=50K \n15059 0 0 60 United-States >50K \n\n[15060 rows x 14 columns]" | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_train=pd.read_csv(\"SalaryData_Test(1).csv\")\ndf_train", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>25</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Never-married</td>\n <td>Machine-op-inspct</td>\n <td>Own-child</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Married-civ-spouse</td>\n <td>Farming-fishing</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>28</td>\n <td>Local-gov</td>\n <td>Assoc-acdm</td>\n <td>12</td>\n <td>Married-civ-spouse</td>\n <td>Protective-serv</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>44</td>\n <td>Private</td>\n <td>Some-college</td>\n <td>10</td>\n <td>Married-civ-spouse</td>\n <td>Machine-op-inspct</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>7688</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>34</td>\n <td>Private</td>\n <td>10th</td>\n <td>6</td>\n <td>Never-married</td>\n <td>Other-service</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>30</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>15055</th>\n <td>33</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Never-married</td>\n <td>Prof-specialty</td>\n <td>Own-child</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15056</th>\n <td>39</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Divorced</td>\n <td>Prof-specialty</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Female</td>\n <td>0</td>\n <td>0</td>\n <td>36</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15057</th>\n <td>38</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Prof-specialty</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15058</th>\n <td>44</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Divorced</td>\n <td>Adm-clerical</td>\n <td>Own-child</td>\n <td>Asian-Pac-Islander</td>\n <td>Male</td>\n <td>5455</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>15059</th>\n <td>35</td>\n <td>Self-emp-inc</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Exec-managerial</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>60</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n </tbody>\n</table>\n<p>15060 rows × 14 columns</p>\n</div>", | |
"text/plain": " age workclass education educationno maritalstatus \\\n0 25 Private 11th 7 Never-married \n1 38 Private HS-grad 9 Married-civ-spouse \n2 28 Local-gov Assoc-acdm 12 Married-civ-spouse \n3 44 Private Some-college 10 Married-civ-spouse \n4 34 Private 10th 6 Never-married \n... ... ... ... ... ... \n15055 33 Private Bachelors 13 Never-married \n15056 39 Private Bachelors 13 Divorced \n15057 38 Private Bachelors 13 Married-civ-spouse \n15058 44 Private Bachelors 13 Divorced \n15059 35 Self-emp-inc Bachelors 13 Married-civ-spouse \n\n occupation relationship race sex \\\n0 Machine-op-inspct Own-child Black Male \n1 Farming-fishing Husband White Male \n2 Protective-serv Husband White Male \n3 Machine-op-inspct Husband Black Male \n4 Other-service Not-in-family White Male \n... ... ... ... ... \n15055 Prof-specialty Own-child White Male \n15056 Prof-specialty Not-in-family White Female \n15057 Prof-specialty Husband White Male \n15058 Adm-clerical Own-child Asian-Pac-Islander Male \n15059 Exec-managerial Husband White Male \n\n capitalgain capitalloss hoursperweek native Salary \n0 0 0 40 United-States <=50K \n1 0 0 50 United-States <=50K \n2 0 0 40 United-States >50K \n3 7688 0 40 United-States >50K \n4 0 0 30 United-States <=50K \n... ... ... ... ... ... \n15055 0 0 40 United-States <=50K \n15056 0 0 36 United-States <=50K \n15057 0 0 50 United-States <=50K \n15058 5455 0 40 United-States <=50K \n15059 0 0 60 United-States >50K \n\n[15060 rows x 14 columns]" | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.preprocessing import LabelEncoder\nlb=LabelEncoder()", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_test['Salary']=lb.fit_transform(df_test['Salary'])\ndf_train['Salary']=lb.fit_transform(df_train['Salary'])", | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df1_test=pd.get_dummies(df_test)\ndf1_train=pd.get_dummies(df_train)", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "x_train=df1_train.drop('Salary',axis=1)\ny_train=df1_train['Salary']\nx_test=df1_test.drop('Salary',axis=1)\ny_test=df1_test['Salary']", | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "y_train", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "0 0\n1 0\n2 1\n3 1\n4 0\n ..\n15055 0\n15056 0\n15057 0\n15058 0\n15059 1\nName: Salary, Length: 15060, dtype: int32" | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.svm import SVC", | |
"execution_count": 10, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "model=SVC()\nmodel.fit(x_train,y_train)", | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "SVC()" | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "pred=model.predict(x_test)", | |
"execution_count": 12, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "pred", | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "array([0, 0, 0, ..., 0, 0, 0])" | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.metrics import classification_report,confusion_matrix", | |
"execution_count": 14, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "print(confusion_matrix(pred,y_test))\nprint(classification_report(pred,y_test))", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "[[10997 2703]\n [ 363 997]]\n precision recall f1-score support\n\n 0 0.97 0.80 0.88 13700\n 1 0.27 0.73 0.39 1360\n\n accuracy 0.80 15060\n macro avg 0.62 0.77 0.64 15060\nweighted avg 0.90 0.80 0.83 15060\n\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "np.mean(pred==y_test)*100", | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "79.64143426294821" | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.8.5", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "SVM (salary data).ipynb", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment