Skip to content

Instantly share code, notes, and snippets.

@Aditii7
Created April 6, 2021 14:24
Show Gist options
  • Save Aditii7/5af26e3198b06c1e8c7b8df89f494a34 to your computer and use it in GitHub Desktop.
Save Aditii7/5af26e3198b06c1e8c7b8df89f494a34 to your computer and use it in GitHub Desktop.
SVM (salary data).ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# 1) Prepare a classification model using SVM for salary data \n\nimport pandas as pd\nimport numpy as np",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Read the held-out test split into a DataFrame.\ndf_test = pd.read_csv(\"SalaryData_Test(1).csv\")",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df_test",
"execution_count": 3,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>25</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Never-married</td>\n <td>Machine-op-inspct</td>\n <td>Own-child</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Married-civ-spouse</td>\n <td>Farming-fishing</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>28</td>\n <td>Local-gov</td>\n <td>Assoc-acdm</td>\n <td>12</td>\n <td>Married-civ-spouse</td>\n <td>Protective-serv</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&gt;50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>44</td>\n <td>Private</td>\n <td>Some-college</td>\n <td>10</td>\n <td>Married-civ-spouse</td>\n <td>Machine-op-inspct</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>7688</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&gt;50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>34</td>\n <td>Private</td>\n <td>10th</td>\n <td>6</td>\n <td>Never-married</td>\n <td>Other-service</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n 
<td>0</td>\n <td>0</td>\n <td>30</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>15055</th>\n <td>33</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Never-married</td>\n <td>Prof-specialty</td>\n <td>Own-child</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>15056</th>\n <td>39</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Divorced</td>\n <td>Prof-specialty</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Female</td>\n <td>0</td>\n <td>0</td>\n <td>36</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>15057</th>\n <td>38</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Prof-specialty</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>15058</th>\n <td>44</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Divorced</td>\n <td>Adm-clerical</td>\n <td>Own-child</td>\n <td>Asian-Pac-Islander</td>\n <td>Male</td>\n <td>5455</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>15059</th>\n <td>35</td>\n <td>Self-emp-inc</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Exec-managerial</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>60</td>\n <td>United-States</td>\n <td>&gt;50K</td>\n </tr>\n </tbody>\n</table>\n<p>15060 rows × 14 columns</p>\n</div>",
"text/plain": " age workclass education educationno maritalstatus \\\n0 25 Private 11th 7 Never-married \n1 38 Private HS-grad 9 Married-civ-spouse \n2 28 Local-gov Assoc-acdm 12 Married-civ-spouse \n3 44 Private Some-college 10 Married-civ-spouse \n4 34 Private 10th 6 Never-married \n... ... ... ... ... ... \n15055 33 Private Bachelors 13 Never-married \n15056 39 Private Bachelors 13 Divorced \n15057 38 Private Bachelors 13 Married-civ-spouse \n15058 44 Private Bachelors 13 Divorced \n15059 35 Self-emp-inc Bachelors 13 Married-civ-spouse \n\n occupation relationship race sex \\\n0 Machine-op-inspct Own-child Black Male \n1 Farming-fishing Husband White Male \n2 Protective-serv Husband White Male \n3 Machine-op-inspct Husband Black Male \n4 Other-service Not-in-family White Male \n... ... ... ... ... \n15055 Prof-specialty Own-child White Male \n15056 Prof-specialty Not-in-family White Female \n15057 Prof-specialty Husband White Male \n15058 Adm-clerical Own-child Asian-Pac-Islander Male \n15059 Exec-managerial Husband White Male \n\n capitalgain capitalloss hoursperweek native Salary \n0 0 0 40 United-States <=50K \n1 0 0 50 United-States <=50K \n2 0 0 40 United-States >50K \n3 7688 0 40 United-States >50K \n4 0 0 30 United-States <=50K \n... ... ... ... ... ... \n15055 0 0 40 United-States <=50K \n15056 0 0 36 United-States <=50K \n15057 0 0 50 United-States <=50K \n15058 5455 0 40 United-States <=50K \n15059 0 0 60 United-States >50K \n\n[15060 rows x 14 columns]"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Load the training split. This cell previously re-read the test CSV, which meant the\n# model was fitted and scored on exactly the same rows.\n# NOTE: the file name is assumed to mirror the test file's naming - adjust the path if\n# your training file is named differently.\ndf_train=pd.read_csv(\"SalaryData_Train(1).csv\")\ndf_train.head()",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from sklearn.preprocessing import LabelEncoder\nlb=LabelEncoder()",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Learn the label -> integer mapping from the training labels only, then reuse that\n# same mapping for the test labels. transform() raises if the test data contains a\n# label that was never seen during fitting, instead of silently remapping it.\ndf_train['Salary']=lb.fit_transform(df_train['Salary'])\ndf_test['Salary']=lb.transform(df_test['Salary'])",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# One-hot encode the categorical columns of the training data, then force the test\n# frame onto exactly the same columns (same set, same order). Categories that occur\n# only in the test data are dropped; categories absent from the test data become\n# all-zero columns. Without this the two feature matrices can differ in width.\ndf1_train=pd.get_dummies(df_train)\ndf1_test=pd.get_dummies(df_test).reindex(columns=df1_train.columns,fill_value=0)",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Separate the encoded target column from the feature columns for each split.\ny_train = df1_train['Salary']\nx_train = df1_train.drop(columns='Salary')\ny_test = df1_test['Salary']\nx_test = df1_test.drop(columns='Salary')",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "y_train",
"execution_count": 9,
"outputs": [
{
"data": {
"text/plain": "0 0\n1 0\n2 1\n3 1\n4 0\n ..\n15055 0\n15056 0\n15057 0\n15058 0\n15059 1\nName: Salary, Length: 15060, dtype: int32"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from sklearn.svm import SVC",
"execution_count": 10,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Fit a support vector classifier, constructed with no arguments, on the training split.\nmodel = SVC()\nmodel.fit(x_train, y_train)",
"execution_count": 11,
"outputs": [
{
"data": {
"text/plain": "SVC()"
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Predict the encoded salary class for every row of the test features.\npred = model.predict(x_test)",
"execution_count": 12,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "pred",
"execution_count": 13,
"outputs": [
{
"data": {
"text/plain": "array([0, 0, 0, ..., 0, 0, 0])"
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from sklearn.metrics import classification_report,confusion_matrix",
"execution_count": 14,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# scikit-learn metrics expect (y_true, y_pred) in that order. Passing the predictions\n# first transposes the confusion matrix and swaps precision with recall in the report.\n# Rows of the matrix are the true classes, columns are the predicted classes.\nprint(confusion_matrix(y_test,pred))\nprint(classification_report(y_test,pred))",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Share of test rows whose prediction matches the true label, as a percentage.\nnp.mean(pred == y_test) * 100",
"execution_count": 16,
"outputs": [
{
"data": {
"text/plain": "79.64143426294821"
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.5",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "SVM (salary data).ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment