Skip to content

Instantly share code, notes, and snippets.

@Prathmeshp20
Created October 18, 2021 17:54
Show Gist options
  • Save Prathmeshp20/740b1cd7dcea6fea9c6aaaadce12d7c9 to your computer and use it in GitHub Desktop.
Save Prathmeshp20/740b1cd7dcea6fea9c6aaaadce12d7c9 to your computer and use it in GitHub Desktop.
Assignment17-SVM-SalaryData.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd\nimport numpy as np\nfrom sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\nfrom sklearn.preprocessing import StandardScaler\n\nfrom sklearn import svm\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.metrics import classification_report\n\n\nfrom sklearn.metrics import accuracy_score, confusion_matrix\nfrom sklearn.model_selection import train_test_split, cross_val_score",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "Salary_train = pd.read_csv(\"C:/Users/Prathmesh/Downloads/SalaryData_Train(1).csv\")\nSalary_train.head()",
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 2,
"data": {
"text/plain": " age workclass education educationno maritalstatus \\\n0 39 State-gov Bachelors 13 Never-married \n1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse \n2 38 Private HS-grad 9 Divorced \n3 53 Private 11th 7 Married-civ-spouse \n4 28 Private Bachelors 13 Married-civ-spouse \n\n occupation relationship race sex capitalgain \\\n0 Adm-clerical Not-in-family White Male 2174 \n1 Exec-managerial Husband White Male 0 \n2 Handlers-cleaners Not-in-family White Male 0 \n3 Handlers-cleaners Husband Black Male 0 \n4 Prof-specialty Wife Black Female 0 \n\n capitalloss hoursperweek native Salary \n0 0 40 United-States <=50K \n1 0 13 United-States <=50K \n2 0 40 United-States <=50K \n3 0 40 United-States <=50K \n4 0 40 Cuba <=50K ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>39</td>\n <td>State-gov</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Never-married</td>\n <td>Adm-clerical</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>2174</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>50</td>\n <td>Self-emp-not-inc</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Exec-managerial</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>13</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Divorced</td>\n <td>Handlers-cleaners</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>53</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Married-civ-spouse</td>\n <td>Handlers-cleaners</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>28</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Prof-specialty</td>\n <td>Wife</td>\n <td>Black</td>\n <td>Female</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>Cuba</td>\n <td>&lt;=50K</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "Salary_test = pd.read_csv(\"C:/Users/Prathmesh/Downloads/SalaryData_Test(1).csv\")\nSalary_test.head()",
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 3,
"data": {
"text/plain": " age workclass education educationno maritalstatus \\\n0 25 Private 11th 7 Never-married \n1 38 Private HS-grad 9 Married-civ-spouse \n2 28 Local-gov Assoc-acdm 12 Married-civ-spouse \n3 44 Private Some-college 10 Married-civ-spouse \n4 34 Private 10th 6 Never-married \n\n occupation relationship race sex capitalgain \\\n0 Machine-op-inspct Own-child Black Male 0 \n1 Farming-fishing Husband White Male 0 \n2 Protective-serv Husband White Male 0 \n3 Machine-op-inspct Husband Black Male 7688 \n4 Other-service Not-in-family White Male 0 \n\n capitalloss hoursperweek native Salary \n0 0 40 United-States <=50K \n1 0 50 United-States <=50K \n2 0 40 United-States >50K \n3 0 40 United-States >50K \n4 0 30 United-States <=50K ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>25</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Never-married</td>\n <td>Machine-op-inspct</td>\n <td>Own-child</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Married-civ-spouse</td>\n <td>Farming-fishing</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>28</td>\n <td>Local-gov</td>\n <td>Assoc-acdm</td>\n <td>12</td>\n <td>Married-civ-spouse</td>\n <td>Protective-serv</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&gt;50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>44</td>\n <td>Private</td>\n <td>Some-college</td>\n <td>10</td>\n <td>Married-civ-spouse</td>\n <td>Machine-op-inspct</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>7688</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>&gt;50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>34</td>\n <td>Private</td>\n <td>10th</td>\n <td>6</td>\n <td>Never-married</td>\n <td>Other-service</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>30</td>\n <td>United-States</td>\n <td>&lt;=50K</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "string_col=['workclass','education','maritalstatus','occupation','relationship','race','sex','native']\nfrom sklearn import preprocessing\nlabel_encoder=preprocessing.LabelEncoder()\nfor i in string_col:\n Salary_train[i]=label_encoder.fit_transform(Salary_train[i])\n Salary_test[i]=label_encoder.fit_transform(Salary_test[i])",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "Salary_train.head()",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": " age workclass education educationno maritalstatus occupation \\\n0 39 5 9 13 4 0 \n1 50 4 9 13 2 3 \n2 38 2 11 9 0 5 \n3 53 2 1 7 2 5 \n4 28 2 9 13 2 9 \n\n relationship race sex capitalgain capitalloss hoursperweek native \\\n0 1 4 1 2174 0 40 37 \n1 0 4 1 0 0 13 37 \n2 1 4 1 0 0 40 37 \n3 0 2 1 0 0 40 37 \n4 5 2 0 0 0 40 4 \n\n Salary \n0 <=50K \n1 <=50K \n2 <=50K \n3 <=50K \n4 <=50K ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>39</td>\n <td>5</td>\n <td>9</td>\n <td>13</td>\n <td>4</td>\n <td>0</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>2174</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>50</td>\n <td>4</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>3</td>\n <td>0</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>13</td>\n <td>37</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>38</td>\n <td>2</td>\n <td>11</td>\n <td>9</td>\n <td>0</td>\n <td>5</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>53</td>\n <td>2</td>\n <td>1</td>\n <td>7</td>\n <td>2</td>\n <td>5</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td>&lt;=50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>28</td>\n <td>2</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>9</td>\n <td>5</td>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>4</td>\n <td>&lt;=50K</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "x_train = Salary_train.iloc[0:300,0:13]\ny_train = Salary_train.iloc[0:300,13]\nx_test = Salary_test.iloc[0:150,0:13]\ny_test = Salary_test.iloc[0:150,13]",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "clf = SVC()\nparam_grid = [{'kernel':['rbf'],'gamma':[50,5,10,0.5],'C':[15,14,13,12,11,10,0.1,0.001] }]\ngsv = GridSearchCV(clf,param_grid,cv=10)\ngsv.fit(x_train,y_train)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "gsv.best_params_ , gsv.best_score_ ",
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 8,
"data": {
"text/plain": "({'C': 15, 'gamma': 50, 'kernel': 'rbf'}, 0.7566666666666666)"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "clf = SVC(C= 15, gamma = 50)\nclf.fit(x_train , y_train)\ny_pred = clf.predict(x_test)\nacc = accuracy_score(y_test, y_pred) * 100\nprint(\"Accuracy =\", acc)\nconfusion_matrix(y_test, y_pred)",
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": "Accuracy = 78.0\n",
"name": "stdout"
},
{
"output_type": "execute_result",
"execution_count": 9,
"data": {
"text/plain": "array([[117, 0],\n [ 33, 0]], dtype=int64)"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.5",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "Assignment17-SVM-SalaryData.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment