Prathmeshp20/Assignment17-SVM-SalaryData.ipynb

## Assignment17-SVM-SalaryData.ipynb
{
  "cells": [
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import pandas as pd\nimport numpy as np\nfrom sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\nfrom sklearn.preprocessing import StandardScaler\n\nfrom sklearn import svm\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.metrics import classification_report\n\n\nfrom sklearn.metrics import accuracy_score, confusion_matrix\nfrom sklearn.model_selection import train_test_split, cross_val_score",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "Salary_train = pd.read_csv(\"C:/Users/Prathmesh/Downloads/SalaryData_Train(1).csv\")\nSalary_train.head()",
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "   age          workclass   education  educationno        maritalstatus  \\\n0   39          State-gov   Bachelors           13        Never-married   \n1   50   Self-emp-not-inc   Bachelors           13   Married-civ-spouse   \n2   38            Private     HS-grad            9             Divorced   \n3   53            Private        11th            7   Married-civ-spouse   \n4   28            Private   Bachelors           13   Married-civ-spouse   \n\n           occupation    relationship    race      sex  capitalgain  \\\n0        Adm-clerical   Not-in-family   White     Male         2174   \n1     Exec-managerial         Husband   White     Male            0   \n2   Handlers-cleaners   Not-in-family   White     Male            0   \n3   Handlers-cleaners         Husband   Black     Male            0   \n4      Prof-specialty            Wife   Black   Female            0   \n\n   capitalloss  hoursperweek          native  Salary  \n0            0            40   United-States   <=50K  \n1            0            13   United-States   <=50K  \n2            0            40   United-States   <=50K  \n3            0            40   United-States   <=50K  \n4            0            40            Cuba   <=50K  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>workclass</th>\n      <th>education</th>\n      <th>educationno</th>\n      <th>maritalstatus</th>\n      <th>occupation</th>\n      <th>relationship</th>\n      <th>race</th>\n      <th>sex</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n      <th>native</th>\n      <th>Salary</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>39</td>\n      <td>State-gov</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Never-married</td>\n      <td>Adm-clerical</td>\n      <td>Not-in-family</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>2174</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>50</td>\n      <td>Self-emp-not-inc</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Married-civ-spouse</td>\n      <td>Exec-managerial</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>13</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>38</td>\n      <td>Private</td>\n      <td>HS-grad</td>\n      <td>9</td>\n      <td>Divorced</td>\n      <td>Handlers-cleaners</td>\n      <td>Not-in-family</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>53</td>\n      <td>Private</td>\n      <td>11th</td>\n      <td>7</td>\n      <td>Married-civ-spouse</td>\n      <td>Handlers-cleaners</td>\n      <td>Husband</td>\n      <td>Black</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>28</td>\n      <td>Private</td>\n      <td>Bachelors</td>\n      <td>13</td>\n      <td>Married-civ-spouse</td>\n      <td>Prof-specialty</td>\n      <td>Wife</td>\n      <td>Black</td>\n      <td>Female</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>Cuba</td>\n      <td>&lt;=50K</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "Salary_test = pd.read_csv(\"C:/Users/Prathmesh/Downloads/SalaryData_Test(1).csv\")\nSalary_test.head()",
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/plain": "   age   workclass      education  educationno        maritalstatus  \\\n0   25     Private           11th            7        Never-married   \n1   38     Private        HS-grad            9   Married-civ-spouse   \n2   28   Local-gov     Assoc-acdm           12   Married-civ-spouse   \n3   44     Private   Some-college           10   Married-civ-spouse   \n4   34     Private           10th            6        Never-married   \n\n           occupation    relationship    race    sex  capitalgain  \\\n0   Machine-op-inspct       Own-child   Black   Male            0   \n1     Farming-fishing         Husband   White   Male            0   \n2     Protective-serv         Husband   White   Male            0   \n3   Machine-op-inspct         Husband   Black   Male         7688   \n4       Other-service   Not-in-family   White   Male            0   \n\n   capitalloss  hoursperweek          native  Salary  \n0            0            40   United-States   <=50K  \n1            0            50   United-States   <=50K  \n2            0            40   United-States    >50K  \n3            0            40   United-States    >50K  \n4            0            30   United-States   <=50K  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>workclass</th>\n      <th>education</th>\n      <th>educationno</th>\n      <th>maritalstatus</th>\n      <th>occupation</th>\n      <th>relationship</th>\n      <th>race</th>\n      <th>sex</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n      <th>native</th>\n      <th>Salary</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>25</td>\n      <td>Private</td>\n      <td>11th</td>\n      <td>7</td>\n      <td>Never-married</td>\n      <td>Machine-op-inspct</td>\n      <td>Own-child</td>\n      <td>Black</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>38</td>\n      <td>Private</td>\n      <td>HS-grad</td>\n      <td>9</td>\n      <td>Married-civ-spouse</td>\n      <td>Farming-fishing</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>50</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>28</td>\n      <td>Local-gov</td>\n      <td>Assoc-acdm</td>\n      <td>12</td>\n      <td>Married-civ-spouse</td>\n      <td>Protective-serv</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&gt;50K</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>44</td>\n      <td>Private</td>\n      <td>Some-college</td>\n      <td>10</td>\n      <td>Married-civ-spouse</td>\n      <td>Machine-op-inspct</td>\n      <td>Husband</td>\n      <td>Black</td>\n      <td>Male</td>\n      <td>7688</td>\n      <td>0</td>\n      <td>40</td>\n      <td>United-States</td>\n      <td>&gt;50K</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>34</td>\n      <td>Private</td>\n      <td>10th</td>\n      <td>6</td>\n      <td>Never-married</td>\n      <td>Other-service</td>\n      <td>Not-in-family</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0</td>\n      <td>0</td>\n      <td>30</td>\n      <td>United-States</td>\n      <td>&lt;=50K</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "string_col=['workclass','education','maritalstatus','occupation','relationship','race','sex','native']\nfrom sklearn import preprocessing\nlabel_encoder=preprocessing.LabelEncoder()\nfor i in string_col:\n    Salary_train[i]=label_encoder.fit_transform(Salary_train[i])\n    Salary_test[i]=label_encoder.fit_transform(Salary_test[i])",
      "execution_count": 4,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "Salary_train.head()",
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 5,
          "data": {
            "text/plain": "   age  workclass  education  educationno  maritalstatus  occupation  \\\n0   39          5          9           13              4           0   \n1   50          4          9           13              2           3   \n2   38          2         11            9              0           5   \n3   53          2          1            7              2           5   \n4   28          2          9           13              2           9   \n\n   relationship  race  sex  capitalgain  capitalloss  hoursperweek  native  \\\n0             1     4    1         2174            0            40      37   \n1             0     4    1            0            0            13      37   \n2             1     4    1            0            0            40      37   \n3             0     2    1            0            0            40      37   \n4             5     2    0            0            0            40       4   \n\n   Salary  \n0   <=50K  \n1   <=50K  \n2   <=50K  \n3   <=50K  \n4   <=50K  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>age</th>\n      <th>workclass</th>\n      <th>education</th>\n      <th>educationno</th>\n      <th>maritalstatus</th>\n      <th>occupation</th>\n      <th>relationship</th>\n      <th>race</th>\n      <th>sex</th>\n      <th>capitalgain</th>\n      <th>capitalloss</th>\n      <th>hoursperweek</th>\n      <th>native</th>\n      <th>Salary</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>39</td>\n      <td>5</td>\n      <td>9</td>\n      <td>13</td>\n      <td>4</td>\n      <td>0</td>\n      <td>1</td>\n      <td>4</td>\n      <td>1</td>\n      <td>2174</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>50</td>\n      <td>4</td>\n      <td>9</td>\n      <td>13</td>\n      <td>2</td>\n      <td>3</td>\n      <td>0</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>13</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>38</td>\n      <td>2</td>\n      <td>11</td>\n      <td>9</td>\n      <td>0</td>\n      <td>5</td>\n      <td>1</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>53</td>\n      <td>2</td>\n      <td>1</td>\n      <td>7</td>\n      <td>2</td>\n      <td>5</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>37</td>\n      <td>&lt;=50K</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>28</td>\n      <td>2</td>\n      <td>9</td>\n      <td>13</td>\n      <td>2</td>\n      <td>9</td>\n      <td>5</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>40</td>\n      <td>4</td>\n      <td>&lt;=50K</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x_train = Salary_train.iloc[0:300,0:13]\ny_train = Salary_train.iloc[0:300,13]\nx_test = Salary_test.iloc[0:150,0:13]\ny_test = Salary_test.iloc[0:150,13]",
      "execution_count": 6,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "clf = SVC()\nparam_grid = [{'kernel':['rbf'],'gamma':[50,5,10,0.5],'C':[15,14,13,12,11,10,0.1,0.001] }]\ngsv = GridSearchCV(clf,param_grid,cv=10)\ngsv.fit(x_train,y_train)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "gsv.best_params_ , gsv.best_score_ ",
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 8,
          "data": {
            "text/plain": "({'C': 15, 'gamma': 50, 'kernel': 'rbf'}, 0.7566666666666666)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "clf = SVC(C= 15, gamma = 50)\nclf.fit(x_train , y_train)\ny_pred = clf.predict(x_test)\nacc = accuracy_score(y_test, y_pred) * 100\nprint(\"Accuracy =\", acc)\nconfusion_matrix(y_test, y_pred)",
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Accuracy = 78.0\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "execution_count": 9,
          "data": {
            "text/plain": "array([[117,   0],\n       [ 33,   0]], dtype=int64)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.8.5",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "Assignment17-SVM-SalaryData.ipynb",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}
	{
	"cells": [
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import pandas as pd\nimport numpy as np\nfrom sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\nfrom sklearn.preprocessing import StandardScaler\n\nfrom sklearn import svm\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.metrics import classification_report\n\n\nfrom sklearn.metrics import accuracy_score, confusion_matrix\nfrom sklearn.model_selection import train_test_split, cross_val_score",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "Salary_train = pd.read_csv(\"C:/Users/Prathmesh/Downloads/SalaryData_Train(1).csv\")\nSalary_train.head()",
	"execution_count": 2,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 2,
	"data": {
	"text/plain": " age workclass education educationno maritalstatus \\\n0 39 State-gov Bachelors 13 Never-married \n1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse \n2 38 Private HS-grad 9 Divorced \n3 53 Private 11th 7 Married-civ-spouse \n4 28 Private Bachelors 13 Married-civ-spouse \n\n occupation relationship race sex capitalgain \\\n0 Adm-clerical Not-in-family White Male 2174 \n1 Exec-managerial Husband White Male 0 \n2 Handlers-cleaners Not-in-family White Male 0 \n3 Handlers-cleaners Husband Black Male 0 \n4 Prof-specialty Wife Black Female 0 \n\n capitalloss hoursperweek native Salary \n0 0 40 United-States <=50K \n1 0 13 United-States <=50K \n2 0 40 United-States <=50K \n3 0 40 United-States <=50K \n4 0 40 Cuba <=50K ",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>39</td>\n <td>State-gov</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Never-married</td>\n <td>Adm-clerical</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>2174</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>50</td>\n <td>Self-emp-not-inc</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Exec-managerial</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>13</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Divorced</td>\n <td>Handlers-cleaners</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>53</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Married-civ-spouse</td>\n <td>Handlers-cleaners</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>28</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Prof-specialty</td>\n <td>Wife</td>\n <td>Black</td>\n <td>Female</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>Cuba</td>\n <td><=50K</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "Salary_test = pd.read_csv(\"C:/Users/Prathmesh/Downloads/SalaryData_Test(1).csv\")\nSalary_test.head()",
	"execution_count": 3,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 3,
	"data": {
	"text/plain": " age workclass education educationno maritalstatus \\\n0 25 Private 11th 7 Never-married \n1 38 Private HS-grad 9 Married-civ-spouse \n2 28 Local-gov Assoc-acdm 12 Married-civ-spouse \n3 44 Private Some-college 10 Married-civ-spouse \n4 34 Private 10th 6 Never-married \n\n occupation relationship race sex capitalgain \\\n0 Machine-op-inspct Own-child Black Male 0 \n1 Farming-fishing Husband White Male 0 \n2 Protective-serv Husband White Male 0 \n3 Machine-op-inspct Husband Black Male 7688 \n4 Other-service Not-in-family White Male 0 \n\n capitalloss hoursperweek native Salary \n0 0 40 United-States <=50K \n1 0 50 United-States <=50K \n2 0 40 United-States >50K \n3 0 40 United-States >50K \n4 0 30 United-States <=50K ",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>25</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Never-married</td>\n <td>Machine-op-inspct</td>\n <td>Own-child</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Married-civ-spouse</td>\n <td>Farming-fishing</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>28</td>\n <td>Local-gov</td>\n <td>Assoc-acdm</td>\n <td>12</td>\n <td>Married-civ-spouse</td>\n <td>Protective-serv</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>44</td>\n <td>Private</td>\n <td>Some-college</td>\n <td>10</td>\n <td>Married-civ-spouse</td>\n <td>Machine-op-inspct</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>7688</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>34</td>\n <td>Private</td>\n <td>10th</td>\n <td>6</td>\n <td>Never-married</td>\n <td>Other-service</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>30</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "string_col=['workclass','education','maritalstatus','occupation','relationship','race','sex','native']\nfrom sklearn import preprocessing\nlabel_encoder=preprocessing.LabelEncoder()\nfor i in string_col:\n Salary_train[i]=label_encoder.fit_transform(Salary_train[i])\n Salary_test[i]=label_encoder.fit_transform(Salary_test[i])",
	"execution_count": 4,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "Salary_train.head()",
	"execution_count": 5,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 5,
	"data": {
	"text/plain": " age workclass education educationno maritalstatus occupation \\\n0 39 5 9 13 4 0 \n1 50 4 9 13 2 3 \n2 38 2 11 9 0 5 \n3 53 2 1 7 2 5 \n4 28 2 9 13 2 9 \n\n relationship race sex capitalgain capitalloss hoursperweek native \\\n0 1 4 1 2174 0 40 37 \n1 0 4 1 0 0 13 37 \n2 1 4 1 0 0 40 37 \n3 0 2 1 0 0 40 37 \n4 5 2 0 0 0 40 4 \n\n Salary \n0 <=50K \n1 <=50K \n2 <=50K \n3 <=50K \n4 <=50K ",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>39</td>\n <td>5</td>\n <td>9</td>\n <td>13</td>\n <td>4</td>\n <td>0</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>2174</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>50</td>\n <td>4</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>3</td>\n <td>0</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>13</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>38</td>\n <td>2</td>\n <td>11</td>\n <td>9</td>\n <td>0</td>\n <td>5</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>53</td>\n <td>2</td>\n <td>1</td>\n <td>7</td>\n <td>2</td>\n <td>5</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>28</td>\n <td>2</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>9</td>\n <td>5</td>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>4</td>\n <td><=50K</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "x_train = Salary_train.iloc[0:300,0:13]\ny_train = Salary_train.iloc[0:300,13]\nx_test = Salary_test.iloc[0:150,0:13]\ny_test = Salary_test.iloc[0:150,13]",
	"execution_count": 6,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "clf = SVC()\nparam_grid = [{'kernel':['rbf'],'gamma':[50,5,10,0.5],'C':[15,14,13,12,11,10,0.1,0.001] }]\ngsv = GridSearchCV(clf,param_grid,cv=10)\ngsv.fit(x_train,y_train)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "gsv.best_params_ , gsv.best_score_ ",
	"execution_count": 8,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 8,
	"data": {
	"text/plain": "({'C': 15, 'gamma': 50, 'kernel': 'rbf'}, 0.7566666666666666)"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "clf = SVC(C= 15, gamma = 50)\nclf.fit(x_train , y_train)\ny_pred = clf.predict(x_test)\nacc = accuracy_score(y_test, y_pred) * 100\nprint(\"Accuracy =\", acc)\nconfusion_matrix(y_test, y_pred)",
	"execution_count": 9,
	"outputs": [
	{
	"output_type": "stream",
	"text": "Accuracy = 78.0\n",
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"execution_count": 9,
	"data": {
	"text/plain": "array([[117, 0],\n [ 33, 0]], dtype=int64)"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3",
	"language": "python"
	},
	"language_info": {
	"name": "python",
	"version": "3.8.5",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	},
	"gist": {
	"id": "",
	"data": {
	"description": "Assignment17-SVM-SalaryData.ipynb",
	"public": true
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}