akhildaphara/nlp_test.ipynb

## nlp_test.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "NLP_test.ipynb",
      "provenance": [],
      "toc_visible": true,
      "mount_file_id": "1SKwoeAgvJx-alJaS1HG3JqJ0i6vGjZYp",
      "authorship_tag": "ABX9TyOaIJylIUt5sYqjE1dnrbvA",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/akhildaphara/9c0e8fd50def1b6eee6f3eb977584417/nlp_test.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "viU2VDxie3z6"
      },
      "source": [
        "import pandas as pd"
      ],
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1Bal3HEqfUE3",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 419
        },
        "outputId": "f328b6ad-5f75-4044-84be-028e508eb3d5"
      },
      "source": [
        "dataset = pd.read_csv(\"/content/drive/My Drive/Codes/NLP/Restaurant_Reviews.tsv\", delimiter='\\t')\n",
        "dataset"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Review</th>\n",
              "      <th>Liked</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Wow... Loved this place.</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Crust is not good.</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Not tasty and the texture was just nasty.</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Stopped by during the late May bank holiday of...</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>The selection on the menu was great and so wer...</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>995</th>\n",
              "      <td>I think food should have flavor and texture an...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>996</th>\n",
              "      <td>Appetite instantly gone.</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>997</th>\n",
              "      <td>Overall I was not impressed and would not go b...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>998</th>\n",
              "      <td>The whole experience was underwhelming, and I ...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>999</th>\n",
              "      <td>Then, as if I hadn't wasted enough of my life ...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>1000 rows × 2 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "                                                Review  Liked\n",
              "0                             Wow... Loved this place.      1\n",
              "1                                   Crust is not good.      0\n",
              "2            Not tasty and the texture was just nasty.      0\n",
              "3    Stopped by during the late May bank holiday of...      1\n",
              "4    The selection on the menu was great and so wer...      1\n",
              "..                                                 ...    ...\n",
              "995  I think food should have flavor and texture an...      0\n",
              "996                           Appetite instantly gone.      0\n",
              "997  Overall I was not impressed and would not go b...      0\n",
              "998  The whole experience was underwhelming, and I ...      0\n",
              "999  Then, as if I hadn't wasted enough of my life ...      0\n",
              "\n",
              "[1000 rows x 2 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 2
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qb6W3rYihJSP",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "9969bc71-cdfc-4748-a255-dff5b4f8260e"
      },
      "source": [
        "import re\n",
        "import nltk\n",
        "nltk.download('stopwords')\n",
        "from nltk.corpus import stopwords\n",
        "from nltk.stem.porter import PorterStemmer\n",
        "\n",
        "corpus = []\n",
        "for i in range(0,1000):\n",
        "  review = re.sub('[^a-zA-Z]', ' ', dataset['Review'][i])\n",
        "  review = review.lower()\n",
        "  review = review.split()\n",
        "  ps = PorterStemmer()\n",
        "  review = [ps.stem(word) for word in review if not word in set(stopwords.words('english')) or word=='not' ]\n",
        "  review = ' '.join(review)\n",
        "  corpus.append(review)"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
            "[nltk_data]   Unzipping corpora/stopwords.zip.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Ncp_nWMBjBMu"
      },
      "source": [
        "from sklearn.feature_extraction.text import CountVectorizer\n",
        "cv = CountVectorizer(max_features = 1500)\n",
        "X = cv.fit_transform(corpus).toarray()\n",
        "y = dataset.iloc[:, 1].values"
      ],
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "GTw-AMqkjFqG"
      },
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"
      ],
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "XX9KmF_OsJ1u"
      },
      "source": [
        "## Random Forest"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XuJH1p0FrWO7",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "5abe0419-3ba6-447c-bc32-8a3fe3d6d3da"
      },
      "source": [
        "from sklearn.ensemble import RandomForestClassifier\n",
        "classifier = RandomForestClassifier(n_estimators=1000)\n",
        "classifier.fit(X_train, y_train)\n",
        "y_pred = classifier.predict(X_test)\n",
        "classifier.score(X_test, y_test)"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.765"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "FasIwD4NsIIi"
      },
      "source": [
        "## Support Vector Classifier (SVC)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "204f6yKokJx4"
      },
      "source": [
        "from sklearn.svm import SVC\n",
        "classifier = SVC(C = 2, kernel = 'linear', random_state = 0)\n",
        "classifier.fit(X_train, y_train)\n",
        "y_pred = classifier.predict(X_test)"
      ],
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "fVKgW8bBh2XR"
      },
      "source": [
        "## Prediction"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "sDDhC3Jyh42N"
      },
      "source": [
        "def predict(new_review):    \n",
        "    new_review = re.sub(\"[^a-zA-Z]\", \" \", new_review)    \n",
        "    new_review = new_review.lower().split()\n",
        "    new_review = [ps.stem(word) for word in new_review if word not in set(stopwords.words(\"english\")) or word=='not']    \n",
        "    new_review = \" \".join(new_review)    \n",
        "    new_review = [new_review]    \n",
        "    new_review = cv.transform(new_review).toarray()    \n",
        "    if classifier.predict(new_review)[0] == 1:\n",
        "        return \"Positive\"    \n",
        "    else:        \n",
        "        return \"Negative\""
      ],
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "MEVjVt2Xh_ju",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "9818d1fe-4e0c-48a8-eb59-1326c777bc0c"
      },
      "source": [
        "predict(\"Not Good food\")"
      ],
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            },
            "text/plain": [
              "'Negative'"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "52LWNdgkctrY"
      },
      "source": [
        "End of Code\n",
        "---\n",
        "\n",
        "\n",
        "## Model Evaluation"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IWeoae8KkbLd",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e95a23f5-fcf9-4475-ed2b-5efe46d61925"
      },
      "source": [
        "from sklearn.metrics import confusion_matrix\n",
        "cm = confusion_matrix(y_test, y_pred)\n",
        "print(cm)\n",
        "print(\"Accuracy= \"+str((cm[0][0]+cm[1][1])/200))"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[[78 19]\n",
            " [23 80]]\n",
            "Accuracy= 0.79\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "bNcJVPMxm58k",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "034e4e75-47f9-40c9-fd2f-8fc7d637250d"
      },
      "source": [
        "classifier.score(X_test, y_test)"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.79"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FoK7ue1Kqh7G",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "5d4e57ec-fa61-4a5c-827f-b6d4110737b1"
      },
      "source": [
        "classifier.score(X_train, y_train)"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.9875"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "MjWvXrO4pXqO",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "14b612c7-b554-40e0-e3ec-c9be0a7b5e5c"
      },
      "source": [
        "from sklearn.model_selection import cross_val_score\n",
        "accuracy = cross_val_score(estimator = classifier, X= X_train, y = y_train, cv= 10)\n",
        "print(accuracy.mean())\n",
        "accuracy.std()"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0.8\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.04330127018922194"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TStqeQCZqCqq",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "39e4066a-fc7e-4c59-9011-9715bf7f1d14"
      },
      "source": [
        "from sklearn.model_selection import GridSearchCV\n",
        "parameters = [{'C' : [1, 2, 2.5, 3, 4], 'kernel' : ['linear']}]\n",
        "              \n",
        "grid_search = GridSearchCV(estimator= classifier, param_grid=parameters, scoring= 'accuracy', cv = 10, n_jobs = -1)\n",
        "grid_search = grid_search.fit(X_train, y_train)\n",
        "best_accuracy = grid_search.best_score_\n",
        "print(best_accuracy)"
      ],
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0.8\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HZ_lnDy1vy8-",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e168df7d-d14c-404f-8803-0a34d1eeb6ae"
      },
      "source": [
        "best_param = grid_search.best_params_\n",
        "print(best_param)"
      ],
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "{'C': 2, 'kernel': 'linear'}\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0VlKsg6c1XXy",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "912454d0-21fa-4078-f1cc-a75caa7bb2bf"
      },
      "source": [
        "from sklearn.metrics import classification_report\n",
        "print(classification_report(y_test, y_pred))"
      ],
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.77      0.80      0.79        97\n",
            "           1       0.81      0.78      0.79       103\n",
            "\n",
            "    accuracy                           0.79       200\n",
            "   macro avg       0.79      0.79      0.79       200\n",
            "weighted avg       0.79      0.79      0.79       200\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "NLP_test.ipynb",
	"provenance": [],
	"toc_visible": true,
	"mount_file_id": "1SKwoeAgvJx-alJaS1HG3JqJ0i6vGjZYp",
	"authorship_tag": "ABX9TyOaIJylIUt5sYqjE1dnrbvA",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/akhildaphara/9c0e8fd50def1b6eee6f3eb977584417/nlp_test.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "viU2VDxie3z6"
	},
	"source": [
	"import pandas as pd"
	],
	"execution_count": 1,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "1Bal3HEqfUE3",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 419
	},
	"outputId": "f328b6ad-5f75-4044-84be-028e508eb3d5"
	},
	"source": [
	"dataset = pd.read_csv(\"/content/drive/My Drive/Codes/NLP/Restaurant_Reviews.tsv\", delimiter='\\t')\n",
	"dataset"
	],
	"execution_count": 2,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Review</th>\n",
	" <th>Liked</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>Wow... Loved this place.</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>Crust is not good.</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>Not tasty and the texture was just nasty.</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>Stopped by during the late May bank holiday of...</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>The selection on the menu was great and so wer...</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>...</th>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>995</th>\n",
	" <td>I think food should have flavor and texture an...</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>996</th>\n",
	" <td>Appetite instantly gone.</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>997</th>\n",
	" <td>Overall I was not impressed and would not go b...</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>998</th>\n",
	" <td>The whole experience was underwhelming, and I ...</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>999</th>\n",
	" <td>Then, as if I hadn't wasted enough of my life ...</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"<p>1000 rows × 2 columns</p>\n",
	"</div>"
	],
	"text/plain": [
	" Review Liked\n",
	"0 Wow... Loved this place. 1\n",
	"1 Crust is not good. 0\n",
	"2 Not tasty and the texture was just nasty. 0\n",
	"3 Stopped by during the late May bank holiday of... 1\n",
	"4 The selection on the menu was great and so wer... 1\n",
	".. ... ...\n",
	"995 I think food should have flavor and texture an... 0\n",
	"996 Appetite instantly gone. 0\n",
	"997 Overall I was not impressed and would not go b... 0\n",
	"998 The whole experience was underwhelming, and I ... 0\n",
	"999 Then, as if I hadn't wasted enough of my life ... 0\n",
	"\n",
	"[1000 rows x 2 columns]"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 2
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "qb6W3rYihJSP",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "9969bc71-cdfc-4748-a255-dff5b4f8260e"
	},
	"source": [
	"import re\n",
	"import nltk\n",
	"nltk.download('stopwords')\n",
	"from nltk.corpus import stopwords\n",
	"from nltk.stem.porter import PorterStemmer\n",
	"\n",
	"corpus = []\n",
	"for i in range(0,1000):\n",
	" review = re.sub('[^a-zA-Z]', ' ', dataset['Review'][i])\n",
	" review = review.lower()\n",
	" review = review.split()\n",
	" ps = PorterStemmer()\n",
	" review = [ps.stem(word) for word in review if not word in set(stopwords.words('english')) or word=='not' ]\n",
	" review = ' '.join(review)\n",
	" corpus.append(review)"
	],
	"execution_count": 3,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
	"[nltk_data] Unzipping corpora/stopwords.zip.\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "Ncp_nWMBjBMu"
	},
	"source": [
	"from sklearn.feature_extraction.text import CountVectorizer\n",
	"cv = CountVectorizer(max_features = 1500)\n",
	"X = cv.fit_transform(corpus).toarray()\n",
	"y = dataset.iloc[:, 1].values"
	],
	"execution_count": 4,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "GTw-AMqkjFqG"
	},
	"source": [
	"from sklearn.model_selection import train_test_split\n",
	"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"
	],
	"execution_count": 5,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "XX9KmF_OsJ1u"
	},
	"source": [
	"## Random Forest"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "XuJH1p0FrWO7",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "5abe0419-3ba6-447c-bc32-8a3fe3d6d3da"
	},
	"source": [
	"from sklearn.ensemble import RandomForestClassifier\n",
	"classifier = RandomForestClassifier(n_estimators=1000)\n",
	"classifier.fit(X_train, y_train)\n",
	"y_pred = classifier.predict(X_test)\n",
	"classifier.score(X_test, y_test)"
	],
	"execution_count": 6,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"0.765"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 6
	}
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "FasIwD4NsIIi"
	},
	"source": [
	"## Support Vector Classifier (SVC)"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "204f6yKokJx4"
	},
	"source": [
	"from sklearn.svm import SVC\n",
	"classifier = SVC(C = 2, kernel = 'linear', random_state = 0)\n",
	"classifier.fit(X_train, y_train)\n",
	"y_pred = classifier.predict(X_test)"
	],
	"execution_count": 7,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "fVKgW8bBh2XR"
	},
	"source": [
	"## Prediction"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "sDDhC3Jyh42N"
	},
	"source": [
	"def predict(new_review): \n",
	" new_review = re.sub(\"[^a-zA-Z]\", \" \", new_review) \n",
	" new_review = new_review.lower().split()\n",
	" new_review = [ps.stem(word) for word in new_review if word not in set(stopwords.words(\"english\")) or word=='not'] \n",
	" new_review = \" \".join(new_review) \n",
	" new_review = [new_review] \n",
	" new_review = cv.transform(new_review).toarray() \n",
	" if classifier.predict(new_review)[0] == 1:\n",
	" return \"Positive\" \n",
	" else: \n",
	" return \"Negative\""
	],
	"execution_count": 8,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "MEVjVt2Xh_ju",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 35
	},
	"outputId": "9818d1fe-4e0c-48a8-eb59-1326c777bc0c"
	},
	"source": [
	"predict(\"Not Good food\")"
	],
	"execution_count": 9,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"application/vnd.google.colaboratory.intrinsic+json": {
	"type": "string"
	},
	"text/plain": [
	"'Negative'"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 9
	}
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "52LWNdgkctrY"
	},
	"source": [
	"End of Code\n",
	"---\n",
	"\n",
	"\n",
	"## Model Evaluation"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "IWeoae8KkbLd",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "e95a23f5-fcf9-4475-ed2b-5efe46d61925"
	},
	"source": [
	"from sklearn.metrics import confusion_matrix\n",
	"cm = confusion_matrix(y_test, y_pred)\n",
	"print(cm)\n",
	"print(\"Accuracy= \"+str((cm[0][0]+cm[1][1])/200))"
	],
	"execution_count": 10,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"[[78 19]\n",
	" [23 80]]\n",
	"Accuracy= 0.79\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "bNcJVPMxm58k",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "034e4e75-47f9-40c9-fd2f-8fc7d637250d"
	},
	"source": [
	"classifier.score(X_test, y_test)"
	],
	"execution_count": 11,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"0.79"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 11
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "FoK7ue1Kqh7G",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "5d4e57ec-fa61-4a5c-827f-b6d4110737b1"
	},
	"source": [
	"classifier.score(X_train, y_train)"
	],
	"execution_count": 12,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"0.9875"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 12
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "MjWvXrO4pXqO",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "14b612c7-b554-40e0-e3ec-c9be0a7b5e5c"
	},
	"source": [
	"from sklearn.model_selection import cross_val_score\n",
	"accuracy = cross_val_score(estimator = classifier, X= X_train, y = y_train, cv= 10)\n",
	"print(accuracy.mean())\n",
	"accuracy.std()"
	],
	"execution_count": 13,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"0.8\n"
	],
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"0.04330127018922194"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 13
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "TStqeQCZqCqq",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "39e4066a-fc7e-4c59-9011-9715bf7f1d14"
	},
	"source": [
	"from sklearn.model_selection import GridSearchCV\n",
	"parameters = [{'C' : [1, 2, 2.5, 3, 4], 'kernel' : ['linear']}]\n",
	" \n",
	"grid_search = GridSearchCV(estimator= classifier, param_grid=parameters, scoring= 'accuracy', cv = 10, n_jobs = -1)\n",
	"grid_search = grid_search.fit(X_train, y_train)\n",
	"best_accuracy = grid_search.best_score_\n",
	"print(best_accuracy)"
	],
	"execution_count": 14,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"0.8\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "HZ_lnDy1vy8-",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "e168df7d-d14c-404f-8803-0a34d1eeb6ae"
	},
	"source": [
	"best_param = grid_search.best_params_\n",
	"print(best_param)"
	],
	"execution_count": 15,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"{'C': 2, 'kernel': 'linear'}\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "0VlKsg6c1XXy",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "912454d0-21fa-4078-f1cc-a75caa7bb2bf"
	},
	"source": [
	"from sklearn.metrics import classification_report\n",
	"print(classification_report(y_test, y_pred))"
	],
	"execution_count": 16,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	" precision recall f1-score support\n",
	"\n",
	" 0 0.77 0.80 0.79 97\n",
	" 1 0.81 0.78 0.79 103\n",
	"\n",
	" accuracy 0.79 200\n",
	" macro avg 0.79 0.79 0.79 200\n",
	"weighted avg 0.79 0.79 0.79 200\n",
	"\n"
	],
	"name": "stdout"
	}
	]
	}
	]
	}