ShahStavan/22bce539_practical_5.ipynb

## 22bce539_practical_5.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyP8jfPENwh78S9kTzOjASEA",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/ShahStavan/b9796007943a71a60bdd2a8e31b97d35/22bce539_practical_5.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Name & Roll no. :- Shah Stavan, 22bce539\n",
        "\n",
        "Subject & Course code :- ML, 2CS501\n",
        "\n",
        "Date : 29/09/2023"
      ],
      "metadata": {
        "id": "_sFJmwl1QBOR"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Practical 5 Naive Bayes\n",
        "Naïve-Bayes – Multivariate Bernoulli, Multinomial and Gaussian using sklearn"
      ],
      "metadata": {
        "id": "Z6p8pDGdQK9w"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Text Feature Extraction using CountVectorizer"
      ],
      "metadata": {
        "id": "X7eA9V3tQUxj"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "zv1FHGGnP2gI",
        "outputId": "b37e893f-9827-46ea-c25d-5f67920ad4af"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "List of unique words :  ['and' 'documentation' 'first' 'is' 'library' 'of' 'one' 'paper'\n",
            " 'research' 'second' 'the' 'third' 'this']\n",
            "Document Term Matrix : \n",
            "[[1 1 1 1 1 1 0 1 1 0 1 0 1]\n",
            " [1 1 0 1 0 0 0 1 1 2 1 0 1]\n",
            " [1 0 0 0 0 0 1 0 0 0 1 1 0]\n",
            " [0 0 1 1 0 0 0 1 1 0 1 0 1]]\n"
          ]
        }
      ],
      "source": [
        "from sklearn.feature_extraction.text import CountVectorizer\n",
        "\n",
        "# Toy corpus of four short documents.\n",
        "corpus = [\n",
        "    'This is the first Research Paper and Documentation of Library.',\n",
        "    'This is the second second Research Paper and Documentation.',\n",
        "    'And the third one.',\n",
        "    'Is this the first Research Paper?',\n",
        "]\n",
        "\n",
        "# Learn the vocabulary and build the document-term matrix in a single call.\n",
        "vectorizer = CountVectorizer()\n",
        "X = vectorizer.fit_transform(corpus)\n",
        "\n",
        "# Feature names (the vocabulary) extracted from the corpus.\n",
        "unique_words = vectorizer.get_feature_names_out()\n",
        "\n",
        "# Report the vocabulary first, then the dense per-document term counts.\n",
        "print(\"List of unique words : \", unique_words)\n",
        "print(\"Document Term Matrix : \", X.toarray(), sep=\"\\n\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "1.   Importing Python Libraries and Dependencies\n"
      ],
      "metadata": {
        "id": "jk4OBVk5QwHI"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.metrics import confusion_matrix\n",
        "from sklearn.naive_bayes import MultinomialNB\n",
        "from sklearn.naive_bayes import BernoulliNB\n",
        "from sklearn.naive_bayes import GaussianNB"
      ],
      "metadata": {
        "id": "i7CusTIOQvwP"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "2.   Upload the Dataset"
      ],
      "metadata": {
        "id": "GqQCUz63RGZ4"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Upload the dataset through Colab's file picker when running on Colab.\n",
        "# Outside Colab the google.colab package does not exist, so fall back to a\n",
        "# local copy of the file instead of failing with an import error.\n",
        "try:\n",
        "    from google.colab import files\n",
        "except ImportError:\n",
        "    # Keep `uploaded` defined so the name exists on every runtime.\n",
        "    uploaded = {}\n",
        "    print(\"google.colab is unavailable; expecting 'emails.csv' in the working directory.\")\n",
        "else:\n",
        "    # Opens the browser upload widget; pick emails.csv when prompted.\n",
        "    uploaded = files.upload()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 73
        },
        "id": "Sq6QkzGlQ8WR",
        "outputId": "3936c5cd-b08a-48bf-f18c-2e9b2e0f35d1"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "     <input type=\"file\" id=\"files-404040ae-d7be-4d65-99b6-dff1c6c94cca\" name=\"files[]\" multiple disabled\n",
              "        style=\"border:none\" />\n",
              "     <output id=\"result-404040ae-d7be-4d65-99b6-dff1c6c94cca\">\n",
              "      Upload widget is only available when the cell has been executed in the\n",
              "      current browser session. Please rerun this cell to enable.\n",
              "      </output>\n",
              "      <script>// Copyright 2017 Google LLC\n",
              "//\n",
              "// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
              "// you may not use this file except in compliance with the License.\n",
              "// You may obtain a copy of the License at\n",
              "//\n",
              "//      http://www.apache.org/licenses/LICENSE-2.0\n",
              "//\n",
              "// Unless required by applicable law or agreed to in writing, software\n",
              "// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
              "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
              "// See the License for the specific language governing permissions and\n",
              "// limitations under the License.\n",
              "\n",
              "/**\n",
              " * @fileoverview Helpers for google.colab Python module.\n",
              " */\n",
              "(function(scope) {\n",
              "function span(text, styleAttributes = {}) {\n",
              "  const element = document.createElement('span');\n",
              "  element.textContent = text;\n",
              "  for (const key of Object.keys(styleAttributes)) {\n",
              "    element.style[key] = styleAttributes[key];\n",
              "  }\n",
              "  return element;\n",
              "}\n",
              "\n",
              "// Max number of bytes which will be uploaded at a time.\n",
              "const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
              "\n",
              "function _uploadFiles(inputId, outputId) {\n",
              "  const steps = uploadFilesStep(inputId, outputId);\n",
              "  const outputElement = document.getElementById(outputId);\n",
              "  // Cache steps on the outputElement to make it available for the next call\n",
              "  // to uploadFilesContinue from Python.\n",
              "  outputElement.steps = steps;\n",
              "\n",
              "  return _uploadFilesContinue(outputId);\n",
              "}\n",
              "\n",
              "// This is roughly an async generator (not supported in the browser yet),\n",
              "// where there are multiple asynchronous steps and the Python side is going\n",
              "// to poll for completion of each step.\n",
              "// This uses a Promise to block the python side on completion of each step,\n",
              "// then passes the result of the previous step as the input to the next step.\n",
              "function _uploadFilesContinue(outputId) {\n",
              "  const outputElement = document.getElementById(outputId);\n",
              "  const steps = outputElement.steps;\n",
              "\n",
              "  const next = steps.next(outputElement.lastPromiseValue);\n",
              "  return Promise.resolve(next.value.promise).then((value) => {\n",
              "    // Cache the last promise value to make it available to the next\n",
              "    // step of the generator.\n",
              "    outputElement.lastPromiseValue = value;\n",
              "    return next.value.response;\n",
              "  });\n",
              "}\n",
              "\n",
              "/**\n",
              " * Generator function which is called between each async step of the upload\n",
              " * process.\n",
              " * @param {string} inputId Element ID of the input file picker element.\n",
              " * @param {string} outputId Element ID of the output display.\n",
              " * @return {!Iterable<!Object>} Iterable of next steps.\n",
              " */\n",
              "function* uploadFilesStep(inputId, outputId) {\n",
              "  const inputElement = document.getElementById(inputId);\n",
              "  inputElement.disabled = false;\n",
              "\n",
              "  const outputElement = document.getElementById(outputId);\n",
              "  outputElement.innerHTML = '';\n",
              "\n",
              "  const pickedPromise = new Promise((resolve) => {\n",
              "    inputElement.addEventListener('change', (e) => {\n",
              "      resolve(e.target.files);\n",
              "    });\n",
              "  });\n",
              "\n",
              "  const cancel = document.createElement('button');\n",
              "  inputElement.parentElement.appendChild(cancel);\n",
              "  cancel.textContent = 'Cancel upload';\n",
              "  const cancelPromise = new Promise((resolve) => {\n",
              "    cancel.onclick = () => {\n",
              "      resolve(null);\n",
              "    };\n",
              "  });\n",
              "\n",
              "  // Wait for the user to pick the files.\n",
              "  const files = yield {\n",
              "    promise: Promise.race([pickedPromise, cancelPromise]),\n",
              "    response: {\n",
              "      action: 'starting',\n",
              "    }\n",
              "  };\n",
              "\n",
              "  cancel.remove();\n",
              "\n",
              "  // Disable the input element since further picks are not allowed.\n",
              "  inputElement.disabled = true;\n",
              "\n",
              "  if (!files) {\n",
              "    return {\n",
              "      response: {\n",
              "        action: 'complete',\n",
              "      }\n",
              "    };\n",
              "  }\n",
              "\n",
              "  for (const file of files) {\n",
              "    const li = document.createElement('li');\n",
              "    li.append(span(file.name, {fontWeight: 'bold'}));\n",
              "    li.append(span(\n",
              "        `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
              "        `last modified: ${\n",
              "            file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
              "                                    'n/a'} - `));\n",
              "    const percent = span('0% done');\n",
              "    li.appendChild(percent);\n",
              "\n",
              "    outputElement.appendChild(li);\n",
              "\n",
              "    const fileDataPromise = new Promise((resolve) => {\n",
              "      const reader = new FileReader();\n",
              "      reader.onload = (e) => {\n",
              "        resolve(e.target.result);\n",
              "      };\n",
              "      reader.readAsArrayBuffer(file);\n",
              "    });\n",
              "    // Wait for the data to be ready.\n",
              "    let fileData = yield {\n",
              "      promise: fileDataPromise,\n",
              "      response: {\n",
              "        action: 'continue',\n",
              "      }\n",
              "    };\n",
              "\n",
              "    // Use a chunked sending to avoid message size limits. See b/62115660.\n",
              "    let position = 0;\n",
              "    do {\n",
              "      const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
              "      const chunk = new Uint8Array(fileData, position, length);\n",
              "      position += length;\n",
              "\n",
              "      const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
              "      yield {\n",
              "        response: {\n",
              "          action: 'append',\n",
              "          file: file.name,\n",
              "          data: base64,\n",
              "        },\n",
              "      };\n",
              "\n",
              "      let percentDone = fileData.byteLength === 0 ?\n",
              "          100 :\n",
              "          Math.round((position / fileData.byteLength) * 100);\n",
              "      percent.textContent = `${percentDone}% done`;\n",
              "\n",
              "    } while (position < fileData.byteLength);\n",
              "  }\n",
              "\n",
              "  // All done.\n",
              "  yield {\n",
              "    response: {\n",
              "      action: 'complete',\n",
              "    }\n",
              "  };\n",
              "}\n",
              "\n",
              "scope.google = scope.google || {};\n",
              "scope.google.colab = scope.google.colab || {};\n",
              "scope.google.colab._files = {\n",
              "  _uploadFiles,\n",
              "  _uploadFilesContinue,\n",
              "};\n",
              "})(self);\n",
              "</script> "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Saving emails.csv to emails.csv\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "\n",
        "# Read the dataset from the CSV file into a DataFrame.\n",
        "data_file_path = 'emails.csv'\n",
        "df = pd.read_csv(data_file_path)\n",
        "\n",
        "# Preview the leading rows to check the column layout.\n",
        "print(df.head())\n",
        "\n",
        "# Features: every column except the 'Email No.' label and the target.\n",
        "# Target: the 'Prediction' column.\n",
        "X = df.drop(columns=['Prediction', 'Email No.'])\n",
        "y = df['Prediction']\n",
        "\n",
        "# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.\n",
        "X_train, X_test, y_train, y_test = train_test_split(\n",
        "    X,\n",
        "    y,\n",
        "    test_size=0.2,\n",
        "    random_state=10,\n",
        ")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "zJM_wznNRL6p",
        "outputId": "5b017cf3-a031-4560-8534-06a4ab1f5b9e"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "  Email No.  the  to  ect  and  for  of    a  you  hou  ...  connevey  jay  \\\n",
            "0   Email 1    0   0    1    0    0   0    2    0    0  ...         0    0   \n",
            "1   Email 2    8  13   24    6    6   2  102    1   27  ...         0    0   \n",
            "2   Email 3    0   0    1    0    0   0    8    0    0  ...         0    0   \n",
            "3   Email 4    0   5   22    0    5   1   51    2   10  ...         0    0   \n",
            "4   Email 5    7   6   17    1    5   2   57    0    9  ...         0    0   \n",
            "\n",
            "   valued  lay  infrastructure  military  allowing  ff  dry  Prediction  \n",
            "0       0    0               0         0         0   0    0           0  \n",
            "1       0    0               0         0         0   1    0           0  \n",
            "2       0    0               0         0         0   0    0           0  \n",
            "3       0    0               0         0         0   0    0           0  \n",
            "4       0    0               0         0         0   1    0           0  \n",
            "\n",
            "[5 rows x 3002 columns]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Practical 5 A : Multivariate Bernoulli NB"
      ],
      "metadata": {
        "id": "BXoEX9QfSSSc"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Fit a Bernoulli Naive Bayes classifier on the training split.\n",
        "model = BernoulliNB().fit(X_train, y_train)\n",
        "\n",
        "# Mean accuracy on the held-out split, reported as a percentage.\n",
        "accuracy_score = model.score(X_test, y_test)\n",
        "print(\"Accuracy: {:.2f}%\".format(100 * accuracy_score))\n",
        "\n",
        "# Predict the held-out labels and tabulate them against the true labels.\n",
        "y_pred = model.predict(X_test)\n",
        "confusion = confusion_matrix(y_test, y_pred)\n",
        "print(\"Confusion Matrix:\", confusion, sep=\"\\n\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ftRIJhYZSVKr",
        "outputId": "67f84f4f-802f-4c5c-d9fb-598f1de80992"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy: 87.34%\n",
            "Confusion Matrix:\n",
            "[[680  57]\n",
            " [ 74 224]]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Practical 5 B : Multinomial NB"
      ],
      "metadata": {
        "id": "7VtTcmFmSeYx"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Fit a Multinomial Naive Bayes classifier on the training split.\n",
        "model = MultinomialNB().fit(X_train, y_train)\n",
        "\n",
        "# Mean accuracy on the held-out split, reported as a percentage.\n",
        "accuracy_score = model.score(X_test, y_test)\n",
        "print(\"Accuracy: {:.2f}%\".format(100 * accuracy_score))\n",
        "\n",
        "# Predict the held-out labels and tabulate them against the true labels.\n",
        "y_pred = model.predict(X_test)\n",
        "confusion = confusion_matrix(y_test, y_pred)\n",
        "print(\"Confusion Matrix:\", confusion, sep=\"\\n\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "d2dREuZISgs4",
        "outputId": "749737cc-ac72-48fc-93c7-b041bd1f89b3"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy: 94.20%\n",
            "Confusion Matrix:\n",
            "[[693  44]\n",
            " [ 16 282]]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Practical 5 C : Gaussian NB"
      ],
      "metadata": {
        "id": "Wn22wKr-TanR"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Fit a Gaussian Naive Bayes classifier on the training split.\n",
        "model = GaussianNB().fit(X_train, y_train)\n",
        "\n",
        "# Mean accuracy on the held-out split, reported as a percentage.\n",
        "accuracy_score = model.score(X_test, y_test)\n",
        "print(\"Accuracy: {:.2f}%\".format(100 * accuracy_score))\n",
        "\n",
        "# Predict the held-out labels and tabulate them against the true labels.\n",
        "y_pred = model.predict(X_test)\n",
        "confusion = confusion_matrix(y_test, y_pred)\n",
        "print(\"Confusion Matrix:\", confusion, sep=\"\\n\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "0MnwEfu7TbYz",
        "outputId": "36ca289f-ac50-4bea-dcc2-cb27afabdc20"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy: 93.82%\n",
            "Confusion Matrix:\n",
            "[[692  45]\n",
            " [ 19 279]]\n"
          ]
        }
      ]
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": [],
	"authorship_tag": "ABX9TyP8jfPENwh78S9kTzOjASEA",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/ShahStavan/b9796007943a71a60bdd2a8e31b97d35/22bce539_practical_5.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Name & Roll no. :- Shah Stavan, 22bce539\n",
	"\n",
	"Subject & Course code :- ML, 2CS501\n",
	"\n",
	"Date : 29/09/2023"
	],
	"metadata": {
	"id": "_sFJmwl1QBOR"
	}
	},
	{
	"cell_type": "markdown",
	"source": [
	"# Practical 5 Naive Bayes\n",
	"Naïve-Bayes – Multivariate Bernoulli, Multinomial and Gaussian using sklearn"
	],
	"metadata": {
	"id": "Z6p8pDGdQK9w"
	}
	},
	{
	"cell_type": "markdown",
	"source": [
	"Text Feature Extraction using CountVectorizer"
	],
	"metadata": {
	"id": "X7eA9V3tQUxj"
	}
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "zv1FHGGnP2gI",
	"outputId": "b37e893f-9827-46ea-c25d-5f67920ad4af"
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"List of unique words : ['and' 'documentation' 'first' 'is' 'library' 'of' 'one' 'paper'\n",
	" 'research' 'second' 'the' 'third' 'this']\n",
	"Document Term Matrix : \n",
	"[[1 1 1 1 1 1 0 1 1 0 1 0 1]\n",
	" [1 1 0 1 0 0 0 1 1 2 1 0 1]\n",
	" [1 0 0 0 0 0 1 0 0 0 1 1 0]\n",
	" [0 0 1 1 0 0 0 1 1 0 1 0 1]]\n"
	]
	}
	],
	"source": [
	"from sklearn.feature_extraction.text import CountVectorizer\n",
	"\n",
	"# Toy corpus of four short documents.\n",
	"corpus = [\n",
	"    'This is the first Research Paper and Documentation of Library.',\n",
	"    'This is the second second Research Paper and Documentation.',\n",
	"    'And the third one.',\n",
	"    'Is this the first Research Paper?',\n",
	"]\n",
	"\n",
	"# Learn the vocabulary and build the document-term matrix in a single call.\n",
	"vectorizer = CountVectorizer()\n",
	"X = vectorizer.fit_transform(corpus)\n",
	"\n",
	"# Feature names (the vocabulary) extracted from the corpus.\n",
	"unique_words = vectorizer.get_feature_names_out()\n",
	"\n",
	"# Report the vocabulary first, then the dense per-document term counts.\n",
	"print(\"List of unique words : \", unique_words)\n",
	"print(\"Document Term Matrix : \", X.toarray(), sep=\"\\n\")\n"
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"1. Importing Python Libraries and Dependencies\n"
	],
	"metadata": {
	"id": "jk4OBVk5QwHI"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"import numpy as np\n",
	"import pandas as pd\n",
	"from sklearn.pipeline import Pipeline\n",
	"from sklearn.model_selection import train_test_split\n",
	"from sklearn.metrics import confusion_matrix\n",
	"from sklearn.naive_bayes import MultinomialNB\n",
	"from sklearn.naive_bayes import BernoulliNB\n",
	"from sklearn.naive_bayes import GaussianNB"
	],
	"metadata": {
	"id": "i7CusTIOQvwP"
	},
	"execution_count": 6,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"2. Upload the Dataset"
	],
	"metadata": {
	"id": "GqQCUz63RGZ4"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"# Upload the dataset through Colab's file picker when running on Colab.\n",
	"# Outside Colab the google.colab package does not exist, so fall back to a\n",
	"# local copy of the file instead of failing with an import error.\n",
	"try:\n",
	"    from google.colab import files\n",
	"except ImportError:\n",
	"    # Keep `uploaded` defined so the name exists on every runtime.\n",
	"    uploaded = {}\n",
	"    print(\"google.colab is unavailable; expecting 'emails.csv' in the working directory.\")\n",
	"else:\n",
	"    # Opens the browser upload widget; pick emails.csv when prompted.\n",
	"    uploaded = files.upload()"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 73
	},
	"id": "Sq6QkzGlQ8WR",
	"outputId": "3936c5cd-b08a-48bf-f18c-2e9b2e0f35d1"
	},
	"execution_count": 3,
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/plain": [
	"<IPython.core.display.HTML object>"
	],
	"text/html": [
	"\n",
	" <input type=\"file\" id=\"files-404040ae-d7be-4d65-99b6-dff1c6c94cca\" name=\"files[]\" multiple disabled\n",
	" style=\"border:none\" />\n",
	" <output id=\"result-404040ae-d7be-4d65-99b6-dff1c6c94cca\">\n",
	" Upload widget is only available when the cell has been executed in the\n",
	" current browser session. Please rerun this cell to enable.\n",
	" </output>\n",
	" <script>// Copyright 2017 Google LLC\n",
	"//\n",
	"// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
	"// you may not use this file except in compliance with the License.\n",
	"// You may obtain a copy of the License at\n",
	"//\n",
	"// http://www.apache.org/licenses/LICENSE-2.0\n",
	"//\n",
	"// Unless required by applicable law or agreed to in writing, software\n",
	"// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
	"// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
	"// See the License for the specific language governing permissions and\n",
	"// limitations under the License.\n",
	"\n",
	"/**\n",
	" * @fileoverview Helpers for google.colab Python module.\n",
	" */\n",
	"(function(scope) {\n",
	"function span(text, styleAttributes = {}) {\n",
	" const element = document.createElement('span');\n",
	" element.textContent = text;\n",
	" for (const key of Object.keys(styleAttributes)) {\n",
	" element.style[key] = styleAttributes[key];\n",
	" }\n",
	" return element;\n",
	"}\n",
	"\n",
	"// Max number of bytes which will be uploaded at a time.\n",
	"const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
	"\n",
	"function _uploadFiles(inputId, outputId) {\n",
	" const steps = uploadFilesStep(inputId, outputId);\n",
	" const outputElement = document.getElementById(outputId);\n",
	" // Cache steps on the outputElement to make it available for the next call\n",
	" // to uploadFilesContinue from Python.\n",
	" outputElement.steps = steps;\n",
	"\n",
	" return _uploadFilesContinue(outputId);\n",
	"}\n",
	"\n",
	"// This is roughly an async generator (not supported in the browser yet),\n",
	"// where there are multiple asynchronous steps and the Python side is going\n",
	"// to poll for completion of each step.\n",
	"// This uses a Promise to block the python side on completion of each step,\n",
	"// then passes the result of the previous step as the input to the next step.\n",
	"function _uploadFilesContinue(outputId) {\n",
	" const outputElement = document.getElementById(outputId);\n",
	" const steps = outputElement.steps;\n",
	"\n",
	" const next = steps.next(outputElement.lastPromiseValue);\n",
	" return Promise.resolve(next.value.promise).then((value) => {\n",
	" // Cache the last promise value to make it available to the next\n",
	" // step of the generator.\n",
	" outputElement.lastPromiseValue = value;\n",
	" return next.value.response;\n",
	" });\n",
	"}\n",
	"\n",
	"/**\n",
	" * Generator function which is called between each async step of the upload\n",
	" * process.\n",
	" * @param {string} inputId Element ID of the input file picker element.\n",
	" * @param {string} outputId Element ID of the output display.\n",
	" * @return {!Iterable<!Object>} Iterable of next steps.\n",
	" */\n",
	"function* uploadFilesStep(inputId, outputId) {\n",
	" const inputElement = document.getElementById(inputId);\n",
	" inputElement.disabled = false;\n",
	"\n",
	" const outputElement = document.getElementById(outputId);\n",
	" outputElement.innerHTML = '';\n",
	"\n",
	" const pickedPromise = new Promise((resolve) => {\n",
	" inputElement.addEventListener('change', (e) => {\n",
	" resolve(e.target.files);\n",
	" });\n",
	" });\n",
	"\n",
	" const cancel = document.createElement('button');\n",
	" inputElement.parentElement.appendChild(cancel);\n",
	" cancel.textContent = 'Cancel upload';\n",
	" const cancelPromise = new Promise((resolve) => {\n",
	" cancel.onclick = () => {\n",
	" resolve(null);\n",
	" };\n",
	" });\n",
	"\n",
	" // Wait for the user to pick the files.\n",
	" const files = yield {\n",
	" promise: Promise.race([pickedPromise, cancelPromise]),\n",
	" response: {\n",
	" action: 'starting',\n",
	" }\n",
	" };\n",
	"\n",
	" cancel.remove();\n",
	"\n",
	" // Disable the input element since further picks are not allowed.\n",
	" inputElement.disabled = true;\n",
	"\n",
	" if (!files) {\n",
	" return {\n",
	" response: {\n",
	" action: 'complete',\n",
	" }\n",
	" };\n",
	" }\n",
	"\n",
	" for (const file of files) {\n",
	" const li = document.createElement('li');\n",
	" li.append(span(file.name, {fontWeight: 'bold'}));\n",
	" li.append(span(\n",
	" `(${file.type \|\| 'n/a'}) - ${file.size} bytes, ` +\n",
	" `last modified: ${\n",
	" file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
	" 'n/a'} - `));\n",
	" const percent = span('0% done');\n",
	" li.appendChild(percent);\n",
	"\n",
	" outputElement.appendChild(li);\n",
	"\n",
	" const fileDataPromise = new Promise((resolve) => {\n",
	" const reader = new FileReader();\n",
	" reader.onload = (e) => {\n",
	" resolve(e.target.result);\n",
	" };\n",
	" reader.readAsArrayBuffer(file);\n",
	" });\n",
	" // Wait for the data to be ready.\n",
	" let fileData = yield {\n",
	" promise: fileDataPromise,\n",
	" response: {\n",
	" action: 'continue',\n",
	" }\n",
	" };\n",
	"\n",
	" // Use a chunked sending to avoid message size limits. See b/62115660.\n",
	" let position = 0;\n",
	" do {\n",
	" const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
	" const chunk = new Uint8Array(fileData, position, length);\n",
	" position += length;\n",
	"\n",
	" const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
	" yield {\n",
	" response: {\n",
	" action: 'append',\n",
	" file: file.name,\n",
	" data: base64,\n",
	" },\n",
	" };\n",
	"\n",
	" let percentDone = fileData.byteLength === 0 ?\n",
	" 100 :\n",
	" Math.round((position / fileData.byteLength) * 100);\n",
	" percent.textContent = `${percentDone}% done`;\n",
	"\n",
	" } while (position < fileData.byteLength);\n",
	" }\n",
	"\n",
	" // All done.\n",
	" yield {\n",
	" response: {\n",
	" action: 'complete',\n",
	" }\n",
	" };\n",
	"}\n",
	"\n",
	"scope.google = scope.google \|\| {};\n",
	"scope.google.colab = scope.google.colab \|\| {};\n",
	"scope.google.colab._files = {\n",
	" _uploadFiles,\n",
	" _uploadFilesContinue,\n",
	"};\n",
	"})(self);\n",
	"</script> "
	]
	},
	"metadata": {}
	},
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Saving emails.csv to emails.csv\n"
	]
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"import pandas as pd\n",
	"\n",
	"# Read the dataset from the CSV file into a DataFrame.\n",
	"data_file_path = 'emails.csv'\n",
	"df = pd.read_csv(data_file_path)\n",
	"\n",
	"# Preview the leading rows to check the column layout.\n",
	"print(df.head())\n",
	"\n",
	"# Features: every column except the 'Email No.' label and the target.\n",
	"# Target: the 'Prediction' column.\n",
	"X = df.drop(columns=['Prediction', 'Email No.'])\n",
	"y = df['Prediction']\n",
	"\n",
	"# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.\n",
	"X_train, X_test, y_train, y_test = train_test_split(\n",
	"    X,\n",
	"    y,\n",
	"    test_size=0.2,\n",
	"    random_state=10,\n",
	")"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "zJM_wznNRL6p",
	"outputId": "5b017cf3-a031-4560-8534-06a4ab1f5b9e"
	},
	"execution_count": 7,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	" Email No. the to ect and for of a you hou ... connevey jay \\\n",
	"0 Email 1 0 0 1 0 0 0 2 0 0 ... 0 0 \n",
	"1 Email 2 8 13 24 6 6 2 102 1 27 ... 0 0 \n",
	"2 Email 3 0 0 1 0 0 0 8 0 0 ... 0 0 \n",
	"3 Email 4 0 5 22 0 5 1 51 2 10 ... 0 0 \n",
	"4 Email 5 7 6 17 1 5 2 57 0 9 ... 0 0 \n",
	"\n",
	" valued lay infrastructure military allowing ff dry Prediction \n",
	"0 0 0 0 0 0 0 0 0 \n",
	"1 0 0 0 0 0 1 0 0 \n",
	"2 0 0 0 0 0 0 0 0 \n",
	"3 0 0 0 0 0 0 0 0 \n",
	"4 0 0 0 0 0 1 0 0 \n",
	"\n",
	"[5 rows x 3002 columns]\n"
	]
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Practical 5 A : Multivariate Bernoulli NB"
	],
	"metadata": {
	"id": "BXoEX9QfSSSc"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"# Fit a Bernoulli Naive Bayes classifier on the training split.\n",
	"model = BernoulliNB().fit(X_train, y_train)\n",
	"\n",
	"# Mean accuracy on the held-out split, reported as a percentage.\n",
	"accuracy_score = model.score(X_test, y_test)\n",
	"print(\"Accuracy: {:.2f}%\".format(100 * accuracy_score))\n",
	"\n",
	"# Predict the held-out labels and tabulate them against the true labels.\n",
	"y_pred = model.predict(X_test)\n",
	"confusion = confusion_matrix(y_test, y_pred)\n",
	"print(\"Confusion Matrix:\", confusion, sep=\"\\n\")"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "ftRIJhYZSVKr",
	"outputId": "67f84f4f-802f-4c5c-d9fb-598f1de80992"
	},
	"execution_count": 8,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Accuracy: 87.34%\n",
	"Confusion Matrix:\n",
	"[[680 57]\n",
	" [ 74 224]]\n"
	]
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Practical 5 B : Multinomial NB"
	],
	"metadata": {
	"id": "7VtTcmFmSeYx"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"# Fit a Multinomial Naive Bayes classifier on the training split.\n",
	"model = MultinomialNB().fit(X_train, y_train)\n",
	"\n",
	"# Mean accuracy on the held-out split, reported as a percentage.\n",
	"accuracy_score = model.score(X_test, y_test)\n",
	"print(\"Accuracy: {:.2f}%\".format(100 * accuracy_score))\n",
	"\n",
	"# Predict the held-out labels and tabulate them against the true labels.\n",
	"y_pred = model.predict(X_test)\n",
	"confusion = confusion_matrix(y_test, y_pred)\n",
	"print(\"Confusion Matrix:\", confusion, sep=\"\\n\")"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "d2dREuZISgs4",
	"outputId": "749737cc-ac72-48fc-93c7-b041bd1f89b3"
	},
	"execution_count": 9,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Accuracy: 94.20%\n",
	"Confusion Matrix:\n",
	"[[693 44]\n",
	" [ 16 282]]\n"
	]
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Practical 5 C : Gaussian NB"
	],
	"metadata": {
	"id": "Wn22wKr-TanR"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"# Fit a Gaussian Naive Bayes classifier on the training split.\n",
	"model = GaussianNB().fit(X_train, y_train)\n",
	"\n",
	"# Mean accuracy on the held-out split, reported as a percentage.\n",
	"accuracy_score = model.score(X_test, y_test)\n",
	"print(\"Accuracy: {:.2f}%\".format(100 * accuracy_score))\n",
	"\n",
	"# Predict the held-out labels and tabulate them against the true labels.\n",
	"y_pred = model.predict(X_test)\n",
	"confusion = confusion_matrix(y_test, y_pred)\n",
	"print(\"Confusion Matrix:\", confusion, sep=\"\\n\")"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "0MnwEfu7TbYz",
	"outputId": "36ca289f-ac50-4bea-dcc2-cb27afabdc20"
	},
	"execution_count": 11,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Accuracy: 93.82%\n",
	"Confusion Matrix:\n",
	"[[692 45]\n",
	" [ 19 279]]\n"
	]
	}
	]
	}
	]
	}