Last active
April 25, 2022 04:19
-
-
Save ehzawad/3a871db2a76e7d035cab1be5cb9c2238 to your computer and use it in GitHub Desktop.
All Basic ML Algorithms.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "All Algorithms.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"accelerator": "TPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/ehzawad/3a871db2a76e7d035cab1be5cb9c2238/all-algorithms.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "RBwGqrXpH-8Z" | |
}, | |
"outputs": [], | |
"source": [ | |
"# import numpy and pandas library\n", | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from google.colab import drive\n", | |
"\n", | |
"# Mount Google Drive at /content/drive so files under it can be read by path.\n", | |
"drive.mount(\"/content/drive\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "m3ptx6MNIQ70", | |
"outputId": "4ac7a55f-d10f-44ad-b93f-d61f17ad3fbe" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"# Read just the two columns used below, then shuffle every row with a fixed\n", | |
"# seed and renumber the index from zero.\n", | |
"df = (\n", | |
"    pd.read_csv(\n", | |
"        '/content/drive/MyDrive/Genuity/Final Dataset/partyA_Sheet1.csv',\n", | |
"        usecols=[\"text_bangla\", \"audio_emotion\"],\n", | |
"    )\n", | |
"    .sample(frac=1, random_state=1)\n", | |
"    .reset_index(drop=True)\n", | |
")\n", | |
"df.head(22)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 663 | |
}, | |
"id": "w_BK2WBeIRty", | |
"outputId": "3ddc86e4-5ca9-409e-f717-75494c355337" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" text_bangla audio_emotion\n", | |
"0 ভয়েস চ্যাটে আমি এক সপ্তাহের জন্য কিনতে চাচ্ছি ... N\n", | |
"1 ইভানা খুব সুন্দর N\n", | |
"2 নাটক : শেষ দেখা রচনা : হুমায়ুন কবিরপিষ্ঠা : ৪৭... A\n", | |
"3 ওয়াও গ্রেইট। N\n", | |
"4 লাস্টের নাম্বারটা আসসালামু আলাইকুম N\n", | |
"5 ফাউল,, টাইম টা নস্ট হইলো বেকার!!,কোনো এক বোকাচ... A\n", | |
"6 ইবানা দারুন একটা মেয়ে N\n", | |
"7 য়েক কথায় অসাধারণ। ওয়াও N\n", | |
"8 বিয়েটা নিশুর সাথে হলে অনেক ভালো হতো নাটকটা,,তা... N\n", | |
"9 আমার দেখা of the best নাটক এইটা...খুবই ভাল ছি... N\n", | |
"10 তিশার অভিনয় চোখে পড়ার মতো উন্নতি হয়েছে। N\n", | |
"11 কাজটা জেরিনের বাবা ভাল করে নাই N\n", | |
"12 প্রচুর ফালতু । এরকম টা আশায় ছিলো না । A\n", | |
"13 টাকাতো এখন অ্যাকাউন্টের মধ্যে নাই আনফরচুনেটলি ... A\n", | |
"14 এভাবে কি কখনও সুখী হওয়া যায়??????? N\n", | |
"15 টাকা লোভ এ মানুষ কত নিচে নামে।যেমনি মোশাররফ ন... A\n", | |
"16 অসাধারণ নাটক।। N\n", | |
"17 সত্যি খুভ অসাধারন N\n", | |
"18 কেমনে পারেন এতো মজার নাটক করতে ভাই?? A\n", | |
"19 ভাল লাগছে, শেষ দিকটা বেশি মজার ছিল। N\n", | |
"20 কি কতা কই বলেন আপনার কথা শুনতেছি A\n", | |
"21 i love SARWAR TUBE channel k..shamne aro valo... N" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-3c417648-4ace-426d-a107-9e9acd221ed9\">\n", | |
" <div class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>text_bangla</th>\n", | |
" <th>audio_emotion</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>ভয়েস চ্যাটে আমি এক সপ্তাহের জন্য কিনতে চাচ্ছি ...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>ইভানা খুব সুন্দর</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>নাটক : শেষ দেখা রচনা : হুমায়ুন কবিরপিষ্ঠা : ৪৭...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>ওয়াও গ্রেইট।</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>লাস্টের নাম্বারটা আসসালামু আলাইকুম</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>ফাউল,, টাইম টা নস্ট হইলো বেকার!!,কোনো এক বোকাচ...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>ইবানা দারুন একটা মেয়ে</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>য়েক কথায় অসাধারণ। ওয়াও</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>বিয়েটা নিশুর সাথে হলে অনেক ভালো হতো নাটকটা,,তা...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>আমার দেখা of the best নাটক এইটা...খুবই ভাল ছি...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>তিশার অভিনয় চোখে পড়ার মতো উন্নতি হয়েছে।</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>কাজটা জেরিনের বাবা ভাল করে নাই</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>প্রচুর ফালতু । এরকম টা আশায় ছিলো না ।</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>টাকাতো এখন অ্যাকাউন্টের মধ্যে নাই আনফরচুনেটলি ...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>এভাবে কি কখনও সুখী হওয়া যায়???????</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>টাকা লোভ এ মানুষ কত নিচে নামে।যেমনি মোশাররফ ন...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>অসাধারণ নাটক।।</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>সত্যি খুভ অসাধারন</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>কেমনে পারেন এতো মজার নাটক করতে ভাই??</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>ভাল লাগছে, শেষ দিকটা বেশি মজার ছিল।</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>কি কতা কই বলেন আপনার কথা শুনতেছি</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>i love SARWAR TUBE channel k..shamne aro valo...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3c417648-4ace-426d-a107-9e9acd221ed9')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
" \n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
" \n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" flex-wrap:wrap;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-3c417648-4ace-426d-a107-9e9acd221ed9 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-3c417648-4ace-426d-a107-9e9acd221ed9');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
" </div>\n", | |
" " | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Count the rows per audio_emotion value.\n", | |
"df[\"audio_emotion\"].value_counts()" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "kHMLkJCSIcl1", | |
"outputId": "bd85a481-1860-435f-f7c4-95c1bd3fdb56" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"N 9487\n", | |
"A 4357\n", | |
"Name: audio_emotion, dtype: int64" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 5 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# %pip installs into the environment of the running kernel; both packages are\n", | |
"# pinned so a fresh runtime resolves the same versions.\n", | |
"%pip install xlrd==1.2.0\n", | |
"%pip install openpyxl==3.0.9" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Aq3PV7N5IgQd", | |
"outputId": "23ebdc2d-f768-46f9-acc0-ea5647a276a1" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Collecting xlrd==1.2.0\n", | |
" Downloading xlrd-1.2.0-py2.py3-none-any.whl (103 kB)\n", | |
"\u001b[?25l\r\u001b[K |███▏ | 10 kB 17.4 MB/s eta 0:00:01\r\u001b[K |██████▍ | 20 kB 10.9 MB/s eta 0:00:01\r\u001b[K |█████████▌ | 30 kB 6.0 MB/s eta 0:00:01\r\u001b[K |████████████▊ | 40 kB 5.4 MB/s eta 0:00:01\r\u001b[K |███████████████▉ | 51 kB 4.0 MB/s eta 0:00:01\r\u001b[K |███████████████████ | 61 kB 4.7 MB/s eta 0:00:01\r\u001b[K |██████████████████████▏ | 71 kB 5.1 MB/s eta 0:00:01\r\u001b[K |█████████████████████████▍ | 81 kB 4.7 MB/s eta 0:00:01\r\u001b[K |████████████████████████████▋ | 92 kB 5.2 MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▊| 102 kB 5.7 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 103 kB 5.7 MB/s \n", | |
"\u001b[?25hInstalling collected packages: xlrd\n", | |
" Attempting uninstall: xlrd\n", | |
" Found existing installation: xlrd 1.1.0\n", | |
" Uninstalling xlrd-1.1.0:\n", | |
" Successfully uninstalled xlrd-1.1.0\n", | |
"Successfully installed xlrd-1.2.0\n", | |
"Requirement already satisfied: openpyxl in /usr/local/lib/python3.7/dist-packages (3.0.9)\n", | |
"Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.7/dist-packages (from openpyxl) (1.1.0)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Drop rows with a missing value in any column; rebinding the name avoids\n", | |
"# mutating the frame in place.\n", | |
"df = df.dropna()" | |
], | |
"metadata": { | |
"id": "EFTuQSDeJMXB" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"df.columns" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "zlRj3UczJ6Bk", | |
"outputId": "78d125ba-6b5c-44bd-c7a0-e34d9035c421" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"Index(['text_bangla', 'audio_emotion'], dtype='object')" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 8 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"df.shape" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "-pLVCsOLJD7C", | |
"outputId": "86912e57-1575-4090-cebc-08b458769d50" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(13844, 2)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 9 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Count the rows per audio_emotion value after the missing-value drop.\n", | |
"df[\"audio_emotion\"].value_counts()" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "9uDcV4Ajq7JP", | |
"outputId": "c91f5869-690c-43b3-c085-8d405658e5fb" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"N 9487\n", | |
"A 4357\n", | |
"Name: audio_emotion, dtype: int64" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Select the rows whose audio_emotion is \"N\" and display them.\n", | |
"dfN = df.loc[df[\"audio_emotion\"].eq(\"N\")]\n", | |
"dfN" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 424 | |
}, | |
"id": "GIMZrBdVr8cB", | |
"outputId": "f7c1fb2c-20cb-4846-c889-b8ad2f55ef5f" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" text_bangla audio_emotion\n", | |
"0 ভয়েস চ্যাটে আমি এক সপ্তাহের জন্য কিনতে চাচ্ছি ... N\n", | |
"1 ইভানা খুব সুন্দর N\n", | |
"3 ওয়াও গ্রেইট। N\n", | |
"4 লাস্টের নাম্বারটা আসসালামু আলাইকুম N\n", | |
"6 ইবানা দারুন একটা মেয়ে N\n", | |
"... ... ...\n", | |
"13834 খুব ভাল লাগলো... ধন্যবাদ পরিচালক কে আমাদের দে... N\n", | |
"13836 নিশো ভাই পুরষ্কার পাবে ইনশাল্লাহ আবারও এবং মেহ... N\n", | |
"13837 ভাইয়া এত সুন্দর একটা নাটক করলে যে চোখের পানি ... N\n", | |
"13838 অপূর্ব অসাধারণ । N\n", | |
"13841 অনেক ভালো লগলো N\n", | |
"\n", | |
"[9487 rows x 2 columns]" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-9f75e238-faaa-4333-a107-925d1ed218d2\">\n", | |
" <div class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>text_bangla</th>\n", | |
" <th>audio_emotion</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>ভয়েস চ্যাটে আমি এক সপ্তাহের জন্য কিনতে চাচ্ছি ...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>ইভানা খুব সুন্দর</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>ওয়াও গ্রেইট।</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>লাস্টের নাম্বারটা আসসালামু আলাইকুম</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>ইবানা দারুন একটা মেয়ে</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13834</th>\n", | |
" <td>খুব ভাল লাগলো... ধন্যবাদ পরিচালক কে আমাদের দে...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13836</th>\n", | |
" <td>নিশো ভাই পুরষ্কার পাবে ইনশাল্লাহ আবারও এবং মেহ...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13837</th>\n", | |
" <td>ভাইয়া এত সুন্দর একটা নাটক করলে যে চোখের পানি ...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13838</th>\n", | |
" <td>অপূর্ব অসাধারণ ।</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13841</th>\n", | |
" <td>অনেক ভালো লগলো</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>9487 rows × 2 columns</p>\n", | |
"</div>\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9f75e238-faaa-4333-a107-925d1ed218d2')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
" \n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
" \n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" flex-wrap:wrap;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-9f75e238-faaa-4333-a107-925d1ed218d2 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-9f75e238-faaa-4333-a107-925d1ed218d2');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
" </div>\n", | |
" " | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 11 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Undersample the \"N\" rows down to the number of \"A\" rows. The target size is\n", | |
"# computed from the data instead of being hard-coded, and the fixed seed makes\n", | |
"# the draw repeatable across runs.\n", | |
"df_Normal = dfN.sample(n=int((df['audio_emotion'] == \"A\").sum()), random_state=1)\n", | |
"df_Normal" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 424 | |
}, | |
"id": "L1Nw5j_oysI-", | |
"outputId": "9156a707-a3a2-4df0-a77e-bc458515b279" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" text_bangla audio_emotion\n", | |
"3185 আমি তো আমি তো বর্তমান ইচ্ছে ডানাতে আছি না N\n", | |
"13615 মাত্র ২ জন কে দিয়ে ও যে ভালো সুন্দর নাটক করা এ... N\n", | |
"2122 এক কথায় অসাধারণ হইছে। N\n", | |
"7752 ঐটার কোডটা কি দেওয়া যায় না N\n", | |
"9959 নাটক টা খুব সুন্দর হয়ছে N\n", | |
"... ... ...\n", | |
"12684 আমি এই নাটক আর দেখিনি, দেখার সাথে সাথে ভালো লা... N\n", | |
"13804 এভাবে প্রতিটি শিল্পপতিদের ছেলেমেয়েরা যদি বেয়... N\n", | |
"8799 অসাধারণ একটি নাটক N\n", | |
"6816 উফ ভাই নাটক টা কি জে ভালো লাগতেছে বলে বুজাতে প... N\n", | |
"7380 অমি ভাই ,আপনার প্রতিটি কাজই ভালো লাগে।এইভাবেই ... N\n", | |
"\n", | |
"[4357 rows x 2 columns]" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-bed52083-d8c4-4f88-8b6e-249f47c45b49\">\n", | |
" <div class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>text_bangla</th>\n", | |
" <th>audio_emotion</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>3185</th>\n", | |
" <td>আমি তো আমি তো বর্তমান ইচ্ছে ডানাতে আছি না</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13615</th>\n", | |
" <td>মাত্র ২ জন কে দিয়ে ও যে ভালো সুন্দর নাটক করা এ...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2122</th>\n", | |
" <td>এক কথায় অসাধারণ হইছে।</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7752</th>\n", | |
" <td>ঐটার কোডটা কি দেওয়া যায় না</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9959</th>\n", | |
" <td>নাটক টা খুব সুন্দর হয়ছে</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12684</th>\n", | |
" <td>আমি এই নাটক আর দেখিনি, দেখার সাথে সাথে ভালো লা...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13804</th>\n", | |
" <td>এভাবে প্রতিটি শিল্পপতিদের ছেলেমেয়েরা যদি বেয়...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8799</th>\n", | |
" <td>অসাধারণ একটি নাটক</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6816</th>\n", | |
" <td>উফ ভাই নাটক টা কি জে ভালো লাগতেছে বলে বুজাতে প...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7380</th>\n", | |
" <td>অমি ভাই ,আপনার প্রতিটি কাজই ভালো লাগে।এইভাবেই ...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>4357 rows × 2 columns</p>\n", | |
"</div>\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-bed52083-d8c4-4f88-8b6e-249f47c45b49')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
" \n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
" \n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" flex-wrap:wrap;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-bed52083-d8c4-4f88-8b6e-249f47c45b49 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-bed52083-d8c4-4f88-8b6e-249f47c45b49');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
" </div>\n", | |
" " | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 12 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Select the rows whose audio_emotion is \"A\" and display them.\n", | |
"df_Angry = df.loc[df[\"audio_emotion\"].eq(\"A\")]\n", | |
"df_Angry" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 424 | |
}, | |
"id": "DibERAXI12pi", | |
"outputId": "d02906c2-2216-4b3a-bfa9-21d4bf7e57a4" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" text_bangla audio_emotion\n", | |
"2 নাটক : শেষ দেখা রচনা : হুমায়ুন কবিরপিষ্ঠা : ৪৭... A\n", | |
"5 ফাউল,, টাইম টা নস্ট হইলো বেকার!!,কোনো এক বোকাচ... A\n", | |
"12 প্রচুর ফালতু । এরকম টা আশায় ছিলো না । A\n", | |
"13 টাকাতো এখন অ্যাকাউন্টের মধ্যে নাই আনফরচুনেটলি ... A\n", | |
"15 টাকা লোভ এ মানুষ কত নিচে নামে।যেমনি মোশাররফ ন... A\n", | |
"... ... ...\n", | |
"13839 ফালতু নাটাক A\n", | |
"13840 উনিশ টেকা এটা কন্টিনিউ প্রতি ত্রিশদিনে একবার A\n", | |
"13842 গরুর মাংস দেখলেই এমন করে যেন এক মাস খেতে পায়নি... A\n", | |
"13843 টু জি থ্রিজি ফোরজি অনলি করে দিছি আমি A\n", | |
"13844 নাটকের পরিচালক নাটকটা লিখছে মনে হয় গান্জা সেবন... A\n", | |
"\n", | |
"[4357 rows x 2 columns]" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-6adce912-35a8-4352-b2bd-09bf30b47bdc\">\n", | |
" <div class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>text_bangla</th>\n", | |
" <th>audio_emotion</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>নাটক : শেষ দেখা রচনা : হুমায়ুন কবিরপিষ্ঠা : ৪৭...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>ফাউল,, টাইম টা নস্ট হইলো বেকার!!,কোনো এক বোকাচ...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>প্রচুর ফালতু । এরকম টা আশায় ছিলো না ।</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>টাকাতো এখন অ্যাকাউন্টের মধ্যে নাই আনফরচুনেটলি ...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>টাকা লোভ এ মানুষ কত নিচে নামে।যেমনি মোশাররফ ন...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13839</th>\n", | |
" <td>ফালতু নাটাক</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13840</th>\n", | |
" <td>উনিশ টেকা এটা কন্টিনিউ প্রতি ত্রিশদিনে একবার</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13842</th>\n", | |
" <td>গরুর মাংস দেখলেই এমন করে যেন এক মাস খেতে পায়নি...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13843</th>\n", | |
" <td>টু জি থ্রিজি ফোরজি অনলি করে দিছি আমি</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13844</th>\n", | |
" <td>নাটকের পরিচালক নাটকটা লিখছে মনে হয় গান্জা সেবন...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>4357 rows × 2 columns</p>\n", | |
"</div>\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6adce912-35a8-4352-b2bd-09bf30b47bdc')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
" \n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
" \n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" flex-wrap:wrap;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-6adce912-35a8-4352-b2bd-09bf30b47bdc button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-6adce912-35a8-4352-b2bd-09bf30b47bdc');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
" </div>\n", | |
" " | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 13 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Stack the df_Normal and df_Angry frames, shuffle the rows with a fixed seed,\n", | |
"# and renumber the index from 0 so positions and labels coincide.\n", | |
"df_final = (\n", | |
"    pd.concat([df_Normal, df_Angry])\n", | |
"    .sample(frac=1, random_state=1)\n", | |
"    .reset_index(drop=True)\n", | |
")\n", | |
"df_final" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 424 | |
}, | |
"id": "AJMYS6KTyr_o", | |
"outputId": "ec39ba68-8cf1-4042-d9ed-c8bcd7a9d5d4" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" text_bangla audio_emotion\n", | |
"0 অসাধারণ, N\n", | |
"1 এমনিই এক বালছাল বানাইছে! জোর করে অখাদ্য গেলানো... A\n", | |
"2 আমি হলে টারে রাম চোদা দিতাম A\n", | |
"3 শুনতে পাচ্ছি ঠিক আছে ভদ্র ভাষায় কথা বলতেছি সার... A\n", | |
"4 মনকে,কুরুচি-অস্লীল-পর্নগ্রফী এবং বিসন্যতা থেকে... N\n", | |
"... ... ...\n", | |
"8709 এইরকম নাটকের জন্যই অপেক্ষায় থাকি!!!গল্পটা গতান... N\n", | |
"8710 ফালতু....... A\n", | |
"8711 সত্যি নিশো ভাই তুমি বাংলা নাটকের কিংবদন্তি আর ... N\n", | |
"8712 মোশাররফ করিমের অভিনয়ে সেই আগের মত ধার নাই A\n", | |
"8713 হ্যালো হ্যা হ্যালো N\n", | |
"\n", | |
"[8714 rows x 2 columns]" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-e2304888-4811-4b5a-8b47-8fd9708b156b\">\n", | |
" <div class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>text_bangla</th>\n", | |
" <th>audio_emotion</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>অসাধারণ,</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>এমনিই এক বালছাল বানাইছে! জোর করে অখাদ্য গেলানো...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>আমি হলে টারে রাম চোদা দিতাম</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>শুনতে পাচ্ছি ঠিক আছে ভদ্র ভাষায় কথা বলতেছি সার...</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>মনকে,কুরুচি-অস্লীল-পর্নগ্রফী এবং বিসন্যতা থেকে...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8709</th>\n", | |
" <td>এইরকম নাটকের জন্যই অপেক্ষায় থাকি!!!গল্পটা গতান...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8710</th>\n", | |
" <td>ফালতু.......</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8711</th>\n", | |
" <td>সত্যি নিশো ভাই তুমি বাংলা নাটকের কিংবদন্তি আর ...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8712</th>\n", | |
" <td>মোশাররফ করিমের অভিনয়ে সেই আগের মত ধার নাই</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8713</th>\n", | |
" <td>হ্যালো হ্যা হ্যালো</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>8714 rows × 2 columns</p>\n", | |
"</div>\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e2304888-4811-4b5a-8b47-8fd9708b156b')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
" \n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
" \n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" flex-wrap:wrap;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-e2304888-4811-4b5a-8b47-8fd9708b156b button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-e2304888-4811-4b5a-8b47-8fd9708b156b');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
" </div>\n", | |
" " | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 14 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Spot-check a single comment by its row label.\n", | |
"df_final.loc[13, 'text_bangla']" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"id": "fpaxg8aTyr8d", | |
"outputId": "882924cd-4576-4c92-ea77-5209352b4e93" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'সুন্দর হয়েছে'" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 15 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import regex as re\n", | |
"\n", | |
"def text_to_word_list(text_bangla):\n", | |
"    \"\"\"Tokenise a string on runs of whitespace.\n", | |
"\n", | |
"    Args:\n", | |
"        text_bangla: Text to split.\n", | |
"\n", | |
"    Returns:\n", | |
"        The list of tokens produced by ``str.split()`` with no separator.\n", | |
"    \"\"\"\n", | |
"    return text_bangla.split()\n", | |
"\n", | |
"def replace_strings(text):\n", | |
"    \"\"\"Delete emoji/symbol, accented-Latin, general-punctuation and ASCII alphanumeric runs.\n", | |
"\n", | |
"    Matched runs are removed outright (nothing is inserted in their place),\n", | |
"    so this step never adds a separator between the neighbouring characters.\n", | |
"\n", | |
"    Args:\n", | |
"        text: String to clean.\n", | |
"\n", | |
"    Returns:\n", | |
"        ``text`` with every matched run deleted.\n", | |
"    \"\"\"\n", | |
"    # Code-point ranges to strip; they are joined into one character class below.\n", | |
"    unwanted_ranges = (\n", | |
"        u\"\\U0001F600-\\U0001F64F\"  # emoticons\n", | |
"        u\"\\U0001F300-\\U0001F5FF\"  # symbols & pictographs\n", | |
"        u\"\\U0001F680-\\U0001F6FF\"  # transport & map symbols\n", | |
"        u\"\\U0001F1E0-\\U0001F1FF\"  # flags (iOS)\n", | |
"        u\"\\U00002702-\\U000027B0\"\n", | |
"        u\"\\U000024C2-\\U0001F251\"\n", | |
"        u\"\\u00C0-\\u017F\"  # latin\n", | |
"        u\"\\u2000-\\u206F\"  # generalPunctuations\n", | |
"    )\n", | |
"    symbol_re = re.compile(\"[\" + unwanted_ranges + \"]+\", flags=re.UNICODE)\n", | |
"    ascii_alnum_re = re.compile('[a-zA-Z0-9]+', flags=re.I)\n", | |
"\n", | |
"    # Symbols go first, then whatever ASCII letters/digits remain.\n", | |
"    without_symbols = symbol_re.sub(r'', text)\n", | |
"    return ascii_alnum_re.sub(r'', without_symbols)\n", | |
"\n", | |
"def remove_punctuations(my_str):\n", | |
"    \"\"\"Replace punctuation, digits and the other listed symbols with spaces.\n", | |
"\n", | |
"    Each listed character becomes a space instead of being deleted, so text\n", | |
"    such as 'a!!!b' yields the two tokens 'a b' rather than the fused 'ab'.\n", | |
"    Runs of whitespace are then collapsed and the ends trimmed.\n", | |
"\n", | |
"    Args:\n", | |
"        my_str: String to clean.\n", | |
"\n", | |
"    Returns:\n", | |
"        The cleaned string with tokens separated by single spaces.\n", | |
"    \"\"\"\n", | |
"    # define punctuation\n", | |
"    # NOTE(review): the set also contains the letters E, R, O, e, r, o, which\n", | |
"    # looks accidental -- confirm before relying on this for Latin text.\n", | |
"    punctuations = '''```\u0012\u0010\u0002\b`\u0007\b£|¢|\u0007Ñ+-*/=EROero৳০১২৩৪৫৬৭৮৯012–34567•89।!()-[]{};:'\"“\\’,<>./?@#$%^&*_~‘—॥”‰🤣🤗⚽️✌�৷'''\n", | |
"\n", | |
"    # One translation table instead of growing a string one character at a time.\n", | |
"    to_space = str.maketrans({symbol: ' ' for symbol in punctuations})\n", | |
"\n", | |
"    # split()/join collapses the whitespace runs left behind by the substitution.\n", | |
"    return ' '.join(my_str.translate(to_space).split())\n", | |
"\n", | |
"\n", | |
"def stopwordRemoval(text, stopword_list=None):\n", | |
"    \"\"\"Drop stopword tokens from whitespace-tokenised text.\n", | |
"\n", | |
"    Args:\n", | |
"        text: Input text; it is coerced with ``str()`` before splitting.\n", | |
"        stopword_list: Optional iterable of tokens to drop. When omitted, the\n", | |
"            notebook-level ``stopwords`` list is looked up at call time.\n", | |
"\n", | |
"    Returns:\n", | |
"        The surviving tokens joined by single spaces.\n", | |
"    \"\"\"\n", | |
"    if stopword_list is None:\n", | |
"        # Resolved lazily because the global is created by a separate cell.\n", | |
"        stopword_list = stopwords\n", | |
"\n", | |
"    # Matching is exact and per token: an entry containing a space can never\n", | |
"    # equal a single token, so multi-word entries are never removed.\n", | |
"    blocked = set(stopword_list)\n", | |
"    kept = [token for token in str(text).split() if token not in blocked]\n", | |
"    return ' '.join(kept)\n", | |
"\n", | |
"\n", | |
"def joining(text):\n", | |
"    \"\"\"Concatenate an iterable of strings, separated by single spaces.\"\"\"\n", | |
"    return ' '.join(text)\n", | |
"\n", | |
"def preprocessing(text):\n", | |
"    \"\"\"Clean one comment: strip symbols first, then drop stopwords.\n", | |
"\n", | |
"    Args:\n", | |
"        text: Raw comment; it is coerced with ``str()`` before cleaning.\n", | |
"\n", | |
"    Returns:\n", | |
"        The cleaned text as returned by ``stopwordRemoval``.\n", | |
"    \"\"\"\n", | |
"    # Stopword matching is exact per token, so it must run after emoji,\n", | |
"    # Latin/digit and punctuation stripping; otherwise a token such as\n", | |
"    # 'word,' still carries its punctuation and is never recognised.\n", | |
"    cleaned = remove_punctuations(replace_strings(str(text)))\n", | |
"    return stopwordRemoval(cleaned)\n", | |
"\n" | |
], | |
"metadata": { | |
"id": "SAnSgrpDJzuI" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Read the stopword sheet and keep its 'words' column as a plain Python list.\n", | |
"stop_words = pd.read_excel('/content/stopwords_bangla.xlsx')\n", | |
"stopwords = stop_words.loc[:, 'words'].tolist()\n", | |
"stopwords" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "uChFK4rJBtL9", | |
"outputId": "1f691bd6-d2c5-4e36-e355-fd0161fd924a" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['অই',\n", | |
" 'অগত্যা',\n", | |
" 'অত: পর',\n", | |
" 'অতএব',\n", | |
" 'অথচ',\n", | |
" 'অথবা',\n", | |
" 'অধিক',\n", | |
" 'অধীনে',\n", | |
" 'অধ্যায়',\n", | |
" 'অনুগ্রহ',\n", | |
" 'অনুভূত',\n", | |
" 'অনুযায়ী',\n", | |
" 'অনুরূপ',\n", | |
" 'অনুসন্ধান',\n", | |
" 'অনুসরণ',\n", | |
" 'অনুসারে',\n", | |
" 'অনুসৃত',\n", | |
" 'অনেক',\n", | |
" 'অনেকে',\n", | |
" 'অনেকেই',\n", | |
" 'অন্তত',\n", | |
" 'অন্য',\n", | |
" 'অন্যত্র',\n", | |
" 'অন্যভাবে',\n", | |
" 'অন্যান্য',\n", | |
" 'অপেক্ষাকৃতভাবে',\n", | |
" 'অবধি',\n", | |
" 'অবশ্য',\n", | |
" 'অবশ্যই',\n", | |
" 'অবস্থা',\n", | |
" 'অবিলম্বে',\n", | |
" 'অভ্যন্তরস্থ',\n", | |
" 'অর্জিত',\n", | |
" 'অর্থাত',\n", | |
" 'অসদৃশ',\n", | |
" 'অসম্ভাব্য',\n", | |
" 'আইন',\n", | |
" 'আউট',\n", | |
" 'আক্রান্ত',\n", | |
" 'আগামী',\n", | |
" 'আগে',\n", | |
" 'আগেই',\n", | |
" 'আগ্রহী',\n", | |
" 'আছে',\n", | |
" 'আজ',\n", | |
" 'আট',\n", | |
" 'আদেশ',\n", | |
" 'আদ্যভাগে',\n", | |
" 'আন্দাজ',\n", | |
" 'আপনার',\n", | |
" 'আপনি',\n", | |
" 'আবার',\n", | |
" 'আমরা',\n", | |
" 'আমাকে',\n", | |
" 'আমাদিগের',\n", | |
" 'আমাদের',\n", | |
" 'আমার',\n", | |
" 'আমি',\n", | |
" 'আর',\n", | |
" 'আরও',\n", | |
" 'আশি',\n", | |
" 'আশু',\n", | |
" 'আসা',\n", | |
" 'আসে',\n", | |
" 'ই',\n", | |
" 'ইচ্ছা',\n", | |
" 'ইচ্ছাপূর্বক',\n", | |
" 'ইতিমধ্যে',\n", | |
" 'ইতোমধ্যে',\n", | |
" 'ইত্যাদি',\n", | |
" 'ইশারা',\n", | |
" 'ইহা',\n", | |
" 'ইহাতে',\n", | |
" 'উক্তি',\n", | |
" 'উচিত',\n", | |
" 'উচ্চ',\n", | |
" 'উঠা',\n", | |
" 'উত্তম',\n", | |
" 'উত্তর',\n", | |
" 'উনি',\n", | |
" 'উপর',\n", | |
" 'উপরে',\n", | |
" 'উপলব্ধ',\n", | |
" 'উপায়',\n", | |
" 'উভয়',\n", | |
" 'উল্লেখ',\n", | |
" 'উল্লেখযোগ্যভাবে',\n", | |
" 'উহার',\n", | |
" 'ঊর্ধ্বতন',\n", | |
" 'এ',\n", | |
" 'এপর্যন্ত',\n", | |
" 'এঁদের',\n", | |
" 'এঁরা',\n", | |
" 'এই',\n", | |
" 'এইগুলো',\n", | |
" 'এইভাবে',\n", | |
" 'এক',\n", | |
" 'একই',\n", | |
" 'একটি',\n", | |
" 'একদা',\n", | |
" 'একবার',\n", | |
" 'একভাবে',\n", | |
" 'একরকম',\n", | |
" 'একসঙ্গে',\n", | |
" 'একা',\n", | |
" 'একে',\n", | |
" 'এক্',\n", | |
" 'এখন',\n", | |
" 'এখনও',\n", | |
" 'এখনো',\n", | |
" 'এখানে',\n", | |
" 'এখানেই',\n", | |
" 'এছাড়াও',\n", | |
" 'এটা',\n", | |
" 'এটাই',\n", | |
" 'এটি',\n", | |
" 'এত',\n", | |
" 'এতটাই',\n", | |
" 'এতদ্বারা',\n", | |
" 'এতে',\n", | |
" 'এদিকে',\n", | |
" 'এদের',\n", | |
" 'এপর্যন্ত',\n", | |
" 'এবং',\n", | |
" 'এবার',\n", | |
" 'এমন',\n", | |
" 'এমনকি',\n", | |
" 'এমনকী',\n", | |
" 'এমনি',\n", | |
" 'এর',\n", | |
" 'এরকম',\n", | |
" 'এরা',\n", | |
" 'এল',\n", | |
" 'এলাকায়',\n", | |
" 'এলাকার',\n", | |
" 'এস',\n", | |
" 'এসে',\n", | |
" 'ঐ',\n", | |
" 'ও',\n", | |
" 'ওঁদের',\n", | |
" 'ওঁর',\n", | |
" 'ওঁরা',\n", | |
" 'ওই',\n", | |
" 'ওকে',\n", | |
" 'ওখানে',\n", | |
" 'ওদের',\n", | |
" 'ওর',\n", | |
" 'ওরা',\n", | |
" 'ওহে',\n", | |
" 'কক্ষ',\n", | |
" 'কখন',\n", | |
" 'কখনও',\n", | |
" 'কত',\n", | |
" 'কবে',\n", | |
" 'কম',\n", | |
" 'কমনে',\n", | |
" 'কয়েক',\n", | |
" 'কয়েকটি',\n", | |
" 'করছে',\n", | |
" 'করছেন',\n", | |
" 'করতে',\n", | |
" 'করবে',\n", | |
" 'করবেন',\n", | |
" 'করলে',\n", | |
" 'করলেন',\n", | |
" 'করলো',\n", | |
" 'করা',\n", | |
" 'করাই',\n", | |
" 'করাত',\n", | |
" 'করার',\n", | |
" 'করায়',\n", | |
" 'করি',\n", | |
" 'করিতে',\n", | |
" 'করিয়া',\n", | |
" 'করিয়ে',\n", | |
" 'করে',\n", | |
" 'করেই',\n", | |
" 'করেছিল',\n", | |
" 'করেছিলেন',\n", | |
" 'করেছে',\n", | |
" 'করেছেন',\n", | |
" 'করেন',\n", | |
" 'কর্তব্য',\n", | |
" 'কাউকে',\n", | |
" 'কাছ',\n", | |
" 'কাছাকাছি',\n", | |
" 'কাছে',\n", | |
" 'কাজ',\n", | |
" 'কাজে',\n", | |
" 'কারও',\n", | |
" 'কারণ',\n", | |
" 'কারণসমূহ',\n", | |
" 'কারো',\n", | |
" 'কি',\n", | |
" 'কিংবা',\n", | |
" 'কিছু',\n", | |
" 'কিছুই',\n", | |
" 'কিছুটা',\n", | |
" 'কিছুনা',\n", | |
" 'কিনা',\n", | |
" 'কিন্তু',\n", | |
" 'কিভাবে',\n", | |
" 'কী',\n", | |
" 'কূপ',\n", | |
" 'কে',\n", | |
" 'কেউ',\n", | |
" 'কেউই',\n", | |
" 'কেউনা',\n", | |
" 'কেখা',\n", | |
" 'কেন',\n", | |
" 'কেবল',\n", | |
" 'কেবা',\n", | |
" 'কেস',\n", | |
" 'কেহ',\n", | |
" 'কোটি',\n", | |
" 'কোথা',\n", | |
" 'কোথাও',\n", | |
" 'কোথায়',\n", | |
" 'কোন',\n", | |
" 'কোনও',\n", | |
" 'কোনো',\n", | |
" 'ক্রম',\n", | |
" 'ক্ষেত্রে',\n", | |
" 'কয়েক',\n", | |
" 'কয়েকটি',\n", | |
" 'খুঁজছেন',\n", | |
" 'খুব',\n", | |
" 'খোলা',\n", | |
" 'খোলে',\n", | |
" 'গড়',\n", | |
" 'গত',\n", | |
" 'গিয়ে',\n", | |
" 'গিয়েছিলাম',\n", | |
" 'গিয়েছে',\n", | |
" 'গিয়ে',\n", | |
" 'গিয়েছে',\n", | |
" 'গুরুত্ব',\n", | |
" 'গুলি',\n", | |
" 'গেছে',\n", | |
" 'গেল',\n", | |
" 'গেলে',\n", | |
" 'গোটা',\n", | |
" 'গোষ্ঠীবদ্ধ',\n", | |
" 'গ্রহণ',\n", | |
" 'গ্রুপ',\n", | |
" 'ঘর',\n", | |
" 'ঘোষণা',\n", | |
" 'চলে',\n", | |
" 'চান',\n", | |
" 'চায়',\n", | |
" 'চার',\n", | |
" 'চালা',\n", | |
" 'চালান',\n", | |
" 'চালু',\n", | |
" 'চায়',\n", | |
" 'চেয়ে',\n", | |
" 'চেয়েছিলেন',\n", | |
" 'চেষ্টা',\n", | |
" 'চেয়ে',\n", | |
" 'ছয়',\n", | |
" 'ছাড়া',\n", | |
" 'ছাড়াছাড়ি',\n", | |
" 'ছাড়া',\n", | |
" 'ছাড়াও',\n", | |
" 'ছিল',\n", | |
" 'ছিলেন',\n", | |
" 'ছোট',\n", | |
" 'জন',\n", | |
" 'জনকে',\n", | |
" 'জনাব',\n", | |
" 'জনাবা',\n", | |
" 'জনের',\n", | |
" 'জন্য',\n", | |
" 'জানতাম',\n", | |
" 'জানতে',\n", | |
" 'জানা',\n", | |
" 'জানানো',\n", | |
" 'জানায়',\n", | |
" 'জানিয়ে',\n", | |
" 'জানিয়েছে',\n", | |
" 'জানে',\n", | |
" 'জায়গা',\n", | |
" 'জিজ্ঞাসা',\n", | |
" 'জিজ্ঞেস',\n", | |
" 'জিনিস',\n", | |
" 'জে',\n", | |
" 'জ্নজন',\n", | |
" 'টা',\n", | |
" 'টি',\n", | |
" 'ঠিক',\n", | |
" 'ঠিকআছে',\n", | |
" 'ডগা',\n", | |
" 'তখন',\n", | |
" 'তত',\n", | |
" 'তত্কারণে',\n", | |
" 'তত্প্রতি',\n", | |
" 'তথা',\n", | |
" 'তদনুসারে',\n", | |
" 'তদ্ব্যতীত',\n", | |
" 'তন্নতন্ন',\n", | |
" 'তবু',\n", | |
" 'তবে',\n", | |
" 'তরুণ',\n", | |
" 'তা',\n", | |
" 'তাঁকে',\n", | |
" 'তাঁদের',\n", | |
" 'তাঁর',\n", | |
" 'তাঁরা',\n", | |
" 'তাঁহারা',\n", | |
" 'তাই',\n", | |
" 'তাও',\n", | |
" 'তাকে',\n", | |
" 'তাতে',\n", | |
" 'তাদের',\n", | |
" 'তার',\n", | |
" 'তারপর',\n", | |
" 'তারপরেও',\n", | |
" 'তারা',\n", | |
" 'তারিখ',\n", | |
" 'তারৈ',\n", | |
" 'তাহলে',\n", | |
" 'তাহা',\n", | |
" 'তাহাতে',\n", | |
" 'তাহাদিগকে',\n", | |
" 'তাহাদেরই',\n", | |
" 'তাহার',\n", | |
" 'তিন',\n", | |
" 'তিনি',\n", | |
" 'তিনিও',\n", | |
" 'তীক্ষ্ন',\n", | |
" 'তুমি',\n", | |
" 'তুলে',\n", | |
" 'তেমন',\n", | |
" 'তৈরীর',\n", | |
" 'তো',\n", | |
" 'তোমার',\n", | |
" 'তোলে',\n", | |
" 'থাকবে',\n", | |
" 'থাকবেন',\n", | |
" 'থাকা',\n", | |
" 'থাকায়',\n", | |
" 'থাকায়',\n", | |
" 'থাকে',\n", | |
" 'থাকেন',\n", | |
" 'থেকে',\n", | |
" 'থেকেই',\n", | |
" 'থেকেও',\n", | |
" 'দরকারী',\n", | |
" 'দলবদ্ধ',\n", | |
" 'দান',\n", | |
" 'দিকে',\n", | |
" 'দিতে',\n", | |
" 'দিন',\n", | |
" 'দিয়ে',\n", | |
" 'দিয়েছে',\n", | |
" 'দিয়েছেন',\n", | |
" 'দিলেন',\n", | |
" 'দিয়ে',\n", | |
" 'দিয়েছে',\n", | |
" 'দিয়েছেন',\n", | |
" 'দু',\n", | |
" 'দুই',\n", | |
" 'দুটি',\n", | |
" 'দুটো',\n", | |
" 'দূরে',\n", | |
" 'দেওয়ার',\n", | |
" 'দেওয়া',\n", | |
" 'দেওয়ার',\n", | |
" 'দেখতে',\n", | |
" 'দেখা',\n", | |
" 'দেখাচ্ছে',\n", | |
" 'দেখিয়েছেন',\n", | |
" 'দেখে',\n", | |
" 'দেখেন',\n", | |
" 'দেন',\n", | |
" 'দেয়',\n", | |
" 'দেয়',\n", | |
" 'দ্বারা',\n", | |
" 'দ্বিগুণ',\n", | |
" 'দ্বিতীয়',\n", | |
" 'দ্য',\n", | |
" 'ধরা',\n", | |
" 'ধরে',\n", | |
" 'ধামার',\n", | |
" 'নতুন',\n", | |
" 'নব্বই',\n", | |
" 'নয়',\n", | |
" 'নাই',\n", | |
" 'নাকি',\n", | |
" 'নাগাদ',\n", | |
" 'নানা',\n", | |
" 'নাম',\n", | |
" 'নিচে',\n", | |
" 'নিছক',\n", | |
" 'নিজে',\n", | |
" 'নিজেই',\n", | |
" 'নিজেকে',\n", | |
" 'নিজেদের',\n", | |
" 'নিজেদেরকে',\n", | |
" 'নিজের',\n", | |
" 'নিতে',\n", | |
" 'নিদিষ্ট',\n", | |
" 'নিম্নাভিমুখে',\n", | |
" 'নিয়ে',\n", | |
" 'নির্দিষ্ট',\n", | |
" 'নির্বিশেষে',\n", | |
" 'নিশ্চিত',\n", | |
" 'নিয়ে',\n", | |
" 'নেই',\n", | |
" 'নেওয়ার',\n", | |
" 'নেওয়া',\n", | |
" 'নেয়ার',\n", | |
" 'নয়',\n", | |
" 'পক্ষই',\n", | |
" 'পক্ষে',\n", | |
" 'পঞ্চম',\n", | |
" 'পড়া',\n", | |
" 'পণ্য',\n", | |
" 'পথ',\n", | |
" 'পয়েন্ট',\n", | |
" 'পর',\n", | |
" 'পরন্তু',\n", | |
" 'পরবর্তী',\n", | |
" 'পরিণত',\n", | |
" 'পরিবর্তে',\n", | |
" 'পরে',\n", | |
" 'পরেই',\n", | |
" 'পরেও',\n", | |
" 'পর্যন্ত',\n", | |
" 'পর্যাপ্ত',\n", | |
" 'পাঁচ',\n", | |
" 'পাওয়া',\n", | |
" 'পাচ',\n", | |
" 'পায়',\n", | |
" 'পারা',\n", | |
" 'পারি',\n", | |
" 'পারিনি',\n", | |
" 'পারে',\n", | |
" 'পারেন',\n", | |
" 'পালা',\n", | |
" 'পাশ',\n", | |
" 'পাশে',\n", | |
" 'পিছনে',\n", | |
" 'পিঠের',\n", | |
" 'পুরোনো',\n", | |
" 'পুরোপুরি',\n", | |
" 'পূর্বে',\n", | |
" 'পৃষ্ঠা',\n", | |
" 'পৃষ্ঠাগুলি',\n", | |
" 'পেছনে',\n", | |
" 'পেয়েছেন',\n", | |
" 'পেয়ে',\n", | |
" 'পেয়্র্',\n", | |
" 'প্রকৃতপক্ষে',\n", | |
" 'প্রণীত',\n", | |
" 'প্রতি',\n", | |
" 'প্রথম',\n", | |
" 'প্রদত্ত',\n", | |
" 'প্রদর্শনী',\n", | |
" 'প্রদর্শিত',\n", | |
" 'প্রধানত',\n", | |
" 'প্রবলভাবে',\n", | |
" 'প্রভৃতি',\n", | |
" 'প্রমাণীকরণ',\n", | |
" 'প্রযন্ত',\n", | |
" 'প্রয়োজন',\n", | |
" 'প্রয়োজনীয়',\n", | |
" 'প্রসূত',\n", | |
" 'প্রাক্তন',\n", | |
" 'প্রাথমিক',\n", | |
" 'প্রাথমিকভাবে',\n", | |
" 'প্রান্ত',\n", | |
" 'প্রাপ্ত',\n", | |
" 'প্রায়',\n", | |
" 'প্রায়ই',\n", | |
" 'প্রায়',\n", | |
" 'ফলাফল',\n", | |
" 'ফলে',\n", | |
" 'ফিক্স',\n", | |
" 'ফিরে',\n", | |
" 'ফের',\n", | |
" 'বক্তব্য',\n", | |
" 'বছর',\n", | |
" 'বড়',\n", | |
" 'বদলে',\n", | |
" 'বন',\n", | |
" 'বন্ধ',\n", | |
" 'বরং',\n", | |
" 'বরাবর',\n", | |
" 'বর্ণন',\n", | |
" 'বর্তমান',\n", | |
" 'বলতে',\n", | |
" 'বলল',\n", | |
" 'বললেন',\n", | |
" 'বলা',\n", | |
" 'বলে',\n", | |
" 'বলেছেন',\n", | |
" 'বলেন',\n", | |
" 'বসে',\n", | |
" 'বহু',\n", | |
" 'বা',\n", | |
" 'বাঁক',\n", | |
" 'বাইরে',\n", | |
" 'বাকি',\n", | |
" 'বাড়ি',\n", | |
" 'বাতিক',\n", | |
" 'বাদ',\n", | |
" 'বাদে',\n", | |
" 'বার',\n", | |
" 'বাহিরে',\n", | |
" 'বিনা',\n", | |
" 'বিন্দু',\n", | |
" 'বিভিন্ন',\n", | |
" 'বিশেষ',\n", | |
" 'বিশেষণ',\n", | |
" 'বিশেষত',\n", | |
" 'বিশেষভাবে',\n", | |
" 'বিশ্ব',\n", | |
" 'বিষয়টি',\n", | |
" 'বুঝিয়ে',\n", | |
" 'বৃহত্তর',\n", | |
" 'বের',\n", | |
" 'বেশ',\n", | |
" 'বেশি',\n", | |
" 'বেশী',\n", | |
" 'ব্যতীত',\n", | |
" 'ব্যবহার',\n", | |
" 'ব্যবহারসমূহ',\n", | |
" 'ব্যবহৃত',\n", | |
" 'ব্যাক',\n", | |
" 'ব্যাপকভাবে',\n", | |
" 'ব্যাপারে',\n", | |
" 'ভবিষ্যতে',\n", | |
" 'ভান',\n", | |
" 'ভাবে',\n", | |
" 'ভাবেই',\n", | |
" 'ভাল',\n", | |
" 'ভিতরে',\n", | |
" 'ভিন্ন',\n", | |
" 'ভিন্নভাবে',\n", | |
" 'মত',\n", | |
" 'মতো',\n", | |
" 'মতোই',\n", | |
" 'মধ্যভাগে',\n", | |
" 'মধ্যে',\n", | |
" 'মধ্যেই',\n", | |
" 'মধ্যেও',\n", | |
" 'মনে',\n", | |
" 'মনে হয়',\n", | |
" 'মস্ত',\n", | |
" 'মহান',\n", | |
" 'মাত্র',\n", | |
" 'মাধ্যম',\n", | |
" 'মাধ্যমে',\n", | |
" 'মান',\n", | |
" 'মানানসই',\n", | |
" 'মানুষ',\n", | |
" 'মানে',\n", | |
" 'মামলা',\n", | |
" 'মিলিয়ন',\n", | |
" 'মুখ',\n", | |
" 'মূলত',\n", | |
" 'মোট',\n", | |
" 'মোটেই',\n", | |
" 'যখন',\n", | |
" 'যখনই',\n", | |
" 'যত',\n", | |
" 'যতটা',\n", | |
" 'যথা',\n", | |
" 'যথাক্রমে',\n", | |
" 'যথেষ্ট',\n", | |
" 'যদি',\n", | |
" 'যদিও',\n", | |
" 'যন্ত্রাংশ',\n", | |
" 'যা',\n", | |
" 'যাঁর',\n", | |
" 'যাঁরা',\n", | |
" 'যাই',\n", | |
" 'যাওয়া',\n", | |
" 'যাওয়ার',\n", | |
" 'যাওয়া',\n", | |
" 'যাওয়ার',\n", | |
" 'যাকে',\n", | |
" 'যাচ্ছে',\n", | |
" 'যাতে',\n", | |
" 'যাদের',\n", | |
" 'যান',\n", | |
" 'যাবে',\n", | |
" 'যায়',\n", | |
" 'যার',\n", | |
" 'যারা',\n", | |
" 'যাহার',\n", | |
" 'যাহোক',\n", | |
" 'যিনি',\n", | |
" 'যে',\n", | |
" 'যেখানে',\n", | |
" 'যেখানেই',\n", | |
" 'যেটি',\n", | |
" 'যেতে',\n", | |
" 'যেন',\n", | |
" 'যেমন',\n", | |
" 'যেহেতু',\n", | |
" 'যোগ',\n", | |
" 'রকম',\n", | |
" 'রয়েছে',\n", | |
" 'রাখা',\n", | |
" 'রাখে',\n", | |
" 'রাজী',\n", | |
" 'রাজ্যের',\n", | |
" 'রেখে',\n", | |
" 'রয়েছে',\n", | |
" 'লক্ষ',\n", | |
" 'লাইন',\n", | |
" 'লাল',\n", | |
" 'শত',\n", | |
" 'শব্দ',\n", | |
" 'শীঘ্র',\n", | |
" 'শীঘ্রই',\n", | |
" 'শুধু',\n", | |
" 'শুরু',\n", | |
" 'শুরুতে',\n", | |
" 'শূন্য',\n", | |
" 'শেষ',\n", | |
" 'সংক্রান্ত',\n", | |
" 'সংক্ষিপ্ত',\n", | |
" 'সংক্ষেপে',\n", | |
" 'সংখ্যা',\n", | |
" 'সংখ্যার',\n", | |
" 'সংশ্লিষ্ট',\n", | |
" 'সক্ষম',\n", | |
" 'সঙ্গে',\n", | |
" 'সঙ্গেও',\n", | |
" 'সত্য',\n", | |
" 'সত্যিই',\n", | |
" 'সদয়',\n", | |
" 'সদস্য',\n", | |
" 'সদস্যদের',\n", | |
" 'সফলভাবে',\n", | |
" 'সব',\n", | |
" 'সবচেয়ে',\n", | |
" 'সবাই',\n", | |
" 'সবার',\n", | |
" 'সময়',\n", | |
" 'সমস্ত',\n", | |
" 'সমান',\n", | |
" 'সম্পন্ন',\n", | |
" 'সম্প্রতি',\n", | |
" 'সম্ভব',\n", | |
" 'সম্ভবত',\n", | |
" 'সম্ভাব্য',\n", | |
" 'সরাইয়া',\n", | |
" 'সর্বত্র',\n", | |
" 'সর্বদা',\n", | |
" 'সর্বস্বান্ত',\n", | |
" 'সহ',\n", | |
" 'সহিত',\n", | |
" 'সাত',\n", | |
" 'সাধারণ',\n", | |
" 'সাধারণত',\n", | |
" 'সাব',\n", | |
" 'সাবেক',\n", | |
" 'সামগ্রিক',\n", | |
" 'সামনে',\n", | |
" 'সামান্য',\n", | |
" 'সাম্প্রতিক',\n", | |
" 'সুতরাং',\n", | |
" 'সুত্র',\n", | |
" 'সূচক',\n", | |
" 'সে',\n", | |
" 'সে হবে',\n", | |
" 'সেই',\n", | |
" 'সেকেন্ড',\n", | |
" 'সেখান',\n", | |
" 'সেখানে',\n", | |
" 'সেগুলো',\n", | |
" 'সেটা',\n", | |
" 'সেটাই',\n", | |
" 'সেটাও',\n", | |
" 'সেটি',\n", | |
" 'সেরা',\n", | |
" 'স্টপ',\n", | |
" 'স্থাপিত',\n", | |
" 'স্পষ্ট',\n", | |
" 'স্পষ্টত',\n", | |
" 'স্পষ্টতই',\n", | |
" 'স্ব',\n", | |
" 'স্বয়ং',\n", | |
" 'স্বাগত',\n", | |
" 'স্বাভাবিকভাবে',\n", | |
" 'স্বার্থ',\n", | |
" 'স্বয়ং',\n", | |
" 'হইতে',\n", | |
" 'হইবে',\n", | |
" 'হইয়া',\n", | |
" 'হওয়া',\n", | |
" 'হওয়ায়',\n", | |
" 'হওয়ার',\n", | |
" 'হচ্ছে',\n", | |
" 'হত',\n", | |
" 'হতে',\n", | |
" 'হতেই',\n", | |
" 'হন',\n", | |
" 'হবে',\n", | |
" 'হবেন',\n", | |
" 'হয়',\n", | |
" 'হয়তো',\n", | |
" 'হয়নি',\n", | |
" 'হয়ে',\n", | |
" 'হয়েই',\n", | |
" 'হয়েছিল',\n", | |
" 'হয়েছে',\n", | |
" 'হয়েছেন',\n", | |
" 'হল',\n", | |
" 'হলে',\n", | |
" 'হলেই',\n", | |
" 'হলেও',\n", | |
" 'হলো',\n", | |
" 'হাজার',\n", | |
" 'হায়',\n", | |
" 'হারানো',\n", | |
" 'হিসাবে',\n", | |
" 'হৈলে',\n", | |
" 'হোক',\n", | |
" 'হয়',\n", | |
" 'হয়তো',\n", | |
" 'হয়নি',\n", | |
" 'হয়ে',\n", | |
" 'হয়েই',\n", | |
" 'হয়েছিল',\n", | |
" 'হয়েছে',\n", | |
" 'হয়েছেন',\n", | |
" 'অংশ']" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 18 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Clean every comment in place; str() coerces any non-string cell value first.\n", | |
"df_final['text_bangla'] = df_final['text_bangla'].map(lambda raw: preprocessing(str(raw)))\n", | |
"df_final" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 424 | |
}, | |
"id": "PN3GP0Z7LbvD", | |
"outputId": "f9cff672-0450-4aec-92ab-d951735c199c" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" text_bangla audio_emotion\n", | |
"0 অসাধারণ N\n", | |
"1 এমনিই বালছাল বানাইছে জোর অখাদ্য গেলানো যায়না A\n", | |
"2 টারে রাম চোদা দিতাম A\n", | |
"3 শুনতে পাচ্ছি ভদ্র ভাষায় কথা বলতেছি সার্ভিসটা দাও A\n", | |
"4 মনকেকুরুচিঅস্লীলপর্নগ্রফী বিসন্যতা মুক্ত রাখতে... N\n", | |
"... ... ...\n", | |
"8709 এইরকম নাটকের জন্যই অপেক্ষায় থাকিগল্পটা গতানুগত... N\n", | |
"8710 ফালতু A\n", | |
"8711 সত্যি নিশো ভাই বাংলা নাটকের কিংবদন্তি মেহজাবিন... N\n", | |
"8712 মোশাররফ করিমের অভিনয়ে আগের ধার A\n", | |
"8713 হ্যালো হ্যা হ্যালো N\n", | |
"\n", | |
"[8714 rows x 2 columns]" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-8ea5c6ac-5ea4-4406-ac97-a0f1cc89cc59\">\n", | |
" <div class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>text_bangla</th>\n", | |
" <th>audio_emotion</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>অসাধারণ</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>এমনিই বালছাল বানাইছে জোর অখাদ্য গেলানো যায়না</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>টারে রাম চোদা দিতাম</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>শুনতে পাচ্ছি ভদ্র ভাষায় কথা বলতেছি সার্ভিসটা দাও</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>মনকেকুরুচিঅস্লীলপর্নগ্রফী বিসন্যতা মুক্ত রাখতে...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8709</th>\n", | |
" <td>এইরকম নাটকের জন্যই অপেক্ষায় থাকিগল্পটা গতানুগত...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8710</th>\n", | |
" <td>ফালতু</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8711</th>\n", | |
" <td>সত্যি নিশো ভাই বাংলা নাটকের কিংবদন্তি মেহজাবিন...</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8712</th>\n", | |
" <td>মোশাররফ করিমের অভিনয়ে আগের ধার</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8713</th>\n", | |
" <td>হ্যালো হ্যা হ্যালো</td>\n", | |
" <td>N</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>8714 rows × 2 columns</p>\n", | |
"</div>\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8ea5c6ac-5ea4-4406-ac97-a0f1cc89cc59')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
" \n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
" \n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" flex-wrap:wrap;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-8ea5c6ac-5ea4-4406-ac97-a0f1cc89cc59 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-8ea5c6ac-5ea4-4406-ac97-a0f1cc89cc59');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
" </div>\n", | |
" " | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 19 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Spot-check the text stored under index label 13.\n", | |
"df_final[\"text_bangla\"][13]" | |
], | |
"metadata": { | |
"id": "UyZRgNvWRLTD", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"outputId": "e8768d7a-8b92-4c49-c9a3-2e63a6d70d0f" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'সুন্দর'" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 20 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from sklearn.feature_extraction.text import TfidfVectorizer\n", | |
"\n", | |
"# TF-IDF features built from 1- to 3-grams of the text column.\n", | |
"# NOTE(review): the vectorizer is fitted on the whole corpus before the\n", | |
"# train/validation split, so validation vocabulary and IDF statistics leak\n", | |
"# into training -- consider fitting on the training texts only.\n", | |
"# NOTE(review): .toarray() materialises a dense matrix; keeping the sparse\n", | |
"# result of fit_transform would need far less memory if downstream code\n", | |
"# accepts it -- confirm before changing.\n", | |
"vectorizer = TfidfVectorizer(ngram_range=(1, 3))\n", | |
"X = (\n", | |
"    vectorizer\n", | |
"    .fit_transform(df_final[\"text_bangla\"])\n", | |
"    .toarray()\n", | |
")\n", | |
"X" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Upp9dkKcWDVs", | |
"outputId": "020a9dd9-d465-47c9-98b4-c47e34432105" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([[0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" ...,\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.]])" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 21 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from sklearn import preprocessing\n", | |
"\n", | |
"# Replace the emotion labels with integer codes; the column is overwritten.\n", | |
"# Author's note on the resulting codes: 1 means normal, 0 means angry\n", | |
"# (label_encoder.classes_ holds the fitted class order).\n", | |
"label_encoder = preprocessing.LabelEncoder()\n", | |
"df_final[\"audio_emotion\"] = label_encoder.fit_transform(df_final[\"audio_emotion\"])\n", | |
"df_final" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 424 | |
}, | |
"id": "gKzgrqj3YYZs", | |
"outputId": "e0d528b0-a89d-41e3-fdc7-57f2d3a0bfcd" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" text_bangla audio_emotion\n", | |
"0 অসাধারণ 1\n", | |
"1 এমনিই বালছাল বানাইছে জোর অখাদ্য গেলানো যায়না 0\n", | |
"2 টারে রাম চোদা দিতাম 0\n", | |
"3 শুনতে পাচ্ছি ভদ্র ভাষায় কথা বলতেছি সার্ভিসটা দাও 0\n", | |
"4 মনকেকুরুচিঅস্লীলপর্নগ্রফী বিসন্যতা মুক্ত রাখতে... 1\n", | |
"... ... ...\n", | |
"8709 এইরকম নাটকের জন্যই অপেক্ষায় থাকিগল্পটা গতানুগত... 1\n", | |
"8710 ফালতু 0\n", | |
"8711 সত্যি নিশো ভাই বাংলা নাটকের কিংবদন্তি মেহজাবিন... 1\n", | |
"8712 মোশাররফ করিমের অভিনয়ে আগের ধার 0\n", | |
"8713 হ্যালো হ্যা হ্যালো 1\n", | |
"\n", | |
"[8714 rows x 2 columns]" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-1d503126-aae3-463f-a1ec-07a5ab319c40\">\n", | |
" <div class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>text_bangla</th>\n", | |
" <th>audio_emotion</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>অসাধারণ</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>এমনিই বালছাল বানাইছে জোর অখাদ্য গেলানো যায়না</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>টারে রাম চোদা দিতাম</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>শুনতে পাচ্ছি ভদ্র ভাষায় কথা বলতেছি সার্ভিসটা দাও</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>মনকেকুরুচিঅস্লীলপর্নগ্রফী বিসন্যতা মুক্ত রাখতে...</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8709</th>\n", | |
" <td>এইরকম নাটকের জন্যই অপেক্ষায় থাকিগল্পটা গতানুগত...</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8710</th>\n", | |
" <td>ফালতু</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8711</th>\n", | |
" <td>সত্যি নিশো ভাই বাংলা নাটকের কিংবদন্তি মেহজাবিন...</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8712</th>\n", | |
" <td>মোশাররফ করিমের অভিনয়ে আগের ধার</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8713</th>\n", | |
" <td>হ্যালো হ্যা হ্যালো</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>8714 rows × 2 columns</p>\n", | |
"</div>\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1d503126-aae3-463f-a1ec-07a5ab319c40')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
" \n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
" \n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" flex-wrap:wrap;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-1d503126-aae3-463f-a1ec-07a5ab319c40 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-1d503126-aae3-463f-a1ec-07a5ab319c40');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
" </div>\n", | |
" " | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 22 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Target vector: the audio_emotion column of df_final.\n", | |
"y = df_final[\"audio_emotion\"]\n", | |
"y" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "IQf59LJMZNgE", | |
"outputId": "aed59e8e-130f-4516-aa9d-366330e2657f" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0 1\n", | |
"1 0\n", | |
"2 0\n", | |
"3 0\n", | |
"4 1\n", | |
" ..\n", | |
"8709 1\n", | |
"8710 0\n", | |
"8711 1\n", | |
"8712 0\n", | |
"8713 1\n", | |
"Name: audio_emotion, Length: 8714, dtype: int64" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 23 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"\n", | |
"# Hold out 25% of the rows for validation; the fixed seed keeps the split repeatable.\n", | |
"X_train, X_valid, y_train, y_valid = train_test_split(\n", | |
"    X,\n", | |
"    y,\n", | |
"    test_size=0.25,\n", | |
"    random_state=42,\n", | |
")\n" | |
], | |
"metadata": { | |
"id": "6nLQHg_-X9bO" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Naive Bayes" | |
], | |
"metadata": { | |
"id": "IFwZNRcqqYGQ" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Building the Naive Bayes model: fit on the training split, then report\n", | |
"# precision/recall, the confusion matrix and accuracy on the validation split.\n", | |
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", | |
"from sklearn.naive_bayes import MultinomialNB\n", | |
"\n", | |
"Emotion_detection_model = MultinomialNB()\n", | |
"Emotion_detection_model.fit(X_train, y_train)\n", | |
"y_pred = Emotion_detection_model.predict(X_valid)\n", | |
"\n", | |
"print(\"Report :\\n\", classification_report(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\" Confussion Matrix :\\n\", confusion_matrix(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\"Accuracy : \", accuracy_score(y_true=y_valid, y_pred=y_pred) * 100, \"%\")\n" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "_D5AtvLDqB0Y", | |
"outputId": "a5f59f5e-c361-4f6d-f5ff-79c9abc08dca" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Report :\n", | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 0.81 0.83 0.82 1068\n", | |
" 1 0.83 0.81 0.82 1111\n", | |
"\n", | |
" accuracy 0.82 2179\n", | |
" macro avg 0.82 0.82 0.82 2179\n", | |
"weighted avg 0.82 0.82 0.82 2179\n", | |
"\n", | |
" Confussion Matrix :\n", | |
" [[883 185]\n", | |
" [212 899]]\n", | |
"Accuracy : 81.7806333180358 %\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Logistic Regression" | |
], | |
"metadata": { | |
"id": "BoRwRK-tqpsg" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Logistic regression with library defaults, scored on the validation split.\n", | |
"from sklearn.linear_model import LogisticRegression\n", | |
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", | |
"\n", | |
"# fit() returns the estimator itself, so construction and training can be chained.\n", | |
"LR = LogisticRegression().fit(X_train, y_train)\n", | |
"y_pred = LR.predict(X_valid)\n", | |
"\n", | |
"print(\"Report :\\n\", classification_report(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\" Confussion Matrix :\\n\", confusion_matrix(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\"Accuracy : \", accuracy_score(y_true=y_valid, y_pred=y_pred) * 100, \"%\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "EgMS9YDWZZsD", | |
"outputId": "fafc62d9-3206-44f1-c118-b92922a8ba85" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Report :\n", | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 0.77 0.89 0.82 1068\n", | |
" 1 0.87 0.74 0.80 1111\n", | |
"\n", | |
" accuracy 0.81 2179\n", | |
" macro avg 0.82 0.81 0.81 2179\n", | |
"weighted avg 0.82 0.81 0.81 2179\n", | |
"\n", | |
" Confussion Matrix :\n", | |
" [[950 118]\n", | |
" [291 820]]\n", | |
"Accuracy : 81.22992198256081 %\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## KNN" | |
], | |
"metadata": { | |
"id": "ys6z2iPGqxz_" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# k-nearest-neighbours classifier with library defaults, scored on the validation split.\n", | |
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", | |
"from sklearn.neighbors import KNeighborsClassifier\n", | |
"\n", | |
"# fit() returns the estimator itself, so construction and training can be chained.\n", | |
"knn = KNeighborsClassifier().fit(X_train, y_train)\n", | |
"y_pred = knn.predict(X_valid)\n", | |
"\n", | |
"print(\"Report :\\n\", classification_report(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\" Confussion Matrix :\\n\", confusion_matrix(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\"Accuracy : \", accuracy_score(y_true=y_valid, y_pred=y_pred) * 100, \"%\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "cW3c0dPuqwZe", | |
"outputId": "200a2434-27ab-4973-a9ad-4a8cb4222095" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Report :\n", | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 0.58 0.66 0.62 1068\n", | |
" 1 0.63 0.55 0.58 1111\n", | |
"\n", | |
" accuracy 0.60 2179\n", | |
" macro avg 0.60 0.60 0.60 2179\n", | |
"weighted avg 0.60 0.60 0.60 2179\n", | |
"\n", | |
" Confussion Matrix :\n", | |
" [[703 365]\n", | |
" [502 609]]\n", | |
"Accuracy : 60.21110601193208 %\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## SVM" | |
], | |
"metadata": { | |
"id": "8IrdHKTcq7s2" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Support vector classifier with library defaults, scored on the validation split.\n", | |
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", | |
"from sklearn.svm import SVC\n", | |
"\n", | |
"# fit() returns the estimator itself, so construction and training can be chained.\n", | |
"SVC_obj = SVC().fit(X_train, y_train)\n", | |
"y_pred = SVC_obj.predict(X_valid)\n", | |
"\n", | |
"print(\"Report :\\n\", classification_report(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\" Confussion Matrix :\\n\", confusion_matrix(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\"Accuracy : \", accuracy_score(y_true=y_valid, y_pred=y_pred) * 100, \"%\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ToYGVLAeq-xi", | |
"outputId": "64c21eef-7628-4129-b2f2-6560056766e4" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Report :\n", | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 0.77 0.89 0.82 1068\n", | |
" 1 0.88 0.74 0.80 1111\n", | |
"\n", | |
" accuracy 0.81 2179\n", | |
" macro avg 0.82 0.81 0.81 2179\n", | |
"weighted avg 0.82 0.81 0.81 2179\n", | |
"\n", | |
" Confussion Matrix :\n", | |
" [[951 117]\n", | |
" [291 820]]\n", | |
"Accuracy : 81.2758145938504 %\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Decision Tree" | |
], | |
"metadata": { | |
"id": "2mrzOn8WrAAg" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Decision tree classifier, scored on the validation split.\n", | |
"from sklearn.metrics import classification_report, confusion_matrix, accuracy_score\n", | |
"\n", | |
"from sklearn.tree import DecisionTreeClassifier\n", | |
"\n", | |
"# Seed the estimator so repeated runs of this cell give the same tree and\n", | |
"# the same metrics (42 matches the seed used for the train/validation split).\n", | |
"DT = DecisionTreeClassifier(random_state=42)\n", | |
"\n", | |
"# training the model\n", | |
"DT.fit(X_train, y_train)\n", | |
"\n", | |
"y_pred = DT.predict(X_valid)\n", | |
"\n", | |
"print(\"Report :\\n\" , classification_report(y_valid,y_pred))\n", | |
"\n", | |
"print(\" Confussion Matrix :\\n\" , confusion_matrix(y_valid,y_pred))\n", | |
"\n", | |
"print(\"Accuracy : \", round(accuracy_score(y_valid, y_pred)*100,2), \"%\")\n" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "XGXAwjnuodAl", | |
"outputId": "83149004-2a38-44d4-8cf9-8e4b569e1b9b" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Report :\n", | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 0.76 0.82 0.79 1068\n", | |
" 1 0.81 0.76 0.78 1111\n", | |
"\n", | |
" accuracy 0.79 2179\n", | |
" macro avg 0.79 0.79 0.79 2179\n", | |
"weighted avg 0.79 0.79 0.79 2179\n", | |
"\n", | |
" Confussion Matrix :\n", | |
" [[871 197]\n", | |
" [271 840]]\n", | |
"Accuracy : 78.52 %\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Random Forest" | |
], | |
"metadata": { | |
"id": "zfPz5N-vrHVK" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Random forest classifier, scored on the validation split.\n", | |
"from sklearn.metrics import classification_report, confusion_matrix, accuracy_score\n", | |
"\n", | |
"from sklearn.ensemble import RandomForestClassifier\n", | |
"\n", | |
"# Seed the forest so the bootstrap samples and feature sub-sampling are\n", | |
"# repeatable and the reported metrics can be reproduced on a re-run.\n", | |
"RF = RandomForestClassifier(random_state=42)\n", | |
"\n", | |
"RF.fit(X_train, y_train)\n", | |
"\n", | |
"y_pred = RF.predict(X_valid)\n", | |
"\n", | |
"print(\"Report :\\n\" , classification_report(y_valid,y_pred))\n", | |
"\n", | |
"print(\" Confussion Matrix :\\n\" , confusion_matrix(y_valid,y_pred))\n", | |
"\n", | |
"print(\"Accuracy : \", round(accuracy_score(y_valid, y_pred)*100,2), \"%\")" | |
], | |
"metadata": { | |
"id": "Q6BU5GhJpAWo", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "992131e3-9901-48ca-dda1-ad28ce7e61c1" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Report :\n", | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 0.76 0.88 0.82 1068\n", | |
" 1 0.86 0.74 0.80 1111\n", | |
"\n", | |
" accuracy 0.81 2179\n", | |
" macro avg 0.81 0.81 0.81 2179\n", | |
"weighted avg 0.81 0.81 0.81 2179\n", | |
"\n", | |
" Confussion Matrix :\n", | |
" [[940 128]\n", | |
" [292 819]]\n", | |
"Accuracy : 80.73 %\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Bagging meta-estimator (Bagging)" | |
], | |
"metadata": { | |
"id": "WjwBHY_irKwL" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Bagging ensemble of decision trees, scored on the validation split.\n", | |
"from sklearn.metrics import classification_report, confusion_matrix, accuracy_score\n", | |
"\n", | |
"from sklearn.ensemble import BaggingClassifier\n", | |
"from sklearn import tree\n", | |
"\n", | |
"# Seeding only the base tree is not enough: the bootstrap resampling is\n", | |
"# driven by the BaggingClassifier's own random_state, so seed that as well\n", | |
"# to make the reported metrics reproducible.\n", | |
"BC = BaggingClassifier(tree.DecisionTreeClassifier(random_state=1), random_state=42)\n", | |
"BC.fit(X_train, y_train)\n", | |
"y_pred = BC.predict(X_valid)\n", | |
"\n", | |
"print(\"Report :\\n\" , classification_report(y_valid,y_pred))\n", | |
"\n", | |
"print(\" Confussion Matrix :\\n\" , confusion_matrix(y_valid,y_pred))\n", | |
"\n", | |
"print(\"Accuracy : \", round(accuracy_score(y_valid, y_pred)*100,2), \"%\")\n" | |
], | |
"metadata": { | |
"id": "S-KJhO3epMWU", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "cd005de0-2c7a-4609-e5e3-73b08638e7ae" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Report :\n", | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 0.74 0.84 0.79 1068\n", | |
" 1 0.82 0.71 0.76 1111\n", | |
"\n", | |
" accuracy 0.78 2179\n", | |
" macro avg 0.78 0.78 0.77 2179\n", | |
"weighted avg 0.78 0.78 0.77 2179\n", | |
"\n", | |
" Confussion Matrix :\n", | |
" [[897 171]\n", | |
" [319 792]]\n", | |
"Accuracy : 77.51 %\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## XGBOOST" | |
], | |
"metadata": { | |
"id": "1ZfElpQVrSAK" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# XGBoost classifier with library defaults, scored on the validation split.\n", | |
"import xgboost as xgb\n", | |
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", | |
"\n", | |
"modelXG = xgb.XGBClassifier()\n", | |
"modelXG.fit(X_train, y_train)\n", | |
"y_pred = modelXG.predict(X_valid)\n", | |
"\n", | |
"print(\"Report :\\n\", classification_report(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\" Confussion Matrix :\\n\", confusion_matrix(y_true=y_valid, y_pred=y_pred))\n", | |
"print(\"Accuracy : \", round(accuracy_score(y_true=y_valid, y_pred=y_pred) * 100, 2), \"%\")" | |
], | |
"metadata": { | |
"id": "gwFYcNVCpWlV" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Gradient Boosting" | |
], | |
"metadata": { | |
"id": "Ja0xGmWqrXDd" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Gradient boosting classifier, scored on the validation split.\n", | |
"from sklearn.metrics import classification_report, confusion_matrix, accuracy_score\n", | |
"\n", | |
"from sklearn.ensemble import GradientBoostingClassifier\n", | |
"\n", | |
"# Seed the estimator so the per-split feature permutation (and therefore\n", | |
"# tie-breaking between equally good splits) is repeatable across runs.\n", | |
"modelGB = GradientBoostingClassifier(random_state=42)\n", | |
"modelGB.fit(X_train, y_train)\n", | |
"y_pred = modelGB.predict(X_valid)\n", | |
"\n", | |
"print(\"Report :\\n\" , classification_report(y_valid,y_pred))\n", | |
"\n", | |
"print(\" Confussion Matrix :\\n\" , confusion_matrix(y_valid,y_pred))\n", | |
"\n", | |
"print(\"Accuracy : \", round(accuracy_score(y_valid, y_pred)*100,2), \"%\")" | |
], | |
"metadata": { | |
"id": "hjUdalckpfW7" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment