Skip to content

Instantly share code, notes, and snippets.

@ZhiyaoShu
Last active March 8, 2024 04:39
Show Gist options
  • Save ZhiyaoShu/bba0a711f2f4b8f368c453dc9ae07641 to your computer and use it in GitHub Desktop.
Save ZhiyaoShu/bba0a711f2f4b8f368c453dc9ae07641 to your computer and use it in GitHub Desktop.
Tweet Sentiment Extraction NLP.ipynb
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"kernelspec": {
"language": "python",
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10.12",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"kaggle": {
"accelerator": "none",
"dataSources": [
{
"sourceId": 16295,
"databundleVersionId": 1099992,
"sourceType": "competition"
}
],
"dockerImageVersionId": 30615,
"isInternetEnabled": true,
"language": "python",
"sourceType": "notebook",
"isGpuEnabled": false
},
"colab": {
"provenance": [],
"include_colab_link": true
}
},
"nbformat_minor": 0,
"nbformat": 4,
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/ZhiyaoShu/bba0a711f2f4b8f368c453dc9ae07641/tweet_sentiment_extraction_nlp.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"%pip install -q kaggle"
],
"metadata": {
"id": "tEsX1rtj0svh"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# The API token is read from Google Drive, so Drive must be mounted before the copy.\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')\n",
"\n",
"# Create the target directory first; cp fails if ~/.kaggle is missing.\n",
"! mkdir -p ~/.kaggle\n",
"! cp /content/drive/MyDrive/kaggle.json ~/.kaggle/"
],
"metadata": {
"id": "IEX4Y7n81pbZ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"! chmod 600 ~/.kaggle/kaggle.json"
],
"metadata": {
"id": "qeVgGSVS3iDU"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"! kaggle datasets list"
],
"metadata": {
"id": "v_K6hdnu3jwB"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Print the path of every file under the Kaggle input directory.\n",
"# os.walk yields nothing when the directory is missing, so this is a no-op off Kaggle.\n",
"for dirname, _, filenames in os.walk('/kaggle/input'):\n",
"    for filename in filenames:\n",
"        print(os.path.join(dirname, filename))\n"
],
"metadata": {
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"execution": {
"iopub.status.busy": "2023-12-09T03:03:55.911081Z",
"iopub.execute_input": "2023-12-09T03:03:55.912258Z",
"iopub.status.idle": "2023-12-09T03:03:56.443447Z",
"shell.execute_reply.started": "2023-12-09T03:03:55.912211Z",
"shell.execute_reply": "2023-12-09T03:03:56.441627Z"
},
"trusted": true,
"id": "BCLtIvzkmh6U"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dMnUsKzuHHgq",
"outputId": "9b1f2c09-f65d-4f84-b802-0b4938a88ef4"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Intro: A practice exercise in sentiment extraction from social media posts (tweets)"
],
"metadata": {
"id": "dukkVz6omh6X"
}
},
{
"cell_type": "code",
"source": [
"import tensorflow as tf\n",
"print(tf.__version__)"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:05:31.877064Z",
"iopub.execute_input": "2023-12-09T03:05:31.877820Z",
"iopub.status.idle": "2023-12-09T03:05:51.675825Z",
"shell.execute_reply.started": "2023-12-09T03:05:31.877784Z",
"shell.execute_reply": "2023-12-09T03:05:51.674840Z"
},
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wmxvLE-ymh6Y",
"outputId": "08cc362d-d7eb-466c-d359-982e77055a2f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2.15.0\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"! kaggle competitions download -c tweet-sentiment-extraction"
],
"metadata": {
"id": "gPNHxpMH3rXS",
"outputId": "23771922-368c-4770-fb45-c3149f4af9fd",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Downloading tweet-sentiment-extraction.zip to /content\n",
"\r 0% 0.00/1.39M [00:00<?, ?B/s]\n",
"\r100% 1.39M/1.39M [00:00<00:00, 133MB/s]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"! unzip -o tweet-sentiment-extraction.zip"
],
"metadata": {
"id": "ENI-yrZi4CUt",
"outputId": "d0c64924-1e65-4749-e858-e898fa75ec7b",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Archive: tweet-sentiment-extraction.zip\n",
" inflating: sample_submission.csv \n",
" inflating: test.csv \n",
" inflating: train.csv \n"
]
}
]
},
{
"cell_type": "code",
"source": [
"test_data = pd.read_csv(\"/content/test.csv\")\n",
"train_data = pd.read_csv(\"/content/train.csv\")\n",
"\n",
"# Only a cell's last expression is rendered automatically, so show the test\n",
"# preview explicitly; a bare test_data.head() here would be silently discarded.\n",
"display(test_data.head())\n",
"train_data.head()"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:05:51.887897Z",
"iopub.execute_input": "2023-12-09T03:05:51.888351Z",
"iopub.status.idle": "2023-12-09T03:05:52.026869Z",
"shell.execute_reply.started": "2023-12-09T03:05:51.888316Z",
"shell.execute_reply": "2023-12-09T03:05:52.025505Z"
},
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/",
"height": 258
},
"id": "yfvBvkeGmh6Y",
"outputId": "eb8b2cf9-1e40-4aba-c928-ef449a93cb73"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" textID text \\\n",
"0 cb774db0d1 I`d have responded, if I were going \n",
"1 549e992a42 Sooo SAD I will miss you here in San Diego!!! \n",
"2 088c60f138 my boss is bullying me... \n",
"3 9642c003ef what interview! leave me alone \n",
"4 358bd9e861 Sons of ****, why couldn`t they put them on t... \n",
"\n",
" selected_text sentiment \n",
"0 I`d have responded, if I were going neutral \n",
"1 Sooo SAD negative \n",
"2 bullying me negative \n",
"3 leave me alone negative \n",
"4 Sons of ****, negative "
],
"text/html": [
"\n",
" <div id=\"df-7b4501f3-e383-4523-881d-b8fc1dd19732\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>textID</th>\n",
" <th>text</th>\n",
" <th>selected_text</th>\n",
" <th>sentiment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>cb774db0d1</td>\n",
" <td>I`d have responded, if I were going</td>\n",
" <td>I`d have responded, if I were going</td>\n",
" <td>neutral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>549e992a42</td>\n",
" <td>Sooo SAD I will miss you here in San Diego!!!</td>\n",
" <td>Sooo SAD</td>\n",
" <td>negative</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>088c60f138</td>\n",
" <td>my boss is bullying me...</td>\n",
" <td>bullying me</td>\n",
" <td>negative</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>9642c003ef</td>\n",
" <td>what interview! leave me alone</td>\n",
" <td>leave me alone</td>\n",
" <td>negative</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>358bd9e861</td>\n",
" <td>Sons of ****, why couldn`t they put them on t...</td>\n",
" <td>Sons of ****,</td>\n",
" <td>negative</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7b4501f3-e383-4523-881d-b8fc1dd19732')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-7b4501f3-e383-4523-881d-b8fc1dd19732 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-7b4501f3-e383-4523-881d-b8fc1dd19732');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-7f93d536-0713-47f8-a2bd-a22ba0017957\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-7f93d536-0713-47f8-a2bd-a22ba0017957')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-7f93d536-0713-47f8-a2bd-a22ba0017957 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "train_data",
"summary": "{\n \"name\": \"train_data\",\n \"rows\": 27481,\n \"fields\": [\n {\n \"column\": \"textID\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 27481,\n \"samples\": [\n \"a7f72a928a\",\n \"ef42dee96c\",\n \"07d17131b1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 27480,\n \"samples\": [\n \" Enjoy! Family trumps everything\",\n \" --of them kinda turns me off of it all. And then I buy more of them and dig a deeper hole, etc. ;;\",\n \"Clive it`s my birthday pat me http://apps.facebook.com/dogbook/profile/view/6386106\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"selected_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 22463,\n \"samples\": [\n \"we win\",\n \"YES!!! haahaaa.! break out the jellybeaniesss!\",\n \"hay wats ur AIM? we should chat\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sentiment\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"neutral\",\n \"negative\",\n \"positive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"# Check basic information\n",
"train_data.describe()"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:06:18.849022Z",
"iopub.execute_input": "2023-12-09T03:06:18.849502Z",
"iopub.status.idle": "2023-12-09T03:06:18.943085Z",
"shell.execute_reply.started": "2023-12-09T03:06:18.849465Z",
"shell.execute_reply": "2023-12-09T03:06:18.941595Z"
},
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "LGQvMS65mh6Y",
"outputId": "e2ef44af-d069-4bb1-99f2-563bc35ec443"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" textID text selected_text \\\n",
"count 27481 27480 27480 \n",
"unique 27481 27480 22463 \n",
"top cb774db0d1 I`d have responded, if I were going good \n",
"freq 1 1 199 \n",
"\n",
" sentiment \n",
"count 27481 \n",
"unique 3 \n",
"top neutral \n",
"freq 11118 "
],
"text/html": [
"\n",
" <div id=\"df-ad0427ca-cb16-43e0-82bb-b17a3e70e007\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>textID</th>\n",
" <th>text</th>\n",
" <th>selected_text</th>\n",
" <th>sentiment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>27481</td>\n",
" <td>27480</td>\n",
" <td>27480</td>\n",
" <td>27481</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>27481</td>\n",
" <td>27480</td>\n",
" <td>22463</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>cb774db0d1</td>\n",
" <td>I`d have responded, if I were going</td>\n",
" <td>good</td>\n",
" <td>neutral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>199</td>\n",
" <td>11118</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ad0427ca-cb16-43e0-82bb-b17a3e70e007')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-ad0427ca-cb16-43e0-82bb-b17a3e70e007 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-ad0427ca-cb16-43e0-82bb-b17a3e70e007');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-eb23c673-c67d-4a2b-ab0e-fae860d855eb\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-eb23c673-c67d-4a2b-ab0e-fae860d855eb')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-eb23c673-c67d-4a2b-ab0e-fae860d855eb button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"train_data\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"textID\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"27481\",\n \"cb774db0d1\",\n \"1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"27480\",\n \" I`d have responded, if I were going\",\n \"1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"selected_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 22463,\n \"199\",\n \"27480\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sentiment\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n \"11118\",\n \"27481\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"source": [
"test_data.describe()"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:06:22.059012Z",
"iopub.execute_input": "2023-12-09T03:06:22.059505Z",
"iopub.status.idle": "2023-12-09T03:06:22.087533Z",
"shell.execute_reply.started": "2023-12-09T03:06:22.059474Z",
"shell.execute_reply": "2023-12-09T03:06:22.086136Z"
},
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "ziO__MDtmh6Z",
"outputId": "d4ed2938-e7de-4902-cf06-810dbb86a567"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" textID text \\\n",
"count 3534 3534 \n",
"unique 3534 3534 \n",
"top f87dea47db Last session of the day http://twitpic.com/67ezh \n",
"freq 1 1 \n",
"\n",
" sentiment \n",
"count 3534 \n",
"unique 3 \n",
"top neutral \n",
"freq 1430 "
],
"text/html": [
"\n",
" <div id=\"df-f2470ee3-a39b-4d22-8486-884b850c3405\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>textID</th>\n",
" <th>text</th>\n",
" <th>sentiment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>3534</td>\n",
" <td>3534</td>\n",
" <td>3534</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>3534</td>\n",
" <td>3534</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>f87dea47db</td>\n",
" <td>Last session of the day http://twitpic.com/67ezh</td>\n",
" <td>neutral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1430</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f2470ee3-a39b-4d22-8486-884b850c3405')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-f2470ee3-a39b-4d22-8486-884b850c3405 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-f2470ee3-a39b-4d22-8486-884b850c3405');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-27dac3cc-b7f4-4396-acdb-6ee946af87e7\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-27dac3cc-b7f4-4396-acdb-6ee946af87e7')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-27dac3cc-b7f4-4396-acdb-6ee946af87e7 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"test_data\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"textID\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"3534\",\n \"f87dea47db\",\n \"1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"3534\",\n \"Last session of the day http://twitpic.com/67ezh\",\n \"1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sentiment\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n \"1430\",\n \"3534\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"source": [
"# Check for null values of train data\n",
"train_data.isna().sum()"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:06:02.642708Z",
"iopub.execute_input": "2023-12-09T03:06:02.643173Z",
"iopub.status.idle": "2023-12-09T03:06:02.666775Z",
"shell.execute_reply.started": "2023-12-09T03:06:02.643140Z",
"shell.execute_reply": "2023-12-09T03:06:02.665157Z"
},
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sW_rLEvTmh6Z",
"outputId": "f5974af5-e699-4372-d4de-32d330e96737"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"textID 0\n",
"text 1\n",
"selected_text 1\n",
"sentiment 0\n",
"dtype: int64"
]
},
"metadata": {},
"execution_count": 26
}
]
},
{
"cell_type": "code",
"source": [
"# Drop null values\n",
"train_data.dropna(inplace=True)"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:06:34.309793Z",
"iopub.execute_input": "2023-12-09T03:06:34.310425Z",
"iopub.status.idle": "2023-12-09T03:06:34.336322Z",
"shell.execute_reply.started": "2023-12-09T03:06:34.310375Z",
"shell.execute_reply": "2023-12-09T03:06:34.334742Z"
},
"trusted": true,
"id": "BYth5ceQmh6Z"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Check for null values of test data\n",
"test_data.isna().sum()"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:06:38.689673Z",
"iopub.execute_input": "2023-12-09T03:06:38.690104Z",
"iopub.status.idle": "2023-12-09T03:06:38.701870Z",
"shell.execute_reply.started": "2023-12-09T03:06:38.690071Z",
"shell.execute_reply": "2023-12-09T03:06:38.700699Z"
},
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Gad-bU3qmh6a",
"outputId": "b91e3e03-8c82-4993-9f35-4c98313a07eb"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"textID 0\n",
"text 0\n",
"sentiment 0\n",
"dtype: int64"
]
},
"metadata": {},
"execution_count": 28
}
]
},
{
"cell_type": "code",
"source": [
"# Drop null values\n",
"test_data.dropna(inplace=True)"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:06:50.905069Z",
"iopub.execute_input": "2023-12-09T03:06:50.906112Z",
"iopub.status.idle": "2023-12-09T03:06:50.913867Z",
"shell.execute_reply.started": "2023-12-09T03:06:50.906067Z",
"shell.execute_reply": "2023-12-09T03:06:50.912656Z"
},
"trusted": true,
"id": "yD8fZWvnmh6a"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Count seniment values\n",
"train_data['sentiment'].value_counts()"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:14:49.588524Z",
"iopub.execute_input": "2023-12-09T03:14:49.589545Z",
"iopub.status.idle": "2023-12-09T03:14:49.604619Z",
"shell.execute_reply.started": "2023-12-09T03:14:49.589491Z",
"shell.execute_reply": "2023-12-09T03:14:49.603145Z"
},
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tIX5rua1mh6b",
"outputId": "d7611ee3-cd43-4e04-d405-7dbf3a8b6fcf"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"neutral 11117\n",
"positive 8582\n",
"negative 7781\n",
"Name: sentiment, dtype: int64"
]
},
"metadata": {},
"execution_count": 30
}
]
},
{
"cell_type": "code",
"source": [
"# Analysis text length\n",
"train_data['text_length'] = train_data['text'].apply(len)\n",
"train_data.groupby('sentiment')['text_length'].mean()"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:06:53.491565Z",
"iopub.execute_input": "2023-12-09T03:06:53.492804Z",
"iopub.status.idle": "2023-12-09T03:06:53.531364Z",
"shell.execute_reply.started": "2023-12-09T03:06:53.492759Z",
"shell.execute_reply": "2023-12-09T03:06:53.530181Z"
},
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gvJ2x3Rtmh6a",
"outputId": "3aff7c0a-44fe-4ca8-80c3-24d8cfd6867a"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"sentiment\n",
"negative 70.488112\n",
"neutral 65.206800\n",
"positive 70.419133\n",
"Name: text_length, dtype: float64"
]
},
"metadata": {},
"execution_count": 31
}
]
},
{
"cell_type": "code",
"source": [
"# Analysis selected text length\n",
"train_data['selected_text_length'] = train_data['selected_text'].apply(len)\n",
"train_data.groupby('sentiment')['selected_text_length'].mean()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YMMpNaeUnRag",
"outputId": "a551d4d5-ec7a-42a6-9814-63f4a4b08567"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"sentiment\n",
"negative 19.970698\n",
"neutral 62.765134\n",
"positive 18.124680\n",
"Name: selected_text_length, dtype: float64"
]
},
"metadata": {},
"execution_count": 32
}
]
},
{
"cell_type": "code",
"source": [
"# ANOVA results analysis\n",
"from scipy import stats\n",
"\n",
"f_val, p_val = stats.f_oneway(train_data[train_data['sentiment'] == 'positive']['text_length'],\n",
" train_data[train_data['sentiment'] == 'negative']['text_length'],\n",
" train_data[train_data['sentiment'] == 'neutral']['text_length'])\n",
"\n",
"print(\"ANOVA Test Results:\")\n",
"print(f\"F-statistic: {f_val}\")\n",
"print(f\"P-value: {p_val}\")\n",
"\n",
"# Interpret the results\n",
"alpha = 0.05\n",
"if p_val<alpha:\n",
" print(\"The means of at least two groups are significantly different.\")\n",
"else:\n",
" print(\"There is no significant difference in the means of the groups.\")"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:17:23.632685Z",
"iopub.execute_input": "2023-12-09T03:17:23.633161Z",
"iopub.status.idle": "2023-12-09T03:18:41.566408Z",
"shell.execute_reply.started": "2023-12-09T03:17:23.633128Z",
"shell.execute_reply": "2023-12-09T03:18:41.564637Z"
},
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8u8vnTYrmh6b",
"outputId": "59af0801-38f3-4f6f-e7d6-3e6ef11982bc"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"ANOVA Test Results:\n",
"F-statistic: 72.2127709711816\n",
"P-value: 5.254438748898152e-32\n",
"The means of at least two groups are significantly different.\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Data Processing"
],
"metadata": {
"id": "j5gAPl2Umh6b"
}
},
{
"cell_type": "code",
"source": [
"import re\n",
"import string\n",
"\n",
"def clean_text(text):\n",
" text = text.lower()\n",
" text = re.sub(r\"what's\", \"what is \", text)\n",
" text = re.sub(r\"\\'s\", \" \", text)\n",
" text = re.sub(r\"\\'ve\", \" have \", text)\n",
" text = re.sub(r\"can't\", \"cannot \", text)\n",
" text = re.sub(r\"n't\", \" not \", text)\n",
" return text\n",
"\n",
"train_data['text'] = train_data['text'].apply(clean_text)\n",
"test_data['text'] = test_data['text'].apply(clean_text)"
],
"metadata": {
"id": "BphYjewqpCaL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
"\n",
"# Tokenize the text\n",
"token = Tokenizer(num_words = 500)\n",
"token.fit_on_texts(train_data['text'])\n",
"\n",
"# Convert texts to sequence of integers\n",
"train_sequences = token.texts_to_sequences(train_data['text'])\n",
"test_sequences = token.texts_to_sequences(test_data['text'])\n",
"\n",
"# Convert labels to categorical one-hot encoding\n",
"train_labels = pd.get_dummies(train_data['sentiment']).values\n",
"test_labels = pd.get_dummies(test_data['sentiment']).values\n"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-09T03:21:59.215359Z",
"iopub.execute_input": "2023-12-09T03:21:59.215864Z",
"iopub.status.idle": "2023-12-09T03:22:00.003619Z",
"shell.execute_reply.started": "2023-12-09T03:21:59.215829Z",
"shell.execute_reply": "2023-12-09T03:22:00.001594Z"
},
"trusted": true,
"id": "1iM34W3Kmh6b"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Vectorize the text\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"\n",
"max_len = max([len(x) for x in train_sequences])\n",
"train_padded = pad_sequences(train_sequences, maxlen = max_len, padding = \"post\", truncating = \"post\")\n",
"test_padded = pad_sequences(test_sequences, maxlen = max_len, padding = \"post\", truncating = \"post\")\n",
"\n",
"print(max_len)\n",
"print(train_padded.shape)\n",
"print(test_padded.shape)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "46S4ueo3mh6b",
"outputId": "7a345779-d3ea-4656-fdd4-65d819a0ffa2"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"32\n",
"(27480, 32)\n",
"(3534, 32)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Build the model"
],
"metadata": {
"id": "_B7z4_rmsNbJ"
}
},
{
"cell_type": "code",
"source": [
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from tensorflow.keras.layers import Embedding, LSTM, GRU, Dense, Dropout\n",
"from tensorflow.keras.callbacks import EarlyStopping\n",
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
"from tensorflow.keras.utils import to_categorical\n",
"import numpy as np\n",
"import tensorflow as tf"
],
"metadata": {
"execution": {
"iopub.status.busy": "2023-12-08T08:48:28.404499Z",
"iopub.execute_input": "2023-12-08T08:48:28.404989Z",
"iopub.status.idle": "2023-12-08T08:48:28.756170Z",
"shell.execute_reply.started": "2023-12-08T08:48:28.404953Z",
"shell.execute_reply": "2023-12-08T08:48:28.754753Z"
},
"trusted": true,
"id": "vzFXiRI5mh6b"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
],
"metadata": {
"id": "rmY2dSgAI6wL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def lstm_model(max_len):\n",
" model = Sequential()\n",
"\n",
" # Add embedding layer\n",
" model.add(Embedding(input_dim = 500, output_dim = 32, input_length = max_len))\n",
"\n",
" # Add LSTM layer\n",
" model.add(LSTM(64, return_sequences=True))\n",
" model.add(Dropout(0.5))\n",
"\n",
" # Add LSTM layer without returning sequences\n",
" model.add(LSTM(32))\n",
" model.add(Dropout(0.5))\n",
"\n",
" # Add dense layer\n",
" model.add(Dense(3, activation='sigmoid'))\n",
"\n",
" return model\n",
"\n",
"def gru_model(max_len):\n",
" model = Sequential()\n",
"\n",
" # Add embedding layer\n",
" model.add(Embedding(input_dim = 500, output_dim = 32, input_length = max_len))\n",
" # Add GRU layer\n",
" model.add(GRU(128, return_sequences=True))\n",
" model.add(Dropout(0.5))\n",
"\n",
" # Add GRU layer without returning sequences\n",
" model.add(GRU(32))\n",
" model.add(Dropout(0.5))\n",
"\n",
" # Add dense layer\n",
" model.add(Dense(3, activation='sigmoid'))\n",
"\n",
" return model"
],
"metadata": {
"id": "h51X_LSwsSd1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"lstm = lstm_model(max_len)\n",
"lstm.summary()\n",
"# Complie the models\n",
"lstm.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n",
"gru_model(max_len).compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Z2YzFGqu0iIz",
"outputId": "c3bd49c0-6aba-418d-aac3-8174818b90ca"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Model: \"sequential\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" embedding (Embedding) (None, 32, 32) 16000 \n",
" \n",
" lstm (LSTM) (None, 32, 64) 24832 \n",
" \n",
" dropout (Dropout) (None, 32, 64) 0 \n",
" \n",
" lstm_1 (LSTM) (None, 32) 12416 \n",
" \n",
" dropout_1 (Dropout) (None, 32) 0 \n",
" \n",
" dense (Dense) (None, 3) 99 \n",
" \n",
"=================================================================\n",
"Total params: 53347 (208.39 KB)\n",
"Trainable params: 53347 (208.39 KB)\n",
"Non-trainable params: 0 (0.00 Byte)\n",
"_________________________________________________________________\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"gru = gru_model(max_len)\n",
"gru.summary()\n",
"# Complie the models\n",
"gru.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "m5zd-TmeBb5p",
"outputId": "0907d4cb-5675-4cbd-e6ea-fc08bb9c6368"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Model: \"sequential_2\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" embedding_2 (Embedding) (None, 32, 32) 16000 \n",
" \n",
" gru_2 (GRU) (None, 32, 128) 62208 \n",
" \n",
" dropout_4 (Dropout) (None, 32, 128) 0 \n",
" \n",
" gru_3 (GRU) (None, 32) 15552 \n",
" \n",
" dropout_5 (Dropout) (None, 32) 0 \n",
" \n",
" dense_2 (Dense) (None, 3) 99 \n",
" \n",
"=================================================================\n",
"Total params: 93859 (366.64 KB)\n",
"Trainable params: 93859 (366.64 KB)\n",
"Non-trainable params: 0 (0.00 Byte)\n",
"_________________________________________________________________\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Train the models"
],
"metadata": {
"id": "mSijiLQZ0dk3"
}
},
{
"cell_type": "code",
"source": [
"# Train the models with early stopping\n",
"early_stopping = EarlyStopping(patience=3, restore_best_weights=True)"
],
"metadata": {
"id": "w72HRLgx4e4Q"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"lstm_history = lstm.fit(train_padded, train_labels, epochs=10, validation_split=0.2)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BxLCU34-4gtj",
"outputId": "d2ccbde3-5a44-446c-ced1-d1f180276c1d"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Epoch 1/10\n",
"687/687 [==============================] - 32s 37ms/step - loss: 1.0219 - accuracy: 0.4775 - val_loss: 0.8188 - val_accuracy: 0.6527\n",
"Epoch 2/10\n",
"687/687 [==============================] - 23s 34ms/step - loss: 0.8102 - accuracy: 0.6598 - val_loss: 0.7817 - val_accuracy: 0.6652\n",
"Epoch 3/10\n",
"687/687 [==============================] - 25s 37ms/step - loss: 0.7768 - accuracy: 0.6731 - val_loss: 0.7642 - val_accuracy: 0.6678\n",
"Epoch 4/10\n",
"687/687 [==============================] - 24s 35ms/step - loss: 0.7589 - accuracy: 0.6784 - val_loss: 0.7515 - val_accuracy: 0.6765\n",
"Epoch 5/10\n",
"687/687 [==============================] - 23s 34ms/step - loss: 0.7517 - accuracy: 0.6802 - val_loss: 0.7628 - val_accuracy: 0.6752\n",
"Epoch 6/10\n",
"687/687 [==============================] - 24s 35ms/step - loss: 0.7412 - accuracy: 0.6848 - val_loss: 0.7506 - val_accuracy: 0.6809\n",
"Epoch 7/10\n",
"687/687 [==============================] - 23s 34ms/step - loss: 0.7348 - accuracy: 0.6894 - val_loss: 0.7580 - val_accuracy: 0.6674\n",
"Epoch 8/10\n",
"687/687 [==============================] - 23s 34ms/step - loss: 0.7282 - accuracy: 0.6905 - val_loss: 0.7830 - val_accuracy: 0.6590\n",
"Epoch 9/10\n",
"687/687 [==============================] - 23s 34ms/step - loss: 0.7207 - accuracy: 0.6943 - val_loss: 0.7708 - val_accuracy: 0.6661\n",
"Epoch 10/10\n",
"687/687 [==============================] - 23s 34ms/step - loss: 0.7152 - accuracy: 0.6964 - val_loss: 0.7541 - val_accuracy: 0.6774\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"gru_history= gru.fit(train_padded, train_labels, epochs=10, validation_split=0.2, callbacks=[early_stopping])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u8js0eVyBut3",
"outputId": "bf75fabb-ff65-4b7d-d179-c655714fceb8"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Epoch 1/10\n",
"687/687 [==============================] - 40s 52ms/step - loss: 1.0896 - accuracy: 0.3977 - val_loss: 1.0874 - val_accuracy: 0.4127\n",
"Epoch 2/10\n",
"687/687 [==============================] - 38s 55ms/step - loss: 1.0894 - accuracy: 0.4025 - val_loss: 1.0858 - val_accuracy: 0.4127\n",
"Epoch 3/10\n",
"687/687 [==============================] - 35s 50ms/step - loss: 1.0880 - accuracy: 0.4026 - val_loss: 1.0870 - val_accuracy: 0.4127\n",
"Epoch 4/10\n",
"687/687 [==============================] - 38s 55ms/step - loss: 1.0884 - accuracy: 0.4028 - val_loss: 1.0857 - val_accuracy: 0.4127\n",
"Epoch 5/10\n",
"687/687 [==============================] - 35s 51ms/step - loss: 1.0883 - accuracy: 0.4025 - val_loss: 1.0855 - val_accuracy: 0.4127\n",
"Epoch 6/10\n",
"687/687 [==============================] - 37s 54ms/step - loss: 0.9598 - accuracy: 0.5236 - val_loss: 0.7822 - val_accuracy: 0.6658\n",
"Epoch 7/10\n",
"687/687 [==============================] - 35s 51ms/step - loss: 0.7834 - accuracy: 0.6618 - val_loss: 0.7622 - val_accuracy: 0.6738\n",
"Epoch 8/10\n",
"687/687 [==============================] - 37s 54ms/step - loss: 0.7639 - accuracy: 0.6724 - val_loss: 0.7650 - val_accuracy: 0.6721\n",
"Epoch 9/10\n",
"687/687 [==============================] - 35s 51ms/step - loss: 0.7415 - accuracy: 0.6831 - val_loss: 0.7536 - val_accuracy: 0.6734\n",
"Epoch 10/10\n",
"687/687 [==============================] - 37s 53ms/step - loss: 0.7278 - accuracy: 0.6897 - val_loss: 0.7486 - val_accuracy: 0.6772\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"def jaccard(str1, str2):\n",
" a = set(str1.lower().split())\n",
" b = set(str2.lower().split())\n",
" c = a.intersection(b)\n",
" return float(len(c)) / (len(a) + len(b) - len(c))\n",
"\n",
"def evaluate_model(model, tokenizer, data, true_texts):\n",
" \"\"\"\n",
" Evaluate the model using the Jaccard score.\n",
" - model: The trained model (LSTM or GRU)\n",
" - tokenizer: Tokenizer used for the model\n",
" - data: The input data for prediction (features)\n",
" - true_texts: The true output texts (labels)\n",
"\n",
" Returns the average Jaccard score for the dataset.\n",
" \"\"\"\n",
" # Generate predictions\n",
" predictions = model.predict(data)\n",
"\n",
" pred_texts = [\" \".join(tokenizer.sequences_to_texts([p])) for p in predictions]\n",
"\n",
" # Compute Jaccard scores\n",
" scores = [jaccard(pred, true) for pred, true in zip(pred_texts, true_texts)]\n",
"\n",
" # Return the average Jaccard score\n",
" return sum(scores) / len(scores)"
],
"metadata": {
"id": "UGLCcoKzBuDm"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Evaluate on test data\n",
"test_loss, test_acc = lstm.evaluate(test_padded, test_labels)\n",
"print(\"Test accuracy:\", test_acc)\n",
"\n",
"test_loss, test_acc = gru.evaluate(test_padded, test_labels)\n",
"print(\"Test accuracy:\", test_acc)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZVbw5hb6C9Yg",
"outputId": "179c13e3-64bf-4117-8a44-c0e35978fa87"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"111/111 [==============================] - 2s 14ms/step - loss: 0.7518 - accuracy: 0.6672\n",
"Test accuracy: 0.6672325730323792\n",
"111/111 [==============================] - 2s 16ms/step - loss: 0.7480 - accuracy: 0.6692\n",
"Test accuracy: 0.6692133545875549\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Model accuracy\n",
"lstm_val_acc = max(lstm_history.history['val_accuracy'])\n",
"gru_val_acc = max(gru_history.history['val_accuracy'])\n",
"\n",
"print(\"LSTM validation accuracy:\", lstm_val_acc)\n",
"print(\"GRU validation accuracy:\", gru_val_acc)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fTkR4aemFpXS",
"outputId": "bf7b8065-4a28-44fb-ffe3-f006b7dd9fdc"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"LSTM validation accuracy: 0.6808587908744812\n",
"GRU validation accuracy: 0.677219808101654\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Confusion matrix\n",
"from sklearn.metrics import classification_report, confusion_matrix\n",
"lstm_pred = lstm.predict(test_padded)\n",
"gru_pred = gru.predict(test_padded)\n",
"\n",
"print(classification_report(test_labels.argmax(axis=1), lstm_pred.argmax(axis=1)))\n",
"print(classification_report(test_labels.argmax(axis=1), gru_pred.argmax(axis=1)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Vu1OEfmuMd5S",
"outputId": "b866ec81-2d9f-440a-9cf5-3eadacc159bc"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"111/111 [==============================] - 2s 11ms/step\n",
"111/111 [==============================] - 2s 14ms/step\n",
" precision recall f1-score support\n",
"\n",
" 0 0.78 0.46 0.58 1001\n",
" 1 0.58 0.79 0.67 1430\n",
" 2 0.76 0.70 0.73 1103\n",
"\n",
" accuracy 0.67 3534\n",
" macro avg 0.71 0.65 0.66 3534\n",
"weighted avg 0.70 0.67 0.66 3534\n",
"\n",
" precision recall f1-score support\n",
"\n",
" 0 0.75 0.47 0.58 1001\n",
" 1 0.58 0.79 0.67 1430\n",
" 2 0.79 0.69 0.74 1103\n",
"\n",
" accuracy 0.67 3534\n",
" macro avg 0.71 0.65 0.66 3534\n",
"weighted avg 0.69 0.67 0.67 3534\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "25XNKkNYCQae"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from datetime import datetime\n",
"\n",
"def save_model(model, prefix =''):\n",
" # Get the current time and filename\n",
" current_time = datetime.now().strftime(\"%Y-%m-%d-%H%M%S\")\n",
" filename = f\"{prefix}model_{current_time}.csv\"\n",
" # Save the models\n",
" model.save(filename)\n",
" print(f\"Model saved to {filename}\")\n",
"\n",
"save_model(lstm, prefix = \"lstm\")\n",
"save_model(gru, prefix = \"gru\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LYqnTQa-bdqn",
"outputId": "e248dd33-fa4a-4c52-88c2-01d216316d18"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Model saved to lstmmodel_2024-03-04-015021.csv\n",
"Model saved to grumodel_2024-03-04-015029.csv\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!pip install --upgrade kaggle"
],
"metadata": {
"id": "vgjLVo-l_LO9"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"! kaggle competitions submit -c tweet-sentiment-extraction -f '/content/lstmmodel_2024-03-04-005517.csv' -m late-submission"
],
"metadata": {
"id": "kmaX09fo9Urz",
"outputId": "24134994-354d-4d1b-8ac2-9e7ab86e3994",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Traceback (most recent call last):\n",
" File \"/usr/local/bin/kaggle\", line 8, in <module>\n",
" sys.exit(main())\n",
" File \"/usr/local/lib/python3.10/dist-packages/kaggle/cli.py\", line 70, in main\n",
" out = args.func(**command_args)\n",
" File \"/usr/local/lib/python3.10/dist-packages/kaggle/api/kaggle_api_extended.py\", line 801, in competition_submit_cli\n",
" submit_result = self.competition_submit(file_name, message,\n",
" File \"/usr/local/lib/python3.10/dist-packages/kaggle/api/kaggle_api_extended.py\", line 752, in competition_submit\n",
" content_length=os.path.getsize(file_name),\n",
" File \"/usr/lib/python3.10/genericpath.py\", line 50, in getsize\n",
" return os.stat(filename).st_size\n",
"FileNotFoundError: [Errno 2] No such file or directory: '/content/lstmmodel_2024-03-04-005517.csv'\n"
]
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment