@seanbenhur
Created April 12, 2021 11:29
FIRE Better-LSTM Torchtext.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "FIRE Better-LSTM Torchtext.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true,
"mount_file_id": "1WlC9p143n9EoqfWdHXttbg74mKWtXYSG",
"authorship_tag": "ABX9TyOFVTH5DL3mAfoAblOObeIv",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/seanbenhur/5bb90e537eacffd2964c53e70afd6dfd/fire-better-lstm-torchtext.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Ix-b5qONpzGE",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ccaad94d-7701-47d9-a145-6b70c52046af"
},
"source": [
"!pip install torchtext==0.6 -q\n",
"!pip install wandb -q"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"\u001b[K |████████████████████████████████| 71kB 5.9MB/s \n",
"\u001b[K |████████████████████████████████| 1.2MB 10.4MB/s \n",
"\u001b[K |████████████████████████████████| 2.1MB 11.0MB/s \n",
"\u001b[K |████████████████████████████████| 133kB 40.4MB/s \n",
"\u001b[K |████████████████████████████████| 102kB 10.6MB/s \n",
"\u001b[K |████████████████████████████████| 163kB 38.9MB/s \n",
"\u001b[K |████████████████████████████████| 71kB 8.8MB/s \n",
"\u001b[?25h Building wheel for pathtools (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for subprocess32 (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "3ML89TVbgF2B"
},
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torchtext\n",
"from torchtext import data\n",
"from sklearn.metrics import confusion_matrix,f1_score\n",
"import matplotlib.pyplot as plt \n",
"import seaborn as sns\n",
"from sklearn.metrics import classification_report\n",
"import pandas as pd\n",
"import torch.optim as optim\n",
"from torch.optim.lr_scheduler import OneCycleLR\n",
"import time\n",
"\n",
"data_path = \"/content/drive/MyDrive/data/FIRE2020/data/tanglish/data_cleaned.csv\""
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 66
},
"id": "Xa0-CR8Ox86g",
"outputId": "47ea25be-7ca0-4475-bfb3-b5f67a224b91"
},
"source": [
"import wandb\n",
"\n",
"wandb.login()"
],
"execution_count": 3,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/javascript": [
"\n",
" window._wandbApiKey = new Promise((resolve, reject) => {\n",
" function loadScript(url) {\n",
" return new Promise(function(resolve, reject) {\n",
" let newScript = document.createElement(\"script\");\n",
" newScript.onerror = reject;\n",
" newScript.onload = resolve;\n",
" document.body.appendChild(newScript);\n",
" newScript.src = url;\n",
" });\n",
" }\n",
" loadScript(\"https://cdn.jsdelivr.net/npm/postmate/build/postmate.min.js\").then(() => {\n",
" const iframe = document.createElement('iframe')\n",
" iframe.style.cssText = \"width:0;height:0;border:none\"\n",
" document.body.appendChild(iframe)\n",
" const handshake = new Postmate({\n",
" container: iframe,\n",
" url: 'https://wandb.ai/authorize'\n",
" });\n",
" const timeout = setTimeout(() => reject(\"Couldn't auto authenticate\"), 5000)\n",
" handshake.then(function(child) {\n",
" child.on('authorize', data => {\n",
" clearTimeout(timeout)\n",
" resolve(data)\n",
" });\n",
" });\n",
" })\n",
" });\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n"
],
"name": "stderr"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {
"tags": []
},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 591
},
"id": "uBXAH6HtxLg9",
"outputId": "f628be30-f257-4dd3-eeb4-63e96d07001a"
},
"source": [
"wandb.init(project=\"fire2020\")"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mseanbenhur\u001b[0m (use `wandb login --relogin` to force relogin)\n"
],
"name": "stderr"
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" Tracking run with wandb version 0.10.25<br/>\n",
" Syncing run <strong style=\"color:#cdcd00\">cerulean-night-15</strong> to <a href=\"https://wandb.ai\" target=\"_blank\">Weights & Biases</a> <a href=\"https://docs.wandb.com/integrations/jupyter.html\" target=\"_blank\">(Documentation)</a>.<br/>\n",
" Project page: <a href=\"https://wandb.ai/seanbenhur/fire2020\" target=\"_blank\">https://wandb.ai/seanbenhur/fire2020</a><br/>\n",
" Run page: <a href=\"https://wandb.ai/seanbenhur/fire2020/runs/3cux37o1\" target=\"_blank\">https://wandb.ai/seanbenhur/fire2020/runs/3cux37o1</a><br/>\n",
" Run data is saved locally in <code>/content/wandb/run-20210412_104729-3cux37o1</code><br/><br/>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<wandb.sdk.wandb_run.Run at 0x7fe939946250>"
],
"text/html": [
"<h1>Run(3cux37o1)</h1><iframe src=\"https://wandb.ai/seanbenhur/fire2020/runs/3cux37o1\" style=\"border:none;width:100%;height:400px\"></iframe>"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "_jwTRRVul5Jc",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 191
},
"outputId": "38fede9f-b783-43c3-d84b-568c2932d90d"
},
"source": [
"df = pd.read_csv(data_path)\n",
"df.head()"
],
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>review</th>\n",
" <th>category</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>trailer late ah parthavanga like podunga</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>move pathutu vanthu trailer pakurvnga yaru</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>puthupetai dhanush ah yarellam pathinga</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>dhanush oda character puthu sa erukay mass ta</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>vera level ippa pesungada mokka nu thalaivaaaaaa</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 review category\n",
"0 1 trailer late ah parthavanga like podunga 1\n",
"1 2 move pathutu vanthu trailer pakurvnga yaru 1\n",
"2 3 puthupetai dhanush ah yarellam pathinga 1\n",
"3 4 dhanush oda character puthu sa erukay mass ta 1\n",
"4 5 vera level ippa pesungada mokka nu thalaivaaaaaa 1"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8LPD_87s3-vK",
"outputId": "ce50ccd4-4953-441a-d8ca-0331a73c5aa9"
},
"source": [
"df.category.value_counts()"
],
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"1 8484\n",
"0 1613\n",
"2 1424\n",
"Name: category, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "xgLZFqRRplEK"
},
"source": [
"TEXT = data.Field(tokenize=\"spacy\",include_lengths=True)\n",
"LABEL = data.LabelField()\n",
"fields = [(None,None),('text', TEXT),('label',LABEL)]"
],
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Cwx2CojvyH3U"
},
"source": [
"max_vocab_size = 10000\n",
"batch_size = 128\n",
"input_dim = 10002\n",
"embedding_dim = 300\n",
"hidden_dim = 256\n",
"output_dim = 3\n",
"n_layers = 2\n",
"bidirectional = True\n",
"dropout = 0.3\n",
"n_epochs = 50\n",
"patience=10\n",
"model_path = \"/content/drive/MyDrive/Tanglishfiremodels/better_lstm.pt\""
],
"execution_count": 34,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "P6Ja-i9cvtUY"
},
"source": [
"train_data, test_data = data.TabularDataset.splits(\n",
" path = '/content/drive/MyDrive/data/FIRE2020/data/tanglish',\n",
" train = 'train_cleaned.csv',\n",
" test = 'test_cleaned.csv',\n",
" format = 'csv',\n",
" fields = fields,\n",
" skip_header = True\n",
")"
],
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "aIx51JXA5MuR",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "307ec139-c157-4e4a-bcd1-a68ea8efed5e"
},
"source": [
"TEXT.build_vocab(train_data, max_size = max_vocab_size,\n",
" vectors=\"glove.6B.300d\")\n",
"LABEL.build_vocab(train_data)"
],
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"text": [
".vector_cache/glove.6B.zip: 862MB [02:43, 5.28MB/s] \n",
"100%|█████████▉| 399852/400000 [00:52<00:00, 7727.20it/s]"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "JiLmO2CQyw9Y",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "317c4ba1-7d3f-4131-922a-37c91fd91900"
},
"source": [
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"\n",
"train_iterator, test_iterator = data.BucketIterator.splits(\n",
" (train_data,test_data), \n",
" batch_size = batch_size,\n",
" device = device,\n",
" shuffle = True,\n",
" sort_within_batch=True,\n",
" sort_key=lambda x:len(x.text))"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"text": [
"\r100%|█████████▉| 399852/400000 [01:09<00:00, 7727.20it/s]"
],
"name": "stderr"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lZY8MQ_FWaPy"
},
"source": [
"### LSTM "
]
},
{
"cell_type": "code",
"metadata": {
"id": "WqAdv0fUnbtH"
},
"source": [
"from torch.nn.utils.rnn import PackedSequence\n",
"from typing import *\n",
"\n",
"\n",
"\n",
"\n",
"class VariationalDropout(nn.Module):\n",
" \"\"\"\n",
" Applies the same dropout mask across the temporal dimension\n",
" See https://arxiv.org/abs/1512.05287 for more details.\n",
" Note that this is not applied to the recurrent activations in the LSTM like the above paper.\n",
" Instead, it is applied to the inputs and outputs of the recurrent layer.\n",
" \"\"\"\n",
" def __init__(self, dropout: float, batch_first: Optional[bool]=False):\n",
" super().__init__()\n",
" self.dropout = dropout\n",
" self.batch_first = batch_first\n",
"\n",
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
" if not self.training or self.dropout <= 0.:\n",
" return x\n",
"\n",
" is_packed = isinstance(x, PackedSequence)\n",
" if is_packed:\n",
" x, batch_sizes = x\n",
" max_batch_size = int(batch_sizes[0])\n",
" else:\n",
" batch_sizes = None\n",
" max_batch_size = x.size(0)\n",
"\n",
" # Drop same mask across entire sequence\n",
" if self.batch_first:\n",
" m = x.new_empty(max_batch_size, 1, x.size(2), requires_grad=False).bernoulli_(1 - self.dropout)\n",
" else:\n",
" m = x.new_empty(1, max_batch_size, x.size(2), requires_grad=False).bernoulli_(1 - self.dropout)\n",
" x = x.masked_fill(m == 0, 0) / (1 - self.dropout)\n",
"\n",
" if is_packed:\n",
" return PackedSequence(x, batch_sizes)\n",
" else:\n",
" return x\n",
"\n",
"class LSTM(nn.LSTM):\n",
" def __init__(self, *args, dropouti: float=0.,\n",
" dropoutw: float=0., dropouto: float=0.,\n",
" batch_first=True, unit_forget_bias=True, **kwargs):\n",
" super().__init__(*args, **kwargs, batch_first=batch_first)\n",
" self.unit_forget_bias = unit_forget_bias\n",
" self.dropoutw = dropoutw\n",
" self.input_drop = VariationalDropout(dropouti,\n",
" batch_first=batch_first)\n",
" self.output_drop = VariationalDropout(dropouto,\n",
" batch_first=batch_first)\n",
" self._init_weights()\n",
"\n",
" def _init_weights(self):\n",
" \"\"\"\n",
" Use orthogonal init for recurrent layers, xavier uniform for input layers\n",
" Bias is 0 except for forget gate\n",
" \"\"\"\n",
" for name, param in self.named_parameters():\n",
" if \"weight_hh\" in name:\n",
" nn.init.orthogonal_(param.data)\n",
" elif \"weight_ih\" in name:\n",
" nn.init.xavier_uniform_(param.data)\n",
" elif \"bias\" in name and self.unit_forget_bias:\n",
" nn.init.zeros_(param.data)\n",
" param.data[self.hidden_size:2 * self.hidden_size] = 1\n",
"\n",
" def _drop_weights(self):\n",
" for name, param in self.named_parameters():\n",
" if \"weight_hh\" in name:\n",
" getattr(self, name).data = \\\n",
" torch.nn.functional.dropout(param.data, p=self.dropoutw,\n",
" training=self.training).contiguous()\n",
"\n",
" def forward(self, input, hx=None):\n",
" self._drop_weights()\n",
" input = self.input_drop(input)\n",
" seq, state = super().forward(input, hx=hx)\n",
" return self.output_drop(seq), state"
],
"execution_count": 14,
"outputs": []
},
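{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick illustrative check of `VariationalDropout` (a sketch added for clarity, not part of the original run): a single Bernoulli mask is drawn per sample and broadcast over the time dimension, so the zeroed feature positions are identical at every time step."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"vd = VariationalDropout(dropout=0.5, batch_first=True)\n",
"vd.train()\n",
"x = torch.ones(2, 4, 8)  # (batch, time, features)\n",
"y = vd(x)\n",
"# every time step of a given sample shares the same zero pattern\n",
"assert torch.equal(y[:, 0] == 0, y[:, 1] == 0)\n",
"print(y[0, 0])"
],
"execution_count": null,
"outputs": []
},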
{
"cell_type": "code",
"metadata": {
"id": "a6zDFy8nvd6d"
},
"source": [
"class Better_LSTM(nn.Module):\n",
" def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, n_layers,\n",
" bidirectional, dropout):\n",
" super().__init__()\n",
"\n",
" self.embedding = nn.Embedding(input_dim, embedding_dim)\n",
" self.lstm = LSTM(embedding_dim,hidden_dim,n_layers,\n",
" dropoutw=0.2,bidirectional=bidirectional)\n",
" self.fc = nn.Linear(hidden_dim*2, output_dim)\n",
" self.dropout = nn.Dropout(dropout)\n",
"\n",
" def forward(self,text,text_lengths):\n",
" embedded = self.dropout(self.embedding(text))\n",
"\n",
" packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'))\n",
"\n",
" packed_output, (hidden, cell) = self.lstm(packed_embedded)\n",
"\n",
" output, output_lengths= nn.utils.rnn.pad_packed_sequence(packed_output)\n",
"\n",
" hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))\n",
" \n",
" return self.fc(hidden)"
],
"execution_count": 19,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "L98bD65of_tW"
},
"source": [
"model = Better_LSTM(input_dim,embedding_dim,hidden_dim,output_dim,\n",
" n_layers,bidirectional,dropout)"
],
"execution_count": 20,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3bpa_LPEZLs_",
"outputId": "b655b450-6825-4bc3-c052-a0e43c46a0a2"
},
"source": [
"pretrained_embeddings = TEXT.vocab.vectors \n",
"print(pretrained_embeddings.shape)"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"text": [
"torch.Size([10002, 300])\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YXcM-QJiZhU_",
"outputId": "be32e9f9-854b-4096-ca96-9523561781d3"
},
"source": [
"model.embedding.weight.data.copy_(pretrained_embeddings)"
],
"execution_count": 22,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor([[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],\n",
" [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],\n",
" [ 0.3746, -0.2232, 0.1993, ..., 0.2104, 0.5803, -0.3151],\n",
" ...,\n",
" [-0.4404, -0.0984, -0.1382, ..., -0.3334, -0.0566, 0.4416],\n",
" [-0.0151, -0.2532, 0.4837, ..., -0.0918, -0.5545, 0.5392],\n",
" [-0.6988, -0.0398, 0.0269, ..., 0.2145, 0.1833, 0.4843]])"
]
},
"metadata": {
"tags": []
},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "X6Pz8hhCkFCQ",
"outputId": "54d72137-3758-406b-e0af-86dc5b76d839"
},
"source": [
"model.to(device)"
],
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Better_LSTM(\n",
" (embedding): Embedding(10002, 300)\n",
" (lstm): LSTM(\n",
" 300, 256, num_layers=2, batch_first=True, bidirectional=True\n",
" (input_drop): VariationalDropout()\n",
" (output_drop): VariationalDropout()\n",
" )\n",
" (fc): Linear(in_features=512, out_features=3, bias=True)\n",
" (dropout): Dropout(p=0.5, inplace=False)\n",
")"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "QCgvQnHp37WD"
},
"source": [
"n_samples = [8484, 1613, 1424]\n",
"normed_weights = [1 - (x / sum(n_samples)) for x in n_samples]\n",
"normed_weights = torch.cuda.FloatTensor(normed_weights).to(device)"
],
"execution_count": 24,
"outputs": []
},
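{
"cell_type": "markdown",
"metadata": {},
"source": [
"The weights follow $w_c = 1 - \\frac{n_c}{\\sum_k n_k}$ with the class counts above ($n = [8484, 1613, 1424]$), which works out to roughly $[0.264, 0.860, 0.876]$, so the dominant positive class contributes least to the weighted cross-entropy loss."
]
},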
{
"cell_type": "code",
"metadata": {
"id": "Hhf2-ECliZMd"
},
"source": [
"optimizer = optim.AdamW(model.parameters())\n",
"criterion = nn.CrossEntropyLoss(weight=normed_weights)\n",
"\n",
"model = model.to(device)\n",
"criterion = criterion.to(device)"
],
"execution_count": 25,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5K78oDsptYzf"
},
"source": [
"sched = OneCycleLR(optimizer,max_lr=0.1,\n",
" steps_per_epoch = len(train_iterator),epochs=n_epochs)"
],
"execution_count": 26,
"outputs": []
},
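{
"cell_type": "code",
"metadata": {},
"source": [
"# Illustrative note (added for clarity, not part of the original run): the scheduler is\n",
"# stepped once per batch in train_step, so the one-cycle schedule spans\n",
"# len(train_iterator) * n_epochs steps, warming up to max_lr=0.1 before annealing back down.\n",
"print(f\"total scheduler steps: {len(train_iterator) * n_epochs}\")"
],
"execution_count": null,
"outputs": []
},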
{
"cell_type": "code",
"metadata": {
"id": "KZWoGuo0n4Eo"
},
"source": [
"class Trainer(object):\n",
"\n",
" def __init__(self,model,optimizer,criterion,scheduler):\n",
" #set model parameters\n",
" self.model = model\n",
" self.optimizer = optimizer\n",
" self.criterion = criterion\n",
" self.scheduler = scheduler\n",
"\n",
" def accuracy(self,y_pred,y):\n",
" max_preds = y_pred.argmax(dim=1,keepdim=True)\n",
" correct = max_preds.squeeze(1).eq(y)\n",
" return correct.sum()/torch.cuda.FloatTensor([y.shape[0]])\n",
"\n",
" def train_step(self,iterator):\n",
" epoch_loss = 0\n",
" epoch_acc = 0\n",
"\n",
" self.model.train()\n",
"\n",
" for batch in iterator:\n",
" self.optimizer.zero_grad()\n",
" text,text_lengths = batch.text\n",
" preds = self.model(text,text_lengths)\n",
" loss = self.criterion(preds,batch.label)\n",
" loss.backward()\n",
" self.optimizer.step()\n",
" self.scheduler.step()\n",
" acc = self.accuracy(preds,batch.label)\n",
"\n",
" epoch_loss += loss.item()\n",
" epoch_acc += acc.item()\n",
"\n",
" return epoch_loss/len(iterator),epoch_acc/len(iterator)\n",
"\n",
" def eval_step(self,iterator):\n",
" epoch_loss = 0\n",
" epoch_acc = 0\n",
"\n",
" self.model.eval()\n",
"\n",
" with torch.no_grad():\n",
" for batch in iterator:\n",
" text,text_lengths = batch.text\n",
" preds = self.model(text,text_lengths)\n",
" loss = self.criterion(preds,batch.label)\n",
" \n",
" acc = self.accuracy(preds,batch.label)\n",
"\n",
" epoch_loss += loss.item()\n",
" epoch_acc += acc.item()\n",
"\n",
" return epoch_loss/len(iterator),epoch_acc/len(iterator)\n",
"\n",
" def train(self,n_epochs,patience,train_iterator,test_iterator):\n",
" best_test_loss = float('inf')\n",
"\n",
" for epoch in range(n_epochs):\n",
" start_time = time.time()\n",
"\n",
" train_loss, train_acc = self.train_step(iterator=train_iterator)\n",
"\n",
" test_loss, test_acc = self.eval_step(iterator=test_iterator)\n",
"\n",
" #early stopping\n",
" if test_loss < best_test_loss:\n",
" best_test_loss = test_loss\n",
" best_model = self.model\n",
" _patience = patience \n",
" else:\n",
" _patience -= 1\n",
" if not _patience:\n",
" print(\"Stopping early\")\n",
" torch.save(self.model.state_dict(),model_path)\n",
" break\n",
"\n",
" \n",
" wandb.log(\n",
" {\"train_loss\": train_loss,\n",
" \"train_acc\": train_acc,\n",
" \"test_loss\": test_loss,\n",
" \"test_acc\": test_acc,\n",
" \"lr\": {self.optimizer.param_groups[0]['lr']:.2}\n",
" }\n",
" )\n",
"\n",
" print(\n",
" f\"Epoch: {epoch+1},\"\n",
" f\"train_loss: {train_loss},\"\n",
" f\"train_acc: {train_acc},\"\n",
" f\"test_loss: {test_loss},\"\n",
" f\"test_acc: {test_acc},\"\n",
" f\"lr: {self.optimizer.param_groups[0]['lr']:.2E}, \"\n",
" f\"_patience: {_patience}\"\n",
" )\n",
"\n",
" return best_model, best_test_loss"
],
"execution_count": 27,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "552FTw-sxsDu",
"outputId": "ae774f97-7d03-41ed-9d72-6481623f3fc2"
},
"source": [
"train = Trainer(model,optimizer,criterion,sched)\n",
"train.train(n_epochs,patience,train_iterator,test_iterator)"
],
"execution_count": 35,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py:665: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters(). (Triggered internally at /pytorch/aten/src/ATen/native/cudnn/RNN.cpp:915.)\n",
" self.num_layers, self.dropout, self.training, self.bidirectional)\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"Epoch: 1,train_loss: 1.7926972000687211,train_acc: 0.4875889306451068,test_loss: 2.918101179599762,test_acc: 0.16605113744735717,lr: 4.70E-02, _patience: 10\n",
"Epoch: 2,train_loss: 1.9570771396895985,train_acc: 0.48770009404347264,test_loss: 1.3201668441295624,test_acc: 0.44232954680919645,lr: 5.71E-02, _patience: 10\n",
"Epoch: 3,train_loss: 2.307614058624079,train_acc: 0.47059073565918724,test_loss: 4.0237770795822145,test_acc: 0.16448863744735717,lr: 6.69E-02, _patience: 9\n",
"Epoch: 4,train_loss: 2.5190020549444503,train_acc: 0.46963276833663753,test_loss: 2.939633345603943,test_acc: 0.17386363744735717,lr: 7.61E-02, _patience: 8\n",
"Epoch: 5,train_loss: 2.586048439696983,train_acc: 0.4762617047922111,test_loss: 2.586015510559082,test_acc: 0.1541193187236786,lr: 8.42E-02, _patience: 7\n",
"Epoch: 6,train_loss: 2.8215487341821928,train_acc: 0.4677201034846129,test_loss: 3.112446355819702,test_acc: 0.1387784093618393,lr: 9.09E-02, _patience: 6\n",
"Epoch: 7,train_loss: 2.7936658138110313,train_acc: 0.46697465505129027,test_loss: 1.8694010615348815,test_acc: 0.5980823874473572,lr: 9.59E-02, _patience: 5\n",
"Epoch: 8,train_loss: 2.782842500710193,train_acc: 0.4599076034846129,test_loss: 3.5176677942276,test_acc: 0.7139204561710357,lr: 9.90E-02, _patience: 4\n",
"Epoch: 9,train_loss: 2.8129371169172686,train_acc: 0.4721012501804917,test_loss: 4.183954787254334,test_acc: 0.17230113744735717,lr: 1.00E-01, _patience: 3\n",
"Epoch: 10,train_loss: 2.6812899657237677,train_acc: 0.47845718264579773,test_loss: 1.9564464807510376,test_acc: 0.22464488744735717,lr: 9.98E-02, _patience: 2\n",
"Epoch: 11,train_loss: 2.5678039185794783,train_acc: 0.47429509295357597,test_loss: 4.279632806777954,test_acc: 0.20980113744735718,lr: 9.92E-02, _patience: 1\n",
"Stopping early\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(Better_LSTM(\n",
" (embedding): Embedding(10002, 300)\n",
" (lstm): LSTM(\n",
" 300, 256, num_layers=2, batch_first=True, bidirectional=True\n",
" (input_drop): VariationalDropout()\n",
" (output_drop): VariationalDropout()\n",
" )\n",
" (fc): Linear(in_features=512, out_features=3, bias=True)\n",
" (dropout): Dropout(p=0.5, inplace=False)\n",
" ), 1.3201668441295624)"
]
},
"metadata": {
"tags": []
},
"execution_count": 35
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "X7npxE3mz8Zh"
},
"source": [
"\n",
"import spacy\n",
"nlp = spacy.load(\"en_core_web_sm\")\n",
"\n",
"def predict_class(model,sentence):\n",
" model.eval()\n",
" tokenized = [tok.text for tok in nlp.tokenizer(sentence)]\n",
" vectorized = [TEXT.vocab.stoi[t] for t in tokenized]\n",
" length = [len(vectorized)]\n",
" tensor = torch.LongTensor(vectorized).to(device)\n",
" tensor = tensor.unsqueeze(1)\n",
" length_tensor = torch.LongTensor(length)\n",
" preds = model(tensor,length_tensor)\n",
" max_preds = preds.argmax(dim=1)\n",
" return max_preds.item()"
],
"execution_count": 36,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "A16dSD-l21fi"
},
"source": [
"#### positive -> 1 according to the model --> 0\n",
"#### negative -> 0 according to the model -->1\n",
"#### mixed feelings -> 2 according to the model --> 2"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3ps2LuPYzeVj",
"outputId": "a29b882f-607b-455d-9499-519cb086acbe"
},
"source": [
"pred_class = predict_class(model, \"PAdam vera level thala\")\n",
"\n",
"print(f'Predicted class is: {pred_class}')"
],
"execution_count": 37,
"outputs": [
{
"output_type": "stream",
"text": [
"Predicted class is: 0\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py:665: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters(). (Triggered internally at /pytorch/aten/src/ATen/native/cudnn/RNN.cpp:915.)\n",
" self.num_layers, self.dropout, self.training, self.bidirectional)\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "EyjaXk4dzkcN",
"outputId": "d88fb0f2-2979-41bc-9247-277c56d8d104"
},
"source": [
"pred_class = predict_class(model, \"enpt ku kaathu kadanthathellam oru nimishathula poiruchu\")\n",
"print(f'Predicted class is: {pred_class} = {LABEL.vocab.itos[pred_class]}')"
],
"execution_count": 38,
"outputs": [
{
"output_type": "stream",
"text": [
"Predicted class is: 0 = 1\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py:665: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters(). (Triggered internally at /pytorch/aten/src/ATen/native/cudnn/RNN.cpp:915.)\n",
" self.num_layers, self.dropout, self.training, self.bidirectional)\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbq6-ksg0yoc",
"outputId": "0d1a52c9-1a12-49c1-a616-b0b0d11918f6"
},
"source": [
"\n",
"pred_class = predict_class(model, \"\te adicha copy da ithu ellam avan seyal movie\")\n",
"print(f'Predicted class is: {pred_class} = {LABEL.vocab.itos[pred_class]}')"
],
"execution_count": 39,
"outputs": [
{
"output_type": "stream",
"text": [
"Predicted class is: 0 = 1\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py:665: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters(). (Triggered internally at /pytorch/aten/src/ATen/native/cudnn/RNN.cpp:915.)\n",
" self.num_layers, self.dropout, self.training, self.bidirectional)\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iHICX_Ij7-q_",
"outputId": "cfb72e6c-df5f-4339-d253-607e55c82171"
},
"source": [
"pred_class = predict_class(model, \"padam mokka\")\n",
"print(f'Predicted class is: {pred_class}')"
],
"execution_count": 40,
"outputs": [
{
"output_type": "stream",
"text": [
"Predicted class is: 2\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py:665: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters(). (Triggered internally at /pytorch/aten/src/ATen/native/cudnn/RNN.cpp:915.)\n",
" self.num_layers, self.dropout, self.training, self.bidirectional)\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "rfzgvsow8ad7",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7edcce47-6f1c-4597-e4c5-39c0f1142406"
},
"source": [
"model.load_state_dict(torch.load('/content/drive/MyDrive/Tanglishfiremodels/better_lstm.pt'))\n",
"\n",
"test_loss, test_acc = train.eval_step(test_iterator)\n",
"\n",
"print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')"
],
"execution_count": 44,
"outputs": [
{
"output_type": "stream",
"text": [
"Test Loss: 1.457 | Test Acc: 49.05%\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py:665: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters(). (Triggered internally at /pytorch/aten/src/ATen/native/cudnn/RNN.cpp:915.)\n",
" self.num_layers, self.dropout, self.training, self.bidirectional)\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "tU1Pmcg5zRKp"
},
"source": [
"@torch.no_grad()\n",
"def get_predictions(model,iterator):\n",
" y_pred = []\n",
" y_true = []\n",
"\n",
" model.eval()\n",
" for batch in iterator:\n",
" text, text_lengths = batch.text \n",
" preds = model(text,text_lengths)\n",
" y_pred.extend(torch.argmax(preds,axis=-1).tolist())\n",
" y_true.extend(batch.label.tolist())\n",
" return y_pred,y_true"
],
"execution_count": 46,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Aqk9bhT50zSE",
"outputId": "8f247669-63ac-446c-cd75-13572d738727"
},
"source": [
"y_pred,y_true = get_predictions(model,test_iterator)"
],
"execution_count": 47,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py:665: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters(). (Triggered internally at /pytorch/aten/src/ATen/native/cudnn/RNN.cpp:915.)\n",
" self.num_layers, self.dropout, self.training, self.bidirectional)\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1qIcItb91O3U",
"outputId": "c8f3824e-27cd-4702-afb5-e719908a98c2"
},
"source": [
"print('Classification Report:')\n",
"print(classification_report(y_true, y_pred))"
],
"execution_count": 49,
"outputs": [
{
"output_type": "stream",
"text": [
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" 0 0.77 0.55 0.65 857\n",
" 1 0.17 0.52 0.25 165\n",
" 2 0.08 0.02 0.03 141\n",
"\n",
" accuracy 0.48 1163\n",
" macro avg 0.34 0.37 0.31 1163\n",
"weighted avg 0.60 0.48 0.52 1163\n",
"\n"
],
"name": "stdout"
}
]
},
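{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a follow-up sketch (not part of the original run), the weighted F1 collapses the per-class scores above into a single number, reusing the `f1_score` already imported from scikit-learn."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"print(f\"weighted F1: {f1_score(y_true, y_pred, average='weighted'):.3f}\")"
],
"execution_count": null,
"outputs": []
},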
{
"cell_type": "code",
"metadata": {
"id": "MLGY0MXN0BOR"
},
"source": [
"def get_predictions(model,iterator):\n",
" y_pred = []\n",
" y_true = []\n",
"\n",
" model.eval()\n",
" with torch.no_grad():\n",
" for batch in iterator:\n",
"\n",
" text,text_lengths = batch.text\n",
"\n",
" predictions = model(text,text_lengths)\n",
"\n",
" y_pred.extend(torch.argmax(predictions,axis=-1).tolist())\n",
" y_true.extend(batch.label.tolist())\n",
"\n",
" return y_pred,y_true"
],
"execution_count": null,
"outputs": []
}
]
}