PyTorch POS (Part of Speech) Tagger (from: https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html)
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<torch._C.Generator at 0x7f6b280d3910>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "\n",
    "torch.manual_seed(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[-0.2696,  0.2599, -0.0758]],\n",
      "\n",
      "        [[-0.4923,  0.1408, -0.0738]],\n",
      "\n",
      "        [[-0.4523,  0.1241, -0.1461]],\n",
      "\n",
      "        [[-0.3057,  0.1198, -0.0571]],\n",
      "\n",
      "        [[-0.1077,  0.0289, -0.0487]]], grad_fn=<CatBackward>)\n",
      "(tensor([[[-0.1077,  0.0289, -0.0487]]], grad_fn=<ViewBackward>), tensor([[[-0.1439,  0.1426, -0.2563]]], grad_fn=<ViewBackward>))\n"
     ]
    }
   ],
   "source": [
"lstm = nn.LSTM(input_size=3,\n", | |
" hidden_size=3,\n", | |
" num_layers=1, \n", | |
" dropout=0.5, \n", | |
" bidirectional=False) # Input dim is 3, output dim is 3\n", | |
"inputs = [torch.randn(1, 3) for _ in range(5)] # make a sequence of length 5\n", | |
"\n", | |
"\n", | |
"for i in inputs:\n", | |
" # Step through the sequence one element at a time.\n", | |
" # after each step, hidden contains the hidden state.\n", | |
" out, hidden = lstm(i.view(1, 1, -1))\n", | |
"\n", | |
"# alternatively, we can do the entire sequence all at once.\n", | |
"# the first value returned by LSTM is all of the hidden states throughout\n", | |
"# the sequence. the second is just the most recent hidden state\n", | |
"# (compare the last slice of \"out\" with \"hidden\" below, they are the same)\n", | |
"# The reason for this is that:\n", | |
"# \"out\" will give you access to all hidden states in the sequence\n", | |
"# \"hidden\" will allow you to continue the sequence and backpropagate,\n", | |
"# by passing it as an argument to the lstm at a later time\n", | |
"# Add the extra 2nd dimension\n", | |
"inputs = torch.cat(inputs).view(len(inputs), 1, -1)\n", | |
"hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3)) # clean out hidden state\n", | |
"out, hidden = lstm(inputs, hidden)\n", | |
"print(out)\n", | |
"print(hidden)" | |
] | |
  },
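  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Added aside (not in the original tutorial): the comments above claim the last slice of `out` matches the final hidden state in `hidden`. A minimal check of that claim:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# out is (seq_len, batch, hidden_size); hidden[0] is h_n, (num_layers, batch, hidden_size).\n",
    "print(torch.allclose(out[-1], hidden[0][0]))  # expected: True"
   ]
  },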
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}\n"
     ]
    }
   ],
   "source": [
    "def prepare_sequence(seq, to_ix):\n",
    "    idxs = [to_ix[w] for w in seq]\n",
    "    return torch.tensor(idxs, dtype=torch.long)\n",
    "\n",
    "\n",
    "training_data = [\n",
    "    (\"The dog ate the apple\".split(), [\"DET\", \"NN\", \"V\", \"DET\", \"NN\"]),\n",
    "    (\"Everybody read that book\".split(), [\"NN\", \"V\", \"DET\", \"NN\"])\n",
    "]\n",
    "word_to_ix = {}\n",
    "for sent, tags in training_data:\n",
    "    for word in sent:\n",
    "        if word not in word_to_ix:\n",
    "            word_to_ix[word] = len(word_to_ix)\n",
    "print(word_to_ix)\n",
    "tag_to_ix = {\"DET\": 0, \"NN\": 1, \"V\": 2}\n",
    "\n",
    "# These will usually be more like 32 or 64 dimensional.\n",
    "# We will keep them small, so we can see how the weights change as we train.\n",
    "EMBEDDING_DIM = 6\n",
    "HIDDEN_DIM = 6"
   ]
  },
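  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Added aside (not in the original tutorial): a quick look at what `prepare_sequence` returns for the first training pair."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each word/tag is mapped to its index, yielding a LongTensor of shape (len,).\n",
    "print(prepare_sequence(training_data[0][0], word_to_ix))  # tensor([0, 1, 2, 3, 4])\n",
    "print(prepare_sequence(training_data[0][1], tag_to_ix))   # tensor([0, 1, 2, 0, 1])"
   ]
  },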
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "class LSTMTagger(nn.Module):\n",
    "\n",
    "    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):\n",
    "        super(LSTMTagger, self).__init__()\n",
    "        self.hidden_dim = hidden_dim\n",
    "\n",
    "        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)\n",
    "\n",
    "        # The LSTM takes word embeddings as inputs, and outputs hidden states\n",
    "        # with dimensionality hidden_dim.\n",
    "        self.lstm = nn.LSTM(embedding_dim, hidden_dim)\n",
    "\n",
    "        # The linear layer that maps from hidden state space to tag space.\n",
    "        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)\n",
    "        self.hidden = self.init_hidden()\n",
    "\n",
    "    def init_hidden(self):\n",
    "        # Before we've done anything, we don't have any hidden state.\n",
    "        # Refer to the PyTorch documentation to see exactly\n",
    "        # why they have this dimensionality.\n",
    "        # The axes semantics are (num_layers, minibatch_size, hidden_dim).\n",
    "        return (torch.zeros(1, 1, self.hidden_dim),\n",
    "                torch.zeros(1, 1, self.hidden_dim))\n",
    "\n",
    "    def forward(self, sentence):\n",
    "        embeds = self.word_embeddings(sentence)\n",
    "        lstm_out, self.hidden = self.lstm(\n",
    "            embeds.view(len(sentence), 1, -1), self.hidden)\n",
    "        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))\n",
    "        tag_scores = F.log_softmax(tag_space, dim=1)\n",
    "        return tag_scores"
   ]
  },
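  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Added aside (not in the original tutorial): to make the shape bookkeeping in `forward` concrete, this sketch traces the first training sentence through a throwaway instance (the `_m`, `_s`, etc. names are hypothetical, separate from the `model` trained below)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "_m = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))\n",
    "_s = prepare_sequence(training_data[0][0], word_to_ix)       # 5 word indices\n",
    "_e = _m.word_embeddings(_s)                                  # (5, EMBEDDING_DIM)\n",
    "_o, _h = _m.lstm(_e.view(len(_s), 1, -1), _m.init_hidden())  # (5, 1, HIDDEN_DIM)\n",
    "_t = _m.hidden2tag(_o.view(len(_s), -1))                     # (5, tagset_size)\n",
    "print(_e.shape, _o.shape, _t.shape)"
   ]
  },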
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[-0.9909, -1.3892, -0.9690],\n",
      "        [-0.9636, -1.3335, -1.0358],\n",
      "        [-1.0044, -1.4001, -0.9489],\n",
      "        [-1.0038, -1.4218, -0.9359],\n",
      "        [-0.9848, -1.3414, -1.0079]])\n"
     ]
    }
   ],
   "source": [
    "model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))\n",
    "loss_function = nn.NLLLoss()\n",
    "optimizer = optim.SGD(model.parameters(), lr=0.1)\n",
    "\n",
    "# See what the scores are before training.\n",
    "# Note that element i,j of the output is the score for tag j for word i.\n",
    "# Here we don't need to train, so the code is wrapped in torch.no_grad().\n",
    "with torch.no_grad():\n",
    "    inputs = prepare_sequence(training_data[0][0], word_to_ix)\n",
    "    tag_scores = model(inputs)\n",
    "    print(tag_scores)"
   ]
  },
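  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Added aside (not in the original tutorial): `nn.NLLLoss` expects log-probabilities, which is why `forward` ends with `F.log_softmax`; the pairing is equivalent to `nn.CrossEntropyLoss` applied to the raw scores. A minimal sketch on dummy data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = torch.randn(5, 3)              # raw scores: 5 words, 3 tags\n",
    "y = torch.tensor([0, 1, 2, 0, 1])  # target tag indices\n",
    "nll = nn.NLLLoss()(F.log_softmax(x, dim=1), y)\n",
    "ce = nn.CrossEntropyLoss()(x, y)\n",
    "print(torch.allclose(nll, ce))     # expected: True"
   ]
  },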
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[-0.1939, -2.4660, -2.3930],\n",
      "        [-5.3511, -0.0134, -4.7559],\n",
      "        [-3.6594, -4.6389, -0.0361],\n",
      "        [-0.0260, -4.6137, -4.1531],\n",
      "        [-5.0082, -0.0132, -5.0457]])\n"
     ]
    }
   ],
   "source": [
    "for epoch in range(300):  # again, normally you would NOT do 300 epochs; this is toy data\n",
    "    for sentence, tags in training_data:\n",
    "        # Step 1. Remember that PyTorch accumulates gradients.\n",
    "        # We need to clear them out before each instance.\n",
    "        model.zero_grad()\n",
    "\n",
    "        # Also, we need to clear out the hidden state of the LSTM,\n",
    "        # detaching it from its history on the last instance.\n",
    "        model.hidden = model.init_hidden()\n",
    "\n",
    "        # Step 2. Get our inputs ready for the network, that is, turn them into\n",
    "        # Tensors of word indices.\n",
    "        sentence_in = prepare_sequence(sentence, word_to_ix)\n",
    "        targets = prepare_sequence(tags, tag_to_ix)\n",
    "\n",
    "        # Step 3. Run our forward pass.\n",
    "        tag_scores = model(sentence_in)\n",
    "\n",
    "        # Step 4. Compute the loss and gradients, and update the parameters by\n",
    "        # calling optimizer.step().\n",
    "        loss = loss_function(tag_scores, targets)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "# See what the scores are after training.\n",
    "with torch.no_grad():\n",
    "    inputs = prepare_sequence(training_data[0][0], word_to_ix)\n",
    "    tag_scores = model(inputs)\n",
    "\n",
    "    # The sentence is \"the dog ate the apple\". Element i,j is the score for tag j\n",
    "    # for word i. The predicted tag is the maximum-scoring tag.\n",
    "    # Here, we can see the predicted sequence is 0 1 2 0 1,\n",
    "    # since 0 is the index of the maximum value of row 1,\n",
    "    # 1 is the index of the maximum value of row 2, etc.,\n",
    "    # which is DET NOUN VERB DET NOUN, the correct sequence!\n",
    "    print(tag_scores)"
   ]
  },
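  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Added aside (not in the original tutorial): to read the scores above as tag strings, take the argmax of each row and invert `tag_to_ix` (`ix_to_tag` is a hypothetical helper name)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reset the hidden state first, as the training loop does for each instance.\n",
    "model.hidden = model.init_hidden()\n",
    "ix_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}\n",
    "with torch.no_grad():\n",
    "    tag_scores = model(prepare_sequence(training_data[0][0], word_to_ix))\n",
    "    print([ix_to_tag[i.item()] for i in tag_scores.argmax(dim=1)])\n",
    "# expected: ['DET', 'NN', 'V', 'DET', 'NN']"
   ]
  },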
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}