/rnn_attention_wp_1.ipynb Secret
Created
January 27, 2021 13:27
Revisions
-
TaiToTo created this gist
Jan 27, 2021. There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters Original file line number Diff line number Diff line change @@ -0,0 +1,129 @@ { "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((24000, 16), (6000, 16), (24000, 11), (6000, 11))" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", " My article is not on NLP, so please just suppose that you get the four tensors below. \n", " These shapes mean that the max length of the input sentences is 16, \n", " and the max length of the target sentences is 11. \n", "'''\n", "input_tensor_train.shape, input_tensor_val.shape, target_tensor_train.shape, target_tensor_val.shape" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 1, 35, 550, ..., 0, 0, 0],\n", " [ 1, 17, 12, ..., 0, 0, 0],\n", " [ 1, 140, 32, ..., 0, 0, 0],\n", " ...,\n", " [ 1, 2803, 19, ..., 0, 0, 0],\n", " [ 1, 9, 8, ..., 0, 0, 0],\n", " [ 1, 8, 18, ..., 0, 0, 0]], dtype=int32)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", " Each sentence is encoded as shown below.\n", " Each row denotes a sentence, and each integer denotes a token, in this case a word.\n", "'''\n", "input_tensor_train" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 1, 35, 550, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0], dtype=int32)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", " Let's see the first Spanish sentence of training data. \n", " The integers '1' and '2' correspond to \"<start>\" and \"<end>\" respectively. 
\n", "'''\n", "input_tensor_train[0]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 ----> <start>\n", "35 ----> soy\n", "550 ----> doctor\n", "3 ----> .\n", "2 ----> <end>\n" ] } ], "source": [ "'''\n", " You can see what each code denotes with convert()\n", "'''\n", "convert(inp_lang, input_tensor_train[0])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9" } }, "nbformat": 4, "nbformat_minor": 4 }