YasuThompson/translator_wp_3.ipynb Secret

## translator_wp_3.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-09-27T01:27:47.394726Z",
     "iopub.status.busy": "2020-09-27T01:27:47.394164Z",
     "iopub.status.idle": "2020-09-27T01:27:47.395964Z",
     "shell.execute_reply": "2020-09-27T01:27:47.396326Z"
    },
    "id": "PED3bIpOYkBu"
   },
   "outputs": [],
   "source": [
    "class Transformer(tf.keras.Model):\n",
    "  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, \n",
    "               target_vocab_size, pe_input, pe_target, rate=0.1):\n",
    "    super(Transformer, self).__init__()\n",
    "\n",
    "    self.encoder = Encoder(num_layers, d_model, num_heads, dff, \n",
    "                           input_vocab_size, pe_input, rate)\n",
    "\n",
    "    self.decoder = Decoder(num_layers, d_model, num_heads, dff, \n",
    "                           target_vocab_size, pe_target, rate)\n",
    "\n",
    "    self.final_layer = tf.keras.layers.Dense(target_vocab_size)\n",
    "    \n",
    "  def call(self, inp, tar, training, enc_padding_mask, \n",
    "           look_ahead_mask, dec_padding_mask):\n",
    "\n",
    "    enc_output = self.encoder(inp, training, enc_padding_mask)  # (batch_size, inp_seq_len, d_model)\n",
    "    \n",
    "    '''\n",
    "    The output of the last layer of the encoder is passed to all the layers of the decoder. \n",
    "    '''\n",
    "    #dec_output.shape == (batch_size, tar_seq_len, d_model)\n",
    "    dec_output, attention_weights = self.decoder(\n",
    "        tar, enc_output, training, look_ahead_mask, dec_padding_mask)\n",
    "    \n",
    "    '''\n",
    "    Tee final part of Transformer model. In case of machine translation, you predict a \n",
    "    'target_vocab_size' dimensional vector at every potition of the target sentence. \n",
    "    '''\n",
    "    final_output = self.final_layer(dec_output)  # (batch_size, tar_seq_len, target_vocab_size)\n",
    "    \n",
    "    return final_output, attention_weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_layers = 4\n",
    "d_model = 128\n",
    "dff = 512\n",
    "num_heads = 8\n",
    "\n",
    "input_vocab_size = 10000 + 2\n",
    "target_vocab_size = 10000 + 2\n",
    "dropout_rate = 0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "sample_transformer = Transformer(num_layers, d_model, num_heads, dff,\n",
    "                          input_vocab_size, target_vocab_size, \n",
    "                          pe_input=input_vocab_size, \n",
    "                          pe_target=target_vocab_size,\n",
    "                          rate=dropout_rate)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(64, 37, 10002)\n"
     ]
    }
   ],
   "source": [
    "# Let's put in sample inputs and targets in the sample Transformer model. \n",
    "# In this case, the max length of the input sentences is 38, and that of targets is 37. \n",
    "# In practice, all the elements of 'sample_input' and 'sample_target'  are integers. \n",
    "sample_input = tf.random.uniform((64, 38), dtype=tf.int64, minval=0, maxval=200)\n",
    "sample_target = tf.random.uniform((64, 37), dtype=tf.int64, minval=0, maxval=200)\n",
    "\n",
    "fn_out, _ = sample_transformer(sample_input, sample_target, training=False, \n",
    "                               enc_padding_mask=None, \n",
    "                               look_ahead_mask=None,\n",
    "                               dec_padding_mask=None)\n",
    "\n",
    "print(fn_out.shape)  # (batch_size, tar_seq_len, target_vocab_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# As you can see, each target entences is a (37, 10002) sized matrix. "
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [
    "s_qNSzzyaCbD"
   ],
   "name": "transformer.ipynb",
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {
	"execution": {
	"iopub.execute_input": "2020-09-27T01:27:47.394726Z",
	"iopub.status.busy": "2020-09-27T01:27:47.394164Z",
	"iopub.status.idle": "2020-09-27T01:27:47.395964Z",
	"shell.execute_reply": "2020-09-27T01:27:47.396326Z"
	},
	"id": "PED3bIpOYkBu"
	},
	"outputs": [],
	"source": [
	"class Transformer(tf.keras.Model):\n",
	" def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, \n",
	" target_vocab_size, pe_input, pe_target, rate=0.1):\n",
	" super(Transformer, self).__init__()\n",
	"\n",
	" self.encoder = Encoder(num_layers, d_model, num_heads, dff, \n",
	" input_vocab_size, pe_input, rate)\n",
	"\n",
	" self.decoder = Decoder(num_layers, d_model, num_heads, dff, \n",
	" target_vocab_size, pe_target, rate)\n",
	"\n",
	" self.final_layer = tf.keras.layers.Dense(target_vocab_size)\n",
	" \n",
	" def call(self, inp, tar, training, enc_padding_mask, \n",
	" look_ahead_mask, dec_padding_mask):\n",
	"\n",
	" enc_output = self.encoder(inp, training, enc_padding_mask) # (batch_size, inp_seq_len, d_model)\n",
	" \n",
	" '''\n",
	" The output of the last layer of the encoder is passed to all the layers of the decoder. \n",
	" '''\n",
	" #dec_output.shape == (batch_size, tar_seq_len, d_model)\n",
	" dec_output, attention_weights = self.decoder(\n",
	" tar, enc_output, training, look_ahead_mask, dec_padding_mask)\n",
	" \n",
	" '''\n",
	" Tee final part of Transformer model. In case of machine translation, you predict a \n",
	" 'target_vocab_size' dimensional vector at every potition of the target sentence. \n",
	" '''\n",
	" final_output = self.final_layer(dec_output) # (batch_size, tar_seq_len, target_vocab_size)\n",
	" \n",
	" return final_output, attention_weights"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"num_layers = 4\n",
	"d_model = 128\n",
	"dff = 512\n",
	"num_heads = 8\n",
	"\n",
	"input_vocab_size = 10000 + 2\n",
	"target_vocab_size = 10000 + 2\n",
	"dropout_rate = 0.1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"sample_transformer = Transformer(num_layers, d_model, num_heads, dff,\n",
	" input_vocab_size, target_vocab_size, \n",
	" pe_input=input_vocab_size, \n",
	" pe_target=target_vocab_size,\n",
	" rate=dropout_rate)\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(64, 37, 10002)\n"
	]
	}
	],
	"source": [
	"# Let's put in sample inputs and targets in the sample Transformer model. \n",
	"# In this case, the max length of the input sentences is 38, and that of targets is 37. \n",
	"# In practice, all the elements of 'sample_input' and 'sample_target' are integers. \n",
	"sample_input = tf.random.uniform((64, 38), dtype=tf.int64, minval=0, maxval=200)\n",
	"sample_target = tf.random.uniform((64, 37), dtype=tf.int64, minval=0, maxval=200)\n",
	"\n",
	"fn_out, _ = sample_transformer(sample_input, sample_target, training=False, \n",
	" enc_padding_mask=None, \n",
	" look_ahead_mask=None,\n",
	" dec_padding_mask=None)\n",
	"\n",
	"print(fn_out.shape) # (batch_size, tar_seq_len, target_vocab_size)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# As you can see, each target entences is a (37, 10002) sized matrix. "
	]
	}
	],
	"metadata": {
	"accelerator": "GPU",
	"colab": {
	"collapsed_sections": [
	"s_qNSzzyaCbD"
	],
	"name": "transformer.ipynb",
	"toc_visible": true
	},
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.8"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 1
	}