{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 빠르게 구현하는 RNN\n",
"18.06.24에 백수콘에서 ***\"빠르게 구현하는 RNN\"***이라는 주제로 발표한 슬라이드의 보충자료입니다."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"from pprint import pprint"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Intro"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[3, 4, 7, 5]\n"
]
}
],
"source": [
"# 문장의 단어를 RNN에 하나하나씩 넣는다고 하면?\n",
"sentences = [['I', 'feel', 'hungry'],\n",
" ['tensorflow', 'is', 'very', 'difficult'],\n",
" ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],\n",
" ['tensorflow', 'is', 'very', 'fast', 'changing']]\n",
"\n",
"# RNN은 아래처럼 각 문장 별로 단어의 개수만큼 sequence를 처리해야한다.\n",
"# --> variable sequence length!\n",
"print(list(map(lambda word : len(word), sentences)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Intro : Padding"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'<pad>': 0,\n",
" 'I': 1,\n",
" 'a': 2,\n",
" 'changing': 3,\n",
" 'deep': 4,\n",
" 'difficult': 5,\n",
" 'fast': 6,\n",
" 'feel': 7,\n",
" 'for': 8,\n",
" 'framework': 9,\n",
" 'hungry': 10,\n",
" 'is': 11,\n",
" 'learning': 12,\n",
" 'tensorflow': 13,\n",
" 'very': 14}\n"
]
}
],
"source": [
"# word dic\n",
"word_list = []\n",
"for elm in sentences:\n",
" word_list += elm\n",
"word_list = list(set(word_list))\n",
"word_list.sort()\n",
"word_list = ['<pad>'] + word_list # '<pad>'라는 의미없는 token 추가\n",
"word_dic = {word : idx for idx, word in enumerate(word_list)}\n",
"pprint(word_dic)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# max_len의 길이에 못미치는 문장은 <pad>로 max_len만큼 padding\n",
"def pad_seq(sequences, max_len, dic):\n",
" seq_len, seq_indices = [], []\n",
" for seq in sequences:\n",
" seq_len.append(len(seq))\n",
" seq_idx = [dic.get(char) for char in seq]\n",
" seq_idx += (max_len - len(seq_idx)) * [dic.get('<pad>')] # 0 is idx of meaningless token \"<pad>\"\n",
" seq_indices.append(seq_idx)\n",
" return seq_len, seq_indices"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[3, 4, 7, 5]\n",
"[[1, 7, 10, 0, 0, 0, 0, 0],\n",
" [13, 11, 14, 5, 0, 0, 0, 0],\n",
" [13, 11, 2, 9, 8, 4, 12, 0],\n",
" [13, 11, 14, 6, 3, 0, 0, 0]]\n"
]
}
],
"source": [
"max_length = 8\n",
"sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)\n",
"pprint(sen_len)\n",
"pprint(sen_indices)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Intro : Padding"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n",
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n",
" trainable = False) # embedding vector training 안할 것이기 때문에~\n",
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)"
]
},
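{
"cell_type": "markdown",
"metadata": {},
"source": [
"The one-hot lookup above is frozen (`trainable = False`). If the embedding should be learned instead, a minimal sketch looks like the cell below; `embedding_dim` and the initializer are illustrative assumptions, not part of the original talk."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch of a trainable embedding as an alternative to the fixed one-hot lookup.\n",
"embedding_dim = 4 # assumed size, not from the original slides\n",
"embedding = tf.get_variable(name = 'embedding', shape = [len(word_dic), embedding_dim],\n",
"                            initializer = tf.contrib.layers.xavier_initializer())\n",
"seq_batch_trainable = tf.nn.embedding_lookup(params = embedding, ids = seq_indices)"
]
},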
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 8)\n",
"(4, 8, 15)\n"
]
}
],
"source": [
"with tf.Session() as sess:\n",
" sess.run(tf.global_variables_initializer())\n",
" tmp = sess.run(seq_batch, feed_dict = {seq_indices : sen_indices})\n",
"print(np.shape(sen_indices))\n",
"print(np.shape(tmp)) # tf.nn.dynamic_rnn, tf.contrib.seq2seq.TrainingHelper 등에 이 shape을 유지하면서 전달되어야함"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n",
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],\n",
" dtype=float32)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tmp[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Many to One"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to One : Example data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[3, 4, 7, 5]\n",
"[[1, 7, 10, 0, 0, 0, 0, 0],\n",
" [13, 11, 14, 5, 0, 0, 0, 0],\n",
" [13, 11, 2, 9, 8, 4, 12, 0],\n",
" [13, 11, 14, 6, 3, 0, 0, 0]]\n"
]
}
],
"source": [
"tf.reset_default_graph()\n",
"\n",
"sentences = [['I', 'feel', 'hungry'],\n",
" ['tensorflow', 'is', 'very', 'difficult'],\n",
" ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],\n",
" ['tensorflow', 'is', 'very', 'fast', 'changing']]\n",
"y = [[0.,1.], [0.,1.], [1.,0.], [1.,0.]]\n",
"max_length = 8\n",
"\n",
"sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)\n",
"\n",
"pprint(sen_len)\n",
"pprint(sen_indices)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to One : Simple"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"max_length = 8\n",
"h_dim = 2\n",
"n_of_classes = 2\n",
"\n",
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"label = tf.placeholder(dtype = tf.float32, shape = [None, 2])\n",
"\n",
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n",
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n",
" trainable = False)\n",
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 2) dtype=float32>\n",
"<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>\n"
]
}
],
"source": [
"gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
"_, state = tf.nn.dynamic_rnn(cell = gru_cell, inputs = seq_batch, sequence_length = seq_len,\n",
" dtype = tf.float32)\n",
"pprint(_)\n",
"pprint(state)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"score = tf.layers.dense(inputs = state, units = n_of_classes)\n",
"ce_loss = tf.losses.softmax_cross_entropy(onehot_labels=label,\n",
" logits = score)"
]
},
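{
"cell_type": "markdown",
"metadata": {},
"source": [
"The gist stops at evaluating the loss. To actually train, a train op would be attached to `ce_loss` as sketched below; the optimizer and learning rate are illustrative assumptions, not from the original slides."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal training-op sketch (not in the original gist).\n",
"train_op = tf.train.AdamOptimizer(learning_rate = 1e-2).minimize(ce_loss)"
]
},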
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[[ 0.09504685, -0.02786257],\n",
" [ 0.05727357, -0.08943594],\n",
" [ 0.11590318, -0.1923188 ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ]]], dtype=float32),\n",
" array([[ 0.11590318, -0.1923188 ]], dtype=float32)]\n"
]
}
],
"source": [
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())\n",
"pprint(sess.run([_, state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.6566503\n"
]
}
],
"source": [
"pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n",
" label : y}))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"sess.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to One : Stacked"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"tf.reset_default_graph()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"max_length = 8\n",
"n_of_classes = 2\n",
"\n",
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"label = tf.placeholder(dtype = tf.float32, shape = [None, 2])\n",
"keep_prob = tf.placeholder(dtype = tf.float32)\n",
"\n",
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n",
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n",
" trainable = False)\n",
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"h_dims = [2,2]\n",
"gru_cells = []\n",
"for h_dim in h_dims:\n",
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell,\n",
" output_keep_prob = keep_prob)\n",
" gru_cells.append(gru_cell)\n",
"else:\n",
" gru_cells = tf.contrib.rnn.MultiRNNCell(cells = gru_cells)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 2) dtype=float32>\n",
"(<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n",
" <tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 2) dtype=float32>)\n"
]
}
],
"source": [
"_, state = tf.nn.dynamic_rnn(cell = gru_cells, inputs = seq_batch, sequence_length = seq_len,\n",
" dtype = tf.float32)\n",
"pprint(_)\n",
"pprint(state)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"score = tf.layers.dense(inputs = state[-1], units = n_of_classes)\n",
"ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[[-0.00199929, -0.01550121],\n",
" [ 0.00384277, -0.01441574],\n",
" [ 0.0184861 , -0.00312071],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ]]], dtype=float32),\n",
" (array([[-0.10920699, -0.25967512]], dtype=float32),\n",
" array([[ 0.0184861 , -0.00312071]], dtype=float32))]\n"
]
}
],
"source": [
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())\n",
"pprint(sess.run([_, state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],\n",
" keep_prob : 1.}))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7151346\n"
]
}
],
"source": [
"pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n",
" label : y, keep_prob : 1.}))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"sess.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to One : Bi-directional"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"tf.reset_default_graph()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"max_length = 8\n",
"h_dim = 2\n",
"n_of_classes = 2\n",
"\n",
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"label = tf.placeholder(dtype = tf.float32, shape = [None, 2])\n",
"\n",
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n",
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n",
" trainable = False)\n",
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose_1:0' shape=(?, 8, 2) dtype=float32>,\n",
" <tf.Tensor 'ReverseSequence:0' shape=(?, 8, 2) dtype=float32>)\n",
"(<tf.Tensor 'bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n",
" <tf.Tensor 'bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n"
]
}
],
"source": [
"gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
"gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
"\n",
"_, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell, cell_bw = gru_bw_cell,\n",
" inputs = seq_batch, sequence_length = seq_len, dtype = tf.float32)\n",
"pprint(_)\n",
"pprint(output_states)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# fw_cell의 final state와 bw_cell의 final state를 concatenate\n",
"concat_state = tf.concat(values = [output_states[0],\n",
" output_states[-1]],\n",
" axis = 1)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"score = tf.layers.dense(inputs = concat_state, units = n_of_classes)\n",
"ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(array([[[-0.06784903, -0.09155025],\n",
" [-0.05353671, -0.08901785],\n",
" [-0.06768034, -0.2230948 ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ]]], dtype=float32),\n",
" array([[[-0.11015528, 0.0082678 ],\n",
" [ 0.06593657, -0.03157877],\n",
" [ 0.04607821, 0.03097701],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ],\n",
" [ 0. , 0. ]]], dtype=float32)),\n",
" array([[-0.06768034, -0.2230948 , -0.11015528, 0.0082678 ]],\n",
" dtype=float32)]\n"
]
}
],
"source": [
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())\n",
"pprint(sess.run([_, concat_state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.77225125\n"
]
}
],
"source": [
"pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n",
" label : y}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to One : Stacked Bi-directional"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"tf.reset_default_graph()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"max_length = 8\n",
"n_of_classes = 2\n",
"\n",
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"label = tf.placeholder(dtype = tf.float32, shape = [None, 2])\n",
"keep_prob = tf.placeholder(dtype = tf.float32)\n",
"\n",
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n",
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n",
" trainable = False)\n",
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"h_dims = [2,2]\n",
"gru_fw_cells, gru_bw_cells = [], []\n",
"\n",
"# forward\n",
"for h_dim in h_dims:\n",
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)\n",
" gru_fw_cells.append(gru_cell)\n",
" \n",
"# backward\n",
"for h_dim in h_dims:\n",
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)\n",
" gru_bw_cells.append(gru_cell)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(?, 8, 4) dtype=float32>\n",
"(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n",
" <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n",
"(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n",
" <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n"
]
}
],
"source": [
"outputs, output_state_fw, output_state_bw = \\\n",
"tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw = gru_fw_cells, cells_bw = gru_bw_cells,\n",
" inputs = seq_batch, sequence_length = seq_len,\n",
" dtype = tf.float32)\n",
"pprint(outputs)\n",
"pprint(output_state_fw)\n",
"pprint(output_state_bw)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"concat_state = tf.concat(values=[output_state_fw[-1],output_state_bw[-1]], axis = 1)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"score = tf.layers.dense(inputs = concat_state, units = n_of_classes)\n",
"ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[[ 0.01751452, -0.01348923, 0.01666009, 0.01491449],\n",
" [ 0.01305745, -0.0105529 , 0.00292257, 0.02082924],\n",
" [-0.00403357, -0.00203712, 0.00713446, 0.02249983],\n",
" [ 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ]]],\n",
" dtype=float32),\n",
" array([[-0.00403357, -0.00203712, 0.01666009, 0.01491449]],\n",
" dtype=float32)]\n"
]
}
],
"source": [
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())\n",
"pprint(sess.run([outputs, concat_state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],\n",
" keep_prob : 1.}))"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.69006765\n"
]
}
],
"source": [
"pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n",
" label : y, keep_prob : 1.}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Many to Many"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to Many : Example data"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"sentences = [['I', 'feel', 'hungry'],\n",
" ['tensorflow', 'is', 'very', 'difficult'],\n",
" ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],\n",
" ['tensorflow', 'is', 'very', 'fast', 'changing']]\n",
"pos = [['pronoun', 'verb', 'adjective'],\n",
" ['noun', 'verb', 'adverb', 'adjective'],\n",
" ['noun', 'verb', 'determiner', 'noun', 'preposition', 'adjective', 'noun'],\n",
" ['noun', 'verb', 'adverb', 'adjective', 'verb']]\n",
"max_length = 8"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# max_len의 길이에 못미치는 문장은 <pad>로 max_len만큼 padding\n",
"def pad_seq(sequences, max_len, dic):\n",
" seq_len, seq_indices = [], []\n",
" for seq in sequences:\n",
" seq_len.append(len(seq))\n",
" seq_idx = [dic.get(char) for char in seq]\n",
" seq_idx += (max_len - len(seq_idx)) * [dic.get('<pad>')] # 0 is idx of meaningless token \"pad\"\n",
" seq_indices.append(seq_idx)\n",
" return seq_len, seq_indices"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'<pad>': 0, 'I': 1, 'a': 2, 'changing': 3, 'deep': 4, 'difficult': 5, 'fast': 6, 'feel': 7, 'for': 8, 'framework': 9, 'hungry': 10, 'is': 11, 'learning': 12, 'tensorflow': 13, 'very': 14}\n",
"{'<pad>': 0, 'adjective': 1, 'adverb': 2, 'determiner': 3, 'noun': 4, 'preposition': 5, 'pronoun': 6, 'verb': 7}\n"
]
}
],
"source": [
"# word dic\n",
"word_list = []\n",
"for elm in sentences:\n",
" word_list += elm\n",
"word_list = list(set(word_list))\n",
"word_list.sort()\n",
"word_list = ['<pad>'] + word_list\n",
"\n",
"word_dic = {word : idx for idx, word in enumerate(word_list)}\n",
"\n",
"# pos dic\n",
"pos_list = []\n",
"for elm in pos:\n",
" pos_list += elm\n",
"pos_list = list(set(pos_list))\n",
"pos_list.sort()\n",
"pos_list = ['<pad>'] + pos_list\n",
"\n",
"pos_dic = {pos : idx for idx, pos in enumerate(pos_list)}\n",
"\n",
"print(word_dic)\n",
"print(pos_dic)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[3, 4, 7, 5]\n",
"[[1, 7, 10, 0, 0, 0, 0, 0],\n",
" [13, 11, 14, 5, 0, 0, 0, 0],\n",
" [13, 11, 2, 9, 8, 4, 12, 0],\n",
" [13, 11, 14, 6, 3, 0, 0, 0]]\n",
"[[6, 7, 1, 0, 0, 0, 0, 0],\n",
" [4, 7, 2, 1, 0, 0, 0, 0],\n",
" [4, 7, 3, 4, 5, 1, 4, 0],\n",
" [4, 7, 2, 1, 7, 0, 0, 0]]\n"
]
}
],
"source": [
"sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)\n",
"_, pos_indices = pad_seq(sequences = pos, max_len = max_length, dic = pos_dic)\n",
"\n",
"pprint(sen_len)\n",
"pprint(sen_indices)\n",
"pprint(pos_indices)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to Many : Simple"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"max_length = 8\n",
"h_dim = 2\n",
"n_of_classes = len(pos_dic)\n",
"\n",
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"\n",
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n",
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n",
" trainable = False)\n",
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 8) dtype=float32>\n",
"<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>\n"
]
}
],
"source": [
"gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
"score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = gru_cell, output_size = n_of_classes)\n",
"outputs, _ = tf.nn.dynamic_rnn(cell = score_cell, inputs = seq_batch, sequence_length = seq_len,\n",
" dtype = tf.float32)\n",
"\n",
"pprint(outputs)\n",
"pprint(_)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"masking = tf.sequence_mask(lengths = sen_len,\n",
" maxlen = max_length, dtype = tf.float32)\n",
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs,\n",
" targets = label,\n",
" weights = masking)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[[ 0.01570132, 0.0524365 , -0.14573137, -0.12667798,\n",
" 0.00564757, -0.02671078, -0.09015815, -0.10335645],\n",
" [ 0.0628261 , 0.10740477, -0.25103313, -0.21593538,\n",
" -0.01752878, 0.03181047, -0.1846405 , -0.15016457],\n",
" [ 0.02495369, 0.07662933, -0.20985961, -0.18227278,\n",
" 0.00634775, -0.03336836, -0.13175288, -0.14701241],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ]]],\n",
" dtype=float32),\n",
" array([[-0.11723392, 0.20574999]], dtype=float32)]\n"
]
}
],
"source": [
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())\n",
"pprint(sess.run([outputs, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[1., 1., 1., 0., 0., 0., 0., 0.],\n",
" [1., 1., 1., 1., 0., 0., 0., 0.],\n",
" [1., 1., 1., 1., 1., 1., 1., 0.],\n",
" [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),\n",
" 2.10219]\n"
]
}
],
"source": [
"pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n",
" label : pos_indices}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to Many : Stacked"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"max_length = 8\n",
"n_of_classes = len(pos_dic)\n",
"\n",
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"keep_prob = tf.placeholder(dtype = tf.float32)\n",
"\n",
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n",
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n",
" trainable = False)\n",
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"h_dims = [2,2]\n",
"gru_cells = []\n",
"for h_dim in h_dims:\n",
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)\n",
" gru_cells.append(gru_cell)\n",
"else:\n",
" gru_cells = tf.contrib.rnn.MultiRNNCell(cells = gru_cells)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 8) dtype=float32>\n",
"(<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n",
" <tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 2) dtype=float32>)\n"
]
}
],
"source": [
"score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = gru_cells, output_size = n_of_classes)\n",
"outputs, _ = tf.nn.dynamic_rnn(cell = score_cell, inputs = seq_batch, sequence_length = seq_len,\n",
" dtype = tf.float32)\n",
"\n",
"pprint(outputs)\n",
"pprint(_)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"masking = tf.sequence_mask(lengths = sen_len, maxlen = max_length, dtype = tf.float32)\n",
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets = label, weights = masking)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[[-0.0035896 , 0.00190467, -0.00314977, -0.00056082,\n",
" -0.00091539, -0.00521373, 0.00357852, -0.00100121],\n",
" [-0.01860922, 0.00316853, -0.01515145, -0.00715531,\n",
" -0.00680878, -0.02094646, 0.01408813, -0.00569285],\n",
" [-0.02975762, -0.00246203, -0.02290624, -0.01621121,\n",
" -0.01320424, -0.02666592, 0.01751654, -0.00966735],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ]]],\n",
" dtype=float32),\n",
" (array([[ 0.03279103, -0.09169079]], dtype=float32),\n",
" array([[-0.05043861, -0.02464323]], dtype=float32))]\n"
]
}
],
"source": [
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())\n",
"pprint(sess.run([outputs, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],\n",
" keep_prob : 1.}))"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[1., 1., 1., 0., 0., 0., 0., 0.],\n",
" [1., 1., 1., 1., 0., 0., 0., 0.],\n",
" [1., 1., 1., 1., 1., 1., 1., 0.],\n",
" [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),\n",
" 2.0782952]\n"
]
}
],
"source": [
"pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n",
" label : pos_indices, keep_prob : 1.}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to Many : Bi-directional"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"max_length = 8\n",
"h_dim = 2\n",
"n_of_classes = len(pos_dic)\n",
"\n",
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"\n",
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n",
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n",
" trainable = False)\n",
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose_1:0' shape=(?, 8, 2) dtype=float32>,\n",
" <tf.Tensor 'ReverseSequence:0' shape=(?, 8, 2) dtype=float32>)\n",
"(<tf.Tensor 'bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n",
" <tf.Tensor 'bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n"
]
}
],
"source": [
"gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)\n",
"gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)\n",
"\n",
"outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell, cell_bw = gru_bw_cell,\n",
" inputs = seq_batch, sequence_length = seq_len, dtype = tf.float32)\n",
"pprint(outputs)\n",
"pprint(_)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Tensor 'map/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 8, 8) dtype=float32>\n"
]
}
],
"source": [
"concat_outputs = tf.concat([outputs[0], outputs[1]], axis = 2)\n",
"weights = tf.get_variable(name = 'weights', shape = (concat_outputs.get_shape()[-1], n_of_classes),\n",
" initializer = tf.contrib.layers.xavier_initializer())\n",
"score = tf.map_fn(lambda elm : tf.matmul(elm, weights), concat_outputs)\n",
"pprint(score)"
]
},
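{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since `tf.layers.dense` is applied along the last axis when its input has rank > 2, the `tf.map_fn` construction above can be collapsed into a single call. A sketch of the equivalent alternative (`use_bias = False` mirrors the bias-free matmul):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Equivalent alternative to the tf.map_fn construction above (sketch).\n",
"score_alt = tf.layers.dense(inputs = concat_outputs, units = n_of_classes, use_bias = False)"
]
},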
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"masking = tf.sequence_mask(lengths = sen_len, maxlen = max_length, dtype = tf.float32)\n",
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = score, targets = label, weights = masking)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[[ 0.16862816, 0.00404082, -0.11827 , 0.0535416 ,\n",
" -0.11164413, 0.0408196 , 0.00550904, -0.16447452],\n",
" [ 0.21764617, -0.04588123, -0.15964638, 0.21997268,\n",
" -0.09319463, 0.08445665, 0.06755111, -0.23474823],\n",
" [ 0.13310257, -0.08929545, -0.08857331, 0.12412636,\n",
" -0.07805191, 0.01264486, 0.02737853, -0.13982151],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ]]],\n",
" dtype=float32),\n",
" (array([[-0.16780025, 0.0583857 ]], dtype=float32),\n",
" array([[ 0.06507254, -0.0562867 ]], dtype=float32))]\n"
]
}
],
"source": [
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())\n",
"pprint(sess.run([score, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[1., 1., 1., 0., 0., 0., 0., 0.],\n",
" [1., 1., 1., 1., 0., 0., 0., 0.],\n",
" [1., 1., 1., 1., 1., 1., 1., 0.],\n",
" [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),\n",
" 2.0999782]\n"
]
}
],
"source": [
"pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n",
" label : pos_indices}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Many to Many : Stacked Bi-directional"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"max_length = 8\n",
"n_of_classes = len(pos_dic)\n",
"\n",
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n",
"keep_prob = tf.placeholder(dtype = tf.float32)\n",
"\n",
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n",
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n",
" trainable = False)\n",
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"h_dims = [2,2]\n",
"gru_fw_cells, gru_bw_cells = [], []\n",
"\n",
"# forward\n",
"for h_dim in h_dims:\n",
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)\n",
" gru_fw_cells.append(gru_cell)\n",
" \n",
"# backward\n",
"for h_dim in h_dims:\n",
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n",
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)\n",
" gru_bw_cells.append(gru_cell)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(?, 8, 4) dtype=float32>\n",
"(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n",
" <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n",
"(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n",
" <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n"
]
}
],
"source": [
"outputs, output_state_fw, output_state_bw = \\\n",
"tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw = gru_fw_cells, cells_bw = gru_bw_cells,\n",
" inputs = seq_batch, sequence_length = seq_len,\n",
" dtype = tf.float32)\n",
"pprint(outputs)\n",
"pprint(output_state_fw)\n",
"pprint(output_state_bw)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Tensor 'map/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 8, 8) dtype=float32>\n"
]
}
],
"source": [
"weights = tf.get_variable(name = 'weights', shape = (outputs.get_shape()[-1], n_of_classes),\n",
" initializer = tf.contrib.layers.xavier_initializer())\n",
"score = tf.map_fn(lambda elm : tf.matmul(elm, weights), outputs)\n",
"pprint(score)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"masking = tf.sequence_mask(lengths = sen_len, maxlen = max_length, dtype = tf.float32)\n",
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = score, targets = label, weights = masking)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"array([[[ 0.02271244, -0.02384472, 0.01439292, -0.01237436,\n",
" -0.0037775 , 0.03624248, -0.02230978, -0.01178392],\n",
" [ 0.02258522, -0.01905726, 0.00837328, -0.00484734,\n",
" 0.00557474, 0.0180396 , 0.00872493, 0.00712166],\n",
" [ 0.02025667, -0.02466974, 0.01172541, -0.01174619,\n",
" 0.00904868, 0.01962387, 0.01582363, 0.00594005],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. ]]],\n",
" dtype=float32)\n"
]
}
],
"source": [
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())\n",
"pprint(sess.run(score, feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],\n",
" keep_prob : 1.}))"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array([[1., 1., 1., 0., 0., 0., 0., 0.],\n",
" [1., 1., 1., 1., 0., 0., 0., 0.],\n",
" [1., 1., 1., 1., 1., 1., 1., 0.],\n",
" [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),\n",
" 2.0819566]\n"
]
}
],
"source": [
"pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n",
" label : pos_indices, keep_prob : 1.}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sequence to Sequence"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sequence to Sequence : Example data"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"sources = [['I', 'feel', 'hungry'],\n",
" ['tensorflow', 'is', 'very', 'difficult'],\n",
" ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],\n",
" ['tensorflow', 'is', 'very', 'fast', 'changing']]\n",
"targets = [['나는', '배가', '고프다'],\n",
" ['텐서플로우는', '매우', '어렵다'],\n",
" ['텐서플로우는', '딥러닝을', '위한', '프레임워크이다'],\n",
" ['텐서플로우는', '매우', '빠르게', '변화한다']]"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'<pad>': 0, 'I': 1, 'a': 2, 'changing': 3, 'deep': 4, 'difficult': 5, 'fast': 6, 'feel': 7, 'for': 8, 'framework': 9, 'hungry': 10, 'is': 11, 'learning': 12, 'tensorflow': 13, 'very': 14}\n",
"15\n"
]
}
],
"source": [
"# word dic for sentences\n",
"source_words = []\n",
"for elm in sources:\n",
" source_words += elm\n",
"source_words = list(set(source_words))\n",
"source_words.sort()\n",
"source_words = ['<pad>'] + source_words\n",
"\n",
"source_dic = {word : idx for idx, word in enumerate(source_words)}\n",
"print(source_dic)\n",
"print(len(source_dic))"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'<pad>': 0, '<start>': 1, '<end>': 2, '고프다': 3, '나는': 4, '딥러닝을': 5, '매우': 6, '배가': 7, '변화한다': 8, '빠르게': 9, '어렵다': 10, '위한': 11, '텐서플로우는': 12, '프레임워크이다': 13}\n",
"14\n"
]
}
],
"source": [
"# word dic for translations\n",
"target_words = []\n",
"for elm in targets:\n",
" target_words += elm\n",
"target_words = list(set(target_words))\n",
"target_words.sort()\n",
"target_words = ['<pad>']+ ['<start>'] + ['<end>'] + \\\n",
" target_words # 번역문의 시작과 끝을 알리는 'start', 'end' token 추가\n",
"\n",
"target_dic = {word : idx for idx, word in enumerate(target_words)}\n",
"print(target_dic)\n",
"print(len(target_dic))"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"def pad_seq_enc(sequences, max_len, dic):\n",
" seq_len = []\n",
" seq_indices = []\n",
" for seq in sequences:\n",
" seq_len.append(len(seq))\n",
" seq_idx = [dic.get(word) for word in seq]\n",
" seq_idx += (max_len - len(seq_idx)) * [dic.get('<pad>')] \n",
" seq_indices.append(seq_idx) \n",
" return seq_len, seq_indices"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"def pad_seq_dec(sequences, max_len, dic):\n",
" seq_input_len = []\n",
" seq_input_indices = []\n",
" seq_target_indices = []\n",
" \n",
" # for decoder input\n",
" for seq in sequences:\n",
" seq_input_idx = [dic.get('<start>')] + [dic.get(token) for token in seq]\n",
" seq_input_len.append(len(seq_input_idx))\n",
" seq_input_idx += (max_len - len(seq_input_idx)) * [dic.get('<pad>')] \n",
" seq_input_indices.append(seq_input_idx)\n",
" \n",
" # for decoder output\n",
" for seq in sequences:\n",
" seq_target_idx = [dic.get(token) for token in seq] + [dic.get('<end>')]\n",
" seq_target_idx += (max_len - len(seq_target_idx)) * [dic.get('<pad>')]\n",
" seq_target_indices.append(seq_target_idx)\n",
" \n",
" return seq_input_len, seq_input_indices, seq_target_indices"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[3, 4, 7, 5] (4, 10)\n"
]
}
],
"source": [
"# for encoder\n",
"source_max_len = 10\n",
"X_length, X_indices = pad_seq_enc(sequences = sources, max_len = source_max_len, dic = source_dic)\n",
"print(X_length, np.shape(X_indices))"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[4, 4, 5, 5]\n",
"[[1, 4, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0],\n",
" [1, 12, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0],\n",
" [1, 12, 5, 11, 13, 0, 0, 0, 0, 0, 0, 0],\n",
" [1, 12, 6, 9, 8, 0, 0, 0, 0, 0, 0, 0]]\n",
"[[4, 7, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0],\n",
" [12, 6, 10, 2, 0, 0, 0, 0, 0, 0, 0, 0],\n",
" [12, 5, 11, 13, 2, 0, 0, 0, 0, 0, 0, 0],\n",
" [12, 6, 9, 8, 2, 0, 0, 0, 0, 0, 0, 0]]\n"
]
}
],
"source": [
"# for decoder\n",
"target_max_len = 12\n",
"y_length, y_input_indices, y_target_indices = pad_seq_dec(sequences = targets, max_len = target_max_len,\n",
" dic = target_dic)\n",
"pprint(y_length)\n",
"pprint(y_input_indices)\n",
"pprint(y_target_indices)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"s_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"s_indices = tf.placeholder(dtype = tf.int32, shape = [None, source_max_len])\n",
"t_len = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"t_input_indices = tf.placeholder(dtype = tf.int32, shape = [None, target_max_len])\n",
"t_output_indices = tf.placeholder(dtype = tf.int32, shape = [None, target_max_len])"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"s_embedding = tf.eye(num_rows = len(source_dic), dtype = tf.float32)\n",
"s_embedding = tf.get_variable(name = 's_embedding', initializer = s_embedding)\n",
"s_batch = tf.nn.embedding_lookup(params = s_embedding, ids = s_indices)\n",
"\n",
"enc_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)\n",
"_, enc_state = tf.nn.dynamic_rnn(cell = enc_cell, inputs = s_batch, sequence_length = s_len, dtype = tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"t_embedding = tf.eye(num_rows = len(target_dic), dtype = tf.float32)\n",
"t_embedding = tf.get_variable(name = 't_embedding', initializer = t_embedding)\n",
"t_batch = tf.nn.embedding_lookup(params = t_embedding, ids = t_input_indices)\n",
"\n",
"tokens = tf.ones_like(tensor = s_len, dtype = tf.int32)\n",
"tr_tokens = tf.map_fn(lambda elm : tf.multiply(elm, target_max_len), tokens, dtype = tf.int32)\n",
"start_tokens = tokens\n",
"\n",
"tr_helper = tf.contrib.seq2seq.TrainingHelper(inputs = t_batch, sequence_length = tr_tokens)\n",
"dec_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)\n",
"score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = dec_cell, output_size = len(target_dic))\n",
"tr_decoder = tf.contrib.seq2seq.BasicDecoder(cell = score_cell, initial_state = enc_state, helper = tr_helper)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"tr_outputs,_,_= tf.contrib.seq2seq.dynamic_decode(decoder = tr_decoder, impute_finished = True,\n",
" maximum_iterations = target_max_len)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"masking = tf.sequence_mask(lengths = t_len, maxlen = target_max_len, dtype = tf.float32)\n",
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = tr_outputs.rnn_output,\n",
" targets = t_output_indices, weights = masking)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"trans_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding = t_embedding,\n",
" start_tokens = start_tokens,\n",
" end_token = target_dic.get('<end>'))\n",
"trans_decoder = tf.contrib.seq2seq.BasicDecoder(cell = score_cell, initial_state = enc_state,\n",
" helper = trans_helper)\n",
"trans_outputs,_,_ = tf.contrib.seq2seq.dynamic_decode(decoder = trans_decoder, impute_finished = True,\n",
" maximum_iterations = target_max_len * 2)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())\n",
"logits, masks = sess.run([tr_outputs.rnn_output,masking], feed_dict = {s_len : [X_length[0]],\n",
" s_indices : [X_indices[0]],\n",
" t_len : [y_length[0]],\n",
" t_input_indices : [y_input_indices[0]]})\n",
"loss = sess.run(seq2seq_loss, feed_dict = {s_len : [X_length[0]],\n",
" s_indices : [X_indices[0]],\n",
" t_len : [y_length[0]],\n",
" t_input_indices : [y_input_indices[0]],\n",
" t_output_indices : [y_target_indices[0]]})"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
"translations = sess.run(trans_outputs.sample_id, feed_dict = {s_len : [X_length[0]],\n",
" s_indices : [X_indices[0]]})"
]
},
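{
"cell_type": "markdown",
"metadata": {},
"source": [
"`sample_id` holds indices into `target_dic`; a small hypothetical helper (not in the original gist) maps them back to tokens:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical helper : invert target_dic to read the sampled translation.\n",
"idx_to_token = {idx : token for token, idx in target_dic.items()}\n",
"print([idx_to_token[idx] for idx in translations[0]])"
]
},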
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"array([[[-0.00770796, -0.01160654, 0.06141101, -0.00119117,\n",
" 0.00151106, -0.00418233, 0.04408944, -0.01907253,\n",
" 0.06191742, 0.00312654, -0.00581186, -0.02721635,\n",
" 0.05599122, -0.03012921],\n",
" [-0.06202612, -0.00614491, 0.06214031, -0.06277049,\n",
" 0.02558279, -0.01664988, 0.02347147, -0.03689051,\n",
" 0.06652129, 0.04653463, 0.04533478, -0.00508038,\n",
" 0.05785662, 0.00203341],\n",
" [-0.09766634, -0.01459097, 0.12218379, -0.09584236,\n",
" 0.0395265 , -0.02717485, 0.05562212, -0.06465542,\n",
" 0.12906486, 0.0720693 , 0.06619576, -0.02005081,\n",
" 0.11322317, -0.0105706 ],\n",
" [-0.05630375, -0.0053757 , 0.0554057 , -0.05710275,\n",
" 0.02325371, -0.01507437, 0.02053787, -0.03321677,\n",
" 0.05938321, 0.04229102, 0.04136585, -0.00411567,\n",
" 0.0516084 , 0.00241266],\n",
" [-0.09801295, -0.01543377, 0.12653415, -0.09570034,\n",
" 0.03954508, -0.02742546, 0.05882316, -0.06594183,\n",
" 0.13343686, 0.07213129, 0.0655957 , -0.02206141,\n",
" 0.11718518, -0.01282454],\n",
" [-0.13214648, -0.02217925, 0.17738682, -0.12819305,\n",
" 0.05310601, -0.03724365, 0.08451303, -0.09073777,\n",
" 0.18668863, 0.09691563, 0.08699297, -0.03310486,\n",
" 0.16416422, -0.02113115],\n",
" [-0.15983033, -0.02676146, 0.21423036, -0.15508783,\n",
" 0.06424127, -0.04503345, 0.1019742 , -0.10966098,\n",
" 0.22548106, 0.11723457, 0.10528523, -0.03988271,\n",
" 0.19826664, -0.0253781 ],\n",
" [-0.18215221, -0.02990646, 0.24121594, -0.1771085 ,\n",
" 0.07330434, -0.05120733, 0.11396605, -0.12418452,\n",
" 0.25404 , 0.13375267, 0.12061477, -0.044 ,\n",
" 0.22328982, -0.02726221],\n",
" [-0.20007661, -0.03208359, 0.2611607 , -0.19500336,\n",
" 0.08063555, -0.05609706, 0.12227292, -0.13538148,\n",
" 0.27524936, 0.14710198, 0.13329202, -0.04645221,\n",
" 0.24181573, -0.02779921],\n",
" [-0.2144253 , -0.03360179, 0.2760145 , -0.2094653 ,\n",
" 0.08653881, -0.05996756, 0.12806974, -0.14404465,\n",
" 0.2911163 , 0.15784328, 0.14367735, -0.04786452,\n",
" 0.255635 , -0.02759974],\n",
" [-0.22588326, -0.03466735, 0.28714904, -0.22110319,\n",
" 0.09127538, -0.06302971, 0.1321414 , -0.1507664 ,\n",
" 0.30306035, 0.16645658, 0.15212543, -0.04863242,\n",
" 0.2660096 , -0.02702914],\n",
" [-0.23501435, -0.03541961, 0.29554257, -0.23043671,\n",
" 0.09506495, -0.0654511 , 0.13501821, -0.15599361,\n",
" 0.31209937, 0.17334443, 0.15896013, -0.04900678,\n",
" 0.27384117, -0.02630292]]], dtype=float32)\n"
]
}
],
"source": [
"pprint(logits)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"array([[1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)\n",
"2.6709042\n"
]
}
],
"source": [
"pprint(masks)\n",
"pprint(loss)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}