Last active
June 24, 2018 09:32
-
-
Save seopbo/86bbb462724269deee15ebafd1ca1ed4 to your computer and use it in GitHub Desktop.
백수콘(180624)에서 "빠르게 구현하는 RNN" 슬라이드의 code snippets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 빠르게 구현하는 RNN\n", | |
"18.06.24에 백수콘에서 ***\"빠르게 구현하는 RNN\"***이라는 주제로 발표한 슬라이드의 보충자료입니다." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Load libraries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import tensorflow as tf\n", | |
"import numpy as np\n", | |
"from pprint import pprint" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Intro" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[3, 4, 7, 5]\n" | |
] | |
} | |
], | |
"source": [ | |
"# Suppose the words of each sentence are fed to the RNN one at a time.\n", | |
"sentences = [\n", | |
"    ['I', 'feel', 'hungry'],\n", | |
"    ['tensorflow', 'is', 'very', 'difficult'],\n", | |
"    ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],\n", | |
"    ['tensorflow', 'is', 'very', 'fast', 'changing'],\n", | |
"]\n", | |
"\n", | |
"# The RNN has to process as many steps as each sentence has words,\n", | |
"# i.e. the sequence length is variable.\n", | |
"print([len(sentence) for sentence in sentences])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Intro : Padding" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'<pad>': 0,\n", | |
" 'I': 1,\n", | |
" 'a': 2,\n", | |
" 'changing': 3,\n", | |
" 'deep': 4,\n", | |
" 'difficult': 5,\n", | |
" 'fast': 6,\n", | |
" 'feel': 7,\n", | |
" 'for': 8,\n", | |
" 'framework': 9,\n", | |
" 'hungry': 10,\n", | |
" 'is': 11,\n", | |
" 'learning': 12,\n", | |
" 'tensorflow': 13,\n", | |
" 'very': 14}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Word dictionary: unique words in sorted order, with the meaningless\n", | |
"# '<pad>' token placed first so that it is assigned index 0.\n", | |
"word_list = ['<pad>'] + sorted({word for sentence in sentences for word in sentence})\n", | |
"word_dic = dict(zip(word_list, range(len(word_list))))\n", | |
"pprint(word_dic)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Sentences shorter than max_len are padded with <pad> up to max_len.\n", | |
"def pad_seq(sequences, max_len, dic):\n", | |
"    \"\"\"Map token sequences to index lists of exactly ``max_len`` entries.\n", | |
"\n", | |
"    Args:\n", | |
"        sequences: iterable of token lists.\n", | |
"        max_len: number of entries in every returned index list.\n", | |
"        dic: dict mapping token -> integer index; must contain '<pad>'.\n", | |
"\n", | |
"    Returns:\n", | |
"        (seq_len, seq_indices): the number of real (non-pad) tokens kept for\n", | |
"        each sequence, and the padded index lists.\n", | |
"\n", | |
"    Raises:\n", | |
"        KeyError: if a token (or '<pad>') is missing from ``dic``.\n", | |
"    \"\"\"\n", | |
"    pad_idx = dic['<pad>']\n", | |
"    seq_len, seq_indices = [], []\n", | |
"    for seq in sequences:\n", | |
"        # Truncate over-long sequences so no row exceeds max_len and the\n", | |
"        # reported length matches what is actually kept. Indexing with\n", | |
"        # dic[token] fails loudly instead of silently inserting None.\n", | |
"        seq_idx = [dic[token] for token in seq[:max_len]]\n", | |
"        seq_len.append(len(seq_idx))\n", | |
"        seq_idx += [pad_idx] * (max_len - len(seq_idx))\n", | |
"        seq_indices.append(seq_idx)\n", | |
"    return seq_len, seq_indices" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[3, 4, 7, 5]\n", | |
"[[1, 7, 10, 0, 0, 0, 0, 0],\n", | |
" [13, 11, 14, 5, 0, 0, 0, 0],\n", | |
" [13, 11, 2, 9, 8, 4, 12, 0],\n", | |
" [13, 11, 14, 6, 3, 0, 0, 0]]\n" | |
] | |
} | |
], | |
"source": [ | |
"max_length = 8\n", | |
"sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)\n", | |
"pprint(sen_len)\n", | |
"pprint(sen_indices)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Intro : Embedding lookup" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n", | |
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n", | |
" trainable = False) # embedding vector training 안할 것이기 때문에~\n", | |
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 8)\n", | |
"(4, 8, 15)\n" | |
] | |
} | |
], | |
"source": [ | |
"with tf.Session() as sess:\n", | |
" sess.run(tf.global_variables_initializer())\n", | |
" tmp = sess.run(seq_batch, feed_dict = {seq_indices : sen_indices})\n", | |
"print(np.shape(sen_indices))\n", | |
"print(np.shape(tmp)) # tf.nn.dynamic_rnn, tf.contrib.seq2seq.TrainingHelper 등에 이 shape을 유지하면서 전달되어야함" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", | |
" [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n", | |
" [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n", | |
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", | |
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", | |
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", | |
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", | |
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],\n", | |
" dtype=float32)" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tmp[0]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Many to One" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to One : Example data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[3, 4, 7, 5]\n", | |
"[[1, 7, 10, 0, 0, 0, 0, 0],\n", | |
" [13, 11, 14, 5, 0, 0, 0, 0],\n", | |
" [13, 11, 2, 9, 8, 4, 12, 0],\n", | |
" [13, 11, 14, 6, 3, 0, 0, 0]]\n" | |
] | |
} | |
], | |
"source": [ | |
"tf.reset_default_graph()\n", | |
"\n", | |
"sentences = [['I', 'feel', 'hungry'],\n", | |
" ['tensorflow', 'is', 'very', 'difficult'],\n", | |
" ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],\n", | |
" ['tensorflow', 'is', 'very', 'fast', 'changing']]\n", | |
"y = [[0.,1.], [0.,1.], [1.,0.], [1.,0.]]\n", | |
"max_length = 8\n", | |
"\n", | |
"sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)\n", | |
"\n", | |
"pprint(sen_len)\n", | |
"pprint(sen_indices)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to One : Simple" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"max_length = 8\n", | |
"h_dim = 2\n", | |
"n_of_classes = 2\n", | |
"\n", | |
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"label = tf.placeholder(dtype = tf.float32, shape = [None, 2])\n", | |
"\n", | |
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n", | |
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n", | |
" trainable = False)\n", | |
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 2) dtype=float32>\n", | |
"<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>\n" | |
] | |
} | |
], | |
"source": [ | |
"gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n", | |
"_, state = tf.nn.dynamic_rnn(cell = gru_cell, inputs = seq_batch, sequence_length = seq_len,\n", | |
" dtype = tf.float32)\n", | |
"pprint(_)\n", | |
"pprint(state)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"score = tf.layers.dense(inputs = state, units = n_of_classes)\n", | |
"ce_loss = tf.losses.softmax_cross_entropy(onehot_labels=label,\n", | |
" logits = score)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[[ 0.09504685, -0.02786257],\n", | |
" [ 0.05727357, -0.08943594],\n", | |
" [ 0.11590318, -0.1923188 ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ]]], dtype=float32),\n", | |
" array([[ 0.11590318, -0.1923188 ]], dtype=float32)]\n" | |
] | |
} | |
], | |
"source": [ | |
"sess = tf.Session()\n", | |
"sess.run(tf.global_variables_initializer())\n", | |
"pprint(sess.run([_, state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.6566503\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n", | |
" label : y}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sess.close()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to One : Stacked" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tf.reset_default_graph()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"max_length = 8\n", | |
"n_of_classes = 2\n", | |
"\n", | |
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"label = tf.placeholder(dtype = tf.float32, shape = [None, 2])\n", | |
"keep_prob = tf.placeholder(dtype = tf.float32)\n", | |
"\n", | |
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n", | |
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n", | |
" trainable = False)\n", | |
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# One dropout-wrapped GRU cell per entry of h_dims.\n", | |
"h_dims = [2,2]\n", | |
"gru_cells = []\n", | |
"for h_dim in h_dims:\n", | |
"    gru_cell = tf.contrib.rnn.DropoutWrapper(\n", | |
"        cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh),\n", | |
"        output_keep_prob = keep_prob)\n", | |
"    gru_cells.append(gru_cell)\n", | |
"\n", | |
"# Runs once after the loop: combine the per-layer cells into a single multi-layer cell.\n", | |
"gru_cells = tf.contrib.rnn.MultiRNNCell(cells = gru_cells)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 2) dtype=float32>\n", | |
"(<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n", | |
" <tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 2) dtype=float32>)\n" | |
] | |
} | |
], | |
"source": [ | |
"_, state = tf.nn.dynamic_rnn(cell = gru_cells, inputs = seq_batch, sequence_length = seq_len,\n", | |
" dtype = tf.float32)\n", | |
"pprint(_)\n", | |
"pprint(state)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"score = tf.layers.dense(inputs = state[-1], units = n_of_classes)\n", | |
"ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[[-0.00199929, -0.01550121],\n", | |
" [ 0.00384277, -0.01441574],\n", | |
" [ 0.0184861 , -0.00312071],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ]]], dtype=float32),\n", | |
" (array([[-0.10920699, -0.25967512]], dtype=float32),\n", | |
" array([[ 0.0184861 , -0.00312071]], dtype=float32))]\n" | |
] | |
} | |
], | |
"source": [ | |
"sess = tf.Session()\n", | |
"sess.run(tf.global_variables_initializer())\n", | |
"pprint(sess.run([_, state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],\n", | |
" keep_prob : 1.}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.7151346\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n", | |
" label : y, keep_prob : 1.}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sess.close()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to One : Bi-directional" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tf.reset_default_graph()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"max_length = 8\n", | |
"h_dim = 2\n", | |
"n_of_classes = 2\n", | |
"\n", | |
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"label = tf.placeholder(dtype = tf.float32, shape = [None, 2])\n", | |
"\n", | |
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n", | |
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n", | |
" trainable = False)\n", | |
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose_1:0' shape=(?, 8, 2) dtype=float32>,\n", | |
" <tf.Tensor 'ReverseSequence:0' shape=(?, 8, 2) dtype=float32>)\n", | |
"(<tf.Tensor 'bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n", | |
" <tf.Tensor 'bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n" | |
] | |
} | |
], | |
"source": [ | |
"gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n", | |
"gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n", | |
"\n", | |
"_, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell, cell_bw = gru_bw_cell,\n", | |
" inputs = seq_batch, sequence_length = seq_len, dtype = tf.float32)\n", | |
"pprint(_)\n", | |
"pprint(output_states)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# fw_cell의 final state와 bw_cell의 final state를 concatenate\n", | |
"concat_state = tf.concat(values = [output_states[0],\n", | |
" output_states[-1]],\n", | |
" axis = 1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"score = tf.layers.dense(inputs = concat_state, units = n_of_classes)\n", | |
"ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[(array([[[-0.06784903, -0.09155025],\n", | |
" [-0.05353671, -0.08901785],\n", | |
" [-0.06768034, -0.2230948 ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ]]], dtype=float32),\n", | |
" array([[[-0.11015528, 0.0082678 ],\n", | |
" [ 0.06593657, -0.03157877],\n", | |
" [ 0.04607821, 0.03097701],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ],\n", | |
" [ 0. , 0. ]]], dtype=float32)),\n", | |
" array([[-0.06768034, -0.2230948 , -0.11015528, 0.0082678 ]],\n", | |
" dtype=float32)]\n" | |
] | |
} | |
], | |
"source": [ | |
"sess = tf.Session()\n", | |
"sess.run(tf.global_variables_initializer())\n", | |
"pprint(sess.run([_, concat_state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.77225125\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n", | |
"                                      label : y}))\n", | |
"# Close the session here: the next code cell resets the default graph\n", | |
"# without ever releasing this session.\n", | |
"sess.close()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to One : Stacked Bi-directional" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tf.reset_default_graph()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"max_length = 8\n", | |
"n_of_classes = 2\n", | |
"\n", | |
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"label = tf.placeholder(dtype = tf.float32, shape = [None, 2])\n", | |
"keep_prob = tf.placeholder(dtype = tf.float32)\n", | |
"\n", | |
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n", | |
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n", | |
" trainable = False)\n", | |
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"h_dims = [2,2]\n", | |
"gru_fw_cells, gru_bw_cells = [], []\n", | |
"\n", | |
"# forward\n", | |
"for h_dim in h_dims:\n", | |
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n", | |
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)\n", | |
" gru_fw_cells.append(gru_cell)\n", | |
" \n", | |
"# backward\n", | |
"for h_dim in h_dims:\n", | |
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n", | |
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)\n", | |
" gru_bw_cells.append(gru_cell)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(?, 8, 4) dtype=float32>\n", | |
"(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n", | |
" <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n", | |
"(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n", | |
" <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n" | |
] | |
} | |
], | |
"source": [ | |
"outputs, output_state_fw, output_state_bw = \\\n", | |
"tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw = gru_fw_cells, cells_bw = gru_bw_cells,\n", | |
" inputs = seq_batch, sequence_length = seq_len,\n", | |
" dtype = tf.float32)\n", | |
"pprint(outputs)\n", | |
"pprint(output_state_fw)\n", | |
"pprint(output_state_bw)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"concat_state = tf.concat(values=[output_state_fw[-1],output_state_bw[-1]], axis = 1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"score = tf.layers.dense(inputs = concat_state, units = n_of_classes)\n", | |
"ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = label, logits = score)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[[ 0.01751452, -0.01348923, 0.01666009, 0.01491449],\n", | |
" [ 0.01305745, -0.0105529 , 0.00292257, 0.02082924],\n", | |
" [-0.00403357, -0.00203712, 0.00713446, 0.02249983],\n", | |
" [ 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ]]],\n", | |
" dtype=float32),\n", | |
" array([[-0.00403357, -0.00203712, 0.01666009, 0.01491449]],\n", | |
" dtype=float32)]\n" | |
] | |
} | |
], | |
"source": [ | |
"sess = tf.Session()\n", | |
"sess.run(tf.global_variables_initializer())\n", | |
"pprint(sess.run([outputs, concat_state], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],\n", | |
" keep_prob : 1.}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.69006765\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(sess.run(ce_loss, feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n", | |
"                                      label : y, keep_prob : 1.}))\n", | |
"# Close the session here: the next code cell resets the default graph\n", | |
"# without ever releasing this session.\n", | |
"sess.close()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Many to Many" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to Many : Example data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tf.reset_default_graph()\n", | |
"\n", | |
"sentences = [['I', 'feel', 'hungry'],\n", | |
" ['tensorflow', 'is', 'very', 'difficult'],\n", | |
" ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],\n", | |
" ['tensorflow', 'is', 'very', 'fast', 'changing']]\n", | |
"pos = [['pronoun', 'verb', 'adjective'],\n", | |
" ['noun', 'verb', 'adverb', 'adjective'],\n", | |
" ['noun', 'verb', 'determiner', 'noun', 'preposition', 'adjective', 'noun'],\n", | |
" ['noun', 'verb', 'adverb', 'adjective', 'verb']]\n", | |
"max_length = 8" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Sentences shorter than max_len are padded with <pad> up to max_len.\n", | |
"def pad_seq(sequences, max_len, dic):\n", | |
"    \"\"\"Map token sequences to index lists of exactly ``max_len`` entries.\n", | |
"\n", | |
"    Args:\n", | |
"        sequences: iterable of token lists.\n", | |
"        max_len: number of entries in every returned index list.\n", | |
"        dic: dict mapping token -> integer index; must contain '<pad>'.\n", | |
"\n", | |
"    Returns:\n", | |
"        (seq_len, seq_indices): the number of real (non-pad) tokens kept for\n", | |
"        each sequence, and the padded index lists.\n", | |
"\n", | |
"    Raises:\n", | |
"        KeyError: if a token (or '<pad>') is missing from ``dic``.\n", | |
"    \"\"\"\n", | |
"    pad_idx = dic['<pad>']\n", | |
"    seq_len, seq_indices = [], []\n", | |
"    for seq in sequences:\n", | |
"        # Truncate over-long sequences so no row exceeds max_len and the\n", | |
"        # reported length matches what is actually kept. Indexing with\n", | |
"        # dic[token] fails loudly instead of silently inserting None.\n", | |
"        seq_idx = [dic[token] for token in seq[:max_len]]\n", | |
"        seq_len.append(len(seq_idx))\n", | |
"        seq_idx += [pad_idx] * (max_len - len(seq_idx))\n", | |
"        seq_indices.append(seq_idx)\n", | |
"    return seq_len, seq_indices" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'<pad>': 0, 'I': 1, 'a': 2, 'changing': 3, 'deep': 4, 'difficult': 5, 'fast': 6, 'feel': 7, 'for': 8, 'framework': 9, 'hungry': 10, 'is': 11, 'learning': 12, 'tensorflow': 13, 'very': 14}\n", | |
"{'<pad>': 0, 'adjective': 1, 'adverb': 2, 'determiner': 3, 'noun': 4, 'preposition': 5, 'pronoun': 6, 'verb': 7}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Word dictionary: sorted unique words, with '<pad>' first so it maps to index 0.\n", | |
"word_list = ['<pad>'] + sorted({word for sentence in sentences for word in sentence})\n", | |
"word_dic = dict(zip(word_list, range(len(word_list))))\n", | |
"\n", | |
"# Part-of-speech dictionary, built the same way from the tag sequences.\n", | |
"pos_list = ['<pad>'] + sorted({tag for tags in pos for tag in tags})\n", | |
"pos_dic = dict(zip(pos_list, range(len(pos_list))))\n", | |
"\n", | |
"print(word_dic)\n", | |
"print(pos_dic)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[3, 4, 7, 5]\n", | |
"[[1, 7, 10, 0, 0, 0, 0, 0],\n", | |
" [13, 11, 14, 5, 0, 0, 0, 0],\n", | |
" [13, 11, 2, 9, 8, 4, 12, 0],\n", | |
" [13, 11, 14, 6, 3, 0, 0, 0]]\n", | |
"[[6, 7, 1, 0, 0, 0, 0, 0],\n", | |
" [4, 7, 2, 1, 0, 0, 0, 0],\n", | |
" [4, 7, 3, 4, 5, 1, 4, 0],\n", | |
" [4, 7, 2, 1, 7, 0, 0, 0]]\n" | |
] | |
} | |
], | |
"source": [ | |
"sen_len, sen_indices = pad_seq(sequences = sentences, max_len = max_length, dic = word_dic)\n", | |
"_, pos_indices = pad_seq(sequences = pos, max_len = max_length, dic = pos_dic)\n", | |
"\n", | |
"pprint(sen_len)\n", | |
"pprint(sen_indices)\n", | |
"pprint(pos_indices)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to Many : Simple" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"max_length = 8\n", | |
"h_dim = 2\n", | |
"n_of_classes = len(pos_dic)\n", | |
"\n", | |
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"\n", | |
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n", | |
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n", | |
" trainable = False)\n", | |
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 8) dtype=float32>\n", | |
"<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>\n" | |
] | |
} | |
], | |
"source": [ | |
"gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n", | |
"score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = gru_cell, output_size = n_of_classes)\n", | |
"outputs, _ = tf.nn.dynamic_rnn(cell = score_cell, inputs = seq_batch, sequence_length = seq_len,\n", | |
" dtype = tf.float32)\n", | |
"\n", | |
"pprint(outputs)\n", | |
"pprint(_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Build the mask from the `seq_len` placeholder rather than the Python list\n", | |
"# `sen_len`, so the mask always matches the batch that is actually fed.\n", | |
"masking = tf.sequence_mask(lengths = seq_len,\n", | |
"                           maxlen = max_length, dtype = tf.float32)\n", | |
"# The mask is passed as per-timestep weights: padded positions have weight 0.\n", | |
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs,\n", | |
"                                                targets = label,\n", | |
"                                                weights = masking)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[[ 0.01570132, 0.0524365 , -0.14573137, -0.12667798,\n", | |
" 0.00564757, -0.02671078, -0.09015815, -0.10335645],\n", | |
" [ 0.0628261 , 0.10740477, -0.25103313, -0.21593538,\n", | |
" -0.01752878, 0.03181047, -0.1846405 , -0.15016457],\n", | |
" [ 0.02495369, 0.07662933, -0.20985961, -0.18227278,\n", | |
" 0.00634775, -0.03336836, -0.13175288, -0.14701241],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ]]],\n", | |
" dtype=float32),\n", | |
" array([[-0.11723392, 0.20574999]], dtype=float32)]\n" | |
] | |
} | |
], | |
"source": [ | |
"sess = tf.Session()\n", | |
"sess.run(tf.global_variables_initializer())\n", | |
"pprint(sess.run([outputs, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[1., 1., 1., 0., 0., 0., 0., 0.],\n", | |
" [1., 1., 1., 1., 0., 0., 0., 0.],\n", | |
" [1., 1., 1., 1., 1., 1., 1., 0.],\n", | |
" [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),\n", | |
" 2.10219]\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n", | |
"                                                      label : pos_indices}))\n", | |
"# Close the session here: the next code cell resets the default graph\n", | |
"# without ever releasing this session.\n", | |
"sess.close()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to Many : Stacked" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tf.reset_default_graph()\n", | |
"\n", | |
"max_length = 8\n", | |
"n_of_classes = len(pos_dic)\n", | |
"\n", | |
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"keep_prob = tf.placeholder(dtype = tf.float32)\n", | |
"\n", | |
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n", | |
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n", | |
" trainable = False)\n", | |
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# One dropout-wrapped GRU cell per entry of h_dims.\n", | |
"h_dims = [2,2]\n", | |
"gru_cells = []\n", | |
"for h_dim in h_dims:\n", | |
"    gru_cell = tf.contrib.rnn.DropoutWrapper(\n", | |
"        cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh),\n", | |
"        output_keep_prob = keep_prob)\n", | |
"    gru_cells.append(gru_cell)\n", | |
"\n", | |
"# Runs once after the loop: combine the per-layer cells into a single multi-layer cell.\n", | |
"gru_cells = tf.contrib.rnn.MultiRNNCell(cells = gru_cells)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<tf.Tensor 'rnn/transpose_1:0' shape=(?, 8, 8) dtype=float32>\n", | |
"(<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n", | |
" <tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 2) dtype=float32>)\n" | |
] | |
} | |
], | |
"source": [ | |
"score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = gru_cells, output_size = n_of_classes)\n", | |
"outputs, _ = tf.nn.dynamic_rnn(cell = score_cell, inputs = seq_batch, sequence_length = seq_len,\n", | |
" dtype = tf.float32)\n", | |
"\n", | |
"pprint(outputs)\n", | |
"pprint(_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Build the mask from the `seq_len` placeholder rather than the Python list\n", | |
"# `sen_len`, so the mask always matches the batch that is actually fed.\n", | |
"masking = tf.sequence_mask(lengths = seq_len, maxlen = max_length, dtype = tf.float32)\n", | |
"# The mask is passed as per-timestep weights: padded positions have weight 0.\n", | |
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets = label, weights = masking)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[[-0.0035896 , 0.00190467, -0.00314977, -0.00056082,\n", | |
" -0.00091539, -0.00521373, 0.00357852, -0.00100121],\n", | |
" [-0.01860922, 0.00316853, -0.01515145, -0.00715531,\n", | |
" -0.00680878, -0.02094646, 0.01408813, -0.00569285],\n", | |
" [-0.02975762, -0.00246203, -0.02290624, -0.01621121,\n", | |
" -0.01320424, -0.02666592, 0.01751654, -0.00966735],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ]]],\n", | |
" dtype=float32),\n", | |
" (array([[ 0.03279103, -0.09169079]], dtype=float32),\n", | |
" array([[-0.05043861, -0.02464323]], dtype=float32))]\n" | |
] | |
} | |
], | |
"source": [ | |
"sess = tf.Session()\n", | |
"sess.run(tf.global_variables_initializer())\n", | |
"pprint(sess.run([outputs, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],\n", | |
" keep_prob : 1.}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[1., 1., 1., 0., 0., 0., 0., 0.],\n", | |
" [1., 1., 1., 1., 0., 0., 0., 0.],\n", | |
" [1., 1., 1., 1., 1., 1., 1., 0.],\n", | |
" [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),\n", | |
" 2.0782952]\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n", | |
" label : pos_indices, keep_prob : 1.}))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to Many : Bi-directional" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tf.reset_default_graph()\n", | |
"\n", | |
"max_length = 8\n", | |
"h_dim = 2\n", | |
"n_of_classes = len(pos_dic)\n", | |
"\n", | |
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"\n", | |
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n", | |
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n", | |
" trainable = False)\n", | |
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose_1:0' shape=(?, 8, 2) dtype=float32>,\n", | |
" <tf.Tensor 'ReverseSequence:0' shape=(?, 8, 2) dtype=float32>)\n", | |
"(<tf.Tensor 'bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n", | |
" <tf.Tensor 'bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n" | |
] | |
} | |
], | |
"source": [ | |
"gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)\n", | |
"gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)\n", | |
"\n", | |
"outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell, cell_bw = gru_bw_cell,\n", | |
" inputs = seq_batch, sequence_length = seq_len, dtype = tf.float32)\n", | |
"pprint(outputs)\n", | |
"pprint(_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<tf.Tensor 'map/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 8, 8) dtype=float32>\n" | |
] | |
} | |
], | |
"source": [ | |
"concat_outputs = tf.concat([outputs[0], outputs[1]], axis = 2)\n", | |
"weights = tf.get_variable(name = 'weights', shape = (concat_outputs.get_shape()[-1], n_of_classes),\n", | |
" initializer = tf.contrib.layers.xavier_initializer())\n", | |
"score = tf.map_fn(lambda elm : tf.matmul(elm, weights), concat_outputs)\n", | |
"pprint(score)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"masking = tf.sequence_mask(lengths = sen_len, maxlen = max_length, dtype = tf.float32)\n", | |
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = score, targets = label, weights = masking)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[[ 0.16862816, 0.00404082, -0.11827 , 0.0535416 ,\n", | |
" -0.11164413, 0.0408196 , 0.00550904, -0.16447452],\n", | |
" [ 0.21764617, -0.04588123, -0.15964638, 0.21997268,\n", | |
" -0.09319463, 0.08445665, 0.06755111, -0.23474823],\n", | |
" [ 0.13310257, -0.08929545, -0.08857331, 0.12412636,\n", | |
" -0.07805191, 0.01264486, 0.02737853, -0.13982151],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ]]],\n", | |
" dtype=float32),\n", | |
" (array([[-0.16780025, 0.0583857 ]], dtype=float32),\n", | |
" array([[ 0.06507254, -0.0562867 ]], dtype=float32))]\n" | |
] | |
} | |
], | |
"source": [ | |
"sess = tf.Session()\n", | |
"sess.run(tf.global_variables_initializer())\n", | |
"pprint(sess.run([score, _], feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]]}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[1., 1., 1., 0., 0., 0., 0., 0.],\n", | |
" [1., 1., 1., 1., 0., 0., 0., 0.],\n", | |
" [1., 1., 1., 1., 1., 1., 1., 0.],\n", | |
" [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),\n", | |
" 2.0999782]\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n", | |
" label : pos_indices}))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Many to Many : Stacked Bi-directional" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tf.reset_default_graph()\n", | |
"\n", | |
"max_length = 8\n", | |
"n_of_classes = len(pos_dic)\n", | |
"\n", | |
"seq_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"seq_indices = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"label = tf.placeholder(dtype = tf.int32, shape = [None, max_length])\n", | |
"keep_prob = tf.placeholder(dtype = tf.float32)\n", | |
"\n", | |
"one_hot = np.eye(len(word_dic)).astype(np.float32)\n", | |
"one_hot = tf.get_variable(name='one_hot', initializer = one_hot,\n", | |
" trainable = False)\n", | |
"seq_batch = tf.nn.embedding_lookup(params = one_hot, ids = seq_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"h_dims = [2,2]\n", | |
"gru_fw_cells, gru_bw_cells = [], []\n", | |
"\n", | |
"# forward\n", | |
"for h_dim in h_dims:\n", | |
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n", | |
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)\n", | |
" gru_fw_cells.append(gru_cell)\n", | |
" \n", | |
"# backward\n", | |
"for h_dim in h_dims:\n", | |
" gru_cell = tf.contrib.rnn.GRUCell(num_units = h_dim, activation = tf.nn.tanh)\n", | |
" gru_cell = tf.contrib.rnn.DropoutWrapper(cell = gru_cell, output_keep_prob = keep_prob)\n", | |
" gru_bw_cells.append(gru_cell)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<tf.Tensor 'stack_bidirectional_rnn/cell_1/concat:0' shape=(?, 8, 4) dtype=float32>\n", | |
"(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n", | |
" <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n", | |
"(<tf.Tensor 'stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>,\n", | |
" <tf.Tensor 'stack_bidirectional_rnn/cell_1/bidirectional_rnn/bw/bw/while/Exit_3:0' shape=(?, 2) dtype=float32>)\n" | |
] | |
} | |
], | |
"source": [ | |
"outputs, output_state_fw, output_state_bw = \\\n", | |
"tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw = gru_fw_cells, cells_bw = gru_bw_cells,\n", | |
" inputs = seq_batch, sequence_length = seq_len,\n", | |
" dtype = tf.float32)\n", | |
"pprint(outputs)\n", | |
"pprint(output_state_fw)\n", | |
"pprint(output_state_bw)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<tf.Tensor 'map/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 8, 8) dtype=float32>\n" | |
] | |
} | |
], | |
"source": [ | |
"weights = tf.get_variable(name = 'weights', shape = (outputs.get_shape()[-1], n_of_classes),\n", | |
" initializer = tf.contrib.layers.xavier_initializer())\n", | |
"score = tf.map_fn(lambda elm : tf.matmul(elm, weights), outputs)\n", | |
"pprint(score)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"masking = tf.sequence_mask(lengths = sen_len, maxlen = max_length, dtype = tf.float32)\n", | |
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = score, targets = label, weights = masking)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"array([[[ 0.02271244, -0.02384472, 0.01439292, -0.01237436,\n", | |
" -0.0037775 , 0.03624248, -0.02230978, -0.01178392],\n", | |
" [ 0.02258522, -0.01905726, 0.00837328, -0.00484734,\n", | |
" 0.00557474, 0.0180396 , 0.00872493, 0.00712166],\n", | |
" [ 0.02025667, -0.02466974, 0.01172541, -0.01174619,\n", | |
" 0.00904868, 0.01962387, 0.01582363, 0.00594005],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ],\n", | |
" [ 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. ]]],\n", | |
" dtype=float32)\n" | |
] | |
} | |
], | |
"source": [ | |
"sess = tf.Session()\n", | |
"sess.run(tf.global_variables_initializer())\n", | |
"pprint(sess.run(score, feed_dict = {seq_len : [sen_len[0]], seq_indices : [sen_indices[0]],\n", | |
" keep_prob : 1.}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[array([[1., 1., 1., 0., 0., 0., 0., 0.],\n", | |
" [1., 1., 1., 1., 0., 0., 0., 0.],\n", | |
" [1., 1., 1., 1., 1., 1., 1., 0.],\n", | |
" [1., 1., 1., 1., 1., 0., 0., 0.]], dtype=float32),\n", | |
" 2.0819566]\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(sess.run([masking, seq2seq_loss], feed_dict = {seq_len : sen_len, seq_indices : sen_indices,\n", | |
" label : pos_indices, keep_prob : 1.}))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Sequence to Sequence" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Sequence to Sequence : Example data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tf.reset_default_graph()\n", | |
"\n", | |
"sources = [['I', 'feel', 'hungry'],\n", | |
" ['tensorflow', 'is', 'very', 'difficult'],\n", | |
" ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],\n", | |
" ['tensorflow', 'is', 'very', 'fast', 'changing']]\n", | |
"targets = [['나는', '배가', '고프다'],\n", | |
" ['텐서플로우는', '매우', '어렵다'],\n", | |
" ['텐서플로우는', '딥러닝을', '위한', '프레임워크이다'],\n", | |
" ['텐서플로우는', '매우', '빠르게', '변화한다']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'<pad>': 0, 'I': 1, 'a': 2, 'changing': 3, 'deep': 4, 'difficult': 5, 'fast': 6, 'feel': 7, 'for': 8, 'framework': 9, 'hungry': 10, 'is': 11, 'learning': 12, 'tensorflow': 13, 'very': 14}\n", | |
"15\n" | |
] | |
} | |
], | |
"source": [ | |
"# word dic for sentences\n", | |
"source_words = []\n", | |
"for elm in sources:\n", | |
" source_words += elm\n", | |
"source_words = list(set(source_words))\n", | |
"source_words.sort()\n", | |
"source_words = ['<pad>'] + source_words\n", | |
"\n", | |
"source_dic = {word : idx for idx, word in enumerate(source_words)}\n", | |
"print(source_dic)\n", | |
"print(len(source_dic))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 70, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'<pad>': 0, '<start>': 1, '<end>': 2, '고프다': 3, '나는': 4, '딥러닝을': 5, '매우': 6, '배가': 7, '변화한다': 8, '빠르게': 9, '어렵다': 10, '위한': 11, '텐서플로우는': 12, '프레임워크이다': 13}\n", | |
"14\n" | |
] | |
} | |
], | |
"source": [ | |
"# word dic for translations\n", | |
"target_words = []\n", | |
"for elm in targets:\n", | |
" target_words += elm\n", | |
"target_words = list(set(target_words))\n", | |
"target_words.sort()\n", | |
"target_words = ['<pad>']+ ['<start>'] + ['<end>'] + \\\n", | |
" target_words # 번역문의 시작과 끝을 알리는 'start', 'end' token 추가\n", | |
"\n", | |
"target_dic = {word : idx for idx, word in enumerate(target_words)}\n", | |
"print(target_dic)\n", | |
"print(len(target_dic))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 71, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def pad_seq_enc(sequences, max_len, dic):\n", | |
" seq_len = []\n", | |
" seq_indices = []\n", | |
" for seq in sequences:\n", | |
" seq_len.append(len(seq))\n", | |
" seq_idx = [dic.get(word) for word in seq]\n", | |
" seq_idx += (max_len - len(seq_idx)) * [dic.get('<pad>')] \n", | |
" seq_indices.append(seq_idx) \n", | |
" return seq_len, seq_indices" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 72, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def pad_seq_dec(sequences, max_len, dic):\n", | |
" seq_input_len = []\n", | |
" seq_input_indices = []\n", | |
" seq_target_indices = []\n", | |
" \n", | |
" # for decoder input\n", | |
" for seq in sequences:\n", | |
" seq_input_idx = [dic.get('<start>')] + [dic.get(token) for token in seq]\n", | |
" seq_input_len.append(len(seq_input_idx))\n", | |
" seq_input_idx += (max_len - len(seq_input_idx)) * [dic.get('<pad>')] \n", | |
" seq_input_indices.append(seq_input_idx)\n", | |
" \n", | |
" # for decoder output\n", | |
" for seq in sequences:\n", | |
" seq_target_idx = [dic.get(token) for token in seq] + [dic.get('<end>')]\n", | |
" seq_target_idx += (max_len - len(seq_target_idx)) * [dic.get('<pad>')]\n", | |
" seq_target_indices.append(seq_target_idx)\n", | |
" \n", | |
" return seq_input_len, seq_input_indices, seq_target_indices" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 73, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[3, 4, 7, 5] (4, 10)\n" | |
] | |
} | |
], | |
"source": [ | |
"# for encoder\n", | |
"source_max_len = 10\n", | |
"X_length, X_indices = pad_seq_enc(sequences = sources, max_len = source_max_len, dic = source_dic)\n", | |
"print(X_length, np.shape(X_indices))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 74, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[4, 4, 5, 5]\n", | |
"[[1, 4, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0],\n", | |
" [1, 12, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0],\n", | |
" [1, 12, 5, 11, 13, 0, 0, 0, 0, 0, 0, 0],\n", | |
" [1, 12, 6, 9, 8, 0, 0, 0, 0, 0, 0, 0]]\n", | |
"[[4, 7, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0],\n", | |
" [12, 6, 10, 2, 0, 0, 0, 0, 0, 0, 0, 0],\n", | |
" [12, 5, 11, 13, 2, 0, 0, 0, 0, 0, 0, 0],\n", | |
" [12, 6, 9, 8, 2, 0, 0, 0, 0, 0, 0, 0]]\n" | |
] | |
} | |
], | |
"source": [ | |
"# for decoder\n", | |
"target_max_len = 12\n", | |
"y_length, y_input_indices, y_target_indices = pad_seq_dec(sequences = targets, max_len = target_max_len,\n", | |
" dic = target_dic)\n", | |
"pprint(y_length)\n", | |
"pprint(y_input_indices)\n", | |
"pprint(y_target_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 75, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"s_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"s_indices = tf.placeholder(dtype = tf.int32, shape = [None, source_max_len])\n", | |
"t_len = tf.placeholder(dtype = tf.int32, shape = [None])\n", | |
"t_input_indices = tf.placeholder(dtype = tf.int32, shape = [None, target_max_len])\n", | |
"t_output_indices = tf.placeholder(dtype = tf.int32, shape = [None, target_max_len])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 76, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"s_embedding = tf.eye(num_rows = len(source_dic), dtype = tf.float32)\n", | |
"s_embedding = tf.get_variable(name = 's_embedding', initializer = s_embedding)\n", | |
"s_batch = tf.nn.embedding_lookup(params = s_embedding, ids = s_indices)\n", | |
"\n", | |
"enc_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)\n", | |
"_, enc_state = tf.nn.dynamic_rnn(cell = enc_cell, inputs = s_batch, sequence_length = s_len, dtype = tf.float32)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 77, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"t_embedding = tf.eye(num_rows = len(target_dic), dtype = tf.float32)\n", | |
"t_embedding = tf.get_variable(name = 't_embedding', initializer = t_embedding)\n", | |
"t_batch = tf.nn.embedding_lookup(params = t_embedding, ids = t_input_indices)\n", | |
"\n", | |
"tokens = tf.ones_like(tensor = s_len, dtype = tf.int32)\n", | |
"tr_tokens = tf.map_fn(lambda elm : tf.multiply(elm, target_max_len), tokens, dtype = tf.int32)\n", | |
"start_tokens = tokens\n", | |
"\n", | |
"tr_helper = tf.contrib.seq2seq.TrainingHelper(inputs = t_batch, sequence_length = tr_tokens)\n", | |
"dec_cell = tf.contrib.rnn.GRUCell(num_units = 2, activation = tf.nn.tanh)\n", | |
"score_cell = tf.contrib.rnn.OutputProjectionWrapper(cell = dec_cell, output_size = len(target_dic))\n", | |
"tr_decoder = tf.contrib.seq2seq.BasicDecoder(cell = score_cell, initial_state = enc_state, helper = tr_helper)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 78, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tr_outputs,_,_= tf.contrib.seq2seq.dynamic_decode(decoder = tr_decoder, impute_finished = True,\n", | |
" maximum_iterations = target_max_len)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 79, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"masking = tf.sequence_mask(lengths = t_len, maxlen = target_max_len, dtype = tf.float32)\n", | |
"seq2seq_loss = tf.contrib.seq2seq.sequence_loss(logits = tr_outputs.rnn_output,\n", | |
" targets = t_output_indices, weights = masking)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 80, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"trans_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding = t_embedding,\n", | |
" start_tokens = start_tokens,\n", | |
" end_token = target_dic.get('<end>'))\n", | |
"trans_decoder = tf.contrib.seq2seq.BasicDecoder(cell = score_cell, initial_state = enc_state,\n", | |
" helper = trans_helper)\n", | |
"trans_outputs,_,_ = tf.contrib.seq2seq.dynamic_decode(decoder = trans_decoder, impute_finished = True,\n", | |
" maximum_iterations = target_max_len * 2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sess = tf.Session()\n", | |
"sess.run(tf.global_variables_initializer())\n", | |
"logits, masks = sess.run([tr_outputs.rnn_output,masking], feed_dict = {s_len : [X_length[0]],\n", | |
" s_indices : [X_indices[0]],\n", | |
" t_len : [y_length[0]],\n", | |
" t_input_indices : [y_input_indices[0]]})\n", | |
"loss = sess.run(seq2seq_loss, feed_dict = {s_len : [X_length[0]],\n", | |
" s_indices : [X_indices[0]],\n", | |
" t_len : [y_length[0]],\n", | |
" t_input_indices : [y_input_indices[0]],\n", | |
" t_output_indices : [y_target_indices[0]]})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 82, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"translations = sess.run(trans_outputs.sample_id, feed_dict = {s_len : [X_length[0]],\n", | |
" s_indices : [X_indices[0]]})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"array([[[-0.00770796, -0.01160654, 0.06141101, -0.00119117,\n", | |
" 0.00151106, -0.00418233, 0.04408944, -0.01907253,\n", | |
" 0.06191742, 0.00312654, -0.00581186, -0.02721635,\n", | |
" 0.05599122, -0.03012921],\n", | |
" [-0.06202612, -0.00614491, 0.06214031, -0.06277049,\n", | |
" 0.02558279, -0.01664988, 0.02347147, -0.03689051,\n", | |
" 0.06652129, 0.04653463, 0.04533478, -0.00508038,\n", | |
" 0.05785662, 0.00203341],\n", | |
" [-0.09766634, -0.01459097, 0.12218379, -0.09584236,\n", | |
" 0.0395265 , -0.02717485, 0.05562212, -0.06465542,\n", | |
" 0.12906486, 0.0720693 , 0.06619576, -0.02005081,\n", | |
" 0.11322317, -0.0105706 ],\n", | |
" [-0.05630375, -0.0053757 , 0.0554057 , -0.05710275,\n", | |
" 0.02325371, -0.01507437, 0.02053787, -0.03321677,\n", | |
" 0.05938321, 0.04229102, 0.04136585, -0.00411567,\n", | |
" 0.0516084 , 0.00241266],\n", | |
" [-0.09801295, -0.01543377, 0.12653415, -0.09570034,\n", | |
" 0.03954508, -0.02742546, 0.05882316, -0.06594183,\n", | |
" 0.13343686, 0.07213129, 0.0655957 , -0.02206141,\n", | |
" 0.11718518, -0.01282454],\n", | |
" [-0.13214648, -0.02217925, 0.17738682, -0.12819305,\n", | |
" 0.05310601, -0.03724365, 0.08451303, -0.09073777,\n", | |
" 0.18668863, 0.09691563, 0.08699297, -0.03310486,\n", | |
" 0.16416422, -0.02113115],\n", | |
" [-0.15983033, -0.02676146, 0.21423036, -0.15508783,\n", | |
" 0.06424127, -0.04503345, 0.1019742 , -0.10966098,\n", | |
" 0.22548106, 0.11723457, 0.10528523, -0.03988271,\n", | |
" 0.19826664, -0.0253781 ],\n", | |
" [-0.18215221, -0.02990646, 0.24121594, -0.1771085 ,\n", | |
" 0.07330434, -0.05120733, 0.11396605, -0.12418452,\n", | |
" 0.25404 , 0.13375267, 0.12061477, -0.044 ,\n", | |
" 0.22328982, -0.02726221],\n", | |
" [-0.20007661, -0.03208359, 0.2611607 , -0.19500336,\n", | |
" 0.08063555, -0.05609706, 0.12227292, -0.13538148,\n", | |
" 0.27524936, 0.14710198, 0.13329202, -0.04645221,\n", | |
" 0.24181573, -0.02779921],\n", | |
" [-0.2144253 , -0.03360179, 0.2760145 , -0.2094653 ,\n", | |
" 0.08653881, -0.05996756, 0.12806974, -0.14404465,\n", | |
" 0.2911163 , 0.15784328, 0.14367735, -0.04786452,\n", | |
" 0.255635 , -0.02759974],\n", | |
" [-0.22588326, -0.03466735, 0.28714904, -0.22110319,\n", | |
" 0.09127538, -0.06302971, 0.1321414 , -0.1507664 ,\n", | |
" 0.30306035, 0.16645658, 0.15212543, -0.04863242,\n", | |
" 0.2660096 , -0.02702914],\n", | |
" [-0.23501435, -0.03541961, 0.29554257, -0.23043671,\n", | |
" 0.09506495, -0.0654511 , 0.13501821, -0.15599361,\n", | |
" 0.31209937, 0.17334443, 0.15896013, -0.04900678,\n", | |
" 0.27384117, -0.02630292]]], dtype=float32)\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(logits)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 84, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"array([[1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)\n", | |
"2.6709042\n" | |
] | |
} | |
], | |
"source": [ | |
"pprint(masks)\n", | |
"pprint(loss)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment