Code snippets by Trung Tran (ChunML). New site: https://trungtran.io
import tensorflow as tf

def reinitializable_input_fn(filenames, labels, train_val_ratio=0.8):
    # Split the file list into train and validation partitions
    num_files = len(filenames)
    num_train_files = int(num_files * train_val_ratio)
    train_filenames = filenames[:num_train_files]
    train_labels = labels[:num_train_files]
    val_filenames = filenames[num_train_files:]
    val_labels = labels[num_train_files:]

    train_data = tf.data.Dataset.from_tensor_slices(
        (train_filenames, train_labels))
    val_data = tf.data.Dataset.from_tensor_slices(
        (val_filenames, val_labels))

    # One iterator serves both datasets; each init op rebinds it
    iterator = tf.data.Iterator.from_structure(
        train_data.output_types, train_data.output_shapes)
    next_element = iterator.get_next()
    train_init_op = iterator.make_initializer(train_data)
    val_init_op = iterator.make_initializer(val_data)
    return next_element, train_init_op, val_init_op
next_element, train_init_op, val_init_op = reinitializable_input_fn(filenames, labels)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print('Training...')
    for _ in range(5):
        sess.run(train_init_op)
        imgs, labels = sess.run(next_element)
        print('Image shape:', imgs.shape)
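Because one iterator backs both datasets, switching to the validation split only requires running val_init_op in the same session. A minimal sketch continuing inside the with tf.Session() block above (the batch count of 5 is arbitrary):

    print('Validating...')
    sess.run(val_init_op)  # rebind the shared iterator to the validation data
    for _ in range(5):
        imgs, labels = sess.run(next_element)
        print('Image shape:', imgs.shape)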
from collections import Counter
import numpy as np

def get_data_from_file(train_file, batch_size, seq_size):
    with open(train_file, encoding='utf-8') as f:
        text = f.read()
    text = text.split()

    # Build the vocabulary, most frequent words first
    word_counts = Counter(text)
    sorted_vocab = sorted(word_counts, key=word_counts.get, reverse=True)
    int_to_vocab = {k: w for k, w in enumerate(sorted_vocab)}
    vocab_to_int = {w: k for k, w in int_to_vocab.items()}
    n_vocab = len(int_to_vocab)

    # Encode the corpus as ids and trim to a whole number of batches
    int_text = [vocab_to_int[w] for w in text]
    num_batches = int(len(int_text) / (seq_size * batch_size))
    in_text = int_text[:num_batches * batch_size * seq_size]

    # Targets are the inputs shifted left by one word, wrapping at the end
    out_text = np.zeros_like(in_text)
    out_text[:-1] = in_text[1:]
    out_text[-1] = in_text[0]
    in_text = np.reshape(in_text, (batch_size, -1))
    out_text = np.reshape(out_text, (batch_size, -1))

    return int_to_vocab, vocab_to_int, n_vocab, in_text, out_text
print(in_text[:10, :10])
print(out_text[:10, :10])
'''
in_text:
[[ 412  413  414  415   42  416  417    1  418  419]
 [ 247    1    5  479  144   44   33   70  145   21]
 [  92   37   43   25   72  263    7   18  523   24]
 [   3  590  591  592  593  594    3  595    1   95]
 [  54  650   54   80  182    3  651    6  305   19]
 [ 715  716  717  718    3    0  719    1  720    0]
 ...
'''
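get_data_from_file returns the whole corpus as two (batch_size, num_batches * seq_size) arrays; at training time these still have to be cut into seq_size-wide windows. A minimal generator sketch (the name get_batches and its shape handling are assumptions, not part of the snippet above):

def get_batches(in_text, out_text, batch_size, seq_size):
    # Walk left to right over the encoded corpus, yielding one
    # (batch_size, seq_size) input/target pair per step
    num_batches = np.prod(in_text.shape) // (seq_size * batch_size)
    for i in range(0, num_batches * seq_size, seq_size):
        yield in_text[:, i:i + seq_size], out_text[:, i:i + seq_size]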
def network(batch_size, seq_size, embedding_size, lstm_size, keep_prob, n_vocab, reuse=False):
    with tf.variable_scope('LSTM', reuse=reuse):
        # Placeholders for input and target word-id sequences
        in_op = tf.placeholder(tf.int32, [None, seq_size])
        out_op = tf.placeholder(tf.int32, [None, seq_size])

        # Map each word id to a dense embedding vector
        embedding = tf.get_variable('embedding_weights', [n_vocab, embedding_size])
        embed = tf.nn.embedding_lookup(embedding, in_op)
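        # The preview ends here. What follows is a hedged sketch of one common
        # way to finish this kind of LSTM language-model graph in TF 1.x
        # (BasicLSTMCell + dropout + dynamic_rnn + a dense projection); it is
        # an assumption, not the author's original code.
        cell = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
        initial_state = cell.zero_state(batch_size, tf.float32)

        # Unroll the LSTM over the embedded sequence
        output, state = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)

        # Project the LSTM outputs back to vocabulary logits
        logits = tf.layers.dense(output, n_vocab)
        preds = tf.nn.softmax(logits, name='predictions')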