keras_model_rnn_error.py
theano/scan_module/scan_perform.pyx in theano.scan_module.scan_perform.perform (/home/ubuntu/.theano/compiledir_Linux-4.4--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.12-64/scan_perform/mod.cpp:4193)()
ValueError: dimension mismatch in args to gemm (64,256)x(256,256)->(1,256)
Apply node that caused the error: GpuGemm{no_inplace}(GpuSubtensor{::, int64::}.0, TensorConstant{0.20000000298}, <CudaNdarrayType(float32, matrix)>, lstm_7_U_o_copy[cuda], TensorConstant{0.20000000298})
Toposort index: 5
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(1, 256), (), (64, 256), (256, 256), ()]
Inputs strides: [(0, 1), (), (256, 1), (256, 1), ()]
Inputs values: ['not shown', array(0.20000000298023224, dtype=float32), 'not shown', 'not shown', array(0.20000000298023224, dtype=float32)]
Outputs clients: [[GpuElemwise{Composite{(clip((i0 + i1), i2, i3) * tanh(i4))},no_inplace}(CudaNdarrayConstant{[[ 0.5]]}, GpuGemm{no_inplace}.0, CudaNdarrayConstant{[[ 0.]]}, CudaNdarrayConstant{[[ 1.]]}, GpuElemwise{Composite{((clip((i0 + i1), i2, i3) * i4) + (clip((i0 + i5), i2, i3) * tanh(i6)))},no_inplace}.0)]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
Apply node that caused the error: forall_inplace,gpu,scan_fn}(TensorConstant{10}, GpuDimShuffle{1,0,2}.0, GpuIncSubtensor{InplaceSet;:int64:}.0, GpuIncSubtensor{InplaceSet;:int64:}.0, TensorConstant{10}, lstm_7_U_o, lstm_7_U_f, lstm_7_U_i, lstm_7_U_c)
Toposort index: 75
Inputs types: [TensorType(int64, scalar), CudaNdarrayType(float32, 3D), CudaNdarrayType(float32, 3D), CudaNdarrayType(float32, 3D), TensorType(int64, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix)]
Inputs shapes: [(), (10, 1, 1024), (2, 64, 256), (2, 64, 256), (), (256, 256), (256, 256), (256, 256), (256, 256)]
Inputs strides: [(), (1024, 0, 1), (16384, 256, 1), (16384, 256, 1), (), (256, 1), (256, 1), (256, 1), (256, 1)]
Inputs values: [array(10), 'not shown', 'not shown', 'not shown', array(10), 'not shown', 'not shown', 'not shown', 'not shown']
Outputs clients: [[GpuSubtensor{int64}(forall_inplace,gpu,scan_fn}.0, Constant{1})], [GpuSubtensor{int64}(forall_inplace,gpu,scan_fn}.1, Constant{1})], [GpuDimShuffle{0,1,2}(forall_inplace,gpu,scan_fn}.2)]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
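
Reading the traceback: the failing gemm multiplies a (64, 256) activation batch by the (256, 256) recurrent matrix lstm_7_U_o but is asked to produce a (1, 256) result, and the outer scan node shows an input sequence with a batch dimension of 1 (shape (10, 1, 1024)) against saved LSTM states with a batch dimension of 64 (shape (2, 64, 256)). This pattern is consistent with calling predict() on a single sample against a stateful model whose batch_input_shape fixes the batch size at 64; a hedged workaround sketch follows the code below.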
from __future__ import division, print_function  # must precede all other statements
import re
import pickle
from theano.sandbox import cuda
%matplotlib inline
import utils; reload(utils)
from utils import *
from keras.layers import *
from keras.layers.normalization import *
from keras.datasets import imdb
idx = imdb.get_word_index()
path = get_file('imdb_full.pkl',
                origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl',
                md5_hash='d091312047c43cf9e4e38fef92437263')
f = open(path, 'rb')
(x_train, labels_train), (x_test, labels_test) = pickle.load(f)
idx2word = {v: k for k, v in idx.iteritems()}
articles = [' '.join([idx2word[word] for word in review]) for review in x_train]
idx = {}
article_words = [name.decode('unicode_escape').encode('ascii', 'ignore').split(" ") for name in articles]
words = [item for sublist in article_words for item in sublist]
# Count word frequencies
for word in words:
    if word not in idx:
        idx[word] = 1
    else:
        idx[word] += 1
idx_arr = sorted(idx, key=idx.get, reverse=True)  # vocabulary sorted by frequency, descending
vocab_size = 5000
idx2word = {i: v for i, v in enumerate(idx_arr)}
word2idx = {v: k for k, v in idx2word.iteritems()}
def load_vectors(loc):
    return (load_array(loc+'.dat'),
            pickle.load(open(loc+'_words.pkl', 'rb')),
            pickle.load(open(loc+'_idx.pkl', 'rb')))
vecs, words, wordidx = load_vectors('data/6B.50d')
def create_emb():
    n_fact = vecs.shape[1]
    emb = np.zeros((vocab_size, n_fact))
    for i in range(1, len(emb)):
        word = idx2word[i]
        if word and re.match(r"^[a-zA-Z0-9\-]*$", word) and word in wordidx:
            src_idx = wordidx[word]
            emb[i] = vecs[src_idx]
        else:
            # If we can't find the word in glove, randomly initialize
            emb[i] = normal(scale=0.6, size=(n_fact,))
    # This is our "rare word" id - we want to randomly initialize
    emb[-1] = normal(scale=0.6, size=(n_fact,))
    emb /= 3
    return emb
emb = create_emb()
n_fact = vecs.shape[1]
word_length = min(10, int(np.floor(np.mean([len(wds) for wds in article_words]))))
batch_size = 64
n_hidden = 256
c_in_dat = [[np.clip(word2idx[article_words[articleidx][widx]], 0, vocab_size-1)
             for widx in range(0, min(len(article_words[articleidx])-1, word_length-1))]
            for articleidx in range(len(article_words))]
c_out_dat = [[np.clip(word2idx[article_words[articleidx][widx]], 0, vocab_size-1)
              for widx in range(1, min(len(article_words[articleidx]), word_length))]
             for articleidx in range(len(article_words))]
x_rnn = sequence.pad_sequences(c_in_dat, maxlen=word_length, value=0)
ys = sequence.pad_sequences(c_out_dat, maxlen=word_length, value=0)
x_rnn = x_rnn.clip(0, vocab_size-1)
y_rnn = np.expand_dims(np.array(ys), -1)  # define y_rnn from ys before clipping it
y_rnn = y_rnn.clip(0, vocab_size-1)
model = Sequential([
    Embedding(vocab_size, n_fact, input_length=word_length,
              weights=[emb],
              trainable=False,
              batch_input_shape=(batch_size, word_length)),
    BatchNormalization(),
    LSTM(n_hidden, return_sequences=True, stateful=True),
    TimeDistributed(Dense(vocab_size, activation='softmax')),
])
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(lr=1e-15))
mx = len(x_rnn)//batch_size*batch_size  # truncate to a whole number of batches; a stateful model needs full batches
model.fit(x_rnn[:mx], y_rnn[:mx], batch_size=batch_size, nb_epoch=4, shuffle=False)
def get_nexts_keras(inp):
    idxs = [word2idx[c] for c in inp]
    arr = np.array(idxs)[np.newaxis, :]
    p = model.predict(arr)[0]
    print(list(inp))
    return [idx2word[np.argmax(o)] for o in p]
get_nexts_keras(['', 'that', '', '', '', '', '', '', '', ''])
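# Note (not in the original gist): the attached traceback - a scan over 10
# timesteps with a batch of 1 against saved states of shape (2, 64, 256) -
# appears to match this call: predict() receives a single row while the
# stateful LSTM was compiled with batch_input_shape=(64, word_length).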
n_hidden = 256
batch_size = 64
word_length = int(np.floor(np.mean([len(wds) for wds in article_words])))  # becomes 8
n_fact = vecs.shape[1]  # 50
model = Sequential([
    Embedding(vocab_size, n_fact, input_length=word_length,
              weights=[emb],
              trainable=False,
              batch_input_shape=(batch_size, word_length)),
    BatchNormalization(),
    LSTM(n_hidden, return_sequences=True, stateful=True),
    TimeDistributed(Dense(vocab_size, activation='softmax')),
])
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(lr=1e-15))
model.fit(x_rnn[:mx], y_rnn[:mx], batch_size=batch_size, nb_epoch=1, shuffle=False)  # works just fine
def get_nexts_keras(inp):
    idxs = [word2idx[c] for c in inp]
    arr = np.array(idxs)[np.newaxis, :]
    p = model.predict(arr, verbose=True)[0]
    print(list(inp))
    return [idx_arr[np.argmax(o)] for o in p]
get_nexts_keras(["Netflix", '', '', '', '', '', '', ''])
# Throws error (attached)
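
# A hedged workaround sketch, not part of the original gist: a stateful model
# compiled with batch_input_shape=(batch_size, word_length) expects every
# batch - including the one passed to predict() - to hold exactly batch_size
# rows, which matches the (1, 256) vs (64, 256) gemm mismatch above. Tiling
# the single query sequence up to a full batch and reading back row 0 keeps
# the batch dimension consistent. get_nexts_keras_batched is a hypothetical
# helper name.
def get_nexts_keras_batched(inp):
    idxs = [word2idx[c] for c in inp]
    arr = np.tile(np.array(idxs)[np.newaxis, :], (batch_size, 1))  # shape (64, word_length)
    p = model.predict(arr, batch_size=batch_size)[0]  # predictions for the first (real) row
    print(list(inp))
    return [idx_arr[np.argmax(o)] for o in p]
# An alternative is to build an identical model with batch_input_shape=(1, word_length)
# and copy the trained weights across with inference_model.set_weights(model.get_weights()).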