@nemo · Last active December 7, 2016 23:17
keras_model_rnn_error.py
theano/scan_module/scan_perform.pyx in theano.scan_module.scan_perform.perform (/home/ubuntu/.theano/compiledir_Linux-4.4--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.12-64/scan_perform/mod.cpp:4193)()
ValueError: dimension mismatch in args to gemm (64,256)x(256,256)->(1,256)
Apply node that caused the error: GpuGemm{no_inplace}(GpuSubtensor{::, int64::}.0, TensorConstant{0.20000000298}, <CudaNdarrayType(float32, matrix)>, lstm_7_U_o_copy[cuda], TensorConstant{0.20000000298})
Toposort index: 5
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(1, 256), (), (64, 256), (256, 256), ()]
Inputs strides: [(0, 1), (), (256, 1), (256, 1), ()]
Inputs values: ['not shown', array(0.20000000298023224, dtype=float32), 'not shown', 'not shown', array(0.20000000298023224, dtype=float32)]
Outputs clients: [[GpuElemwise{Composite{(clip((i0 + i1), i2, i3) * tanh(i4))},no_inplace}(CudaNdarrayConstant{[[ 0.5]]}, GpuGemm{no_inplace}.0, CudaNdarrayConstant{[[ 0.]]}, CudaNdarrayConstant{[[ 1.]]}, GpuElemwise{Composite{((clip((i0 + i1), i2, i3) * i4) + (clip((i0 + i5), i2, i3) * tanh(i6)))},no_inplace}.0)]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
Apply node that caused the error: forall_inplace,gpu,scan_fn}(TensorConstant{10}, GpuDimShuffle{1,0,2}.0, GpuIncSubtensor{InplaceSet;:int64:}.0, GpuIncSubtensor{InplaceSet;:int64:}.0, TensorConstant{10}, lstm_7_U_o, lstm_7_U_f, lstm_7_U_i, lstm_7_U_c)
Toposort index: 75
Inputs types: [TensorType(int64, scalar), CudaNdarrayType(float32, 3D), CudaNdarrayType(float32, 3D), CudaNdarrayType(float32, 3D), TensorType(int64, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix)]
Inputs shapes: [(), (10, 1, 1024), (2, 64, 256), (2, 64, 256), (), (256, 256), (256, 256), (256, 256), (256, 256)]
Inputs strides: [(), (1024, 0, 1), (16384, 256, 1), (16384, 256, 1), (), (256, 1), (256, 1), (256, 1), (256, 1)]
Inputs values: [array(10), 'not shown', 'not shown', 'not shown', array(10), 'not shown', 'not shown', 'not shown', 'not shown']
Outputs clients: [[GpuSubtensor{int64}(forall_inplace,gpu,scan_fn}.0, Constant{1})], [GpuSubtensor{int64}(forall_inplace,gpu,scan_fn}.1, Constant{1})], [GpuDimShuffle{0,1,2}(forall_inplace,gpu,scan_fn}.2)]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
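Per the two HINT lines above, the Theano flags can be set before Theano is imported to get a back-trace for the failing node. A minimal sketch (not part of the original script):

import os
# Must run before theano/keras are imported, per the HINTs in the traceback above:
# fast_compile disables most optimizations so the node gets a creation back-trace,
# and exception_verbosity=high prints a debugprint of the failing apply node.
os.environ['THEANO_FLAGS'] = 'optimizer=fast_compile,exception_verbosity=high'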
from __future__ import division, print_function  # __future__ imports must come first
import re
import pickle
from theano.sandbox import cuda
%matplotlib inline
import utils; reload(utils)
from utils import *
from keras.layers import *
from keras.layers.normalization import *
from keras.datasets import imdb
idx = imdb.get_word_index()
path = get_file('imdb_full.pkl',
                origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl',
                md5_hash='d091312047c43cf9e4e38fef92437263')
f = open(path, 'rb')
(x_train, labels_train), (x_test, labels_test) = pickle.load(f)
idx2word = {v: k for k, v in idx.iteritems()}
articles = [' '.join([idx2word[word] for word in review]) for review in x_train]
idx = {}
article_words = [name.decode('unicode_escape').encode('ascii', 'ignore').split(" ")
                 for name in articles]
words = [item for sublist in article_words for item in sublist]
# Count word frequencies
for word in words:
    if word not in idx:
        idx[word] = 1
    else:
        idx[word] += 1
idx_arr = sorted(idx, key=idx.get, reverse=True)
vocab_size = 5000
idx2word = {i: v for i, v in enumerate(idx_arr)}  # avoids the O(n^2) idx_arr.index() lookup
word2idx = {v: k for k, v in idx2word.iteritems()}
def load_vectors(loc):
    return (load_array(loc + '.dat'),
            pickle.load(open(loc + '_words.pkl', 'rb')),
            pickle.load(open(loc + '_idx.pkl', 'rb')))
vecs, words, wordidx = load_vectors('data/6B.50d')
def create_emb():
    n_fact = vecs.shape[1]
    emb = np.zeros((vocab_size, n_fact))
    for i in range(1, len(emb)):
        word = idx2word[i]
        if word and re.match(r"^[a-zA-Z0-9\-]*$", word) and word in wordidx:
            src_idx = wordidx[word]
            emb[i] = vecs[src_idx]
        else:
            # If we can't find the word in glove, randomly initialize
            emb[i] = normal(scale=0.6, size=(n_fact,))
    # This is our "rare word" id - we want to randomly initialize
    emb[-1] = normal(scale=0.6, size=(n_fact,))
    emb /= 3
    return emb
emb = create_emb()
n_fact = vecs.shape[1]
word_length = min(10, int(np.floor(np.mean([len(wds) for wds in article_words]))))
batch_size = 64
n_hidden = 256
c_in_dat = [[np.clip(word2idx[article_words[articleidx][widx]], 0, vocab_size - 1)
             for widx in range(0, min(len(article_words[articleidx]) - 1, word_length - 1))]
            for articleidx in range(len(article_words))]
c_out_dat = [[np.clip(word2idx[article_words[articleidx][widx]], 0, vocab_size - 1)
              for widx in range(1, min(len(article_words[articleidx]), word_length))]
             for articleidx in range(len(article_words))]
x_rnn = sequence.pad_sequences(c_in_dat, maxlen=word_length, value=0)
ys = sequence.pad_sequences(c_out_dat, maxlen=word_length, value=0)
x_rnn = x_rnn.clip(0, vocab_size - 1)
ys = ys.clip(0, vocab_size - 1)  # clip the targets before expanding; y_rnn doesn't exist yet
y_rnn = np.expand_dims(np.array(ys), -1)  # sparse targets need a trailing singleton dim
model = Sequential([
    Embedding(vocab_size, n_fact, input_length=word_length,
              weights=[emb],
              trainable=False,
              batch_input_shape=(batch_size, word_length)),
    BatchNormalization(),
    LSTM(n_hidden, return_sequences=True, stateful=True),
    TimeDistributed(Dense(vocab_size, activation='softmax')),
])
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(lr=1e-15))
mx = len(x_rnn)//batch_size*batch_size  # stateful model: truncate to a whole number of 64-row batches
model.fit(x_rnn[:mx], y_rnn[:mx], batch_size=batch_size, nb_epoch=4, shuffle=False)
def get_nexts_keras(inp):
    idxs = [word2idx[c] for c in inp]
    arr = np.array(idxs)[np.newaxis, :]
    p = model.predict(arr)[0]
    print(list(inp))
    return [idx2word[np.argmax(o)] for o in p]
get_nexts_keras(['', 'that', '', '', '', '', '', '', '', ''])
n_hidden = 256
batch_size = 64
word_length = int(np.floor(np.mean([len(wds) for wds in article_words])))  # becomes 8
n_fact = vecs.shape[1]  # 50
model = Sequential([
    Embedding(vocab_size, n_fact, input_length=word_length,
              weights=[emb],
              trainable=False,
              batch_input_shape=(batch_size, word_length)),
    BatchNormalization(),
    LSTM(n_hidden, return_sequences=True, stateful=True),
    TimeDistributed(Dense(vocab_size, activation='softmax')),
])
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(lr=1e-15))
model.fit(x_rnn[:mx], y_rnn[:mx], batch_size=batch_size, nb_epoch=1, shuffle=False)  # works just fine
def get_nexts_keras(inp):
    idxs = [word2idx[c] for c in inp]
    arr = np.array(idxs)[np.newaxis, :]
    p = model.predict(arr, verbose=True)[0]
    print(list(inp))
    return [idx_arr[np.argmax(o)] for o in p]
get_nexts_keras(["Netflix", '', '', '', '', '', '', ''])
# Throws error (attached)
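A likely cause, given the gemm shapes (64,256)x(256,256)->(1,256) in the traceback: the model is stateful with batch_input_shape=(batch_size, word_length), so predict() must receive exactly 64 rows per batch, but get_nexts_keras passes a single one. A sketch of a workaround that tiles the sample to fill the fixed batch; the tiling approach and the function name are mine, not from the original gist:

def get_nexts_keras_tiled(inp):
    # A stateful LSTM compiled with batch_input_shape=(64, ...) requires
    # exactly 64 rows per batch at predict time as well as at fit time.
    idxs = [word2idx[c] for c in inp]
    arr = np.array(idxs)[np.newaxis, :]
    arr = np.repeat(arr, batch_size, axis=0)  # copy the one sample into all 64 rows
    p = model.predict(arr, batch_size=batch_size)[0]  # keep the first row's predictions
    print(list(inp))
    return [idx_arr[np.argmax(o)] for o in p]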