standalone CTC in Theano
Running the test suite fails during Theano's graph optimization:
$ nosetests .
Using cuDNN version 7005 on context None
Preallocating 9494/11170 Mb (0.850000) on cuda1
Mapped name None to device cuda1: GeForce GTX 1080 Ti (0000:03:00.0)
EEE
======================================================================
ERROR: test_forward_backward (test_ctc.TestCTC)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/granger/bug/test_ctc.py", line 58, in test_forward_backward
    alphas = alphas.eval()
  File "/home/granger/dev/Theano/theano/gof/graph.py", line 522, in eval
    self._fn_cache[inputs] = theano.function(inputs, self)
  File "/home/granger/dev/Theano/theano/compile/function.py", line 317, in function
    output_keys=output_keys)
  File "/home/granger/dev/Theano/theano/compile/pfunc.py", line 486, in pfunc
    output_keys=output_keys)
  File "/home/granger/dev/Theano/theano/compile/function_module.py", line 1839, in orig_function
    name=name)
  File "/home/granger/dev/Theano/theano/compile/function_module.py", line 1519, in __init__
    optimizer_profile = optimizer(fgraph)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 108, in __call__
    return self.optimize(fgraph)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 97, in optimize
    ret = self.apply(fgraph, *args, **kwargs)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 263, in apply
    self.failure_callback(e, self, optimizer)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 202, in warn
    raise exc
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 251, in apply
    sub_prof = optimizer.optimize(fgraph)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 97, in optimize
    ret = self.apply(fgraph, *args, **kwargs)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 2513, in apply
    lopt_change = self.process_node(fgraph, node, lopt)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 2039, in process_node
    lopt, node)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 1933, in warn_inplace
    return NavigatorOptimizer.warn(exc, nav, repl_pairs, local_opt, node)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 1919, in warn
    raise exc
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 2034, in process_node
    replacements = lopt.transform(node)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4989, in transform
    num, denum = self.simplify(list(orig_num), list(orig_denum), out.type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4833, in simplify
    out_type=out_type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4919, in simplify_constants
    out_type=out_type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 6328, in add_calculate
    v = reduce(np.add, num, zero) - reduce(np.add, denum, zero)
TypeError: numpy boolean subtract, the `-` operator, is deprecated, use the bitwise_xor, the `^` operator, or the logical_xor function instead.
-------------------- >> begin captured logging << --------------------
theano.gof.opt: ERROR: Optimization failure due to: local_add_canonizer
theano.gof.opt: ERROR: node: Elemwise{add,no_inplace}(InplaceDimShuffle{0,1,x}.0, InplaceDimShuffle{x,0,1}.0)
theano.gof.opt: ERROR: TRACEBACK:
theano.gof.opt: ERROR: Traceback (most recent call last):
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 2034, in process_node
    replacements = lopt.transform(node)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4989, in transform
    num, denum = self.simplify(list(orig_num), list(orig_denum), out.type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4833, in simplify
    out_type=out_type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4919, in simplify_constants
    out_type=out_type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 6328, in add_calculate
    v = reduce(np.add, num, zero) - reduce(np.add, denum, zero)
TypeError: numpy boolean subtract, the `-` operator, is deprecated, use the bitwise_xor, the `^` operator, or the logical_xor function instead.
.....
.....
..... > more error output
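
The root cause is NumPy 1.13+ turning subtraction of boolean arrays into a hard error: when boolean tensors are combined with arithmetic `+`, Theano's local_add_canonizer ends up calling boolean `-` on the constants and raises the TypeError above. A minimal sketch of the underlying NumPy behaviour, independent of Theano:

import numpy as np  # numpy >= 1.13

a = np.array([True, False])
b = np.array([True, True])
# a - b  # raises TypeError: numpy boolean subtract ... is deprecated
a ^ b                  # bitwise xor, the suggested replacement
np.logical_xor(a, b)   # equivalent logical form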
ctc.py
# Author: Nicolas Granger <nicolas.granger.m@gmail.com>
#
# Implements the connectionist temporal classification loss from:
# Graves, A., Fernández, S., Gomez, F., & Schmidhuber, J. (2006, June).
# Connectionist temporal classification: labelling unsegmented sequence data
# with recurrent neural networks. In Proceedings of the 23rd international
# conference on Machine learning (pp. 369-376). ACM.
# ftp://ftp.idsia.ch/pub/juergen/icml2006.pdf

import numpy as np
import theano
import theano.tensor as T
from theano.tensor import discrete_dtypes, continuous_dtypes


def isneginf(x, neginf=-1e9):
    return x < neginf


def logaddexp(x, y, magnitude=20):
    x, y = T.minimum(x, y), T.maximum(x, y)
    diff = T.minimum(y - x, magnitude)
    res = x + T.log(1 + T.exp(diff))
    return T.switch((y - x > magnitude), y, res)


def logsumexp(x, axis, keepdims=False):
    k = T.max(x, axis=axis, keepdims=True)
    res = T.log(T.sum(T.exp(x - k), axis=axis, keepdims=keepdims)) + k
    return T.switch(isneginf(k), -2e9, res)
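
# Sanity check (hypothetical values): logaddexp(0., 1.) should agree with
# np.logaddexp(0., 1.) ~= 1.3133, and logsumexp over two zeros should give
# log(2) ~= 0.6931; -2e9 stands in for log(0) throughout this file.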
def log_softmax(X, axis=-1, clip=None):
    k = T.max(X, axis=axis, keepdims=True)
    norm_X = X - k
    if clip is not None:
        norm_X = T.maximum(norm_X, clip)
    log_sum_exp_X = logsumexp(norm_X, axis=axis, keepdims=True)
    return norm_X - log_sum_exp_X


# Bits of the CTC algorithm ---------------------------------------------------

def insert_alternating_blanks(labels, blank_label):
    batch_size, label_size = labels.shape
    blanked_labels = T.zeros((batch_size, 2 * label_size + 1), dtype=np.int32)
    blanked_labels = T.set_subtensor(blanked_labels[:, 0::2], blank_label)
    blanked_labels = T.set_subtensor(blanked_labels[:, 1:-1:2], labels)
    return blanked_labels
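
# Worked example (hypothetical values): with a 5-class vocabulary and
# blank_label=4, labels [[2, 3]] become [[4, 2, 4, 3, 4]] -- a blank around
# every label, so each row has length 2 * label_size + 1.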
def ctc_forward(log_odds, seq_sizes,
                blanked_labels, label_sizes, not_repeated):
    seqsize, batch_sz, _ = log_odds.shape
    label_size = blanked_labels.shape[1]

    def step(t, a_tm1, log_odds_,
             seq_sizes_, blanked_labels_, label_sizes_, not_repeated_):
        y_t = log_odds_[t]
        a_t = a_tm1
        a_t = T.set_subtensor(
            a_t[:, 1:],
            logaddexp(a_t[:, 1:], a_tm1[:, :-1]))
        a_t = T.set_subtensor(
            a_t[:, 2:],
            logaddexp(a_t[:, 2:], T.switch(not_repeated_, a_tm1[:, :-2], -2e9)))
        # stop after a_T(|l'|); combine the boolean masks with logical `|`
        # rather than arithmetic `+`: adding booleans trips
        # local_add_canonizer on NumPy >= 1.13 (see the log above)
        mask = T.ge(t, seq_sizes_)[:, None] \
            | T.ge(T.arange(label_size)[None, :],
                   2 * label_sizes_[:, None] + 1)
        a_t = T.switch(
            isneginf(a_t) | mask, -2e9,
            a_t + y_t[T.arange(batch_sz)[:, None], blanked_labels_])
        return a_t

    alpha_init = -2e9 * T.ones((batch_sz, label_size))
    alpha_init = T.set_subtensor(alpha_init[:, 0], 0)

    alphas, _ = theano.scan(
        fn=step,
        sequences=[T.arange(seqsize)],
        outputs_info=alpha_init,
        non_sequences=[log_odds, seq_sizes, blanked_labels, label_sizes,
                       not_repeated],
        name="ctc_forward")

    return alphas
def ctc_backward(log_odds, seq_sizes,
                 blanked_labels, label_sizes, not_repeated):
    seqsize, batch_sz, _ = log_odds.shape
    label_size = blanked_labels.shape[1]

    def step(t, b_tp1, log_odds_,
             seq_sizes_, blanked_labels_, label_sizes_, not_repeated_):
        y_t = log_odds_[t]
        # raise b_{T+1}(|l'|) from log(0) to log(1) to initialize the
        # recursion; `&` (logical and) instead of `*` on booleans, for the
        # same canonicalization reason as in ctc_forward
        starter_t = T.eq(t, seq_sizes_ - 1)[:, None] \
            & T.eq((2 * label_sizes_)[:, None],
                   T.arange(label_size)[None, :])
        b_tp1_2lp1 = b_tp1[T.arange(batch_sz), 2 * label_sizes_]
        b_tp1 = T.set_subtensor(
            b_tp1_2lp1,
            T.switch(T.eq(t, seq_sizes_ - 1), 0, b_tp1_2lp1))
        b_tp1 = T.switch(starter_t, 0, b_tp1)  # initialize recursion

        b_t = b_tp1
        b_t = T.set_subtensor(
            b_t[:, :-1],
            logaddexp(b_t[:, :-1], b_tp1[:, 1:]))
        b_t = T.set_subtensor(
            b_t[:, :-2],
            logaddexp(b_t[:, :-2], T.switch(not_repeated_, b_tp1[:, 2:], -2e9)))
        b_t += y_t[T.arange(batch_sz)[:, None], blanked_labels_]
        b_t = T.switch(isneginf(b_t), -2e9, b_t)
        return b_t

    beta_init = -2e9 * T.ones((batch_sz, label_size))

    betas, _ = theano.scan(
        fn=step,
        sequences=[T.arange(seqsize)],
        outputs_info=beta_init,
        non_sequences=[log_odds, seq_sizes, blanked_labels, label_sizes,
                       not_repeated],
        go_backwards=True,
        name="ctc_backward")

    betas = betas[::-1, :, :]
    return betas
# Theano Op -------------------------------------------------------------------

def ctc_perform_graph(linout, seq_sizes, labels, label_sizes, blank):
    _, batch_size, voca_size = linout.shape

    logits = log_softmax(linout)
    blanked_labels = insert_alternating_blanks(labels, blank)
    not_repeated = T.neq(blanked_labels[:, 2:], blanked_labels[:, :-2])
    betas = ctc_backward(logits, seq_sizes,
                         blanked_labels, label_sizes, not_repeated)
    loss = - logaddexp(betas[0, :, 0], betas[0, :, 1])

    # alphas = ctc_forward(logits, seq_sizes,
    #                      blanked_labels, label_sizes, not_repeated)
    # loss = - logaddexp(
    #     alphas[seq_sizes - 1, T.arange(batch_size), 2 * label_sizes - 1],
    #     alphas[seq_sizes - 1, T.arange(batch_size), 2 * label_sizes])

    return logits, blanked_labels, not_repeated, betas, loss


def ctc_grad_graph(inputs, output_gradients):
    linout, seq_durations, labels, label_sizes, _ = inputs
    seq_size, batch_size, voca_size = linout.shape
    label_size = labels.shape[1]

    logits, blanked_labels, not_repeated, betas, loss = \
        ctc_perform_graph(*inputs)
    alphas = ctc_forward(logits, seq_durations,
                         blanked_labels, label_sizes, not_repeated)

    # log(sum_{s \in lab(l, k)} a_t(s) b_t(s))
    def fwbw_sum_step(k, s, labels_, ab_):
        s_view = s[:, T.arange(batch_size), labels_[:, k]]
        ab_view = ab_[:, :, k]
        next_sum = logaddexp(s_view, ab_view)
        s = T.set_subtensor(s_view, next_sum)
        return s

    ab = alphas + betas
    fwbw_sum = theano.scan(
        fn=fwbw_sum_step,
        sequences=[T.arange(2 * label_size + 1)],
        outputs_info=-2e9 * T.ones((seq_size, batch_size, voca_size)),
        non_sequences=[blanked_labels, ab],
        strict=True,
        name="fwbw_sum")[0][-1]

    A = loss[None, :, None] + logits \
        + logsumexp(fwbw_sum - logits, axis=2, keepdims=True)
    B = loss[None, :, None] + fwbw_sum - logits
    dloss_dy = T.exp(A) - T.exp(B)
    dloss_dy = T.switch(T.all(isneginf(fwbw_sum), axis=2, keepdims=True),
                        0, dloss_dy)

    return [dloss_dy * output_gradients[0][None, :, None],
            theano.gradient.disconnected_type(),
            theano.gradient.disconnected_type(),
            theano.gradient.disconnected_type(),
            theano.gradient.disconnected_type()]
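
# Note on the gradient above: with p(l|x) = exp(-loss), exp(B) is
# (1 / p(l|x)) * sum_{s in lab(l,k)} alpha_t(s) beta_t(s) / y_t(k), and
# exp(A) should reduce to the softmax output y_t(k), so exp(A) - exp(B)
# is the CTC gradient with respect to the pre-softmax activations from
# Graves et al. (2006).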
def make_ctc_op():
    preds_var = T.tensor3()
    seq_durations_var = T.ivector()
    labels_var = T.imatrix()
    label_sizes_var = T.ivector()
    blank_var = T.iscalar()

    _, _, _, _, loss = ctc_perform_graph(
        preds_var, seq_durations_var, labels_var,
        label_sizes_var, blank_var)

    return theano.OpFromGraph(
        inputs=[preds_var, seq_durations_var,
                labels_var, label_sizes_var, blank_var],
        outputs=[loss],
        grad_overrides=ctc_grad_graph,
        inline=True, name="ctcLossOp")


CTCLossOp = make_ctc_op()
# -----------------------------------------------------------------------------

def ctc_loss(linout, durations, labels, label_sizes, blank=-1):
    r"""Compute the Connectionist Temporal Classification loss [#graves2006]_.

    .. math:: L = - \ln \left( \sum_{\pi \in \mathcal{B}^{-1}(l)}
        P(\pi | y) \right)

    where :math:`y` is the sequence of predictions, :math:`l` the target
    label sequence without blanks or repetitions, :math:`\pi` ranges over
    the possible label assignments to the observations and
    :math:`\mathcal{B}` is the function that removes blanks and repetitions
    from a sequence of labels.

    Parameters
    ----------
    linout : Theano shared variable, expression or numpy array
        The input values for the softmax function with shape
        duration x batch_size x nclasses.
    durations : Theano shared variable, expression or numpy array
        An _integer_ vector of size batch_size containing the actual length
        of each sequence in preds.
    labels : Theano shared variable, expression or numpy array
        An _integer_ matrix of size batch_size x label_size containing the
        target labels.
    label_sizes : Theano shared variable, expression or numpy array
        An _integer_ vector of size batch_size containing the actual length
        of each sequence in labels.
    blank :
        The blank label class, by default the last index.

    Returns
    -------
    Theano tensor
        A vector expression with the CTC loss of each sequence.

    References
    ----------
    .. [#graves2006] Graves, A., Fernández, S., Gomez, F., & Schmidhuber, J.
       (2006, June). Connectionist temporal classification: labelling
       unsegmented sequence data with recurrent neural networks. In
       Proceedings of the 23rd international conference on Machine learning
       (pp. 369-376). ACM. ftp://ftp.idsia.ch/pub/juergen/icml2006.pdf
    """
    linout = T.as_tensor_variable(linout)
    durations = T.as_tensor_variable(durations)
    labels = T.as_tensor_variable(labels)
    label_sizes = T.as_tensor_variable(label_sizes)
    blank = T.cast(T.as_tensor_variable(blank), 'int32')

    if not (linout.dtype in continuous_dtypes and linout.ndim == 3):
        raise ValueError("preds must be a continuous tensor with 3 dimensions")
    if not (durations.dtype in discrete_dtypes and durations.ndim == 1):
        raise ValueError("durations must be an integer vector")
    if not (labels.dtype in discrete_dtypes and labels.ndim == 2):
        raise ValueError("labels must be an integer matrix")
    if not (label_sizes.dtype in discrete_dtypes and label_sizes.ndim == 1):
        raise ValueError("label_sizes must be an integer vector")
    if not (blank.dtype in discrete_dtypes and blank.ndim == 0):
        raise ValueError("blank must be an integer value")

    voca_size = T.cast(linout.shape[2], 'int32')
    labels = labels % voca_size
    blank = blank % voca_size

    return CTCLossOp(linout, durations, labels, label_sizes, blank)
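
A minimal usage sketch (hypothetical shapes, assuming the file above is saved as ctc.py):

import theano
import theano.tensor as T
from ctc import ctc_loss

linout = T.tensor3()       # duration x batch_size x nclasses, pre-softmax
durations = T.ivector()    # actual length of each input sequence
labels = T.imatrix()       # batch_size x label_size target labels
label_sizes = T.ivector()  # actual length of each label sequence

losses = ctc_loss(linout, durations, labels, label_sizes)  # one loss per sequence
grad = theano.grad(losses.sum(), wrt=linout)               # differentiable end-to-end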
test_ctc.py
import unittest

import numpy as np
import theano
import theano.tensor as T
from theano.tests import unittest_tools

from ctc import ctc_loss, ctc_forward, ctc_backward, \
    insert_alternating_blanks, isneginf, log_softmax


class TestCTC(unittest.TestCase):
    def setUp(self):
        unittest_tools.seed_rng()
    def test_forward_backward(self):
        batch_size = 6
        label_size = 7
        voca_size = 5
        seq_size = 10

        label_lengths = np.random.randint(0, label_size,
                                          size=(batch_size,), dtype=np.int32)
        label_lengths[0] = label_size  # extremum case
        label_lengths[1] = 0  # extremum case
        labels = np.array(
            [np.random.randint(0, voca_size - 1, size=label_size, dtype=np.int32)
             for _ in range(batch_size)])
        for i in range(batch_size):
            labels[i, label_lengths[i]:] = -1
        seq_durations = np.array([
            np.random.randint(max(1, label_lengths[i]), seq_size)
            for i in range(batch_size)], dtype=np.int32)

        linear_out = np.random.randn(seq_size, batch_size, voca_size) \
            .astype(np.float32)

        blank_class = -1
        blank_class = np.mod(blank_class, voca_size)
        labels = np.mod(labels, voca_size)

        log_odds = log_softmax(linear_out)
        blanked_labels = insert_alternating_blanks(T.mod(labels, voca_size),
                                                   blank_class)
        not_repeated = T.neq(blanked_labels[:, 2:], blanked_labels[:, :-2])
        alphas = ctc_forward(log_odds, seq_durations,
                             blanked_labels, label_lengths, not_repeated)
        betas = ctc_backward(log_odds, seq_durations,
                             blanked_labels, label_lengths, not_repeated)

        preds = log_softmax(linear_out)
        y_blanks = preds[:, T.arange(batch_size)[:, None], blanked_labels]
        p_l = T.sum(T.exp(alphas + betas - y_blanks), axis=2)

        alphas = alphas.eval()
        betas = betas.eval()
        preds = preds.eval()

        for i in range(batch_size):
            assert np.allclose(alphas[0, i, 0], preds[0, i, -1])
            if label_lengths[i] > 0:
                assert np.allclose(alphas[0, i, 1], preds[0, i, labels[i, 0]])
            else:
                assert isneginf(alphas[0, i, 1])
            assert np.all(isneginf(alphas[0, i, 2:]))

        for i in range(batch_size):
            t = seq_durations[i] - 1
            l = label_lengths[i]
            assert np.allclose(betas[t, i, 2 * l], preds[t, i, -1])
            if l > 0:
                assert np.allclose(betas[t, i, 2 * l - 1],
                                   preds[t, i, labels[i, l - 1]])
                assert np.all(isneginf(betas[t, i, :max(l - 2, 0)]))
            else:
                assert np.all(isneginf(betas[t, i, 1:]))

        p_l = p_l.eval()
        for i in range(batch_size):
            assert np.allclose(p_l[:seq_durations[i], i], p_l[0, i])
            a, b = max(0, 2 * label_lengths[i] - 1), 2 * label_lengths[i] + 1
            p_li = np.exp(alphas[seq_durations[i] - 1, i, a:b]).sum()
            assert np.allclose(p_li, p_l[0, i])
            p_li = np.exp(betas[0, i, :2]).sum()
            assert np.allclose(p_li, p_l[0, i])
    def test_simple_precomputed(self):
        # Test obtained from Torch tutorial at:
        # https://github.com/baidu-research/warp-ctc/blob/master/torch_binding/TUTORIAL.md
        linear_out = np.asarray([
            [[0, 0, 0, 0, 0], [1, 2, 3, 4, 5], [-5, -4, -3, -2, -1]],
            [[0, 0, 0, 0, 0], [6, 7, 8, 9, 10], [-10, -9, -8, -7, -6]],
            [[0, 0, 0, 0, 0], [11, 12, 13, 14, 15], [-15, -14, -13, -12, -11]]
        ], dtype=np.float32)
        seq_sizes = np.asarray([1, 3, 3], dtype=np.int32)
        labels = np.asarray([[1, 0], [3, 3], [2, 3]], dtype=np.int32)
        label_sizes = np.asarray([1, 2, 2], dtype=np.int32)
        expected_losses = np.asarray([1.609437943, 7.355742931, 4.938849926],
                                     dtype=np.float32)
        blank = 0
        expected_grad = np.asarray([
            [[0.2, -0.8, 0.2, 0.2, 0.2],
             [0.01165623125, 0.03168492019, 0.08612854034, -0.7658783197, 0.636408627],
             [-0.02115798369, 0.03168492019, -0.8810571432, 0.2341216654, 0.636408627]],
            [[0, 0, 0, 0, 0],
             [-0.9883437753, 0.03168492019, 0.08612854034, 0.2341216654, 0.636408627],
             [-0.02115798369, 0.03168492019, -0.1891518533, -0.4577836394, 0.636408627]],
            [[0, 0, 0, 0, 0],
             [0.01165623125, 0.03168492019, 0.08612854034, -0.7658783197, 0.636408627],
             [-0.02115798369, 0.03168492019, 0.08612854034, -0.7330639958, 0.636408627]]
        ], dtype=np.float32)

        linear_out_var = T.as_tensor_variable(linear_out)
        losses = ctc_loss(
            linear_out_var, seq_sizes, labels, label_sizes, blank)
        assert np.allclose(losses.eval(), expected_losses, atol=1)

        grad = theano.grad(losses.sum(), wrt=linear_out_var)
        assert np.allclose(grad.eval(), expected_grad, rtol=.001, atol=1)
    def test_random(self):
        batch_size = 16
        label_size = 5
        voca_size = 4
        seq_size = 20

        label_sizes = np.random.randint(
            0, label_size, size=(batch_size,), dtype=np.int32)
        label_sizes[0] = label_size
        label_sizes[1] = 0
        label_sizes[2] = 5
        label_sizes[3] = 5

        labels = np.random.randint(
            0, voca_size - 1,
            size=(batch_size, label_size), dtype=np.int32)
        labels[3] = 0

        seq_sizes = np.array([
            np.random.randint(max(1, label_sizes[i]), seq_size)
            for i in range(batch_size)], dtype=np.int32)
        seq_sizes[2] = 4

        linear_out = np.random.randn(
            seq_size, batch_size, voca_size).astype(np.float32)

        # check edge cases
        # TODO

        # check the gradient can be computed at all
        linear_out_var = T.tensor3()
        preds = T.nnet.softmax(
            linear_out_var.reshape((-1, voca_size))
        ).reshape((seq_size, batch_size, voca_size))
        g = theano.grad(ctc_loss(preds, seq_sizes,
                                 labels, label_sizes).sum(),
                        wrt=linear_out_var).eval(
            {linear_out_var: linear_out.astype(np.float32)})
        assert not np.any(np.isnan(g))

        # check correctness against finite difference approximation
        def f(linear_out_):
            preds_ = T.nnet.softmax(
                linear_out_.reshape((-1, voca_size))
            ).reshape((seq_size, batch_size, voca_size))
            loss = ctc_loss(preds_, seq_sizes, labels, label_sizes)
            # prevent finite differences from failing
            loss = T.switch(isneginf(-loss), 0, loss)
            return loss

        unittest_tools.verify_grad(f, [linear_out], abs_tol=0.05, rel_tol=0.05)
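
To reproduce the log at the top, save the two files above as ctc.py and test_ctc.py in the same directory (the names the imports and the traceback paths point to) and run `nosetests .`.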