@nlgranger
Last active January 24, 2018 09:57
standalone CTC in Theano
$ nosetests .
Using cuDNN version 7005 on context None
Preallocating 9494/11170 Mb (0.850000) on cuda1
Mapped name None to device cuda1: GeForce GTX 1080 Ti (0000:03:00.0)
EEE
======================================================================
ERROR: test_forward_backward (test_ctc.TestCTC)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/granger/bug/test_ctc.py", line 58, in test_forward_backward
    alphas = alphas.eval()
  File "/home/granger/dev/Theano/theano/gof/graph.py", line 522, in eval
    self._fn_cache[inputs] = theano.function(inputs, self)
  File "/home/granger/dev/Theano/theano/compile/function.py", line 317, in function
    output_keys=output_keys)
  File "/home/granger/dev/Theano/theano/compile/pfunc.py", line 486, in pfunc
    output_keys=output_keys)
  File "/home/granger/dev/Theano/theano/compile/function_module.py", line 1839, in orig_function
    name=name)
  File "/home/granger/dev/Theano/theano/compile/function_module.py", line 1519, in __init__
    optimizer_profile = optimizer(fgraph)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 108, in __call__
    return self.optimize(fgraph)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 97, in optimize
    ret = self.apply(fgraph, *args, **kwargs)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 263, in apply
    self.failure_callback(e, self, optimizer)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 202, in warn
    raise exc
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 251, in apply
    sub_prof = optimizer.optimize(fgraph)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 97, in optimize
    ret = self.apply(fgraph, *args, **kwargs)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 2513, in apply
    lopt_change = self.process_node(fgraph, node, lopt)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 2039, in process_node
    lopt, node)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 1933, in warn_inplace
    return NavigatorOptimizer.warn(exc, nav, repl_pairs, local_opt, node)
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 1919, in warn
    raise exc
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 2034, in process_node
    replacements = lopt.transform(node)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4989, in transform
    num, denum = self.simplify(list(orig_num), list(orig_denum), out.type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4833, in simplify
    out_type=out_type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4919, in simplify_constants
    out_type=out_type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 6328, in add_calculate
    v = reduce(np.add, num, zero) - reduce(np.add, denum, zero)
TypeError: numpy boolean subtract, the `-` operator, is deprecated, use the bitwise_xor, the `^` operator, or the logical_xor function instead.
-------------------- >> begin captured logging << --------------------
theano.gof.opt: ERROR: Optimization failure due to: local_add_canonizer
theano.gof.opt: ERROR: node: Elemwise{add,no_inplace}(InplaceDimShuffle{0,1,x}.0, InplaceDimShuffle{x,0,1}.0)
theano.gof.opt: ERROR: TRACEBACK:
theano.gof.opt: ERROR: Traceback (most recent call last):
  File "/home/granger/dev/Theano/theano/gof/opt.py", line 2034, in process_node
    replacements = lopt.transform(node)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4989, in transform
    num, denum = self.simplify(list(orig_num), list(orig_denum), out.type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4833, in simplify
    out_type=out_type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 4919, in simplify_constants
    out_type=out_type)
  File "/home/granger/dev/Theano/theano/tensor/opt.py", line 6328, in add_calculate
    v = reduce(np.add, num, zero) - reduce(np.add, denum, zero)
TypeError: numpy boolean subtract, the `-` operator, is deprecated, use the bitwise_xor, the `^` operator, or the logical_xor function instead.
.....
.....
..... > more error output
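For context, the failing node is an elementwise add of two broadcast boolean tensors, which presumably originates from one of the boolean mask sums in the code below (e.g. `mask = T.ge(...) + T.ge(...)` or `isneginf(a_t) + mask` inside `ctc_forward.step`): Theano's `local_add_canonizer` then attempts a numpy subtraction on boolean operands while folding constants. The snippet below is a hypothetical minimal reproduction, not part of the gist and not verified against this exact Theano revision; it only sketches how a boolean add node of the same form could reach the same optimizer path.

# Hypothetical repro sketch (assumed trigger, untested): compile a graph whose
# output is the sum of two broadcast boolean masks, mirroring the failing node
# Elemwise{add}(DimShuffle{0,1,x}, DimShuffle{x,0,1}).
import theano
import theano.tensor as T

x = T.matrix()
y = T.matrix()
mask = T.ge(x, 0)[:, :, None] + T.ge(y, 0)[None, :, :]  # bool + bool
f = theano.function([x, y], mask)  # local_add_canonizer may raise the TypeError here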
# Author: Nicolas Granger <nicolas.granger.m@gmail.com>
#
# Implements the connectionist temporal classification loss from:
# Graves, A., Fernández, S., Gomez, F., & Schmidhuber, J. (2006, June).
# Connectionist temporal classification: labelling unsegmented sequence data
# with recurrent neural networks. In Proceedings of the 23rd international
# conference on Machine learning (pp. 369-376). ACM.
# ftp://ftp.idsia.ch/pub/juergen/icml2006.pdf
import numpy as np
import theano
import theano.tensor as T
from theano.tensor import discrete_dtypes, continuous_dtypes

def isneginf(x, neginf=-1e9):
    return x < neginf

def logaddexp(x, y, magnitude=20):
    x, y = T.minimum(x, y), T.maximum(x, y)
    diff = T.minimum(y - x, magnitude)
    res = x + T.log(1 + T.exp(diff))
    return T.switch((y - x > magnitude), y, res)

def logsumexp(x, axis, keepdims=False):
    k = T.max(x, axis=axis, keepdims=True)
    res = T.log(T.sum(T.exp(x - k), axis=axis, keepdims=keepdims)) + k
    return T.switch(isneginf(k), -2e9, res)

def log_softmax(X, axis=-1, clip=None):
    k = T.max(X, axis=axis, keepdims=True)
    norm_X = X - k
    if clip is not None:
        norm_X = T.maximum(norm_X, clip)
    log_sum_exp_X = logsumexp(norm_X, axis=axis, keepdims=True)
    return norm_X - log_sum_exp_X

# Bits of the CTC algorithm ---------------------------------------------------
def insert_alternating_blanks(labels, blank_label):
    batch_size, label_size = labels.shape
    blanked_labels = T.zeros((batch_size, 2 * label_size + 1), dtype=np.int32)
    blanked_labels = T.set_subtensor(blanked_labels[:, 0::2], blank_label)
    blanked_labels = T.set_subtensor(blanked_labels[:, 1:-1:2], labels)
    return blanked_labels

def ctc_forward(log_odds, seq_sizes,
                blanked_labels, label_sizes, not_repeated):
    seqsize, batch_sz, _ = log_odds.shape
    label_size = blanked_labels.shape[1]

    def step(t, a_tm1, log_odds_,
             seq_sizes_, blanked_labels_, label_sizes_, not_repeated_):
        y_t = log_odds_[t]
        a_t = a_tm1
        a_t = T.set_subtensor(
            a_t[:, 1:],
            logaddexp(a_t[:, 1:], a_tm1[:, :-1]))
        a_t = T.set_subtensor(
            a_t[:, 2:],
            logaddexp(a_t[:, 2:], T.switch(not_repeated_, a_tm1[:, :-2], -2e9)))

        # stop after a_T(|l'|)
        mask = T.ge(t, seq_sizes_)[:, None] \
            + T.ge(T.arange(label_size)[None, :],
                   2 * label_sizes_[:, None] + 1)
        a_t = T.switch(
            isneginf(a_t) + mask, -2e9,
            a_t + y_t[T.arange(batch_sz)[:, None], blanked_labels_])

        return a_t

    alpha_init = -2e9 * T.ones((batch_sz, label_size))
    alpha_init = T.set_subtensor(alpha_init[:, 0], 0)

    alphas, _ = theano.scan(
        fn=step,
        sequences=[T.arange(seqsize)],
        outputs_info=alpha_init,
        non_sequences=[log_odds, seq_sizes, blanked_labels, label_sizes,
                       not_repeated],
        name="ctc_forward")

    return alphas

def ctc_backward(log_odds, seq_sizes,
                 blanked_labels, label_sizes, not_repeated):
    seqsize, batch_sz, _ = log_odds.shape
    label_size = blanked_labels.shape[1]

    def step(t, b_tp1, log_odds_,
             seq_sizes_, blanked_labels_, label_sizes_, not_repeated_):
        y_t = log_odds_[t]

        # increase b_{T+1}(|l'|) from 0 to 1 to initialize recursion
        starter_t = T.eq(t, seq_sizes_ - 1)[:, None] \
            * T.eq((2 * label_sizes_)[:, None],
                   T.arange(label_size)[None, :])
        b_tp1_2lp1 = b_tp1[T.arange(batch_sz), 2 * label_sizes_]
        b_tp1 = T.set_subtensor(
            b_tp1_2lp1,
            T.switch(T.eq(t, seq_sizes_ - 1), 0, b_tp1_2lp1))
        b_tp1 = T.switch(starter_t, 0, b_tp1)  # initialize recursion

        b_t = b_tp1
        b_t = T.set_subtensor(
            b_t[:, :-1],
            logaddexp(b_t[:, :-1], b_tp1[:, 1:]))
        b_t = T.set_subtensor(
            b_t[:, :-2],
            logaddexp(b_t[:, :-2], T.switch(not_repeated_, b_tp1[:, 2:], -2e9)))
        b_t += y_t[T.arange(batch_sz)[:, None], blanked_labels_]
        b_t = T.switch(isneginf(b_t), -2e9, b_t)

        return b_t

    beta_init = -2e9 * T.ones((batch_sz, label_size))

    betas, _ = theano.scan(
        fn=step,
        sequences=[T.arange(seqsize)],
        outputs_info=beta_init,
        non_sequences=[log_odds, seq_sizes, blanked_labels, label_sizes,
                       not_repeated],
        go_backwards=True,
        name="ctc_backward")

    betas = betas[::-1, :, :]

    return betas

# Theano Op -------------------------------------------------------------------
def ctc_perform_graph(linout, seq_sizes, labels, label_sizes, blank):
    _, batch_size, voca_size = linout.shape

    logits = log_softmax(linout)
    blanked_labels = insert_alternating_blanks(labels, blank)
    not_repeated = T.neq(blanked_labels[:, 2:], blanked_labels[:, :-2])
    betas = ctc_backward(logits, seq_sizes,
                         blanked_labels, label_sizes, not_repeated)
    loss = - logaddexp(betas[0, :, 0], betas[0, :, 1])

    # alphas = ctc_forward(logits, seq_sizes,
    #                      blanked_labels, label_sizes, not_repeated)
    # loss = - logaddexp(
    #     alphas[seq_sizes - 1, T.arange(batch_size), 2 * label_sizes - 1],
    #     alphas[seq_sizes - 1, T.arange(batch_size), 2 * label_sizes])

    return logits, blanked_labels, not_repeated, betas, loss

def ctc_grad_graph(inputs, output_gradients):
    linout, seq_durations, labels, label_sizes, _ = inputs
    seq_size, batch_size, voca_size = linout.shape
    label_size = labels.shape[1]

    logits, blanked_labels, not_repeated, betas, loss = \
        ctc_perform_graph(*inputs)
    alphas = ctc_forward(logits, seq_durations,
                         blanked_labels, label_sizes, not_repeated)

    # log(sum_{s \in lab(l, k)} a_t(s) b_t(s))
    def fwbw_sum_step(k, s, labels_, ab_):
        s_view = s[:, T.arange(batch_size), labels_[:, k]]
        ab_view = ab_[:, :, k]
        next_sum = logaddexp(s_view, ab_view)
        s = T.set_subtensor(s_view, next_sum)
        return s

    ab = alphas + betas
    fwbw_sum = theano.scan(
        fn=fwbw_sum_step,
        sequences=[T.arange(2 * label_size + 1)],
        outputs_info=-2e9 * T.ones((seq_size, batch_size, voca_size)),
        non_sequences=[blanked_labels, ab],
        strict=True,
        name="fwbw_sum")[0][-1]

    A = loss[None, :, None] + logits \
        + logsumexp(fwbw_sum - logits, axis=2, keepdims=True)
    B = loss[None, :, None] + fwbw_sum - logits
    dloss_dy = T.exp(A) - T.exp(B)
    dloss_dy = T.switch(T.all(isneginf(fwbw_sum), axis=2, keepdims=True),
                        0, dloss_dy)

    return [dloss_dy * output_gradients[0][None, :, None],
            theano.gradient.disconnected_type(),
            theano.gradient.disconnected_type(),
            theano.gradient.disconnected_type(),
            theano.gradient.disconnected_type()]

def make_ctc_op():
    preds_var = T.tensor3()
    seq_durations_var = T.ivector()
    labels_var = T.imatrix()
    label_sizes_var = T.ivector()
    blank_var = T.iscalar()

    _, _, _, _, loss = ctc_perform_graph(
        preds_var, seq_durations_var, labels_var,
        label_sizes_var, blank_var)

    return theano.OpFromGraph(
        inputs=[preds_var, seq_durations_var,
                labels_var, label_sizes_var, blank_var],
        outputs=[loss],
        grad_overrides=ctc_grad_graph,
        inline=True, name="ctcLossOp")


CTCLossOp = make_ctc_op()

# -----------------------------------------------------------------------------
def ctc_loss(linout, durations, labels, label_sizes, blank=-1):
    r"""Compute the Connectionist Temporal Classification loss [#graves2006]_.

    .. math:: L = - \ln\left( \sum_{\pi \in \mathcal{B}^{-1}(l)} P(\pi | y) \right)

    where :math:`y` is the sequence of predictions, :math:`l` the target
    label sequence without blanks or repetitions, :math:`\pi` is taken from
    the ensemble of possible label assignments over the observations and
    :math:`\mathcal{B}` is a function that removes blanks and repetitions
    from a sequence of labels.

    Parameters
    ----------
    linout : Theano shared variable, expression or numpy array
        The input values for the softmax function with shape
        duration x batch_size x nclasses.
    durations : Theano shared variable, expression or numpy array
        An _integer_ vector of size batch_size containing the actual length
        of each sequence in linout.
    labels : Theano shared variable, expression or numpy array
        An _integer_ matrix of size batch_size x label_size containing the
        target labels.
    label_sizes : Theano shared variable, expression or numpy array
        An _integer_ vector of size batch_size containing the actual length
        of each sequence in labels.
    blank :
        The blank label class, by default the last index.

    Returns
    -------
    Theano tensor
        A vector expression with the CTC loss of each sequence.

    Reference
    ---------
    .. [#graves2006] Graves, A., Fernández, S., Gomez, F., & Schmidhuber, J.
       (2006, June). Connectionist temporal classification: labelling
       unsegmented sequence data with recurrent neural networks. In
       Proceedings of the 23rd international conference on Machine learning
       (pp. 369-376). ACM. ftp://ftp.idsia.ch/pub/juergen/icml2006.pdf
    """
    linout = T.as_tensor_variable(linout)
    durations = T.as_tensor_variable(durations)
    labels = T.as_tensor_variable(labels)
    label_sizes = T.as_tensor_variable(label_sizes)
    blank = T.cast(T.as_tensor_variable(blank), 'int32')

    if not (linout.dtype in continuous_dtypes and linout.ndim == 3):
        raise ValueError("linout must be continuous with 3 dimensions")
    if not (durations.dtype in discrete_dtypes and durations.ndim == 1):
        raise ValueError("durations must be an integer vector")
    if not (labels.dtype in discrete_dtypes and labels.ndim == 2):
        raise ValueError("labels must be an integer matrix")
    if not (label_sizes.dtype in discrete_dtypes and label_sizes.ndim == 1):
        raise ValueError("label_sizes must be an integer vector")
    if not (blank.dtype in discrete_dtypes and blank.ndim == 0):
        raise ValueError("blank must be an integer value")

    voca_size = T.cast(linout.shape[2], 'int32')
    labels = labels % voca_size
    blank = blank % voca_size

    return CTCLossOp(linout, durations, labels, label_sizes, blank)
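For reference, a short usage sketch, not part of the original gist: it wires `ctc_loss` into a compiled function following the shapes documented in the docstring above (pre-softmax scores of shape duration x batch_size x nclasses). Variable names are illustrative only.

# Illustrative usage sketch only (assumes the module above is importable as `ctc`).
import theano
import theano.tensor as T
from ctc import ctc_loss

linout = T.tensor3()       # pre-softmax scores: duration x batch_size x nclasses
durations = T.ivector()    # actual length of each input sequence
labels = T.imatrix()       # padded target labels: batch_size x label_size
label_sizes = T.ivector()  # actual length of each label sequence

loss = ctc_loss(linout, durations, labels, label_sizes, blank=-1)
grad = theano.grad(loss.sum(), wrt=linout)
fn = theano.function([linout, durations, labels, label_sizes], [loss, grad])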
import unittest
import numpy as np
import theano
import theano.tensor as T
from theano.tests import unittest_tools
from ctc import ctc_loss, ctc_forward, ctc_backward, insert_alternating_blanks, \
    isneginf, log_softmax

class TestCTC(unittest.TestCase):
    def setUp(self):
        unittest_tools.seed_rng()

    def test_forward_backward(self):
        batch_size = 6
        label_size = 7
        voca_size = 5
        seq_size = 10

        label_lengths = np.random.randint(0, label_size,
                                          size=(batch_size,), dtype=np.int32)
        label_lengths[0] = label_size  # extremum case
        label_lengths[1] = 0  # extremum case
        labels = np.array(
            [np.random.randint(0, voca_size - 1, size=label_size, dtype=np.int32)
             for _ in range(batch_size)])
        for i in range(batch_size):
            labels[i, label_lengths[i]:] = -1
        seq_durations = np.array([
            np.random.randint(max(1, label_lengths[i]), seq_size)
            for i in range(batch_size)], dtype=np.int32)

        linear_out = np.random.randn(seq_size, batch_size, voca_size) \
            .astype(np.float32)

        blank_class = -1
        blank_class = np.mod(blank_class, voca_size)
        labels = np.mod(labels, voca_size)

        log_odds = log_softmax(linear_out)
        blanked_labels = insert_alternating_blanks(T.mod(labels, voca_size),
                                                   blank_class)
        not_repeated = T.neq(blanked_labels[:, 2:], blanked_labels[:, :-2])
        alphas = ctc_forward(log_odds, seq_durations,
                             blanked_labels, label_lengths, not_repeated)
        betas = ctc_backward(log_odds, seq_durations,
                             blanked_labels, label_lengths, not_repeated)

        preds = log_softmax(linear_out)
        y_blanks = preds[:, T.arange(batch_size)[:, None], blanked_labels]
        p_l = T.sum(T.exp(alphas + betas - y_blanks), axis=2)

        alphas = alphas.eval()
        betas = betas.eval()
        preds = preds.eval()

        for i in range(batch_size):
            assert np.allclose(alphas[0, i, 0], preds[0, i, -1])
            if label_lengths[i] > 0:
                assert np.allclose(alphas[0, i, 1], preds[0, i, labels[i, 0]])
            else:
                assert isneginf(alphas[0, i, 1])
            assert np.all(isneginf(alphas[0, i, 2:]))

        for i in range(batch_size):
            t = seq_durations[i] - 1
            l = label_lengths[i]
            assert np.allclose(betas[t, i, 2 * l], preds[t, i, -1])
            if l > 0:
                assert np.allclose(betas[t, i, 2 * l - 1],
                                   preds[t, i, labels[i, l - 1]])
                assert np.all(isneginf(betas[t, i, :max(l - 2, 0)]))
            else:
                assert np.all(isneginf(betas[t, i, 1:]))

        p_l = p_l.eval()
        for i in range(batch_size):
            assert (np.allclose(p_l[:seq_durations[i], i], p_l[0, i]))
            a, b = max(0, 2 * label_lengths[i] - 1), 2 * label_lengths[i] + 1
            p_li = np.exp(alphas[seq_durations[i] - 1, i, a:b]).sum()
            assert np.allclose(p_li, p_l[0, i])
            p_li = np.exp(betas[0, i, :2]).sum()
            assert np.allclose(p_li, p_l[0, i])

    def test_simple_precomputed(self):
        # Test obtained from Torch tutorial at:
        # https://github.com/baidu-research/warp-ctc/blob/master/torch_binding/TUTORIAL.md
        linear_out = np.asarray([
            [[0, 0, 0, 0, 0], [1, 2, 3, 4, 5], [-5, -4, -3, -2, -1]],
            [[0, 0, 0, 0, 0], [6, 7, 8, 9, 10], [-10, -9, -8, -7, -6]],
            [[0, 0, 0, 0, 0], [11, 12, 13, 14, 15], [-15, -14, -13, -12, -11]]
        ], dtype=np.float32)

        seq_sizes = np.asarray([1, 3, 3], dtype=np.int32)
        labels = np.asarray([[1, 0], [3, 3], [2, 3]], dtype=np.int32)
        label_sizes = np.asarray([1, 2, 2], dtype=np.int32)
        expected_losses = np.asarray([1.609437943, 7.355742931, 4.938849926],
                                     dtype=np.float32)
        blank = 0

        expected_grad = np.asarray([
            [[0.2, -0.8, 0.2, 0.2, 0.2],
             [0.01165623125, 0.03168492019, 0.08612854034, -0.7658783197, 0.636408627],
             [-0.02115798369, 0.03168492019, -0.8810571432, 0.2341216654, 0.636408627]],
            [[0, 0, 0, 0, 0],
             [-0.9883437753, 0.03168492019, 0.08612854034, 0.2341216654, 0.636408627],
             [-0.02115798369, 0.03168492019, -0.1891518533, -0.4577836394, 0.636408627]],
            [[0, 0, 0, 0, 0],
             [0.01165623125, 0.03168492019, 0.08612854034, -0.7658783197, 0.636408627],
             [-0.02115798369, 0.03168492019, 0.08612854034, -0.7330639958, 0.636408627]]
        ], dtype=np.float32)

        linear_out_var = T.as_tensor_variable(linear_out)
        losses = ctc_loss(
            linear_out_var, seq_sizes, labels, label_sizes, blank)
        assert np.allclose(losses.eval(), expected_losses, atol=1)

        grad = theano.grad(losses.sum(), wrt=linear_out_var)
        assert np.allclose(grad.eval(), expected_grad, rtol=.001, atol=1)

    def test_random(self):
        batch_size = 16
        label_size = 5
        voca_size = 4
        seq_size = 20

        label_sizes = np.random.randint(
            0, label_size, size=(batch_size,), dtype=np.int32)
        label_sizes[0] = label_size
        label_sizes[1] = 0
        label_sizes[2] = 5
        label_sizes[3] = 5

        labels = np.random.randint(
            0, voca_size - 1,
            size=(batch_size, label_size), dtype=np.int32)
        labels[3] = 0

        seq_sizes = np.array([
            np.random.randint(max(1, label_sizes[i]), seq_size)
            for i in range(batch_size)], dtype=np.int32)
        seq_sizes[2] = 4

        linear_out = np.random.randn(
            seq_size, batch_size, voca_size).astype(np.float32)

        # check edge cases
        # TODO

        # check the gradient can be computed at all
        linear_out_var = T.tensor3()
        preds = T.nnet.softmax(
            linear_out_var.reshape((-1, voca_size))
        ).reshape((seq_size, batch_size, voca_size))
        g = theano.grad(ctc_loss(preds, seq_sizes,
                                 labels, label_sizes).sum(),
                        wrt=linear_out_var).eval(
            {linear_out_var: linear_out.astype(np.float32)})
        assert not np.any(np.isnan(g))

        # check correctness against finite difference approximation
        def f(linear_out_):
            preds_ = T.nnet.softmax(
                linear_out_.reshape((-1, voca_size))
            ).reshape((seq_size, batch_size, voca_size))
            loss = ctc_loss(preds_, seq_sizes, labels, label_sizes)
            # prevent finite differences from failing
            loss = T.switch(isneginf(-loss), 0, loss)
            return loss

        unittest_tools.verify_grad(f, [linear_out], abs_tol=0.05, rel_tol=0.05)