Fix error in the return value of gradient computations

Parent commit: b390f484
from __future__ import (division, absolute_import, print_function) from __future__ import (division, absolute_import, print_function)
import os
import numpy as np import numpy as np
import theano.tensor as T import theano.tensor as T
from theano import config from theano import config
...@@ -9,8 +10,6 @@ from theano.tensor.opt import register_stabilize ...@@ -9,8 +10,6 @@ from theano.tensor.opt import register_stabilize
from theano.tensor.extra_ops import cpu_contiguous from theano.tensor.extra_ops import cpu_contiguous
from theano.gradient import grad_undefined from theano.gradient import grad_undefined
import os
ctc_enabled = config.ctc.enabled ctc_enabled = config.ctc.enabled
...@@ -71,6 +70,15 @@ class ConnectionistTemporalClassification(gof.COp): ...@@ -71,6 +70,15 @@ class ConnectionistTemporalClassification(gof.COp):
t_labels = T.as_tensor_variable(labels) t_labels = T.as_tensor_variable(labels)
t_input_lengths = T.as_tensor_variable(input_lengths) t_input_lengths = T.as_tensor_variable(input_lengths)
if t_activations.type.dtype != 'float32':
raise TypeError('Activations must use the float32 type!')
if t_labels.type.dtype != 'int32':
raise TypeError('Labels must use the int32 type!')
if t_input_lengths.type.dtype != 'int32':
raise TypeError('Label lengths must use the int32 type!')
# Return only the cost. Gradient will be returned by grad() # Return only the cost. Gradient will be returned by grad()
self.default_output = 0 self.default_output = 0
...@@ -82,11 +90,7 @@ class ConnectionistTemporalClassification(gof.COp): ...@@ -82,11 +90,7 @@ class ConnectionistTemporalClassification(gof.COp):
raise RuntimeError('Baidu CTC is not enabled and ' raise RuntimeError('Baidu CTC is not enabled and '
'ConnectionistTemporalClassification Op ' 'ConnectionistTemporalClassification Op '
'can not be constructed.') 'can not be constructed.')
# self.gradients.shape = [seqLen, batchSize, outputSize] return [self.gradients,
# output_grads[0].shape = [batchSize] (one cost per sequence)
# So, reshape output_grads to [1, batchSize, 1] for broadcasting
output_grad = output_grads[0].reshape((1, -1, 1))
return [output_grad * self.gradients,
grad_undefined(self, 1, inputs[1]), grad_undefined(self, 1, inputs[1]),
grad_undefined(self, 2, inputs[2])] grad_undefined(self, 2, inputs[2])]
......
...@@ -45,11 +45,11 @@ class TestCTC(unittest.TestCase): ...@@ -45,11 +45,11 @@ class TestCTC(unittest.TestCase):
[[0, 0, 0, 0, 0], [11, 12, 13, 14, 15], [-15, -14, -13, -12, -11]]], [[0, 0, 0, 0, 0], [11, 12, 13, 14, 15], [-15, -14, -13, -12, -11]]],
dtype=np.float32) dtype=np.float32)
# Duration of each sequence # Duration of each sequence
activation_times = np.asarray([1, 3, 3], dtype=np.int) activation_times = np.asarray([1, 3, 3], dtype=np.int32)
# Labels for each sequence # Labels for each sequence
labels = np.asarray([[1, -1], labels = np.asarray([[1, -1],
[3, 3], [3, 3],
[2, 3]], dtype=np.int) [2, 3]], dtype=np.int32)
expected_costs = np.asarray([1.609437943, 7.355742931, 4.938849926], expected_costs = np.asarray([1.609437943, 7.355742931, 4.938849926],
dtype=np.float32) dtype=np.float32)
......
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment