Add basic prototype of GPU CTC wrapper

Signed-off-by: João Victor Tozatti Risso <joaovictor.risso@gmail.com>

Add basic prototype of GPU CTC wrapper
e2c9abc4 · João Victor Tozatti Risso · 388801f2 · e2c9abc4 · e2c9abc4 · e2c9abc4
--- a/theano/gpuarray/__init__.py
+++ b/theano/gpuarray/__init__.py
@@ -29,7 +29,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
                   GpuArraySharedVariable, gpuarray_shared_constructor,
                   reg_context, get_context, ContextNotDefined)
 from .basic_ops import as_gpuarray_variable
-from . import fft, dnn, opt, extra_ops, multinomial, reduction, rng_mrg
+from . import fft, dnn, opt, extra_ops, multinomial, reduction, rng_mrg, ctc


 def transfer(x, target):

--- a/theano/gpuarray/ctc.py
+++ b/theano/gpuarray/ctc.py
+from __future__ import absolute_import, print_function, division
+
+import os
+
+import numpy as np
+import theano
+from theano import Op
+from theano import config
+import theano.tensor as T
+from .basic_ops import (gpu_contiguous, as_gpuarray_variable,
+                        infer_context_name, CGpuKernelBase)
+import theano.tensor.nnet.ctc
+from .type import GpuArrayType
+from .opt import register_opt, op_lifter, register_opt2
+from theano.gradient import grad_undefined
+
+# Module-level snapshot of the `ctc.enabled` configuration flag, read once
+# when this module is imported.
+ctc_enabled = config.ctc.enabled
+
+
+class GpuConnectionistTemporalClassification(CGpuKernelBase, Op):
+    """
+    GPU wrapper for Baidu CTC loss function.
+
+    Parameters
+    ----------
+    compute_grad
+        If True (the default), nodes built by `make_node` get a second
+        output typed as a 3-D float32 GPU array for the gradients and the
+        ``ctc_cost_gpu`` C function is used. If False, only the costs
+        output is created and ``ctc_cost_gpu_no_grad`` is used instead.
+    context_name
+        Name of the GPU context used for the inputs and output types.
+        `make_node` asserts that the context inferred from its inputs
+        equals this value.
+
+    Raises
+    ------
+    ValueError
+        If ``config.ctc.root`` is an empty string.
+    """
+    __props__ = ('context_name', 'compute_grad',)
+
+    # C source file and default entry point handed to CGpuKernelBase.
+    func_file = "./ctc_wrapper.c"
+    func_name = "APPLY_SPECIFIC(ctc_cost_gpu)"
+
+    def __init__(self, compute_grad=True, context_name=None):
+        if not compute_grad:
+            # Override the class-level entry point on this instance only.
+            self.func_name = "APPLY_SPECIFIC(ctc_cost_gpu_no_grad)"
+        self.compute_grad = compute_grad
+        self.context_name = context_name
+
+        CGpuKernelBase.__init__(self, self.func_file, self.func_name)
+
+        self.costs_type = GpuArrayType(dtype='float32',
+                                       broadcastable=(False,),
+                                       context_name=self.context_name)
+
+        # Always define grads_type: make_node() compares it against None,
+        # which would raise AttributeError if the attribute only existed
+        # when compute_grad is True.
+        self.grads_type = None
+        if self.compute_grad:
+            self.grads_type = GpuArrayType(dtype='float32',
+                                           broadcastable=(False, False, False,),
+                                           context_name=self.context_name)
+
+        if config.ctc.root == "":
+            raise ValueError('ctc.root variable is not set, please set it '
+                             'to the root directory of the CTC library in '
+                             'your system.')
+
+    def c_lib_dirs(self):
+        """Return the library search directories (empty if CTC is disabled)."""
+        dirs = []
+        if ctc_enabled:
+            # We assume here that the compiled library (libwarpctc.so) is available
+            # at the build directory of the CTC root directory.
+            dirs.append(os.path.join(config.ctc.root, "build"))
+        return dirs
+
+    def c_libraries(self):
+        """Return the names of the libraries to link against."""
+        return ["warpctc"]
+
+    def c_header_dirs(self):
+        """Return the CTC include directory (if enabled) plus the base ones."""
+        dirs = []
+        if ctc_enabled:
+            # We assume here that the header is available at the include directory
+            # of the CTC root directory.
+            dirs.append(os.path.join(config.ctc.root, "include"))
+        return dirs + CGpuKernelBase.c_header_dirs(self)
+
+    def c_headers(self):
+        """Return ``ctc.h`` followed by the headers of CGpuKernelBase."""
+        return ["ctc.h"] + CGpuKernelBase.c_headers(self)
+
+    def make_node(self, activations, labels, input_lengths):
+        """
+        Build the Apply node for the given inputs.
+
+        All three inputs are converted to GPU array variables on
+        ``self.context_name``; the activations are additionally made
+        C-contiguous.
+
+        Returns
+        -------
+        theano.Apply
+            Node whose outputs are the costs and, when a gradient type was
+            configured (``compute_grad=True``), the gradients.
+
+        Raises
+        ------
+        RuntimeError
+            If CTC is not enabled in the configuration.
+        AssertionError
+            If the context inferred from the inputs differs from
+            ``self.context_name``.
+        TypeError
+            If the activations are not float32, or the labels or input
+            lengths are not int32.
+        """
+        if not ctc_enabled:
+            raise RuntimeError('Baidu CTC is not enabled and '
+                               'ConnectionistTemporalClassification Op '
+                               'can not be constructed.')
+
+        context = infer_context_name(activations, labels, input_lengths)
+        assert context == self.context_name
+
+        t_activations = as_gpuarray_variable(activations,
+                                             context_name=self.context_name)
+        # Ensure activations array is C-contiguous
+        t_activations = gpu_contiguous(t_activations)
+
+        t_labels = as_gpuarray_variable(labels, context_name=self.context_name)
+        t_input_lengths = as_gpuarray_variable(input_lengths,
+                                               context_name=self.context_name)
+
+        if t_activations.type.dtype != 'float32':
+            raise TypeError('Activations must use the float32 type!')
+
+        if t_labels.type.dtype != 'int32':
+            raise TypeError('Labels must use the int32 type!')
+
+        if t_input_lengths.type.dtype != 'int32':
+            raise TypeError('Input lengths must use the int32 type!')
+
+        # Return only the cost. Gradient will be returned by grad()
+        self.default_output = 0
+
+        out_params = [self.costs_type()]
+        if self.grads_type is not None:
+            out_params.append(self.grads_type())
+
+        return theano.Apply(self, inputs=[t_activations, t_labels, t_input_lengths],
+                            outputs=out_params)
+
+    def grad(self, inputs, output_grads):
+        """
+        Return an undefined gradient for each of the three inputs.
+
+        NOTE(review): the gradients output created by `make_node` is not
+        used here yet.
+        """
+        return [grad_undefined(self, 0, inputs[0]),
+                grad_undefined(self, 1, inputs[1]),
+                grad_undefined(self, 2, inputs[2])]
+
+def ctc(activations, labels, input_lengths):
+    """
+    Apply a default-constructed GPU CTC Op to the given inputs.
+
+    The Op is created with its default arguments (gradient computation
+    enabled, no explicit context name) and called on `activations`,
+    `labels` and `input_lengths`; the result of that call is returned.
+    """
+    ctc_op = GpuConnectionistTemporalClassification()
+    return ctc_op(activations, labels, input_lengths)
\ No newline at end of file
--- a/theano/gpuarray/ctc_wrapper.c
+++ b/theano/gpuarray/ctc_wrapper.c
+#section support_code_struct
+
+/*
+ * Entry point for the CTC cost computation that also receives a gradients
+ * output.
+ *
+ * NOTE(review): this is currently a stub. It returns 0 (presumably the
+ * success code -- confirm) without reading any input and without writing
+ * to out_costs or out_gradients.
+ */
+int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject   *  in_activations,
+                                 PyGpuArrayObject   *  in_labels,
+                                 PyGpuArrayObject   *  in_input_lengths,
+                                 PyGpuArrayObject   ** out_costs,
+                                 PyGpuArrayObject   ** out_gradients,
+                                 PyGpuContextObject *  ctx)
+{
+   return 0;
+}
+
+/*
+ * Variant of ctc_cost_gpu without a gradients output: forwards every
+ * argument unchanged, passes NULL for out_gradients and returns whatever
+ * ctc_cost_gpu returns.
+ */
+int APPLY_SPECIFIC(ctc_cost_gpu_no_grad)(PyGpuArrayObject   *  in_activations,
+                                         PyGpuArrayObject   *  in_labels,
+                                         PyGpuArrayObject   *  in_input_lengths,
+                                         PyGpuArrayObject   ** out_costs,
+                                         PyGpuContextObject *  ctx)
+{
+    /* No gradients buffer exists on this code path. */
+    PyGpuArrayObject ** no_gradients = NULL;
+    int status = APPLY_SPECIFIC(ctc_cost_gpu)(in_activations, in_labels,
+                                              in_input_lengths, out_costs,
+                                              no_gradients, ctx);
+
+    return status;
+}
\ No newline at end of file
--- a/theano/gpuarray/tests/test_ctc.py
+++ b/theano/gpuarray/tests/test_ctc.py
+from __future__ import (division, absolute_import, print_function)
+
+import unittest
+import numpy as np
+
+import theano
+import theano.tensor as T
+from theano import config
+from theano.tests import unittest_tools as utt
+import theano.gpuarray
+from theano.gpuarray.ctc import (ctc_enabled, ctc)
+
+class TestCTC(unittest.TestCase):
+    """Smoke test for the GPU CTC wrapper; skipped when CTC is disabled."""
+
+    def setUp(self):
+        # Nothing below can be built without the optional warp-ctc library.
+        if ctc_enabled:
+            return
+        self.skipTest('Optional library warp-ctc not available')
+
+    def run_ctc(self, activations, labels, input_length, expected_costs, expected_grads):
+        """
+        Build the CTC cost and its gradient for the given data and run both.
+
+        The expected values are not compared yet: the corresponding
+        assertions are still commented out at the end of this method.
+        """
+        # Wrap the numeric inputs in shared variables
+        shared_activations = theano.shared(activations, name="activations")
+        shared_lengths = theano.shared(input_length, name="activation_times")
+        shared_labels = theano.shared(labels, name="labels")
+
+        cost_var = ctc(shared_activations, shared_labels, shared_lengths)
+        # Gradient of the mean cost with respect to the activations
+        grad_var = T.grad(T.mean(cost_var), shared_activations)
+
+        # One function returning cost and gradient, one returning the cost alone
+        train_fn = theano.function([], [cost_var, grad_var])
+        test_fn = theano.function([], [cost_var])
+
+        # Unpacking also checks how many outputs each function returns
+        cost, grad = train_fn()
+        (test_cost,) = test_fn()
+
+        #utt.assert_allclose(expected_grads, grad)
+        #utt.assert_allclose(expected_costs, cost)
+
+    def simple_test(self):
+        """Run the CTC graph on small fixed inputs with hard-coded references."""
+        labels = np.asarray([[1, 2], [1, 2]], dtype='int32')
+        activation_times = np.asarray([2, 2], dtype='int32')
+
+        activations = np.asarray(
+            [[[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]],
+             [[0.6, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.5, 0.2, 0.1]]],
+            dtype='float32')
+
+        expected_costs = np.asarray([2.962858438, 3.053659201], dtype='float32')
+
+        expected_gradients = np.asarray(
+            [[[0.177031219, -0.7081246376, 0.177031219, 0.177031219, 0.177031219],
+              [0.177031219, -0.8229685426, 0.291875124, 0.177031219, 0.177031219]],
+             [[0.291875124, 0.177031219, -0.8229685426, 0.177031219, 0.177031219],
+              [0.1786672771, 0.1786672771, -0.7334594727, 0.1974578798, 0.1786672771]]],
+            dtype=np.float32)
+
+        self.run_ctc(activations, labels, activation_times, expected_costs, expected_gradients)