提交 a6b12aad authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6292 from nouiz/fix_ctc_test

Fix CTC tests in FAST_COMPILE
...@@ -4,6 +4,13 @@ ...@@ -4,6 +4,13 @@
:mod:`theano.gpuarray.ctc` -- Connectionist Temporal Classification (CTC) loss :mod:`theano.gpuarray.ctc` -- Connectionist Temporal Classification (CTC) loss
================================================================================ ================================================================================
.. warning::
   This is not the recommended user interface. Use :ref:`the CPU
interface <libdoc_tensor_nnet_ctc>`. It will get moved
automatically to the GPU.
.. note:: .. note::
Usage of connectionist temporal classification (CTC) loss Op, requires that Usage of connectionist temporal classification (CTC) loss Op, requires that
......
...@@ -12,6 +12,11 @@ ...@@ -12,6 +12,11 @@
the ``config.ctc.root`` configuration option must be appropriately set to the the ``config.ctc.root`` configuration option must be appropriately set to the
directory containing the warp-ctc library files. directory containing the warp-ctc library files.
.. note::
   This interface is the preferred interface. It will be moved
automatically to the GPU.
.. note:: .. note::
Unfortunately, Windows platforms are not yet supported by the underlying Unfortunately, Windows platforms are not yet supported by the underlying
......
...@@ -13,6 +13,7 @@ Requirements ...@@ -13,6 +13,7 @@ Requirements
.. _libgpuarray: http://deeplearning.net/software/libgpuarray/installation.html .. _libgpuarray: http://deeplearning.net/software/libgpuarray/installation.html
.. _pycuda: https://mathema.tician.de/software/pycuda/ .. _pycuda: https://mathema.tician.de/software/pycuda/
.. _skcuda: http://scikit-cuda.readthedocs.io/en/latest/ .. _skcuda: http://scikit-cuda.readthedocs.io/en/latest/
.. _warp-ctc: https://github.com/baidu-research/warp-ctc
Python_ == 2.7* or ( >= 3.4 and < 3.6 ) Python_ == 2.7* or ( >= 3.4 and < 3.6 )
|PythonDistRecommended|. Python 2.4 was supported up to and including the |PythonDistRecommended|. Python 2.4 was supported up to and including the
...@@ -57,6 +58,12 @@ Requirements ...@@ -57,6 +58,12 @@ Requirements
cusolver: ``pip install pycuda; pip install cusolver: ``pip install pycuda; pip install
git+https://github.com/lebedov/scikit-cuda.git#egg=scikit-cuda``. git+https://github.com/lebedov/scikit-cuda.git#egg=scikit-cuda``.
`warp-ctc`_
Required for :ref:`Theano CTC implementation
    <libdoc_tensor_nnet_ctc>`. It is faster than using an
equivalent graph of Theano ops.
Requirements installation through Conda (recommended) Requirements installation through Conda (recommended)
----------------------------------------------------- -----------------------------------------------------
......
...@@ -58,7 +58,8 @@ class GpuConnectionistTemporalClassification(gof.COp): ...@@ -58,7 +58,8 @@ class GpuConnectionistTemporalClassification(gof.COp):
return ["warpctc", "gpuarray"] return ["warpctc", "gpuarray"]
def c_header_dirs(self): def c_header_dirs(self):
dirs = [gpuarray_helper_inc_dir(), pygpu.get_include()] dirs = [gpuarray_helper_inc_dir(), pygpu.get_include(),
config.cuda.include_path]
if config.ctc.root != '': if config.ctc.root != '':
dirs.append(os.path.join(config.ctc.root, "include")) dirs.append(os.path.join(config.ctc.root, "include"))
return dirs return dirs
...@@ -163,7 +164,7 @@ def gpu_ctc(activations, labels, input_lengths): ...@@ -163,7 +164,7 @@ def gpu_ctc(activations, labels, input_lengths):
# Disable gradient computation if not needed # Disable gradient computation if not needed
@register_canonicalize @register_canonicalize("fast_compile")
@local_optimizer([GpuConnectionistTemporalClassification]) @local_optimizer([GpuConnectionistTemporalClassification])
def local_gpu_ctc_no_grad(node): def local_gpu_ctc_no_grad(node):
if isinstance(node.op, GpuConnectionistTemporalClassification): if isinstance(node.op, GpuConnectionistTemporalClassification):
......
...@@ -49,7 +49,7 @@ class TestCTC(unittest.TestCase): ...@@ -49,7 +49,7 @@ class TestCTC(unittest.TestCase):
# Symbolic gradient of CTC cost # Symbolic gradient of CTC cost
gpu_ctc_grad = T.grad(T.mean(gpu_ctc_cost), activations) gpu_ctc_grad = T.grad(T.mean(gpu_ctc_cost), activations)
outputs += [gpu_ctc_grad] outputs += [gpu_ctc_grad]
return theano.function([], outputs) return theano.function([], outputs, mode=mode_with_gpu)
def check_expected_values(self, activations, labels, input_length, expected_costs, expected_grads): def check_expected_values(self, activations, labels, input_length, expected_costs, expected_grads):
gpu_train = self.setup_gpu_op(activations, labels, input_length) gpu_train = self.setup_gpu_op(activations, labels, input_length)
...@@ -139,4 +139,4 @@ class TestCTC(unittest.TestCase): ...@@ -139,4 +139,4 @@ class TestCTC(unittest.TestCase):
ctc_op = ctc_op_functor(labels, activation_times) ctc_op = ctc_op_functor(labels, activation_times)
utt.verify_grad(ctc_op, [activations]) utt.verify_grad(ctc_op, [activations], mode=mode_with_gpu)
...@@ -224,7 +224,7 @@ def ctc(activations, labels, input_lengths): ...@@ -224,7 +224,7 @@ def ctc(activations, labels, input_lengths):
# Disable gradient computation if not needed # Disable gradient computation if not needed
@register_canonicalize @register_canonicalize('fast_compile')
@local_optimizer([ConnectionistTemporalClassification]) @local_optimizer([ConnectionistTemporalClassification])
def local_ctc_no_grad(node): def local_ctc_no_grad(node):
if isinstance(node.op, ConnectionistTemporalClassification): if isinstance(node.op, ConnectionistTemporalClassification):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论