Now by default, if cuDNN is available, always use it.

666e371d · Frederic · 03f42b36 · 666e371d · 666e371d · 666e371d
--- a/doc/library/sandbox/cuda/dnn.txt
+++ b/doc/library/sandbox/cuda/dnn.txt
@@ -6,8 +6,8 @@
 .. moduleauthor:: LISA
-Normally you should not call directly those Ops, but the CPU interface
+Normally you should not call GPU Ops directly, but the CPU interface
-currently don't allow all option supported by those ops, so it is
+currently does not allow all options supported by cuDNN ops. So it is
 possible that you need to call them manually.
 `cuDNN <https://developer.nvidia.com/cuDNN>`_ is NVIDIA library with
@@ -16,21 +16,20 @@ implementation of some operation like the convolution. cuDNN currently
 is not installed with CUDA 6.5. You must download it and install it
 yourself.
-To install it, decompress the downloaded file and make the *.h and
+To install it, decompress the downloaded file and make the ``*.h`` and
-*.so* files available to the compilation environment. On Linux, this
+``*.so*`` files available to the compilation environment. On Linux,
-can be done by setting the environment variable LD_LIBRARY_PATH,
+this can be done by setting the environment variable
-LIBRARY_PATH and CPATH to the uncompressed directory path. They work
+``LD_LIBRARY_PATH``, ``LIBRARY_PATH`` and ``CPATH`` to the
-the same way as PATH. Or you can copy the *.h files to /usr/include
+uncompressed directory path. They work the same way as ``PATH``. Or
-and the files *.so* to /lib64.
+you can copy the ``*.h`` files to ``/usr/include`` and the files
+``*.so*`` to ``/lib64``.
-Then you need to tell Theano to use it. For the convolution, if cuDNN
+By default, Theano will detect if it can use cuDNN. If so, it will use
-is available, we will use it by default, but not for other
+it. If not, Theano optimizations will not introduce cuDNN ops. So
-operations. Also, it do not give you an error in case it can't use
+Theano will still work if the user did not introduce them manually.
-cuDNN as it will fall back to a slower and more memory hungry version.
-To enable the use of all cuDNN operation and get an error if we can't
-use cuDNN, use the Theano flags: ``optimizer_including=cudnn``.
+To get an error if Theano cannot use cuDNN, use this Theano flag:
+``optimizer_including=cudnn``.
 Functions
@@ -39,8 +38,20 @@ Functions
 .. automodule:: theano.sandbox.cuda.dnn
    :members: dnn_conv, dnn_pool
-Ops
+Convolution Ops
-===
+===============
+.. automodule:: theano.sandbox.cuda.dnn
+    :members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI
+Pooling Ops
+===========
+.. automodule:: theano.sandbox.cuda.dnn
+    :members: GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad
+Softmax Ops
+===========
 .. automodule:: theano.sandbox.cuda.dnn
-    :members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI, GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad, GpuDnnSoftmax
+    :members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -2,6 +2,7 @@ import os
 import theano
 from theano import Apply, tensor
+from theano.gof import Optimizer
 from theano.gof.type import CDataType
 from theano.compat import PY3
 from theano.sandbox.cuda.type import CudaNdarrayType
@@ -12,6 +13,7 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
 from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
                                      GpuDownsampleFactorMaxGrad)
 from theano.sandbox.cuda.nnet import GpuSoftmax
+from theano.sandbox.cuda.opt import register_opt
 from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
@@ -54,14 +56,6 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
        """ % dict(var=var, err=err, desc=desc, fail=fail)
-def raise_no_dnn():
-    """ Raise a RuntimeError if cudnn can't be used"""
-    if not dnn_available():
-        raise RuntimeError(
-            "cuDNN optimization was enabled, but cuDNN is not available. " +
-            dnn_available.msg)
 class DnnBase(GpuOp):
    """
    Creates a handle for cudnn and pulls in the cudnn libraries and headers.
@@ -981,11 +975,14 @@ err%(name)s = cudnnSoftmaxForward(
 # We need this since other stuff from opt is not importable.
 if cuda_available:
-    from theano.sandbox.cuda.opt import local_optimizer, gpu_optimizer
+    from theano.sandbox.cuda.opt import (
+        local_optimizer, gpu_optimizer, gpu_seqopt)
+    @register_opt('cudnn')
    @local_optimizer([GpuConv])
    def local_conv_dnn(node):
-        raise_no_dnn()
+        if not dnn_available():
+            return
        if isinstance(node.op, GpuConv):
            if node.op.border_mode not in ['full', 'valid']:
                return
@@ -995,10 +992,11 @@ if cuda_available:
            return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
                             border_mode=border_mode, subsample=subsample)]
-    gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
+    @register_opt('cudnn')
    @local_optimizer([GpuDownsampleFactorMax])
    def local_pool_dnn(node):
+        if not dnn_available():
+            return
        if isinstance(node.op, GpuDownsampleFactorMax):
            if node.op.ignore_border:
                return
@@ -1006,10 +1004,11 @@ if cuda_available:
            ds = node.op.ds
            return [dnn_pool(gpu_contiguous(img), ds, ds)]
-    gpu_optimizer.register("pool_cudnn", local_pool_dnn, 'cudnn')
+    @register_opt('cudnn')
    @local_optimizer([GpuDownsampleFactorMaxGrad])
    def local_pool_dnn_grad(node):
+        if not dnn_available():
+            return
        if isinstance(node.op, GpuDownsampleFactorMaxGrad):
            if node.op.ignore_border:
                return
@@ -1022,11 +1021,11 @@ if cuda_available:
                                     gpu_contiguous(inp_grad),
                                     gpu_contiguous(out), desc)]
-    gpu_optimizer.register("pool_cudnn_grad", local_pool_dnn_grad, 'cudnn')
+    @register_opt('cudnn')
    @local_optimizer([GpuSoftmax])
    def local_softmax_dnn(node):
-        raise_no_dnn()
+        if not dnn_available():
+            return
        if isinstance(node.op, GpuSoftmax):
            ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
            ins = gpu_contiguous(ins)
@@ -1034,4 +1033,11 @@ if cuda_available:
            out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
            return [out]
-    gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
+    class NoCuDNNRaise(Optimizer):
+        # Graph-level check: it never rewrites the graph, it only fails
+        # loudly when cuDNN cannot be used.
+        def apply(self, fgraph):
+            """ Raise a RuntimeError if cudnn can't be used
+
+            `fgraph` is accepted but not inspected or modified here; the
+            only effect is the exception raised when `dnn_available()`
+            is false.  The message appends `dnn_available.msg`.
+            """
+            if not dnn_available():
+                raise RuntimeError(
+                    "cuDNN optimization was enabled, but cuDNN is not available. " +
+                    dnn_available.msg)
+    gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
--- a/theano/sandbox/cuda/tests/test_dnn.py
+++ b/theano/sandbox/cuda/tests/test_dnn.py
+import logging
+import unittest
 from nose.plugins.skip import SkipTest
 import numpy
-import unittest
 import theano
+from theano.compat.six import StringIO
 from theano.gof.python25 import any
 import theano.tensor as T
 import theano.tests.unittest_tools as utt
@@ -85,7 +88,7 @@ def test_pooling_opt():
    f = theano.function(
        [x],
        max_pool_2d(x, ds=(2, 2)),
-        mode=mode_with_gpu.including("cudnn"))
+        mode=mode_with_gpu)
    assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
                for n in f.maker.fgraph.toposort()])
@@ -97,3 +100,36 @@ def test_pooling_opt():
    assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
                for n in f.maker.fgraph.toposort()])
+def test_dnn_tag():
+    """
+    Check the behaviour of the "cudnn" optimizer tag.
+
+    Compiling with `mode_with_gpu.including("cudnn")` must either raise a
+    RuntimeError (only acceptable when cuDNN is not available) or produce
+    a graph that contains a GpuDnnPool node (cuDNN must then be available).
+    """
+    x = T.ftensor4()
+    # Make optimizer errors propagate as exceptions; restored in `finally`.
+    old = theano.config.on_opt_error
+    theano.config.on_opt_error = "raise"
+
+    # NOTE(review): `sio` is never read in this function, so it only acts
+    # as a sink for the extra handler.
+    sio = StringIO()
+    handler = logging.StreamHandler(sio)
+    # NOTE(review): this logger name does not match this file's path
+    # (theano/sandbox/cuda/tests/test_dnn.py) -- confirm it is intended.
+    logging.getLogger('theano.compile.tests.test_dnn').addHandler(handler)
+    # Silence original handler when intentionally generating warning messages
+    logging.getLogger('theano').removeHandler(theano.logging_default_handler)
+    raised = False
+    try:
+        f = theano.function(
+            [x],
+            max_pool_2d(x, ds=(2, 2)),
+            mode=mode_with_gpu.including("cudnn"))
+    # NOTE(review): `except X, e` is Python 2-only syntax and `e` is unused.
+    except RuntimeError, e:
+        # A failure is only legitimate when cuDNN really is unavailable.
+        assert not cuda.dnn.dnn_available()
+        raised = True
+    finally:
+        # Undo the config and logging changes whatever the outcome.
+        theano.config.on_opt_error = old
+        logging.getLogger('theano.compile.tests.test_dnn').removeHandler(handler)
+        logging.getLogger('theano').addHandler(theano.logging_default_handler)
+
+    if not raised:
+        # Compilation succeeded: cuDNN must be available and actually used.
+        assert cuda.dnn.dnn_available()
+        assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
+                    for n in f.maker.fgraph.toposort()])