提交 eabfd16f authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2145 from abergeron/dnn_conv_doc

Dnn conv doc
...@@ -95,12 +95,16 @@ TODO: Give examples on how to use these things! They are pretty complicated. ...@@ -95,12 +95,16 @@ TODO: Give examples on how to use these things! They are pretty complicated.
f = theano.function(..., mode=mode) f = theano.function(..., mode=mode)
- :func:`GpuDnnConv <theano.sandbox.cuda.dnn.GpuDnnConv>` GPU-only - :func:`dnn_conv <theano.sandbox.cuda.dnn.dnn_conv>` GPU-only
convolution using NVIDIA's cuDNN library. To enable it (and convolution using NVIDIA's cuDNN library. To have conv2d()
other cudnn-accelerated ops), set automatically converted set
``THEANO_FLAGS=optimizer_including=cudnn`` in your environment. ``THEANO_FLAGS=optimizer_including=cudnn`` in your environment.
This requires that you have cuDNN installed and available. It This will also replace other operations by their
also requires a GPU with compute capability 3.0 or more. cuDNN-accelerated equivalent. This requires that you have cuDNN
installed and available. It requires a GPU with compute
capability 3.0 or more.
Since it has a gradient defined it can also be used manually.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>` - :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
3D Convolution applying multi-channel 3D filters to batches of 3D Convolution applying multi-channel 3D filters to batches of
...@@ -146,6 +150,7 @@ TODO: Give examples on how to use these things! They are pretty complicated. ...@@ -146,6 +150,7 @@ TODO: Give examples on how to use these things! They are pretty complicated.
.. autofunction:: theano.tensor.nnet.conv.conv2d .. autofunction:: theano.tensor.nnet.conv.conv2d
.. autofunction:: theano.sandbox.cuda.fftconv.conv2d_fft .. autofunction:: theano.sandbox.cuda.fftconv.conv2d_fft
.. autofunction:: theano.sandbox.cuda.blas.GpuCorrMM .. autofunction:: theano.sandbox.cuda.blas.GpuCorrMM
.. autofunction:: theano.sandbox.cuda.dnn.dnn_conv
.. autofunction:: theano.tensor.nnet.Conv3D.conv3D .. autofunction:: theano.tensor.nnet.Conv3D.conv3D
.. autofunction:: theano.sandbox.cuda.fftconv.conv3d_fft .. autofunction:: theano.sandbox.cuda.fftconv.conv3d_fft
.. autofunction:: theano.tensor.nnet.conv3d2d.conv3d .. autofunction:: theano.tensor.nnet.conv3d2d.conv3d
...@@ -7,7 +7,8 @@ from theano.gof.type import CDataType ...@@ -7,7 +7,8 @@ from theano.gof.type import CDataType
from theano.compat import PY3 from theano.compat import PY3
from theano.compat.six import StringIO from theano.compat.six import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp, active_device_number, device_properties from theano.sandbox.cuda import (GpuOp, cuda_available, active_device_number,
device_properties)
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
gpu_contiguous) gpu_contiguous)
from theano.sandbox.cuda.blas import GpuConv from theano.sandbox.cuda.blas import GpuConv
...@@ -376,12 +377,24 @@ class GpuDnnConvGradI(GpuDnnConvBase): ...@@ -376,12 +377,24 @@ class GpuDnnConvGradI(GpuDnnConvBase):
conv_op = 'cudnnConvolutionBackwardData' conv_op = 'cudnnConvolutionBackwardData'
from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
gpu_optimizer)
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode='conv'): conv_mode='conv'):
"""
GPU convolution using cuDNN from NVIDIA.
The memory layout to use is 'bc01', that is 'batch', 'channel',
'first dim', 'second dim' in that order.
:param img: images to do the convolution over
:param kerns: convolution filters
:param border_mode: one of 'valid', 'full' (default: 'valid')
:param subsample: perform subsampling of the output (default: (1, 1))
:param conv_mode: perform convolution (kernels flipped) or cross-correlation. One of 'conv', 'cross'. (default: 'conv')
:warning: The cuDNN library only works with GPUs that have a compute
capability of 3.0 or higher. This means that older GPUs will not
work with this Op.
"""
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
...@@ -389,20 +402,6 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -389,20 +402,6 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
return GpuDnnConv()(img, kerns, desc) return GpuDnnConv()(img, kerns, desc)
@local_optimizer([GpuConv])
def local_conv_dnn(node):
    """Graph optimizer: replace a ``GpuConv`` node with the cuDNN-backed
    :func:`dnn_conv` when the convolution parameters are supported.

    Returns a one-element list containing the replacement variable, or
    ``None`` (implicit) to leave the node unchanged.
    """
    if isinstance(node.op, GpuConv):
        # cuDNN only handles 'full' and 'valid' border modes; bail out
        # (return None) for anything else so the original op is kept.
        if node.op.border_mode not in ['full', 'valid']:
            return
        img, kern = node.inputs
        border_mode = node.op.border_mode
        subsample = node.op.subsample
        return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
                         border_mode=border_mode, subsample=subsample)]
# Registered under the 'cudnn' tag: only active when the user enables
# optimizer_including=cudnn.
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
class GpuDnnSoftmax(DnnBase): class GpuDnnSoftmax(DnnBase):
""" """
Op for the cuDNN Softmax. Op for the cuDNN Softmax.
...@@ -555,12 +554,32 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -555,12 +554,32 @@ err%(name)s = cudnnSoftmaxForward(
return (0, 3) return (0, 3)
@local_optimizer([GpuSoftmax]) # We need this since other stuff from opt is not importable.
def local_softmax_dnn(node): if cuda_available:
if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x') from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins)) gpu_optimizer)
out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out] @local_optimizer([GpuConv])
def local_conv_dnn(node):
if isinstance(node.op, GpuConv):
if node.op.border_mode not in ['full', 'valid']:
return
img, kern = node.inputs
border_mode = node.op.border_mode
subsample = node.op.subsample
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)]
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
@local_optimizer([GpuSoftmax])
def local_softmax_dnn(node):
    """Graph optimizer: replace a ``GpuSoftmax`` node with the cuDNN
    softmax (``GpuDnnSoftmax``).

    The 2D input is expanded to 4D because the cuDNN op works on 4D
    ('bc01') tensors; the result is shuffled back down to 2D.
    """
    if isinstance(node.op, GpuSoftmax):
        # Add two broadcastable trailing dims: (b, c) -> (b, c, 1, 1).
        ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
        out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
        # Drop the extra dims and ensure the result is a CUDA ndarray variable.
        out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
        return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn') gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论