提交 a9647f76 authored 作者: Nicolas Ballas's avatar Nicolas Ballas 提交者: Pascal Lamblin

update abstract dnn conv implementation

上级 d15656b7
......@@ -1276,6 +1276,57 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
return GpuDnnConv3d(algo=algo)(img, kerns, out, desc)
def dnn_gradweight(img, topgrad,
                   kerns_shp,
                   border_mode='valid', subsample=(1, 1),
                   conv_mode='conv', workmem=None):
    """
    GPU convolution gradient with respect to weight using cuDNN from NVIDIA.

    The memory layout to use is 'bc01', that is 'batch', 'channel',
    'first dim', 'second dim' in that order.

    :param img: images of the forward pass (made gpu-contiguous here).
    :param topgrad: gradient flowing in from the layer above
        (made gpu-contiguous here).
    :param kerns_shp: shape of the kernel/weight tensor whose gradient is
        computed; the returned variable has this shape.
    :param border_mode: convolution border mode (default 'valid').
    :param subsample: convolution strides (default (1, 1)).
    :param conv_mode: 'conv' for convolution or 'cross' for correlation.
    :param workmem: workspace strategy forwarded to GpuDnnConvGradW.

    :warning: The cuDNN library only works with GPUs that have a compute
        capability of 3.0 or higher. This means that older GPUs will not
        work with this Op.
    """
    img = gpu_contiguous(img)
    topgrad = gpu_contiguous(topgrad)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img.shape, kerns_shp)
    # BUG FIX: the original called gpu_alloc_empty(*kern_shp) with the
    # undefined name `kern_shp` (the parameter is `kerns_shp`), which
    # raised NameError as soon as this path was taken.
    out = gpu_alloc_empty(*kerns_shp)
    return GpuDnnConvGradW(workmem=workmem)(img, topgrad, out, desc)
def dnn_gradinput(kerns, topgrad,
                  img_shape,
                  border_mode='valid', subsample=(1, 1),
                  conv_mode='conv', workmem=None):
    """
    GPU convolution gradient with respect to input using cuDNN from NVIDIA.

    The memory layout to use is 'bc01', that is 'batch', 'channel',
    'first dim', 'second dim' in that order.

    :param kerns: kernels/weights of the forward pass
        (made gpu-contiguous here).
    :param topgrad: gradient flowing in from the layer above
        (made gpu-contiguous here).
    :param img_shape: shape of the input image tensor whose gradient is
        computed; the returned variable has this shape.
    :param border_mode: convolution border mode (default 'valid').
    :param subsample: convolution strides (default (1, 1)).
    :param conv_mode: 'conv' for convolution or 'cross' for correlation.
    :param workmem: workspace strategy forwarded to GpuDnnConvGradI.

    :warning: The cuDNN library only works with GPUs that have a compute
        capability of 3.0 or higher. This means that older GPUs will not
        work with this Op.
    """
    kerns = gpu_contiguous(kerns)
    topgrad = gpu_contiguous(topgrad)
    # BUG FIX: the original body referenced the undefined name `img_shp`
    # (both in the descriptor call and in gpu_alloc_empty); the parameter
    # is `img_shape`, so this function raised NameError when called.
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img_shape, kerns.shape)
    out = gpu_alloc_empty(*img_shape)
    return GpuDnnConvGradI(workmem=workmem)(kerns, topgrad, out, desc)
class GpuDnnPoolDesc(GpuOp):
"""
This Op builds a pooling descriptor for use in the other pooling operations.
......
......@@ -393,7 +393,7 @@ def local_conv2d_gpu_conv(node):
out.values_eq_approx = values_eq_approx_high_tol
return [as_tensor_variable(out)]
# We register the optimizer that moves convolutions to the GPU.
#register_gpu()(local_conv2d_gpu_conv)
register_gpu()(local_conv2d_gpu_conv)
......@@ -424,20 +424,20 @@ def local_conv2d_cudnn(node):
conv_mode = conv_mode)
return [rval]
if (isinstance(node.op, AbstractConv2d_gradWeights)):
rval = dnn_conv(inp1.dimshuffle(1, 0, 2, 3), inp2,
border_mode=node.op.border_mode,
subsample=node.op.subsample,
direction_hint='bprop weights',
conv_mode = conv_mode)
shape = node.inputs[2]
rval = dnn_gradweight(inp1, inp2, shape,
border_mode=node.op.border_mode,
subsample=node.op.subsample,
conv_mode = conv_mode)
return [rval]
if (isinstance(node.op, AbstractConv2d_gradInputs)):
rval = dnn_conv(inp1, inp2,
border_mode=node.op.border_mode,
subsample=node.op.subsample,
direction_hint='bprop inputs',
conv_mode = conv_mode)
shape = node.inputs[2]
rval = dnn_gradinput(inp1, inp2, shape
border_mode=node.op.border_mode,
subsample=node.op.subsample,
conv_mode = conv_mode)
return [rval]
#register_specialize_device(local_conv2d_cudnn)
register_specialize_device(local_conv2d_cudnn)
@local_optimizer([AbstractConv2d])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论