grouped conv 3d python changes

e3124016 · affanv14 · notoraptor · c2052266 · e3124016
--- a/theano/gpuarray/dnn.py
+++ b/theano/gpuarray/dnn.py
@@ -627,8 +627,6 @@ class GpuDnnConv(DnnBase):
                                                    SUPPORTED_DNN_CONV_ALGO_RUNTIME):
            raise ValueError("convolution algo %s can't be used for "
                             "3d convolutions", (self.algo,))
-        if img.type.ndim == 5 and self.num_groups != 1:
-            raise ValueError("Grouped convolutions not implemented for 3D convolutions")
        if (not isinstance(desc.type, CDataType) or
                desc.type.ctype != 'cudnnConvolutionDescriptor_t'):
@@ -1062,7 +1060,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
 def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
               conv_mode='conv', direction_hint=None,
-               algo=None, precision=None):
+               algo=None, precision=None, num_groups=1):
    """
    GPU convolution using cuDNN from NVIDIA.
@@ -1119,7 +1117,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
    fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
    ctx_name = infer_context_name(img, kerns)
    if (border_mode == 'valid' and subsample == (1, 1, 1) and dilation == (1, 1, 1) and
-            direction_hint == 'bprop weights'):
+            direction_hint == 'bprop weights' and num_groups == 1):
        # Special case: We are asked to use GpuDnnConvGradW. We need to set
        # up a suitable 'fake' convolution to compute the gradient for.
        img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3, 4))
@@ -1141,7 +1139,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
        return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4), ctx_name)
    elif (border_mode == 'full' and subsample == (1, 1, 1) and
-          direction_hint != 'forward!'):
+          direction_hint != 'forward!' and num_groups == 1):
        # Special case: We can be faster by using GpuDnnConvGradI to compute
        # the full convolution as the backward pass of a valid convolution.
        # We just need to set up a suitable 'fake' valid convolution.
@@ -1165,7 +1163,8 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
    img = gpu_contiguous(img)
    kerns = gpu_contiguous(kerns)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
-                          conv_mode=conv_mode, precision=precision)(kerns.shape)
+                          conv_mode=conv_mode, precision=precision,
+                          num_groups=num_groups)(kerns.shape)
    desc_op = desc.owner.op
    # We can use Shape_i and bypass the infer_shape here as this is on
    # the input of node and it will always be present.
@@ -1177,7 +1176,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
                                    filter_dilation=dilation)
    out_shp = assert_conv_shape(out_shp)
    out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
-    return GpuDnnConv(algo=algo)(img, kerns, out, desc)
+    return GpuDnnConv(algo=algo, num_groups=num_groups)(img, kerns, out, desc)
 def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
@@ -1202,12 +1201,14 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
 def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
-                     subsample=(1, 1, 1), dilation=(1, 1, 1), conv_mode='conv', precision=None):
+                     subsample=(1, 1, 1), dilation=(1, 1, 1), conv_mode='conv',
+                     precision=None, num_groups=1):
    """
    3d version of dnn_gradweight
    """
    return dnn_gradweight(img, topgrad, kerns_shp, border_mode,
-                          subsample, dilation, conv_mode, precision)
+                          subsample, dilation, conv_mode, precision,
+                          num_groups)
 def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
@@ -1232,12 +1233,14 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
 def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid',
-                    subsample=(1, 1, 1), dilation=(1, 1, 1), conv_mode='conv', precision=None):
+                    subsample=(1, 1, 1), dilation=(1, 1, 1), conv_mode='conv',
+                    precision=None, num_groups=1):
    """
    3d version of `dnn_gradinput`.
    """
    return dnn_gradinput(kerns, topgrad, img_shp, border_mode, subsample,
-                         dilation, conv_mode, precision)
+                         dilation, conv_mode, precision,
+                         num_groups)
 class GpuDnnPoolDesc(Op):
@@ -3028,8 +3031,6 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
    if version(raises=False) < 6000 and op.filter_dilation != (1, 1):
        return None
-    if op.num_groups > 1:
-        return None
    inp1 = inputs[0]
    inp2 = inputs[1]
@@ -3079,8 +3080,6 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
    if version(raises=False) < 6000 and op.filter_dilation != (1, 1, 1):
        return None
-    if op.num_groups > 1:
-        return None
    inp1 = inputs[0]
    inp2 = inputs[1]
@@ -3099,7 +3098,8 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
                          subsample=op.subsample,
                          dilation=op.filter_dilation,
                          direction_hint='forward!',
-                          conv_mode=conv_mode)
+                          conv_mode=conv_mode,
+                          num_groups=op.num_groups)
    elif isinstance(op, AbstractConv3d_gradWeights):
        shape = (inp2.shape[1], inp1.shape[1],
                 inputs[2][0], inputs[2][1], inputs[2][2])
@@ -3107,7 +3107,8 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
                                border_mode=op.border_mode,
                                subsample=op.subsample,
                                dilation=op.filter_dilation,
-                                conv_mode=conv_mode)
+                                conv_mode=conv_mode,
+                                num_groups=op.num_groups)
    elif isinstance(op, AbstractConv3d_gradInputs):
        shape = (inp2.shape[0], inp1.shape[1],
                 inputs[2][0], inputs[2][1], inputs[2][2])
@@ -3115,7 +3116,8 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
                               border_mode=op.border_mode,
                               subsample=op.subsample,
                               dilation=op.filter_dilation,
-                               conv_mode=conv_mode)
+                               conv_mode=conv_mode,
+                               num_groups=op.num_groups)
    return [rval]