Merge pull request #6190 from affanv14/fix

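Update warnings and fix errors for grouped convolutions:

- corrMM (CPU and GPU) and the cuDNN forward / grad-input / grad-weight ops now raise ValueError when the filter (or output-channel) count is not divisible by the number of groups.
- The corrMM shape-mismatch message reports the weight's channel dimension as nChannels / numgroups.
- dnn_conv only takes its GradW / GradI special-case paths when num_groups == 1.
- dnn_conv3d flips the kernels along all three spatial axes when conv_mode == 'conv'.
- local_abstractconv_gemm only applies its known-shape heuristic when num_groups == 1.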

Merge pull request #6190 from affanv14/fix
4372225d · abergeron · GitHub · 9a7c799b · 033f563e · 4372225d
--- a/theano/gpuarray/corr_gemm.c
+++ b/theano/gpuarray/corr_gemm.c
@@ -417,6 +417,11 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
                "GpuCorrMM images and kernel must have the same stack size\n");
        return NULL;
    }
+    if ((nFilters % numgroups) != 0) {
+        PyErr_SetString(PyExc_ValueError,
+                "GPUCorrMM the number of filters must be divisible by the number of groups\n");
+        return NULL;
+    }
    // implicit dilated filter
    const size_t dil_kH = (kH - 1) * dilH + 1;
    const size_t dil_kW = (kW - 1) * dilW + 1;
@@ -440,7 +445,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
                "  weight shape: %ld %ld %ld %ld\n"
                "  top shape: %ld %ld %ld %ld (expected %ld %ld %ld %ld)\n",
                batchSize, nChannels, bottomHeight, bottomWidth,
-                nFilters, nChannels, kH, kW,
+                nFilters, nChannels / numgroups, kH, kW,
                PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
                PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3],
                batchSize, nFilters, topHeight, topWidth);

--- a/theano/gpuarray/dnn.py
+++ b/theano/gpuarray/dnn.py
@@ -994,7 +994,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
    fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
    ctx_name = infer_context_name(img, kerns)
    if (border_mode == 'valid' and subsample == (1, 1) and dilation == (1, 1) and
-            direction_hint == 'bprop weights'):
+            direction_hint == 'bprop weights' and num_groups == 1):
        # Special case: We are asked to use GpuDnnConvGradW. We need to set
        # up a suitable 'fake' convolution to compute the gradient for.
        img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
@@ -1015,7 +1015,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
        return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)

    elif (border_mode == 'full' and subsample == (1, 1) and dilation == (1, 1) and
-          direction_hint != 'forward!'):
+          direction_hint != 'forward!' and num_groups == 1):
        # Special case: We can be faster by using GpuDnnConvGradI to compute
        # the full convolution as the backward pass of a valid convolution.
        # We just need to set up a suitable 'fake' valid convolution.
@@ -1119,7 +1119,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
        if conv_mode == 'conv':
            # We need to flip manually. These 'kerns' are not the kernels
            # that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
-            kerns = kerns[:, :, ::-1, ::-1]
+            kerns = kerns[:, :, ::-1, ::-1, ::-1]
        kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4))
        out_shp = (shape_i(kerns, 1, fgraph),
                   shape_i(img, 1, fgraph),

--- a/theano/gpuarray/dnn_fwd.c
+++ b/theano/gpuarray/dnn_fwd.c
@@ -30,6 +30,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
 		    "images and kernel must have the same stack size");
    return 1;
  }
+  if ((PyGpuArray_DIMS(kerns)[0] % params->num_groups) != 0) {
+    PyErr_SetString(PyExc_ValueError,
+		    "Number of filters must be divisible by number of groups");
+    return 1;
+  }

  switch (input->ga.typecode) {
  case GA_DOUBLE:

--- a/theano/gpuarray/dnn_gi.c
+++ b/theano/gpuarray/dnn_gi.c
@@ -29,6 +29,11 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
                    "stack size");
    return 1;
  }
+  if ((PyGpuArray_DIMS(kerns)[0] % params->num_groups) != 0) {
+    PyErr_SetString(PyExc_ValueError,
+		    "Number of filters must be divisible by number of groups");
+    return 1;
+  }

  switch (im->ga.typecode) {
  case GA_DOUBLE:

--- a/theano/gpuarray/dnn_gw.c
+++ b/theano/gpuarray/dnn_gw.c
@@ -29,6 +29,11 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
                    "GpuDnnConv images and kernel must have the same stack size");
    return 1;
  }
+  if ((PyGpuArray_DIMS(output)[1] % params->num_groups) != 0) {
+    PyErr_SetString(PyExc_ValueError,
+		    "Number of output channels must be divisible by number of groups");
+    return 1;
+  }

  switch (input->ga.typecode) {
  case GA_DOUBLE:

--- a/theano/gpuarray/opt.py
+++ b/theano/gpuarray/opt.py
@@ -1588,7 +1588,8 @@ def local_abstractconv_gemm(node):
                (None not in node.op.imshp[-2:]) and
                (node.op.kshp is not None) and
                (None not in node.op.kshp) and
-                border_mode != "half"):
+                border_mode != "half" and
+                node.op.num_groups == 1):
            # we know the kernel and output size
            prod1 = node.op.kshp[0] * node.op.kshp[1]
            prod2 = ((node.op.imshp[-2] - node.op.kshp[0] + 1) *

--- a/theano/tensor/nnet/corr_gemm.c
+++ b/theano/tensor/nnet/corr_gemm.c
@@ -161,6 +161,11 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
                "CorrMM images and kernel must have the same stack size\n");
        return NULL;
    }
+    if ((nFilters %% numgroups) != 0) {
+        PyErr_SetString(PyExc_ValueError,
+                "CorrMM the number of filters must be divisible by the number of groups\n");
+        return NULL;
+    }
    // implicit dilated filter
    const int dil_kH = (kH - 1) * dilH + 1;
    const int dil_kW = (kW - 1) * dilW + 1;
@@ -184,7 +189,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
                "  weight shape: %%d %%d %%d %%d\n"
                "  top shape: %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d)\n",
                batchSize, nChannels, bottomHeight, bottomWidth,
-                nFilters, nChannels, kH, kW,
+                nFilters, nChannels / numgroups, kH, kW,
                PyArray_DIMS(top)[0], PyArray_DIMS(top)[1],
                PyArray_DIMS(top)[2], PyArray_DIMS(top)[3],
                batchSize, nFilters, topHeight, topWidth);