Merge pull request #6252 from Faruk-Ahmed/conv3d

phase out outdated conv3d

Merge pull request #6252 from Faruk-Ahmed/conv3d
d844e6c1 · Frédéric Bastien · GitHub · 4747cf44 · e3853c84 · d844e6c1
--- a/doc/library/tensor/nnet/conv.txt
+++ b/doc/library/tensor/nnet/conv.txt
@@ -125,9 +125,6 @@ TODO: Give examples on how to use these things! They are pretty complicated.
      ``THEANO_FLAGS=optimizer_excluding=conv_dnn`` in your environment.
      As dnn_conv has a gradient defined, you can also use it manually.
 - Implemented operators for neural network 3D / video convolution:
-    - :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
-      3D Convolution applying multi-channel 3D filters to batches of
-      multi-channel 3D images. It does not flip the kernel.
    - :func:`GpuCorr3dMM <theano.gpuarray.blas.GpuCorr3dMM>`
      This is a GPU-only 3d correlation relying on a Toeplitz matrix
      and gemm implementation (see :func:`GpuCorrMM <theano.sandbox.cuda.blas.GpuCorrMM>`)
@@ -168,7 +165,6 @@ TODO: Give examples on how to use these things! They are pretty complicated.
 .. autofunction:: theano.tensor.nnet.conv2d
 .. autofunction:: theano.tensor.nnet.conv2d_transpose
 .. autofunction:: theano.tensor.nnet.conv3d
-.. autofunction:: theano.tensor.nnet.Conv3D.conv3D
 .. autofunction:: theano.tensor.nnet.conv3d2d.conv3d
 .. autofunction:: theano.tensor.nnet.conv.conv2d

--- a/theano/tensor/nnet/Conv3D.py
+++ b/theano/tensor/nnet/Conv3D.py
--- a/theano/tensor/nnet/ConvGrad3D.py
+++ b/theano/tensor/nnet/ConvGrad3D.py
--- a/theano/tensor/nnet/ConvTransp3D.py
+++ b/theano/tensor/nnet/ConvTransp3D.py
--- a/theano/tensor/nnet/__init__.py
+++ b/theano/tensor/nnet/__init__.py
@@ -22,9 +22,6 @@ from .nnet import (
    confusion_matrix, softsign)
 from . import opt
 from .conv import ConvOp
-from .Conv3D import *
-from .ConvGrad3D import *
-from .ConvTransp3D import *
 from .sigm import (softplus, sigmoid, sigmoid_inplace,
                   scalar_sigmoid, ultra_fast_sigmoid,
                   hard_sigmoid)

--- a/theano/tensor/nnet/conv.py
+++ b/theano/tensor/nnet/conv.py
@@ -854,35 +854,9 @@ class ConvOp(OpenMPOp):
            raise NotImplementedError('todo')
        if self.out_mode == 'valid' and (self.dx, self.dy) != (1, 1):
-            # Use the gradient as defined in conv3D, because the implementation
+            raise NotImplementedError(
-            # by Conv is slow (about 3x slower than conv3D, and probably 10x
+                "ERROR: ConvOp.grad is now disabled for 'valid' convolutions with"
-            # slower than it could be), and incorrect when dx or dy > 2.
+                " stride != (1, 1); call theano.tensor.nnet.conv2d() instead.")
-            # build a "node", that should be equivalent to the one given by
-            # self.make_node, but using conv3D instead of self.
-            shuffled_inputs = inputs.dimshuffle(0, 2, 3, 'x', 1)
-            if inputs.name is not None:
-                shuffled_inputs.name = 'shuffle_for_conv3D(%s)' % inputs.name
-            flipped_kerns = kerns[:, :, ::-1, ::-1]
-            if kerns.name is not None:
-                flipped_kerns.name = 'flipped(%s)' % kerns.name
-            shuffled_kerns = flipped_kerns.dimshuffle(0, 2, 3, 'x', 1)
-            if flipped_kerns.name is not None:
-                shuffled_kerns.name = 'shuffled_for_conv3D(%s)' % flipped_kerns.name
-            tmp_node = theano.tensor.nnet.conv3D(
-                V=shuffled_inputs,
-                W=shuffled_kerns,
-                b=theano.tensor.alloc(np.asarray(0, dtype=kerns.dtype),
-                                      kerns.shape[0]),
-                d=(self.dx, self.dy, 1))
-            node = theano.tensor.addbroadcast(
-                tmp_node, 3).dimshuffle(0, 4, 1, 2)
-            # mimic what happens inside theano.grad: get the input gradient
-            # of the final cost wrt all variables involved.
-            return theano.gradient.grad(cost=None, known_grads={node: gz},
-                                        wrt=[inputs, kerns])
        if self.dx not in (1, 2) or self.dy not in (1, 2):
            raise NotImplementedError(

--- a/theano/tensor/nnet/opt.py
+++ b/theano/tensor/nnet/opt.py
@@ -30,9 +30,6 @@ from theano.tensor import opt
 # Cpu implementation
 from theano.tensor.nnet.conv import conv2d, ConvOp
-from theano.tensor.nnet.Conv3D import conv3D
-from theano.tensor.nnet.ConvGrad3D import convGrad3D
-from theano.tensor.nnet.ConvTransp3D import convTransp3D
 @gof.local_optimizer([SparseBlockGemv], inplace=True)
@@ -257,39 +254,6 @@ def local_conv2d_cpu(node):
    return [rval]
-@local_optimizer([AbstractConv3d])
-def local_conv3d_cpu(node):
-    if not isinstance(node.op, AbstractConv3d):
-        return None
-    img, kern = node.inputs
-    if ((not isinstance(img.type, TensorType) or
-         not isinstance(kern.type, TensorType))):
-        return None
-    if node.op.border_mode not in ['valid', (0, 0, 0)]:
-        return None
-    if node.op.filter_dilation != (1, 1, 1):
-        return None
-    if node.op.num_groups > 1:
-        return None
-    bias = theano.tensor.zeros_like(kern[:, 0, 0, 0, 0])
-    # need to flip the kernel if necessary (conv3D does not flip)
-    if node.op.filter_flip:
-        kern = kern[:, :, ::-1, ::-1, ::-1]
-    # conv3D expects shape (batch, row, column, time, channel)
-    img = img.dimshuffle(0, 2, 3, 4, 1)
-    kern = kern.dimshuffle(0, 2, 3, 4, 1)
-    rval = conv3D(img, kern, bias, node.op.subsample)
-    copy_stack_trace(node.outputs[0], rval)
-    rval = rval.dimshuffle(0, 4, 1, 2, 3)
-    return [rval]
 @local_optimizer([AbstractConv2d_gradWeights])
 def local_conv2d_gradweight_cpu(node):
    if (not isinstance(node.op, AbstractConv2d_gradWeights) or
@@ -311,28 +275,7 @@ def local_conv2d_gradweight_cpu(node):
    if node.op.border_mode == 'valid' and \
            (node.op.subsample != (1, 1)):
-        # Use the gradient as defined in conv3D, because the implementation
+        return None
-        # by Conv is slow (about 3x slower than conv3D, and probably 10x
-        # slower than it could be), and incorrect when subsample > 2.
-        # build a "node", that should be equivalent to the one given by
-        # self.make_node, but using convGrad3D instead.
-        shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
-        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
-        rval = convGrad3D(V=shuffled_img,
-                          d=(node.op.subsample[0], node.op.subsample[1], 1),
-                          WShape=(shuffled_topgrad.shape[4],
-                                  shape[0], shape[1], 1,
-                                  shuffled_img.shape[4]),
-                          dCdH=shuffled_topgrad)
-        copy_stack_trace(node.outputs[0], rval)
-        rval = theano.tensor.addbroadcast(rval, 3)
-        rval = rval.dimshuffle(0, 4, 1, 2)
-        rval = rval[:, :, ::-1, ::-1]
-        rval = theano.tensor.patternbroadcast(rval,
-                                              node.outputs[0].broadcastable)
-        copy_stack_trace(node.outputs[0], rval)
-        return [rval]
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
@@ -411,41 +354,6 @@ def local_conv2d_gradweight_cpu(node):
    return [res]
-@local_optimizer([AbstractConv3d_gradWeights])
-def local_conv3d_gradweight_cpu(node):
-    if not isinstance(node.op, AbstractConv3d_gradWeights):
-        return None
-    img, topgrad, shape = node.inputs
-    if ((not isinstance(img.type, TensorType) or
-         not isinstance(topgrad.type, TensorType))):
-        return None
-    if node.op.border_mode not in ['valid', (0, 0, 0)]:
-        return None
-    if node.op.filter_dilation != (1, 1, 1):
-        return None
-    if node.op.num_groups > 1:
-        return None
-    # conv3D expects shape (batch, row, column, time, channel)
-    img = img.dimshuffle(0, 2, 3, 4, 1)
-    topgrad = topgrad.dimshuffle(0, 2, 3, 4, 1)
-    W_shape = (topgrad.shape[4], shape[0], shape[1], shape[2], img.shape[4])
-    rval = convGrad3D(img, node.op.subsample, W_shape, topgrad)
-    copy_stack_trace(node.outputs[0], rval)
-    rval = rval.dimshuffle(0, 4, 1, 2, 3)
-    # need to flip the kernel if necessary (conv3D does not flip)
-    if node.op.filter_flip:
-        rval = rval[:, :, ::-1, ::-1, ::-1]
-    rval = theano.tensor.patternbroadcast(rval,
-                                          node.outputs[0].broadcastable)
-    return [rval]
 @local_optimizer([AbstractConv2d_gradInputs])
 def local_conv2d_gradinputs_cpu(node):
    if (not isinstance(node.op, AbstractConv2d_gradInputs) or
@@ -467,22 +375,8 @@ def local_conv2d_gradinputs_cpu(node):
    # Conv 3d implementation, needed when subsample > 2
    if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
-        kern = kern[:, :, ::-1, ::-1]
+        # The op doesn't support that anymore.
-        shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1)
+        return False
-        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
-        b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :])
-        rval = convTransp3D(W=shuffled_kern, b=b,
-                            d=(node.op.subsample[0], node.op.subsample[1], 1),
-                            H=shuffled_topgrad,
-                            RShape=(shape[0], shape[1], 1))
-        copy_stack_trace(node.outputs[0], rval)
-        rval = theano.tensor.addbroadcast(rval, 3)
-        rval = rval.dimshuffle(0, 4, 1, 2)
-        rval = theano.tensor.patternbroadcast(rval,
-                                              node.outputs[0].broadcastable)
-        copy_stack_trace(node.outputs[0], rval)
-        return [rval]
    # Conv2d Implementation
    dx, dy = node.op.subsample
@@ -538,40 +432,6 @@ def local_conv2d_gradinputs_cpu(node):
    return [din]
-@local_optimizer([AbstractConv3d_gradInputs])
-def local_conv3d_gradinputs_cpu(node):
-    if not isinstance(node.op, AbstractConv3d_gradInputs):
-        return None
-    kern, topgrad, shape = node.inputs
-    if ((not isinstance(kern.type, TensorType) or
-         not isinstance(topgrad.type, TensorType))):
-        return None
-    if node.op.border_mode not in ['valid', (0, 0, 0)]:
-        return None
-    if node.op.filter_dilation != (1, 1, 1):
-        return None
-    if node.op.num_groups > 1:
-        return None
-    # need to flip the kernel if necessary (conv3D does not flip)
-    if node.op.filter_flip:
-        kern = kern[:, :, ::-1, ::-1, ::-1]
-    # conv3D expects shape (batch, row, column, time, channel)
-    kern = kern.dimshuffle(0, 2, 3, 4, 1)
-    topgrad = topgrad.dimshuffle(0, 2, 3, 4, 1)
-    bias = theano.tensor.zeros_like(kern[0, 0, 0, 0, :])
-    rval = convTransp3D(kern, bias, node.op.subsample, topgrad, shape)
-    copy_stack_trace(node.outputs[0], rval)
-    rval = rval.dimshuffle(0, 4, 1, 2, 3)
-    rval = theano.tensor.patternbroadcast(rval,
-                                          node.outputs[0].broadcastable)
-    return [rval]
 # Register Cpu Optmization
 conv_groupopt = theano.gof.optdb.LocalGroupDB()
 conv_groupopt.__name__ = "conv_opts"
@@ -595,6 +455,7 @@ conv_groupopt.register('local_abstractconv3d_gradweight_gemm',
 conv_groupopt.register('local_abstractconv3d_gradinputs_gemm',
                       local_abstractconv3d_gradinputs_gemm, 30,
                       'conv_gemm', 'fast_compile', 'fast_run')
 # Legacy convolution
 conv_groupopt.register('local_conv2d_cpu', local_conv2d_cpu, 40,
                       'fast_compile', 'fast_run')
@@ -604,14 +465,6 @@ conv_groupopt.register('local_conv2d_gradweight_cpu',
 conv_groupopt.register('local_conv2d_gradinputs_cpu',
                       local_conv2d_gradinputs_cpu, 40,
                       'fast_compile', 'fast_run')
-conv_groupopt.register('local_conv3d_cpu', local_conv3d_cpu, 40,
-                       'fast_compile', 'fast_run')
-conv_groupopt.register('local_conv3d_gradweight_cpu',
-                       local_conv3d_gradweight_cpu, 40,
-                       'fast_compile', 'fast_run')
-conv_groupopt.register('local_conv3d_gradinputs_cpu',
-                       local_conv3d_gradinputs_cpu, 40,
-                       'fast_compile', 'fast_run')
 # Verify that no AbstractConv are present in the graph

--- a/theano/tensor/nnet/tests/test_abstract_conv.py
+++ b/theano/tensor/nnet/tests/test_abstract_conv.py
--- a/theano/tensor/nnet/tests/test_conv.py
+++ b/theano/tensor/nnet/tests/test_conv.py
@@ -294,15 +294,17 @@ class TestConv2D(utt.InferShapeTester):
        """
        Tests convolution where subsampling != (1,1)
        """
-        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', subsample=(2, 2))
-        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 1))
-        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'valid', subsample=(3, 3))
        # Fails as of 2012-07-11
        self.assertRaises(NotImplementedError, self.validate, (1, 1, 6, 6),
                          (1, 1, 3, 3), 'full', subsample=(3, 3))
+        # Fails as of 2017-08-10
+        self.assertRaises(NotImplementedError, self.validate, (3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 2))
+        self.assertRaises(NotImplementedError, self.validate, (3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 1))
+        self.assertRaises(NotImplementedError, self.validate, (1, 1, 6, 6), (1, 1, 3, 3), 'valid', subsample=(3, 3))
    def test_shape_Constant_tensor(self):
        """
        Tests convolution where the {image,filter}_shape is a Constant tensor.
@@ -604,9 +606,6 @@ class TestConv2D(utt.InferShapeTester):
            excluding=['conv_gemm'])
-class TestDefaultConv2D(TestConv2D):
-    conv2d = staticmethod(theano.tensor.nnet.conv2d)
 # Test that broadcasting of gradients works correctly when using the
 # nnet.conv2d() interface. This was reported in #3763, and uses the example
 # code from that ticket.

--- a/theano/tensor/nnet/tests/test_conv3d.py
+++ b/theano/tensor/nnet/tests/test_conv3d.py