Merge pull request #3786 from lamblin/abstract_conv_infershape

Abstract conv: fix tests and add infer_shape

Merge pull request #3786 from lamblin/abstract_conv_infershape
22db3930 · Frédéric Bastien · 725b7a3f · bcb9318e · 22db3930 · 22db3930
--- a/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
@@ -439,18 +439,6 @@ def test_default_conv():
        assert any([isinstance(a.op, cuda.blas.GpuCorrMM)
                    for a in f.maker.fgraph.apply_nodes])

-    mode = theano_mode.excluding('local_conv_dnn', 'local_conv_gemm')
-    f = theano.function([img, fil], c, mode=mode)
-
-    assert any([isinstance(a.op, cuda.blas.GpuConv)
-                for a in f.maker.fgraph.apply_nodes])
-
-    mode = theano_mode.excluding('conv_dnn', 'conv_gemm')
-    f = theano.function([img, fil], c, mode=mode)
-
-    assert any([isinstance(a.op, cuda.blas.GpuConv)
-                for a in f.maker.fgraph.apply_nodes])
-

 def _test_full(cls, mode=None, version=[-1], extra_shapes=[],
               test_bigger_kernels=True):
@@ -614,7 +602,8 @@ class TestConv2DGPU(unittest.TestCase):
                cuda.blas.BaseGpuCorrMM)

    def test_logical_shapes(self):
-        seed_rng()
+        # Logical shapes are no longer supported, so check that passing
+        # them to conv2d raises a ValueError.
        for stride in range(1, 4):
            kshp = (10, 2, 10, 10)
            featshp = (3, 10, 11, 11)
@@ -629,23 +618,14 @@ class TestConv2DGPU(unittest.TestCase):
            featshp_logical = (featshp[0], featshp[1], featshp[2] * stride,
                               featshp[3] * stride)
            kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
-            # print featshp, kshp_rotated, featshp_logical[1:], kshp[2:]
-            image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
-                                                border_mode='full',
-                                                image_shape=featshp,
-                                                filter_shape=kshp_rotated,
-                                                imshp_logical=featshp_logical[1:],
-                                                kshp_logical=kshp[2:])
+            self.assertRaises(ValueError, tensor.nnet.conv2d,
+                              a, kernel_rotated,
+                              border_mode='full',
+                              image_shape=featshp,
+                              filter_shape=kshp_rotated,
+                              imshp_logical=featshp_logical[1:],
+                              kshp_logical=kshp[2:])

-            func = theano.function([a, A], image_estimate, mode=theano_mode)
-            # theano.printing.debugprint(func,)
-            assert any([isinstance(node.op, self.conv_ops)
-                        for node in func.maker.fgraph.toposort()])
-
-            a_in = numpy.random.randn(*featshp).astype("float32")
-            A_in = numpy.random.randn(*kshp).astype("float32")
-
-            func(a_in, A_in)

    def test_invalid_input_shape(self):
        """
@@ -838,17 +818,8 @@ def conv_grad(mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsample, op):

    # TODO: also test custom pad values
    corr_op = op(mode, subsample)(i, k)
-    # try to compile reference implementation without shape,
-    # so we don't have to compile hundreds of versions
    conv_op = tensor.nnet.conv2d(i, k[:, :, ::-1, ::-1],
                                 border_mode=mode, subsample=subsample)
-    try:
-        conv_op_di = theano.grad(conv_op.sum(), i)
-        conv_op_dk = theano.grad(conv_op.sum(), k)
-    except Exception:
-        # compile with shape information only when needed
-        conv_op = tensor.nnet.conv2d(i, k[:, :, ::-1, ::-1],
-                                     ishape, kshape, mode, subsample)
    conv_op_di = theano.grad(conv_op.sum(), i)
    conv_op_dk = theano.grad(conv_op.sum(), k)
    corr_op_di = theano.grad(corr_op.sum(), i)
@@ -856,18 +827,15 @@ def conv_grad(mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsample, op):
    outputs = [corr_op, conv_op,
               corr_op_di, conv_op_di,
               corr_op_dk, conv_op_dk]
-    try:
-        conv_op_dik = theano.grad(conv_op_di.sum(), k)
-        conv_op_dki = theano.grad(conv_op_dk.sum(), i)
-        corr_op_dik = theano.grad(corr_op_di.sum(), k)
-        corr_op_dki = theano.grad(corr_op_dk.sum(), i)
-        outputs.extend([corr_op_dik, conv_op_dik,
-                        corr_op_dki, conv_op_dki])
-    except Exception:
-        # skip if the reference implementation can't do it
-        pass
-
-    f = theano.function([i, k], outputs, mode=theano_mode.excluding('conv_dnn', 'conv_gemm'))
+
+    conv_op_dik = theano.grad(conv_op_di.sum(), k)
+    conv_op_dki = theano.grad(conv_op_dk.sum(), i)
+    corr_op_dik = theano.grad(corr_op_di.sum(), k)
+    corr_op_dki = theano.grad(corr_op_dk.sum(), i)
+    outputs.extend([corr_op_dik, conv_op_dik,
+                    corr_op_dki, conv_op_dki])
+
+    f = theano.function([i, k], outputs, mode=theano_mode)

    allvals = f(npy_img, npy_kern)


--- a/theano/tensor/nnet/__init__.py
+++ b/theano/tensor/nnet/__init__.py
@@ -101,16 +101,24 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
        of shape (batch size, output channels, output rows, output columns)
    """

+    if 'imshp_logical' in kwargs or 'kshp_logical' in kwargs:
+        raise ValueError(
+            "Keyword arguments 'imshp_logical' and 'kshp_logical' for conv2d "
+            "are not supported anymore (and have not been a reliable way to "
+            "perform upsampling). That feature is still available by calling "
+            "theano.tensor.nnet.conv.conv2d() for the time being.")
    if len(kwargs.keys()) > 0:
        warnings.warn(str(kwargs.keys()) +
                      " are now deprecated in "
                      "`tensor.nnet.abstract_conv.conv2d` interface"
-                      " and will be ignored.")
+                      " and will be ignored.",
+                      stacklevel=2)

    if image_shape is not None:
        warnings.warn("The `image_shape` keyword argument to "
                      "`tensor.nnet.conv2d` is deprecated, it has been "
-                      "renamed to `input_shape`.")
+                      "renamed to `input_shape`.",
+                      stacklevel=2)
        if input_shape is None:
            input_shape = image_shape
        else:

--- a/theano/tensor/nnet/abstract_conv.py
+++ b/theano/tensor/nnet/abstract_conv.py
@@ -269,6 +269,11 @@ class BaseAbstractConv2d(Op):
        flops *= inputs[1] * filters[0] * inputs[0]
        return flops

+    def do_constant_folding(self, node):
+        # Never constant-fold these abstract ops: they have no implementation
+        # to evaluate. This may change in the future.
+        return False
+

 class AbstractConv2d(BaseAbstractConv2d):
    """
@@ -298,7 +303,10 @@ class AbstractConv2d(BaseAbstractConv2d):
        return Apply(self, [img, kern], [output])

    def perform(self, node, inp, out_):
-        raise NotImplementedError('AbstractConv2d theano optimization failed')
+        raise NotImplementedError(
+            'AbstractConv2d theano optimization failed. '
+            'Did you exclude both "conv_dnn" and "conv_gemm" from '
+            'the optimizer?')

    def grad(self, inp, grads):
        bottom, weights = inp
@@ -322,6 +330,21 @@ class AbstractConv2d(BaseAbstractConv2d):
        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        return d_bottom, d_weights

+    def infer_shape(self, node, input_shapes):
+        imshp = input_shapes[0]  # shape of the image input
+        kshp = input_shapes[1]  # shape of the kernel input
+
+        # Known constant dims (non-None in self.imshp / self.kshp) win.
+        if self.imshp is not None:
+            imshp = [imshp[i] if self.imshp[i] is None else self.imshp[i]
+                     for i in range(4)]
+        if self.kshp is not None:
+            kshp = [kshp[i] if self.kshp[i] is None else self.kshp[i]
+                    for i in range(4)]
+        res = get_conv_output_shape(imshp, kshp, self.border_mode,
+                                    self.subsample)
+        return [res]  # a list with one shape, for the single output
+

 class AbstractConv2d_gradWeights(BaseAbstractConv2d):
    """Gradient wrt. filters for `AbstractConv2d`.
@@ -358,7 +381,9 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):

    def perform(self, node, inp, out_):
        raise NotImplementedError(
-            'AbstractConv2d_gradWeight theano optimization failed')
+            'AbstractConv2d_gradWeights theano optimization failed. '
+            'Did you exclude both "conv_dnn" and "conv_gemm" from '
+            'the optimizer?')

    def grad(self, inp, grads):
        bottom, top = inp[:2]
@@ -387,6 +412,19 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
    def connection_pattern(self, node):
        return [[1], [1], [0]]  # no connection to height, width

+    def infer_shape(self, node, input_shapes):
+        # Each output dim comes from self.kshp (given when creating the Op)
+        # when known; otherwise from topshp[1], imshp[1] and the two entries
+        # of the node's third input (the `shape` input), in that order.
+        # TODO: without subsampling, infer the kernel shape from input shapes.
+        imshp = input_shapes[0]
+        topshp = input_shapes[1]
+        kshp = self.kshp[:] if self.kshp is not None else [None] * 4
+        fallback_kshp = [topshp[1], imshp[1], node.inputs[2][0], node.inputs[2][1]]
+        kshp = [fallback_kshp[i] if kshp[i] is None else kshp[i]
+                for i in range(4)]
+        return [kshp]
+

 class AbstractConv2d_gradInputs(BaseAbstractConv2d):
    """Gradient wrt. inputs for `AbstractConv2d`.
@@ -424,7 +462,9 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):

    def perform(self, node, inp, out_):
        raise NotImplementedError(
-            'AbstractConv2d_gradWeight theano optimization failed')
+            'AbstractConv2d_gradInputs theano optimization failed. '
+            'Did you exclude both "conv_dnn" and "conv_gemm" from '
+            'the optimizer?')

    def grad(self, inp, grads):
        weights, top = inp[:2]
@@ -448,3 +488,17 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):

    def connection_pattern(self, node):
        return [[1], [1], [0]]  # no connection to height, width
+
+    def infer_shape(self, node, input_shapes):
+        # Each output dim comes from self.imshp (given when creating the Op)
+        # when known; otherwise from topshp[0], kshp[1] and the two entries
+        # of the node's third input (the `shape` input), in that order.
+        # TODO: without subsampling, infer the image shape from input shapes.
+        kshp = input_shapes[0]
+        topshp = input_shapes[1]
+        imshp = self.imshp[:] if self.imshp is not None else [None] * 4
+        fallback_imshp = [topshp[0], kshp[1], node.inputs[2][0],
+                          node.inputs[2][1]]
+        imshp = [fallback_imshp[i] if imshp[i] is None else imshp[i]
+                 for i in range(4)]
+        return [imshp]
--- a/theano/tensor/nnet/conv.py
+++ b/theano/tensor/nnet/conv.py
@@ -367,7 +367,7 @@ class ConvOp(OpenMPOp):
        # with s=1 for mode=='full' and s=-1 for mode=='valid'.
        # To support symbolic shapes, we express this with integer arithmetics.
        warnings.warn("The method `getOutputShape` is deprecated use"
-                      "`get_conv_output_shape` instead.")
+                      "`get_conv_output_shape` instead.", stacklevel=2)
        return tuple(get_conv_shape_1axis(i, k, mode, d)
                     for i, k, d in zip(inshp, kshp, stride))


--- a/theano/tensor/nnet/tests/test_conv.py
+++ b/theano/tensor/nnet/tests/test_conv.py
@@ -13,8 +13,14 @@ from theano.tests.unittest_tools import attr


 class TestConv2D(utt.InferShapeTester):
+    # This class contains the tests for the legacy 2d convolution; it is
+    # also subclassed to run the same tests on other implementations.
    mode = None
    dtype = theano.config.floatX
+    # Subclasses override this with the conv2d function under test.
+    # Wrapping it in `staticmethod` keeps Python from binding it as a method
+    # and passing `self` as the first argument.
+    conv2d = staticmethod(conv.conv2d)

    def setUp(self):
        super(TestConv2D, self).setUp()
@@ -435,7 +441,7 @@ class TestConv2D(utt.InferShapeTester):
                        input = theano.shared(numpy.random.random(image_shape))
                        filters = theano.shared(numpy.random.random(filter_shape))

-                        output = conv.conv2d(input, filters,
+                        output = self.conv2d(input, filters,
                                             image_shape, filter_shape,
                                             border_mode,
                                             unroll_patch=True,
@@ -465,62 +471,75 @@ class TestConv2D(utt.InferShapeTester):
        adtens_val = rand(*aivec_val)
        bdtens_val = rand(*bivec_val)
        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])

        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])

        aivec_val = [6, 2, 8, 3]
        bivec_val = [4, 2, 5, 3]
        adtens_val = rand(*aivec_val)
        bdtens_val = rand(*bivec_val)
        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])

        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])

        aivec_val = [3, 6, 7, 5]
        bivec_val = [5, 6, 3, 2]
        adtens_val = rand(*aivec_val)
        bdtens_val = rand(*bivec_val)
        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])

        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])

        aivec_val = [3, 6, 7, 5]
        bivec_val = [5, 6, 2, 3]
        adtens_val = rand(*aivec_val)
        bdtens_val = rand(*bivec_val)
        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])

        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])

        aivec_val = [5, 2, 4, 3]
        bivec_val = [6, 2, 4, 3]
        adtens_val = rand(*aivec_val)
        bdtens_val = rand(*bivec_val)
        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])

        self._compile_and_check([adtens, bdtens],
-                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
-                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
+                [self.conv2d(adtens, bdtens, aivec_val, bivec_val,
+                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
+                excluding=['conv_gemm'])


+class TestDefaultConv2D(TestConv2D):
+    conv2d = staticmethod(theano.tensor.nnet.conv2d)  # used by inherited tests
+
 # Test that broadcasting of gradients works correctly when using the
 # nnet.conv2d() interface. This was reported in #3763, and uses the example
 # code from that ticket.