Merge pull request #2480 from nouiz/conv

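Conv and meta_conv: disable GpuConv for version != -1, pass border_mode and subsample directly to GpuCorrMM in local_conv_gemm, and keep the broadcastable pattern on the shared variables built by the meta-optimizers.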

Merge pull request #2480 from nouiz/conv
7a78f453 · abergeron · c907bf60 · 876eb091 · 7a78f453 · 7a78f453
--- a/theano/gof/opt.py
+++ b/theano/gof/opt.py
@@ -856,8 +856,10 @@ class LocalMetaOptimizer(LocalOptimizer):
                pass
            elif hasattr(input.tag, 'test_value'):
                givens[input] = theano.shared(
-                        input.type.filter(input.tag.test_value),
-                        input.name, borrow=True)
+                    input.type.filter(input.tag.test_value),
+                    input.name,
+                    broadcastable=input.broadcastable,
+                    borrow=True)
            else:
                missing.add(input)
        if missing:

--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -1541,6 +1541,18 @@ class GpuConv(GpuOp):
                        to enable them.
        """
        self.border_mode = border_mode
+        if version != -1:
+            raise Exception(
+                """GpuConv with version!=-1 is disabled as we do not
+                test it anymore. It probably work, so you probably can
+                just comment this error and use it. But we want to
+                make sure you know about that. Also, this Op is pretty
+                slow and isn't used by default anymore. We strongly
+                suggest to use GpuCorrMM that is much faster and
+                implement all the functionality (at a cost of some
+                extra memory usage). If you can use cuDNN, that is
+                even better.
+                """)
        self.subsample = subsample
        if logical_img_hw is not None:
            h, w = logical_img_hw

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1285,16 +1285,11 @@ def local_conv_gemm(node):
        img, kern = node.inputs
        border_mode = node.op.border_mode
        subsample = node.op.subsample
-        pad = (0,0)
-        if (border_mode == 'full') and (subsample != (1,1)):
-            # need to simulate this via a padded valid convolution
-            pad = 'full'
-            border_mode = 'valid'
-        if (border_mode == 'valid'):
+        if (border_mode == 'valid') or (subsample != (1,1)):
            # need to flip the kernel for valid convolution
            kern = kern[:, :, ::-1, ::-1]
            # By default use GpuCorrMM
-            rval = GpuCorrMM('valid', subsample, pad)(
+            rval = GpuCorrMM(border_mode, subsample)(
                gpu_contiguous(img), gpu_contiguous(kern))

            # call GpuCorrMM_gradWeights if good
@@ -1323,7 +1318,7 @@ def local_conv_gemm(node):
                    # because we are not allowed to replace a CudaNdarray with
                    # a DimShuffle instance in a graph optimization)
                    rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
-                        GpuCorrMM_gradWeights('valid', subsample, pad)(
+                        GpuCorrMM_gradWeights(border_mode, subsample)(
                            gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
                            gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
                        ).dimshuffle(1, 0, 2, 3))
@@ -1331,7 +1326,7 @@ def local_conv_gemm(node):
            # need to dimshuffle the kernel for full convolution
            kern = kern.dimshuffle(1, 0, 2, 3)
            # call GpuCorrMM_gradInputs
-            rval = GpuCorrMM_gradInputs('valid', subsample, pad)(
+            rval = GpuCorrMM_gradInputs('valid', subsample)(
                    gpu_contiguous(kern), gpu_contiguous(img))
        if node.outputs[0].broadcastable != rval.broadcastable:
            # With given shape information, conv2d_fft may return a different
@@ -1411,9 +1406,11 @@ class ConvMetaOptimizer(LocalCudaMetaOptimizer):
 # TODO: Use var.type.filter when cuda_ndarray.filter supports non-strict casts
 #                        var.type.filter(numpy.random.randn(*shape),
 #                                        allow_downcast=True),
-                        numpy.require(numpy.random.randn(*shape),
-                                      dtype=var.dtype),
-                        var.name, borrow=True)
+                    numpy.require(numpy.random.randn(*shape),
+                                  dtype=var.dtype),
+                    var.name,
+                    broadcastable=var.broadcastable,
+                    borrow=True)
        # return mapping
        return result


--- a/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
@@ -556,8 +556,8 @@ def _test_subsample(cls, mode, version_valid=[-1], version_full=[-1]):

 def test_subsample():
    for t in _test_subsample(None, theano_mode,
-                             version_valid=[-2, -1, 1, 3, 11, 12],
-                             version_full=[-2, -1]):
+                             version_valid=[-1],
+                             version_full=[-1]):
        yield t