Fix the output broadcast pattern in each convolution optimization individually instead of doing it in the meta-optimizer.

641c325b · Frederic · d524d461 · 641c325b · 641c325b · 641c325b
--- a/theano/gof/opt.py
+++ b/theano/gof/opt.py
@@ -898,14 +898,7 @@ class LocalMetaOptimizer(LocalOptimizer):
            timings.sort()
            if self.verbose:
                print "= %s" % timings[0][2]
-            ret = timings[0][1]
+            return timings[0][1]
-            for i in range(len(ret)):
-                if ret[i].type != node.outputs[i].type:
-                    # If the type is different, it is probably due to
-                    # the broadcast pattern. Try to fix that.
-                    ret[i] = theano.tensor.patternbroadcast(
-                        ret[i], node.outputs[i].broadcastable)
-            return ret
        return
    def provide_inputs(self, node, inputs):

--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -1331,9 +1331,13 @@ if True:
            border_mode = node.op.border_mode
            subsample = node.op.subsample
            direction_hint = node.op.direction_hint
-            return [dnn_conv(img, kern,
+            rval = dnn_conv(img, kern,
-                             border_mode=border_mode, subsample=subsample,
+                            border_mode=border_mode, subsample=subsample,
-                             direction_hint=direction_hint)]
+                            direction_hint=direction_hint)
+            if node.outputs[0].broadcastable != rval.broadcastable:
+                rval = tensor.patternbroadcast(
+                    rval, node.outputs[0].type.broadcastable)
+            return [rval]
    # This optimizer is registered in opt.py as part of the meta-optimizer.
    # It tries exactly the opposite code path of what local_conv_dnn() uses,
@@ -1360,9 +1364,13 @@ if True:
                    direction_hint = 'forward'
                else:
                    direction_hint = 'bprop weights'
-            return [dnn_conv(img, kern,
+            rval = dnn_conv(img, kern,
-                             border_mode=border_mode, subsample=subsample,
+                            border_mode=border_mode, subsample=subsample,
-                             direction_hint=direction_hint)]
+                            direction_hint=direction_hint)
+            if node.outputs[0].broadcastable != rval.broadcastable:
+                rval = tensor.patternbroadcast(
+                    rval, node.outputs[0].type.broadcastable)
+            return [rval]
    @register_opt('cudnn')
    @local_optimizer([GpuDownsampleFactorMax])

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1148,10 +1148,11 @@ def _gpu_conv_to_fftconv(node):
            (node.op.imshp[0] is not None)):
        kwargs['filter_shape'] = (node.op.nkern, node.op.imshp[0]) + node.op.kshp
    rval = conv2d_fft(node.inputs[0], node.inputs[1], **kwargs)
-    if ('image_shape' in kwargs) or ('filter_shape' in kwargs):
+    if node.outputs[0].broadcastable != rval.broadcastable:
        # With given shape information, conv2d_fft may return a different
        # broadcast pattern than GpuConv. This is forbidden, so we fix it.
-        rval = tensor.patternbroadcast(rval, node.outputs[0].type.broadcastable)
+        rval = tensor.patternbroadcast(
+            rval, node.outputs[0].type.broadcastable)
    return rval
@@ -1292,7 +1293,11 @@ def local_conv_gemm(node):
        if (border_mode == 'valid'):
            # need to flip the kernel for valid convolution
            kern = kern[:, :, ::-1, ::-1]
-            # call GpuCorrMM or GpuCorrMM_gradWeights
+            # By default use GpuCorrMM
+            rval = GpuCorrMM('valid', subsample, pad)(
+                gpu_contiguous(img), gpu_contiguous(kern))
+            # Switch to GpuCorrMM_gradWeights when that is likely faster
            # (the latter is faster if batchsize * kernelHeight * kernelWidth
            # is larger than inputChannels * outputHeight * outputWidth.
            # GpuConv does not always store information on the batchsize and
@@ -1317,21 +1322,23 @@ def local_conv_gemm(node):
                    # (we need to wrap the result in as_cuda_ndarray_variable,
                    # because we are not allowed to replace a CudaNdarray with
                    # a DimShuffle instance in a graph optimization)
-                    return [theano.sandbox.cuda.as_cuda_ndarray_variable(
+                    rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
-                            GpuCorrMM_gradWeights('valid', subsample, pad)(
+                        GpuCorrMM_gradWeights('valid', subsample, pad)(
                            gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
                            gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
-                            ).dimshuffle(1, 0, 2, 3))]
+                        ).dimshuffle(1, 0, 2, 3))
-            # use GpuCorrMM if we did not choose GpuCorrMM_gradWeights above
-            return [GpuCorrMM('valid', subsample, pad)(
-                    gpu_contiguous(img), gpu_contiguous(kern))]
        elif (border_mode == 'full'):
            # need to dimshuffle the kernel for full convolution
            kern = kern.dimshuffle(1, 0, 2, 3)
            # call GpuCorrMM_gradInputs
-            return [GpuCorrMM_gradInputs('valid', subsample, pad)(
+            rval = GpuCorrMM_gradInputs('valid', subsample, pad)(
-                    gpu_contiguous(kern), gpu_contiguous(img))]
+                    gpu_contiguous(kern), gpu_contiguous(img))
+        if node.outputs[0].broadcastable != rval.broadcastable:
+            # The GpuCorrMM-based ops may return a different broadcast
+            # pattern than GpuConv. This is forbidden, so we fix it.
+            rval = tensor.patternbroadcast(
+                rval, node.outputs[0].type.broadcastable)
+        return [rval]
 # First we register the optimizer that moves convolutions to the GPU.
 register_opt()(local_gpu_conv)