提交 9fe28989 authored 作者: Nicolas Ballas's avatar Nicolas Ballas

Add an option to deactivate conv2d_fft at the op level

上级 93be9cb8
......@@ -687,7 +687,8 @@ class GpuConv(GpuOp):
verbose=0,
kshp=None,
imshp=None,
max_threads_dim0=None):
max_threads_dim0=None,
fft_opt=True):
"""
:param version: each version of c_code implements many kernel for the
convolution. By default we try to guess the best one.
......@@ -706,6 +707,8 @@ class GpuConv(GpuOp):
:param max_threads_dim0: The maximum number of threads for the
block size dimensions 0 (blockDim.x) used by the
GPU function.
:param fft_opt: deactivate the fft_opt optimization at the op level when
set to False.
"""
self.border_mode = border_mode
......@@ -730,6 +733,7 @@ class GpuConv(GpuOp):
self.kshp = kshp
self.imshp = imshp
self.max_threads_dim0 = max_threads_dim0
self.fft_opt = fft_opt
def __eq__(self, other):
return type(self) == type(other) \
......
......@@ -1143,6 +1143,7 @@ def local_gpu_conv(node):
version=op.version,
verbose=op.verbose,
imshp=op.imshp,
fft_opt=op.fft_opt
)
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
......@@ -1242,7 +1243,8 @@ def _gpu_conv_to_fftconv(node):
def local_conv_fft_valid(node):
    """Rewrite a valid-mode, unit-stride GpuConv into its FFT-based
    equivalent, unless the op opted out via ``fft_opt=False``.

    Returns a one-element replacement list when the rewrite applies,
    otherwise ``None`` (no optimization).
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    # Only unit-stride 'valid' convolutions can use the FFT path, and
    # the op itself must not have disabled it (version='no_fft').
    if op.border_mode == 'valid' and op.subsample == (1, 1) and op.fft_opt:
        return [_gpu_conv_to_fftconv(node)]
......@@ -1250,7 +1252,8 @@ def local_conv_fft_valid(node):
def local_conv_fft_full(node):
    """Rewrite a full-mode, unit-stride GpuConv into its FFT-based
    equivalent, unless the op opted out via ``fft_opt=False``.

    Returns a one-element replacement list when the rewrite applies,
    otherwise ``None`` (no optimization).
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    # Only unit-stride 'full' convolutions can use the FFT path, and
    # the op itself must not have disabled it (version='no_fft').
    if op.border_mode == 'full' and op.subsample == (1, 1) and op.fft_opt:
        return [_gpu_conv_to_fftconv(node)]
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
......
......@@ -119,6 +119,52 @@ class TestConv2dFFT(unittest.TestCase):
utt.assert_allclose(res_ref, res_fft)
def test_opt_nofft_valid(self):
    """version='no_fft' must block the conv_fft_valid optimization:
    the compiled graph may not contain any CuFFTOp, even when the
    optimization is explicitly enabled in the compilation mode."""
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                          version='no_fft')
    mode = mode_with_gpu.including('conv_fft_valid')
    f_fft = theano.function([], conv, mode=mode)
    # Make sure that NO CuFFTOp has been inserted: the op was built
    # with version='no_fft', which sets fft_opt=False on GpuConv.
    topo = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo)
def test_opt_nofft_full(self):
    """version='no_fft' must block the conv_fft_full optimization:
    the compiled graph may not contain any CuFFTOp, even when the
    optimization is explicitly enabled in the compilation mode."""
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                          border_mode='full',
                                          version='no_fft')
    mode = mode_with_gpu.including('conv_fft_full')
    f_fft = theano.function([], conv, mode=mode)
    # Make sure that NO CuFFTOp has been inserted: the op was built
    # with version='no_fft', which sets fft_opt=False on GpuConv.
    topo = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo)
class TestConv3dFFT(unittest.TestCase):
......
......@@ -348,8 +348,9 @@ class ConvOp(OpenMPOp):
:type verbose: int
:param verbose: passed to GpuConv
:type version: int
:param version: passed to GpuConv
:type version: int or str
:param version: passed to GpuConv; if version='no_fft', the fft
optimization will be deactivated at the op level.
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
......@@ -367,6 +368,13 @@ class ConvOp(OpenMPOp):
Set to False in the grad with respect to the weights when the
output_mode is full.
"""
# Deactivate the fft optimization at the op level if specified
if version == "no_fft":
self.fft_opt = False
version = -1
else:
self.fft_opt = True
# We must continue to consider None as 1 for backward compatibility.
if dx is None:
dx = 1
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论