提交 203255b3 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2044 from ballasn/conv2d_fft

Add an option to deactivate conv2d_fft at the op level
......@@ -36,6 +36,9 @@ TODO: Give examples for how to use these things! They are pretty complicated.
in your environement. This is not enabled by default because it
has some restrictions on input and uses more memory. Also note
that it requires CUDA >= 5.0, scikits.cuda >= 0.5.0 and PyCUDA to run.
To deactivate the fft optimization on a specific nnet.conv2d
while the optimization flags are active, you can set its
``version`` parameter to 'no_fft'.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
3D Convolution. Doesn't work on the GPU.
- :func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`
......
......@@ -687,7 +687,8 @@ class GpuConv(GpuOp):
verbose=0,
kshp=None,
imshp=None,
max_threads_dim0=None):
max_threads_dim0=None,
fft_opt=True):
"""
:param version: each version of c_code implements many kernel for the
convolution. By default we try to guess the best one.
......@@ -706,7 +707,11 @@ class GpuConv(GpuOp):
:param max_threads_dim0: The maximum number of threads for the
block size dimensions 0 (blockDim.x) used by the
GPU function.
:param fft_opt: deactivate the fft optimization at the op level when
                set to False. Note that by default the fft
                optimizations aren't enabled. See
                :ref:`convolution documentation <libdoc_tensor_nnet_conv>`
                to enable them.
"""
self.border_mode = border_mode
self.subsample = subsample
......@@ -730,6 +735,7 @@ class GpuConv(GpuOp):
self.kshp = kshp
self.imshp = imshp
self.max_threads_dim0 = max_threads_dim0
self.fft_opt = fft_opt
def __eq__(self, other):
return type(self) == type(other) \
......
......@@ -1143,6 +1143,7 @@ def local_gpu_conv(node):
version=op.version,
verbose=op.verbose,
imshp=op.imshp,
fft_opt=op.fft_opt
)
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
......@@ -1242,7 +1243,8 @@ def _gpu_conv_to_fftconv(node):
def local_conv_fft_valid(node):
    """Rewrite a valid-mode ``GpuConv`` node into its FFT-based form.

    Fires only for border_mode 'valid' with unit subsampling, and only
    when the op has not opted out via its ``fft_opt`` attribute
    (set to False by ``version='no_fft'``).
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    if op.border_mode != 'valid' or op.subsample != (1, 1):
        return
    if not op.fft_opt:
        return
    return [_gpu_conv_to_fftconv(node)]
......@@ -1250,7 +1252,8 @@ def local_conv_fft_valid(node):
def local_conv_fft_full(node):
    """Rewrite a full-mode ``GpuConv`` node into its FFT-based form.

    Fires only for border_mode 'full' with unit subsampling, and only
    when the op has not opted out via its ``fft_opt`` attribute
    (set to False by ``version='no_fft'``).
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    if op.border_mode != 'full' or op.subsample != (1, 1):
        return
    if not op.fft_opt:
        return
    return [_gpu_conv_to_fftconv(node)]
# Register the valid-mode FFT rewrite under a named tag so it can be
# enabled selectively, e.g. mode.including('conv_fft_valid').
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
......
......@@ -119,6 +119,52 @@ class TestConv2dFFT(unittest.TestCase):
utt.assert_allclose(res_ref, res_fft)
def test_opt_nofft_valid(self):
    """version='no_fft' must keep the FFT optimizer from rewriting a
    valid-mode conv2d, even when 'conv_fft_valid' is explicitly enabled.
    """
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    conv = theano.tensor.nnet.conv.conv2d(inputs, filters, version='no_fft')
    mode = mode_with_gpu.including('conv_fft_valid')
    # NOTE(review): the original also compiled an unused reference
    # function (f_ref); removed as dead code since this test only
    # inspects the optimized graph and compares no outputs.
    f_fft = theano.function([], conv, mode=mode)
    # Make sure that no CuFFTOp has been inserted.
    topo = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo)
def test_opt_nofft_full(self):
    """version='no_fft' must keep the FFT optimizer from rewriting a
    full-mode conv2d, even when 'conv_fft_full' is explicitly enabled.
    """
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                          border_mode='full',
                                          version='no_fft')
    mode = mode_with_gpu.including('conv_fft_full')
    # NOTE(review): the original also compiled an unused reference
    # function (f_ref); removed as dead code since this test only
    # inspects the optimized graph and compares no outputs.
    f_fft = theano.function([], conv, mode=mode)
    # Make sure that no CuFFTOp has been inserted.
    topo = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo)
class TestConv3dFFT(unittest.TestCase):
......
......@@ -348,8 +348,9 @@ class ConvOp(OpenMPOp):
:type verbose: int
:param verbose: passed to GpuConv
:type version: int
:param version: passed to GpuConv
:type version: int or str
:param version: passed to GpuConv; if version='no_fft', the fft
                optimization will be deactivated at the op level.
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
......@@ -367,6 +368,13 @@ class ConvOp(OpenMPOp):
Set to False in the grad again the weight when the
output_mode is full.
"""
# Deactivate the fft optimization at the op level if specified
if version == "no_fft":
self.fft_opt = False
version = -1
else:
self.fft_opt = True
# We must continue to consider None as 1 for backward compatibility.
if dx is None:
dx = 1
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论