Add an optimization for the 'full' border mode and update the tests to work with the new way of enabling these optimizations.

7a10c464 · Arnaud Bergeron · 0c41c43b · 7a10c464 · 7a10c464 · 7a10c464
--- a/theano/sandbox/cuda/fftconv.py
+++ b/theano/sandbox/cuda/fftconv.py
@@ -4,18 +4,17 @@ import numpy as np
 import theano
 import theano.tensor as T

-from theano.sandbox.cuda import (GpuOp, basic_ops, CudaNdarrayType,
-                                 CudaNdarray)
+from theano.sandbox.cuda import GpuOp, basic_ops, CudaNdarrayType

 import scikits.cuda
-from scikits.cuda import fft, linalg, cublas
+from scikits.cuda import fft, cublas, misc

 import pycuda.gpuarray

 import theano.misc.pycuda_init

-linalg.init()

+misc.init()

 # TODO: investigate the effect of enabling fastmath on FFT performance
 # (how can it be enabled?).

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1135,8 +1135,8 @@ def local_conv_fft_full(node):
        node.op.subsample == (1, 1)):
        return [conv2d_fft(node.inputs[0], node.inputs[1], border_mode='full')]

-gpu_optimizer.register("local_conv_fft_valid", local_conv_fft_valid)
-gpu_optimizer.register("local_conv_fft_full", local_conv_fft_full)
+gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
+gpu_optimizer.register("conv_fft_full", local_conv_fft_full)


 import theano.tensor.signal.downsample as downsample

--- a/theano/sandbox/cuda/tests/test_fftconv.py
+++ b/theano/sandbox/cuda/tests/test_fftconv.py
@@ -19,13 +19,6 @@ else:


 class TestConv2dFFT(unittest.TestCase):
-    def setUp(self):
-        self._prev = theano.confg.enable_conv2d_fft
-        theano.confg.enable_conv2d_fft = True
-
-    def tearDown(self):
-        theano.confg.enable_conv2d_fft = self._prev
-
    def run_conv(self, inputs_shape, filters_shape, pad=False, **other_args):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
@@ -63,7 +56,7 @@ class TestConv2dFFT(unittest.TestCase):
                      filters_shape=(2, 3, 3, 3),
                      border_mode='full', pad=True)

-    def test_opt(self):
+    def test_opt_valid(self):
        inputs_shape = (5, 3, 7, 6)
        filters_shape = (2, 3, 3, 3)

@@ -75,6 +68,36 @@ class TestConv2dFFT(unittest.TestCase):

        conv = theano.tensor.nnet.conv.conv2d(inputs, filters)

+        mode = mode_with_gpu.including('conv_fft_valid')
+
+        f_ref = theano.function([], conv)
+        f_fft = theano.function([], conv, mode=mode)
+
+        # make sure the optimization inserted at least one CuFFTOp node
+        topo = f_fft.maker.fgraph.toposort()
+        assert any(isinstance(node.op, theano.sandbox.cuda.fftconv.CuFFTOp)
+                   for node in topo)
+
+        res_ref = f_ref()
+        res_fft = f_fft()
+
+        utt.assert_allclose(res_ref, res_fft)
+
+    def test_opt_full(self):
+        inputs_shape = (5, 3, 7, 6)
+        filters_shape = (2, 3, 3, 3)
+
+        inputs_val = numpy.random.random(inputs_shape).astype('float32')
+        filters_val = numpy.random.random(filters_shape).astype('float32')
+
+        inputs = shared(inputs_val)
+        filters = shared(filters_val)
+
+        conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
+                                              border_mode='full')
+
+        mode = mode_with_gpu.including('conv_fft_full')
+
        f_ref = theano.function([], conv)
-        f_fft = theano.function([], conv, mode=mode_with_gpu)
+        f_fft = theano.function([], conv, mode=mode)