Add a config flag to enable/disable the conv2d_fft opt.

558ad02e · Arnaud Bergeron · 750f6c02 · 558ad02e · 558ad02e · 558ad02e
--- a/theano/sandbox/cuda/__init__.py
+++ b/theano/sandbox/cuda/__init__.py
@@ -33,6 +33,23 @@ AddConfigVar('cublas.lib',
        """Name of the cuda blas library for the linker.""",
        StrParam('cublas'))
+# Opt-in switch (off by default) for the FFT-based GPU conv2d optimization.
+AddConfigVar('enable_conv2d_fft',
+             """
+             Use an fft implementation of conv2d on the GPU rather
+             than a direct one.
+             The current implementation cannot handle inputs that have
+             an odd number of columns in the images. You have to
+             pad/unpad manually if you want to use this
+             implementation.  This also means that you need to make
+             sure all your filters have an odd number of columns if
+             doing stacked convolutions since otherwise you will end
+             up with odd images.
+             You can also call conv2d_fft() with pad=True
+             """,
+             BoolParam(False),
+             in_c_key=False)
 #is_nvcc_available called here to initialize global vars in
 #nvcc_compiler module

--- a/theano/sandbox/cuda/fftconv.py
+++ b/theano/sandbox/cuda/fftconv.py
@@ -17,13 +17,9 @@ import theano.misc.pycuda_init
 linalg.init()
-# TODO: investigate FFTW compatibility modes. Can probably set this to
-# the fastest setting.
 # TODO: investigate the effect of enabling fastmath on FFT performance
 # (how can it be enabled?).
 # base class for shared code between scikits.cuda-based ops
 class ScikitsCudaOp(GpuOp):
    def __eq__(self, other):

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1123,7 +1123,8 @@ def local_gpu_conv(node):
 @register_opt()
 @local_optimizer([GpuConv])
 def local_conv_fft(node):
-    if (isinstance(node.op, GpuConv) and
+    if (theano.config.enable_conv2d_fft and
+        isinstance(node.op, GpuConv) and
        node.op.border_mode == 'valid' and
        node.op.subsample == (1, 1)):
        return [conv2d_fft(node.inputs[0], node.inputs[1])]