Change the way the conv2d FFT optimization is enabled: from a Theano flag to a registered, but disabled, optimization.

0c41c43b · Arnaud Bergeron · 5c61c434 · 0c41c43b · 0c41c43b
--- a/theano/sandbox/cuda/__init__.py
+++ b/theano/sandbox/cuda/__init__.py
@@ -33,24 +33,6 @@ AddConfigVar('cublas.lib',
        """Name of the cuda blas library for the linker.""",
        StrParam('cublas'))

-AddConfigVar('enable_conv2d_fft',
-             """
-             Use an fft implementation of conv2d on the GPU rather
-             than a direct one.
-
-             The current implementation cannot handle inputs that have
-             an odd number of columns in the images. You have to
-             pad/unpad manually if you want to use this
-             implementation.  This also means that you need to make
-             sure all your filters have an odd number of columns if
-             doing stacked convolutions since otherwise you will end
-             up with odd images.
-
-             You can also call conv2d_fft() with pad=True
-             """,
-             BoolParam(False),
-             in_c_key=False)
-
 #is_nvcc_available called here to initialize global vars in
 #nvcc_compiler module
 nvcc_compiler.is_nvcc_available()

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1120,17 +1120,26 @@ def local_gpu_conv(node):
            return [out]


-@register_opt()
 @local_optimizer([GpuConv])
-def local_conv_fft(node):
-    if (theano.config.enable_conv2d_fft and
-        isinstance(node.op, GpuConv) and
+def local_conv_fft_valid(node):
+    if (isinstance(node.op, GpuConv) and
        node.op.border_mode == 'valid' and
        node.op.subsample == (1, 1)):
        return [conv2d_fft(node.inputs[0], node.inputs[1])]

-import theano.tensor.signal.downsample as downsample

+@local_optimizer([GpuConv])
+def local_conv_fft_full(node):
+    if (isinstance(node.op, GpuConv) and
+        node.op.border_mode == 'full' and
+        node.op.subsample == (1, 1)):
+        return [conv2d_fft(node.inputs[0], node.inputs[1], border_mode='full')]
+
+gpu_optimizer.register("local_conv_fft_valid", local_conv_fft_valid)
+gpu_optimizer.register("local_conv_fft_full", local_conv_fft_full)
+
+
+import theano.tensor.signal.downsample as downsample

 @register_opt()
 @local_optimizer([downsample.DownsampleFactorMax])