Make the conv_fft_valid and conv_fft_full optimizers support an odd (uneven) last input dimension when the image shape is known

06572ddf · f0k · 61144de5 · 06572ddf
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1119,14 +1119,32 @@ def local_gpu_conv(node):
            return [out]
+def _gpu_conv_to_fftconv(node):
+    # Shared helper for local_conv_fft_valid and local_conv_fft_full: returns
+    # the conv2d_fft() replacement; import is local to avoid pycuda warnings.
+    from theano.sandbox.cuda.fftconv import conv2d_fft
+    kwargs = {'border_mode': node.op.border_mode}
+    if (node.op.imshp is not None and
+        node.op.imshp[-1] is not None and
+        node.op.imshp[-1] % 2 == 1):
+        kwargs['pad_last_dim'] = True  # last imshp dim is known and odd
+    # TODO: If the user supplied the full nonsymbolic image_shape and
+    # filter_shape in conv2d(), we could pass it on to conv2d_fft(). However,
+    # information on batch size and channel counts is currently discarded
+    # when a ConvOp is replaced by a GpuConv, so this would need more changes.
+    #if (node.op.imshp is not None) and (None not in node.op.imshp):
+    #    kwargs['image_shape'] = (bsize, inchannels) + node.op.imshp
+    #if (node.op.kshp is not None) and (None not in node.op.kshp):
+    #    kwargs['filter_shape'] = (outchannels, inchannels) + node.op.kshp
+    return conv2d_fft(node.inputs[0], node.inputs[1], **kwargs)
 @local_optimizer([GpuConv])
 def local_conv_fft_valid(node):
    if (isinstance(node.op, GpuConv) and
        node.op.border_mode == 'valid' and
        node.op.subsample == (1, 1)):
-        # import locally to avoid pycuda warnings
+        return [_gpu_conv_to_fftconv(node)]  # helper forwards node.op.border_mode
-        from theano.sandbox.cuda.fftconv import conv2d_fft
-        return [conv2d_fft(node.inputs[0], node.inputs[1])]
 @local_optimizer([GpuConv])
@@ -1134,9 +1152,7 @@ def local_conv_fft_full(node):
    if (isinstance(node.op, GpuConv) and
        node.op.border_mode == 'full' and
        node.op.subsample == (1, 1)):
-        # import locally to avoid pycuda warnings
+        return [_gpu_conv_to_fftconv(node)]  # helper forwards node.op.border_mode
-        from theano.sandbox.cuda.fftconv import conv2d_fft
-        return [conv2d_fft(node.inputs[0], node.inputs[1], border_mode='full')]
 gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
 gpu_optimizer.register("conv_fft_full", local_conv_fft_full)