提交 06572ddf authored 作者: f0k's avatar f0k

Make the conv_fft_valid and conv_fft_full optimizers support odd-sized input shapes where possible

上级 61144de5
...@@ -1119,14 +1119,32 @@ def local_gpu_conv(node): ...@@ -1119,14 +1119,32 @@ def local_gpu_conv(node):
return [out] return [out]
def _gpu_conv_to_fftconv(node):
    """Return the conv2d_fft expression that replaces a GpuConv node.

    Shared helper for local_conv_fft_valid and local_conv_fft_full. The
    op's border_mode is forwarded unchanged, and pad_last_dim=True is
    requested whenever the last entry of the op's imshp is known and odd.
    """
    # conv2d_fft is imported locally to avoid pycuda warnings.
    from theano.sandbox.cuda.fftconv import conv2d_fft

    fft_options = dict(border_mode=node.op.border_mode)

    image_shape = node.op.imshp
    last_dim = None if image_shape is None else image_shape[-1]
    if last_dim is not None and last_dim % 2 == 1:
        fft_options['pad_last_dim'] = True

    # TODO: If the user supplied the full nonsymbolic image_shape and
    # filter_shape in conv2d(), they could be passed on to conv2d_fft().
    # However, information on batch size and channel counts is currently
    # discarded when a ConvOp is replaced by a GpuConv, so this would need
    # more changes. Sketch of the intended forwarding:
    #   if (node.op.imshp is not None) and (None not in node.op.imshp):
    #       kwargs['image_shape'] = (bsize, inchannels) + node.op.imshp
    #   if (node.op.kshp is not None) and (None not in node.op.kshp):
    #       kwargs['filter_shape'] = (outchannels, inchannels) + node.op.kshp
    first_input, second_input = node.inputs[0], node.inputs[1]
    return conv2d_fft(first_input, second_input, **fft_options)
@local_optimizer([GpuConv]) @local_optimizer([GpuConv])
def local_conv_fft_valid(node): def local_conv_fft_valid(node):
if (isinstance(node.op, GpuConv) and if (isinstance(node.op, GpuConv) and
node.op.border_mode == 'valid' and node.op.border_mode == 'valid' and
node.op.subsample == (1, 1)): node.op.subsample == (1, 1)):
# import locally to avoid pycuda warnings return [_gpu_conv_to_fftconv(node)]
from theano.sandbox.cuda.fftconv import conv2d_fft
return [conv2d_fft(node.inputs[0], node.inputs[1])]
@local_optimizer([GpuConv]) @local_optimizer([GpuConv])
...@@ -1134,9 +1152,7 @@ def local_conv_fft_full(node): ...@@ -1134,9 +1152,7 @@ def local_conv_fft_full(node):
if (isinstance(node.op, GpuConv) and if (isinstance(node.op, GpuConv) and
node.op.border_mode == 'full' and node.op.border_mode == 'full' and
node.op.subsample == (1, 1)): node.op.subsample == (1, 1)):
# import locally to avoid pycuda warnings return [_gpu_conv_to_fftconv(node)]
from theano.sandbox.cuda.fftconv import conv2d_fft
return [conv2d_fft(node.inputs[0], node.inputs[1], border_mode='full')]
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid) gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
gpu_optimizer.register("conv_fft_full", local_conv_fft_full) gpu_optimizer.register("conv_fft_full", local_conv_fft_full)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论