提交 4eff8975 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #1904 from f0k/fftconv-enhancements

FFT convolution: make optimization support uneven image width
......@@ -32,7 +32,8 @@ TODO: Give examples for how to use these things! They are pretty complicated.
to perform the work. You can enable it by setting
'THEANO_FLAGS=optimizer_including=conv_fft_valid:conv_fft_full'
in your environment. This is not enabled by default because it
has some restrictions on input and uses more memory.
has some restrictions on input and uses more memory. Also note
that it requires CUDA >= 5.0, scikits.cuda >= 0.5.0 and PyCUDA to run.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`. Doesn't work on the GPU.
- :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`
Another conv3d implementation that uses the conv2d with data reshaping.
......
......@@ -662,7 +662,7 @@ class GpuConv(GpuOp):
def c_compile_args(self):
    """Return extra compiler arguments for the generated CUDA kernel.

    The kernel width is passed through ``-DTHEANO_KERN_WID`` so the C
    code can size/unroll its inner loop at compile time.  Falls back to
    0 when the kernel shape, or its width entry, is unknown.
    """
    nb = 0
    # Guard both against an unknown kernel shape and against a known
    # shape whose width entry is None (partially-symbolic shapes).
    if (self.kshp is not None) and (self.kshp[1] is not None):
        nb = self.kshp[1]
    return ['-DTHEANO_KERN_WID=' + str(nb)]  # ,'-g','-G']
......
......@@ -1119,14 +1119,32 @@ def local_gpu_conv(node):
return [out]
def _gpu_conv_to_fftconv(node):
    """Build a conv2d_fft graph equivalent to the given GpuConv node.

    Shared helper used by both local_conv_fft_valid and
    local_conv_fft_full.
    """
    # Imported here, not at module level, so that merely loading this
    # module does not trigger pycuda warnings.
    from theano.sandbox.cuda.fftconv import conv2d_fft

    kwargs = {'border_mode': node.op.border_mode}
    imshp = node.op.imshp
    odd_width = (imshp is not None and
                 imshp[-1] is not None and
                 imshp[-1] % 2 == 1)
    if odd_width:
        # Request padding when the last image dimension is known and odd.
        kwargs['pad_last_dim'] = True
    # TODO: If the user supplied the full nonsymbolic image_shape and
    # filter_shape in conv2d(), we could pass it on to conv2d_fft(). However,
    # information on batch size and channel counts is currently discarded
    # when a ConvOp is replaced by a GpuConv, so this would need more changes.
    #if (node.op.imshp is not None) and (None not in node.op.imshp):
    #    kwargs['image_shape'] = (bsize, inchannels) + node.op.imshp
    #if (node.op.kshp is not None) and (None not in node.op.kshp):
    #    kwargs['filter_shape'] = (outchannels, inchannels) + node.op.kshp
    return conv2d_fft(node.inputs[0], node.inputs[1], **kwargs)
@local_optimizer([GpuConv])
def local_conv_fft_valid(node):
    """Replace a valid-mode, unstrided GpuConv by an FFT convolution.

    Registered as the opt-in 'conv_fft_valid' GPU optimization.  Returns
    a one-element replacement list, or None (implicitly) when the node
    does not match.
    """
    if (isinstance(node.op, GpuConv) and
        node.op.border_mode == 'valid' and
        node.op.subsample == (1, 1)):
        # Delegate to the shared helper, which also enables pad_last_dim
        # for images whose last dimension is known and odd.
        return [_gpu_conv_to_fftconv(node)]
@local_optimizer([GpuConv])
def local_conv_fft_full(node):
    """Replace a full-mode, unstrided GpuConv by an FFT convolution.

    Registered as the opt-in 'conv_fft_full' GPU optimization.  Returns
    a one-element replacement list, or None (implicitly) when the node
    does not match.
    """
    if (isinstance(node.op, GpuConv) and
        node.op.border_mode == 'full' and
        node.op.subsample == (1, 1)):
        # Delegate to the shared helper, which also enables pad_last_dim
        # for images whose last dimension is known and odd.
        return [_gpu_conv_to_fftconv(node)]
# Register the FFT-convolution rewrites.  They are opt-in: enabled via
# THEANO_FLAGS=optimizer_including=conv_fft_valid:conv_fft_full.
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
gpu_optimizer.register("conv_fft_full", local_conv_fft_full)
......
......@@ -11,6 +11,7 @@ if cuda_ndarray.cuda_available == False:
raise SkipTest('Optional package cuda disabled')
from theano.sandbox.cuda import float32_shared_constructor as shared
import theano.sandbox.cuda.fftconv
# In FAST_COMPILE mode, build the GPU test mode from FAST_RUN instead.
# NOTE(review): an `else` branch defining mode_with_gpu for other modes
# presumably follows — it is outside this diff fragment; confirm upstream.
if theano.config.mode == 'FAST_COMPILE':
    mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论