提交 203255b3 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2044 from ballasn/conv2d_fft

Add an option to deactivate conv2d_fft at the op level
......@@ -36,6 +36,9 @@ TODO: Give examples for how to use these things! They are pretty complicated.
in your environement. This is not enabled by default because it
has some restrictions on input and uses more memory. Also note
that it requires CUDA >= 5.0, scikits.cuda >= 0.5.0 and PyCUDA to run.
To deactivate the fft optimization on a specific nnet.conv2d
while the optimization flags are active, you can set its
``version`` parameter to 'no_fft'.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
3D Convolution. Doesn't work on the GPU.
- :func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`
......
......@@ -687,7 +687,8 @@ class GpuConv(GpuOp):
verbose=0,
kshp=None,
imshp=None,
max_threads_dim0=None):
max_threads_dim0=None,
fft_opt=True):
"""
:param version: each version of c_code implements many kernel for the
convolution. By default we try to guess the best one.
......@@ -706,7 +707,11 @@ class GpuConv(GpuOp):
:param max_threads_dim0: The maximum number of threads for the
block size dimensions 0 (blockDim.x) used by the
GPU function.
:param fft_opt: deactivate the fft optimization at the op level when
                set to False. Note that by default the fft
                optimizations aren't enabled. See
                :ref:`convolution documentation <libdoc_tensor_nnet_conv>`
                to enable them.
"""
self.border_mode = border_mode
self.subsample = subsample
......@@ -730,6 +735,7 @@ class GpuConv(GpuOp):
self.kshp = kshp
self.imshp = imshp
self.max_threads_dim0 = max_threads_dim0
self.fft_opt = fft_opt
def __eq__(self, other):
return type(self) == type(other) \
......
......@@ -1143,6 +1143,7 @@ def local_gpu_conv(node):
version=op.version,
verbose=op.verbose,
imshp=op.imshp,
fft_opt=op.fft_opt
)
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
......@@ -1242,7 +1243,8 @@ def _gpu_conv_to_fftconv(node):
def local_conv_fft_valid(node):
    """Rewrite a valid-mode ``GpuConv`` node into its FFT-based form.

    Fires only for border_mode 'valid' with unit subsampling, and only
    when the op has not opted out via its ``fft_opt`` attribute
    (set to False by ``version='no_fft'``).
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    if op.border_mode != 'valid' or op.subsample != (1, 1):
        return
    if not op.fft_opt:
        return
    return [_gpu_conv_to_fftconv(node)]
......@@ -1250,7 +1252,8 @@ def local_conv_fft_valid(node):
def local_conv_fft_full(node):
    """Rewrite a full-mode ``GpuConv`` node into its FFT-based form.

    Fires only for border_mode 'full' with unit subsampling, and only
    when the op has not opted out via its ``fft_opt`` attribute
    (set to False by ``version='no_fft'``).
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    if op.border_mode != 'full' or op.subsample != (1, 1):
        return
    if not op.fft_opt:
        return
    return [_gpu_conv_to_fftconv(node)]
# Register the valid-mode FFT rewrite under a named tag so it can be
# enabled selectively, e.g. mode.including('conv_fft_valid').
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
......
......@@ -119,6 +119,52 @@ class TestConv2dFFT(unittest.TestCase):
utt.assert_allclose(res_ref, res_fft)
def test_opt_nofft_valid(self):
    """version='no_fft' must keep the FFT optimizer from rewriting a
    valid-mode conv2d, even when 'conv_fft_valid' is explicitly enabled.
    """
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    conv = theano.tensor.nnet.conv.conv2d(inputs, filters, version='no_fft')
    mode = mode_with_gpu.including('conv_fft_valid')
    # NOTE(review): the original also compiled an unused reference
    # function (f_ref); removed as dead code since this test only
    # inspects the optimized graph and compares no outputs.
    f_fft = theano.function([], conv, mode=mode)
    # Make sure that no CuFFTOp has been inserted.
    topo = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo)
def test_opt_nofft_full(self):
    """version='no_fft' must keep the FFT optimizer from rewriting a
    full-mode conv2d, even when 'conv_fft_full' is explicitly enabled.
    """
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                          border_mode='full',
                                          version='no_fft')
    mode = mode_with_gpu.including('conv_fft_full')
    # NOTE(review): the original also compiled an unused reference
    # function (f_ref); removed as dead code since this test only
    # inspects the optimized graph and compares no outputs.
    f_fft = theano.function([], conv, mode=mode)
    # Make sure that no CuFFTOp has been inserted.
    topo = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo)
class TestConv3dFFT(unittest.TestCase):
......
......@@ -348,8 +348,9 @@ class ConvOp(OpenMPOp):
:type verbose: int
:param verbose: passed to GpuConv
:type version: int
:param version: passed to GpuConv
:type version: int or str
:param version: passed to GpuConv; if version='no_fft', the fft
                optimization will be deactivated at the op level.
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
......@@ -367,6 +368,13 @@ class ConvOp(OpenMPOp):
Set to False in the grad again the weight when the
output_mode is full.
"""
# Deactivate the fft optimization at the op level if specified
if version == "no_fft":
self.fft_opt = False
version = -1
else:
self.fft_opt = True
# We must continue to consider None as 1 for backward compatibility.
if dx is None:
dx = 1
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论