提交 203255b3 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2044 from ballasn/conv2d_fft

Add an option to deactivate conv2d_fft at the op level
...@@ -36,6 +36,9 @@ TODO: Give examples for how to use these things! They are pretty complicated. ...@@ -36,6 +36,9 @@ TODO: Give examples for how to use these things! They are pretty complicated.
in your environment. This is not enabled by default because it in your environment. This is not enabled by default because it
has some restrictions on input and uses more memory. Also note has some restrictions on input and uses more memory. Also note
that it requires CUDA >= 5.0, scikits.cuda >= 0.5.0 and PyCUDA to run. that it requires CUDA >= 5.0, scikits.cuda >= 0.5.0 and PyCUDA to run.
To deactivate the fft optimization on a specific nnet.conv2d
while the optimization flags are active, you can set its parameter
version to 'no_fft'
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>` - :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
3D Convolution. Doesn't work on the GPU. 3D Convolution. Doesn't work on the GPU.
- :func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` - :func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`
......
...@@ -687,7 +687,8 @@ class GpuConv(GpuOp): ...@@ -687,7 +687,8 @@ class GpuConv(GpuOp):
verbose=0, verbose=0,
kshp=None, kshp=None,
imshp=None, imshp=None,
max_threads_dim0=None): max_threads_dim0=None,
fft_opt=True):
""" """
:param version: each version of c_code implements many kernel for the :param version: each version of c_code implements many kernel for the
convolution. By default we try to guess the best one. convolution. By default we try to guess the best one.
...@@ -706,7 +707,11 @@ class GpuConv(GpuOp): ...@@ -706,7 +707,11 @@ class GpuConv(GpuOp):
:param max_threads_dim0: The maximum number of threads for the :param max_threads_dim0: The maximum number of threads for the
block size dimensions 0 (blockDim.x) used by the block size dimensions 0 (blockDim.x) used by the
GPU function. GPU function.
:param fft_opt: deactivates the fft optimization at the op level when
set to False. Note that by default the fft optimizations
aren't enabled. See
:ref:`convolution documentation <libdoc_tensor_nnet_conv>`
to enable them.
""" """
self.border_mode = border_mode self.border_mode = border_mode
self.subsample = subsample self.subsample = subsample
...@@ -730,6 +735,7 @@ class GpuConv(GpuOp): ...@@ -730,6 +735,7 @@ class GpuConv(GpuOp):
self.kshp = kshp self.kshp = kshp
self.imshp = imshp self.imshp = imshp
self.max_threads_dim0 = max_threads_dim0 self.max_threads_dim0 = max_threads_dim0
self.fft_opt = fft_opt
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) \ return type(self) == type(other) \
......
...@@ -1143,6 +1143,7 @@ def local_gpu_conv(node): ...@@ -1143,6 +1143,7 @@ def local_gpu_conv(node):
version=op.version, version=op.version,
verbose=op.verbose, verbose=op.verbose,
imshp=op.imshp, imshp=op.imshp,
fft_opt=op.fft_opt
) )
if op.imshp_logical is not None: if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3] logical_img_hw = op.imshp_logical[1:3]
...@@ -1242,7 +1243,8 @@ def _gpu_conv_to_fftconv(node): ...@@ -1242,7 +1243,8 @@ def _gpu_conv_to_fftconv(node):
def local_conv_fft_valid(node): def local_conv_fft_valid(node):
if (isinstance(node.op, GpuConv) and if (isinstance(node.op, GpuConv) and
node.op.border_mode == 'valid' and node.op.border_mode == 'valid' and
node.op.subsample == (1, 1)): node.op.subsample == (1, 1) and
node.op.fft_opt):
return [_gpu_conv_to_fftconv(node)] return [_gpu_conv_to_fftconv(node)]
...@@ -1250,7 +1252,8 @@ def local_conv_fft_valid(node): ...@@ -1250,7 +1252,8 @@ def local_conv_fft_valid(node):
def local_conv_fft_full(node): def local_conv_fft_full(node):
if (isinstance(node.op, GpuConv) and if (isinstance(node.op, GpuConv) and
node.op.border_mode == 'full' and node.op.border_mode == 'full' and
node.op.subsample == (1, 1)): node.op.subsample == (1, 1) and
node.op.fft_opt):
return [_gpu_conv_to_fftconv(node)] return [_gpu_conv_to_fftconv(node)]
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid) gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
......
...@@ -119,6 +119,52 @@ class TestConv2dFFT(unittest.TestCase): ...@@ -119,6 +119,52 @@ class TestConv2dFFT(unittest.TestCase):
utt.assert_allclose(res_ref, res_fft) utt.assert_allclose(res_ref, res_fft)
def test_opt_nofft_valid(self):
    # version='no_fft' must prevent the conv_fft_valid optimizer from
    # rewriting this valid-mode convolution into a CuFFTOp.
    img_shape = (5, 3, 7, 6)
    kern_shape = (2, 3, 3, 3)
    img_data = numpy.random.random(img_shape).astype('float32')
    kern_data = numpy.random.random(kern_shape).astype('float32')
    img = shared(img_data)
    kern = shared(kern_data)
    conv = theano.tensor.nnet.conv.conv2d(img, kern, version='no_fft')
    opt_mode = mode_with_gpu.including('conv_fft_valid')
    # NOTE(review): f_ref is compiled but never compared with f_fft; it
    # only exercises compilation without the fft optimizer — confirm intent.
    f_ref = theano.function([], conv)
    f_fft = theano.function([], conv, mode=opt_mode)
    # The optimized graph must contain no CuFFTOp node.
    nodes = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(node.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for node in nodes)
def test_opt_nofft_full(self):
    # version='no_fft' must prevent the conv_fft_full optimizer from
    # rewriting this full-mode convolution into a CuFFTOp.
    img_shape = (5, 3, 7, 6)
    kern_shape = (2, 3, 3, 3)
    img_data = numpy.random.random(img_shape).astype('float32')
    kern_data = numpy.random.random(kern_shape).astype('float32')
    img = shared(img_data)
    kern = shared(kern_data)
    conv = theano.tensor.nnet.conv.conv2d(img, kern,
                                          border_mode='full',
                                          version='no_fft')
    opt_mode = mode_with_gpu.including('conv_fft_full')
    # NOTE(review): f_ref is compiled but never compared with f_fft; it
    # only exercises compilation without the fft optimizer — confirm intent.
    f_ref = theano.function([], conv)
    f_fft = theano.function([], conv, mode=opt_mode)
    # The optimized graph must contain no CuFFTOp node.
    nodes = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(node.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for node in nodes)
class TestConv3dFFT(unittest.TestCase): class TestConv3dFFT(unittest.TestCase):
......
...@@ -348,8 +348,9 @@ class ConvOp(OpenMPOp): ...@@ -348,8 +348,9 @@ class ConvOp(OpenMPOp):
:type verbose: int :type verbose: int
:param verbose: passed to GpuConv :param verbose: passed to GpuConv
:type version: int :type version: int or str
:param version: passed to GpuConv :param version: passed to GpuConv, if version='no_fft', fft
optimization will be deactivated at the op level.
The 3 following parameters are used internally when we generate The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1. the gradient when dx!=1 or dy!=1.
...@@ -367,6 +368,13 @@ class ConvOp(OpenMPOp): ...@@ -367,6 +368,13 @@ class ConvOp(OpenMPOp):
Set to False in the grad against the weight when the Set to False in the grad against the weight when the
output_mode is full. output_mode is full.
""" """
# Deactivate the fft optimization at the op level if specified
if version == "no_fft":
self.fft_opt = False
version = -1
else:
self.fft_opt = True
# We must continue to consider None as 1 for backward compatibility. # We must continue to consider None as 1 for backward compatibility.
if dx is None: if dx is None:
dx = 1 dx = 1
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论