提交 9fe28989 authored 作者: Nicolas Ballas's avatar Nicolas Ballas

Add an option to deactivate conv2d_fft at the op level

上级 93be9cb8
......@@ -687,7 +687,8 @@ class GpuConv(GpuOp):
verbose=0,
kshp=None,
imshp=None,
max_threads_dim0=None):
max_threads_dim0=None,
fft_opt=True):
"""
:param version: each version of c_code implements many kernel for the
convolution. By default we try to guess the best one.
......@@ -706,6 +707,8 @@ class GpuConv(GpuOp):
:param max_threads_dim0: The maximum number of threads for the
block size dimensions 0 (blockDim.x) used by the
GPU function.
:param fft_opt: deactivate the fft_opt optimization at the op level when
set to False.
"""
self.border_mode = border_mode
......@@ -730,6 +733,7 @@ class GpuConv(GpuOp):
self.kshp = kshp
self.imshp = imshp
self.max_threads_dim0 = max_threads_dim0
self.fft_opt = fft_opt
def __eq__(self, other):
return type(self) == type(other) \
......
......@@ -1143,6 +1143,7 @@ def local_gpu_conv(node):
version=op.version,
verbose=op.verbose,
imshp=op.imshp,
fft_opt=op.fft_opt
)
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
......@@ -1242,7 +1243,8 @@ def _gpu_conv_to_fftconv(node):
def local_conv_fft_valid(node):
    """Rewrite a valid-mode, unit-stride GpuConv into its FFT-based
    equivalent, unless the op opted out via ``fft_opt=False``.

    Returns a one-element replacement list when the rewrite applies,
    otherwise ``None`` (no optimization).
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    # Only unit-stride 'valid' convolutions can use the FFT path, and
    # the op itself must not have disabled it (version='no_fft').
    if op.border_mode == 'valid' and op.subsample == (1, 1) and op.fft_opt:
        return [_gpu_conv_to_fftconv(node)]
......@@ -1250,7 +1252,8 @@ def local_conv_fft_valid(node):
def local_conv_fft_full(node):
    """Rewrite a full-mode, unit-stride GpuConv into its FFT-based
    equivalent, unless the op opted out via ``fft_opt=False``.

    Returns a one-element replacement list when the rewrite applies,
    otherwise ``None`` (no optimization).
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return
    # Only unit-stride 'full' convolutions can use the FFT path, and
    # the op itself must not have disabled it (version='no_fft').
    if op.border_mode == 'full' and op.subsample == (1, 1) and op.fft_opt:
        return [_gpu_conv_to_fftconv(node)]
gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
......
......@@ -119,6 +119,52 @@ class TestConv2dFFT(unittest.TestCase):
utt.assert_allclose(res_ref, res_fft)
def test_opt_nofft_valid(self):
    """version='no_fft' must block the conv_fft_valid optimization:
    the compiled graph may not contain any CuFFTOp, even when the
    optimization is explicitly enabled in the compilation mode."""
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                          version='no_fft')
    mode = mode_with_gpu.including('conv_fft_valid')
    f_fft = theano.function([], conv, mode=mode)
    # Make sure that NO CuFFTOp has been inserted: the op was built
    # with version='no_fft', which sets fft_opt=False on GpuConv.
    topo = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo)
def test_opt_nofft_full(self):
    """version='no_fft' must block the conv_fft_full optimization:
    the compiled graph may not contain any CuFFTOp, even when the
    optimization is explicitly enabled in the compilation mode."""
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                          border_mode='full',
                                          version='no_fft')
    mode = mode_with_gpu.including('conv_fft_full')
    f_fft = theano.function([], conv, mode=mode)
    # Make sure that NO CuFFTOp has been inserted: the op was built
    # with version='no_fft', which sets fft_opt=False on GpuConv.
    topo = f_fft.maker.fgraph.toposort()
    assert not any(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo)
class TestConv3dFFT(unittest.TestCase):
......
......@@ -348,8 +348,9 @@ class ConvOp(OpenMPOp):
:type verbose: int
:param verbose: passed to GpuConv
:type version: int
:param version: passed to GpuConv
:type version: int or str
:param version: passed to GpuConv; if version='no_fft', the fft
optimization will be deactivated at the op level.
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
......@@ -367,6 +368,13 @@ class ConvOp(OpenMPOp):
Set to False in the grad with respect to the weights when the
output_mode is full.
"""
# Deactivate the fft optimization at the op level if specified
if version == "no_fft":
self.fft_opt = False
version = -1
else:
self.fft_opt = True
# We must continue to consider None as 1 for backward compatibility.
if dx is None:
dx = 1
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论