提交 0c41c43b 作者: Arnaud Bergeron

Change how the FFT convolution optimization is enabled: instead of a Theano config flag, it is now a registered optimization that is disabled by default.

上级 5c61c434
......@@ -33,24 +33,6 @@ AddConfigVar('cublas.lib',
"""Name of the cuda blas library for the linker.""",
StrParam('cublas'))
# Boolean config flag 'enable_conv2d_fft' (default False) that selects the
# FFT-based implementation of conv2d on the GPU; the help string below lists
# the limitations of that implementation.
# NOTE(review): in_c_key=False presumably keeps this flag out of the C
# compilation cache key -- confirm against AddConfigVar.
AddConfigVar('enable_conv2d_fft',
"""
Use an fft implementation of conv2d on the GPU rather
than a direct one.
The current implementation cannot handle inputs that have
an odd number of columns in the images. You have to
pad/unpad manually if you want to use this
implementation. This also means that you need to make
sure all your filters have an odd number of columns if
doing stacked convolutions since otherwise you will end
up with odd images.
You can also call conv2d_fft() with pad=True
""",
BoolParam(False),
in_c_key=False)
# is_nvcc_available() is called here for its side effect: it initializes
# global variables in the nvcc_compiler module.
nvcc_compiler.is_nvcc_available()
......
......@@ -1120,17 +1120,26 @@ def local_gpu_conv(node):
return [out]
@register_opt()
@local_optimizer([GpuConv])
def local_conv_fft(node):
if (theano.config.enable_conv2d_fft and
isinstance(node.op, GpuConv) and
def local_conv_fft_valid(node):
    """Rewrite a 'valid'-mode, unit-subsample GpuConv node to use conv2d_fft.

    Returns a one-element list holding ``conv2d_fft`` applied to the node's
    first two inputs when ``node.op`` is a ``GpuConv`` with
    ``border_mode == 'valid'`` and ``subsample == (1, 1)``; otherwise
    returns None.
    """
    op = node.op
    matches = (isinstance(op, GpuConv) and
               op.border_mode == 'valid' and
               op.subsample == (1, 1))
    if not matches:
        return None
    first_in, second_in = node.inputs[0], node.inputs[1]
    return [conv2d_fft(first_in, second_in)]
import theano.tensor.signal.downsample as downsample
@local_optimizer([GpuConv])
def local_conv_fft_full(node):
    """Rewrite a 'full'-mode, unit-subsample GpuConv node to use conv2d_fft.

    Returns a one-element list holding ``conv2d_fft`` applied to the node's
    first two inputs with ``border_mode='full'`` when ``node.op`` is a
    ``GpuConv`` with ``border_mode == 'full'`` and ``subsample == (1, 1)``;
    otherwise returns None.
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return None
    if op.border_mode != 'full' or op.subsample != (1, 1):
        return None
    first_in, second_in = node.inputs[0], node.inputs[1]
    return [conv2d_fft(first_in, second_in, border_mode='full')]
# Register both FFT convolution rewrites with gpu_optimizer under their own
# names so each one can be referred to individually.
# NOTE(review): only a name and the optimizer are passed here (no tags), so
# presumably these stay disabled unless explicitly requested -- confirm
# against gpu_optimizer.register.
gpu_optimizer.register("local_conv_fft_valid", local_conv_fft_valid)
gpu_optimizer.register("local_conv_fft_full", local_conv_fft_full)
import theano.tensor.signal.downsample as downsample
@register_opt()
@local_optimizer([downsample.DownsampleFactorMax])
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论