提交 ee3c3f08 authored 作者: Nicolas Ballas's avatar Nicolas Ballas

update optimization tags

上级 eefa42ac
......@@ -2784,22 +2784,22 @@ def local_abstractconv_gradinputs_gemm(node):
# which ones take precedence over others.
abstractconv_groupopt = theano.gof.optdb.LocalGroupDB()
abstractconv_groupopt.__name__ = "gpu_abstractconv_opts"
register_specialize_device()(abstractconv_groupopt)
register_specialize_device()(abstractconv_groupopt, 'gpu', 'fast_compile')
# cuDNN is first, but only registered if cuDNN is available.
conv_groupopt.register('local_abstractconv_dnn', dnn.local_abstractconv_cudnn, 20,
'conv_dnn',
'gpu_opt', 'cudnn')
'gpu', 'fast_compile', 'fast_run', 'cudnn')
# The GEMM-based convolution comes last to catch all remaining cases.
# It can be disabled by excluding 'conv_gemm'.
conv_groupopt.register('local_abstractconv_gemm', local_abstractconv_gemm, 30,
'conv_gemm',
'gpu_opt')
'gpu', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv_gradweight_gemm',
local_abstractconv_gradweight_gemm, 30,
'conv_gemm',
'fast_compile', 'fast_run')
'gpu', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv_gradinputs_gemm',
local_abstractconv_gradinputs_gemm, 30,
'conv_gemm',
'gpu_opt')
'gpu', 'fast_compile', 'fast_run')
......@@ -516,4 +516,4 @@ def local_conv2d_gradinputs_cpu(node):
din = din(topgrad, filters)
din = patternbroadcast(din, node.outputs[0].broadcastable)
return [din]
register_specialize_device(local_conv2d_gradinputs_cpu)
register_specialize_device(local_conv2d_gradinputs_cpu, 'fast_compile')
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论