提交 02bde7ea authored 作者: Frederic Bastien's avatar Frederic Bastien

Split local_abstractconv_cudnn into three optimizers to get better statistics.

上级 74b4c807
......@@ -32,7 +32,7 @@ from .elemwise import GpuElemwise
# These don't exist in gpuarray
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from .nnet import GpuSoftmax
from .opt import (gpu_seqopt, register_opt, conv_groupopt,
from .opt import (gpu_seqopt, register_opt,
op_lifter, register_opt2)
from .opt_util import alpha_merge, output_merge, inplace_allocempty
......@@ -1472,18 +1472,31 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
return [rval]
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs])
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d])
def local_abstractconv_cudnn(node):
    """Hand an ``AbstractConv2d`` node over to the cuDNN graph builder.

    The context name is inferred from all inputs of ``node``.  When the
    first input's type is a ``GpuArrayType`` the result of
    ``local_abstractconv_cudnn_graph`` is returned; otherwise the function
    returns ``None``.
    """
    context_name = infer_context_name(*node.inputs)
    first_input = node.inputs[0]
    if isinstance(first_input.type, GpuArrayType):
        return local_abstractconv_cudnn_graph(
            node.op, context_name, node.inputs, node.outputs)
    return None
# Add local_abstractconv_cudnn to conv_groupopt under the name
# 'local_abstractconv_cudnn'.
# NOTE(review): the 20 and the trailing strings are passed positionally;
# presumably a position/priority followed by tags -- confirm against
# LocalGroupDB.register.
conv_groupopt.register('local_abstractconv_cudnn',
local_abstractconv_cudnn, 20,
'fast_compile', 'fast_run',
'gpuarray', 'conv_dnn', 'cudnn')
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d_gradWeights])
def local_abstractconv_gw_cudnn(node):
    """Hand an ``AbstractConv2d_gradWeights`` node to the cuDNN graph builder.

    The context name is inferred from all inputs of ``node``.  When the
    first input's type is a ``GpuArrayType`` the result of
    ``local_abstractconv_cudnn_graph`` is returned; otherwise the function
    returns ``None``.
    """
    context_name = infer_context_name(*node.inputs)
    first_input = node.inputs[0]
    if isinstance(first_input.type, GpuArrayType):
        return local_abstractconv_cudnn_graph(
            node.op, context_name, node.inputs, node.outputs)
    return None
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d_gradInputs])
def local_abstractconv_gi_cudnn(node):
    """Hand an ``AbstractConv2d_gradInputs`` node to the cuDNN graph builder.

    The context name is inferred from all inputs of ``node``.  When the
    first input's type is a ``GpuArrayType`` the result of
    ``local_abstractconv_cudnn_graph`` is returned; otherwise the function
    returns ``None``.
    """
    context_name = infer_context_name(*node.inputs)
    first_input = node.inputs[0]
    if isinstance(first_input.type, GpuArrayType):
        return local_abstractconv_cudnn_graph(
            node.op, context_name, node.inputs, node.outputs)
    return None
@inplace_allocempty(GpuDnnConv, 2)
......
......@@ -79,10 +79,6 @@ class GraphToGPUDB(DB):
# Module-level SequenceDB instance; other entries are registered into it
# below.
gpu_seqopt = SequenceDB()
# Don't register this right now
# (conv_groupopt is only created and named here.)
conv_groupopt = LocalGroupDB()
conv_groupopt.__name__ = "gpua_conv_opts"
# Register a GraphToGPUDB instance in gpu_seqopt under the name
# 'gpuarray_graph_optimization'.
# NOTE(review): -0.5 is presumably the position within the sequence and the
# trailing strings are tags -- confirm against SequenceDB.register.
gpu_seqopt.register('gpuarray_graph_optimization', GraphToGPUDB(), -0.5,
'fast_compile', 'fast_run', 'gpuarray')
......@@ -1297,9 +1293,6 @@ def local_gpua_lift_abstractconv2d_graph(op, context_name, inputs, outputs):
context_name=context_name)
return [op(*inps)]
# Register this here so that it goes after the abstract lifting
# register_opt('fast_compile') returns a callable, which is applied to the
# conv_groupopt database object rather than used as a function decorator.
register_opt('fast_compile')(conv_groupopt)
@register_opt("low_memory")
@local_optimizer([GpuCAReduceCuda])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论