提交 e8017096，作者：sentient07

Added the `outputs` argument to the `op_lifter` / `register_opt2` GPU optimizer functions

上级 3510323b
...@@ -1414,7 +1414,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): ...@@ -1414,7 +1414,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
AbstractConv2d_gradInputs]) AbstractConv2d_gradInputs])
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights, @register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile') AbstractConv2d_gradInputs], 'fast_compile')
def local_abstractconv_cudnn_graph(op, context_name, inputs): def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if (not isinstance(op, (AbstractConv2d, if (not isinstance(op, (AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs))): AbstractConv2d_gradInputs))):
...@@ -1536,7 +1536,7 @@ def local_dnn_convi_output_merge(node, *inputs): ...@@ -1536,7 +1536,7 @@ def local_dnn_convi_output_merge(node, *inputs):
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
@op_lifter([Pool]) @op_lifter([Pool])
@register_opt2([Pool], 'fast_compile') @register_opt2([Pool], 'fast_compile')
def local_pool_dnn_alternative(op, ctx_name, inputs): def local_pool_dnn_alternative(op, ctx_name, inputs, outputs):
if not dnn_available(ctx_name): if not dnn_available(ctx_name):
raise_no_cudnn() raise_no_cudnn()
if not op.ignore_border: if not op.ignore_border:
...@@ -1553,7 +1553,7 @@ def local_pool_dnn_alternative(op, ctx_name, inputs): ...@@ -1553,7 +1553,7 @@ def local_pool_dnn_alternative(op, ctx_name, inputs):
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
@op_lifter([MaxPoolGrad]) @op_lifter([MaxPoolGrad])
@register_opt2([MaxPoolGrad], 'fast_compile') @register_opt2([MaxPoolGrad], 'fast_compile')
def local_pool_dnn_grad_stride(op, ctx_name, inputs): def local_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not dnn_available(ctx_name): if not dnn_available(ctx_name):
raise_no_cudnn() raise_no_cudnn()
if not op.ignore_border: if not op.ignore_border:
...@@ -1578,7 +1578,7 @@ def local_pool_dnn_grad_stride(op, ctx_name, inputs): ...@@ -1578,7 +1578,7 @@ def local_pool_dnn_grad_stride(op, ctx_name, inputs):
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
@op_lifter([AveragePoolGrad]) @op_lifter([AveragePoolGrad])
@register_opt2([AveragePoolGrad], 'fast_compile') @register_opt2([AveragePoolGrad], 'fast_compile')
def local_avg_pool_dnn_grad_stride(op, ctx_name, inputs): def local_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not dnn_available(ctx_name): if not dnn_available(ctx_name):
raise_no_cudnn() raise_no_cudnn()
if not op.ignore_border: if not op.ignore_border:
...@@ -1632,7 +1632,7 @@ def local_log_softmax_dnn(node): ...@@ -1632,7 +1632,7 @@ def local_log_softmax_dnn(node):
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
@op_lifter([LogSoftmax]) @op_lifter([LogSoftmax])
@register_opt2([LogSoftmax], 'fast_compile') @register_opt2([LogSoftmax], 'fast_compile')
def local_logsoftmax_to_dnn(op, ctx_name, inputs): def local_logsoftmax_to_dnn(op, ctx_name, inputs, outputs):
# Transform the input in the format expected by GpuDnnSoftmax # Transform the input in the format expected by GpuDnnSoftmax
inp = inputs[0] inp = inputs[0]
if inp.ndim != 2: if inp.ndim != 2:
...@@ -1671,7 +1671,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn') ...@@ -1671,7 +1671,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
@op_lifter([SoftmaxGrad]) @op_lifter([SoftmaxGrad])
@register_opt2([SoftmaxGrad], 'cudnn', 'fast_compile') @register_opt2([SoftmaxGrad], 'cudnn', 'fast_compile')
def local_softmax_dnn_grad(op, ctx_name, inputs): def local_softmax_dnn_grad(op, ctx_name, inputs, outputs):
if not dnn_available(ctx_name): if not dnn_available(ctx_name):
raise_no_cudnn("cuDNN needed for SoftmaxGrad") raise_no_cudnn("cuDNN needed for SoftmaxGrad")
ins = [] ins = []
......
...@@ -454,7 +454,7 @@ class GpuCumsum(GpuKernelBase, Op): ...@@ -454,7 +454,7 @@ class GpuCumsum(GpuKernelBase, Op):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([CumsumOp]) @op_lifter([CumsumOp])
@register_opt2([CumsumOp], 'fast_compile') @register_opt2([CumsumOp], 'fast_compile')
def use_gpu_cumsumop(op, ctx_name, inputs): def use_gpu_cumsumop(op, ctx_name, inputs, outputs):
if inputs[0].dtype == 'float32': if inputs[0].dtype == 'float32':
axis = op.axis axis = op.axis
x = inputs[0] x = inputs[0]
......
...@@ -230,7 +230,7 @@ KERNEL void k_multi_warp_multinomial( ...@@ -230,7 +230,7 @@ KERNEL void k_multi_warp_multinomial(
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.sandbox.multinomial.MultinomialFromUniform]) @op_lifter([theano.sandbox.multinomial.MultinomialFromUniform])
@register_opt2([theano.sandbox.multinomial.MultinomialFromUniform], 'fast_compile') @register_opt2([theano.sandbox.multinomial.MultinomialFromUniform], 'fast_compile')
def local_gpua_multinomial(op, context_name, inputs): def local_gpua_multinomial(op, context_name, inputs, outputs):
# TODO : need description for function # TODO : need description for function
if len(inputs) == 2: if len(inputs) == 2:
......
...@@ -150,7 +150,7 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz, ...@@ -150,7 +150,7 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
@opt.register_opt('fast_compile') @opt.register_opt('fast_compile')
@opt.op_lifter([tensor.Dot]) @opt.op_lifter([tensor.Dot])
@opt.register_opt2([tensor.Dot], 'fast_compile') @opt.register_opt2([tensor.Dot], 'fast_compile')
def local_dot_to_gemm16(op, ctx_name, inputs): def local_dot_to_gemm16(op, ctx_name, inputs, outputs):
if nerv is None: if nerv is None:
return return
A = inputs[0] A = inputs[0]
......
差异被折叠。
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论