提交 3f9d0601 authored 作者: sentient07's avatar sentient07

Changed op_lifter to accept Op and Inputs

上级 e45b6cd6
......@@ -1498,35 +1498,35 @@ def local_dnn_convi_output_merge(node, *inputs):
@register_opt('cudnn', 'fast_compile')
@op_lifter([Pool])
def local_pool_dnn_alternative(op, ctx_name, inputs):
    """Lift a CPU ``Pool`` op to cuDNN pooling (``dnn_pool``).

    Called by ``op_lifter`` with the op instance, the target GPU context
    name and the node's inputs (a single image variable).

    Returns the replacement GPU variable, or None to decline the lift.
    Raises (via ``raise_no_cudnn``) when cuDNN is unavailable.
    """
    if not dnn_available(ctx_name):
        raise_no_cudnn()
    # cuDNN pooling only supports the ignore_border=True behaviour.
    if not op.ignore_border:
        return
    img, = inputs
    img = as_gpuarray_variable(img, ctx_name)
    # Pooling geometry now comes from the op directly (op_lifter no longer
    # passes the apply node).
    ds = op.ds
    stride = op.st
    pad = op.padding
    mode = op.mode
    return dnn_pool(gpu_contiguous(img), ds, stride=stride, pad=pad, mode=mode)
@register_opt('cudnn', 'fast_compile')
@op_lifter([MaxPoolGrad])
def local_pool_dnn_grad_stride(node, ctx_name):
def local_pool_dnn_grad_stride(op, ctx_name, inputs):
if not dnn_available(ctx_name):
raise_no_cudnn()
if not node.op.ignore_border:
return
inp, out, out_grad = node.inputs
inp, out, out_grad = inputs
inp = as_gpuarray_variable(inp, ctx_name)
out = as_gpuarray_variable(out, ctx_name)
out_grad = as_gpuarray_variable(out_grad, ctx_name)
ds = node.op.ds
st = node.op.st
pad = node.op.padding
mode = node.op.mode
ds = op.ds
st = op.st
pad = op.padding
mode = op.mode
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
gpu_contiguous(out),
......@@ -1538,18 +1538,18 @@ def local_pool_dnn_grad_stride(node, ctx_name):
@register_opt('cudnn', 'fast_compile')
@op_lifter([AveragePoolGrad])
def local_avg_pool_dnn_grad_stride(node, ctx_name):
def local_avg_pool_dnn_grad_stride(op, ctx_name, inputs):
if not dnn_available(ctx_name):
raise_no_cudnn()
if not node.op.ignore_border:
if not op.ignore_border:
return
inp, out_grad = node.inputs
inp, out_grad = inputs
inp = as_gpuarray_variable(inp, ctx_name)
out_grad = as_gpuarray_variable(out_grad, ctx_name)
ds = node.op.ds
st = node.op.st
pad = node.op.padding
mode = node.op.mode
ds = op.ds
st = op.st
pad = op.padding
mode = op.mode
cg = gpu_contiguous(out_grad)
......@@ -1591,9 +1591,9 @@ def local_log_softmax_dnn(node):
@register_opt('cudnn', 'fast_compile')
@op_lifter([LogSoftmax])
def local_logsoftmax_to_dnn(node, ctx_name):
def local_logsoftmax_to_dnn(op, ctx_name, inputs):
# Transform the input in the format expected by GpuDnnSoftmax
inp = node.inputs[0]
inp = inputs[0]
if inp.ndim != 2:
return
if not dnn_available(ctx_name) or version(raises=False) < 3000:
......@@ -1629,11 +1629,11 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@register_opt('cudnn', 'fast_compile')
@op_lifter([SoftmaxGrad])
def local_softmax_dnn_grad(node, ctx_name):
def local_softmax_dnn_grad(op, ctx_name, inputs):
if not dnn_available(ctx_name):
raise_no_cudnn("cuDNN needed for SoftmaxGrad")
ins = []
for n in node.inputs:
for n in inputs:
n = as_gpuarray_variable(n, ctx_name)
if n.ndim != 2:
return
......
......@@ -452,10 +452,10 @@ class GpuCumsum(GpuKernelBase, Op):
@op_lifter([CumsumOp])
def use_gpu_cumsumop(node, ctx_name):
if node.inputs[0].dtype == 'float32':
axis = node.op.axis
x = node.inputs[0]
def use_gpu_cumsumop(op, ctx_name, inputs):
if inputs[0].dtype == 'float32':
axis = op.axis
x = inputs[0]
if axis is not None and x.ndim > GpuCumsum.SUPPORTED_NDIMS:
return None
......
......@@ -229,21 +229,21 @@ KERNEL void k_multi_warp_multinomial(
@register_opt()
@op_lifter([theano.sandbox.multinomial.MultinomialFromUniform])
def local_gpua_multinomial(op, context_name, inputs):
    """Lift ``MultinomialFromUniform`` to its GPU implementation.

    Called by ``op_lifter`` with the op instance, the target context name
    and the node's inputs: ``(p, u)`` or ``(p, u, n_samples)``.

    Only the single-sample, float32 case is handled; returns None to
    decline the lift otherwise.
    """
    if len(inputs) == 2:
        p, u = inputs
        n_samples = 1
    else:
        p, u, n_samples = inputs
    try:
        if get_scalar_constant_value(n_samples) != 1:
            return None
    except NotScalarConstantError:
        return None
    # BUGFIX: the previous code read ``m, = outputs`` but ``outputs`` is not
    # a parameter of the new (op, context_name, inputs) signature, which
    # raised NameError.  The output dtype of MultinomialFromUniform is
    # op.odtype, falling back to p's dtype when odtype is 'auto' (mirrors
    # MultinomialFromUniform.make_node), so derive it from the op instead.
    out_dtype = op.odtype if op.odtype != 'auto' else p.dtype
    if (p.dtype == u.dtype == out_dtype == 'float32'):
        gpu_op = GPUAMultinomialFromUniform(op.odtype)
        return gpuarray.elemwise.GpuDimShuffle([False, False], [1, 0])(
            gpu_op(p, u))
......@@ -469,8 +469,8 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
@op_lifter([Images2Neibs])
def use_gpu_images2neibs(op, context_name, inputs):
    """Lift ``Images2Neibs`` to ``GpuImages2Neibs``.

    Called by ``op_lifter`` with the op instance, the target context name
    and the node's inputs (unused here; only the op's mode matters).

    Returns a GpuImages2Neibs op for the supported border modes, or None
    (implicitly) to decline the lift for other modes.
    """
    if op.mode in ['valid', 'ignore_borders', 'wrap_centered']:
        return GpuImages2Neibs(op.mode)


register_gpu_opt()(use_gpu_images2neibs)
......@@ -149,14 +149,14 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
@opt.register_opt()
@opt.op_lifter([tensor.Dot])
def local_dot_to_gemm16(op, ctx_name, inputs):
    """Lift a float16 matrix-matrix ``Dot`` to the nervana ``Gemm16`` op.

    Called by ``op_lifter`` with the op instance, the target context name
    and the node's two inputs ``(A, B)``.

    Returns ``Gemm16()(C, 1.0, A, B, 0.0)`` writing into a freshly
    allocated output ``C``, or None when nervana is unavailable or the
    operands are not 2-D float16 matrices.
    """
    # Gemm16 is backed by the optional nervana kernels; decline when absent.
    if nerv is None:
        return
    A = inputs[0]
    B = inputs[1]
    if (A.ndim == 2 and B.ndim == 2 and
            A.dtype == 'float16' and B.dtype == 'float16'):
        fgraph = inputs[0].fgraph
        # Allocate the (A.shape[0], B.shape[1]) output buffer on the GPU.
        C = GpuAllocEmpty(dtype='float16', context_name=ctx_name)(
            shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
        return Gemm16()(C, 1.0, A, B, 0.0)
......
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论