提交 3c5c1506 authored 作者: sentient07's avatar sentient07

Added register_opt2 tags for other opts

上级 9dfbff30
......@@ -31,7 +31,7 @@ from .elemwise import GpuElemwise
# These don't exist in gpuarray
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from .nnet import GpuSoftmax
from .opt import gpu_seqopt, register_opt, conv_groupopt, op_lifter
from .opt import gpu_seqopt, register_opt, conv_groupopt, op_lifter, register_opt2
from .opt_util import alpha_merge, output_merge, inplace_allocempty
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
......@@ -1498,6 +1498,7 @@ def local_dnn_convi_output_merge(node, *inputs):
@register_opt('cudnn', 'fast_compile')
@op_lifter([Pool])
@register_opt2([Pool], 'fast_compile')
def local_pool_dnn_alternative(op, ctx_name, inputs):
if not dnn_available(ctx_name):
raise_no_cudnn()
......@@ -1514,10 +1515,11 @@ def local_pool_dnn_alternative(op, ctx_name, inputs):
@register_opt('cudnn', 'fast_compile')
@op_lifter([MaxPoolGrad])
@register_opt2([MaxPoolGrad], 'fast_compile')
def local_pool_dnn_grad_stride(op, ctx_name, inputs):
if not dnn_available(ctx_name):
raise_no_cudnn()
if not node.op.ignore_border:
if not op.ignore_border:
return
inp, out, out_grad = inputs
inp = as_gpuarray_variable(inp, ctx_name)
......@@ -1538,6 +1540,7 @@ def local_pool_dnn_grad_stride(op, ctx_name, inputs):
@register_opt('cudnn', 'fast_compile')
@op_lifter([AveragePoolGrad])
@register_opt2([AveragePoolGrad], 'fast_compile')
def local_avg_pool_dnn_grad_stride(op, ctx_name, inputs):
if not dnn_available(ctx_name):
raise_no_cudnn()
......@@ -1591,6 +1594,7 @@ def local_log_softmax_dnn(node):
@register_opt('cudnn', 'fast_compile')
@op_lifter([LogSoftmax])
@register_opt2([LogSoftmax], 'fast_compile')
def local_logsoftmax_to_dnn(op, ctx_name, inputs):
# Transform the input in the format expected by GpuDnnSoftmax
inp = inputs[0]
......@@ -1629,6 +1633,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@register_opt('cudnn', 'fast_compile')
@op_lifter([SoftmaxGrad])
#@register_opt2([SoftmaxGrad], 'fast_compile')
def local_softmax_dnn_grad(op, ctx_name, inputs):
if not dnn_available(ctx_name):
raise_no_cudnn("cuDNN needed for SoftmaxGrad")
......
......@@ -10,7 +10,7 @@ except ImportError:
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
infer_context_name, GpuFromHost)
from .opt import register_opt as register_gpu_opt, op_lifter
from .opt import register_opt, op_lifter, register_opt2
class GpuCumsum(GpuKernelBase, Op):
......@@ -450,8 +450,9 @@ class GpuCumsum(GpuKernelBase, Op):
""" % locals()
return super(GpuCumsum, self).c_support_code_struct(node, nodename) + code
@register_opt('fast_compile')
@op_lifter([CumsumOp])
@register_opt2([CumsumOp], 'fast_compile')
def use_gpu_cumsumop(op, ctx_name, inputs):
if inputs[0].dtype == 'float32':
axis = op.axis
......@@ -471,4 +472,5 @@ def use_gpu_cumsumop(op, ctx_name, inputs):
return GpuCumsum(axis)(x)
register_gpu_opt()(use_gpu_cumsumop)
#register_opt('fast_compile')(use_gpu_cumsumop)
#register_opt2([CumsumOp], 'fast_compile')(use_gpu_cumsumop)
\ No newline at end of file
......@@ -14,7 +14,7 @@ from theano.gof import Op
from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from theano import gpuarray
from .basic_ops import as_gpuarray_variable, infer_context_name
from .opt import register_opt, op_lifter
from .opt import register_opt, op_lifter, register_opt2
from .type import GpuArrayType
......@@ -227,8 +227,9 @@ KERNEL void k_multi_warp_multinomial(
return (1,)
@register_opt()
@register_opt('fast_compile')
@op_lifter([theano.sandbox.multinomial.MultinomialFromUniform])
@register_opt2([theano.sandbox.multinomial.MultinomialFromUniform], 'fast_compile')
def local_gpua_multinomial(op, context_name, inputs):
# TODO : need description for function
......@@ -242,6 +243,9 @@ def local_gpua_multinomial(op, context_name, inputs):
return None
except NotScalarConstantError:
return None
node = op.make_node(*inputs)
outputs = node.outputs
m, = outputs
if (p.dtype == u.dtype == m.dtype == 'float32'):
gpu_op = GPUAMultinomialFromUniform(op.odtype)
......
......@@ -147,8 +147,9 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
return '\n'.join(codel)
@opt.register_opt()
@opt.register_opt('fast_compile')
@opt.op_lifter([tensor.Dot])
@opt.register_opt2([tensor.Dot], 'fast_compile')
def local_dot_to_gemm16(op, ctx_name, inputs):
if nerv is None:
return
......
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论