Commit 3c5c1506 authored by sentient07

Added register_opt2 tags for other opts

Parent 9dfbff30
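For context: `register_opt` registers a lifter with the standard GPU optimizer database, and, judging from the call sites below, `register_opt2` registers the same function, keyed by op class, with a second optimizer database under the 'fast_compile' tag. Below is a minimal, self-contained sketch of the stacked-decorator pattern this commit applies; the dict-backed databases and the `Pool` placeholder are hypothetical stand-ins for illustration, not Theano's real internals.

    # Simplified stand-ins for the two optimizer databases.
    local_db = {}   # stands in for the database behind register_opt
    graph_db = {}   # stands in for the database behind register_opt2

    def register_opt(*tags):
        def deco(fn):
            local_db[fn.__name__] = (fn, tags)
            return fn  # return fn unchanged so decorators stack
        return deco

    def register_opt2(ops, *tags):
        def deco(fn):
            for op_class in ops:  # keyed by op class, as at the call sites
                graph_db.setdefault(op_class, []).append((fn, tags))
            return fn
        return deco

    class Pool(object):  # hypothetical placeholder op
        pass

    @register_opt('cudnn', 'fast_compile')
    @register_opt2([Pool], 'fast_compile')
    def local_pool_dnn_alternative(op, ctx_name, inputs):
        return None  # a real lifter returns a GPU replacement or None

    # One definition now lands in both databases.
    assert 'local_pool_dnn_alternative' in local_db
    assert Pool in graph_db

Because each decorator returns the function unchanged, `register_opt`, `op_lifter`, and `register_opt2` can stack on a single definition, which is the pattern repeated in the hunks below.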
@@ -31,7 +31,7 @@ from .elemwise import GpuElemwise
 # These don't exist in gpuarray
 # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
 from .nnet import GpuSoftmax
-from .opt import gpu_seqopt, register_opt, conv_groupopt, op_lifter
+from .opt import gpu_seqopt, register_opt, conv_groupopt, op_lifter, register_opt2
 from .opt_util import alpha_merge, output_merge, inplace_allocempty
 from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
@@ -1498,6 +1498,7 @@ def local_dnn_convi_output_merge(node, *inputs):
 @register_opt('cudnn', 'fast_compile')
 @op_lifter([Pool])
+@register_opt2([Pool], 'fast_compile')
 def local_pool_dnn_alternative(op, ctx_name, inputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn()
@@ -1514,10 +1515,11 @@ def local_pool_dnn_alternative(op, ctx_name, inputs):
 @register_opt('cudnn', 'fast_compile')
 @op_lifter([MaxPoolGrad])
+@register_opt2([MaxPoolGrad], 'fast_compile')
 def local_pool_dnn_grad_stride(op, ctx_name, inputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn()
-    if not node.op.ignore_border:
+    if not op.ignore_border:
         return
     inp, out, out_grad = inputs
     inp = as_gpuarray_variable(inp, ctx_name)
@@ -1538,6 +1540,7 @@ def local_pool_dnn_grad_stride(op, ctx_name, inputs):
 @register_opt('cudnn', 'fast_compile')
 @op_lifter([AveragePoolGrad])
+@register_opt2([AveragePoolGrad], 'fast_compile')
 def local_avg_pool_dnn_grad_stride(op, ctx_name, inputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn()
@@ -1591,6 +1594,7 @@ def local_log_softmax_dnn(node):
 @register_opt('cudnn', 'fast_compile')
 @op_lifter([LogSoftmax])
+@register_opt2([LogSoftmax], 'fast_compile')
 def local_logsoftmax_to_dnn(op, ctx_name, inputs):
     # Transform the input in the format expected by GpuDnnSoftmax
     inp = inputs[0]
@@ -1629,6 +1633,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
 @register_opt('cudnn', 'fast_compile')
 @op_lifter([SoftmaxGrad])
+#@register_opt2([SoftmaxGrad], 'fast_compile')
 def local_softmax_dnn_grad(op, ctx_name, inputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn("cuDNN needed for SoftmaxGrad")
...
@@ -10,7 +10,7 @@ except ImportError:
 from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
                         infer_context_name, GpuFromHost)
-from .opt import register_opt as register_gpu_opt, op_lifter
+from .opt import register_opt, op_lifter, register_opt2
 class GpuCumsum(GpuKernelBase, Op):
@@ -450,8 +450,9 @@ class GpuCumsum(GpuKernelBase, Op):
         """ % locals()
         return super(GpuCumsum, self).c_support_code_struct(node, nodename) + code
+@register_opt('fast_compile')
 @op_lifter([CumsumOp])
+@register_opt2([CumsumOp], 'fast_compile')
 def use_gpu_cumsumop(op, ctx_name, inputs):
     if inputs[0].dtype == 'float32':
         axis = op.axis
@@ -471,4 +472,5 @@ def use_gpu_cumsumop(op, ctx_name, inputs):
         return GpuCumsum(axis)(x)
-register_gpu_opt()(use_gpu_cumsumop)
+#register_opt('fast_compile')(use_gpu_cumsumop)
+#register_opt2([CumsumOp], 'fast_compile')(use_gpu_cumsumop)
\ No newline at end of file
@@ -14,7 +14,7 @@ from theano.gof import Op
 from theano.tensor import NotScalarConstantError, get_scalar_constant_value
 from theano import gpuarray
 from .basic_ops import as_gpuarray_variable, infer_context_name
-from .opt import register_opt, op_lifter
+from .opt import register_opt, op_lifter, register_opt2
 from .type import GpuArrayType
@@ -227,8 +227,9 @@ KERNEL void k_multi_warp_multinomial(
         return (1,)
-@register_opt()
+@register_opt('fast_compile')
 @op_lifter([theano.sandbox.multinomial.MultinomialFromUniform])
+@register_opt2([theano.sandbox.multinomial.MultinomialFromUniform], 'fast_compile')
 def local_gpua_multinomial(op, context_name, inputs):
     # TODO : need description for function
@@ -242,6 +243,9 @@ def local_gpua_multinomial(op, context_name, inputs):
            return None
     except NotScalarConstantError:
            return None
+    node = op.make_node(*inputs)
+    outputs = node.outputs
     m, = outputs
     if (p.dtype == u.dtype == m.dtype == 'float32'):
         gpu_op = GPUAMultinomialFromUniform(op.odtype)
...
@@ -147,8 +147,9 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
         return '\n'.join(codel)
-@opt.register_opt()
+@opt.register_opt('fast_compile')
 @opt.op_lifter([tensor.Dot])
+@opt.register_opt2([tensor.Dot], 'fast_compile')
 def local_dot_to_gemm16(op, ctx_name, inputs):
     if nerv is None:
         return
...