Commit b4f4a23b authored by Frederic Bastien

small opt to gpu speed up.

Parent c7f2dd05
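Every hunk below makes the same substitution: the @local_optimizer decorators that used to track the gpu_from_host op *instance* now track the GpuFromHost op *class*, and the one explicit equality test in InputToGpuOptimizer becomes an isinstance check. The sketch below is a toy illustration (plain Python stand-ins, not Theano's real graph types or optimizer machinery) of why matching by class is more robust, and presumably cheaper, than matching one particular instance, which is the "small opt" the commit message refers to.

# Toy sketch: tracking an op class matches every instance of that op,
# while tracking one instance only matches that exact object.

class Op(object):
    pass

class GpuFromHost(Op):
    """Stand-in for the real GpuFromHost op."""

gpu_from_host = GpuFromHost()      # the module-level instance the old code tracked
other_transfer = GpuFromHost()     # a distinct but equivalent instance

class Node(object):
    def __init__(self, op):
        self.op = op

node = Node(other_transfer)

print(node.op == gpu_from_host)            # False: default equality is identity
print(isinstance(node.op, GpuFromHost))    # True: any GpuFromHost instance matches

# A dispatch table keyed by op class needs one dict lookup per node instead of
# comparing the node's op against every tracked instance.
local_opts_by_type = {GpuFromHost: ['local_gpu_reshape', 'local_gpu_dot22']}
print(local_opts_by_type.get(type(node.op), []))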
@@ -192,7 +192,7 @@ class InputToGpuOptimizer(Optimizer):
# This happen frequently as we do 2 pass of the gpu optimizations
if (len(input.clients) == 1 and
(input.clients[0][0] == 'output' or
-input.clients[0][0].op == gpu_from_host)):
+isinstance(input.clients[0][0].op, GpuFromHost))):
continue
try:
@@ -215,7 +215,7 @@ gpu_seqopt.register('InputToGpuOptimizer', InputToGpuOptimizer(),
'merge') # TODO: how to make it mandatory for gpu_seqopt?
-@local_optimizer([gpu_from_host, host_from_gpu])
+@local_optimizer([GpuFromHost, HostFromGpu])
def local_cut_gpu_host_gpu(node):
if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu):
return [node.inputs[0].owner.inputs[0]]
@@ -336,7 +336,7 @@ def local_gpu_elemwise_0(node):
@register_opt()
-@local_optimizer([gpu_from_host])
+@local_optimizer([GpuFromHost])
def local_gpu_elemwise_1(node):
"""
gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...))
@@ -392,7 +392,7 @@ def local_gpu_split(node):
@register_opt()
-@local_optimizer([tensor.DimShuffle, gpu_from_host])
+@local_optimizer([tensor.DimShuffle, GpuFromHost])
def local_gpu_dimshuffle_0(node):
"""
dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle)
@@ -421,7 +421,7 @@ def local_gpu_dimshuffle_0(node):
@register_opt()
-@local_optimizer([tensor.SpecifyShape, gpu_from_host])
+@local_optimizer([tensor.SpecifyShape, GpuFromHost])
def local_gpu_specifyShape_0(node):
"""
specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape)
@@ -445,7 +445,7 @@ def local_gpu_specifyShape_0(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.basic.Dot])
+@local_optimizer([GpuFromHost, tensor.basic.Dot])
def local_gpu_dot_to_dot22(node):
"""
gpu_from_host(dot) -> gpudot(gpu_from_host)
@@ -537,7 +537,7 @@ optdb.register('gpu_assert_no_cpu_op', assert_no_cpu_op, 49.2,
@register_opt()
-@local_optimizer([theano.ifelse.IfElse, gpu_from_host])
+@local_optimizer([theano.ifelse.IfElse, GpuFromHost])
def local_gpu_lazy_ifelse(node):
"""
gpu_from_host(ifelse) -> gpu_ifelse(gpu_from_host)
@@ -606,7 +606,7 @@ def local_gpu_lazy_ifelse(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.blas.Dot22])
+@local_optimizer([GpuFromHost, tensor.blas.Dot22])
def local_gpu_dot22(node):
"""
gpu_from_host(dot22) -> gpudot(gpu_from_host)
@@ -631,7 +631,7 @@ def local_gpu_dot22(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.blas.BatchedDot])
+@local_optimizer([GpuFromHost, tensor.blas.BatchedDot])
def local_gpu_batched_dot(node):
"""
gpu_from_host(batched_dot) -> gpu_batched_dot(gpu_from_host)
@@ -670,7 +670,7 @@ def local_gpu_batched_dot(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.blas.Dot22Scalar])
+@local_optimizer([GpuFromHost, tensor.blas.Dot22Scalar])
def local_gpu_dot22scalar(node):
"""
gpu_from_host(dot22scalar) -> gpudot(gpu_from_host)
@@ -699,7 +699,7 @@ def local_gpu_dot22scalar(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.blas_c.CGemv, tensor.blas.Gemv])
+@local_optimizer([GpuFromHost, tensor.blas_c.CGemv, tensor.blas.Gemv])
def local_gpu_gemv(node):
"""
gpu_from_host(gemv) -> gpu_gemv(gpu_from_host)
@@ -737,7 +737,7 @@ def local_gpu_gemv(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.blas_c.CGer, tensor.blas.Ger,
+@local_optimizer([GpuFromHost, tensor.blas_c.CGer, tensor.blas.Ger,
tensor.blas_scipy.ScipyGer])
def local_gpu_ger(node):
"""
@@ -777,7 +777,7 @@ def local_gpu_ger(node):
@register_opt()
-@local_optimizer([tensor.blas.Gemm, gpu_from_host])
+@local_optimizer([tensor.blas.Gemm, GpuFromHost])
def local_gpu_gemm(node):
"""
gpu_from_host(gemm) -> gpu_gemm(gpu_from_host)
@@ -966,7 +966,7 @@ def local_gpu_elemwise_careduce(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.Reshape])
+@local_optimizer([GpuFromHost, tensor.Reshape])
def local_gpu_reshape(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
@@ -999,7 +999,7 @@ def local_gpu_reshape(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.Flatten])
+@local_optimizer([GpuFromHost, tensor.Flatten])
def local_gpu_flatten(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
@@ -1019,7 +1019,7 @@ def local_gpu_flatten(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.Subtensor])
+@local_optimizer([GpuFromHost, tensor.Subtensor])
def local_gpu_subtensor(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
@@ -1062,7 +1062,7 @@ def local_gpu_subtensor(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.AdvancedSubtensor1])
+@local_optimizer([GpuFromHost, tensor.AdvancedSubtensor1])
def local_gpu_advanced_subtensor1(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
@@ -1083,7 +1083,7 @@ def local_gpu_advanced_subtensor1(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.AdvancedIncSubtensor1])
+@local_optimizer([GpuFromHost, tensor.AdvancedIncSubtensor1])
def local_gpu_advanced_incsubtensor1(node):
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
@@ -1153,7 +1153,7 @@ def local_gpu_advanced_incsubtensor1(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.IncSubtensor])
+@local_optimizer([GpuFromHost, tensor.IncSubtensor])
def local_gpu_incsubtensor(node):
if isinstance(node.op, GpuFromHost):
host_output = node.inputs[0]
@@ -1463,7 +1463,7 @@ def values_eq_approx_high_tol(a, b):
return CudaNdarrayType.values_eq_approx(a, b, atol=atol)
-@local_optimizer([gpu_from_host, conv.ConvOp])
+@local_optimizer([GpuFromHost, conv.ConvOp])
def local_gpu_conv(node):
"""
gpu_from_host(conv) -> gpu_conv(gpu_from_host)
@@ -2309,7 +2309,7 @@ def local_gpu_contiguous(node):
@register_opt()
-@local_optimizer([gpu_from_host, tensor.Eye])
+@local_optimizer([GpuFromHost, tensor.Eye])
def local_gpu_eye(node):
"""
gpu_from_host(eye) -> gpueye(gpu_from_host)
@@ -2438,7 +2438,7 @@ def typeConstructor(broadcastable, dtype):
@register_opt('scan')
-@local_optimizer([gpu_from_host, scan_op.Scan])
+@local_optimizer([GpuFromHost, scan_op.Scan])
def gpuScanOptimization(node):
"""
scan(host_from_gpu) -> host_from_gpu(GPUscan)
@@ -2560,7 +2560,7 @@ def gpuScanOptimization(node):
@register_opt()
-@local_optimizer([tensor.AllocEmpty, gpu_from_host])
+@local_optimizer([tensor.AllocEmpty, GpuFromHost])
def local_gpu_allocempty(node):
if (isinstance(node.op, tensor.AllocEmpty) and
node.op.dtype == "float32"):
@@ -2727,7 +2727,7 @@ optdb.register('local_inplace_gpu_sparse_block_outer',
# Move to Gpu optimization
-@local_optimizer([gpu_from_host,
+@local_optimizer([GpuFromHost,
AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs,
......
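After this change the decorator and the body of each local optimizer test for the op in the same way, by class. A minimal sketch of the resulting registration pattern is below, assuming the old-Theano import locations (theano.gof.opt and theano.sandbox.cuda.basic_ops) and using a hypothetical optimizer name; it mirrors the gpu_from_host(host_from_gpu(x)) -> x cut shown in the first hunks, not any specific optimizer in this commit.

from theano.gof.opt import local_optimizer
from theano.sandbox.cuda.basic_ops import GpuFromHost, host_from_gpu

@local_optimizer([GpuFromHost])        # track the op class, not the gpu_from_host instance
def local_example_transfer_cut(node):
    # Hypothetical optimizer for illustration only.  Local optimizers return a
    # list of replacement variables, or False when they do not apply.
    if isinstance(node.op, GpuFromHost):
        host_input = node.inputs[0]
        if host_input.owner and host_input.owner.op == host_from_gpu:
            # gpu_from_host(host_from_gpu(x)) -> x
            return [host_input.owner.inputs[0]]
    return False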