提交 b4f4a23b 作者: Frederic Bastien

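Small optimization to speed up the GPU optimizations: pass the GpuFromHost/HostFromGpu Op classes to @local_optimizer (and use isinstance in InputToGpuOptimizer) instead of the gpu_from_host/host_from_gpu instances.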

上级 c7f2dd05
...@@ -192,7 +192,7 @@ class InputToGpuOptimizer(Optimizer): ...@@ -192,7 +192,7 @@ class InputToGpuOptimizer(Optimizer):
# This happen frequently as we do 2 pass of the gpu optimizations # This happen frequently as we do 2 pass of the gpu optimizations
if (len(input.clients) == 1 and if (len(input.clients) == 1 and
(input.clients[0][0] == 'output' or (input.clients[0][0] == 'output' or
input.clients[0][0].op == gpu_from_host)): isinstance(input.clients[0][0].op, GpuFromHost))):
continue continue
try: try:
...@@ -215,7 +215,7 @@ gpu_seqopt.register('InputToGpuOptimizer', InputToGpuOptimizer(), ...@@ -215,7 +215,7 @@ gpu_seqopt.register('InputToGpuOptimizer', InputToGpuOptimizer(),
'merge') # TODO: how to make it mandatory for gpu_seqopt? 'merge') # TODO: how to make it mandatory for gpu_seqopt?
@local_optimizer([gpu_from_host, host_from_gpu]) @local_optimizer([GpuFromHost, HostFromGpu])
def local_cut_gpu_host_gpu(node): def local_cut_gpu_host_gpu(node):
if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu): if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu):
return [node.inputs[0].owner.inputs[0]] return [node.inputs[0].owner.inputs[0]]
...@@ -336,7 +336,7 @@ def local_gpu_elemwise_0(node): ...@@ -336,7 +336,7 @@ def local_gpu_elemwise_0(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host]) @local_optimizer([GpuFromHost])
def local_gpu_elemwise_1(node): def local_gpu_elemwise_1(node):
""" """
gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...)) gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...))
...@@ -392,7 +392,7 @@ def local_gpu_split(node): ...@@ -392,7 +392,7 @@ def local_gpu_split(node):
@register_opt() @register_opt()
@local_optimizer([tensor.DimShuffle, gpu_from_host]) @local_optimizer([tensor.DimShuffle, GpuFromHost])
def local_gpu_dimshuffle_0(node): def local_gpu_dimshuffle_0(node):
""" """
dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle) dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle)
...@@ -421,7 +421,7 @@ def local_gpu_dimshuffle_0(node): ...@@ -421,7 +421,7 @@ def local_gpu_dimshuffle_0(node):
@register_opt() @register_opt()
@local_optimizer([tensor.SpecifyShape, gpu_from_host]) @local_optimizer([tensor.SpecifyShape, GpuFromHost])
def local_gpu_specifyShape_0(node): def local_gpu_specifyShape_0(node):
""" """
specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape) specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape)
...@@ -445,7 +445,7 @@ def local_gpu_specifyShape_0(node): ...@@ -445,7 +445,7 @@ def local_gpu_specifyShape_0(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.basic.Dot]) @local_optimizer([GpuFromHost, tensor.basic.Dot])
def local_gpu_dot_to_dot22(node): def local_gpu_dot_to_dot22(node):
""" """
gpu_from_host(dot) -> gpudot(gpu_from_host) gpu_from_host(dot) -> gpudot(gpu_from_host)
...@@ -537,7 +537,7 @@ optdb.register('gpu_assert_no_cpu_op', assert_no_cpu_op, 49.2, ...@@ -537,7 +537,7 @@ optdb.register('gpu_assert_no_cpu_op', assert_no_cpu_op, 49.2,
@register_opt() @register_opt()
@local_optimizer([theano.ifelse.IfElse, gpu_from_host]) @local_optimizer([theano.ifelse.IfElse, GpuFromHost])
def local_gpu_lazy_ifelse(node): def local_gpu_lazy_ifelse(node):
""" """
gpu_from_host(ifelse) -> gpu_ifelse(gpu_from_host) gpu_from_host(ifelse) -> gpu_ifelse(gpu_from_host)
...@@ -606,7 +606,7 @@ def local_gpu_lazy_ifelse(node): ...@@ -606,7 +606,7 @@ def local_gpu_lazy_ifelse(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.blas.Dot22]) @local_optimizer([GpuFromHost, tensor.blas.Dot22])
def local_gpu_dot22(node): def local_gpu_dot22(node):
""" """
gpu_from_host(dot22) -> gpudot(gpu_from_host) gpu_from_host(dot22) -> gpudot(gpu_from_host)
...@@ -631,7 +631,7 @@ def local_gpu_dot22(node): ...@@ -631,7 +631,7 @@ def local_gpu_dot22(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.blas.BatchedDot]) @local_optimizer([GpuFromHost, tensor.blas.BatchedDot])
def local_gpu_batched_dot(node): def local_gpu_batched_dot(node):
""" """
gpu_from_host(batched_dot) -> gpu_batched_dot(gpu_from_host) gpu_from_host(batched_dot) -> gpu_batched_dot(gpu_from_host)
...@@ -670,7 +670,7 @@ def local_gpu_batched_dot(node): ...@@ -670,7 +670,7 @@ def local_gpu_batched_dot(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.blas.Dot22Scalar]) @local_optimizer([GpuFromHost, tensor.blas.Dot22Scalar])
def local_gpu_dot22scalar(node): def local_gpu_dot22scalar(node):
""" """
gpu_from_host(dot22scalar) -> gpudot(gpu_from_host) gpu_from_host(dot22scalar) -> gpudot(gpu_from_host)
...@@ -699,7 +699,7 @@ def local_gpu_dot22scalar(node): ...@@ -699,7 +699,7 @@ def local_gpu_dot22scalar(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.blas_c.CGemv, tensor.blas.Gemv]) @local_optimizer([GpuFromHost, tensor.blas_c.CGemv, tensor.blas.Gemv])
def local_gpu_gemv(node): def local_gpu_gemv(node):
""" """
gpu_from_host(gemv) -> gpu_gemv(gpu_from_host) gpu_from_host(gemv) -> gpu_gemv(gpu_from_host)
...@@ -737,7 +737,7 @@ def local_gpu_gemv(node): ...@@ -737,7 +737,7 @@ def local_gpu_gemv(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.blas_c.CGer, tensor.blas.Ger, @local_optimizer([GpuFromHost, tensor.blas_c.CGer, tensor.blas.Ger,
tensor.blas_scipy.ScipyGer]) tensor.blas_scipy.ScipyGer])
def local_gpu_ger(node): def local_gpu_ger(node):
""" """
...@@ -777,7 +777,7 @@ def local_gpu_ger(node): ...@@ -777,7 +777,7 @@ def local_gpu_ger(node):
@register_opt() @register_opt()
@local_optimizer([tensor.blas.Gemm, gpu_from_host]) @local_optimizer([tensor.blas.Gemm, GpuFromHost])
def local_gpu_gemm(node): def local_gpu_gemm(node):
""" """
gpu_from_host(gemm) -> gpu_gemm(gpu_from_host) gpu_from_host(gemm) -> gpu_gemm(gpu_from_host)
...@@ -966,7 +966,7 @@ def local_gpu_elemwise_careduce(node): ...@@ -966,7 +966,7 @@ def local_gpu_elemwise_careduce(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Reshape]) @local_optimizer([GpuFromHost, tensor.Reshape])
def local_gpu_reshape(node): def local_gpu_reshape(node):
if isinstance(node.op, GpuFromHost): if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -999,7 +999,7 @@ def local_gpu_reshape(node): ...@@ -999,7 +999,7 @@ def local_gpu_reshape(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Flatten]) @local_optimizer([GpuFromHost, tensor.Flatten])
def local_gpu_flatten(node): def local_gpu_flatten(node):
if isinstance(node.op, GpuFromHost): if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -1019,7 +1019,7 @@ def local_gpu_flatten(node): ...@@ -1019,7 +1019,7 @@ def local_gpu_flatten(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Subtensor]) @local_optimizer([GpuFromHost, tensor.Subtensor])
def local_gpu_subtensor(node): def local_gpu_subtensor(node):
if isinstance(node.op, GpuFromHost): if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -1062,7 +1062,7 @@ def local_gpu_subtensor(node): ...@@ -1062,7 +1062,7 @@ def local_gpu_subtensor(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.AdvancedSubtensor1]) @local_optimizer([GpuFromHost, tensor.AdvancedSubtensor1])
def local_gpu_advanced_subtensor1(node): def local_gpu_advanced_subtensor1(node):
if isinstance(node.op, GpuFromHost): if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -1083,7 +1083,7 @@ def local_gpu_advanced_subtensor1(node): ...@@ -1083,7 +1083,7 @@ def local_gpu_advanced_subtensor1(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.AdvancedIncSubtensor1]) @local_optimizer([GpuFromHost, tensor.AdvancedIncSubtensor1])
def local_gpu_advanced_incsubtensor1(node): def local_gpu_advanced_incsubtensor1(node):
if isinstance(node.op, GpuFromHost): if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -1153,7 +1153,7 @@ def local_gpu_advanced_incsubtensor1(node): ...@@ -1153,7 +1153,7 @@ def local_gpu_advanced_incsubtensor1(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.IncSubtensor]) @local_optimizer([GpuFromHost, tensor.IncSubtensor])
def local_gpu_incsubtensor(node): def local_gpu_incsubtensor(node):
if isinstance(node.op, GpuFromHost): if isinstance(node.op, GpuFromHost):
host_output = node.inputs[0] host_output = node.inputs[0]
...@@ -1463,7 +1463,7 @@ def values_eq_approx_high_tol(a, b): ...@@ -1463,7 +1463,7 @@ def values_eq_approx_high_tol(a, b):
return CudaNdarrayType.values_eq_approx(a, b, atol=atol) return CudaNdarrayType.values_eq_approx(a, b, atol=atol)
@local_optimizer([gpu_from_host, conv.ConvOp]) @local_optimizer([GpuFromHost, conv.ConvOp])
def local_gpu_conv(node): def local_gpu_conv(node):
""" """
gpu_from_host(conv) -> gpu_conv(gpu_from_host) gpu_from_host(conv) -> gpu_conv(gpu_from_host)
...@@ -2309,7 +2309,7 @@ def local_gpu_contiguous(node): ...@@ -2309,7 +2309,7 @@ def local_gpu_contiguous(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Eye]) @local_optimizer([GpuFromHost, tensor.Eye])
def local_gpu_eye(node): def local_gpu_eye(node):
""" """
gpu_from_host(eye) -> gpueye(gpu_from_host) gpu_from_host(eye) -> gpueye(gpu_from_host)
...@@ -2438,7 +2438,7 @@ def typeConstructor(broadcastable, dtype): ...@@ -2438,7 +2438,7 @@ def typeConstructor(broadcastable, dtype):
@register_opt('scan') @register_opt('scan')
@local_optimizer([gpu_from_host, scan_op.Scan]) @local_optimizer([GpuFromHost, scan_op.Scan])
def gpuScanOptimization(node): def gpuScanOptimization(node):
""" """
scan(host_from_gpu) -> host_from_gpu(GPUscan) scan(host_from_gpu) -> host_from_gpu(GPUscan)
...@@ -2560,7 +2560,7 @@ def gpuScanOptimization(node): ...@@ -2560,7 +2560,7 @@ def gpuScanOptimization(node):
@register_opt() @register_opt()
@local_optimizer([tensor.AllocEmpty, gpu_from_host]) @local_optimizer([tensor.AllocEmpty, GpuFromHost])
def local_gpu_allocempty(node): def local_gpu_allocempty(node):
if (isinstance(node.op, tensor.AllocEmpty) and if (isinstance(node.op, tensor.AllocEmpty) and
node.op.dtype == "float32"): node.op.dtype == "float32"):
...@@ -2727,7 +2727,7 @@ optdb.register('local_inplace_gpu_sparse_block_outer', ...@@ -2727,7 +2727,7 @@ optdb.register('local_inplace_gpu_sparse_block_outer',
# Move to Gpu optimization # Move to Gpu optimization
@local_optimizer([gpu_from_host, @local_optimizer([GpuFromHost,
AbstractConv2d, AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs, AbstractConv2d_gradInputs,
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论