提交 8dfe6847 authored 作者: sentient07's avatar sentient07

Replaced the CPU flatten op with a GPU op (GpuReshape) in use_gpu_cumsumop

上级 6c611b5e
...@@ -8,7 +8,7 @@ try: ...@@ -8,7 +8,7 @@ try:
except ImportError: except ImportError:
pass pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel) from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape)
from .opt import register_opt, op_lifter, register_opt2 from .opt import register_opt, op_lifter, register_opt2
...@@ -454,21 +454,18 @@ class GpuCumsum(GpuKernelBase, Op): ...@@ -454,21 +454,18 @@ class GpuCumsum(GpuKernelBase, Op):
@register_opt2([CumsumOp], 'fast_compile') @register_opt2([CumsumOp], 'fast_compile')
def use_gpu_cumsumop(op, ctx_name, inputs, outputs): def use_gpu_cumsumop(op, ctx_name, inputs, outputs):
if inputs[0].dtype == 'float32': if inputs[0].dtype == 'float32':
if isinstance(inputs[0].type, GpuArrayType):
return
axis = op.axis axis = op.axis
x = inputs[0] x = inputs[0]
if axis is not None and x.ndim > GpuCumsum.SUPPORTED_NDIMS: if axis is not None and x.ndim > GpuCumsum.SUPPORTED_NDIMS:
return None return None
if axis is None and x.ndim > 1:
x = x.flatten()
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
if axis is None and x.ndim > 1:
x = GpuReshape(1)(x, (-1,))
# ``gpu_cumsum`` assume array has been flattened if needed. # ``gpu_cumsum`` assume array has been flattened if needed.
if axis is None: if axis is None:
axis = 0 axis = 0
assert isinstance(x.type, GpuArrayType)
return GpuCumsum(axis)(x) return GpuCumsum(axis)(x)
...@@ -612,8 +612,8 @@ def local_gpua_alloc_empty_to_zeros(node): ...@@ -612,8 +612,8 @@ def local_gpua_alloc_empty_to_zeros(node):
if isinstance(node.op, GpuAllocEmpty): if isinstance(node.op, GpuAllocEmpty):
context_name = infer_context_name(*node.inputs) context_name = infer_context_name(*node.inputs)
z = numpy.asarray(0, dtype=node.outputs[0].dtype) z = numpy.asarray(0, dtype=node.outputs[0].dtype)
return [gpu_alloc(None)(as_gpuarray_variable(z, context_name), return [gpu_alloc(context_name)(as_gpuarray_variable(z, context_name),
*node.inputs)] *node.inputs)]
optdb.register('local_gpua_alloc_empty_to_zeros', optdb.register('local_gpua_alloc_empty_to_zeros',
theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros), theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros),
# After move to gpu and merge2, before inplace. # After move to gpu and merge2, before inplace.
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论