提交 a579c1eb authored 作者: Frederic's avatar Frederic 提交者: Arnaud Bergeron

Make opt not crash with multi output CPU elemwise

上级 6319e9dc
...@@ -279,7 +279,8 @@ def local_gpu_elemwise_0(node): ...@@ -279,7 +279,8 @@ def local_gpu_elemwise_0(node):
# TODO: change this when fusion makes Elemwise with # TODO: change this when fusion makes Elemwise with
# multiple outputs # multiple outputs
gpu_elemwise = new_op(*(gpu_from_host(i) gpu_elemwise = new_op(*(gpu_from_host(i)
for i in node.inputs)) for i in node.inputs),
return_list=True)
# case 2 - it is still ok if some inputs were upcast to float32 # case 2 - it is still ok if some inputs were upcast to float32
elif all([i.type.dtype in upcastable elif all([i.type.dtype in upcastable
for i in node.inputs]): for i in node.inputs]):
...@@ -292,18 +293,19 @@ def local_gpu_elemwise_0(node): ...@@ -292,18 +293,19 @@ def local_gpu_elemwise_0(node):
new_inputs = [gpu_from_host(tensor.cast(i, 'float32')) new_inputs = [gpu_from_host(tensor.cast(i, 'float32'))
for i in node.inputs] for i in node.inputs]
gpu_elemwise = new_op(*new_inputs) gpu_elemwise = new_op(*new_inputs, return_list=True)
else: else:
return False return False
else: else:
return False return False
gpu_elemwise = split_huge_add_or_mul(gpu_elemwise.owner) gpu_elemwise = split_huge_add_or_mul(gpu_elemwise[0].owner)
if not gpu_elemwise: if not gpu_elemwise:
return False return False
if max_inputs_to_GpuElemwise(node) < len(gpu_elemwise.inputs): if (max_inputs_to_GpuElemwise(node) <
len(gpu_elemwise.inputs)):
return False return False
return [host_from_gpu(gpu_elemwise.outputs[0])] return [host_from_gpu(out) for out in gpu_elemwise.outputs]
@register_opt() @register_opt()
......
...@@ -618,6 +618,15 @@ def test_local_gpu_elemwise_0(): ...@@ -618,6 +618,15 @@ def test_local_gpu_elemwise_0():
assert sum(isinstance(node.op, tensor.Elemwise) for node in topo) == 1 assert sum(isinstance(node.op, tensor.Elemwise) for node in topo) == 1
f(a_v, b_v, c_v) f(a_v, b_v, c_v)
# Test multiple output
out_s = theano.scalar.Composite([a_s, b_s, c_s], [a_s + b_s, a_s * b_s])
outs_op = tensor.Elemwise(out_s)
f = theano.function([a, b, c], outs_op(a, b, c), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert sum(isinstance(node.op, cuda.GpuElemwise) for node in topo) == 1
assert sum(isinstance(node.op, tensor.Elemwise) for node in topo) == 1
f(a_v, b_v, c_v)
def test_elemwise_fusion(): def test_elemwise_fusion():
""" Test the the GpuElemwise fusion work correctly""" """ Test the the GpuElemwise fusion work correctly"""
......
...@@ -255,3 +255,46 @@ def test_local_gpu_subtensor(): ...@@ -255,3 +255,46 @@ def test_local_gpu_subtensor():
assert any([type(node.op) is tensor.Subtensor for node in topo]) assert any([type(node.op) is tensor.Subtensor for node in topo])
assert not any([isinstance(node.op, GpuSubtensor) for node in topo]) assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
assert any([isinstance(node.op, GpuElemwise) for node in topo]) assert any([isinstance(node.op, GpuElemwise) for node in topo])
def test_local_gpu_elemwise():
    """
    Test local_gpu_elemwise_0 when there is a dtype upcastable to float32
    """
    def check_all_on_gpu(fn):
        # Every Elemwise in the compiled graph must have been moved to
        # the GPU: exactly one GpuElemwise, zero plain CPU Elemwise.
        nodes = fn.maker.fgraph.toposort()
        assert sum(isinstance(n.op, GpuElemwise) for n in nodes) == 1
        assert sum(type(n.op) == tensor.Elemwise for n in nodes) == 0

    mat_i8 = tensor.bmatrix()
    mat_f32_a = tensor.fmatrix()
    mat_f32_b = tensor.fmatrix()

    val_i8 = (numpy.random.rand(4, 5) * 10).astype("int8")
    val_f32_a = (numpy.random.rand(4, 5) * 10).astype("float32")
    val_f32_b = (numpy.random.rand(4, 5) * 10).astype("float32")

    # Due to optimization order, this composite is created when all
    # the op are on the gpu.
    fn = theano.function([mat_i8, mat_f32_a, mat_f32_b],
                         [mat_i8 + mat_f32_a + mat_f32_b],
                         mode=mode_with_gpu)
    check_all_on_gpu(fn)
    fn(val_i8, val_f32_a, val_f32_b)

    # Now test with the composite already on the cpu before we move it
    # to the gpu
    scal_i8 = theano.scalar.int8()
    scal_f32_a = theano.scalar.float32()
    scal_f32_b = theano.scalar.float32()
    composite = theano.scalar.Composite(
        [scal_i8, scal_f32_a, scal_f32_b],
        [scal_i8 + scal_f32_a + scal_f32_b])
    fn = theano.function([mat_i8, mat_f32_a, mat_f32_b],
                         [tensor.Elemwise(composite)(mat_i8, mat_f32_a,
                                                     mat_f32_b)],
                         mode=mode_with_gpu)
    check_all_on_gpu(fn)
    fn(val_i8, val_f32_a, val_f32_b)

    # Test multiple output
    composite = theano.scalar.Composite(
        [scal_i8, scal_f32_a, scal_f32_b],
        [scal_i8 + scal_f32_a, scal_i8 * scal_f32_a])
    fn = theano.function([mat_i8, mat_f32_a, mat_f32_b],
                         tensor.Elemwise(composite)(mat_i8, mat_f32_a,
                                                    mat_f32_b),
                         mode=mode_with_gpu)
    check_all_on_gpu(fn)
    fn(val_i8, val_f32_a, val_f32_b)
...@@ -296,6 +296,10 @@ def inplace_elemwise_optimizer_op(OP): ...@@ -296,6 +296,10 @@ def inplace_elemwise_optimizer_op(OP):
# gpuarray GpuElemwise inherit from Elemwise # gpuarray GpuElemwise inherit from Elemwise
if not type(op) == OP: if not type(op) == OP:
continue continue
# TODO support this case
if len(node.outputs) > 1:
return
baseline = op.inplace_pattern baseline = op.inplace_pattern
protected_inputs = [ protected_inputs = [
f.protected for f in node.fgraph._features if f.protected for f in node.fgraph._features if
......
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论