Commit 648438e7 authored by Frederic

Crash fix: GPU optimization that moves element-wise ops to the GPU.

The crash happened under a particular execution order of this optimization and the element-wise fusion optimization, when upcasting some inputs to float32 (to compute them on the GPU). (Frederic B., reported by Sander Dieleman)
Parent 65eeaaec
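A minimal sketch of the failing pattern, assuming a Theano build with the CUDA backend enabled; it mirrors the regression test added below, and `mode_with_gpu` is constructed the way the test suite does:

import numpy
import theano
import theano.tensor as tensor

mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')

a = tensor.bmatrix()  # int8: upcast to float32 so it can run on the GPU
b = tensor.fmatrix()
c = tensor.fmatrix()

# Before this fix, compiling this could crash when the move-to-GPU
# optimization and the elemwise fusion optimization ran in a particular
# order: the fused Composite kept its pre-upcast input types.
f = theano.function([a, b, c], a + b + c, mode=mode_with_gpu)
f((numpy.random.rand(4, 5) * 10).astype('int8'),
  numpy.random.rand(4, 5).astype('float32'),
  numpy.random.rand(4, 5).astype('float32'))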
@@ -54,6 +54,11 @@ Crash Fix
* Optimization print useless error when scipy is not available. (Frederic B.)
* Gpu conv crash/slowdown on newer hardware? (James B.)
* Better error handling in gpu conv (Frederic B.)
* GPU optimization that moves element-wise ops to the GPU. It happened
  under a particular execution order of this optimization and the
  element-wise fusion optimization, when upcasting some inputs to
  float32 (to compute them on the GPU).
  (Frederic B., reported by Sander Dieleman)
=============
Release Notes
...
@@ -241,6 +241,43 @@ def test_huge_elemwise_fusion():
    f()

def test_local_gpu_elemwise_0():
    """
    Test local_gpu_elemwise_0 when there is a dtype upcastable to
    float32.
    """
    a = tensor.bmatrix()
    b = tensor.fmatrix()
    c = tensor.fmatrix()

    a_v = (numpy.random.rand(4, 5) * 10).astype("int8")
    b_v = (numpy.random.rand(4, 5) * 10).astype("float32")
    c_v = (numpy.random.rand(4, 5) * 10).astype("float32")

    # Due to the order of optimizations, the Composite is created when
    # all the ops are already on the GPU.
    f = theano.function([a, b, c], [a + b + c], mode=mode_with_gpu)
    #theano.printing.debugprint(f)
    topo = f.maker.env.toposort()
    assert sum(isinstance(node.op, cuda.GpuElemwise) for node in topo) == 1
    assert sum(isinstance(node.op, tensor.Elemwise) for node in topo) == 1
    f(a_v, b_v, c_v)

    # Now test with the Composite already on the CPU before we move it
    # to the GPU.
    a_s = theano.scalar.int8()
    b_s = theano.scalar.float32()
    c_s = theano.scalar.float32()
    out_s = theano.scalar.Composite([a_s, b_s, c_s], [a_s + b_s + c_s])
    out_op = tensor.Elemwise(out_s)
    f = theano.function([a, b, c], [out_op(a, b, c)], mode=mode_with_gpu)
    #theano.printing.debugprint(f)
    topo = f.maker.env.toposort()
    assert sum(isinstance(node.op, cuda.GpuElemwise) for node in topo) == 1
    assert sum(isinstance(node.op, tensor.Elemwise) for node in topo) == 1
    f(a_v, b_v, c_v)

def test_elemwise_fusion():
    """Test that the GpuElemwise fusion works correctly."""
    shape = (3, 4)
...
@@ -2371,6 +2371,19 @@ class Composite(ScalarOp):
                            % (self.inputs_type, tuple(input_types)))
        return self.outputs_type

    def make_node(self, *inputs):
        if (tuple([i.type for i in self.inputs]) ==
                tuple([i.type for i in inputs])):
            return super(Composite, self).make_node(*inputs)
        else:
            # Make a new op with the right input types.
            res = theano.compile.rebuild_collect_shared(
                self.outputs,
                replace=dict(zip(self.inputs, inputs)),
                rebuild_strict=False)
            node = Composite(inputs, res[1]).make_node(*inputs)
            return node

    def perform(self, node, inputs, output_storage):
        for storage, impl in zip(output_storage, self._impls):
            storage[0] = impl(inputs)
...
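A hedged sketch of what the new `Composite.make_node` allows (the float64 example is illustrative, not taken from this commit): applying an existing Composite to scalars of a different dtype now rebuilds the op with the new input types instead of failing the `inputs_type` check in `output_types`.

import theano
import theano.scalar as scal

# A Composite built from float32 scalars.
x_s = scal.float32()
y_s = scal.float32()
comp = scal.Composite([x_s, y_s], [x_s + y_s])

# Apply it to float64 scalars: make_node now clones the inner graph with
# rebuild_collect_shared (rebuild_strict=False permits the dtype change)
# and builds a fresh Composite whose input types match.
a_s = scal.float64()
b_s = scal.float64()
node = comp.make_node(a_s, b_s)
assert node.outputs[0].type == scal.float64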