Commit bd34bddf authored by Olivier Delalleau

Merge pull request #544 from nouiz/fix_composite_new_inputs

Fix composite new inputs
......@@ -29,7 +29,8 @@ New Features
(Frederic B.)
* debugprint does not print anymore the "|" symbol in a column after the last input. (Frederic B.)
* If you use Enthought Python Distribution (EPD) now we use its blas
implementation by default. (Frederic B.)
implementation by default (Tested Linux, Windows)
(Frederic B., Simon McGregor)
Sparse Sandbox graduate
 * Remove0 op: it removes stored elements with value 0. (Frederic B.)
......@@ -54,6 +55,11 @@ Crash Fix
 * An optimization printed a useless error when SciPy was not available. (Frederic B.)
* Gpu conv crash/slowdown on newer hardware? (James B.)
* Better error handling in gpu conv (Frederic B.)
 * GPU optimization that moves element-wise ops to the GPU: it crashed
   under a particular execution order of this optimization and the
   element-wise fusion optimization, when upcasting some inputs to
   float32 (to compute them on the GPU).
   (Frederic B., reported by Sander Dieleman)
=============
Release Notes
......
......@@ -20,7 +20,7 @@ from debugmode import DebugMode
from profilemode import ProfileMode
from theano.compile.sharedvalue import shared, shared_constructor, SharedVariable
from theano.compile.pfunc import pfunc, Param
from theano.compile.pfunc import pfunc, Param, rebuild_collect_shared
from function import function
......@@ -42,7 +42,7 @@ def rebuild_collect_shared(outputs,
inputs of the computational graph (or None)
:type replace: dict
:param replace: dictionary describing which subgraphs should be
replaced by what
replaced by what. orig_value => new_value
:type updates: dict
:param updates: dictionary describing updates expressions for shared
......
......@@ -165,6 +165,7 @@ class Apply(utils.object2):
:returns: an Apply instance with the same op but different outputs.
"""
assert isinstance(inputs, (list, tuple))
remake_node = False
new_inputs = inputs[:]
for i, (curr, new) in enumerate(zip(self.inputs, new_inputs)):
......
......@@ -241,6 +241,43 @@ def test_huge_elemwise_fusion():
f()
def test_local_gpu_elemwise_0():
    """
    Test that a single GpuElemwise is built when some inputs must be
    upcast to float32 before being computed on the GPU.
    """
    def check_one_gpu_elemwise(fct):
        # Exactly one GpuElemwise and one host-side Elemwise must remain
        # in the compiled graph.
        nodes = fct.maker.env.toposort()
        n_gpu = sum(isinstance(n.op, cuda.GpuElemwise) for n in nodes)
        n_cpu = sum(isinstance(n.op, tensor.Elemwise) for n in nodes)
        assert n_gpu == 1
        assert n_cpu == 1

    x = tensor.bmatrix()
    y = tensor.fmatrix()
    z = tensor.fmatrix()
    x_val = (numpy.random.rand(4, 5) * 10).astype("int8")
    y_val = (numpy.random.rand(4, 5) * 10).astype("float32")
    z_val = (numpy.random.rand(4, 5) * 10).astype("float32")

    # Case 1: because of the optimization ordering, the Composite is
    # created only once all the ops are already on the GPU.
    f = theano.function([x, y, z], [x + y + z], mode=mode_with_gpu)
    #theano.printing.debugprint(f)
    check_one_gpu_elemwise(f)
    f(x_val, y_val, z_val)

    # Case 2: the Composite already exists on the CPU before the graph
    # is moved to the GPU.
    xs = theano.scalar.int8()
    ys = theano.scalar.float32()
    zs = theano.scalar.float32()
    comp = theano.scalar.Composite([xs, ys, zs], [xs + ys + zs])
    comp_op = tensor.Elemwise(comp)
    f = theano.function([x, y, z], [comp_op(x, y, z)], mode=mode_with_gpu)
    #theano.printing.debugprint(f)
    check_one_gpu_elemwise(f)
    f(x_val, y_val, z_val)
def test_elemwise_fusion():
""" Test the the GpuElemwise fusion work correctly"""
shape = (3,4)
......
......@@ -2371,6 +2371,19 @@ class Composite(ScalarOp):
% (self.inputs_type, tuple(input_types)))
return self.outputs_type
def make_node(self, *inputs):
    """
    Build an Apply node for this Composite.

    If the types of `inputs` match the types this Composite was built
    for, defer to the parent implementation. Otherwise, rebuild the
    internal graph on the new inputs and let a freshly constructed
    Composite with the matching input signature build the node.
    """
    expected = tuple(i.type for i in self.inputs)
    given = tuple(i.type for i in inputs)
    if expected == given:
        return super(Composite, self).make_node(*inputs)
    # Clone the internal graph, substituting the new (differently
    # typed) inputs for the original ones; rebuild_strict=False allows
    # the type change.  The second element of the result holds the
    # rebuilt output variables.
    rebuilt = theano.compile.rebuild_collect_shared(
        self.outputs,
        replace=dict(zip(self.inputs, inputs)),
        rebuild_strict=False)
    new_outputs = rebuilt[1]
    return Composite(inputs, new_outputs).make_node(*inputs)
def perform(self, node, inputs, output_storage):
    """Compute each output by running its fused scalar implementation."""
    # self._impls pairs one callable with each output slot; each callable
    # takes the full input list and returns that output's scalar value.
    # (presumably built elsewhere from the composite's internal graph —
    # not visible here.)
    for storage, impl in zip(output_storage, self._impls):
        storage[0] = impl(inputs)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论