提交 454eba0c authored 作者: Frederic Bastien's avatar Frederic Bastien

make Elemwise fusion work inplace

上级 7b8526b6
......@@ -1669,7 +1669,21 @@ class Composite(ScalarOp):
return "%s{%s}" % (self.__class__.__name__, ", ".join(
"%s=%s" % (k, v) for k, v in self.__dict__.items()
if k not in ["env","_c_code", "_cmodule_key", "_impls",
"_hashval"] ))
"_hashval", "inputs_type"] ))
def make_new_inplace(self, output_types_preference=None, name=None):
    """
    Return a rebuilt copy of this op, re-initialized with new preferences.

    The __init__ of this op does not take the same parameters as the
    other scalar ops, which breaks the insert_inplace_optimizer
    optimization. This method is the workaround: it constructs a new
    instance of the same class from self.inputs and self.outputs, then
    calls the parent-class __init__ on it with the requested
    output_types_preference and name.

    When `name` is falsy, the name already carried by the rebuilt op is
    passed to the parent-class __init__ instead.
    """
    rebuilt = self.__class__(self.inputs, self.outputs)
    if not name:
        # No explicit name requested: forward the rebuilt op's own name.
        name = rebuilt.name
    else:
        rebuilt.name = name
    # Re-run only the parent initializer; the class's own __init__ has
    # already been executed by the constructor call above.
    super(Composite, rebuilt).__init__(output_types_preference, name)
    return rebuilt
def __init__(self, inputs, outputs):
self.inputs=copy(inputs)
......
......@@ -125,12 +125,18 @@ def insert_inplace_optimizer(env):
inplace_pattern = dict(baseline, **{candidate_output: candidate_input})
try:
new = Elemwise(
op.scalar_op.__class__(
if hasattr(op.scalar_op,"make_new_inplace"):
new_scal = op.scalar_op.make_new_inplace(
scalar.transfer_type(
*[inplace_pattern.get(i, None) \
for i in xrange(len(node.outputs))])),
inplace_pattern).make_node(*node.inputs)
for i in xrange(len(node.outputs))]))
else:
new_scal = op.scalar_op.__class__(
scalar.transfer_type(
*[inplace_pattern.get(i, None) \
for i in xrange(len(node.outputs))]))
new = Elemwise(new_scal,inplace_pattern).make_node(*node.inputs)
env.replace_all_validate(zip(node.outputs, new.outputs),
reason="insert_inplace_optimizer")
except (ValueError, TypeError, InconsistencyError), e:
......
......@@ -886,6 +886,20 @@ class test_fusion(unittest.TestCase):
print d
print "min", d.min(), "argmin", d.argmin(), "max", d.max(), "mean", d.mean(), "std", d.std()
def test_fusion_inplace(self):
    # Start from a copy of the default mode so it is not modified globally.
    fusion_mode = cp(compile.mode.get_default_mode())
    # The fusion optimization must be enabled together with canonicalize;
    # canonicalize is needed to merge multiplication/addition by constant.
    fusion_mode._optimizer = fusion_mode._optimizer.including(
        'local_elemwise_fusion', 'canonicalize', 'inplace')

    x, y, z = dmatrices('xyz')
    expr = dot(x, y) + x + y + z
    fn = theano.function([x, y, z], expr, mode=fusion_mode)

    # Expect exactly two nodes in the compiled graph, the last of which
    # must report a non-empty inplace_pattern.
    nodes = fn.maker.env.toposort()
    assert len(nodes) == 2
    assert nodes[-1].op.inplace_pattern

    # Finally make sure the compiled function actually runs.
    args = [numpy.random.random((5, 5)) for _ in range(3)]
    fn(*args)
def speed_fusion_gpu(self):
import theano.sandbox.cuda as cuda
self.speed_fusion(shared_fn=tcn.float32_shared_constructor, gpu=True, s=slice(0,15))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论