Commit faa42f89, authored by Frederic Bastien

added optimizer for T.eq and T.neq and test.

Parent a50c108b
...@@ -607,6 +607,30 @@ def local_subtensor_make_vector(node): ...@@ -607,6 +607,30 @@ def local_subtensor_make_vector(node):
_logger.error('failed to index with "%s"' % str(idx)) _logger.error('failed to index with "%s"' % str(idx))
raise raise
@register_canonicalize
@register_specialize
@gof.local_optimizer([T.Elemwise])
def local_useless_eq(node):
    """eq(x, x) -> 1

    When both inputs of an elementwise ``eq`` are the exact same graph
    variable, the comparison is always true, so the node is replaced by a
    constant-ones tensor broadcast to the input's shape via ``fill``.
    """
    if not isinstance(node.op, T.Elemwise):
        return
    if node.op.scalar_op != theano.scalar.eq or len(node.inputs) != 2:
        return
    lhs, rhs = node.inputs
    if lhs == rhs:
        # Same variable on both sides: the output is all ones, with the
        # shape of the input and the dtype of the original output.
        one = T.constant(1.0, dtype=node.outputs[0].type.dtype)
        return [T.fill(lhs, one)]
@register_canonicalize
@register_specialize
@gof.local_optimizer([T.Elemwise])
def local_useless_neq(node):
    """neq(x,x) -> 0

    Replace an elementwise ``neq`` whose two inputs are the same graph
    variable with a constant-zeros tensor of the input's shape and the
    output's dtype.
    """
    if isinstance(node.op, T.Elemwise) and node.op.scalar_op == theano.scalar.neq and len(node.inputs)==2:
        if node.inputs[0]==node.inputs[1]:
            #it is the same var in the graph; neq(x, x) is always false,
            #so the result is a tensor of zeros broadcast to x's shape.
            return [T.fill(node.inputs[0], T.constant(0.0, dtype=node.outputs[0].type.dtype))]
#TODO: add the analogous optimizations for and, or, xor, le, and ge; see ticket #496.
@register_specialize @register_specialize
@gof.local_optimizer([T.Elemwise]) @gof.local_optimizer([T.Elemwise])
def local_alloc_unary(node): def local_alloc_unary(node):
...@@ -749,7 +773,6 @@ def local_alloc_elemwise(node): ...@@ -749,7 +773,6 @@ def local_alloc_elemwise(node):
new[no_broad_idx]=assert_op new[no_broad_idx]=assert_op
return [node.op(*new)] return [node.op(*new)]
#TODO, T.eq if both input are the same, remove!
#TODO, global optimizer that lift the assert to the beginning of the graph. #TODO, global optimizer that lift the assert to the beginning of the graph.
#TODO, var.tag.shape to propagate the shape and lower the overhead of this op #TODO, var.tag.shape to propagate the shape and lower the overhead of this op
#TODO, when all inputs can be optimized do all except one #TODO, when all inputs can be optimized do all except one
...@@ -764,8 +787,12 @@ theano.configparser.AddConfigVar('experimental.local_alloc_elemwise_assert', ...@@ -764,8 +787,12 @@ theano.configparser.AddConfigVar('experimental.local_alloc_elemwise_assert',
theano.configparser.BoolParam(True), theano.configparser.BoolParam(True),
) )
if theano.config.experimental.local_alloc_elemwise: if theano.config.experimental.local_alloc_elemwise:
#enabled by default when the lifter of assert is done.
register_specialize(local_alloc_elemwise) register_specialize(local_alloc_elemwise)
else:
#don't register them in fast_run by default to have them disabled by default
#disable them by default as we are not sure it is always a good idea to replace an alloc with multiple op.
compile.optdb['specialize'].register("local_alloc_elemwise", local_alloc_elemwise)
############################ ############################
# Constant Canonicalization # Constant Canonicalization
......
...@@ -1210,6 +1210,46 @@ class T_Rebroadcast(unittest.TestCase): ...@@ -1210,6 +1210,46 @@ class T_Rebroadcast(unittest.TestCase):
assert len(rebroadcast_nodes) == 1 assert len(rebroadcast_nodes) == 1
assert rebroadcast_nodes[0].op.axis == {0: True} assert rebroadcast_nodes[0].op.axis == {0: True}
def test_local_useless_eq():
    """local_useless_eq must rewrite eq(x, x) into a constant-ones tensor
    while leaving eq(x, y) on two distinct variables untouched.
    """
    mode = theano.compile.get_default_mode().including('local_useless_eq')
    x = T.dmatrix()
    y = T.dmatrix()

    # eq on two distinct variables must NOT be optimized away: the graph
    # keeps a single Elemwise{EQ} node.
    f = theano.function([x, y], T.eq(x, y), mode=mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx, vy)
    topo = f.maker.env.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Elemwise)
    assert isinstance(topo[0].op.scalar_op, theano.scalar.EQ)

    # eq(x, x) must evaluate to all ones, and the optimized graph should
    # contain only shape computations plus an Alloc of the constant:
    # Shape_i{1}(x), Shape_i{0}(x), Alloc([[1]], Shape_i{0}.0, Shape_i{1}.0)
    f2 = theano.function([x], T.eq(x, x), mode=mode)
    assert numpy.all(f2(vx) == numpy.ones((5, 4)))
    topo2 = f2.maker.env.toposort()
    assert len(topo2) == 3
    assert isinstance(topo2[-1].op, T.Alloc)
def test_local_useless_neq():
    """local_useless_neq must rewrite neq(x, x) into a constant-zeros tensor
    while leaving neq(x, y) on two distinct variables untouched.
    """
    mode = theano.compile.get_default_mode().including('local_useless_neq')
    x = T.dmatrix()
    y = T.dmatrix()

    # neq on two distinct variables must NOT be optimized away: the graph
    # keeps a single Elemwise{NEQ} node.
    f = theano.function([x, y], T.neq(x, y), mode=mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx, vy)
    topo = f.maker.env.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Elemwise)
    assert isinstance(topo[0].op.scalar_op, theano.scalar.NEQ)

    # neq(x, x) must evaluate to all zeros, and the optimized graph should
    # contain only shape computations plus an Alloc of the constant.
    f2 = theano.function([x], T.neq(x, x), mode=mode)
    assert numpy.all(f2(vx) == numpy.zeros((5, 4)))
    topo2 = f2.maker.env.toposort()
    assert len(topo2) == 3
    assert isinstance(topo2[-1].op, T.Alloc)
if __name__ == '__main__': if __name__ == '__main__':
# unittest.main() # unittest.main()
test_fusion().tes_memory_leak() test_fusion().tes_memory_leak()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论