Commit faa42f89, authored by Frederic Bastien

added optimizer for T.eq and T.neq and test.

Parent a50c108b
...@@ -607,6 +607,30 @@ def local_subtensor_make_vector(node): ...@@ -607,6 +607,30 @@ def local_subtensor_make_vector(node):
_logger.error('failed to index with "%s"' % str(idx)) _logger.error('failed to index with "%s"' % str(idx))
raise raise
@register_canonicalize
@register_specialize
@gof.local_optimizer([T.Elemwise])
def local_useless_eq(node):
    """eq(x, x) -> 1

    When both inputs of an elementwise ``eq`` are the exact same graph
    variable, the comparison is always true, so the node is replaced by a
    constant-ones tensor broadcast to the input's shape via ``fill``.
    """
    if not isinstance(node.op, T.Elemwise):
        return
    if node.op.scalar_op != theano.scalar.eq or len(node.inputs) != 2:
        return
    lhs, rhs = node.inputs
    if lhs == rhs:
        # Same variable on both sides: the output is all ones, with the
        # shape of the input and the dtype of the original output.
        one = T.constant(1.0, dtype=node.outputs[0].type.dtype)
        return [T.fill(lhs, one)]
@register_canonicalize
@register_specialize
@gof.local_optimizer([T.Elemwise])
def local_useless_neq(node):
    """neq(x,x) -> 0

    Replace an elementwise ``neq`` whose two inputs are the same graph
    variable with a constant-zeros tensor of the input's shape and the
    output's dtype.
    """
    if isinstance(node.op, T.Elemwise) and node.op.scalar_op == theano.scalar.neq and len(node.inputs)==2:
        if node.inputs[0]==node.inputs[1]:
            #it is the same var in the graph; neq(x, x) is always false,
            #so the result is a tensor of zeros broadcast to x's shape.
            return [T.fill(node.inputs[0], T.constant(0.0, dtype=node.outputs[0].type.dtype))]
#TODO: add the analogous optimizations for and, or, xor, le, and ge; see ticket #496.
@register_specialize @register_specialize
@gof.local_optimizer([T.Elemwise]) @gof.local_optimizer([T.Elemwise])
def local_alloc_unary(node): def local_alloc_unary(node):
...@@ -749,7 +773,6 @@ def local_alloc_elemwise(node): ...@@ -749,7 +773,6 @@ def local_alloc_elemwise(node):
new[no_broad_idx]=assert_op new[no_broad_idx]=assert_op
return [node.op(*new)] return [node.op(*new)]
#TODO, T.eq if both input are the same, remove!
#TODO, global optimizer that lift the assert to the beginning of the graph. #TODO, global optimizer that lift the assert to the beginning of the graph.
#TODO, var.tag.shape to propagate the shape and lower the overhead of this op #TODO, var.tag.shape to propagate the shape and lower the overhead of this op
#TODO, when all inputs can be optimized do all except one #TODO, when all inputs can be optimized do all except one
...@@ -764,8 +787,12 @@ theano.configparser.AddConfigVar('experimental.local_alloc_elemwise_assert', ...@@ -764,8 +787,12 @@ theano.configparser.AddConfigVar('experimental.local_alloc_elemwise_assert',
theano.configparser.BoolParam(True), theano.configparser.BoolParam(True),
) )
if theano.config.experimental.local_alloc_elemwise: if theano.config.experimental.local_alloc_elemwise:
#enabled by default when the lifter of assert is done.
register_specialize(local_alloc_elemwise) register_specialize(local_alloc_elemwise)
else:
#don't register them in fast_run by default to have them disabled by default
#disable them by default as we are not sure it is always a good idea to replace an alloc with multiple op.
compile.optdb['specialize'].register("local_alloc_elemwise", local_alloc_elemwise)
############################ ############################
# Constant Canonicalization # Constant Canonicalization
......
...@@ -1210,6 +1210,46 @@ class T_Rebroadcast(unittest.TestCase): ...@@ -1210,6 +1210,46 @@ class T_Rebroadcast(unittest.TestCase):
assert len(rebroadcast_nodes) == 1 assert len(rebroadcast_nodes) == 1
assert rebroadcast_nodes[0].op.axis == {0: True} assert rebroadcast_nodes[0].op.axis == {0: True}
def test_local_useless_eq():
    """local_useless_eq must rewrite eq(x, x) into a constant-ones tensor
    while leaving eq(x, y) on two distinct variables untouched.
    """
    mode = theano.compile.get_default_mode().including('local_useless_eq')
    x = T.dmatrix()
    y = T.dmatrix()

    # eq on two distinct variables must NOT be optimized away: the graph
    # keeps a single Elemwise{EQ} node.
    f = theano.function([x, y], T.eq(x, y), mode=mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx, vy)
    topo = f.maker.env.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Elemwise)
    assert isinstance(topo[0].op.scalar_op, theano.scalar.EQ)

    # eq(x, x) must evaluate to all ones, and the optimized graph should
    # contain only shape computations plus an Alloc of the constant:
    # Shape_i{1}(x), Shape_i{0}(x), Alloc([[1]], Shape_i{0}.0, Shape_i{1}.0)
    f2 = theano.function([x], T.eq(x, x), mode=mode)
    assert numpy.all(f2(vx) == numpy.ones((5, 4)))
    topo2 = f2.maker.env.toposort()
    assert len(topo2) == 3
    assert isinstance(topo2[-1].op, T.Alloc)
def test_local_useless_neq():
    """local_useless_neq must rewrite neq(x, x) into a constant-zeros tensor
    while leaving neq(x, y) on two distinct variables untouched.
    """
    mode = theano.compile.get_default_mode().including('local_useless_neq')
    x = T.dmatrix()
    y = T.dmatrix()

    # neq on two distinct variables must NOT be optimized away: the graph
    # keeps a single Elemwise{NEQ} node.
    f = theano.function([x, y], T.neq(x, y), mode=mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx, vy)
    topo = f.maker.env.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Elemwise)
    assert isinstance(topo[0].op.scalar_op, theano.scalar.NEQ)

    # neq(x, x) must evaluate to all zeros, and the optimized graph should
    # contain only shape computations plus an Alloc of the constant.
    f2 = theano.function([x], T.neq(x, x), mode=mode)
    assert numpy.all(f2(vx) == numpy.zeros((5, 4)))
    topo2 = f2.maker.env.toposort()
    assert len(topo2) == 3
    assert isinstance(topo2[-1].op, T.Alloc)
if __name__ == '__main__': if __name__ == '__main__':
# unittest.main() # unittest.main()
test_fusion().tes_memory_leak() test_fusion().tes_memory_leak()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论