Commit c13853ad authored by Frédéric Bastien

Merge pull request #3392 from nouiz/aalmah-elemwise_opt

elemwise opt
......@@ -89,7 +89,7 @@ _logger = logging.getLogger('theano.scan_module.scan_opt')
list_opt_slice = [tensor.opt.local_abs_merge,
tensor.opt.local_mul_switch_sink,
tensor.opt.local_upcast_elemwise_constant_inputs,
tensor.opt.local_remove_switch_const_cond,
tensor.opt.local_useless_switch,
tensor.opt.constant_folding]
......
......@@ -1554,9 +1554,24 @@ def local_useless_elemwise(node):
mul(x) -> x
add(x) -> x
identity(x) -> x
and(x,1) -> x
and(x,0) -> zeros_like(x)
or(x,0) -> x
or(x,1) -> ones_like(x)
xor(x,x) -> zeros_like(x)
"""
if isinstance(node.op, T.Elemwise):
def zeros_like(node, in_idx):
# it is the same var in the graph. That will always be true
return [T.fill(node.inputs[in_idx],
T.constant(0.0, dtype=node.outputs[0].type.dtype))]
def ones_like(node, in_idx):
# it is the same var in the graph. That will always be true
return [T.fill(node.inputs[in_idx],
T.constant(1.0, dtype=node.outputs[0].type.dtype))]
if node.op.scalar_op == theano.scalar.eq and len(node.inputs) == 2:
if node.inputs[0] == node.inputs[1]:
# it is the same var in the graph. That will always be true
......@@ -1581,14 +1596,57 @@ def local_useless_elemwise(node):
elif node.op.scalar_op == theano.scalar.mul and len(node.inputs) == 1:
# No need to copy over any stack trace
return [node.inputs[0]]
elif node.op.scalar_op == theano.scalar.add and len(node.inputs) == 1:
# No need to copy over any stack trace
return [node.inputs[0]]
elif (node.op.scalar_op == theano.scalar.identity and
len(node.inputs) == 1):
# No need to copy over any stack trace
return [node.inputs[0]]
elif (isinstance(node.op.scalar_op, scalar.AND) and
len(node.inputs) == 2):
if isinstance(node.inputs[0], T.TensorConstant):
const_val = T.extract_constant(node.inputs[0])
if not isinstance(const_val, Variable):
if const_val == 0:
return zeros_like(node, 1)
else:
return [node.inputs[1]]
if isinstance(node.inputs[1], T.TensorConstant):
const_val = T.extract_constant(node.inputs[1])
if not isinstance(const_val, Variable):
if const_val == 0:
return zeros_like(node, 0)
else:
return [node.inputs[0]]
elif (isinstance(node.op.scalar_op, scalar.OR) and
len(node.inputs) == 2):
if isinstance(node.inputs[0], T.TensorConstant):
const_val = T.extract_constant(node.inputs[0])
if not isinstance(const_val, Variable):
if const_val == 0:
return [node.inputs[1]]
else:
return ones_like(node, 1)
if isinstance(node.inputs[1], T.TensorConstant):
const_val = T.extract_constant(node.inputs[1])
if not isinstance(const_val, Variable):
if const_val == 0:
return [node.inputs[0]]
else:
return ones_like(node, 0)
elif (isinstance(node.op.scalar_op, scalar.XOR) and
len(node.inputs) == 2):
if node.inputs[0] is node.inputs[1]:
return zeros_like(node, 0)
@register_specialize
@gof.local_optimizer([T.Elemwise])
......@@ -2389,7 +2447,7 @@ def merge_two_slices(slice1, len1, slice2, len2):
"""
list_opt = [local_abs_merge, local_mul_switch_sink,
local_upcast_elemwise_constant_inputs,
local_remove_switch_const_cond, constant_folding]
local_useless_switch, constant_folding]
if type(slice1) is not slice:
raise ValueError(('First provided slice should actually be of type'
......@@ -2767,10 +2825,11 @@ def local_inplace_setsubtensor(node):
"""
if isinstance(node.op, IncSubtensor) and not node.op.inplace:
dta = node.op.destroyhandler_tolerate_aliased
new_op = node.op.__class__(
node.op.idx_list, inplace=True,
set_instead_of_inc=node.op.set_instead_of_inc,
destroyhandler_tolerate_aliased=node.op.destroyhandler_tolerate_aliased)
destroyhandler_tolerate_aliased=dta)
new_node = new_op(*node.inputs)
return [new_node]
return False
......@@ -3206,15 +3265,18 @@ def local_join_make_vector(node):
# Switch opts #
###############
@register_canonicalize
@register_canonicalize('fast_compile', 'local_remove_switch_const_cond')
@register_specialize
@gof.local_optimizer([T.Elemwise])
def local_remove_switch_const_cond(node):
def local_useless_switch(node):
"""
This optimization makes the following changes in the graph:
T.switch(cond,left,right) -->
if cond is constant and cond == 0: right
if cond is constant and cond != 0: left
if left is right -> left
T.switch(le(shape_i{id}(X), 0), 0, shape_i{id}(X)) -> shape_i{id}(X)
"""
if (isinstance(node.op, T.Elemwise) and
isinstance(node.op.scalar_op, scalar.basic.Switch)):
......@@ -3235,7 +3297,25 @@ def local_remove_switch_const_cond(node):
out = T.alloc(out, *[node.outputs[0].shape[i] for i
in xrange(out.ndim)])
return [out]
# if left is right -> left
if node.inputs[1] is node.inputs[2]:
return [node.inputs[1]]
# This case happens with scan.
# Elemwise{switch}(le(shape_i{id}(X), 0), 0, shape_i{id}(X)) -> shape_i{id}(X)
left = node.inputs[1]
right = node.inputs[2]
cond_var = node.inputs[0]
if cond_var.owner and \
isinstance(cond_var.owner.op, T.Elemwise) and \
isinstance(cond_var.owner.op.scalar_op, scalar.LE) and \
cond_var.owner.inputs[0].owner and \
isinstance(cond_var.owner.inputs[0].owner.op, Shape_i) and \
T.extract_constant(cond_var.owner.inputs[1]) == 0 and \
T.extract_constant(left) == 0 and \
right is cond_var.owner.inputs[0]:
assert right.type == node.outputs[0].type
return [right]
return False
return False
......@@ -4136,6 +4216,110 @@ def local_elemwise_sub_zeros(node):
return [T.zeros_like(node.inputs[0])]
@register_specialize
@register_stabilize
@register_canonicalize
@gof.local_optimizer([T.Elemwise])
def local_useless_elemwise_comparison(node):
    """Replace elemwise comparisons whose result is statically known.

    # Comparing to itself is constant
    Elemwise[{LT,GT}](X, X) -> Elemwise[zeros](X)
    Elemwise[{LE,GE}](X, X) -> Elemwise[ones](X)
    Elemwise[{minimum,maximum}](X, X) -> X

    # Comparing shape to 0 can be constant
    Elemwise[LT](X.shape[i], 0) -> Elemwise[zeros](X)
    Elemwise[GE](X.shape[i], 0) -> Elemwise[ones](X)
    Elemwise[maximum](X.shape[i], 0) -> X.shape[i]
    Elemwise[maximum](0, X.shape[i]) -> X.shape[i]
    Elemwise[minimum](X.shape[i], 0) -> 0
    Elemwise[minimum](0, X.shape[i]) -> 0

    # The shape can be replaced with sum of shapes
    Elemwise[LT](add([anything that is shapes]), 0) -> Elemwise[zeros](X)
    Elemwise[GE](add([anything that is shapes]), 0) -> Elemwise[ones](X)

    :note: These cases appear in the graph generated by scan.
           These optimizations will make the graph easier to read.
    """
    if not isinstance(node.op, T.Elemwise):
        return
    if node.op.scalar_op.nin != 2:
        return

    def _is_shape_i(var):
        # True iff `var` is directly the output of a Shape_i op.
        # (Shapes are non-negative, which is what justifies the rewrites.)
        return var.owner is not None and isinstance(var.owner.op, Shape_i)

    def _is_zero_const(var):
        # True iff `var` wraps the constant 0.  extract_constant returns a
        # plain scalar/ndarray for constants and a Variable otherwise; the
        # `== 0` comparison is then False for Variables.
        return T.extract_constant(var) == 0

    def _is_sum_of_shapes(var):
        # True iff `var` is an elemwise add whose inputs are all Shape_i
        # outputs (a sum of shapes is also non-negative).
        return (var.owner is not None and
                isinstance(var.owner.op, Elemwise) and
                isinstance(var.owner.op.scalar_op, scalar.Add) and
                all(_is_shape_i(v) for v in var.owner.inputs))

    x, y = node.inputs
    op = node.op.scalar_op
    out_dtype = node.outputs[0].dtype

    # Elemwise[{LT,GT}](X, X) -> Elemwise[zeros](X)
    if isinstance(op, (scalar.LT, scalar.GT)) and x is y:
        return [T.zeros_like(x, dtype=out_dtype)]
    # Elemwise[{LE,GE}](X, X) -> Elemwise[ones](X)
    if isinstance(op, (scalar.LE, scalar.GE)) and x is y:
        return [T.ones_like(x, dtype=out_dtype)]
    # Elemwise[{minimum,maximum}](X, X) -> X
    if isinstance(op, (scalar.Minimum, scalar.Maximum)) and x is y:
        return [x]

    # Elemwise[LT](X.shape[i], 0) -> Elemwise[zeros](X)
    if isinstance(op, scalar.LT) and _is_shape_i(x) and _is_zero_const(y):
        return [T.zeros_like(x, dtype=out_dtype)]
    # Elemwise[GE](X.shape[i], 0) -> Elemwise[ones](X)
    if isinstance(op, scalar.GE) and _is_shape_i(x) and _is_zero_const(y):
        return [T.ones_like(x, dtype=out_dtype)]
    # Elemwise[maximum](X.shape[i], 0) -> X.shape[i]
    if isinstance(op, scalar.Maximum) and _is_shape_i(x) and _is_zero_const(y):
        return [x]
    # Elemwise[maximum](0, X.shape[i]) -> X.shape[i]
    if isinstance(op, scalar.Maximum) and _is_zero_const(x) and _is_shape_i(y):
        return [y]
    # Elemwise[minimum](X.shape[i], 0) -> 0
    if isinstance(op, scalar.Minimum) and _is_shape_i(x) and _is_zero_const(y):
        return [T.zeros_like(x, dtype=out_dtype)]
    # Elemwise[minimum](0, X.shape[i]) -> 0
    if isinstance(op, scalar.Minimum) and _is_zero_const(x) and _is_shape_i(y):
        return [T.zeros_like(y, dtype=out_dtype)]

    # Elemwise[LT](add([shapes]), 0) -> Elemwise[zeros](X)
    if isinstance(op, scalar.LT) and _is_sum_of_shapes(x) and _is_zero_const(y):
        return [T.zeros_like(x, dtype=out_dtype)]
    # Elemwise[GE](add([shapes]), 0) -> Elemwise[ones](X)
    if isinstance(op, scalar.GE) and _is_sum_of_shapes(x) and _is_zero_const(y):
        return [T.ones_like(x, dtype=out_dtype)]
    return
@register_canonicalize
@register_specialize
@gof.local_optimizer([T.Sum, T.elemwise.Prod])
......
......@@ -3135,6 +3135,201 @@ def test_local_fill_useless():
assert T.Alloc in ops
f(m_, x_)
class Test_local_useless_elemwise_comparison(unittest.TestCase):
    def test_local_useless_elemwise_comparison(self):
        # TODO: test each case individually.
        # The following case is what made me discover those cases.
        X = T.matrix('X')
        Y = T.vector('Y')
        X_sum, updates = theano.scan(fn=lambda x: x.sum(),
                                     outputs_info=None,
                                     sequences=[X],
                                     non_sequences=None)
        Z = X_sum + Y
        theano.printing.debugprint(Z)
        # here is the output for the debug print:
        """
        Elemwise{add,no_inplace} [@A] ''
        |for{cpu,scan_fn} [@B] ''
        | |Subtensor{int64} [@C] ''
        | | |Shape [@D] ''
        | | | |Subtensor{int64::} [@E] 'X[0:]'
        | | | |X [@F]
        | | | |Constant{0} [@G]
        | | |Constant{0} [@H]
        | |Subtensor{:int64:} [@I] ''
        | | |Subtensor{int64::} [@E] 'X[0:]'
        | | |ScalarFromTensor [@J] ''
        | | |Subtensor{int64} [@C] ''
        | |Subtensor{int64} [@C] ''
        |Y [@K]

        Inner graphs of the scan ops:

        for{cpu,scan_fn} [@B] ''
        >Sum{acc_dtype=float64} [@L] ''
        > |X[t] [@M] -> [@I]
        """
        mode = theano.compile.get_default_mode().excluding('fusion')
        f = theano.function([X, Y], Z, mode=mode)
        theano.printing.debugprint(f, print_type=True)
        # here is the output for the debug print:
        """
        Elemwise{Add}[(0, 0)] [@A] <TensorType(float64, vector)> '' 7
        |for{cpu,scan_fn} [@B] <TensorType(float64, vector)> '' 6
        | |Shape_i{0} [@C] <TensorType(int64, scalar)> '' 0
        | | |X [@D] <TensorType(float64, matrix)>
        | |Subtensor{int64:int64:int8} [@E] <TensorType(float64, matrix)> '' 5
        | | |X [@D] <TensorType(float64, matrix)>
        | | |ScalarFromTensor [@F] <int64> '' 4
        | | | |Elemwise{switch,no_inplace} [@G] <TensorType(int64, scalar)> '' 3
        | | | |Elemwise{le,no_inplace} [@H] <TensorType(int8, scalar)> '' 2
        | | | | |Shape_i{0} [@C] <TensorType(int64, scalar)> '' 0
        | | | | |TensorConstant{0} [@I] <TensorType(int8, scalar)>
        | | | |TensorConstant{0} [@I] <TensorType(int8, scalar)>
        | | | |TensorConstant{0} [@J] <TensorType(int64, scalar)>
        | | |ScalarFromTensor [@K] <int64> '' 1
        | | | |Shape_i{0} [@C] <TensorType(int64, scalar)> '' 0
        | | |Constant{1} [@L] <int8>
        | |Shape_i{0} [@C] <TensorType(int64, scalar)> '' 0
        |Y [@M] <TensorType(float64, vector)>

        Inner graphs of the scan ops:

        for{cpu,scan_fn} [@B] <TensorType(float64, vector)> ''
        >Sum{acc_dtype=float64} [@N] <TensorType(float64, scalar)> ''
        > |X[t] [@O] <TensorType(float64, vector)> -> [@E]
        """

    def assert_eqs_const(self, f, val):
        # The compiled graph must be a single deep-copy of a tensor
        # constant equal to `val`.
        nodes = f.maker.fgraph.toposort()
        node = nodes[0]
        assert len(nodes) == 1, nodes
        assert node.op == deep_copy_op, node.op
        assert len(node.inputs) == 1, node.inputs
        assert isinstance(node.inputs[0], T.TensorConstant), node
        assert T.extract_constant(node.inputs[0]) == val, val

    def assert_identity(self, f):
        # The compiled graph must be a single deep-copy of its input.
        nodes = f.maker.fgraph.toposort()
        assert len(nodes) == 1
        assert nodes[0].op == deep_copy_op
        x_val = 10
        assert f(x_val) == x_val

    def test_inequality_with_self(self):
        x = T.scalar('x', dtype=config.floatX)
        mode = theano.compile.get_default_mode().including(
            'local_useless_elemwise_comparison')
        # Strict comparisons of x with itself are constant 0, non-strict
        # ones are constant 1.
        for cmp_op, expected in [(T.lt, 0), (T.le, 1), (T.gt, 0), (T.ge, 1)]:
            f = theano.function([x], cmp_op(x, x), mode=mode)
            self.assert_eqs_const(f, expected)
        # minimum/maximum of x with itself is x.
        for extremum in (T.minimum, T.maximum):
            f = theano.function([x], extremum(x, x), mode=mode)
            self.assert_identity(f)

    def test_shape_inequality_with_self(self):
        x = T.vector('x', dtype=config.floatX)
        mode = theano.compile.get_default_mode().including(
            'local_useless_elemwise_comparison',
            'local_shape_to_shape_i',
            'local_track_shape_i',
            'local_subtensor_make_vector')
        # A shape is never negative.
        f = theano.function([x], T.lt(x.shape[0], 0), mode=mode)
        self.assert_eqs_const(f, 0)
        f = theano.function([x], T.ge(x.shape[0], 0), mode=mode)
        self.assert_eqs_const(f, 1)

        # maximum(shape, 0) and maximum(0, shape) reduce to the shape itself.
        x_val = numpy.ones(100, dtype=config.floatX)
        for args in [(x.shape[0], 0), (0, x.shape[0])]:
            f = theano.function([x], T.maximum(*args), mode=mode)
            nodes = f.maker.fgraph.toposort()
            assert len(nodes) == 1
            assert isinstance(nodes[0].op, Shape_i), nodes[0].op
            assert f(x_val) == x_val.shape[0]

        # minimum(shape, 0) and minimum(0, shape) reduce to constant 0.
        f = theano.function([x], T.minimum(x.shape[0], 0), mode=mode)
        self.assert_eqs_const(f, 0)
        f = theano.function([x], T.minimum(0, x.shape[0]), mode=mode)
        self.assert_eqs_const(f, 0)

    def test_shape_add_inequality(self):
        x = T.vector('x', dtype=config.floatX)
        mode = theano.compile.get_default_mode().including(
            'local_useless_elemwise_comparison',
            'local_shape_to_shape_i',
            'local_track_shape_i',
            'local_subtensor_make_vector')
        y = T.vector('y', dtype=config.floatX)
        # A sum of shapes is also never negative.
        f = theano.function([x, y], T.lt(x.shape[0] + y.shape[0], 0),
                            mode=mode)
        self.assert_eqs_const(f, 0)
        f = theano.function([x, y], T.ge(x.shape[0] + y.shape[0], 0),
                            mode=mode)
        self.assert_eqs_const(f, 1)

    def test_and(self):
        mode = theano.compile.get_default_mode().including('canonicalize')
        x = T.scalar('x', dtype='int8')
        # and(x, 0) is constant 0; and(x, 1) is x.  Both argument orders.
        for args in [(x, 0), (0, x)]:
            f = theano.function([x], T.and_(*args), mode=mode)
            self.assert_eqs_const(f, 0)
        for args in [(x, 1), (1, x)]:
            f = theano.function([x], T.and_(*args), mode=mode)
            self.assert_identity(f)

    def test_or(self):
        mode = theano.compile.get_default_mode().including('canonicalize')
        x = T.scalar('x', dtype='int8')
        # or(x, 1) is constant 1; or(x, 0) is x.  Both argument orders.
        for args in [(x, 1), (1, x)]:
            f = theano.function([x], T.or_(*args), mode=mode)
            self.assert_eqs_const(f, 1)
        for args in [(x, 0), (0, x)]:
            f = theano.function([x], T.or_(*args), mode=mode)
            self.assert_identity(f)

    def test_xor(self):
        mode = theano.compile.get_default_mode().including('canonicalize')
        x = T.scalar('x', dtype='int8')
        # xor(x, x) is constant 0.
        f = theano.function([x], T.xor(x, x), mode=mode)
        self.assert_eqs_const(f, 0)
class Test_local_useless_alloc(unittest.TestCase):
def setUp(self):
......@@ -4446,6 +4641,53 @@ class test_local_remove_switch_const_cond(unittest.TestCase):
vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
assert numpy.all(f(vx, vy) == vx)
def test_left_is_right(self):
    # When both branches of a switch are the very same variable, the
    # switch must be removed no matter what the condition is: constant
    # true, constant false, or fully symbolic.
    for dtype1 in ['int32', 'int64']:
        x = theano.tensor.matrix('x', dtype=dtype1)
        varc = theano.tensor.matrix('varc', dtype=dtype1)
        out_true = theano.tensor.switch(1, x, x)
        out_false = theano.tensor.switch(0, x, x)
        out_sym = theano.tensor.switch(varc, x, x)
        f1 = theano.function([x], out_true, mode=self.mode)
        f0 = theano.function([x], out_false, mode=self.mode)
        f2 = theano.function([x, varc], out_sym, mode=self.mode)

        # Each compiled graph must reduce to a single deep-copy of x.
        for f in (f1, f0, f2):
            nodes = f.maker.fgraph.toposort()
            assert len(nodes) == 1
            assert nodes[0].op == deep_copy_op

        vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
        vc = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
        assert numpy.all(f1(vx) == vx)
        assert numpy.all(f0(vx) == vx)
        assert numpy.all(f2(vx, vc) == vx)
def test_shape_le_0(self):
    # switch(le(shape_i(X), 0), 0, shape_i(X)) must collapse to
    # shape_i(X): a shape is never negative, so the condition can only
    # pick the branch that is already the shape (or an equal 0).
    vx_shape = (0, 5)
    for dtype1 in ['float32', 'float64']:
        x = theano.tensor.matrix('x', dtype=dtype1)
        vx = numpy.random.randn(*vx_shape).astype(dtype1)
        for axis in (0, 1):
            z = theano.tensor.switch(
                theano.tensor.le(x.shape[axis], 0), 0, x.shape[axis])
            f = theano.function([x], z, mode=self.mode)
            # The whole expression must have been replaced by Shape_i.
            assert isinstance(f.maker.fgraph.toposort()[0].op, Shape_i)
            assert f(vx) == vx_shape[axis]
def test_broadcast1(self):
# test switch(cst, matrix, row)
x = theano.tensor.matrix('x', dtype='int32')
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment