Merge pull request #2852 from kelvinxu/prod_opts

Prod opts [WIP]

Merge pull request #2852 from kelvinxu/prod_opts
c9d69119 · abergeron · 2dced45c · 85c369e1 · c9d69119 · c9d69119
--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -3849,17 +3849,22 @@ register_canonicalize(local_neg_to_mul)


 @register_specialize
-@gof.local_optimizer([T.Sum])
-def local_sum_mul_by_scalar(node):
+@gof.local_optimizer([T.Sum, T.elemwise.Prod])
+def local_sum_prod_mul_by_scalar(node):
    """sum(scalar * smth) -> scalar * sum(smth)
       sum(-smth) -> -sum(smth)
+
+       or
+
+       prod(scalar * smth) -> scalar * prod(smth)  [TODO(review): scalar**N?]
+       (prod(-smth) is left as is: the neg rewrite applies to Sum only)
    """
    # TODO: if the the thing inside the Sum is a division,
    # we should get at the numerator....
-    if isinstance(node.op, T.Sum):
-        thing_summed, = node.inputs
-        if thing_summed.owner and thing_summed.owner.op == T.mul:
-            terms = thing_summed.owner.inputs
+    if isinstance(node.op, T.Sum) or isinstance(node.op, T.elemwise.Prod):
+        node_inps, = node.inputs
+        if node_inps.owner and node_inps.owner.op == T.mul:
+            terms = node_inps.owner.inputs
            scalars = [t.dimshuffle() for t in terms if
                       numpy.all(t.type.broadcastable)]
            non_scalars = [t for t in terms if not numpy.all(t.broadcastable)]
@@ -3881,8 +3886,8 @@ def local_sum_mul_by_scalar(node):
                        return [T.mul(scalars[0], node.op(non_scalars[0]))]
                    else:
                        return [scalars[0]]
-        if thing_summed.owner and thing_summed.owner.op == T.neg:
-            return [T.neg(node.op(thing_summed.owner.inputs[0]))]
+        if isinstance(node.op, T.Sum) and node_inps.owner and node_inps.owner.op == T.neg:
+            return [T.neg(node.op(node_inps.owner.inputs[0]))]


 @register_specialize
@@ -3989,64 +3994,68 @@ def local_sum_div_dimshuffle(node):


 @register_canonicalize
-@gof.local_optimizer([T.Sum])
-def local_sum_all_to_none(node):
-    """Sum{0,1,...N} -> Sum{}"""
-    if isinstance(node.op, T.Sum):
+@gof.local_optimizer([T.Sum, T.elemwise.Prod])
+def local_sum_prod_all_to_none(node):
+    """Sum{0,1,...N} -> Sum{} or
+       Prod{0,1,...N} -> Prod{}
+    """
+    if isinstance(node.op, T.Sum) or isinstance(node.op, T.elemwise.Prod):
+        opt_type = T.Sum if isinstance(node.op, T.Sum) else T.elemwise.Prod
        # if all the axes are named, then use None as a shorthand
        # this permits more merging
        if node.op.axis is None:
            return
        if set(node.op.axis) == set(range(node.inputs[0].type.ndim)):
-            return [T.Sum(axis=None, dtype=node.op.dtype)(node.inputs[0])]
+            return [opt_type(axis=None, dtype=node.op.dtype)(node.inputs[0])]


 @register_canonicalize
-@gof.local_optimizer([T.Sum])
-def local_sum_sum(node):
+@gof.local_optimizer([T.Sum, T.elemwise.Prod])
+def local_op_of_op(node):
    """
-    Sum(Sum()) -> Sum
+    Prod(Prod()) -> single Prod()
+    or
+    Sum(Sum()) -> single Sum()
    """
-    if isinstance(node.op, T.Sum):
-        summed, = node.inputs
+    if isinstance(node.op, T.elemwise.Prod) or isinstance(node.op, T.Sum):
+        opt_type = T.Sum if isinstance(node.op, T.Sum) else T.elemwise.Prod
+        node_inps, = node.inputs
        out_dtype = node.op.dtype
-        if len(summed.clients) == 1:
-            if (summed.owner and
-                    isinstance(summed.owner.op, T.Sum)):
-
-                if summed.owner.op.axis is None:
-                    # special case of local_cut_useless_reduce
-                    return [T.Sum(None, dtype=out_dtype)(
-                        summed.owner.inputs[0])]
-                if node.op.axis is None:
-                    # we're summing up everything anyway so lets
-                    # do it all at once
-                    return [T.Sum(None, dtype=out_dtype)(
-                        summed.owner.inputs[0])]
-
-                newaxis = list(tuple(summed.owner.op.axis))
-                # figure out which dimensions of the original input
-                # are preserved
+        # Only rewrite when the inner reduction has no other client, so
+        # that merging it away doesn't affect other computations.
+        if len(node_inps.clients) == 1:
+            if (node_inps.owner and (isinstance(node_inps.owner.op, T.elemwise.Prod)
+                    or isinstance(node_inps.owner.op, T.elemwise.Sum))): 
+                # NOTE(review): mixed Prod(Sum()) / Sum(Prod()) also match here
+                # but must not be merged: require inner op type == outer's.
+                # If either reduction is over all axes, reduce all at once.
+                if node_inps.owner.op.axis is None or node.op.axis is None:
+                    return [opt_type(None, dtype=out_dtype)(
+                        node_inps.owner.inputs[0])] 
+
+                # figure out which axes of the original input get reduced
+                newaxis = list(tuple(node_inps.owner.op.axis))
                for i in node.op.axis:
                    new_i = i
-                    for ii in summed.owner.op.axis:
+                    for ii in node_inps.owner.op.axis:
                        if new_i >= ii:
                            new_i += 1
                    assert new_i not in newaxis
                    newaxis.append(new_i)

-                assert len(newaxis) == len(list(summed.owner.op.axis) +
+                assert len(newaxis) == len(list(node_inps.owner.op.axis) +
                                           list(node.op.axis))

+ 
                # The old bugged logic. We keep it there to generate a warning
                # when we generated bad code.
-                alldims = range(summed.owner.inputs[0].type.ndim)
+                alldims = range(node_inps.owner.inputs[0].type.ndim)
                alldims = [d for i, d in enumerate(alldims) if i
-                           in summed.owner.op.axis]
+                           in node_inps.owner.op.axis]
                alldims = [d for i, d in enumerate(alldims)
                           if i in node.op.axis]
                newaxis_old = [i for i in
-                               xrange(summed.owner.inputs[0].type.ndim)
+                               xrange(node_inps.owner.inputs[0].type.ndim)
                               if i not in alldims]

                if (theano.config.warn.sum_sum_bug and
@@ -4065,8 +4074,9 @@ def local_sum_sum(node):
                            "been fixed) set the theano flag "
                            "`warn.sum_sum_bug` to False.")

-                combined_sum = T.Sum(newaxis, dtype=out_dtype)
-                return [combined_sum(summed.owner.inputs[0])]
+                combined = opt_type(newaxis, dtype=out_dtype)
+                return [combined(node_inps.owner.inputs[0])]
+

 ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any,
              T.elemwise.Sum, T.elemwise.Prod,
@@ -4208,21 +4218,29 @@ def local_reduce_broadcastable(node):


 @register_specialize
-@gof.local_optimizer([T.Sum])
-def local_sum_alloc(node):
-    """ sum(alloc(constant,shapes...)) => constant*prod(shapes)"""
-    if isinstance(node.op, T.Sum):
-        summed, = node.inputs
-        if summed.owner and isinstance(summed.owner.op, T.Alloc):
-            input = summed.owner.inputs[0]
-            shapes = summed.owner.inputs[1:]
+@gof.local_optimizer([T.Sum, T.elemwise.Prod])
+def local_opt_alloc(node):
+    """ sum(alloc(constant,shapes...)) => constant*prod(shapes)
+        or 
+        prod(alloc(constant,shapes...)) => constant**prod(shapes)
+    """
+    if isinstance(node.op, T.Sum) or isinstance(node.op, T.elemwise.Prod):
+        node_inps, = node.inputs
+        if node_inps.owner and isinstance(node_inps.owner.op, T.Alloc):
+            input = node_inps.owner.inputs[0]
+            shapes = node_inps.owner.inputs[1:]
            if (node.op.axis is None or
                node.op.axis == tuple(range(input.ndim))):
                try:
                    val = get_scalar_constant_value(input)
                    assert val.size == 1
-                    val = val.reshape(1)[0] * T.mul(*shapes)
+                    # Sum scales by the element count; Prod raises to it
+                    if isinstance(node.op, T.Sum):
+                        val = val.reshape(1)[0] * T.mul(*shapes)
+                    else:
+                        val = val.reshape(1)[0] ** T.mul(*shapes)
                    return [T.cast(val, dtype=node.outputs[0].dtype)]
+
                except NotScalarConstantError:
                    pass
            else:
@@ -4233,7 +4251,10 @@ def local_sum_alloc(node):
                    to_prod = [shapes[i] for i in xrange(len(shapes))
                               if i in node.op.axis]
                    if to_prod:
-                        val *= T.mul(*to_prod)
+                        if isinstance(node.op, T.Sum):
+                            val *= T.mul(*to_prod)
+                        else:
+                            val = val ** T.mul(*to_prod)
                    return [T.alloc(T.cast(val, dtype=node.outputs[0].dtype),
                                    *[shapes[i] for i in xrange(len(shapes))
                                      if i not in node.op.axis])]

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -4459,21 +4459,33 @@ class test_local_remove_switch_const_cond(unittest.TestCase):
        assert numpy.all(f(vx, vy) == vy)


-class T_local_sum(unittest.TestCase):
+class T_local_sum_prod(unittest.TestCase):
+    """
+    Test sum/prod opts in opt.py 
+    """
    def setUp(self):
        self.mode = theano.compile.get_default_mode().including('canonicalize',
                                                                'specialize')

-    def test_local_sum_all_to_none(self):
+    def test_local_sum_prod_all_to_none(self):
        a = T.tensor3()
        input = numpy.arange(3 * 4 * 5, dtype=config.floatX).reshape(3, 4, 5)
+        # test sum
        f = theano.function([a], a.sum(), mode=self.mode)
        assert len(f.maker.fgraph.apply_nodes) == 1
        assert numpy.allclose(f(input), input.sum())
-
+        # test prod
+        f = theano.function([a], a.prod(), mode=self.mode)
+        assert len(f.maker.fgraph.apply_nodes) == 1
+        assert numpy.allclose(f(input), input.prod())
+        # test sum
        f = theano.function([a], a.sum([0, 1, 2]), mode=self.mode)
        assert len(f.maker.fgraph.apply_nodes) == 1
        assert numpy.allclose(f(input), input.sum())
+        # test prod
+        f = theano.function([a], a.prod([0, 1, 2]), mode=self.mode)
+        assert len(f.maker.fgraph.apply_nodes) == 1
+        assert numpy.allclose(f(input), input.prod())

        backup = config.warn.sum_sum_bug
        config.warn.sum_sum_bug = False
@@ -4484,7 +4496,7 @@ class T_local_sum(unittest.TestCase):
        finally:
            config.warn.sum_sum_bug = backup

-    def test_local_sum_sum(self):
+    def test_local_sum_sum_prod_prod(self):
        a = T.tensor3()
        input = numpy.arange(3 * 4 * 5, dtype=config.floatX).reshape(3, 4, 5)
        dims = [(0, 0), (1, 0), (2, 0), (0, 1), (1, 1), (2, 1),
@@ -4494,6 +4506,17 @@ class T_local_sum(unittest.TestCase):
        backup = config.warn.sum_sum_bug
        config.warn.sum_sum_bug = False

+        def my_prod(data, d, dd):
+            # Reference prod that handles d or dd being a tuple of 2 dims.
+            if not isinstance(d, tuple) and not isinstance(dd, tuple):
+                return data.prod(d).prod(dd)
+            if isinstance(d, tuple):
+                d = sorted(d)
+                return data.prod(d[1]).prod(d[0]).prod(dd)
+            else:
+                dd = sorted(dd)
+                return data.prod(d).prod(dd[1]).prod(dd[0])
+
        def my_sum(data, d, dd):
            # This sum when d or dd is a tuple of 2 dimensions.
            if not isinstance(d, tuple) and not isinstance(dd, tuple):
@@ -4526,7 +4549,27 @@ class T_local_sum(unittest.TestCase):
        finally:
            config.warn.sum_sum_bug = backup

-    def test_local_sum_alloc(self):
+        # test prod
+        for d, dd in dims:
+            expected = my_prod(input, d, dd)
+            f = theano.function([a], a.prod(d).prod(dd), mode=self.mode)
+            assert numpy.allclose(f(input), expected)
+            assert len(f.maker.fgraph.apply_nodes) == 1
+        for d, dd in dims[:6]:
+            f = theano.function([a], a.prod(d).prod(dd).
+                                prod(0), mode=self.mode)
+            assert numpy.allclose(f(input), input.prod(d).prod(dd).prod(0))
+            assert len(f.maker.fgraph.apply_nodes) == 1
+        for d in [0, 1, 2]:
+            f = theano.function([a], a.prod(d).prod(None), mode=self.mode)
+            assert numpy.allclose(f(input), input.prod(d).prod())
+            assert len(f.maker.fgraph.apply_nodes) == 1
+        f = theano.function([a], a.prod(None).prod(), mode=self.mode)
+        assert numpy.allclose(f(input), input.prod())
+        assert len(f.maker.fgraph.apply_nodes) == 1
+
+
+    def test_local_sum_prod_alloc(self):
        a = T.dtensor3()
        input = numpy.asarray(numpy.arange(2 * 3 * 4).reshape(2, 3, 4),
                              dtype='float64')
@@ -4535,6 +4578,7 @@ class T_local_sum(unittest.TestCase):
        for t_like, n_like, nb_nodes in [(tensor.zeros_like, numpy.zeros_like, (1, 3, 3, 2)),
                                       (tensor.ones_like, numpy.ones_like, (5, 5, 5, 6))]:

+            # test sum
            f = theano.function([a], t_like(a).sum(None), mode=mode)
            assert numpy.allclose(f(input), n_like(input).sum())
            assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
@@ -4558,6 +4602,30 @@ class T_local_sum(unittest.TestCase):
                assert topo[-1].op == T.alloc
                assert not any([isinstance(node.op, T.Sum) for node in topo])

+            # test prod
+            f = theano.function([a], t_like(a).prod(None), mode=mode)
+            assert numpy.allclose(f(input), n_like(input).prod())
+            #assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
+
+            f = theano.function([a], t_like(a).prod([0, 1, 2]), mode=mode)
+            assert numpy.allclose(f(input), n_like(input).prod())
+            #assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
+
+            for d in range(3):
+                f = theano.function([a], t_like(a).prod(d), mode=mode)
+                assert numpy.allclose(f(input), n_like(input).prod(d))
+                #assert len(f.maker.fgraph.apply_nodes) == nb_nodes[1]
+                topo = f.maker.fgraph.toposort()
+                assert topo[-1].op == T.alloc
+                assert not any([isinstance(node.op, T.elemwise.Prod) for node in topo])
+            for i in range(3):
+                f = theano.function([a], t_like(a).prod(i), mode=mode)
+                assert numpy.allclose(f(input), n_like(input).prod(i))
+                #assert len(f.maker.fgraph.apply_nodes) == nb_nodes[2]
+                topo = f.maker.fgraph.toposort()
+                assert topo[-1].op == T.alloc
+                assert not any([isinstance(node.op, T.elemwise.Prod) for node in topo])
+
            backup = config.warn.sum_sum_bug
            config.warn.sum_sum_bug = False
            try: