提交 aeeec0bf，作者：Pascal Lamblin

Merge pull request #1655 from nouiz/local_subtensor_of_alloc

Local subtensor of alloc
......@@ -94,6 +94,8 @@ def debugprint(obj, depth=-1, print_type=False,
elif isinstance(obj, gof.FunctionGraph):
results_to_print.extend(obj.outputs)
order = obj.toposort()
elif isinstance(obj, (int, long, float, numpy.ndarray)):
print obj
else:
raise TypeError("debugprint cannot print an object of this type", obj)
for r in results_to_print:
......
import os
import shutil
import sys
from tempfile import mkdtemp
import time
import unittest
......@@ -1585,17 +1586,19 @@ class T_Scan(unittest.TestCase):
vparams = [v_u1, v_u2, v_x0, v_y0, vW_in1]
params = [u1, u2, x0, y0, W_in1]
gparams = theano.tensor.grad(cost, params)
grad_fn = theano.function([u1, u2, x0, y0, W_in1],
gparams,
print >> sys.stderr, "."
cost_fn = theano.function([u1, u2, x0, y0, W_in1],
cost,
updates=updates,
no_default_updates=True,
allow_input_downcast=True)
cost_fn = theano.function([u1, u2, x0, y0, W_in1],
cost,
print >> sys.stderr, "."
grad_fn = theano.function([u1, u2, x0, y0, W_in1],
gparams,
updates=updates,
no_default_updates=True,
allow_input_downcast=True)
print >> sys.stderr, "."
finally:
theano.config.compute_test_value = old1
theano.config.compute_test_value_opt = old2
......@@ -3688,7 +3691,9 @@ class T_Scan(unittest.TestCase):
cost = result_outer[-1]
H = theano.gradient.hessian(cost, W_flat)
print >> sys.stderr, "."
f = theano.function([W_flat], H)
print >> sys.stderr, "."
f(numpy.ones((8,), dtype='float32'))
......
......@@ -559,25 +559,32 @@ def get_scalar_constant_value(v):
compile.ops.OutputGuard,
compile.DeepCopyOp)):
return get_scalar_constant_value(v.owner.inputs[0])
if isinstance(v.owner.op, Elemwise) and \
isinstance(v.owner.op.scalar_op, scal.Second):
shape, val = v.owner.inputs
return get_scalar_constant_value(val)
if isinstance(v.owner.op, scal.Second):
x, y = v.owner.inputs
return get_scalar_constant_value(y)
if (isinstance(v.owner.op, theano.compile.ops.Shape_i) and
isinstance(v.owner.inputs[0], Constant)):
return v.owner.inputs[0].data.shape[v.owner.op.i]
# Don't act as the constant_folding optimization here as this
# fct is used too early in the optimization phase. This would
# mess with the stabilization optimization.
if (isinstance(v.owner.op, Elemwise) and isinstance(
v.owner.op.scalar_op, scal.Cast)) or \
isinstance(v.owner.op, scal.Cast):
const = get_scalar_constant_value(v.owner.inputs[0])
# mess with the stabilization optimization and be too slow.
# We put all the scalar Ops used by get_canonical_form_slice()
# to allow it to determine the broadcast pattern correctly.
if ((isinstance(v.owner.op, Elemwise) and
isinstance(v.owner.op.scalar_op, scal.Second)) or
isinstance(v.owner.op, scal.Second)):
# We don't need both input to be constant for second
shape, val = v.owner.inputs
return get_scalar_constant_value(val)
elemwises = (scal.Cast, scal.Switch,
scal.NEQ, scal.EQ,
scal.LT, scal.GT, scal.LE, scal.GE,
scal.Sub, scal.Add, scal.Mod, scal.Mul,
scal.IntDiv, scal.TrueDiv)
if (isinstance(v.owner.op, Elemwise) and
len(v.owner.outputs) == 1 and
(isinstance(v.owner.op.scalar_op, elemwises) or
isinstance(v.owner.op, elemwises))):
const = [get_scalar_constant_value(i) for i in v.owner.inputs]
ret = [[None]]
v.owner.op.perform(v.owner, [const], ret)
v.owner.op.perform(v.owner, const, ret)
return ret[0][0]
if isinstance(v.owner.op, theano.tensor.subtensor.Subtensor) and v.ndim == 0:
# This condition depends on Subtensor always embedding constant
......@@ -655,13 +662,13 @@ def get_scalar_constant_value(v):
assert ndim == len(gp_broadcastable)
if not (idx < len(gp_broadcastable)):
msg = "get_scalar_constant_value detected " + \
"deterministic IndexError: x.shape[%d] " + \
"when x.ndim=%d." % (ndim, idx)
msg = ("get_scalar_constant_value detected " +
"deterministic IndexError: x.shape[%d] " +
"when x.ndim=%d.") % (ndim, idx)
if config.exception_verbosity == 'high':
msg += 'x=%s' % min_informative_str(x)
msg += 'x=%s' % min_informative_str(v)
else:
msg += 'x=%s' % str(x)
msg += 'x=%s' % str(v)
raise ValueError(msg)
if gp_broadcastable[idx]:
......
......@@ -2002,7 +2002,14 @@ def local_subtensor_of_alloc(node):
# That dimension is removed.
pass
else:
nw_dims += [T.ceil_intdiv((csl.stop - csl.start), csl.step)]
nw_dim = csl.stop - csl.start
if csl.step != 1:
# Do not add the ceil_intdiv() graphs in the graphs
# when this is not needed as it prevent detecting the
# correct broadcast pattern.
nw_dim = T.ceil_intdiv(nw_dim, csl.step)
nw_dims += [nw_dim]
nw_val = val[tuple(val_slices)]
nw_dims += dims[len(slices):]
......@@ -2011,7 +2018,15 @@ def local_subtensor_of_alloc(node):
rval = T.alloc(nw_val, *nw_dims)
if type(rval) not in (list, tuple):
rval = [rval]
if rval[0].type != node.outputs[0].type:
#It happen that the make_node() isn't able to infer that some
#dimensions are broadcastable, but that now we can infer
#that. So we need to remove that information here.
rval[0] = theano.tensor.unbroadcast(
rval[0],
*[i for i, (b1, b2) in enumerate(zip(rval[0].broadcastable,
node.outputs[0].broadcastable))
if b1 and not b2])
return rval
......
......@@ -15,7 +15,7 @@ from theano.gof import Apply, Constant, hashtype, Op, Type, MethodNotDefined
from theano.gof.python25 import maxsize
from theano.printing import pprint
from theano import scalar as scal
from theano.tensor.basic import (addbroadcast, clip,
from theano.tensor.basic import (addbroadcast, clip, get_scalar_constant_value,
ARange, TensorType)
from theano.tensor.elemwise import DimShuffle
from theano.tensor.type_other import NoneConst, SliceType, make_slice
......@@ -86,7 +86,7 @@ def get_canonical_form_slice(theslice, length):
def analyze(x):
try:
x_constant = theano.tensor.get_scalar_constant_value(x)
x_constant = get_scalar_constant_value(x)
is_constant = True
except theano.tensor.NotScalarConstantError:
x_constant = theano.tensor.extract_constant(x)
......@@ -100,6 +100,7 @@ def get_canonical_form_slice(theslice, length):
if step is None:
step = 1
is_step_constant = True
# First handle the easier and common case where `step` is 1 and
# either `start` or `stop` is a range boundary. More specializations
......@@ -390,12 +391,6 @@ class Subtensor(Op):
exception.subtensor_invalid = True
raise exception
# infer the broadcasting pattern
padded = (idx_list
+ [slice(None, None, None)] * (x.type.ndim - len(idx_list)))
broadcastable = [bc for p, bc in izip(padded, x.type.broadcastable)
if isinstance(p, slice)]
input_types = Subtensor.collapse(idx_list,
lambda entry: isinstance(entry, gof.Type))
if len(inputs) != len(input_types):
......@@ -408,6 +403,34 @@ class Subtensor(Op):
"Wrong type for Subtensor template. Expected %s, got %s."
% (input.type, expected_type))
# infer the broadcasting pattern
padded = (idx_list
+ [slice(None, None, None)] * (x.type.ndim - len(idx_list)))
broadcastable = []
for i, (p, bc) in enumerate(izip(padded, x.type.broadcastable)):
if isinstance(p, slice):
if bc and p.start in [None, 0]:
# No need to check step when there is only
# one element.
# We could call get_canonical_form_slice() to
# catch more broadcast case. I let this to
# later.
if p.stop is None:
broadcastable.append(bc)
continue
try:
if p.start is None:
start = 0
else:
start = get_scalar_constant_value(p.start)
stop = get_scalar_constant_value(p.stop)
if stop > start:
broadcastable.append(True)
continue
except theano.tensor.NotScalarConstantError:
pass
broadcastable.append(False)
return gof.Apply(self,
(x, ) + inputs,
[theano.tensor.tensor(dtype=x.type.dtype,
......@@ -1824,6 +1847,7 @@ class AdvancedSubtensor(Op):
return [advanced_inc_subtensor(theano.tensor.zeros_like(x), gz,
*rest)] + \
[DisconnectedType()()] * len(rest)
advanced_subtensor = AdvancedSubtensor()
class AdvancedIncSubtensor(Op):
......
......@@ -5955,6 +5955,28 @@ class T_get_scalar_constant_value(unittest.TestCase):
s = opt.Shape_i(1)(c)
assert get_scalar_constant_value(s) == 4
def test_elemwise(self):
    """Check get_scalar_constant_value() on graphs built from a few
    representative Elemwise scalar ops.

    Only a sample is exercised here; the complete list of supported
    scalar ops lives inside get_scalar_constant_value() itself.
    """
    const = theano.tensor.constant(numpy.random.rand())
    data = const.data
    # Arithmetic on a constant must fold down to the Python value.
    assert get_scalar_constant_value(const + 1) == data + 1
    assert get_scalar_constant_value(const - 1) == data - 1
    assert get_scalar_constant_value(const * 1.2) == data * 1.2
    # A comparison folds to a 0/1 integer scalar.
    assert get_scalar_constant_value(const < 0.5) == int(data < 0.5)
    # second() returns its second argument, broadcast to the first's
    # shape, so the constant value is the second argument itself.
    assert get_scalar_constant_value(tensor.second(const, .4)) == .4
def test_second(self):
    """second() should be recognized as constant whenever its *value*
    input is constant, even when its *shape* input is a free variable."""
    value = theano.tensor.constant(numpy.random.rand())
    sym_shape = theano.tensor.vector()
    out = theano.tensor.second(sym_shape, value)
    assert get_scalar_constant_value(out) == value.data
class T_as_tensor_variable(unittest.TestCase):
"""
......
......@@ -2366,11 +2366,13 @@ class Test_alloc_zero(unittest.TestCase):
def test_local_subtensor_of_alloc():
x = tensor.matrix('x')
# DebugMode should detect if something goes wrong.
# test shape combination of odd and event shape.
for shape in [(3, 5), (4, 6), (3, 8), (4, 7)]:
for shape in [(3, 5), (4, 6), (3, 8), (4, 7),
(1, 5), (5, 1)]:
x = tensor.tensor(dtype=theano.config.floatX,
broadcastable=(shape[0] == 1, shape[1] == 1))
xval = numpy.zeros(shape, dtype=config.floatX)
yval = numpy.arange(shape[1], dtype=config.floatX)
......@@ -2387,21 +2389,29 @@ def test_local_subtensor_of_alloc():
# Only one column
z_vec = yx[:, 3]
assert z_vec.ndim == 1
for slices in [
# results are vector
(slice(None), 3),
(2, slice(None)),
# results are matrix
# results are vector
slicess = []
if shape[0] != 1:
slicess.append((2, slice(None)))
if shape[1] != 1:
slicess.append((slice(None), 3))
# results are matrix
slicess += [
(slice(None), slice(3, None)),
(slice(3, None), ),
(slice(3, None), slice(3, None)),
(slice(1, 3), slice(None, -1)),
(slice(None, None, 2)),
(slice(1, None, 2)),
]:
]
for slices in slicess:
z = yx.__getitem__(slices)
f = theano.function([x], z)
if theano.config.mode != 'FAST_COMPILE':
# Subtensor can be in the input of Alloc
assert not isinstance(f.maker.fgraph.toposort()[-1].op,
Subtensor)
val = f(xval)
assert xval.__getitem__(slices).shape == val.shape
......
......@@ -378,7 +378,7 @@ class _tensor_py_operators:
theano.tensor.sharedvar.TensorSharedVariable))):
return self.take(arg, axis)
else:
return theano.tensor.subtensor.AdvancedSubtensor()(self, *args)
return theano.tensor.subtensor.advanced_subtensor(self, *args)
else:
if numpy.newaxis in args:
# None (aka np.newaxis) in numpy indexing means to add a
......
Markdown 格式
0%
您在此讨论中添加了 0 人。请谨慎行事。
请先完成此评论的编辑！
注册 或者 登录 后发表评论