提交 8571cb47 authored 作者: Iulian Vlad Serban's avatar Iulian Vlad Serban

Continued work on adding stack traces to optimizations #3018

上级 2ebc24ff
...@@ -199,7 +199,7 @@ optdb.register('merge1', gof.MergeOptimizer(), ...@@ -199,7 +199,7 @@ optdb.register('merge1', gof.MergeOptimizer(),
# rearranges elemwise expressions # rearranges elemwise expressions
optdb.register('canonicalize', gof.EquilibriumDB(ignore_newtrees=False), optdb.register('canonicalize', gof.EquilibriumDB(ignore_newtrees=False),
1, 'fast_run', 'fast_compile') 1, 'fast_run', 'fast_compile', 'canonicalize_db')
# Register in the canonizer Equilibrium as a clean up opt the merge opt. # Register in the canonizer Equilibrium as a clean up opt the merge opt.
# Without this, as the equilibrium have ignore_newtrees=False, we # Without this, as the equilibrium have ignore_newtrees=False, we
# won't merge all nodes if it is set as a global optimizer with # won't merge all nodes if it is set as a global optimizer with
......
...@@ -1841,7 +1841,7 @@ def local_subtensor_make_vector(node): ...@@ -1841,7 +1841,7 @@ def local_subtensor_make_vector(node):
elif isinstance(idx, Variable): elif isinstance(idx, Variable):
if idx.ndim == 0: if idx.ndim == 0:
# if it is a constant we can do something with it # if it is a constant we can do something with it
try: try:
v = get_scalar_constant_value(idx) v = get_scalar_constant_value(idx)
if isinstance(v, numpy.integer): if isinstance(v, numpy.integer):
# Python 2.4 wants to index only with Python integers # Python 2.4 wants to index only with Python integers
...@@ -1851,6 +1851,8 @@ def local_subtensor_make_vector(node): ...@@ -1851,6 +1851,8 @@ def local_subtensor_make_vector(node):
ret = [x.owner.inputs[v]] ret = [x.owner.inputs[v]]
except IndexError: except IndexError:
raise NotScalarConstantError("Bad user graph!") raise NotScalarConstantError("Bad user graph!")
# Copy over stack trace from previous output to new output
return ret return ret
except NotScalarConstantError: except NotScalarConstantError:
pass pass
...@@ -2960,8 +2962,20 @@ def local_subtensor_of_dot(node): ...@@ -2960,8 +2962,20 @@ def local_subtensor_of_dot(node):
a_sub = a.__getitem__(tuple(a_indices)) a_sub = a.__getitem__(tuple(a_indices))
b_sub = b.__getitem__(tuple(b_indices)) if b_indices else b b_sub = b.__getitem__(tuple(b_indices)) if b_indices else b
return [T.dot(a_sub, b_sub)] # Copy over previous output stacktrace to a_sub and b_sub,
# because an error in the subtensor operation (e.g. an index error)
# on either a or b must correspond to an error in the
# subtensor operation on their dot product.
copy_stack_trace(node.outputs[0], [a_sub, b_sub])
# Copy over previous output stacktrace and previous dot product stacktrace,
# because an error here may correspond to an error in either the original
# dot product, or in the dot product after the subtensor operation.
r = T.dot(a_sub, b_sub)
copy_stack_trace([node.outputs[0], node.inputs[0]], r)
return [r]
@register_canonicalize @register_canonicalize
...@@ -3016,6 +3030,11 @@ def local_IncSubtensor_serialize(node): ...@@ -3016,6 +3030,11 @@ def local_IncSubtensor_serialize(node):
new_inputs = ([i for i in node.inputs if not movable(i)] + new_inputs = ([i for i in node.inputs if not movable(i)] +
[mi.owner.inputs[0] for mi in movable_inputs]) [mi.owner.inputs[0] for mi in movable_inputs])
new_add = T.add(*new_inputs) new_add = T.add(*new_inputs)
# Copy over stacktrace from original output, as an error
# (e.g. an index error) in this add operation should
# correspond to an error in the original add operation.
copy_stack_trace(node.outputs[0], new_add)
# stack up the new incsubtensors # stack up the new incsubtensors
tip = new_add tip = new_add
...@@ -3023,6 +3042,11 @@ def local_IncSubtensor_serialize(node): ...@@ -3023,6 +3042,11 @@ def local_IncSubtensor_serialize(node):
assert tip.type == o_type assert tip.type == o_type
assert tip.type == mi.owner.inputs[0].type assert tip.type == mi.owner.inputs[0].type
tip = mi.owner.op(tip, *mi.owner.inputs[1:]) tip = mi.owner.op(tip, *mi.owner.inputs[1:])
# Copy over stacktrace from outputs of the original
# "movable" operation to the new operation.
# Julian: Do we want to also include the stacktrace of the output (node.outputs[0])?
copy_stack_trace(mi.owner.outputs, tip)
return [tip] return [tip]
# print incsub_inputs, [id(i.owner.inputs[0]) for i in incsub_inputs] # print incsub_inputs, [id(i.owner.inputs[0]) for i in incsub_inputs]
...@@ -3052,6 +3076,11 @@ def local_inplace_setsubtensor(node): ...@@ -3052,6 +3076,11 @@ def local_inplace_setsubtensor(node):
set_instead_of_inc=node.op.set_instead_of_inc, set_instead_of_inc=node.op.set_instead_of_inc,
destroyhandler_tolerate_aliased=dta) destroyhandler_tolerate_aliased=dta)
new_node = new_op(*node.inputs) new_node = new_op(*node.inputs)
# Copy stacktrace from original outputs to new outputs.
# This should be sensible, because the new operation is the
# same as the old one, but now with different attributes?
# Julian: Pascal, is this correct?
copy_stack_trace(node.outputs, new_node)
return [new_node] return [new_node]
return False return False
compile.optdb.register('local_inplace_setsubtensor', compile.optdb.register('local_inplace_setsubtensor',
...@@ -3070,6 +3099,12 @@ def local_inplace_incsubtensor1(node): ...@@ -3070,6 +3099,12 @@ def local_inplace_incsubtensor1(node):
if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace: if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace:
new_op = node.op.clone_inplace() new_op = node.op.clone_inplace()
new_node = new_op(*node.inputs) new_node = new_op(*node.inputs)
# Copy stacktrace from original outputs to new outputs.
# This should be sensible, because the new operation is the
# same as the old one, but now with different attributes?
# Julian: same as above, is this correct?
copy_stack_trace(node.outputs, new_node)
return [new_node] return [new_node]
return False return False
compile.optdb.register('local_inplace_incsubtensor1', compile.optdb.register('local_inplace_incsubtensor1',
...@@ -3104,6 +3139,8 @@ def local_incsubtensor_of_zeros(node): ...@@ -3104,6 +3139,8 @@ def local_incsubtensor_of_zeros(node):
pass pass
if replace: if replace:
# No need to copy over the stacktrace,
# because x should already have a stacktrace
return [x] return [x]
else: else:
return False return False
...@@ -3138,6 +3175,9 @@ def local_setsubtensor_of_constants(node): ...@@ -3138,6 +3175,9 @@ def local_setsubtensor_of_constants(node):
if (replace_x is not None and if (replace_x is not None and
replace_y is not None and replace_y is not None and
replace_x == replace_y): replace_x == replace_y):
# No need to copy over the stacktrace,
# because x should already have a stacktrace
return [x] return [x]
else: else:
return False return False
...@@ -3184,7 +3224,13 @@ def local_adv_sub1_adv_inc_sub1(node): ...@@ -3184,7 +3224,13 @@ def local_adv_sub1_adv_inc_sub1(node):
return [y] return [y]
# It is possible that y is upcast or downcast to x.dtype. # It is possible that y is upcast or downcast to x.dtype.
# In all case, as we set or add with 0, we can just cast y. # In all case, as we set or add with 0, we can just cast y.
return [T.cast(y, node.outputs[0].dtype)] r = T.cast(y, node.outputs[0].dtype)
# Copy over stacktrace from before casting, since
# we don't expect problems in the casting operation,
# and any problems in the indexing would have been spotted above.
copy_stack_trace(y, r)
return [r]
@register_specialize @register_specialize
...@@ -3287,7 +3333,14 @@ def local_useless_inc_subtensor_alloc(node): ...@@ -3287,7 +3333,14 @@ def local_useless_inc_subtensor_alloc(node):
msg = '`x[i]` and `y` do not have the same shape.' msg = '`x[i]` and `y` do not have the same shape.'
z = Assert(msg)(z, *cond) z = Assert(msg)(z, *cond)
return [node.op(x, z, *i)] r = node.op(x, z, *i)
# Copy over stacktrace from previous output, since
# we don't expect problems when removing the intermediate
# alloc operation and so we still want to point at the line
# of the inc_subtensor operation.
copy_stack_trace(node.outputs, r)
return [r]
#################### ####################
...@@ -3306,6 +3359,8 @@ def local_useless_rebroadcast(node): ...@@ -3306,6 +3359,8 @@ def local_useless_rebroadcast(node):
x = node.inputs[0] x = node.inputs[0]
if numpy.all(x.broadcastable == node.outputs[0].broadcastable): if numpy.all(x.broadcastable == node.outputs[0].broadcastable):
# No broadcastable flag was modified # No broadcastable flag was modified
# No need to copy over stack trace,
# because x should already have a stack trace.
return [x] return [x]
else: else:
# Keep the flags that modify something # Keep the flags that modify something
...@@ -3317,7 +3372,10 @@ def local_useless_rebroadcast(node): ...@@ -3317,7 +3372,10 @@ def local_useless_rebroadcast(node):
# All flags are useful # All flags are useful
return return
else: else:
return [T.Rebroadcast(*list(new_axis.items()))(x)] r = T.Rebroadcast(*list(new_axis.items()))(x)
# Copy over stacktrace from previous output
copy_stack_trace(node.outputs, r)
return [r]
@register_canonicalize @register_canonicalize
......
...@@ -1622,6 +1622,11 @@ def test_local_useless_slice(): ...@@ -1622,6 +1622,11 @@ def test_local_useless_slice():
subtens = apply_node.op subtens = apply_node.op
assert not any(isinstance(idx, slice) for idx in subtens.idx_list), "Slice should be gone" assert not any(isinstance(idx, slice) for idx in subtens.idx_list), "Slice should be gone"
# Now test that the stack trace is copied over properly,
# before and after optimization. # before and after optimization.
assert hasattr(f_unopt.outputs[0].variable.tag, 'trace')
assert hasattr(f_opt.outputs[0].variable.tag, 'trace')
# test a 4d tensor # test a 4d tensor
z = tensor.tensor4('z') z = tensor.tensor4('z')
o2 = z[1, :, :, 1] o2 = z[1, :, :, 1]
...@@ -1638,6 +1643,10 @@ def test_local_useless_slice(): ...@@ -1638,6 +1643,10 @@ def test_local_useless_slice():
subtens = apply_node.op subtens = apply_node.op
assert not any(isinstance(idx, slice) for idx in subtens.idx_list) assert not any(isinstance(idx, slice) for idx in subtens.idx_list)
# Finally, test that the stack trace is copied over properly,
# before and after optimization. # before and after optimization.
assert hasattr(f_opt_check.outputs[0].variable.tag, 'trace')
assert hasattr(f_opt_check_apply.outputs[0].variable.tag, 'trace')
def test_local_useless_inc_subtensor(): def test_local_useless_inc_subtensor():
x = tensor.matrix('x') x = tensor.matrix('x')
...@@ -1851,17 +1860,23 @@ class test_local_subtensor_make_vector(unittest.TestCase): ...@@ -1851,17 +1860,23 @@ class test_local_subtensor_make_vector(unittest.TestCase):
def test_stacktrace(self): def test_stacktrace(self):
x, y, z = tensor.lscalars('xyz') x, y, z = tensor.lscalars('xyz')
v = make_vector(x, y, z) v = make_vector(x, y, z)
#mode = theano.compile.mode.get_default_mode().including("local_subtensor_make_vector")
# Compile function using only the 'local_subtensor_make_vector' optimization,
# which requires us to add the 'canonicalize' phase.
mode = theano.compile.mode.Mode(optimizer=None).including('canonicalize_db').including("local_subtensor_make_vector")
f = function([x, y, z], v[0], mode=mode)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
#import ipdb; ipdb.set_trace()
# Compile function using all optimizations in fast_compile mode,
# including the 'local_subtensor_make_vector' optimization
mode = theano.compile.mode.get_mode('FAST_COMPILE').including("local_subtensor_make_vector") mode = theano.compile.mode.get_mode('FAST_COMPILE').including("local_subtensor_make_vector")
f = function([x, y, z], v[0], mode=mode) f = function([x, y, z], v[0], mode=mode)
# TODO Pascal is there some way I can disable ALL optimizations except the 'local_subtensor_make_vector' opt?
# Right now there is some other optimization removing the stack trace
print ('Before optimization')
print (v[0].tag)
print ('After optimization')
print (f.outputs[0].tag)
# Check stacktrace was copied over correctly after opt was applied # Check stacktrace was copied over correctly after opt was applied
#self.assertTrue(hasattr(f.outputs[0].tag, 'trace')) self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
class test_local_subtensor_lift(unittest.TestCase): class test_local_subtensor_lift(unittest.TestCase):
def _verify_stack_trace(self, f): def _verify_stack_trace(self, f):
...@@ -2661,6 +2676,32 @@ class test_local_adv_sub1_adv_inc_sub1(unittest.TestCase): ...@@ -2661,6 +2676,32 @@ class test_local_adv_sub1_adv_inc_sub1(unittest.TestCase):
self.assertRaises((AssertionError, ValueError), self.assertRaises((AssertionError, ValueError),
f, dx, dy, [1]) f, dx, dy, [1])
def test_stacktrace(self):
x = tensor.matrix("x")
y = tensor.matrix("y")
idx = tensor.ivector()
dx = numpy.random.rand(4, 5).astype(config.floatX)
dy = numpy.random.rand(2, 5).astype(config.floatX)
didx = numpy.asarray([1, 3], "int32")
# set_subtensor
inc = tensor.set_subtensor(x[idx], y)
o = inc[idx]
# Compile function using only the 'local_adv_sub1_adv_inc_sub1' optimization,
# which requires us to add the 'canonicalize' phase.
mode = theano.compile.mode.Mode(optimizer=None).including('canonicalize').including("local_adv_sub1_adv_inc_sub1")
f = theano.function([x, y, idx], o, self.mode)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
# Compile function using all optimizations in fast_compile mode,
# including the 'local_adv_sub1_adv_inc_sub1' optimization
mode = theano.compile.mode.get_mode('FAST_COMPILE').including("local_adv_sub1_adv_inc_sub1")
f = theano.function([x, y, idx], o, self.mode)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
class Test_alloc_zero(unittest.TestCase): class Test_alloc_zero(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -2861,7 +2902,11 @@ def test_local_IncSubtensor_serialize(): ...@@ -2861,7 +2902,11 @@ def test_local_IncSubtensor_serialize():
tensor.AdvancedIncSubtensor1)) tensor.AdvancedIncSubtensor1))
for inp in a.inputs]) for inp in a.inputs])
# Now test that the stack trace is copied over properly,
# if we return the gradients. We need to use same mode as before.
f = theano.function([i, j, t], dW, mode=mode)
assert hasattr(f.outputs[0].variable.tag, 'trace')
def test_local_set_to_inc_subtensor(): def test_local_set_to_inc_subtensor():
v = theano.tensor.fmatrix() v = theano.tensor.fmatrix()
s = v[[2, 1]] s = v[[2, 1]]
...@@ -2890,7 +2935,12 @@ def test_local_set_to_inc_subtensor(): ...@@ -2890,7 +2935,12 @@ def test_local_set_to_inc_subtensor():
utt.assert_allclose(r1, r2) utt.assert_allclose(r1, r2)
# Finally, test that the stack trace is copied over properly,
# before and after optimization.
assert hasattr(f1.outputs[0].variable.tag, 'trace')
assert hasattr(f2.outputs[0].variable.tag, 'trace')
def test_local_subtensor_of_dot(): def test_local_subtensor_of_dot():
m1 = theano.tensor.matrix() m1 = theano.tensor.matrix()
m2 = theano.tensor.matrix() m2 = theano.tensor.matrix()
...@@ -2922,10 +2972,16 @@ def test_local_subtensor_of_dot(): ...@@ -2922,10 +2972,16 @@ def test_local_subtensor_of_dot():
f = theano.function([m1, m2, idx], theano.dot(m1, m2)[idx, 1:4, :, idx:], mode=mode) f = theano.function([m1, m2, idx], theano.dot(m1, m2)[idx, 1:4, :, idx:], mode=mode)
assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1, 1:4, :, 1:]) assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1, 1:4, :, 1:])
# Now test that the stack trace is copied over properly,
# if we return the gradients. We need to use the same mode as before.
assert hasattr(f.outputs[0].variable.tag, 'trace')
f = theano.function([m1, m2, idx], theano.dot(m1, m2)[1:4, :, idx:, idx], mode=mode) f = theano.function([m1, m2, idx], theano.dot(m1, m2)[1:4, :, idx:, idx], mode=mode)
assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1:4, :, 1:, 1]) assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1:4, :, 1:, 1])
# Now test that the stack trace is copied over properly,
# if we return the gradients. We need to use same mode as before.
assert hasattr(f.outputs[0].variable.tag, 'trace')
class Test_local_elemwise_alloc(unittest.TestCase): class Test_local_elemwise_alloc(unittest.TestCase):
dtype = config.floatX dtype = config.floatX
...@@ -3428,6 +3484,11 @@ class Test_local_useless_elemwise_comparison(unittest.TestCase): ...@@ -3428,6 +3484,11 @@ class Test_local_useless_elemwise_comparison(unittest.TestCase):
class Test_local_useless_alloc(unittest.TestCase): class Test_local_useless_alloc(unittest.TestCase):
def _verify_stack_trace(self, f):
for output in f.outputs:
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(output.variable.tag, 'trace'))
def setUp(self): def setUp(self):
self.rng = numpy.random.RandomState(utt.fetch_seed()) self.rng = numpy.random.RandomState(utt.fetch_seed())
...@@ -3448,6 +3509,8 @@ class Test_local_useless_alloc(unittest.TestCase): ...@@ -3448,6 +3509,8 @@ class Test_local_useless_alloc(unittest.TestCase):
if isinstance(mode_opt, compile.DebugMode): if isinstance(mode_opt, compile.DebugMode):
self.assertRaises(ValueError, f) self.assertRaises(ValueError, f)
self._verify_stack_trace(f)
def test1(self): def test1(self):
# Test that alloc never gets instantiated during optimization # Test that alloc never gets instantiated during optimization
mode = mode_opt.excluding('local_useless_alloc') mode = mode_opt.excluding('local_useless_alloc')
...@@ -3461,6 +3524,8 @@ class Test_local_useless_alloc(unittest.TestCase): ...@@ -3461,6 +3524,8 @@ class Test_local_useless_alloc(unittest.TestCase):
op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()] op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
assert tensor.Alloc not in op_classes assert tensor.Alloc not in op_classes
self._verify_stack_trace(f)
def test2(self): def test2(self):
# Test that alloc never gets instantiated during optimization # Test that alloc never gets instantiated during optimization
mode = mode_opt.excluding('local_useless_alloc') mode = mode_opt.excluding('local_useless_alloc')
...@@ -3479,10 +3544,17 @@ class Test_local_useless_alloc(unittest.TestCase): ...@@ -3479,10 +3544,17 @@ class Test_local_useless_alloc(unittest.TestCase):
# in op_classes and we have to change the assert. # in op_classes and we have to change the assert.
assert tensor.Alloc in op_classes assert tensor.Alloc in op_classes
self._verify_stack_trace(f)
class Test_local_useless_inc_subtensor_alloc(unittest.TestCase): class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
opt_name = 'local_useless_inc_subtensor_alloc' opt_name = 'local_useless_inc_subtensor_alloc'
def _verify_stack_trace(self, f):
for output in f.outputs:
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(output.variable.tag, 'trace'))
def setUp(self): def setUp(self):
# The optimization requires the shape feature so we need to compile in # The optimization requires the shape feature so we need to compile in
# FAST_RUN mode. # FAST_RUN mode.
...@@ -3519,6 +3591,10 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase): ...@@ -3519,6 +3591,10 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
r2 = f2(x_value, i_value, y_value) r2 = f2(x_value, i_value, y_value)
utt.assert_allclose(r1, r2) utt.assert_allclose(r1, r2)
self._verify_stack_trace(f1)
self._verify_stack_trace(f2)
def test_advanced_inc_subtensor1(self): def test_advanced_inc_subtensor1(self):
if tensor.inplace_increment is None: if tensor.inplace_increment is None:
...@@ -3548,6 +3624,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase): ...@@ -3548,6 +3624,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
r2 = f2(x_value, i_value, y_value) r2 = f2(x_value, i_value, y_value)
utt.assert_allclose(r1, r2) utt.assert_allclose(r1, r2)
self._verify_stack_trace(f1)
self._verify_stack_trace(f2)
def test_incsubtensor(self): def test_incsubtensor(self):
x = tensor.vector('x') x = tensor.vector('x')
...@@ -3574,6 +3653,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase): ...@@ -3574,6 +3653,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
r2 = f2(x_value, i_value, y_value) r2 = f2(x_value, i_value, y_value)
utt.assert_allclose(r1, r2) utt.assert_allclose(r1, r2)
self._verify_stack_trace(f1)
self._verify_stack_trace(f2)
class test_shapeoptimizer(unittest.TestCase): class test_shapeoptimizer(unittest.TestCase):
...@@ -4082,6 +4164,8 @@ class T_Rebroadcast(unittest.TestCase): ...@@ -4082,6 +4164,8 @@ class T_Rebroadcast(unittest.TestCase):
e = f.maker.fgraph.toposort() e = f.maker.fgraph.toposort()
assert len([n for n in e if isinstance(n.op, T.Rebroadcast)]) == 0 assert len([n for n in e if isinstance(n.op, T.Rebroadcast)]) == 0
assert hasattr(f.outputs[0].variable.tag, 'trace')
def test_rebroadcast_rebroadcast(self): def test_rebroadcast_rebroadcast(self):
mode = theano.compile.get_default_mode().including('canonicalize') mode = theano.compile.get_default_mode().including('canonicalize')
m = T.matrix() m = T.matrix()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论