提交 9bc05a38 authored 作者: Tim Cooijmans's avatar Tim Cooijmans 提交者: Reyhane Askari

define and use with_stack_trace

上级 592e7c75
...@@ -2948,6 +2948,34 @@ def copy_stack_trace(from_var, to_var): ...@@ -2948,6 +2948,34 @@ def copy_stack_trace(from_var, to_var):
to_var.tag.trace = getattr(to_var.tag, 'trace', []) + tr to_var.tag.trace = getattr(to_var.tag, 'trace', []) + tr
def with_stack_trace(from_var, to_var):
    """
    Copy the stack trace of one or more tensor variables onto one or
    more destination tensor variables, then return the destination(s).

    This is a convenience wrapper around `copy_stack_trace` that makes
    the tagged destination available as an expression value.

    Parameters
    ----------
    from_var
        Tensor variable or list of tensor variables whose stack traces
        are copied.
    to_var
        Tensor variable or list of tensor variables that receive the
        stack traces.

    Returns
    -------
    tensor variable or list of tensor variables
        `to_var`, augmented with the stack traces taken from `from_var`.

    Notes
    -----
    A stack trace is assumed to be a list of lists of tuples.  Each
    tuple holds a filename, line number, function name and so on, and
    each inner list gathers the tuples belonging to one particular
    variable.
    """
    destination = to_var
    copy_stack_trace(from_var, destination)
    return destination
def check_stack_trace(f_or_fgraph, ops_to_check='last', bug_print='raise'): def check_stack_trace(f_or_fgraph, ops_to_check='last', bug_print='raise'):
""" """
This function checks if the outputs of specific ops of a compiled graph This function checks if the outputs of specific ops of a compiled graph
......
...@@ -15,6 +15,7 @@ from theano.tensor.basic import ( ...@@ -15,6 +15,7 @@ from theano.tensor.basic import (
from theano.gof import HideC, COp, ParamsType from theano.gof import HideC, COp, ParamsType
from theano.gof.utils import MethodNotDefined from theano.gof.utils import MethodNotDefined
from theano.gof.opt import with_stack_trace
from collections import deque from collections import deque
...@@ -75,11 +76,11 @@ def as_gpuarray_variable(x, context_name): ...@@ -75,11 +76,11 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor # If we couldn't deal with transfers, then maybe it's a tensor
if isinstance(x.type, tensor.TensorType): if isinstance(x.type, tensor.TensorType):
return GpuFromHost(context_name)(x) return with_stack_trace(x, GpuFromHost(context_name)(x))
# Try _as_GpuArrayVariable if possible # Try _as_GpuArrayVariable if possible
if hasattr(x, '_as_GpuArrayVariable'): if hasattr(x, '_as_GpuArrayVariable'):
return x._as_GpuArrayVariable(context_name) return with_stack_trace(x, x._as_GpuArrayVariable(context_name))
# If it didn't work try for a constant # If it didn't work try for a constant
ctx = get_context(context_name) ctx = get_context(context_name)
...@@ -88,13 +89,13 @@ def as_gpuarray_variable(x, context_name): ...@@ -88,13 +89,13 @@ def as_gpuarray_variable(x, context_name):
if x.context.ptr != ctx.ptr: if x.context.ptr != ctx.ptr:
x = x.transfer(ctx) x = x.transfer(ctx)
x = gpuarray.asarray(x, context=ctx) x = with_stack_trace(x, gpuarray.asarray(x, context=ctx))
bcast = [(s == 1) for s in x.shape] bcast = [(s == 1) for s in x.shape]
return GpuArrayConstant(GpuArrayType(dtype=x.dtype, return with_stack_trace(x, GpuArrayConstant(GpuArrayType(dtype=x.dtype,
broadcastable=bcast, broadcastable=bcast,
context_name=context_name), context_name=context_name),
x) x))
def infer_context_name(*vars): def infer_context_name(*vars):
......
...@@ -15,7 +15,7 @@ from theano.compile.ops import shape_i ...@@ -15,7 +15,7 @@ from theano.compile.ops import shape_i
from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer, from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
LocalGroupDB, LocalGroupDB,
SequenceDB, Optimizer, DB, toolbox, graph) SequenceDB, Optimizer, DB, toolbox, graph)
from theano.gof.opt import LocalMetaOptimizer, copy_stack_trace from theano.gof.opt import LocalMetaOptimizer, copy_stack_trace, with_stack_trace
from theano.ifelse import IfElse from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet from theano.misc.ordered_set import OrderedSet
...@@ -421,6 +421,8 @@ class GraphToGPU(Optimizer): ...@@ -421,6 +421,8 @@ class GraphToGPU(Optimizer):
if isinstance(new_ops, theano.Op): if isinstance(new_ops, theano.Op):
outputs = new_ops(*[mapping[i] for i in node.inputs], return_list=True) outputs = new_ops(*[mapping[i] for i in node.inputs], return_list=True)
for old_output, new_output in zip(node.outputs, outputs):
copy_stack_trace(old_output, new_output)
elif not new_ops: elif not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i) for i in node.inputs]) newnode = node.clone_with_new_inputs([mapping.get(i) for i in node.inputs])
outputs = newnode.outputs outputs = newnode.outputs
...@@ -461,7 +463,7 @@ class GraphToGPU(Optimizer): ...@@ -461,7 +463,7 @@ class GraphToGPU(Optimizer):
new_o.owner.inputs[0].type == o.type): new_o.owner.inputs[0].type == o.type):
new_o = new_o.owner.inputs[0] new_o = new_o.owner.inputs[0]
else: else:
new_o = safe_to_cpu(new_o) new_o = with_stack_trace(o, safe_to_cpu(new_o))
new_nodes.append(new_o) new_nodes.append(new_o)
fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes), fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes),
reason=self.__class__.__name__) reason=self.__class__.__name__)
...@@ -692,8 +694,6 @@ def local_gpu_contiguous_gpu_contiguous(node): ...@@ -692,8 +694,6 @@ def local_gpu_contiguous_gpu_contiguous(node):
if isinstance(node.op, GpuContiguous): if isinstance(node.op, GpuContiguous):
inp = node.inputs[0] inp = node.inputs[0]
if inp.owner and isinstance(inp.owner.op, GpuContiguous): if inp.owner and isinstance(inp.owner.op, GpuContiguous):
if not getattr(inp.tag, 'trace', None):
copy_stack_trace(node.outputs[0], inp)
return [inp] return [inp]
...@@ -1220,7 +1220,7 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1220,7 +1220,7 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
op.scalar_op, axis=op.axis, op.scalar_op, axis=op.axis,
dtype=odtype, dtype=odtype,
acc_dtype=adtype) acc_dtype=adtype)
gvar = greduce(x) gvar = with_stack_trace(outputs, greduce(x))
# We need to have the make node called, otherwise the mask can # We need to have the make node called, otherwise the mask can
# be None # be None
if (op2 is GpuCAReduceCPY or if (op2 is GpuCAReduceCPY or
...@@ -1260,22 +1260,27 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1260,22 +1260,27 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
dtype=getattr(op, 'dtype', outputs[0].dtype), dtype=getattr(op, 'dtype', outputs[0].dtype),
acc_dtype=getattr(op, 'acc_dtype', None)) acc_dtype=getattr(op, 'acc_dtype', None))
reshaped_x = x.reshape(tensor.stack(new_in_shp)) reshaped_x = with_stack_trace(
gpu_reshaped_x = as_gpuarray_variable(reshaped_x, context_name) outputs, x.reshape(tensor.stack(new_in_shp)))
gvar = greduce(gpu_reshaped_x) gpu_reshaped_x = with_stack_trace(
outputs, as_gpuarray_variable(reshaped_x, context_name))
gvar = with_stack_trace(outputs, greduce(gpu_reshaped_x))
# We need to have the make node called, otherwise the mask can # We need to have the make node called, otherwise the mask can
# be None # be None
reshaped_gpu_inputs = [gpu_reshaped_x] reshaped_gpu_inputs = [gpu_reshaped_x]
if greduce.supports_c_code(reshaped_gpu_inputs): if greduce.supports_c_code(reshaped_gpu_inputs):
reduce_reshaped_x = greduce(gpu_reshaped_x) reduce_reshaped_x = with_stack_trace(
outputs, greduce(gpu_reshaped_x))
if reduce_reshaped_x.ndim != outputs[0].ndim: if reduce_reshaped_x.ndim != outputs[0].ndim:
out_shp = [] out_shp = []
for i in range(x.ndim): for i in range(x.ndim):
if i not in op.axis: if i not in op.axis:
out_shp.append(shape_i(x, i)) out_shp.append(shape_i(x, i))
unreshaped_reduce = GpuReshape(len(out_shp))(reduce_reshaped_x, unreshaped_reduce = with_stack_trace(
tensor.stack(out_shp)) outputs, GpuReshape(len(out_shp))(
reduce_reshaped_x,
tensor.stack(out_shp)))
else: else:
unreshaped_reduce = reduce_reshaped_x unreshaped_reduce = reduce_reshaped_x
return [unreshaped_reduce] return [unreshaped_reduce]
...@@ -2398,7 +2403,8 @@ def local_gpu_elemwise_careduce(node): ...@@ -2398,7 +2403,8 @@ def local_gpu_elemwise_careduce(node):
props = node.op._props_dict() props = node.op._props_dict()
props["pre_scalar_op"] = scalar.basic.sqr props["pre_scalar_op"] = scalar.basic.sqr
out = GpuCAReduceCuda(**props)(inp) out = GpuCAReduceCuda(**props)(inp)
return [out] return with_stack_trace(
node.outputs, out)
@local_optimizer(None) @local_optimizer(None)
......
...@@ -43,6 +43,7 @@ from theano.tensor import DimShuffle, Subtensor ...@@ -43,6 +43,7 @@ from theano.tensor import DimShuffle, Subtensor
from theano.tensor.opt import register_uncanonicalize from theano.tensor.opt import register_uncanonicalize
from theano import scalar as scal from theano import scalar as scal
from theano.gof.opt import copy_stack_trace, with_stack_trace
_logger = logging.getLogger('theano.tensor.opt') _logger = logging.getLogger('theano.tensor.opt')
...@@ -57,10 +58,13 @@ def local_max_and_argmax(node): ...@@ -57,10 +58,13 @@ def local_max_and_argmax(node):
axis = node.op.get_params(node) axis = node.op.get_params(node)
if len(node.outputs[1].clients) == 0: if len(node.outputs[1].clients) == 0:
new = CAReduce(scal.maximum, axis)(node.inputs[0]) new = CAReduce(scal.maximum, axis)(node.inputs[0])
copy_stack_trace(node.outputs[0], new)
return [new, None] return [new, None]
if len(node.outputs[0].clients) == 0: if len(node.outputs[0].clients) == 0:
return [None, T.Argmax(axis)(node.inputs[0])] new = T.Argmax(axis)(node.inputs[0])
copy_stack_trace(node.outputs[0], new)
return [None, new]
@register_uncanonicalize @register_uncanonicalize
...@@ -84,8 +88,8 @@ def local_max_to_min(node): ...@@ -84,8 +88,8 @@ def local_max_to_min(node):
max.owner.op.scalar_op == scal.maximum): max.owner.op.scalar_op == scal.maximum):
neg = max.owner.inputs[0] neg = max.owner.inputs[0]
if neg.owner and neg.owner.op == T.neg: if neg.owner and neg.owner.op == T.neg:
return [CAReduce(scal.minimum, new = CAReduce(scal.minimum, max.owner.op.axis)(neg.owner.inputs[0])
max.owner.op.axis)(neg.owner.inputs[0])] return [with_stack_trace(node.outputs[0], new)]
return False return False
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论