Merge pull request #2461 from nouiz/traceback2

Lower the number of disk access: continuation

Merge pull request #2461 from nouiz/traceback2
9db3be23 · abergeron · 8f5e49d3 · c7ecb37c · 9db3be23 · 9db3be23
--- a/theano/compile/function_module.py
+++ b/theano/compile/function_module.py
@@ -1236,10 +1236,13 @@ class FunctionMaker(object):
        if need_opt:
            compute_test_value_orig = theano.config.compute_test_value
            add_stack_trace_on_call_orig = gof.Op.add_stack_trace_on_call
+            limit_orig = theano.config.traceback.limit
+            # Why add the stack on the node when it is done on the output var?
            try:
                # optimize the fgraph
                theano.config.compute_test_value = theano.config.compute_test_value_opt
                gof.Op.add_stack_trace_on_call = False
+                theano.config.traceback.limit = 0
                start_optimizer = time.time()

                # now optimize the graph
@@ -1262,6 +1265,7 @@ class FunctionMaker(object):
            finally:
                theano.config.compute_test_value = compute_test_value_orig
                gof.Op.add_stack_trace_on_call = add_stack_trace_on_call_orig
+                theano.config.traceback.limit = limit_orig
        
        # initialize the linker
        if not hasattr(linker, 'accept'):
@@ -1406,7 +1410,17 @@ class FunctionMaker(object):

        # Get a function instance
        start_linker = time.time()
-        _fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists)
+        add_stack_trace_on_call_orig = gof.Op.add_stack_trace_on_call
+        limit_orig = theano.config.traceback.limit
+        try:
+            gof.Op.add_stack_trace_on_call = False
+            theano.config.traceback.limit = 0
+            _fn, _i, _o = self.linker.make_thunk(
+                input_storage=input_storage_lists)
+        finally:
+            gof.Op.add_stack_trace_on_call = add_stack_trace_on_call_orig
+            theano.config.traceback.limit = limit_orig
+
        end_linker = time.time()

        linker_time = end_linker - start_linker

--- a/theano/gof/null_type.py
+++ b/theano/gof/null_type.py
@@ -38,3 +38,4 @@ class NullType(Type):

    def __str__(self):
        return 'NullType'
+null_type = NullType()
--- a/theano/gof/utils.py
+++ b/theano/gof/utils.py
-import re
+import linecache
 import traceback
+import re
+import sys

 from theano import config


+def simple_extract_stack(f=None, limit=None):
+    """Copy of Python 2.7's traceback.extract_stack that skips the
+    linecache.checkcache() call: that refresh calls os.stat to get
+    the line content, which is too slow on clusters.
+
+    :param f: frame to start from (default: the caller's frame).
+    :param limit: maximum number of frames (default: all, or
+        sys.tracebacklimit when it is set).
+    :return: list of (filename, lineno, name, line), oldest first.
+    """
+    if f is None:
+        try:
+            raise ZeroDivisionError  # raised only to reach the frame
+        except ZeroDivisionError:
+            f = sys.exc_info()[2].tb_frame.f_back
+    if limit is None:
+        if hasattr(sys, 'tracebacklimit'):
+            limit = sys.tracebacklimit
+    list = []
+    n = 0
+    while f is not None and (limit is None or n < limit):
+        lineno = f.f_lineno
+        co = f.f_code
+        filename = co.co_filename
+        name = co.co_name
+        # linecache.checkcache(filename) is deliberately not called here.
+        line = linecache.getline(filename, lineno, f.f_globals)
+        if line:
+            line = line.strip()
+        else:
+            line = None  # no source text available for this frame
+        list.append((filename, lineno, name, line))
+        f = f.f_back
+        n = n + 1
+    list.reverse()  # frames were collected innermost first
+    return list
+
+if sys.version_info[:2] > (3, 2):
+    # The copy above mirrors the Python 2.7 internals, so it is only
+    # enabled up to Python 3.2. On newer versions, where those internals
+    # may have changed, fall back to the standard implementation.
+    simple_extract_stack = traceback.extract_stack
+
+
 def add_tag_trace(thing):
    """Add tag.trace to an node or variable.

@@ -12,7 +58,7 @@ def add_tag_trace(thing):
    limit = config.traceback.limit
    if limit == -1:
        limit = None
-    tr = traceback.extract_stack(limit=limit)[:-1]
+    tr = simple_extract_stack(limit=limit)[:-1]
    # Different python version use different sementic for
    # limit. python 2.7 include the call to extrack_stack. The -1 get
    # rid of it.  We also want to get rid of the add_tag_trace call.

--- a/theano/gradient.py
+++ b/theano/gradient.py
@@ -21,7 +21,7 @@ import theano
 from theano import gof
 from theano.gof import Variable
 from theano.gof.python25 import OrderedDict
-from theano.gof.null_type import NullType
+from theano.gof.null_type import NullType, null_type
 from theano.gof.op import get_debug_values
 from theano.compile import ViewOp

@@ -146,6 +146,7 @@ class DisconnectedType(theano.gof.type.Type):

    def __str__(self):
        return 'DisconnectedType'
+disconnected_type = DisconnectedType()


 ########################
@@ -524,7 +525,7 @@ def grad(cost, wrt, consider_constant=None,
        if elem not in var_to_app_to_idx and elem is not cost \
                and elem not in grad_dict:
            handle_disconnected(elem)
-            grad_dict[elem] = DisconnectedType()()
+            grad_dict[elem] = disconnected_type()

    cost_name = None
    if add_names and cost is not None:
@@ -978,7 +979,7 @@ def _populate_grad_dict(var_to_app_to_idx,
                # are disconnected
                # (The op's grad method could do this too, but this saves the
                # implementer the trouble of worrying about this case)
-                input_grads = [DisconnectedType()() for ipt in inputs]
+                input_grads = [disconnected_type() for ipt in inputs]
            elif False not in only_connected_to_nan:
                # All inputs are only connected to nan gradients, so we don't
                # need to bother calling the grad method. We know the gradient
@@ -986,9 +987,9 @@ def _populate_grad_dict(var_to_app_to_idx,
                input_grads = []
                for connected in inputs_connected:
                    if connected:
-                        input_grads.append(NullType()())
+                        input_grads.append(null_type())
                    else:
-                        input_grads.append(DisconnectedType()())
+                        input_grads.append(disconnected_type())
            else:
                # At least one input of this op is connected to the cost so and
                # not all output gradients are undefined so we must
@@ -1124,7 +1125,7 @@ def _populate_grad_dict(var_to_app_to_idx,
                    raise TypeError(('%s.grad returned None for' +
                             ' a gradient term, '
                            'this is prohibited. Instead of None,'
-                            'return zeros_like(input), DisconnectedType()(),'
+                            'return zeros_like(input), disconnected_type(),'
                            ' or a NullType variable such as those made with '
                            'the grad_undefined or grad_unimplemented helper '
                            'functions.') % node.op)
@@ -1258,14 +1259,14 @@ def _populate_grad_dict(var_to_app_to_idx,
                    # extraneous TensorConstant(0)
                    grad_dict[var] = reduce(lambda x, y: x + y, terms)
                else:
-                    grad_dict[var] = DisconnectedType()()
+                    grad_dict[var] = disconnected_type()

                if cost_name is not None and var.name is not None:
                    grad_dict[var].name = '(d%s/d%s)' % (cost_name, var.name)
            else:
                # this variable isn't connected to the cost in the
                # computational graph
-                grad_dict[var] = DisconnectedType()()
+                grad_dict[var] = disconnected_type()
        # end if cache miss
        return grad_dict[var]


--- a/theano/sandbox/cuda/elemwise.py
+++ b/theano/sandbox/cuda/elemwise.py
@@ -132,9 +132,9 @@ class NaiveAlgo(object):
        #TODO: What if the scalar_op needs support_code??
        task_code = self.scalar_op.c_code(
            Apply(self.scalar_op,
-                  [scalar.Scalar(dtype=input.type.dtype)()
+                  [scalar.Scalar(dtype=input.type.dtype).make_variable()
                   for input in node.inputs],
-                  [scalar.Scalar(dtype=output.type.dtype)()
+                  [scalar.Scalar(dtype=output.type.dtype).make_variable()
                   for output in node.outputs]),
            nodename + '_scalar_',
            get_str_list_logical_scalar(node),
@@ -253,8 +253,10 @@ class NaiveAlgo(object):
            #TODO: What if the scalar_op needs support_code??
            task_code = self.scalar_op.c_code(
                    Apply(self.scalar_op,
-                        [scalar.Scalar(dtype = input.type.dtype)() for input in node.inputs],
-                        [scalar.Scalar(dtype = output.type.dtype)() for output in node.outputs])
+                        [scalar.Scalar(dtype = input.type.dtype).make_variable()
+                         for input in node.inputs],
+                        [scalar.Scalar(dtype = output.type.dtype).make_variable()
+                         for output in node.outputs])
                    , nodename + '_scalar_'
                    , get_str_list_logical_scalar(node, value_str='value0[%i]')
                    , ['ii_o%i_data[0]'%ipos for ipos, i in enumerate(node.outputs)]
@@ -389,8 +391,10 @@ class NaiveAlgo(object):
        def task_code(d):
            print >> sio, self.scalar_op.c_code(
                Apply(self.scalar_op,
-                    [scalar.Scalar(dtype = input.type.dtype)() for input in node.inputs],
-                    [scalar.Scalar(dtype = output.type.dtype)() for output in node.outputs])
+                    [scalar.Scalar(dtype = input.type.dtype).make_variable()
+                     for input in node.inputs],
+                    [scalar.Scalar(dtype = output.type.dtype).make_variable()
+                     for output in node.outputs])
                , nodename + '_scalar_'
                , ['i%i_data_%i[0]'%(ipos,d) for ipos, i in enumerate(node.inputs)]
                , ['o%i_data_%i[0]'%(ipos,d) for ipos, i in enumerate(node.outputs)]
@@ -461,8 +465,10 @@ class NaiveAlgo(object):
        #TODO: What if the scalar_op needs support_code??
        task_code = self.scalar_op.c_code(
                Apply(self.scalar_op,
-                    [scalar.Scalar(dtype = input.type.dtype)() for input in node.inputs],
-                    [scalar.Scalar(dtype = output.type.dtype)() for output in node.outputs])
+                    [scalar.Scalar(dtype = input.type.dtype).make_variable()
+                     for input in node.inputs],
+                    [scalar.Scalar(dtype = output.type.dtype).make_variable()
+                     for output in node.outputs])
                , nodename + '_scalar_'
                #, ['i%i_data[i]'%ipos for ipos, i in enumerate(node.inputs)]
                , get_str_list_logical_scalar(node, data_str='i%i_data[i]')

--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
@@ -526,7 +526,8 @@ class Elemwise(OpenMPOp):
        """
        inputs = map(as_tensor_variable, inputs)
        shadow = self.scalar_op.make_node(
-                *[get_scalar_type(dtype=i.type.dtype)() for i in inputs])
+                *[get_scalar_type(dtype=i.type.dtype).make_variable()
+                  for i in inputs])

        target_length = max([input.type.ndim for input in inputs])

@@ -1029,9 +1030,9 @@ class Elemwise(OpenMPOp):
        # We generate the C code of the inner loop using the scalar op
        task_code = self.scalar_op.c_code(
                Apply(self.scalar_op,
-                      [get_scalar_type(dtype=input.type.dtype)()
+                      [get_scalar_type(dtype=input.type.dtype).make_variable()
                          for input in node.inputs],
-                      [get_scalar_type(dtype=output.type.dtype)()
+                      [get_scalar_type(dtype=output.type.dtype).make_variable()
                          for output in node.outputs]),
                nodename + '_scalar_',
                ["%s_i" % s for s in _inames],
@@ -1182,8 +1183,10 @@ class Elemwise(OpenMPOp):

        # now we insert versions for the ops on which we depend...
        scalar_node = Apply(self.scalar_op,
-                [get_scalar_type(dtype=input.type.dtype)() for input in node.inputs],
-                [get_scalar_type(dtype=output.type.dtype)() for output in node.outputs])
+                [get_scalar_type(dtype=input.type.dtype).make_variable()
+                 for input in node.inputs],
+                [get_scalar_type(dtype=output.type.dtype).make_variable()
+                 for output in node.outputs])
        version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
        for i in node.inputs + node.outputs:
            version.append(get_scalar_type(dtype=i.type.dtype).c_code_cache_version())
@@ -1560,9 +1563,9 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
        task1_code = self.scalar_op.c_code(
                Apply(
                    self.scalar_op,
-                    [get_scalar_type(dtype=input.type.dtype)()
+                    [get_scalar_type(dtype=input.type.dtype).make_variable()
                        for input in (node.inputs * 2)],
-                    [get_scalar_type(dtype=output.type.dtype)()
+                    [get_scalar_type(dtype=output.type.dtype).make_variable()
                        for input in node.outputs]),
                None,
                ["%s_i" % aname, "%s_i" % inames[0]],
@@ -1612,8 +1615,10 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){

        # now we insert versions for the ops on which we depend...
        scalar_node = Apply(self.scalar_op,
-                [get_scalar_type(dtype=input.type.dtype)() for input in node.inputs],
-                [get_scalar_type(dtype=output.type.dtype)() for output in node.outputs])
+                [get_scalar_type(dtype=input.type.dtype).make_variable()
+                 for input in node.inputs],
+                [get_scalar_type(dtype=output.type.dtype).make_variable()
+                 for output in node.outputs])
        version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
        for i in node.inputs + node.outputs:
            version.append(get_scalar_type(dtype=i.type.dtype).c_code_cache_version())