Commit 2928d02a authored by Frédéric Bastien

Merge pull request #3507 from nouiz/mixed3

Mixed: warning, error msg, tests fix, clean up, crash
@@ -580,7 +580,7 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
                print_view_map=False, order=None, ids='CHAR',
                stop_on_name=False, prefix_child=None,
                scan_ops=None, profile=None,
-               scan_inner_to_outer_inputs=None):
+               scan_inner_to_outer_inputs=None, smap=None):
     """
     Print the graph leading to `r` to given depth.
@@ -620,7 +620,8 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
     scan_inner_to_outer_inputs
         A dictionary mapping a scan op's inner function inputs to the scan
         op's inputs (outer inputs) for printing purposes.
+    smap
+        None or the storage_map when printing a Theano function.
     """
     if depth == 0:
         return
@@ -689,23 +690,21 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
         already_printed = a in done  # get_id_str put it in the dict
         id_str = get_id_str(a)

-        if profile is None or a not in profile.apply_time:
-            if len(a.outputs) == 1:
-                print('%s%s %s%s \'%s\' %s %s %s' % (prefix, a.op,
-                                                     id_str,
-                                                     type_str,
-                                                     r_name,
-                                                     destroy_map_str,
-                                                     view_map_str,
-                                                     o), file=file)
-            else:
-                print('%s%s.%i %s%s \'%s\' %s %s %s' % (prefix, a.op,
-                                                        a.outputs.index(r),
-                                                        id_str, type_str,
-                                                        r_name,
-                                                        destroy_map_str,
-                                                        view_map_str,
-                                                        o), file=file)
+        if len(a.outputs) == 1:
+            idx = ""
+        else:
+            idx = ".%i" % a.outputs.index(r)
+        data = ""
+        if smap:
+            data = " " + str(smap.get(a.outputs[0], ''))
+        if profile is None or a not in profile.apply_time:
+            print('%s%s%s %s%s \'%s\' %s %s %s%s' % (prefix, a.op,
+                                                     idx,
+                                                     id_str, type_str,
+                                                     r_name,
+                                                     destroy_map_str,
+                                                     view_map_str,
+                                                     o, data), file=file)
         else:
             op_time = profile.apply_time[a]
             op_time_percent = (op_time / profile.fct_call_time) * 100
@@ -714,28 +713,19 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
             tot_time_percent = (tot_time_dict[a] / profile.fct_call_time) * 100
             if len(a.outputs) == 1:
-                print("%s%s %s%s '%s' %s %s %s --> "
-                      "%8.2es %4.1f%% %8.2es %4.1f%%"
-                      % (prefix, a.op,
-                         id_str,
-                         type_str,
-                         r_name,
-                         destroy_map_str,
-                         view_map_str,
-                         o, op_time,
-                         op_time_percent,
-                         tot_time,
-                         tot_time_percent), file=file)
+                idx = ""
             else:
-                print("%s%s.%i %s%s '%s' %s %s %s --> "
-                      "%8.2es %4.1f%% %8.2es %4.1f%%"
-                      % (prefix, a.op,
-                         a.outputs.index(r),
-                         id_str, type_str,
-                         r_name,
-                         destroy_map_str,
-                         view_map_str,
-                         o, op_time,
-                         op_time_percent,
-                         tot_time,
-                         tot_time_percent), file=file)
+                idx = ".%i" % a.outputs.index(r)
+            print("%s%s%s %s%s '%s' %s %s %s%s --> "
+                  "%8.2es %4.1f%% %8.2es %4.1f%%"
+                  % (prefix, a.op,
+                     idx,
+                     id_str, type_str,
+                     r_name,
+                     destroy_map_str,
+                     view_map_str,
+                     o, data,
+                     op_time,
+                     op_time_percent,
+                     tot_time,
+                     tot_time_percent), file=file)
@@ -761,7 +751,8 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
                        ids=ids, stop_on_name=stop_on_name,
                        prefix_child=new_prefix_child, scan_ops=scan_ops,
                        profile=profile,
-                       scan_inner_to_outer_inputs=scan_inner_to_outer_inputs)
+                       scan_inner_to_outer_inputs=scan_inner_to_outer_inputs,
+                       smap=smap)
     else:
         if scan_inner_to_outer_inputs is not None and\
                 r in scan_inner_to_outer_inputs:

@@ -777,8 +768,13 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
                              outer_id_str), file=file)
         else:
             # this is an input variable
+            data = ""
+            if smap:
+                data = " " + str(smap.get(r, ''))
             id_str = get_id_str(r)
-            print('%s%s %s%s' % (prefix, r, id_str, type_str), file=file)
+            print('%s%s %s%s%s' % (prefix, r, id_str,
+                                   type_str, data),
+                  file=file)

     return file
@@ -91,6 +91,8 @@ exclude = []
 if not theano.config.cxx:
     exclude = ['cxx_only']
 OPT_NONE = gof.Query(include=[], exclude=exclude)
+# Even if the merge optimizer ends up being called multiple times, this
+# shouldn't impact performance.
 OPT_MERGE = gof.Query(include=['merge'], exclude=exclude)
 OPT_FAST_RUN = gof.Query(include=['fast_run'], exclude=exclude)
 OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
@@ -113,7 +115,7 @@ OPT_STABILIZE.name = 'OPT_STABILIZE'
 predefined_optimizers = {
     None: OPT_NONE,
     'None': OPT_NONE,
-    'merge': gof.MergeOptimizer(),
+    'merge': OPT_MERGE,
     'fast_run': OPT_FAST_RUN,
     'fast_run_stable': OPT_FAST_RUN_STABLE,
     'fast_compile': OPT_FAST_COMPILE,
@@ -25,3 +25,8 @@ def test_no_output_from_implace():
     fct_opt = theano.function([x, y], b, mode=mode_opt)
     op = fct_opt.maker.fgraph.outputs[0].owner.op
     assert (not hasattr(op, 'destroy_map') or 0 not in op.destroy_map)
+
+
+def test_including():
+    mode = theano.Mode(optimizer='merge')
+    mode.including('fast_compile')
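
Note: since 'merge' now resolves to the OPT_MERGE query rather than a bare
gof.MergeOptimizer instance, a Mode built from it composes with .including(),
which is what test_including above exercises. A minimal sketch; the graph
here is illustrative, not from the commit:

    import theano
    import theano.tensor as T

    x = T.vector('x')
    # 'merge' is now a Query, so the resulting Mode supports .including():
    mode = theano.Mode(optimizer='merge').including('fast_compile')
    f = theano.function([x], x + x, mode=mode)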
@@ -32,7 +32,7 @@ class TestPyDotFormatter(unittest.TestCase):
         expected = 11
         if th.config.mode == "FAST_COMPILE":
             expected = 12
-        self.assertEqual(len(graph.get_nodes()), 12)
+        self.assertEqual(len(graph.get_nodes()), expected)
         nc = self.node_counts(graph)
         if th.config.mode == "FAST_COMPILE":
@@ -547,9 +547,7 @@ class CLinker(link.Linker):
         if no_recycling is None:
             no_recycling = []
         if self.fgraph is not None and self.fgraph is not fgraph:
-            return type(self)().accept(fgraph, no_recycling)
-            # raise Exception("Cannot accept from a Linker that is already"
-            #                 " tied to another FunctionGraph.")
+            return type(self)(self.schedule).accept(fgraph, no_recycling)
         self.fgraph = fgraph
         self.fetch_variables()
         self.no_recycling = no_recycling
@@ -1755,7 +1753,8 @@ class OpWiseCLinker(link.LocalLinker):
             return type(self)(
                 fallback_on_perform=self.fallback_on_perform,
                 allow_gc=self.allow_gc,
-                nice_errors=self.nice_errors
+                nice_errors=self.nice_errors,
+                schedule=self.schedule,
             ).accept(fgraph, no_recycling)
             # raise Exception("Cannot accept from a Linker that is
             #                 already tied to another FunctionGraph.")
@@ -1908,7 +1907,8 @@ class DualLinker(link.Linker):
         if no_recycling is None:
             no_recycling = []
         if self.fgraph is not None and self.fgraph is not fgraph:
-            return type(self)(self.checker).accept(fgraph, no_recycling)
+            return type(self)(self.checker, self.schedule).accept(
+                fgraph, no_recycling)
         self.fgraph = fgraph
         self.no_recycling = no_recycling
         return self
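
Note: the three accept() fixes above share one theme: when a linker already
tied to a FunctionGraph is asked to accept another, the clone it returns now
carries the custom schedule along instead of silently dropping it. A hedged
sketch of how a schedule reaches a linker; the schedule function is
hypothetical, and the schedule keyword is taken from the OpWiseCLinker call
in the diff:

    import theano

    def my_schedule(fgraph):
        # A schedule decides the execution order of the Apply nodes;
        # here we simply fall back to a plain topological sort.
        return fgraph.toposort()

    linker = theano.gof.OpWiseCLinker(schedule=my_schedule)
    mode = theano.Mode(linker=linker, optimizer='fast_compile')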
@@ -17,6 +17,7 @@ import tempfile
 import time
 import platform
 import distutils.sysconfig
+import warnings

 import numpy.distutils  # TODO: TensorType should handle this
@@ -324,6 +325,9 @@ def dlimport(fullpath, suffix=None):
     if hasattr(importlib, "invalidate_caches"):
         importlib.invalidate_caches()
     t0 = time.time()
-    rval = __import__(module_name, {}, {}, [module_name])
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore",
+                                message="numpy.ndarray size changed")
+        rval = __import__(module_name, {}, {}, [module_name])
     t1 = time.time()
     import_time += t1 - t0
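
Note: the "numpy.ndarray size changed" warning appears when importing an
extension module compiled against a different NumPy C-API layout; it is
benign here, so the commit scopes a filter around just the import. The same
standard-library pattern in isolation; the imported module name is a
placeholder:

    import warnings

    with warnings.catch_warnings():
        # Ignore only this message, and only inside the block; any other
        # warning raised by the import still propagates.
        warnings.filterwarnings("ignore",
                                message="numpy.ndarray size changed")
        import some_compiled_module  # hypothetical extension module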
@@ -48,7 +48,7 @@ VALID_ASSOC = set(['left', 'right', 'either'])
 def debugprint(obj, depth=-1, print_type=False,
                file=None, ids='CHAR', stop_on_name=False,
-               done=None):
+               done=None, print_storage=False):
     """Print a computation graph as text to stdout or a file.

     :type obj: Variable, Apply, or Function instance

@@ -70,6 +70,10 @@ def debugprint(obj, depth=-1, print_type=False,
     :type done: None or dict
     :param done: A dict where we store the ids of printed nodes.
         Useful to have multiple calls to debugprint share the same ids.
+    :type print_storage: bool
+    :param print_storage: If True, this will print the storage map
+        for Theano functions. Combined with allow_gc=False, we can see
+        the intermediate results after a Theano function has run.

     :returns: string if `file` == 'str', else file arg
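
Note: a hedged usage sketch of the new print_storage flag; the function
below is illustrative. With allow_gc=False, intermediate storage cells are
not freed after a call, so debugprint can show their contents:

    import theano
    import theano.tensor as T

    theano.config.allow_gc = False  # keep intermediate buffers alive
    x = T.vector('x')
    f = theano.function([x], (x + 1) * 2)
    f([1., 2.])
    # Each printed node is now followed by its storage cell: '[None]'
    # before the function has run, the cached ndarray contents afterwards.
    theano.printing.debugprint(f, print_storage=True)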
@@ -101,7 +105,8 @@ def debugprint(obj, depth=-1, print_type=False,
         done = dict()
     results_to_print = []
     profile_list = []
-    order = []
+    order = []  # Toposort
+    smap = []  # storage_map
     if isinstance(obj, (list, tuple, set)):
         lobj = obj
     else:

@@ -110,24 +115,41 @@ def debugprint(obj, depth=-1, print_type=False,
         if isinstance(obj, gof.Variable):
             results_to_print.append(obj)
             profile_list.append(None)
+            smap.append(None)
+            order.append(None)
         elif isinstance(obj, gof.Apply):
             results_to_print.extend(obj.outputs)
             profile_list.extend([None for item in obj.outputs])
+            smap.extend([None for item in obj.outputs])
+            order.extend([None for item in obj.outputs])
         elif isinstance(obj, Function):
             results_to_print.extend(obj.maker.fgraph.outputs)
             profile_list.extend(
                 [obj.profile for item in obj.maker.fgraph.outputs])
-            order = obj.maker.fgraph.toposort()
+            if print_storage:
+                smap.extend(
+                    [obj.fn.storage_map for item in obj.maker.fgraph.outputs])
+            else:
+                smap.extend(
+                    [None for item in obj.maker.fgraph.outputs])
+            topo = obj.maker.fgraph.toposort()
+            order.extend(
+                [topo for item in obj.maker.fgraph.outputs])
         elif isinstance(obj, gof.FunctionGraph):
             results_to_print.extend(obj.outputs)
             profile_list.extend([getattr(obj, 'profile', None)
                                  for item in obj.outputs])
-            order = obj.toposort()
+            smap.extend([getattr(obj, 'storage_map', None)
+                         for item in obj.outputs])
+            topo = obj.toposort()
+            order.extend([topo for item in obj.outputs])
         elif isinstance(obj, (integer_types, float, np.ndarray)):
             print(obj)
         elif isinstance(obj, (theano.In, theano.Out)):
             results_to_print.append(obj.variable)
             profile_list.append(None)
+            smap.append(None)
+            order.append(None)
         else:
             raise TypeError("debugprint cannot print an object of this type",
                             obj)
@@ -152,16 +174,16 @@ N.B.:
 to remove when optimizing a graph because their <total time> is very low.
 """, file=_file)

-    for r, p in zip(results_to_print, profile_list):
+    for r, p, s, o in zip(results_to_print, profile_list, smap, order):
         # Add the parent scan op to the list as well
         if (hasattr(r.owner, 'op') and
                 isinstance(r.owner.op, theano.scan_module.scan_op.Scan)):
             scan_ops.append(r)

         debugmode.debugprint(r, depth=depth, done=done, print_type=print_type,
-                             file=_file, order=order, ids=ids,
+                             file=_file, order=o, ids=ids,
                              scan_ops=scan_ops, stop_on_name=stop_on_name,
-                             profile=p)
+                             profile=p, smap=s)

     if len(scan_ops) > 0:
         print("", file=_file)
@@ -996,7 +1018,11 @@ def pydotprint(fct, outfile=None,
         else:
             new_name = basename + '_' + str(idx)
         new_name = os.path.join(path, new_name + ext)
-        pydotprint(scan_op.op.fn, new_name, compact, format, with_ids,
+        if hasattr(scan_op.op, 'fn'):
+            to_print = scan_op.op.fn
+        else:
+            to_print = scan_op.op.outputs
+        pydotprint(to_print, new_name, compact, format, with_ids,
                    high_contrast, cond_highlight, colorCodes,
                    max_label_size, scan_graphs)
@@ -1765,10 +1765,6 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
         bottom borders.
         pad_w is the number of zero-valued pixels added to each of the left
         and right borders.
-    nd
-        Number of dimensions of pooling, can be 2 or 3 for 2d or 3d pooling
-        If set to 3 all other params (except mode) must have an extra
-        dimension to match. 3 is only available for cudnn v3

     .. warning:: The cuDNN library only works with GPUs that have a compute
         capability of 3.0 or higher. This means that older GPUs will not
@@ -279,8 +279,7 @@ def test_pooling():
             a = f1(data).__array__()
             b = f2(data).__array__()
-            assert numpy.allclose(a, b,
-                                  atol=numpy.finfo(numpy.float32).eps)
+            utt.assert_allclose(a, b)

     # Test the grad
     for shp in [(1, 1, 2, 2),

@@ -338,7 +337,7 @@ def test_pooling():
         assert any([isinstance(node.op, AveragePoolGrad)
                     for node in fc.maker.fgraph.toposort()])
         c_out = fc(data)
-        assert numpy.allclose(c_out, g_out)
+        utt.assert_allclose(c_out, g_out)


 def test_pooling3d():
@@ -443,7 +442,7 @@ def test_pooling3d():
         fc = theano.function([x], theano.grad(out.sum(), x),
                              mode=mode_without_gpu)
         c_out = fc(data)
-        assert numpy.allclose(c_out, g_out)
+        utt.assert_allclose(c_out, g_out)


 def test_pooling_opt():
@@ -1357,8 +1356,10 @@ def test_conv3d_bwd():
     # Compare the results of the two implementations
     res_ref = f_ref()
     res = f()
-    utt.assert_allclose(res_ref[0], res[0])
-    utt.assert_allclose(res_ref[1], res[1])
+    # Needed for big sizes with some seeds; raise rtol to make the
+    # test pass with more seeds.
+    utt.assert_allclose(res_ref[0], res[0], rtol=2e-5)
+    utt.assert_allclose(res_ref[1], res[1], rtol=2e-5)

 test_cases = get_conv3d_test_cases()
 for (i_shape, f_shape, subsample), border_mode, conv_mode in test_cases:
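
Note: utt.assert_allclose from theano.tests.unittest_tools is preferred over
a bare `assert numpy.allclose(...)` because, on failure, it reports the
mismatching values and tolerances instead of an opaque AssertionError. A
standalone illustration under the loosened tolerance:

    import numpy
    from theano.tests import unittest_tools as utt

    a = numpy.array([1.0, 2.0], dtype='float32')
    b = a + 1e-6
    # Passes under the rtol used by test_conv3d_bwd; on failure it would
    # report which entries differ and by how much.
    utt.assert_allclose(a, b, rtol=2e-5)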
@@ -132,5 +132,10 @@ except ImportError:
             # Release lock on compilation directory.
             release_lock()

-from scan_perform.scan_perform import *
+# This warning is caused by Cython using the old NumPy C-API while we use
+# the new one; fixing it completely would require changing Cython itself.
+with warnings.catch_warnings():
+    warnings.filterwarnings("ignore",
+                            message="numpy.ndarray size changed")
+    from scan_perform.scan_perform import *
 assert version == get_version()
@@ -2654,14 +2654,20 @@ class Alloc(gof.Op):
         sh = [as_tensor_variable(s) for s in shape]
         bcast = []
         for i, s in enumerate(sh):
-            if s.type.dtype[:3] not in ('int', 'uin'):
-                if config.exception_verbosity == 'high':
-                    s_as_str = '\n' + min_informative_str(s)
-                else:
-                    s_as_str = str(s)
+            if config.exception_verbosity == 'high':
+                s_as_str = '\n' + min_informative_str(s)
+            else:
+                s_as_str = str(s)
+            if s.type.dtype[:3] not in ('int', 'uin'):
                 raise TypeError('Shape arguments to Alloc must be integers, '
                                 'but argument %s is not for apply node: %s' %
                                 (i, s_as_str))
+            if s.ndim != 0:
+                raise TypeError(
+                    "Each shape dimension to Alloc must be a scalar, "
+                    "but dimension %s has %d dimensions for apply node: %s" %
+                    (i, s.ndim, s_as_str))
             # if s is constant 1, then we're broadcastable in that dim
             try:
                 const_shp = get_scalar_constant_value(s)
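
Note: with the new ndim check, a non-scalar shape argument to Alloc now
fails at graph-construction time with a clear TypeError instead of crashing
later; the new `vec` entry in bad_build below covers it. An illustrative
repro, not taken from the commit:

    import numpy
    import theano.tensor as T

    T.alloc(numpy.float32(0), numpy.int32(2))     # fine: 0-d shape argument
    try:
        # A 1-d shape argument is now rejected at build time:
        T.alloc(numpy.float32(0), [numpy.int32(2)])
    except TypeError as e:
        print(e)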
@@ -782,22 +782,24 @@ class MakeVector(T.Op):
         # So there will be (1 * nb_dtype) + ((nb len(inp) - 1 ))
         # different c code with the following algo
         out_shape = len(inp)
-        out_dtype = numpy.dtype(node.outputs[0].dtype).num
+        out_num = numpy.dtype(node.outputs[0].dtype).num
+        # don't use dtype_%(out)s as when check_input=False, it isn't defined.
+        out_dtype = node.outputs[0].type.dtype_specs()[1]
         if len(inp) > 0:
             assert self.dtype == node.inputs[0].dtype
-            out_dtype = 'PyArray_TYPE(%s)' % inp[0]
+            out_num = 'PyArray_TYPE(%s)' % inp[0]

         ret = """
         npy_intp dims[1];
         dims[0] = %(out_shape)s;
         if(!%(out)s || PyArray_DIMS(%(out)s)[0] != %(out_shape)s){
             Py_XDECREF(%(out)s);
-            %(out)s = (PyArrayObject*)PyArray_EMPTY(1, dims, %(out_dtype)s, 0);
+            %(out)s = (PyArrayObject*)PyArray_EMPTY(1, dims, %(out_num)s, 0);
         }
         """ % locals()
         for idx, i in enumerate(inp):
             ret += """
-            *((dtype_%(out)s *)PyArray_GETPTR1(%(out)s, %(idx)s)) = *((dtype_%(out)s *) PyArray_DATA(%(i)s));
+            *((%(out_dtype)s *)PyArray_GETPTR1(%(out)s, %(idx)s)) = *((%(out_dtype)s *) PyArray_DATA(%(i)s));
             """ % locals()
         return ret
@@ -3468,7 +3470,6 @@ def local_mul_switch_sink(node):
         return False
     for idx, i in enumerate(node.inputs):
         if i.owner and i.owner.op == T.switch:
-            # import ipdb;ipdb.set_trace()
             switch = i.owner
             try:
                 if (get_scalar_constant_value(
@@ -4904,9 +4905,10 @@ register_canonicalize(local_inv_canon)
 @gof.local_optimizer([T.pow])
 def local_pow_canonicalize(node):
     if node.op == T.pow:
-        if local_mul_canonizer.get_constant(node.inputs[1]) == 0:
+        cst = local_mul_canonizer.get_constant(node.inputs[1])
+        if cst == 0:
             return [broadcast_like(1, node.outputs[0], node.fgraph)]
-        if local_mul_canonizer.get_constant(node.inputs[1]) == 1:
+        if cst == 1:
             return [broadcast_like(node.inputs[0], node.outputs[0], node.fgraph)]
     else:
         return False
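
Note: this rewrite only hoists the repeated get_constant() call into cst;
the optimization itself is unchanged: x**0 still canonicalizes to a ones
tensor broadcast like the output, and x**1 to x itself. An illustrative
check (the exact compiled graphs depend on which other optimizations are
enabled):

    import theano
    import theano.tensor as T

    x = T.vector('x')
    f0 = theano.function([x], x ** 0)  # compiled graph holds only ones
    f1 = theano.function([x], x ** 1)  # the pow op is optimized away
    theano.printing.debugprint(f1)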
@@ -2026,6 +2026,7 @@ AllocTester = makeBroadcastTester(
         bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)),
     ),
     bad_build=dict(
+        vec=(rand(1), [numpy.int32(2)]),
         too_big32=(rand(6, 2, 4), numpy.
                    int32(6), numpy.int32(2)),
         too_big32b=(rand(6, 2, 4), numpy.
@@ -159,7 +159,7 @@ class test_Broadcast(unittest.TestCase):
     cop = Elemwise

     openmp_minsize = 2*config.openmp_elemwise_minsize
-    openmp_minsize_sqrt = math.ceil(math.sqrt(openmp_minsize))
+    openmp_minsize_sqrt = int(math.ceil(math.sqrt(openmp_minsize)))

     # The order is important if you change them.
     linkers = [gof.PerformLinker, gof.CLinker]
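
Note: math.ceil() returns a float on Python 2, which breaks code that later
uses openmp_minsize_sqrt as an array dimension; wrapping it in int() fixes
that. In isolation:

    import math

    openmp_minsize = 2 * 1000  # stand-in for config.openmp_elemwise_minsize
    sqrt_f = math.ceil(math.sqrt(openmp_minsize))
    print(type(sqrt_f))        # float on Python 2
    print(int(sqrt_f))         # 45, usable as a shape dimension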
@@ -164,6 +164,8 @@ def test_debugprint():
     F = D + E
     G = C + F
+    mode = theano.compile.get_default_mode().including('fusion')
+    g = theano.function([A, B, D, E], G, mode=mode)

     # just test that it works
     debugprint(G)
@@ -249,6 +251,24 @@ def test_debugprint():
     assert s == reference

+    # test print_storage=True
+    s = StringIO()
+    debugprint(g, file=s, ids='', print_storage=True)
+    s = s.getvalue()
+    # The additional whitespace in the reference strings is needed!
+    reference = '\n'.join([
+        "Elemwise{add,no_inplace} '' 0 [None]",
+        " |A [None]",
+        " |B [None]",
+        " |D [None]",
+        " |E [None]",
+    ]) + '\n'
+
+    if s != reference:
+        print('--' + s + '--')
+        print('--' + reference + '--')
+    assert s == reference
+

 def test_scan_debugprint1():
     k = tensor.iscalar("k")
@@ -702,3 +722,28 @@ def test_scan_debugprint5():
     for truth, out in zip(expected_output.split("\n"), lines):
         assert truth.strip() == out.strip()
+
+
+def test_printing_scan():
+    # Skip test if pydot is not available.
+    if not theano.printing.pydot_imported:
+        raise SkipTest('pydot not available')
+
+    def f_pow2(x_tm1):
+        return 2 * x_tm1
+
+    state = theano.tensor.scalar('state')
+    n_steps = theano.tensor.iscalar('nsteps')
+    output, updates = theano.scan(f_pow2,
+                                  [],
+                                  state,
+                                  [],
+                                  n_steps=n_steps,
+                                  truncate_gradient=-1,
+                                  go_backwards=False)
+    f = theano.function([state, n_steps],
+                        output,
+                        updates=updates,
+                        allow_input_downcast=True)
+    theano.printing.pydotprint(output, scan_graphs=True)
+    theano.printing.pydotprint(f, scan_graphs=True)