Commit eace991b authored by nouiz

Merge pull request #562 from lamblin/test_preallocated_output_rebase

Checks for preallocated output memory, take 2
......@@ -380,7 +380,7 @@ import theano and print the config variable, as in:
.. attribute:: config.DebugMode.check_preallocated_output
-Default: ``'ALL'``
+Default: ``''``
A list of kinds of preallocated memory to use as output buffers for
each Op's computations, separated by ``:``. Implemented modes are:
......@@ -388,6 +388,8 @@ import theano and print the config variable, as in:
* ``"previous"``: reuse previously-returned memory,
* ``"c_contiguous"``: newly-allocated C-contiguous memory,
* ``"f_contiguous"``: newly-allocated Fortran-contiguous memory,
* ``"strided"``: non-contiguous memory with various stride patterns,
* ``"wrong_size"``: memory with bigger or smaller dimensions,
* ``"ALL"``: placeholder for all of the above.
In order not to test with preallocated memory, use an empty string, ``""``.
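For example, to exercise only two of these checks while functions run under DebugMode, the flag can be set to a colon-separated subset of the modes above before compiling. A minimal sketch, assuming the flag may be assigned at runtime (it can equally be passed through THEANO_FLAGS):

    import theano
    # Only test reuse of previously-returned memory and C-contiguous buffers.
    theano.config.DebugMode.check_preallocated_output = "previous:c_contiguous"
    # Equivalent environment setting (spelling follows the attribute name documented above):
    #   THEANO_FLAGS="mode=DebugMode,DebugMode.check_preallocated_output=previous:c_contiguous"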
......
from theano import gof
from theano import gradient as G
-from function_module import orig_function
+from theano.compile.function_module import orig_function
+from theano.gof import ops_with_inner_function
class OpFromGraph(gof.Op):
......@@ -99,3 +100,7 @@ class OpFromGraph(gof.Op):
return [go(*(inputs + output_grads)) for go in self.grad_ops]
else:
raise NotImplementedError
+# Since OpFromGraph contains a Theano compiled function, we should let
+# DebugMode know about it
+ops_with_inner_function[OpFromGraph] = 'fn'
......@@ -264,7 +264,10 @@ def test_stochasticoptimization():
try:
theano.function([a, b],
theano.tensor.add(a, b),
-mode=debugmode.DebugMode(optimizer=opt, check_c_code=True))
+mode=debugmode.DebugMode(
+    optimizer=opt,
+    check_c_code=True,
+    stability_patience=max(2, config.DebugMode.patience)))
except debugmode.StochasticOrder:
return # TEST PASS
assert False
......
......@@ -18,7 +18,7 @@ from link import \
Container, Linker, LocalLinker, PerformLinker, WrapLinker, WrapLinkerMany
from op import \
-Op, PureOp
+Op, PureOp, ops_with_inner_function
from opt import (Optimizer, optimizer, SeqOptimizer,
MergeOptimizer, MergeOptMerge,
......
......@@ -717,3 +717,17 @@ def get_debug_values(*args):
return rval
return [tuple(rval)]
+ops_with_inner_function = {}
+"""
+Registry of Ops that have an inner compiled Theano function.
+The keys are Op classes (not instances), and values are the name of the
+attribute that contains the function. For instance, if the function is
+self.fn, the value will be 'fn'.
+We need that to be able not to run debug checks a number of times that is
+exponential in the nesting level of those ops.
+For instance, Scan will be registered here.
+"""
......@@ -37,11 +37,6 @@ def my_rand(*shape):
return theano._asarray(numpy.random.rand(*shape), dtype='float32')
-def transpose(cuda_mat):
-    # The easiest way to transpose a cuda matrix for now
-    return tcn.dimshuffle(cuda_mat, [1, 0])
def test_dot22():
def cmp(a_shp, b_shp):
a0 = my_rand(*a_shp)
......
......@@ -54,6 +54,11 @@ class CudaNdarrayType(Type):
A cyclic dependency is avoided by not hardcoding this class.
"""
+value_zeros = staticmethod(cuda.CudaNdarray.zeros)
+"""
+Create an CudaNdarray full of 0 values
+"""
def __init__(self, broadcastable, name=None, dtype=None):
if dtype != None and dtype != 'float32':
raise TypeError('%s only supports dtype float32 for now. Tried '
......
......@@ -278,8 +278,8 @@ class Scan(PureOp):
str(outer_mitsot),
argoffset + idx,
outer_mitsot.type.dtype,
-otuer_mitsot.type.ndim,
-str(inner_mitsot[ipos + k]),
+outer_mitsot.type.ndim,
+str(inner_mitsots[ipos + k]),
inner_mitsots[ipos + k].type.dtype,
inner_mitsots[ipos + k].type.ndim))
ipos += len(itaps)
......@@ -1676,6 +1676,11 @@ class Scan(PureOp):
return final_outs
+# Since Scan is an op that contains a Theano compiled function, it is
+# useful to let DebugMode know about it.
+gof.ops_with_inner_function[Scan] = 'fn'
@theano.compile.profilemode.register_profiler_printer
def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size,
......
......@@ -1024,6 +1024,13 @@ class TensorType(Type):
else:
return ()
+def value_zeros(self, shape):
+    """
+    Create an numpy ndarray full of 0 values.
+    """
+    return numpy.zeros(shape, dtype=self.dtype)
# Register CudaNdarrayType to the OutputGuard list of known types
# to have OutputGuard generate C code for this type.
theano.compile.mode.register_OutputGuard_c_code(TensorType)
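Together with the CudaNdarrayType.value_zeros added above, this gives DebugMode a type-independent way to preallocate a zero-filled output buffer of a given shape. A small usage sketch, assuming a plain float64 matrix type:

    import numpy
    from theano.tensor import TensorType

    matrix64 = TensorType('float64', broadcastable=(False, False))
    buf = matrix64.value_zeros((2, 3))   # numpy.ndarray of zeros
    assert buf.shape == (2, 3)
    assert buf.dtype == numpy.dtype('float64')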
......
......@@ -742,34 +742,45 @@ class Elemwise(Op):
raise ValueError('\n'.join(msg_chunks))
else:
raise ValueError(base_exc_str)
# Other mismatches will be caught by the ufunc
+# Determine the shape of outputs
+out_shape = []
+for values in zip(*[input.shape for input in inputs]):
+    if numpy.prod(values) == 0:
+        # All non-broadcasted dimensions should be zero
+        assert max(values) <= 1
+        out_shape.append(0)
+    else:
+        out_shape.append(max(values))
+out_shape = tuple(out_shape)
if not self.inplace_pattern:
for output, storage in zip(node.outputs, output_storage):
odat = storage[0]
-shape = [max(values)
-         for values in zip(*[input.shape for input in inputs])]
if odat is not None:
-    # reuse storage if we can
-    odat.resize(shape, refcheck=0)
-else:
-    odat = numpy.ndarray(shape, dtype=output.type.dtype)
+    if odat.shape != out_shape:
+        # It is unsafe to try to resize odat,
+        # we have to allocate output storage.
+        odat = None
+if odat is None:
+    odat = numpy.ndarray(out_shape, dtype=output.type.dtype)
storage[0] = odat
else:
-for i, (output, storage) in enumerate(zip(node.outputs,
-                                           output_storage)):
+for i, (output, storage) in enumerate(
+        zip(node.outputs, output_storage)):
#i is an output idx
if i in self.inplace_pattern:
odat = inputs[self.inplace_pattern[i]]
else:
odat = storage[0]
-shape = [max(values)
-         for values in zip(*[input.shape
-                             for input in inputs])]
if odat is not None:
-    odat.resize(shape, refcheck=0)
-else:
-    odat = numpy.ndarray(shape, dtype=output.type.dtype)
+    if odat.shape != out_shape:
+        # It is unsafe to try to resize odat,
+        # we have to allocate output storage.
+        odat = None
+if odat is None:
+    odat = numpy.ndarray(out_shape,
+                         dtype=output.type.dtype)
storage[0] = odat
ufunc_args = inputs # + output_storage
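The new out_shape computation handles empty inputs explicitly: on a dimension where some input has length 0 the output gets 0 (any non-zero length there must come from a broadcastable input), otherwise the output takes the largest length. And instead of resizing a preallocated buffer whose shape does not match, the buffer is discarded and a fresh one allocated, since resizing memory that may be viewed elsewhere is unsafe. A worked example of the shape rule alone, outside Theano:

    import numpy

    shapes = [(1, 5), (0, 5)]        # a broadcasted row combined with an empty matrix
    out_shape = []
    for values in zip(*shapes):
        if numpy.prod(values) == 0:
            # a zero-length dimension wins; the other lengths must be broadcastable (<= 1)
            assert max(values) <= 1
            out_shape.append(0)
        else:
            out_shape.append(max(values))
    print(tuple(out_shape))          # -> (0, 5)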
......@@ -825,21 +836,16 @@ class Elemwise(Op):
# always return an ndarray with dtype object
variable = numpy.asarray(variable, dtype=nout.dtype)
-if (hasattr(variable, 'shape')
-        and storage[0].shape != variable.shape):
-    if numpy.prod(variable.shape) == 0:
-        # numpy don't resize from a shape (1,5) to (0,5)
-        # This bypass the inplace...
-        # But I it is important in this case.
-        storage[0] = variable
-        continue
-    storage[0].resize(variable.shape)
-if storage[0].shape:
-    storage[0][:] = variable
+# The storage has been resized earlier.
+if hasattr(variable, 'shape'):
+    assert storage[0].shape == variable.shape
else:
-    storage[0].itemset(variable)
+    # If variable has not shape, then it is a scalar.
+    assert numpy.prod(storage[0].shape) == 1
+storage[0][...] = variable
assert str(storage[0].dtype) != 'object'
# the following should be used instead of the previous loop,
# unfortunately it tends to segfault
# self.ufunc(*(ufunc_args+[s[0] for s in output_storage]))
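Assigning through storage[0][...] works for both array-valued and scalar results, which is why the shape-dependent resize/itemset branch above could be dropped once the buffer is guaranteed to have the right shape. A quick illustration with plain numpy:

    import numpy

    buf = numpy.empty((), dtype='float64')       # 0-d buffer holding exactly one element
    buf[...] = 3.5                                # scalar written in place, no resize needed
    assert buf.shape == () and float(buf) == 3.5

    buf2 = numpy.empty((2, 3), dtype='float64')
    buf2[...] = numpy.ones((2, 3))                # array result copied into preallocated memory
    assert (buf2 == 1.0).all()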
......
......@@ -521,7 +521,7 @@ class MakeVector(T.Op):
def perform(self, node, inputs, out_):
out, = out_
# not calling theano._asarray as optimization
-if out[0] is None:
+if (out[0] is None) or (out[0].size != len(inputs)):
out[0] = theano._asarray(inputs, dtype=node.outputs[0].dtype)
else:
# assume that out has correct dtype. there is no cheap way to check
......
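The extra size test matters once DebugMode can hand perform() a preallocated buffer of the wrong size (the ``"wrong_size"`` mode above): a vector whose length does not match the number of inputs has to be replaced rather than written into. A minimal illustration of the guard, using plain numpy and made-up values:

    import numpy

    inputs = [1.0, 2.0, 3.0]
    out = [numpy.zeros(5)]                        # wrong-size preallocated buffer
    if (out[0] is None) or (out[0].size != len(inputs)):
        out[0] = numpy.asarray(inputs, dtype='float64')   # reallocate at the right size
    assert out[0].shape == (3,)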