Commit 8215457d authored by Frédéric Bastien

Merge pull request #4067 from abergeron/debugmode_empty

Make DebugMode handle a special version of perform.
...@@ -266,6 +266,19 @@ Optional methods or attributes ...@@ -266,6 +266,19 @@ Optional methods or attributes
As done in the Alloc op, you can return False only in some cases by As done in the Alloc op, you can return False only in some cases by
analyzing the graph from the node parameter. analyzing the graph from the node parameter.
.. function:: debug_perform(node, inputs, output_storage)
Undefined by default.
If you define this function then it will be used instead of C code
or perform() to do the computation while debugging (currently
DebugMode, but others may also use it in the future). It has the
same signature and contract as :func:`perform`.
This allows ops whose normal behaviour causes trouble under DebugMode
to adopt a different behaviour when run in that mode. If your op does
not have any such problems, do not implement this.
If you want your op to work with gradient.grad() you also need to If you want your op to work with gradient.grad() you also need to
implement the functions described below. implement the functions described below.
......
...@@ -1849,8 +1849,10 @@ class _Linker(gof.link.LocalLinker): ...@@ -1849,8 +1849,10 @@ class _Linker(gof.link.LocalLinker):
if new_node is not None: if new_node is not None:
node = new_node node = new_node
debug = hasattr(node.op, 'debug_perform')
try: try:
if not self.maker.mode.check_c_code: if not self.maker.mode.check_c_code or debug:
raise utils.MethodNotDefined() raise utils.MethodNotDefined()
# Ops that do not inherit from gof.op.Op don't have certain # Ops that do not inherit from gof.op.Op don't have certain
# methods defined that the CLinker expects (Scan is an # methods defined that the CLinker expects (Scan is an
...@@ -1868,18 +1870,18 @@ class _Linker(gof.link.LocalLinker): ...@@ -1868,18 +1870,18 @@ class _Linker(gof.link.LocalLinker):
# Pure ops don't really have a perform ( or their perform just # Pure ops don't really have a perform ( or their perform just
# raises an not implemented exception), so in those cases we # raises an not implemented exception), so in those cases we
# consider that we don't have a python implementation # consider that we don't have a python implementation
if (self.maker.mode.check_py_code or thunks_c[-1] is None) and \ if (((self.maker.mode.check_py_code or thunks_c[-1] is None) and
node.op.perform.__code__ != gof.op.PureOp.perform.__code__: node.op.perform.__code__ != gof.op.PureOp.perform.__code__) or
debug):
thunk = node.op.make_py_thunk(node, storage_map, compute_map, thunk = node.op.make_py_thunk(node, storage_map, compute_map,
no_recycling) no_recycling, debug=debug)
thunks_py.append(thunk) thunks_py.append(thunk)
else: else:
thunks_py.append(None) thunks_py.append(None)
if not self.maker.mode.check_c_code and thunks_py[-1] is None: if not self.maker.mode.check_c_code and thunks_py[-1] is None:
_logger.warn( _logger.warn("Op %s doesn't have a perform, "
"Op %s don't have a perform, forcing check of the c code" % "forcing check of the C code" % node.op)
node.op)
thunk = node.op.make_c_thunk(node, storage_map, compute_map, thunk = node.op.make_c_thunk(node, storage_map, compute_map,
no_recycling) no_recycling)
thunks_c[-1] = thunk thunks_c[-1] = thunk
......
...@@ -1177,11 +1177,13 @@ class CLinker(link.Linker): ...@@ -1177,11 +1177,13 @@ class CLinker(link.Linker):
List of lists of length 1. In order to use List of lists of length 1. In order to use
the thunk returned by __compile__, the inputs must be put in the thunk returned by __compile__, the inputs must be put in
that storage. If None, storage will be allocated. that storage. If None, storage will be allocated.
@param output_storage: list of lists of length 1. The thunk returned output_storage: list of lists of length 1.
by __compile__ will put the variables of the computation in these The thunk returned by __compile__ will put the variables
lists. If None, storage will be allocated. of the computation in these lists. If None, storage will
@param storage_map: dict that map variables to storages. This is used be allocated.
when you need to customize the storage of this thunk. storage_map: dict that map variables to storages.
This is used when you need to customize the storage of
this thunk.
Returns: thunk, input_storage, output_storage Returns: thunk, input_storage, output_storage
......
...@@ -890,7 +890,8 @@ class Op(utils.object2, PureOp, CLinkerOp): ...@@ -890,7 +890,8 @@ class Op(utils.object2, PureOp, CLinkerOp):
rval.lazy = False rval.lazy = False
return rval return rval
def make_py_thunk(self, node, storage_map, compute_map, no_recycling): def make_py_thunk(self, node, storage_map, compute_map, no_recycling,
debug=False):
""" """
Like make_thunk() but only makes python thunks. Like make_thunk() but only makes python thunks.
...@@ -898,7 +899,10 @@ class Op(utils.object2, PureOp, CLinkerOp): ...@@ -898,7 +899,10 @@ class Op(utils.object2, PureOp, CLinkerOp):
node_input_storage = [storage_map[r] for r in node.inputs] node_input_storage = [storage_map[r] for r in node.inputs]
node_output_storage = [storage_map[r] for r in node.outputs] node_output_storage = [storage_map[r] for r in node.outputs]
p = node.op.perform if debug:
p = node.op.debug_perform
else:
p = node.op.perform
params = node.run_params() params = node.run_params()
......
...@@ -3682,6 +3682,13 @@ class GpuAllocEmpty(GpuOp): ...@@ -3682,6 +3682,13 @@ class GpuAllocEmpty(GpuOp):
output.type.filter_checks_isfinite = False output.type.filter_checks_isfinite = False
return Apply(self, shape, [output]) return Apply(self, shape, [output])
def debug_perform(self, node, inputs, out_):
self.perform(self, node, inputs, out_)
# __setitem__ is limited on CudaNdarray
tmp = numpy.empty(out_[0][0].shape, dtype='float32')
tmp.fill(-123456789)
out_[0][0][:] = tmp
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
sh = tuple([int(i) for i in inputs]) sh = tuple([int(i) for i in inputs])
......
...@@ -723,6 +723,10 @@ class GpuAllocEmpty(HideC, Alloc): ...@@ -723,6 +723,10 @@ class GpuAllocEmpty(HideC, Alloc):
output.type.filter_checks_isfinite = False output.type.filter_checks_isfinite = False
return Apply(self, sh, [output]) return Apply(self, sh, [output])
def debug_perform(self, node, inputs, out_, ctx):
self.perform(node, inputs, out_, ctx)
out_[0][0][:] = -123456789
def perform(self, node, inputs, out_, ctx): def perform(self, node, inputs, out_, ctx):
out = out_[0] out = out_[0]
sh = [int(i) for i in inputs] sh = [int(i) for i in inputs]
......
...@@ -6240,6 +6240,10 @@ class AllocEmpty(gof.Op): ...@@ -6240,6 +6240,10 @@ class AllocEmpty(gof.Op):
output.type.filter_checks_isfinite = False output.type.filter_checks_isfinite = False
return Apply(self, shape, [output]) return Apply(self, shape, [output])
def debug_perform(self, node, inputs, out_):
    """DebugMode variant of perform() for AllocEmpty.

    Runs the regular allocation, then fills the fresh (uninitialized)
    output with a recognizable marker value so DebugMode comparisons
    see deterministic contents.
    """
    self.perform(node, inputs, out_)
    storage, = out_
    storage[0].fill(-123456789)
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
sh = tuple([int(i) for i in inputs]) sh = tuple([int(i) for i in inputs])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论