CLinker using preallocated memory for outputs

Update the key for the compilation cache: the generated code is different, so we should force recompilation.

CLinker using preallocated memory for outputs
9ba00a6e · Pascal Lamblin · 52b83f7a · 9ba00a6e · 9ba00a6e · 9ba00a6e
--- a/theano/gof/cc.py
+++ b/theano/gof/cc.py
@@ -301,6 +301,15 @@ def get_c_extract(r, name, sub):
    return pre + r.type.c_extract(name, sub)


+def get_c_extract_out(r, name, sub):
+    """Return C code to fetch storage_<name>[0] and apply c_extract_out."""
+    pre = """
+    py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0);
+    {Py_XINCREF(py_%(name)s);}
+    """ % locals()
+    return pre + r.type.c_extract_out(name, sub)
+
+
 def get_c_cleanup(r, name, sub):
    """WRITEME"""
    post = """
@@ -514,18 +523,21 @@ class CLinker(link.Linker):
                    policy = [[get_c_declare, get_c_init, get_c_cleanup],
                              [get_nothing, get_nothing, get_nothing]]
            elif variable in self.outputs:
-                # outputs don't need to be extracted from Python, so
-                # we call c_init rather than c_extract
                if variable.type.c_is_simple() or variable in no_recycling:
+                    # Do not extract output from Python
                    policy = [[get_nothing, get_nothing, get_nothing],
                              [get_c_declare, get_c_init,
                                  (get_c_sync, get_c_cleanup)]]
                else:
-                    # it is useful for complex outputs to reuse
-                    # storage at each run, so we only clean up in the
-                    # destructor
-                    policy = [[get_c_declare, get_c_init, get_c_cleanup],
-                              [get_nothing, get_nothing, get_c_sync]]
+                    # We try to use the output that is pre-allocated.
+                    # The linker will usually just reuse the storage
+                    # from last run, but in the first execution,
+                    # it will be None.
+                    # We clean-up at each run to enable garbage collection
+                    # in the Linker.
+                    policy = [[get_nothing, get_nothing, get_nothing],
+                              [get_c_declare, get_c_extract_out,
+                                  (get_c_sync, get_c_cleanup)]]
            else:
                raise Exception("what the fuck")

@@ -961,10 +973,12 @@ class CLinker(link.Linker):
        be re-used by the computation (the elements of
        self.no_recycling) can affect the code that is generated.

-        The format of each Op's output signature is simply a list of
+        The format of each Op's output signature is a (version, no_recycle)
+        pair, where version is incremented if codegen() changes how it
+        handles the outputs, and no_recycle is simply a list of
        booleans, indicating whether each output is in the
-        no_recycling set.
-
+        no_recycling set. Older versions of compiled modules only have the
+        no_recycle list.
        """
        return self.cmodule_key_(self.env, self.no_recycling,
                          compile_args=self.compile_args(),
@@ -1086,7 +1100,8 @@ class CLinker(link.Linker):
                node.op,
                tuple((i.type, in_sig(i, node_pos, ipos))
                    for ipos, i in enumerate(node.inputs)),
-                tuple(o in no_recycling for o in node.outputs)))
+                (1,  # Increment if cmodule change its handling of outputs
+                    tuple(o in no_recycling for o in node.outputs))))

            if error_on_play[0]:
                # if one of the signatures is not hashable

--- a/theano/gof/type.py
+++ b/theano/gof/type.py
@@ -139,6 +139,26 @@ class CLinkerType(CLinkerObject):
        """
        raise MethodNotDefined("c_extract", type(self), self.__class__.__name__)

+    def c_extract_out(self, name, sub):
+        """Optional: C code to extract a PyObject * instance for an output.
+
+        Unlike c_extract, c_extract_out has to accept Py_None (variable left
+        uninitialized): this default emits c_init for it, c_extract otherwise.
+        """
+        return """
+        if (py_%(name)s == Py_None)
+        {
+            %(c_init_code)s
+        }
+        else
+        {
+            %(c_extract_code)s
+        }
+        """ % dict(
+                name=name,
+                c_init_code=self.c_init(name, sub),
+                c_extract_code=self.c_extract(name, sub))
+
    def c_cleanup(self, name, sub):
        """Optional: Return c code to clean up after `c_extract`.


--- a/theano/sandbox/cuda/type.py
+++ b/theano/sandbox/cuda/type.py
@@ -319,6 +319,13 @@ class CudaNdarrayType(Type):
            assert(%(name)s);
            Py_INCREF(py_%(name)s);
        }
+        else if (py_%(name)s == Py_None)
+        {
+            PyErr_SetString(PyExc_TypeError,
+                            "expected a CudaNdarray, not None");
+            %(name)s = NULL;
+            %(fail)s;
+        }
        else
        {
            //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %%p %%i\\n", py_%(name)s, (py_%(name)s->ob_refcnt));