提交 9ba00a6e authored 作者: Pascal Lamblin's avatar Pascal Lamblin

CLinker uses preallocated memory for outputs

Update the key for the compilation cache: the generated code is different, so we should force recompilation.
上级 52b83f7a
......@@ -301,6 +301,15 @@ def get_c_extract(r, name, sub):
return pre + r.type.c_extract(name, sub)
def get_c_extract_out(r, name, sub):
    """Return the C code that fetches the output `name` from its storage.

    The generated snippet loads element 0 of the ``storage_<name>`` list
    into ``py_<name>`` and increments its reference count (if non-NULL).
    It is followed by whatever ``r.type.c_extract_out(name, sub)`` returns.
    """
    # Only `name` is interpolated in the prologue, so pass it explicitly.
    fetch_template = """
py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0);
{Py_XINCREF(py_%(name)s);}
"""
    fetch_code = fetch_template % {"name": name}
    return fetch_code + r.type.c_extract_out(name, sub)
def get_c_cleanup(r, name, sub):
"""WRITEME"""
post = """
......@@ -514,18 +523,21 @@ class CLinker(link.Linker):
policy = [[get_c_declare, get_c_init, get_c_cleanup],
[get_nothing, get_nothing, get_nothing]]
elif variable in self.outputs:
# outputs don't need to be extracted from Python, so
# we call c_init rather than c_extract
if variable.type.c_is_simple() or variable in no_recycling:
# Do not extract output from Python
policy = [[get_nothing, get_nothing, get_nothing],
[get_c_declare, get_c_init,
(get_c_sync, get_c_cleanup)]]
else:
# it is useful for complex outputs to reuse
# storage at each run, so we only clean up in the
# destructor
policy = [[get_c_declare, get_c_init, get_c_cleanup],
[get_nothing, get_nothing, get_c_sync]]
# We try to use the output that is pre-allocated.
# The linker will usually just reuse the storage
# from last run, but in the first execution,
# it will be None.
# We clean-up at each run to enable garbage collection
# in the Linker.
policy = [[get_nothing, get_nothing, get_nothing],
[get_c_declare, get_c_extract_out,
(get_c_sync, get_c_cleanup)]]
else:
raise Exception("what the fuck")
......@@ -961,10 +973,12 @@ class CLinker(link.Linker):
be re-used by the computation (the elements of
self.no_recycling) can affect the code that is generated.
The format of each Op's output signature is simply a list of
The format of each Op's output signature is a (version, no_recycle)
pair, where version is incremented if codegen() changes how it
handles the outputs, and no_recycle is simply a list of
booleans, indicating whether each output is in the
no_recycling set.
no_recycling set. Older versions of compiled modules only have the
no_recycle list.
"""
return self.cmodule_key_(self.env, self.no_recycling,
compile_args=self.compile_args(),
......@@ -1086,7 +1100,8 @@ class CLinker(link.Linker):
node.op,
tuple((i.type, in_sig(i, node_pos, ipos))
for ipos, i in enumerate(node.inputs)),
tuple(o in no_recycling for o in node.outputs)))
(1, # Increment if cmodule change its handling of outputs
tuple(o in no_recycling for o in node.outputs))))
if error_on_play[0]:
# if one of the signatures is not hashable
......
......@@ -139,6 +139,26 @@ class CLinkerType(CLinkerObject):
"""
raise MethodNotDefined("c_extract", type(self), self.__class__.__name__)
def c_extract_out(self, name, sub):
    """Optional: C code to extract a PyObject * instance for an output.

    Unlike `c_extract`, the code returned here has to cope with
    ``py_<name>`` being ``Py_None``. In that case the code from
    `c_init` is run instead of the code from `c_extract`.
    """
    # Generate both branches up front, c_init first and then c_extract.
    init_branch = self.c_init(name, sub)
    extract_branch = self.c_extract(name, sub)
    template = """
if (py_%(name)s == Py_None)
{
%(c_init_code)s
}
else
{
%(c_extract_code)s
}
"""
    substitutions = {
        "name": name,
        "c_init_code": init_branch,
        "c_extract_code": extract_branch,
    }
    return template % substitutions
def c_cleanup(self, name, sub):
"""Optional: Return c code to clean up after `c_extract`.
......
......@@ -319,6 +319,13 @@ class CudaNdarrayType(Type):
assert(%(name)s);
Py_INCREF(py_%(name)s);
}
else if (py_%(name)s == Py_None)
{
PyErr_SetString(PyExc_TypeError,
"expected a CudaNdarray, not None");
%(name)s = NULL;
%(fail)s;
}
else
{
//fprintf(stderr, "FAILING c_extract CNDA object w refcnt %%p %%i\\n", py_%(name)s, (py_%(name)s->ob_refcnt));
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论