提交 9ba00a6e authored 作者: Pascal Lamblin's avatar Pascal Lamblin

CLinker uses preallocated memory for outputs

Update the key for the compilation cache: the generated code is different, so we must force recompilation.
上级 52b83f7a
...@@ -301,6 +301,15 @@ def get_c_extract(r, name, sub): ...@@ -301,6 +301,15 @@ def get_c_extract(r, name, sub):
return pre + r.type.c_extract(name, sub) return pre + r.type.c_extract(name, sub)
def get_c_extract_out(r, name, sub):
    """Build the C snippet that fetches an output variable from its storage.

    The snippet reads element 0 of the ``storage_<name>`` list into
    ``py_<name>``, takes a reference to it with ``Py_XINCREF``, and is
    followed by whatever ``r.type.c_extract_out(name, sub)`` returns.

    :param r: variable whose ``type`` provides ``c_extract_out``.
    :param name: C-level name used to form the generated identifiers.
    :param sub: substitution dictionary, handed unchanged to the type.
    :returns: the generated C source as a single string.
    """
    # Only ``name`` is referenced by the template, so an explicit mapping
    # is enough for the substitution.
    fetch_code = """
py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0);
{Py_XINCREF(py_%(name)s);}
""" % {"name": name}
    type_code = r.type.c_extract_out(name, sub)
    return fetch_code + type_code
def get_c_cleanup(r, name, sub): def get_c_cleanup(r, name, sub):
"""WRITEME""" """WRITEME"""
post = """ post = """
...@@ -514,18 +523,21 @@ class CLinker(link.Linker): ...@@ -514,18 +523,21 @@ class CLinker(link.Linker):
policy = [[get_c_declare, get_c_init, get_c_cleanup], policy = [[get_c_declare, get_c_init, get_c_cleanup],
[get_nothing, get_nothing, get_nothing]] [get_nothing, get_nothing, get_nothing]]
elif variable in self.outputs: elif variable in self.outputs:
# outputs don't need to be extracted from Python, so
# we call c_init rather than c_extract
if variable.type.c_is_simple() or variable in no_recycling: if variable.type.c_is_simple() or variable in no_recycling:
# Do not extract output from Python
policy = [[get_nothing, get_nothing, get_nothing], policy = [[get_nothing, get_nothing, get_nothing],
[get_c_declare, get_c_init, [get_c_declare, get_c_init,
(get_c_sync, get_c_cleanup)]] (get_c_sync, get_c_cleanup)]]
else: else:
# it is useful for complex outputs to reuse # We try to use the output that is pre-allocated.
# storage at each run, so we only clean up in the # The linker will usually just reuse the storage
# destructor # from last run, but in the first execution,
policy = [[get_c_declare, get_c_init, get_c_cleanup], # it will be None.
[get_nothing, get_nothing, get_c_sync]] # We clean-up at each run to enable garbage collection
# in the Linker.
policy = [[get_nothing, get_nothing, get_nothing],
[get_c_declare, get_c_extract_out,
(get_c_sync, get_c_cleanup)]]
else: else:
raise Exception("what the fuck") raise Exception("what the fuck")
...@@ -961,10 +973,12 @@ class CLinker(link.Linker): ...@@ -961,10 +973,12 @@ class CLinker(link.Linker):
be re-used by the computation (the elements of be re-used by the computation (the elements of
self.no_recycling) can affect the code that is generated. self.no_recycling) can affect the code that is generated.
The format of each Op's output signature is simply a list of The format of each Op's output signature is a (version, no_recycle)
pair, where version is incremented if codegen() changes how it
handles the outputs, and no_recycle is simply a list of
booleans, indicating whether each output is in the booleans, indicating whether each output is in the
no_recycling set. no_recycling set. Older versions of compiled modules only have the
no_recycle list.
""" """
return self.cmodule_key_(self.env, self.no_recycling, return self.cmodule_key_(self.env, self.no_recycling,
compile_args=self.compile_args(), compile_args=self.compile_args(),
...@@ -1086,7 +1100,8 @@ class CLinker(link.Linker): ...@@ -1086,7 +1100,8 @@ class CLinker(link.Linker):
node.op, node.op,
tuple((i.type, in_sig(i, node_pos, ipos)) tuple((i.type, in_sig(i, node_pos, ipos))
for ipos, i in enumerate(node.inputs)), for ipos, i in enumerate(node.inputs)),
tuple(o in no_recycling for o in node.outputs))) (1, # Increment if cmodule change its handling of outputs
tuple(o in no_recycling for o in node.outputs))))
if error_on_play[0]: if error_on_play[0]:
# if one of the signatures is not hashable # if one of the signatures is not hashable
......
...@@ -139,6 +139,26 @@ class CLinkerType(CLinkerObject): ...@@ -139,6 +139,26 @@ class CLinkerType(CLinkerObject):
""" """
raise MethodNotDefined("c_extract", type(self), self.__class__.__name__) raise MethodNotDefined("c_extract", type(self), self.__class__.__name__)
def c_extract_out(self, name, sub):
    """Optional: C code to extract a PyObject * instance for an output.

    Unlike c_extract, c_extract_out has to accept Py_None, meaning
    that the variable should be left uninitialized.

    The generated code tests ``py_<name>`` against ``Py_None`` at run
    time: when it is ``Py_None`` the code from ``c_init`` is executed,
    otherwise the code from ``c_extract`` is executed.

    :param name: C-level name used to form the generated identifiers.
    :param sub: substitution dictionary, forwarded to ``c_init`` and
        ``c_extract``.
    :returns: the generated C source as a single string.
    """
    # Build both branches up front, c_init first and then c_extract.
    init_branch = self.c_init(name, sub)
    extract_branch = self.c_extract(name, sub)
    template = """
if (py_%(name)s == Py_None)
{
%(c_init_code)s
}
else
{
%(c_extract_code)s
}
"""
    return template % {
        "name": name,
        "c_init_code": init_branch,
        "c_extract_code": extract_branch,
    }
def c_cleanup(self, name, sub): def c_cleanup(self, name, sub):
"""Optional: Return c code to clean up after `c_extract`. """Optional: Return c code to clean up after `c_extract`.
......
...@@ -319,6 +319,13 @@ class CudaNdarrayType(Type): ...@@ -319,6 +319,13 @@ class CudaNdarrayType(Type):
assert(%(name)s); assert(%(name)s);
Py_INCREF(py_%(name)s); Py_INCREF(py_%(name)s);
} }
else if (py_%(name)s == Py_None)
{
PyErr_SetString(PyExc_TypeError,
"expected a CudaNdarray, not None");
%(name)s = NULL;
%(fail)s;
}
else else
{ {
//fprintf(stderr, "FAILING c_extract CNDA object w refcnt %%p %%i\\n", py_%(name)s, (py_%(name)s->ob_refcnt)); //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %%p %%i\\n", py_%(name)s, (py_%(name)s->ob_refcnt));
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论