提交 534a1953 作者: Olivier Breuleux

documented cc.py and cutils.py

上级 7dcce643
...@@ -50,8 +50,22 @@ def compile_dir(): ...@@ -50,8 +50,22 @@ def compile_dir():
class CodeBlock: class CodeBlock:
"""
Represents a computation unit composed of:
* declare -> C code that declares variables for use by the computation
* behavior -> C code that performs the computation
* cleanup -> C code that cleans up things allocated or incref-ed in behavior
"""
def __init__(self, declare, behavior, cleanup, sub): def __init__(self, declare, behavior, cleanup, sub):
"""
Initialize a CodeBlock with templatized declare, behavior and cleanup.
The sub parameter will be used in the other arguments' templates. sub
should contain a key called 'id' that maps to an identifier for this block.
The identifier will be used to determine the failure code and a label
to jump to. It should also contain a key called 'failure_var' that contains
the name of the variable that contains the error code.
"""
self.declare = declare % sub self.declare = declare % sub
behavior_sub = copy(sub) behavior_sub = copy(sub)
behavior_sub['fail'] = "{%(failure_var)s = %(id)s; goto __label_%(id)i;}" % sub behavior_sub['fail'] = "{%(failure_var)s = %(id)s; goto __label_%(id)i;}" % sub
...@@ -64,6 +78,22 @@ class CodeBlock: ...@@ -64,6 +78,22 @@ class CodeBlock:
def code_gen(blocks): def code_gen(blocks):
"""
From a list of CodeBlock instances, returns a string that executes them
all in sequence. E.g., for (decl1, task1, cleanup1) and (decl2, task2, cleanup2),
the returned string will be of the form:
decl1
decl2
{
task1
{
task2
cleanup2
}
cleanup1
}
"""
decl = "" decl = ""
head = "" head = ""
...@@ -76,6 +106,26 @@ def code_gen(blocks): ...@@ -76,6 +106,26 @@ def code_gen(blocks):
def struct_gen(args, struct_builders, blocks, sub): def struct_gen(args, struct_builders, blocks, sub):
"""
Generates a struct conforming to the following specifications:
* args -> all of the PyObject* type, stored in the struct;
they represent the storage and must be length-1 Python lists.
* struct_builders -> list of CodeBlock instances such that
* declarations are in the struct
* behavior is in the constructor
* cleanup is in the destructor
* blocks -> list of CodeBlock instances such that
* declarations, behavior and cleanup are in the run()
method of the struct
* sub -> dictionary used to template the struct.
* failure_var -> must contain a variable name to use for
the failure code.
In a nutshell, this returns code for a struct that represents
a function with state. The state's initialization and destruction
are handled by struct_builders and the actual behavior of the
function is handled by blocks.
"""
struct_decl = "" struct_decl = ""
struct_init_head = "" struct_init_head = ""
...@@ -83,6 +133,11 @@ def struct_gen(args, struct_builders, blocks, sub): ...@@ -83,6 +133,11 @@ def struct_gen(args, struct_builders, blocks, sub):
struct_cleanup = "" struct_cleanup = ""
for block in struct_builders: for block in struct_builders:
# decl are declarations that go in the struct
# init_head are in the constructor
# init_tail and cleanup do the same thing, but the former will
# be executed if any step in the constructor fails and the
# latter only at destruction time.
struct_decl += block.declare struct_decl += block.declare
struct_init_head = struct_init_head + ("\n{\n%s" % block.behavior) struct_init_head = struct_init_head + ("\n{\n%s" % block.behavior)
struct_init_tail = ("%s\n}\n" % block.cleanup) + struct_init_tail struct_init_tail = ("%s\n}\n" % block.cleanup) + struct_init_tail
...@@ -90,13 +145,22 @@ def struct_gen(args, struct_builders, blocks, sub): ...@@ -90,13 +145,22 @@ def struct_gen(args, struct_builders, blocks, sub):
behavior = code_gen(blocks) behavior = code_gen(blocks)
# declares the storage
storage_decl = "\n".join(["PyObject* %s;" % arg for arg in args]) storage_decl = "\n".join(["PyObject* %s;" % arg for arg in args])
# in the constructor, sets the storage to the arguments
storage_set = "\n".join(["this->%s = %s;" % (arg, arg) for arg in args]) storage_set = "\n".join(["this->%s = %s;" % (arg, arg) for arg in args])
# increments the storage's refcount in the constructor
storage_incref = "\n".join(["Py_XINCREF(%s);" % arg for arg in args]) storage_incref = "\n".join(["Py_XINCREF(%s);" % arg for arg in args])
# decrements the storage's refcount in the destructor
storage_decref = "\n".join(["Py_XDECREF(this->%s);" % arg for arg in args]) storage_decref = "\n".join(["Py_XDECREF(this->%s);" % arg for arg in args])
args_names = ", ".join(args) args_names = ", ".join(args)
args_decl = ", ".join(["PyObject* %s" % arg for arg in args]) args_decl = ", ".join(["PyObject* %s" % arg for arg in args])
# The following code stores the exception data in __ERROR, which is a special
# field of the struct. __ERROR is a list of length 3 that holds the type, the
# value and the traceback. After storing the error, we return the failure code
# so we know which code block failed.
do_return = """ do_return = """
if (%(failure_var)s) { if (%(failure_var)s) {
// When there is a failure, this code puts the exception // When there is a failure, this code puts the exception
...@@ -165,7 +229,11 @@ def struct_gen(args, struct_builders, blocks, sub): ...@@ -165,7 +229,11 @@ def struct_gen(args, struct_builders, blocks, sub):
return struct_code return struct_code
# The get_<x> functions complete the return value of r.get_<x>()
# with handling of the py_<name> variable.
def get_nothing(r): def get_nothing(r):
""
return "" return ""
def get_c_declare(r): def get_c_declare(r):
...@@ -205,13 +273,33 @@ def get_c_sync(r): ...@@ -205,13 +273,33 @@ def get_c_sync(r):
""" % dict(sync = r.c_sync()) """ % dict(sync = r.c_sync())
def apply_policy(policy, r): def apply_policy(policy, r):
"""
policy -> list of functions that map a Result to a string,
or a single such function
r -> a Result
returns policy[0](r) + policy[1](r) + ...
"""
if isinstance(r, (list, tuple)): if isinstance(r, (list, tuple)):
ret = "" ret = ""
for sub_policy in policy: for sub_policy in policy:
ret += sub_policy(r) ret += sub_policy(r)
return policy(r) return policy(r)
def struct_result_codeblocks(result, policies, id, symbol_table, sub): def struct_result_codeblocks(result, policies, id, symbol_table, sub):
"""
result -> a Result
policies -> a pair of tuples ((declare_policy, behavior_policy, cleanup_policy), -- at construction
(declare_policy, behavior_policy, cleanup_policy)) -- at execution
the first tuple will produce an element of the 'struct_builders' argument in struct_gen
the second tuple will produce an element of the 'blocks' argument in struct_gen
id -> the id assigned to this result's task in the computation
symbol_table -> a dict that maps results to variable names. It is not read
by this function but a variable name for the result is computed and added
to the table.
sub -> dictionary for use by CodeBlock.
"""
name = "V%i" % id name = "V%i" % id
symbol_table[result] = name symbol_table[result] = name
...@@ -226,12 +314,23 @@ def struct_result_codeblocks(result, policies, id, symbol_table, sub): ...@@ -226,12 +314,23 @@ def struct_result_codeblocks(result, policies, id, symbol_table, sub):
class CLinker(Linker): class CLinker(Linker):
"""
Creates C code for an env or an Op instance, compiles it and returns
callables through make_thunk and make_function that make use of the
compiled code.
It can take an env or an Op as input.
"""
def __init__(self, env): def __init__(self, env):
self.env = env self.env = env
self.fetch_results() self.fetch_results()
def fetch_results(self): def fetch_results(self):
"""
Fills the inputs, outputs, results, orphans, temps and op_order fields.
"""
env = self.env env = self.env
self.inputs = env.inputs self.inputs = env.inputs
...@@ -240,6 +339,7 @@ class CLinker(Linker): ...@@ -240,6 +339,7 @@ class CLinker(Linker):
try: self.results = list(env.results()) try: self.results = list(env.results())
except AttributeError: self.results = self.inputs + self.outputs except AttributeError: self.results = self.inputs + self.outputs
# The orphans field is listified to ensure a consistent order.
try: self.orphans = list(env.orphans().difference(self.outputs)) try: self.orphans = list(env.orphans().difference(self.outputs))
except AttributeError: self.orphans = [] except AttributeError: self.orphans = []
...@@ -250,6 +350,18 @@ class CLinker(Linker): ...@@ -250,6 +350,18 @@ class CLinker(Linker):
except AttributeError: self.op_order = [env] except AttributeError: self.op_order = [env]
def code_gen(self, reuse_storage = True): def code_gen(self, reuse_storage = True):
"""
Generates code for a struct that does the computation of the env and
stores it in the struct_code field of the instance.
If reuse_storage is True, outputs and temporaries will be stored in
the struct so they can be reused each time a function returned by
make_function is called, which means that the output of a call will
be invalidated by the next. If reuse_storage is False, that problem
is avoided.
This method caches its computations.
"""
if getattr(self, 'struct_code', False) and self.reuse_storage == reuse_storage: if getattr(self, 'struct_code', False) and self.reuse_storage == reuse_storage:
return self.struct_code return self.struct_code
...@@ -260,9 +372,13 @@ class CLinker(Linker): ...@@ -260,9 +372,13 @@ class CLinker(Linker):
symbol = {} symbol = {}
# (init_)tasks contains a list of pairs (Op/Result, task_name)
# e.g. (x, 'get') or (x+y, 'code')
init_tasks = [] init_tasks = []
tasks = [] tasks = []
# (init_)blocks contain CodeBlock instances. There is a direct
# correspondence with (init_)tasks.
init_blocks = [] init_blocks = []
blocks = [] blocks = []
...@@ -273,6 +389,7 @@ class CLinker(Linker): ...@@ -273,6 +389,7 @@ class CLinker(Linker):
for result in set(self.results): for result in set(self.results):
# it might be possible to inline constant results as C literals
if getattr(result, 'constant', False): if getattr(result, 'constant', False):
if result in self.outputs or result in self.temps: if result in self.outputs or result in self.temps:
raise Exception("Temporaries and outputs should not be marked constant. Check your graph.") raise Exception("Temporaries and outputs should not be marked constant. Check your graph.")
...@@ -320,6 +437,10 @@ class CLinker(Linker): ...@@ -320,6 +437,10 @@ class CLinker(Linker):
builder, block = struct_result_codeblocks(result, policy, id, symbol, sub) builder, block = struct_result_codeblocks(result, policy, id, symbol, sub)
# each Result generates two CodeBlocks, one to declare/initialize/destroy struct variables
# and the other to declare/extract/cleanup each time the function is run.
# Typically, only one of the two actually does anything (see all the possible combinations above)
init_tasks.append((result, 'init', id)) init_tasks.append((result, 'init', id))
init_blocks.append(builder) init_blocks.append(builder)
...@@ -330,12 +451,14 @@ class CLinker(Linker): ...@@ -330,12 +451,14 @@ class CLinker(Linker):
for op in self.op_order: for op in self.op_order:
# We populate sub with a mapping from the variable names specified by the op's c_var_names
# method to the actual variable names that we will use.
ivnames, ovnames = op.c_var_names() ivnames, ovnames = op.c_var_names()
sub = dict(failure_var = failure_var) sub = dict(failure_var = failure_var)
for result, vname in zip(op.inputs + op.outputs, ivnames + ovnames): for result, vname in zip(op.inputs + op.outputs, ivnames + ovnames):
sub[vname] = symbol[result] sub[vname] = symbol[result]
# c_validate_update # Make the CodeBlock for c_validate_update
try: validate_behavior = op.c_validate_update() try: validate_behavior = op.c_validate_update()
except AbstractFunctionError: except AbstractFunctionError:
validate_behavior = "" validate_behavior = ""
...@@ -349,7 +472,7 @@ class CLinker(Linker): ...@@ -349,7 +472,7 @@ class CLinker(Linker):
tasks.append((op, 'validate_update', id)) tasks.append((op, 'validate_update', id))
id += 1 id += 1
# c_code # Make the CodeBlock for c_code
behavior = op.c_code() # this one must be implemented! behavior = op.c_code() # this one must be implemented!
try: cleanup = op.c_code_cleanup() try: cleanup = op.c_code_cleanup()
...@@ -361,13 +484,15 @@ class CLinker(Linker): ...@@ -361,13 +484,15 @@ class CLinker(Linker):
tasks.append((op, 'code', id)) tasks.append((op, 'code', id))
id += 1 id += 1
# List of arg names for use in struct_gen. Note the call to uniq: duplicate inputs
# must only be passed once because they are mapped to the same name.
args = [] args = []
in_arg_order = []
args += ["storage_%s" % symbol[result] for result in utils.uniq(self.inputs + self.outputs + self.orphans)] args += ["storage_%s" % symbol[result] for result in utils.uniq(self.inputs + self.outputs + self.orphans)]
struct_code = struct_gen(args, init_blocks, blocks, dict(failure_var = failure_var)) struct_code = struct_gen(args, init_blocks, blocks, dict(failure_var = failure_var))
# The hash calculated on the code identifies it so weave can cache properly.
# (the hash has to be used outside of the support code because weave does not consider changes in the support code)
hash = md5.md5(struct_code).hexdigest() hash = md5.md5(struct_code).hexdigest()
struct_name = '__struct_compiled_op_%s' % hash struct_name = '__struct_compiled_op_%s' % hash
struct_code %= dict(name = struct_name) struct_code %= dict(name = struct_name)
...@@ -383,46 +508,84 @@ class CLinker(Linker): ...@@ -383,46 +508,84 @@ class CLinker(Linker):
self.blocks = blocks self.blocks = blocks
self.tasks = tasks self.tasks = tasks
all = self.inputs + self.outputs + self.orphans all = self.inputs + self.outputs + self.orphans
self.dupidx = [i for i, x in enumerate(all) if all.count(x) > 1 and all.index(x) != i]
# List of indices that should be ignored when passing the arguments
# (basically, everything that the previous call to uniq eliminated)
self.dupidx = [i for i, x in enumerate(all) if all.count(x) > 1 and all.index(x) != i]
def find_task(self, failure_code): def find_task(self, failure_code):
"""
Maps a failure code to the task that is associated to it.
"""
failure_code -= 1 failure_code -= 1
n = len(self.init_tasks) n = len(self.init_tasks)
# note that the failure code is distributed in two lists
if failure_code < 2 * n: if failure_code < 2 * n:
return [self.init_tasks, self.tasks][failure_code % 2][failure_code/2] return [self.init_tasks, self.tasks][failure_code % 2][failure_code/2]
else: else:
return self.tasks[failure_code - n] return self.tasks[failure_code - n]
def support_code(self): def support_code(self):
ret = set() """
Returns a list of support code strings that are needed by
one or more Results or Ops. The support code from Results is
added before the support code from Ops.
This might contain duplicates.
"""
ret = []
for x in self.results + self.op_order: for x in self.results + self.op_order:
try: ret.add(x.c_support_code()) try: ret.append(x.c_support_code())
except AbstractFunctionError: pass except AbstractFunctionError: pass
return ret return ret
def compile_args(self): def compile_args(self):
ret = set() """
Returns a list of compile args that are needed by one
or more Results or Ops.
This might contain duplicates.
"""
ret = []
for x in self.results + self.op_order: for x in self.results + self.op_order:
try: ret.update(x.c_compile_args()) try: ret += x.c_compile_args()
except AbstractFunctionError: pass except AbstractFunctionError: pass
return ret return ret
def headers(self): def headers(self):
ret = set() """
Returns a list of headers that are needed by one
or more Results or Ops.
This might contain duplicates.
"""
ret = []
for x in self.results + self.op_order: for x in self.results + self.op_order:
try: ret.update(x.c_headers()) try: ret += x.c_headers()
except AbstractFunctionError: pass except AbstractFunctionError: pass
return ret return ret
def libraries(self): def libraries(self):
ret = set() """
Returns a list of libraries that are needed by one
or more Results or Ops.
This might contain duplicates.
"""
ret = []
for x in self.results + self.op_order: for x in self.results + self.op_order:
try: ret.update(x.c_libraries()) try: ret += x.c_libraries()
except AbstractFunctionError: pass except AbstractFunctionError: pass
return ret return ret
def __compile__(self, inplace = False): def __compile__(self, inplace = False):
"""
Compiles this linker's env. If inplace is True, it will use the
Results contained in the env, if it is False it will copy the
input and output Results.
Returns: thunk, in_results, out_results, error_storage
"""
if inplace: if inplace:
in_results = self.inputs in_results = self.inputs
out_results = self.outputs out_results = self.outputs
...@@ -450,19 +613,33 @@ class CLinker(Linker): ...@@ -450,19 +613,33 @@ class CLinker(Linker):
trace = () trace = ()
exc_type, _exc_value, exc_trace = error_storage exc_type, _exc_value, exc_trace = error_storage
exc_value = exc_type(_exc_value, task) exc_value = exc_type(_exc_value, task)
exc_value.__thunk_trace__ = trace exc_value.__thunk_trace__ = trace # this can be used to retrieve the location the Op was declared
raise exc_type, exc_value, exc_trace raise exc_type, exc_value, exc_trace
return execute, in_results, out_results return execute, in_results, out_results
def cthunk_factory(self, error_storage, in_storage, out_storage): def cthunk_factory(self, error_storage, in_storage, out_storage):
"""
error_storage -> list of length 3
in_storage -> list of lists of length 1, one per input
out_storage -> list of lists of length 1, one per output
Returns a thunk that points to an instance of a C struct that
can carry on the computation of this linker's env. That thunk,
when executed, will fetch its inputs from in_storage, put its
outputs in out_storage and if an error occurs will put the
type, value and traceback of the exception in error_storage.
"""
# check if we already compiled this
if not getattr(self, 'instantiate', False): if not getattr(self, 'instantiate', False):
self.code_gen() self.code_gen()
# Eliminate duplicate inputs and outputs from the storage that we will pass to instantiate
out_storage = [x for i, x in enumerate(out_storage) if (i+len(in_storage)) not in self.dupidx] out_storage = [x for i, x in enumerate(out_storage) if (i+len(in_storage)) not in self.dupidx]
in_storage = [x for i, x in enumerate(in_storage) if i not in self.dupidx] in_storage = [x for i, x in enumerate(in_storage) if i not in self.dupidx]
cthunk = object() cthunk = object() # dummy so weave can get the type
module_name = self.hash module_name = self.hash
mod = weave.ext_tools.ext_module(module_name) mod = weave.ext_tools.ext_module(module_name)
...@@ -470,6 +647,7 @@ class CLinker(Linker): ...@@ -470,6 +647,7 @@ class CLinker(Linker):
+ ["o%i" % i for i in xrange(len(out_storage))] \ + ["o%i" % i for i in xrange(len(out_storage))] \
+ ["orph%i" % i for i in xrange(len(self.orphans))] + ["orph%i" % i for i in xrange(len(self.orphans))]
# The code of instantiate
code = """ code = """
%(struct_name)s* struct_ptr = new %(struct_name)s(); %(struct_name)s* struct_ptr = new %(struct_name)s();
struct_ptr->init(error_storage, %(args)s); struct_ptr->init(error_storage, %(args)s);
...@@ -489,6 +667,7 @@ class CLinker(Linker): ...@@ -489,6 +667,7 @@ class CLinker(Linker):
local_dict = d, local_dict = d,
global_dict = {}) global_dict = {})
# Static methods that can run and destroy the struct built by instantiate.
static = """ static = """
int %(struct_name)s_executor(%(struct_name)s* self) { int %(struct_name)s_executor(%(struct_name)s* self) {
return self->run(); return self->run();
...@@ -501,6 +680,7 @@ class CLinker(Linker): ...@@ -501,6 +680,7 @@ class CLinker(Linker):
} }
""" % dict(struct_name = self.struct_name) """ % dict(struct_name = self.struct_name)
# We add all the support code, compile args, headers and libs we need.
for support_code in self.support_code(): for support_code in self.support_code():
instantiate.customize.add_support_code(support_code) instantiate.customize.add_support_code(support_code)
instantiate.customize.add_support_code(self.struct_code) instantiate.customize.add_support_code(self.struct_code)
...@@ -518,6 +698,10 @@ class CLinker(Linker): ...@@ -518,6 +698,10 @@ class CLinker(Linker):
module = __import__("%s" % (module_name), {}, {}, [module_name]) module = __import__("%s" % (module_name), {}, {}, [module_name])
self.instantiate = module.instantiate self.instantiate = module.instantiate
else:
# Eliminate duplicate inputs and outputs from the storage that we will pass to instantiate
out_storage = [x for i, x in enumerate(out_storage) if (i+len(in_storage)) not in self.dupidx]
in_storage = [x for i, x in enumerate(in_storage) if i not in self.dupidx]
ret = module.instantiate(error_storage, *(in_storage + out_storage + [orphan._data for orphan in self.orphans])) ret = module.instantiate(error_storage, *(in_storage + out_storage + [orphan._data for orphan in self.orphans]))
assert sys.getrefcount(ret) == 2 # refcount leak check assert sys.getrefcount(ret) == 2 # refcount leak check
...@@ -526,6 +710,13 @@ class CLinker(Linker): ...@@ -526,6 +710,13 @@ class CLinker(Linker):
class OpWiseCLinker(Linker): class OpWiseCLinker(Linker):
"""
Uses CLinker on the individual Ops that comprise an env and loops
over them in Python. The result is slower than a compiled version of
the whole env, but saves on compilation time because small changes
in the computation graph won't necessarily trigger any recompilation,
only local changes in the Results or Ops that are used.
"""
def __init__(self, env): def __init__(self, env):
self.env = env self.env = env
......
...@@ -6,6 +6,9 @@ except ImportError: ...@@ -6,6 +6,9 @@ except ImportError:
from scipy import weave from scipy import weave
# The following function takes a PyCObject instance that contains
# a void*->int function in its VoidPtr field. It then calls that
# function on the object's Desc field and returns the int result.
single_runner = """ single_runner = """
if (!PyCObject_Check(py_cthunk)) { if (!PyCObject_Check(py_cthunk)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论