提交 0c1c4710 authored 作者: James Bergstra's avatar James Bergstra

code and documentation corrections to CLinker.cmodule_key

上级 e2c94fc8
......@@ -761,38 +761,61 @@ class CLinker(link.Linker):
return res
def cmodule_key(self):
"""Return a complete hashable signature of the module we compiled
"""Return a complete hashable signature of the module we compiled.
This function must have the property that no two programs that compute different things
yield the same key.
The key returned by this function is of the form ``(version, signature)``.
The signature has the following form:
{{{
'CLinker.cmodule_key', compilation args, libraries,
op0, (input0.type, input1.type, input0 pos, input1 pos)
op1, (...)
(op0, input_signature0, output_signature0),
(op1, input_signature1, output_signature1),
...
opK, (...)
}}}
(opK, input_signatureK, output_signatureK),
}}}
The signature is a tuple, some elements of which are sub-tuples.
The signature is a tuple of tuples.
The outer tuple has a brief header, followed by elements for every node in the
topological ordering of `self.env`.
If the Op of any Apply in the Env does not have c_code_cache_ok()==True, then this
function raises a KeyError exception.
The outer tuple has one element for every node in the topological ordering of
`self.env`.
Input Signature
---------------
The inner tuple has one element for the op used at that node, and one element for the
inputs to that node. The inputs are identified by their type and "graph position"
Each input signature is a tuple with an element for each input to the corresponding
Apply node.
Each element identifies the type of the node input, and the nature of that input in the
graph.
The graph position of a typical variable is encoded by integer pairs ``(a,b)``:
The nature of a typical variable is encoded by a nested tuple ``((a,b),c)``:
``a`` is the topological position of the input's owner (-1 for graph inputs),
``b`` is the index of the variable in the owner's output list.
``c`` is a flag indicating whether the variable is in the no_recycling set.
If a variable is also a graph output, then its position in the outputs list is also
bundled with this tuple (after the b).
The graph position of a Constant instance is defined as its signature, together with
The nature of a Constant instance is defined as its signature, together with
two integers: the topological position of the first Apply using that Constant instance,
and the lowest index into that Apply's inputs that refers to that Constant. (These two
integers are a surrogate for the id() of the Constant. The integers are important
because merge-able constants have the same signature, but require separate containers
in C code.)
in C code.) The membership in no_recycling is also included in the signature.
If the Op of any Apply in the Env does not have c_code_cache_ok()==True, then this
function raises a KeyError exception.
Output Signature
----------------
The outputs of a node are entirely determined by the node's Op and the nature of the
inputs, but the set of outputs that may be re-used by the computation (the elements of
self.no_recycling) can affect the code that is generated.
The format of each Op's output signature is simply a tuple of booleans, indicating
whether each output is in the no_recycling set.
"""
return self.cmodule_key_(self.env, self.no_recycling,
......@@ -800,68 +823,81 @@ class CLinker(link.Linker):
libraries=self.libraries()
)
@staticmethod
def cmodule_key_(env, no_recycling, compile_args=(), libraries=()):
    """
    Do the actual computation of cmodule_key in a static method
    to allow it to be reused in scalar.Composite.__eq__

    :param env: the graph container; this function reads ``env.inputs``,
        ``env.outputs`` and ``env.toposort()``.
    :param no_recycling: container of variables, queried with ``in``; the
        membership of every node input and node output is part of the key.
    :param compile_args: iterable of compilation arguments placed in the
        signature header as a tuple, or None to leave that element out.
    :param libraries: iterable of library names placed in the signature
        header as a tuple, or None to leave that element out.

    :returns: a pair ``(version, signature)``.  ``signature`` is a tuple that
        starts with ``'CLinker.cmodule_key'``, then the compile_args and
        libraries tuples, then one ``(op, input_signature, output_signature)``
        element per node in topological order.  ``version`` is a tuple with
        the cache version of every node's op, input types and output types;
        it is the empty tuple ``()`` as soon as any one of them is false-y
        (i.e. unversioned).

    :raises Exception: if a node input is neither a Constant, nor an env
        input, nor the output of some node (its ``owner`` is None).
    """
    order = list(env.toposort())
    # env input variable -> (-1, position in env.inputs)
    env_inputs_dict = dict((i, (-1, pos)) for pos, i in enumerate(env.inputs))
    # id(Constant) -> (signature, position of first Apply using it, input index)
    constant_ids = dict()
    op_pos = {}  # Apply -> topological position

    # first we put the header, compile_args, library names into the signature
    sig = ['CLinker.cmodule_key']  # will be cast to tuple on return
    if compile_args is not None:
        sig.append(tuple(compile_args))
    if libraries is not None:
        sig.append(tuple(libraries))

    def in_sig(i, topological_pos, i_idx):
        # Every input to every node must be one of:
        # - an env input
        # - an output from a node in the Env
        # - a Constant
        #
        # It is important that a variable (i)
        # yield a 'position' that reflects its role in code_gen()
        if isinstance(i, graph.Constant):  # orphans
            # The two integers are recorded on first use only, so every later
            # use of the same Constant instance gets the same triple.
            if id(i) not in constant_ids:
                constant_ids[id(i)] = (i.signature(), topological_pos, i_idx)
            isig = constant_ids[id(i)]
        elif i in env_inputs_dict:  # inputs
            isig = env_inputs_dict[i]
        else:
            if i.owner is None:
                # Diagnostic checks before giving up on an unclassifiable input.
                assert all(all(out is not None for out in o.outputs)
                           for o in order)
                assert all(inp.owner is None for inp in env.inputs)
                raise Exception('what is this?', (i, type(i), i.clients, env))
            if i in env.outputs:
                isig = (op_pos[i.owner],  # outputs
                        i.owner.outputs.index(i),
                        env.outputs.index(i))
            else:
                isig = (op_pos[i.owner], i.owner.outputs.index(i))  # temps
        return (isig, i in no_recycling)

    version = []
    for node_pos, node in enumerate(order):
        version.append(node.op.c_code_cache_version_apply(node))
        for i in node.inputs:
            version.append(i.type.c_code_cache_version())
        for o in node.outputs:
            version.append(o.type.c_code_cache_version())
        # add the signature for this node
        sig.append((
            node.op,
            tuple((i.type, in_sig(i, node_pos, ipos))
                  for ipos, i in enumerate(node.inputs)),
            tuple(o in no_recycling for o in node.outputs)))
        # record the position only after the node's own inputs were encoded
        op_pos[node] = node_pos

    # crystallize the signature and version
    sig = tuple(sig)
    version = tuple(version)
    if not all(version):
        # one of the ops or types here is unversioned,
        # so this env is entirely unversioned
        return ((), sig)
    return version, sig
def compile_cmodule(self, location=None):
"""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论