提交 c2ac70a1 authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #6107 from nouiz/no_recycling2

Compile less C code
......@@ -1146,12 +1146,13 @@ class CLinker(link.Linker):
output_storage.append(map[variable])
input_storage = tuple(input_storage)
output_storage = tuple(output_storage)
thunk = self.cthunk_factory(error_storage,
input_storage,
output_storage,
storage_map,
keep_lock=keep_lock)
thunk, module = self.cthunk_factory(error_storage,
input_storage,
output_storage,
storage_map,
keep_lock=keep_lock)
return (thunk,
module,
[link.Container(input, storage) for input, storage in
izip(self.fgraph.inputs, input_storage)],
[link.Container(output, storage, True) for output, storage in
......@@ -1207,11 +1208,11 @@ class CLinker(link.Linker):
first_output = ostor[0].data
"""
init_tasks, tasks = self.get_init_tasks()
cthunk, in_storage, out_storage, error_storage = self.__compile__(
cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
input_storage, output_storage, storage_map,
keep_lock=keep_lock)
res = _CThunk(cthunk, init_tasks, tasks, error_storage)
res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)
res.nodes = self.node_order
return res, in_storage, out_storage
......@@ -1623,8 +1624,7 @@ class CLinker(link.Linker):
ret = module.instantiate(error_storage,
*(in_storage + out_storage + orphd))
return ret
return ret, module
def instantiate_code(self, n_args):
code = StringIO()
......@@ -1669,10 +1669,13 @@ class _CThunk(object):
WRITEME
error_storage
WRITEME
module
The module that was used to compile this cthunk.
Mostly only useful for tests.
"""
def __init__(self, cthunk, init_tasks, tasks, error_storage):
def __init__(self, cthunk, init_tasks, tasks, error_storage, module):
global run_cthunk
if run_cthunk is None:
# Lazy import to avoid compilation when importing theano.
......@@ -1681,6 +1684,7 @@ class _CThunk(object):
self.init_tasks = init_tasks
self.tasks = tasks
self.error_storage = error_storage
self.module = module
def find_task(self, failure_code):
"""
......
......@@ -856,14 +856,15 @@ class Op(utils.object2, PureOp, CLinkerOp):
_logger.debug('Trying CLinker.make_thunk')
outputs = cl.make_thunk(input_storage=node_input_storage,
output_storage=node_output_storage)
fill_storage, node_input_filters, node_output_filters = outputs
thunk, node_input_filters, node_output_filters = outputs
def rval():
fill_storage()
thunk()
for o in node.outputs:
compute_map[o][0] = True
rval.cthunk = fill_storage.cthunk
rval.thunk = thunk
rval.cthunk = thunk.cthunk
rval.inputs = node_input_storage
rval.outputs = node_output_storage
rval.lazy = False
......
......@@ -438,3 +438,18 @@ def test_reallocation():
assert check_storage(storage_map)[0]
assert len(set(id(v) for v in
itervalues(storage_map))) < len(storage_map)
def test_no_recycling():
    """Check that two functions reuse the same compiled module.

    For each VM_Linker configuration, two functions are compiled with the
    same mode: one whose graph is ``x + 1`` and one whose graph is
    ``(x + 1) * 2``.  The ``module`` attached to the first thunk of each
    function must be the very same object.

    NOTE(review): presumably the first thunk of both functions is the
    ``x + 1`` node (an output in the first graph, an intermediate in the
    second) -- confirm against the optimizer behaviour.
    """
    x = theano.tensor.vector()
    linkers = [
        vm.VM_Linker(use_cloop=True),
        vm.VM_Linker(use_cloop=False, lazy=True),
        vm.VM_Linker(use_cloop=False, lazy=False, allow_gc=True),
        vm.VM_Linker(use_cloop=False, lazy=False, allow_gc=False),
    ]
    for linker in linkers:
        mode = theano.Mode(optimizer='fast_compile', linker=linker)
        fn_output = theano.function([x], x + 1, mode=mode)
        fn_inner = theano.function([x], (x + 1) * 2, mode=mode)
        # Identity (not equality): both thunks must point at one module.
        module_output = fn_output.fn.thunks[0].thunk.module
        module_inner = fn_inner.fn.thunks[0].thunk.module
        assert module_output is module_inner
......@@ -413,6 +413,9 @@ class Stack(VM):
self.node_executed_order = []
self.node_cleared_order = []
for cont in self.pre_call_clear:
cont[0] = None
for k in self.storage_map:
compute_map[k][0] = (k.owner is None)
if self.callback_input and compute_map[k][0]:
......@@ -745,8 +748,7 @@ class VM_Linker(link.LocalLinker):
self.schedule = schedule
def accept(self, fgraph, no_recycling=None, profile=None):
"""
Check if fgraph is the first FunctionGraph that has ever been
"""Check if fgraph is the first FunctionGraph that has ever been
associated to self, else, create a new VM_Linker
associated to fgraph
......@@ -755,8 +757,33 @@ class VM_Linker(link.LocalLinker):
fgraph
A PerformLinker can have accepted one FunctionGraph instance
at a time.
no_recycling
WRITEME
no_recycling is a list of storage cells (each a one-element list
holding the value of one variable). The storage of those
variables must not be reused after the call that created
them.
This happens, for example, for the outputs of the graph that we
give to the user. We don't want to reuse those objects in
case the user has kept them.
VM_Linker makes sure this happens by setting the list
element to None at the start of each call.
Older linkers do not use exactly the same mechanism. They
also modify the C code so that it does not look up the value in
the storage. This causes duplicate C code compilation for the
same op depending on whether it is in the middle of the graph
or in no_recycling. We don't want that, so we compile all C
code the same way (middle of the graph vs. output).
TODO: change the logic to remove the reference at the end
of the call instead of the start. This will require the
__call__ of every VM implementation (Loop, LoopGC, Stack, CVM)
to return the user outputs, as Function.__call__ won't be able
to find them anymore.
Returns
-------
......@@ -1018,7 +1045,6 @@ class VM_Linker(link.LocalLinker):
):
fgraph = self.fgraph
order = self.schedule(fgraph)
no_recycling = self.no_recycling
input_storage, output_storage, storage_map = link.map_storage(
fgraph, order, input_storage, output_storage, storage_map)
......@@ -1048,10 +1074,13 @@ class VM_Linker(link.LocalLinker):
for node in order:
try:
thunk_start = time.time()
# no-recycling is done at each VM.__call__, so there is
# no need to cause duplicate C code by passing
# no_recycling here.
thunks.append(node.op.make_thunk(node,
storage_map,
compute_map,
no_recycling,
[],
impl=impl))
linker_make_thunk_time[node] = time.time() - thunk_start
if not hasattr(thunks[-1], 'lazy'):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论