提交 c2ac70a1 authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #6107 from nouiz/no_recycling2

Compile less C code
...@@ -1146,12 +1146,13 @@ class CLinker(link.Linker): ...@@ -1146,12 +1146,13 @@ class CLinker(link.Linker):
output_storage.append(map[variable]) output_storage.append(map[variable])
input_storage = tuple(input_storage) input_storage = tuple(input_storage)
output_storage = tuple(output_storage) output_storage = tuple(output_storage)
thunk = self.cthunk_factory(error_storage, thunk, module = self.cthunk_factory(error_storage,
input_storage, input_storage,
output_storage, output_storage,
storage_map, storage_map,
keep_lock=keep_lock) keep_lock=keep_lock)
return (thunk, return (thunk,
module,
[link.Container(input, storage) for input, storage in [link.Container(input, storage) for input, storage in
izip(self.fgraph.inputs, input_storage)], izip(self.fgraph.inputs, input_storage)],
[link.Container(output, storage, True) for output, storage in [link.Container(output, storage, True) for output, storage in
...@@ -1207,11 +1208,11 @@ class CLinker(link.Linker): ...@@ -1207,11 +1208,11 @@ class CLinker(link.Linker):
first_output = ostor[0].data first_output = ostor[0].data
""" """
init_tasks, tasks = self.get_init_tasks() init_tasks, tasks = self.get_init_tasks()
cthunk, in_storage, out_storage, error_storage = self.__compile__( cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
input_storage, output_storage, storage_map, input_storage, output_storage, storage_map,
keep_lock=keep_lock) keep_lock=keep_lock)
res = _CThunk(cthunk, init_tasks, tasks, error_storage) res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)
res.nodes = self.node_order res.nodes = self.node_order
return res, in_storage, out_storage return res, in_storage, out_storage
...@@ -1623,8 +1624,7 @@ class CLinker(link.Linker): ...@@ -1623,8 +1624,7 @@ class CLinker(link.Linker):
ret = module.instantiate(error_storage, ret = module.instantiate(error_storage,
*(in_storage + out_storage + orphd)) *(in_storage + out_storage + orphd))
return ret, module
return ret
def instantiate_code(self, n_args): def instantiate_code(self, n_args):
code = StringIO() code = StringIO()
...@@ -1669,10 +1669,13 @@ class _CThunk(object): ...@@ -1669,10 +1669,13 @@ class _CThunk(object):
WRITEME WRITEME
error_storage error_storage
WRITEME WRITEME
module
The module that was used to compile this cthunk.
Mostly only useful for tests.
""" """
def __init__(self, cthunk, init_tasks, tasks, error_storage): def __init__(self, cthunk, init_tasks, tasks, error_storage, module):
global run_cthunk global run_cthunk
if run_cthunk is None: if run_cthunk is None:
# Lazy import to avoid compilation when importing theano. # Lazy import to avoid compilation when importing theano.
...@@ -1681,6 +1684,7 @@ class _CThunk(object): ...@@ -1681,6 +1684,7 @@ class _CThunk(object):
self.init_tasks = init_tasks self.init_tasks = init_tasks
self.tasks = tasks self.tasks = tasks
self.error_storage = error_storage self.error_storage = error_storage
self.module = module
def find_task(self, failure_code): def find_task(self, failure_code):
""" """
......
...@@ -856,14 +856,15 @@ class Op(utils.object2, PureOp, CLinkerOp): ...@@ -856,14 +856,15 @@ class Op(utils.object2, PureOp, CLinkerOp):
_logger.debug('Trying CLinker.make_thunk') _logger.debug('Trying CLinker.make_thunk')
outputs = cl.make_thunk(input_storage=node_input_storage, outputs = cl.make_thunk(input_storage=node_input_storage,
output_storage=node_output_storage) output_storage=node_output_storage)
fill_storage, node_input_filters, node_output_filters = outputs thunk, node_input_filters, node_output_filters = outputs
def rval(): def rval():
fill_storage() thunk()
for o in node.outputs: for o in node.outputs:
compute_map[o][0] = True compute_map[o][0] = True
rval.cthunk = fill_storage.cthunk rval.thunk = thunk
rval.cthunk = thunk.cthunk
rval.inputs = node_input_storage rval.inputs = node_input_storage
rval.outputs = node_output_storage rval.outputs = node_output_storage
rval.lazy = False rval.lazy = False
......
...@@ -438,3 +438,18 @@ def test_reallocation(): ...@@ -438,3 +438,18 @@ def test_reallocation():
assert check_storage(storage_map)[0] assert check_storage(storage_map)[0]
assert len(set(id(v) for v in assert len(set(id(v) for v in
itervalues(storage_map))) < len(storage_map) itervalues(storage_map))) < len(storage_map)
def test_no_recycling():
    """Check that no_recycling does not trigger duplicate C compilation.

    Two different functions compiled with equivalent linker settings must
    end up sharing the very same compiled C module for their first thunk,
    since the no_recycling clearing is now done by the VM at call time
    instead of being baked into the generated C code.
    """
    x = theano.tensor.vector()
    linkers = (vm.VM_Linker(use_cloop=True),
               vm.VM_Linker(use_cloop=False, lazy=True),
               vm.VM_Linker(use_cloop=False, lazy=False, allow_gc=True),
               vm.VM_Linker(use_cloop=False, lazy=False, allow_gc=False))
    for linker in linkers:
        mode = theano.Mode(optimizer='fast_compile', linker=linker)
        fn_a = theano.function([x], x + 1, mode=mode)
        fn_b = theano.function([x], (x + 1) * 2, mode=mode)
        module_a = fn_a.fn.thunks[0].thunk.module
        module_b = fn_b.fn.thunks[0].thunk.module
        # Identity (not mere equality): both functions must reuse the
        # exact same compiled module object from the cache.
        assert module_a is module_b
...@@ -413,6 +413,9 @@ class Stack(VM): ...@@ -413,6 +413,9 @@ class Stack(VM):
self.node_executed_order = [] self.node_executed_order = []
self.node_cleared_order = [] self.node_cleared_order = []
for cont in self.pre_call_clear:
cont[0] = None
for k in self.storage_map: for k in self.storage_map:
compute_map[k][0] = (k.owner is None) compute_map[k][0] = (k.owner is None)
if self.callback_input and compute_map[k][0]: if self.callback_input and compute_map[k][0]:
...@@ -745,8 +748,7 @@ class VM_Linker(link.LocalLinker): ...@@ -745,8 +748,7 @@ class VM_Linker(link.LocalLinker):
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None, profile=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """Check if fgraph is the first FunctionGraph that has ever been
Check if fgraph is the first FunctionGraph that has ever been
associated to self, else, create a new VM_Linker associated to self, else, create a new VM_Linker
associated to fgraph associated to fgraph
...@@ -755,8 +757,33 @@ class VM_Linker(link.LocalLinker): ...@@ -755,8 +757,33 @@ class VM_Linker(link.LocalLinker):
fgraph fgraph
A PerformLinker can have accepted one FunctionGraph instance A PerformLinker can have accepted one FunctionGraph instance
at a time. at a time.
no_recycling no_recycling
WRITEME
no_recycling is a list of storage cells (each a list of 1
element, the value corresponding to one variable). Those
variables' storage should not be reused after the call that
created them.
This happens, for example, for the outputs of the graph
that we give to the user. We do not want to reuse those
objects in case the user has kept a reference to them.
VM_Linker ensures this by setting each list element to
None at the start of every call.
Older Linkers use a slightly different mechanism: they also
modify the generated C code so that it does not look up the
value in the storage. This causes duplicate C code
compilation for the same op depending on whether it sits in
the middle of the graph or appears in no_recycling. We do
not want that, so we compile all C code the same way
(middle of the graph vs. output).
TODO: change the logic to remove the reference at the end
of the call instead of at the start. This will require the
__call__ of all VM implementations (Loop, LoopGC, Stack,
CVM) to return the user outputs, as Function.__call__ will
no longer be able to find them.
Returns Returns
------- -------
...@@ -1018,7 +1045,6 @@ class VM_Linker(link.LocalLinker): ...@@ -1018,7 +1045,6 @@ class VM_Linker(link.LocalLinker):
): ):
fgraph = self.fgraph fgraph = self.fgraph
order = self.schedule(fgraph) order = self.schedule(fgraph)
no_recycling = self.no_recycling
input_storage, output_storage, storage_map = link.map_storage( input_storage, output_storage, storage_map = link.map_storage(
fgraph, order, input_storage, output_storage, storage_map) fgraph, order, input_storage, output_storage, storage_map)
...@@ -1048,10 +1074,13 @@ class VM_Linker(link.LocalLinker): ...@@ -1048,10 +1074,13 @@ class VM_Linker(link.LocalLinker):
for node in order: for node in order:
try: try:
thunk_start = time.time() thunk_start = time.time()
# No-recycling is handled at each VM.__call__, so there
# is no need to cause duplicate C code compilation by
# passing no_recycling here.
thunks.append(node.op.make_thunk(node, thunks.append(node.op.make_thunk(node,
storage_map, storage_map,
compute_map, compute_map,
no_recycling, [],
impl=impl)) impl=impl))
linker_make_thunk_time[node] = time.time() - thunk_start linker_make_thunk_time[node] = time.time() - thunk_start
if not hasattr(thunks[-1], 'lazy'): if not hasattr(thunks[-1], 'lazy'):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论