提交 6298f875 authored 作者: Roy Xue's avatar Roy Xue

PEP format check

上级 84205775
...@@ -1012,19 +1012,19 @@ class ProfileStats(object): ...@@ -1012,19 +1012,19 @@ class ProfileStats(object):
# Store the max of some stats by any function in this profile. # Store the max of some stats by any function in this profile.
max_sum_size = max(max_sum_size, sum_size) max_sum_size = max(max_sum_size, sum_size)
max_node_memory_size[0] = max(max_node_memory_size[0], max_node_memory_size[0] = max(max_node_memory_size[0],
sum(old_running_memory[0])) sum(old_running_memory[0]))
max_running_max_memory_size[0] = max(max_running_max_memory_size[1], max_running_max_memory_size[0] = max(max_running_max_memory_size[1],
sum(old_running_memory[2])) sum(old_running_memory[2]))
# Separate CPU and GPU # Separate CPU and GPU
max_node_memory_size[1] = max(max_node_memory_size[1], max_node_memory_size[1] = max(max_node_memory_size[1],
old_running_memory[0][0]) old_running_memory[0][0])
max_node_memory_size[2] = max(max_node_memory_size[2], max_node_memory_size[2] = max(max_node_memory_size[2],
old_running_memory[0][1]) old_running_memory[0][1])
max_running_max_memory_size[1] = max(max_running_max_memory_size[1], max_running_max_memory_size[1] = max(max_running_max_memory_size[1],
old_running_memory[2][0]) old_running_memory[2][0])
max_running_max_memory_size[2] = max(max_running_max_memory_size[2], max_running_max_memory_size[2] = max(max_running_max_memory_size[2],
old_running_memory[2][1]) old_running_memory[2][1])
max_node_memory_saved_by_inplace = max( max_node_memory_saved_by_inplace = max(
max_node_memory_saved_by_inplace, old_running_memory[3]) max_node_memory_saved_by_inplace, old_running_memory[3])
...@@ -1033,19 +1033,19 @@ class ProfileStats(object): ...@@ -1033,19 +1033,19 @@ class ProfileStats(object):
# Store max of some stats with new order # Store max of some stats with new order
new_max_node_memory_size[0] = max(new_max_node_memory_size[0], new_max_node_memory_size[0] = max(new_max_node_memory_size[0],
sum(new_running_memory[0])) sum(new_running_memory[0]))
new_max_running_max_memory_size[0] = max(new_max_running_max_memory_size[0], new_max_running_max_memory_size[0] = max(new_max_running_max_memory_size[0],
sum(new_running_memory[2])) sum(new_running_memory[2]))
# Separate CPU and GPU # Separate CPU and GPU
new_max_node_memory_size[1] = max(new_max_node_memory_size[1], new_max_node_memory_size[1] = max(new_max_node_memory_size[1],
new_running_memory[0][0]) new_running_memory[0][0])
new_max_node_memory_size[2] = max(new_max_node_memory_size[2], new_max_node_memory_size[2] = max(new_max_node_memory_size[2],
new_running_memory[0][1]) new_running_memory[0][1])
new_max_running_max_memory_size[1] = max(new_max_running_max_memory_size[1], new_max_running_max_memory_size[1] = max(new_max_running_max_memory_size[1],
new_running_memory[2][0]) new_running_memory[2][0])
new_max_running_max_memory_size[2] = max(new_max_running_max_memory_size[2], new_max_running_max_memory_size[2] = max(new_max_running_max_memory_size[2],
new_running_memory[2][1]) new_running_memory[2][1])
new_max_node_memory_saved_by_inplace = max( new_max_node_memory_saved_by_inplace = max(
new_max_node_memory_saved_by_inplace, new_running_memory[3]) new_max_node_memory_saved_by_inplace, new_running_memory[3])
......
...@@ -56,6 +56,7 @@ raise_with_op = link.raise_with_op ...@@ -56,6 +56,7 @@ raise_with_op = link.raise_with_op
class VM(object): class VM(object):
""" """
A VM object's __call__ method evaluates a Theano program. A VM object's __call__ method evaluates a Theano program.
...@@ -83,6 +84,7 @@ class VM(object): ...@@ -83,6 +84,7 @@ class VM(object):
storage. False means it *must not* repeat that feedback. storage. False means it *must not* repeat that feedback.
""" """
def __init__(self, nodes, thunks, pre_call_clear): def __init__(self, nodes, thunks, pre_call_clear):
""" """
Allocate a virtual machine. Allocate a virtual machine.
...@@ -159,10 +161,12 @@ class VM(object): ...@@ -159,10 +161,12 @@ class VM(object):
class Loop(VM): class Loop(VM):
""" """
Unconditional start-to-finish program execution in Python. Unconditional start-to-finish program execution in Python.
No garbage collection is allowed on intermediate results. No garbage collection is allowed on intermediate results.
""" """
def __call__(self): def __call__(self):
if self.time_thunks: if self.time_thunks:
for cont in self.pre_call_clear: for cont in self.pre_call_clear:
...@@ -188,10 +192,12 @@ class Loop(VM): ...@@ -188,10 +192,12 @@ class Loop(VM):
class LoopGC(VM): class LoopGC(VM):
""" """
Unconditional start-to-finish program execution in Python. Unconditional start-to-finish program execution in Python.
Garbage collection is possible on intermediate results. Garbage collection is possible on intermediate results.
""" """
def __init__(self, nodes, thunks, pre_call_clear, post_thunk_clear): def __init__(self, nodes, thunks, pre_call_clear, post_thunk_clear):
super(LoopGC, self).__init__(nodes, thunks, pre_call_clear) super(LoopGC, self).__init__(nodes, thunks, pre_call_clear)
self.post_thunk_clear = post_thunk_clear self.post_thunk_clear = post_thunk_clear
...@@ -231,6 +237,7 @@ class LoopGC(VM): ...@@ -231,6 +237,7 @@ class LoopGC(VM):
class Stack(VM): class Stack(VM):
""" """
Finish-to-start evaluation order of thunks. Finish-to-start evaluation order of thunks.
...@@ -340,7 +347,7 @@ class Stack(VM): ...@@ -340,7 +347,7 @@ class Stack(VM):
apply_stack = list(self.base_apply_stack) apply_stack = list(self.base_apply_stack)
last_apply_stack_len = -1 last_apply_stack_len = -1
#This records all function inputs/shared variables and constants # This records all function inputs/shared variables and constants
for var, data in self.storage_map.iteritems(): for var, data in self.storage_map.iteritems():
if data[0] is None: if data[0] is None:
continue continue
...@@ -396,7 +403,7 @@ class Stack(VM): ...@@ -396,7 +403,7 @@ class Stack(VM):
current_idx = self.node_idx[current_apply] current_idx = self.node_idx[current_apply]
self.call_counts[current_idx] += 1 self.call_counts[current_idx] += 1
self.call_times[current_idx] += dt self.call_times[current_idx] += dt
## Computing the memory footprint of the op # Computing the memory footprint of the op
# ?? What about inplace .. if the op is inplace # ?? What about inplace .. if the op is inplace
# you don't actually ask for more memory! # you don't actually ask for more memory!
for (idx, o) in enumerate( for (idx, o) in enumerate(
...@@ -411,7 +418,7 @@ class Stack(VM): ...@@ -411,7 +418,7 @@ class Stack(VM):
st = getattr(o[0], 'strides', st = getattr(o[0], 'strides',
'input no strides') 'input no strides')
if (getattr(o[0], 'flags', False) and if (getattr(o[0], 'flags', False) and
o[0].flags.c_contiguous): o[0].flags.c_contiguous):
st = 'c' st = 'c'
elif (hasattr(data[0], 'is_c_contiguous') and elif (hasattr(data[0], 'is_c_contiguous') and
data[0].is_c_contiguous()): data[0].is_c_contiguous()):
...@@ -436,15 +443,16 @@ class Stack(VM): ...@@ -436,15 +443,16 @@ class Stack(VM):
if all(compute_map[v][0] if all(compute_map[v][0]
for v in dependencies[i]): for v in dependencies[i]):
storage_map[i][0] = None storage_map[i][0] = None
input_index.append(current_apply.inputs.index(i)) input_index.append(
current_apply.inputs.index(i))
#DO NOT set compute_map to 0 # DO NOT set compute_map to 0
#If values become False and the # If values become False and the
#current_apply is still in the #current_apply is still in the
#stack, this will cause it to be # stack, this will cause it to be
#recomputed! This can cause wrong value # recomputed! This can cause wrong value
#with some combination of inplace op. # with some combination of inplace op.
compute_map[i][0] = 2 compute_map[i][0] = 2
if (config.warn.vm_gc_bug and if (config.warn.vm_gc_bug and
current_apply in apply_stack and current_apply in apply_stack and
...@@ -452,12 +460,13 @@ class Stack(VM): ...@@ -452,12 +460,13 @@ class Stack(VM):
'destroy_map', 'destroy_map',
False)): False)):
warnings.warn( warnings.warn(
"There was a bug that existed in the default Theano configuration," "There was a bug that existed in the default Theano configuration,"
" only in the development version between July 5th 2012" " only in the development version between July 5th 2012"
" and July 30th 2012. This was not in a released version." " and July 30th 2012. This was not in a released version."
" The bug was affecting this script.", " The bug was affecting this script.",
#The stack level is not good when inside a Scan. # The stack level is not good when
stacklevel=3 # inside a Scan.
stacklevel=3
) )
self.node_cleared_order.append(input_index) self.node_cleared_order.append(input_index)
...@@ -465,9 +474,8 @@ class Stack(VM): ...@@ -465,9 +474,8 @@ class Stack(VM):
# -- Non-lazy case, need inputs # -- Non-lazy case, need inputs
apply_stack.append(current_apply) apply_stack.append(current_apply)
apply_stack.extend(inp.owner apply_stack.extend(inp.owner
for inp in current_deps for inp in current_deps
if inp.owner) if inp.owner)
elif not computed_outs: elif not computed_outs:
# #
...@@ -511,7 +519,7 @@ class Stack(VM): ...@@ -511,7 +519,7 @@ class Stack(VM):
self.variable_shape[var] = sh self.variable_shape[var] = sh
st = getattr(o[0], 'strides', 'input no strides') st = getattr(o[0], 'strides', 'input no strides')
if (getattr(o[0], 'flags', False) and if (getattr(o[0], 'flags', False) and
o[0].flags.c_contiguous): o[0].flags.c_contiguous):
st = 'c' st = 'c'
elif (hasattr(data[0], 'is_c_contiguous') and elif (hasattr(data[0], 'is_c_contiguous') and
data[0].is_c_contiguous()): data[0].is_c_contiguous()):
...@@ -523,7 +531,7 @@ class Stack(VM): ...@@ -523,7 +531,7 @@ class Stack(VM):
if self.allow_gc: if self.allow_gc:
for i in current_apply.inputs: for i in current_apply.inputs:
if (dependencies[i] and i.owner and if (dependencies[i] and i.owner and
i not in self.outputs): i not in self.outputs):
empty_storage_map = True empty_storage_map = True
for x in dependencies[i]: for x in dependencies[i]:
if not compute_map[x][0]: if not compute_map[x][0]:
...@@ -531,9 +539,10 @@ class Stack(VM): ...@@ -531,9 +539,10 @@ class Stack(VM):
break break
if empty_storage_map: if empty_storage_map:
storage_map[i][0] = None storage_map[i][0] = None
input_index.append(current_apply.inputs.index(i)) input_index.append(
#See the not lazy gc code for explanations current_apply.inputs.index(i))
#of compute_map change # See the not lazy gc code for explanations
# of compute_map change
compute_map[i][0] = 2 compute_map[i][0] = 2
self.node_cleared_order.append(input_index) self.node_cleared_order.append(input_index)
...@@ -560,6 +569,7 @@ try: ...@@ -560,6 +569,7 @@ try:
import lazylinker_c import lazylinker_c
class CVM(lazylinker_c.CLazyLinker, VM): class CVM(lazylinker_c.CLazyLinker, VM):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
lazylinker_c.CLazyLinker.__init__(self, *args, **kwargs) lazylinker_c.CLazyLinker.__init__(self, *args, **kwargs)
# skip VM.__init__ # skip VM.__init__
...@@ -576,6 +586,7 @@ except (OSError, theano.gof.cmodule.MissingGXX), e: ...@@ -576,6 +586,7 @@ except (OSError, theano.gof.cmodule.MissingGXX), e:
class VM_Linker(link.LocalLinker): class VM_Linker(link.LocalLinker):
""" """
Class that satisfies the Linker interface by acting as a VM factory. Class that satisfies the Linker interface by acting as a VM factory.
""" """
...@@ -625,9 +636,9 @@ class VM_Linker(link.LocalLinker): ...@@ -625,9 +636,9 @@ class VM_Linker(link.LocalLinker):
associated to self, else, a new VM_Linker associated to fgraph. associated to self, else, a new VM_Linker associated to fgraph.
""" """
if (config.profile and if (config.profile and
hasattr(theano, 'sandbox') and hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled): theano.sandbox.cuda.cuda_enabled):
if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1': if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
raise Exception( raise Exception(
"You are running the Theano profiler with CUDA enabled." "You are running the Theano profiler with CUDA enabled."
...@@ -644,12 +655,12 @@ class VM_Linker(link.LocalLinker): ...@@ -644,12 +655,12 @@ class VM_Linker(link.LocalLinker):
# Warning: make sure to forward the correct values of # Warning: make sure to forward the correct values of
# all parameters to __init__ here. # all parameters to __init__ here.
return type(self)( return type(self)(
allow_gc=self.allow_gc, allow_gc=self.allow_gc,
use_cloop=self.use_cloop, use_cloop=self.use_cloop,
callback=self.callback, callback=self.callback,
lazy=self.lazy, lazy=self.lazy,
schedule=self.schedule schedule=self.schedule
).accept(fgraph, no_recycling) ).accept(fgraph, no_recycling)
self.fgraph = fgraph self.fgraph = fgraph
self.no_recycling = no_recycling self.no_recycling = no_recycling
return self return self
...@@ -700,17 +711,17 @@ class VM_Linker(link.LocalLinker): ...@@ -700,17 +711,17 @@ class VM_Linker(link.LocalLinker):
return dependencies return dependencies
def make_vm(self, nodes, thunks, def make_vm(self, nodes, thunks,
input_storage, output_storage, storage_map, input_storage, output_storage, storage_map,
post_thunk_clear, post_thunk_clear,
computed, computed,
compute_map, compute_map,
updated_vars updated_vars
): ):
pre_call_clear = [storage_map[v] for v in self.no_recycling] pre_call_clear = [storage_map[v] for v in self.no_recycling]
if (self.callback is not None or if (self.callback is not None or
(config.profile and config.profile_memory)): (config.profile and config.profile_memory)):
if self.use_cloop and self.callback is not None: if self.use_cloop and self.callback is not None:
logger.warn('CVM does not support callback, using Stack VM.') logger.warn('CVM does not support callback, using Stack VM.')
...@@ -721,11 +732,11 @@ class VM_Linker(link.LocalLinker): ...@@ -721,11 +732,11 @@ class VM_Linker(link.LocalLinker):
if self.allow_gc: if self.allow_gc:
deps = self.compute_gc_dependencies(storage_map) deps = self.compute_gc_dependencies(storage_map)
vm = Stack( vm = Stack(
nodes, thunks, pre_call_clear, nodes, thunks, pre_call_clear,
storage_map, compute_map, storage_map, compute_map,
self.fgraph, self.allow_gc, self.fgraph, self.allow_gc,
dependencies=deps, dependencies=deps,
callback=self.callback) callback=self.callback)
elif self.use_cloop: elif self.use_cloop:
# create a map from nodes to ints and vars to ints # create a map from nodes to ints and vars to ints
nodes_idx = {} nodes_idx = {}
...@@ -747,9 +758,9 @@ class VM_Linker(link.LocalLinker): ...@@ -747,9 +758,9 @@ class VM_Linker(link.LocalLinker):
# put storage_map and compute_map into a int-based scheme # put storage_map and compute_map into a int-based scheme
n_applies = len(nodes) n_applies = len(nodes)
storage_map_list = [storage_map[vars_idx_inv[i]] storage_map_list = [storage_map[vars_idx_inv[i]]
for i in xrange(len(vars_idx_inv))] for i in xrange(len(vars_idx_inv))]
compute_map_list = [compute_map[vars_idx_inv[i]] compute_map_list = [compute_map[vars_idx_inv[i]]
for i in xrange(len(vars_idx_inv))] for i in xrange(len(vars_idx_inv))]
if nodes: if nodes:
assert type(storage_map_list[0]) is list assert type(storage_map_list[0]) is list
assert type(compute_map_list[0]) is list assert type(compute_map_list[0]) is list
...@@ -796,7 +807,7 @@ class VM_Linker(link.LocalLinker): ...@@ -796,7 +807,7 @@ class VM_Linker(link.LocalLinker):
prereq_var_idxs = [] prereq_var_idxs = []
for prereq_node in ords.get(node, []): for prereq_node in ords.get(node, []):
prereq_var_idxs.extend( prereq_var_idxs.extend(
[vars_idx[v] for v in prereq_node.outputs]) [vars_idx[v] for v in prereq_node.outputs])
prereq_var_idxs = list(set(prereq_var_idxs)) prereq_var_idxs = list(set(prereq_var_idxs))
prereq_var_idxs.sort() # TODO: why sort? prereq_var_idxs.sort() # TODO: why sort?
node_prereqs.append(prereq_var_idxs) node_prereqs.append(prereq_var_idxs)
...@@ -816,27 +827,27 @@ class VM_Linker(link.LocalLinker): ...@@ -816,27 +827,27 @@ class VM_Linker(link.LocalLinker):
c0 = sys.getrefcount(node_n_inputs) c0 = sys.getrefcount(node_n_inputs)
vm = CVM( vm = CVM(
nodes, nodes,
thunks, thunks,
pre_call_clear, pre_call_clear,
allow_gc=self.allow_gc, allow_gc=self.allow_gc,
call_counts=[0] * len(nodes), call_counts=[0] * len(nodes),
call_times=[0.0] * len(nodes), call_times=[0.0] * len(nodes),
compute_map_list=compute_map_list, compute_map_list=compute_map_list,
storage_map_list=storage_map_list, storage_map_list=storage_map_list,
base_input_output_list=base_input_output_list, base_input_output_list=base_input_output_list,
node_n_inputs=node_n_inputs, node_n_inputs=node_n_inputs,
node_n_outputs=node_n_outputs, node_n_outputs=node_n_outputs,
node_input_offset=node_input_offset, node_input_offset=node_input_offset,
node_output_offset=node_output_offset, node_output_offset=node_output_offset,
var_owner=var_owner, var_owner=var_owner,
is_lazy_list=is_lazy_list, is_lazy_list=is_lazy_list,
output_vars=output_vars, output_vars=output_vars,
node_prereqs=node_prereqs, node_prereqs=node_prereqs,
node_output_size=node_output_size, node_output_size=node_output_size,
update_storage=update_storage, update_storage=update_storage,
dependencies=dependency_map_list, dependencies=dependency_map_list,
) )
assert c0 == sys.getrefcount(node_n_inputs) assert c0 == sys.getrefcount(node_n_inputs)
else: else:
lazy = self.lazy lazy = self.lazy
...@@ -848,36 +859,36 @@ class VM_Linker(link.LocalLinker): ...@@ -848,36 +859,36 @@ class VM_Linker(link.LocalLinker):
# there is no conditional in the graph # there is no conditional in the graph
if self.allow_gc: if self.allow_gc:
vm = LoopGC( vm = LoopGC(
nodes, nodes,
thunks, thunks,
pre_call_clear, pre_call_clear,
post_thunk_clear) post_thunk_clear)
else: else:
vm = Loop( vm = Loop(
nodes, nodes,
thunks, thunks,
pre_call_clear) pre_call_clear)
else: else:
deps = None deps = None
if self.allow_gc: if self.allow_gc:
deps = self.compute_gc_dependencies(storage_map) deps = self.compute_gc_dependencies(storage_map)
vm = Stack( vm = Stack(
nodes, thunks, pre_call_clear, nodes, thunks, pre_call_clear,
storage_map, compute_map, storage_map, compute_map,
self.fgraph, self.allow_gc, self.fgraph, self.allow_gc,
dependencies=deps dependencies=deps
) )
return vm return vm
def make_all(self, profiler=None, input_storage=None, def make_all(self, profiler=None, input_storage=None,
output_storage=None, output_storage=None,
): ):
fgraph = self.fgraph fgraph = self.fgraph
order = self.schedule(fgraph) order = self.schedule(fgraph)
no_recycling = self.no_recycling no_recycling = self.no_recycling
input_storage, output_storage, storage_map = link.map_storage( input_storage, output_storage, storage_map = link.map_storage(
fgraph, order, input_storage, output_storage) fgraph, order, input_storage, output_storage)
compute_map = {} compute_map = {}
for k in storage_map: for k in storage_map:
compute_map[k] = [k.owner is None] compute_map[k] = [k.owner is None]
...@@ -917,12 +928,12 @@ class VM_Linker(link.LocalLinker): ...@@ -917,12 +928,12 @@ class VM_Linker(link.LocalLinker):
post_thunk_clear = None post_thunk_clear = None
vm = self.make_vm(order, thunks, vm = self.make_vm(order, thunks,
input_storage, output_storage, storage_map, input_storage, output_storage, storage_map,
post_thunk_clear, post_thunk_clear,
computed, computed,
compute_map, compute_map,
self.updated_vars self.updated_vars
) )
vm.storage_map = storage_map vm.storage_map = storage_map
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论