提交 270ffede authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Fixes following code review

上级 e9a07232
...@@ -396,6 +396,15 @@ import theano and print the config variable, as in: ...@@ -396,6 +396,15 @@ import theano and print the config variable, as in:
In order not to test with preallocated memory, use an empty string, ``""``.
.. attribute:: config.DebugMode.check_preallocated_output_ndim
Positive int value, default: 4.
When testing with "strided" preallocated output memory, test
all combinations of strides over that number of (inner-most)
dimensions. You may want to reduce that number to reduce memory or
time usage, but it is advised to keep a minimum of 2.
.. attribute:: config.DebugMode.warn_input_not_reused

    Bool value, default: True
......
...@@ -83,6 +83,15 @@ AddConfigVar('DebugMode.check_preallocated_output', ...@@ -83,6 +83,15 @@ AddConfigVar('DebugMode.check_preallocated_output',
StrParam('', is_valid=is_valid_check_preallocated_output_param), StrParam('', is_valid=is_valid_check_preallocated_output_param),
in_c_key=False) in_c_key=False)
AddConfigVar('DebugMode.check_preallocated_output_ndim',
('When testing with "strided" preallocated output memory, '
'test all combinations of strides over that number of '
'(inner-most) dimensions. You may want to reduce that number '
'to reduce memory or time usage, but it is advised to keep a '
'minimum of 2.'),
IntParam(4, lambda i: i > 0),
in_c_key=False)
import logging import logging
_logger = logging.getLogger("theano.compile.debugmode") _logger = logging.getLogger("theano.compile.debugmode")
_logger.setLevel(logging.WARNING) _logger.setLevel(logging.WARNING)
...@@ -1049,7 +1058,6 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1049,7 +1058,6 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
initial_outputs[r] = init_outputs[r] initial_outputs[r] = init_outputs[r]
if initial_outputs: if initial_outputs:
print 'initial:', initial_outputs
yield ('initial', initial_outputs) yield ('initial', initial_outputs)
# reuse_output: use a copy of the same storage returned the first time # reuse_output: use a copy of the same storage returned the first time
...@@ -1144,15 +1152,17 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1144,15 +1152,17 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
out_broadcastable = rev_out_broadcastable[::-1] out_broadcastable = rev_out_broadcastable[::-1]
if 'strided' in prealloc_modes or 'ALL' in prealloc_modes: if 'strided' in prealloc_modes or 'ALL' in prealloc_modes:
check_ndim = config.DebugMode.check_preallocated_output_ndim
# Initial allocation # Initial allocation
init_strided = {} init_strided = {}
for r in considered_outputs: for r in considered_outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, CudaNdarrayType)):
# Create a buffer twice as large in every dimension, # Create a buffer twice as large in every dimension,
# except if broadcastable, or for dimensions above 4 # except if broadcastable, or for dimensions above
# config.DebugMode.check_preallocated_output_ndim
buf_shape = [] buf_shape = []
for s, b in zip(r_vals[r].shape, r.broadcastable): for s, b in zip(r_vals[r].shape, r.broadcastable):
if b or ((r.ndim - len(buf_shape)) > 4): if b or ((r.ndim - len(buf_shape)) > check_ndim):
buf_shape.append(s) buf_shape.append(s)
else: else:
buf_shape.append(s * 2) buf_shape.append(s * 2)
...@@ -1163,18 +1173,18 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1163,18 +1173,18 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# The number of combinations is exponential in the number of # The number of combinations is exponential in the number of
# dimensions, and some ops can have tens of outputs. To prevent # dimensions, and some ops can have tens of outputs. To prevent
# tests from lasting days, we use the same strides for all # tests from lasting days, we use the same strides for all
# dimensions but the last 4 ones. # dimensions but the last check_ndim ones.
# Moreover, to avoid memory problems, we do not test with strides # Moreover, to avoid memory problems, we do not test with strides
# 2 and -2 on those dimensions. # 2 and -2 on those dimensions.
step_signs_list = [] step_signs_list = []
for b in out_broadcastable[-4:]: for b in out_broadcastable[-check_ndim:]:
if b: if b:
step_signs_list.append((1,)) step_signs_list.append((1,))
else: else:
step_signs_list.append((-1, 1)) step_signs_list.append((-1, 1))
# Use the same step on all dimensions before the last 4. # Use the same step on all dimensions before the last check_ndim.
if all(out_broadcastable[:-4]): if all(out_broadcastable[:-check_ndim]):
step_signs_list = [(1,)] + step_signs_list step_signs_list = [(1,)] + step_signs_list
else: else:
step_signs_list = [(-1, 1)] + step_signs_list step_signs_list = [(-1, 1)] + step_signs_list
...@@ -1183,9 +1193,9 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1183,9 +1193,9 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
for step_size in (1, 2): for step_size in (1, 2):
strided = {} strided = {}
# First, the dimensions above 4, then the other ones # First, the dimensions above check_ndim, then the other ones
# Do not test with 2 or -2 for dimensions above 4 # Do not test with 2 or -2 for dimensions above check_ndim
steps = [step_signs[0]] * len(out_broadcastable[:-4]) steps = [step_signs[0]] * len(out_broadcastable[:-check_ndim])
steps += [s * step_size for s in step_signs[1:]] steps += [s * step_size for s in step_signs[1:]]
name = 'strided%s' % str(tuple(steps)) name = 'strided%s' % str(tuple(steps))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论