提交 6f829941 作者: Pascal Lamblin

Add flag enabling testing of preallocated storage in DebugMode

上级 05e88458
...@@ -46,6 +46,10 @@ AddConfigVar('DebugMode.warn_input_not_reused', ...@@ -46,6 +46,10 @@ AddConfigVar('DebugMode.warn_input_not_reused',
), ),
BoolParam(True)) BoolParam(True))
AddConfigVar('DebugMode.check_preallocated_output',
'Test thunks with pre-allocated memory as output storage.',
BoolParam(False))
import logging import logging
_logger=logging.getLogger("theano.compile.debugmode") _logger=logging.getLogger("theano.compile.debugmode")
_logger.setLevel(logging.WARNING) _logger.setLevel(logging.WARNING)
...@@ -1194,68 +1198,69 @@ class _Linker(gof.link.LocalLinker): ...@@ -1194,68 +1198,69 @@ class _Linker(gof.link.LocalLinker):
r_vals[r] = storage_map[r][0] r_vals[r] = storage_map[r][0]
storage_map[r][0] = None #clear the storage_map of outputs for the thunk_c storage_map[r][0] = None #clear the storage_map of outputs for the thunk_c
## Then, try to use different output storages if config.DebugMode.check_preallocated_output:
# reuse_output: use a copy of the same storage returned the first time ## Then, try to use different output storages
# TODO: optimization warning if the storage in reuse_outputs # reuse_output: use a copy of the same storage returned the first time
# is not reused # TODO: optimization warning if the storage in reuse_outputs
# c_cont_output: use a c-continuous ndarray (for TensorType, else None) # is not reused
# f_cont_output: use a fortran-continuous ndarray (for TensorType, else None) # c_cont_output: use a c-continuous ndarray (for TensorType, else None)
# TODO: Sparse, Scalar # f_cont_output: use a fortran-continuous ndarray (for TensorType, else None)
# TODO: wrong shape, more stride patterns # TODO: Sparse, Scalar
reuse_outputs = {} # TODO: wrong shape, more stride patterns
c_cont_outputs = {} reuse_outputs = {}
f_cont_outputs = {} c_cont_outputs = {}
for r in node.outputs: f_cont_outputs = {}
r_val = r_vals[r]
reuse_outputs[r] = _lessbroken_deepcopy(r_val)
if isinstance(r.type, TensorType):
c_cont_outputs[r] = numpy.empty(
shape=r_val.shape,
dtype=r_val.dtype,
order='C')
f_cont_outputs[r] = numpy.empty(
shape=r_val.shape,
dtype=r_val.dtype,
order='F')
elif isinstance(r.type, CudaNdarrayType):
# CudaNdarray supports only C-contiguous
c_cont_outputs[r] = CudaNdarray.zeros(
r_val.shape)
for out_map in (reuse_outputs, c_cont_outputs, f_cont_outputs):
if len(out_map) == 0:
# All storages are None, no need to test that again
continue
# Copy the inputs over again
for r in node.inputs:
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
# Copy the appropriate output storages
for r in node.outputs:
storage_map[r][0] = out_map.get(r, None)
thunk_py()
# Check outputs
for r in node.outputs:
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0], hint='perform output', specific_hint = r.type.value_validity_msg(storage_map[r][0]))
_check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
clobber_dr_vals=False, perform='py',
warn_input_not_reused=False)
_check_viewmap(node, storage_map)
for r in node.outputs: for r in node.outputs:
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]): r_val = r_vals[r]
# TODO: indicate it is not a C/Py problem reuse_outputs[r] = _lessbroken_deepcopy(r_val)
raise BadCLinkerOutput(r, val_py=r_vals[r], val_c=storage_map[r][0]) if isinstance(r.type, TensorType):
c_cont_outputs[r] = numpy.empty(
# Clear storage_map shape=r_val.shape,
for r in node.outputs: dtype=r_val.dtype,
storage_map[r][0] = None order='C')
f_cont_outputs[r] = numpy.empty(
shape=r_val.shape,
dtype=r_val.dtype,
order='F')
elif isinstance(r.type, CudaNdarrayType):
# CudaNdarray supports only C-contiguous
c_cont_outputs[r] = CudaNdarray.zeros(
r_val.shape)
for out_map in (reuse_outputs, c_cont_outputs, f_cont_outputs):
if len(out_map) == 0:
# All storages are None, no need to test that again
continue
# Copy the inputs over again
for r in node.inputs:
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
# Copy the appropriate output storages
for r in node.outputs:
storage_map[r][0] = out_map.get(r, None)
thunk_py()
# Check outputs
for r in node.outputs:
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0], hint='perform output', specific_hint = r.type.value_validity_msg(storage_map[r][0]))
_check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
clobber_dr_vals=False, perform='py',
warn_input_not_reused=False)
_check_viewmap(node, storage_map)
for r in node.outputs:
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
# TODO: indicate it is not a C/Py problem
raise BadCLinkerOutput(r, val_py=r_vals[r], val_c=storage_map[r][0])
# Clear storage_map
for r in node.outputs:
storage_map[r][0] = None
# print >> sys.stderr, i, "DEBUGMODE thunk_py %100s %50s %30s" % (node, # print >> sys.stderr, i, "DEBUGMODE thunk_py %100s %50s %30s" % (node,
#[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs], #[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs],
...@@ -1326,66 +1331,66 @@ class _Linker(gof.link.LocalLinker): ...@@ -1326,66 +1331,66 @@ class _Linker(gof.link.LocalLinker):
r_vals[r] = storage_map[r][0] r_vals[r] = storage_map[r][0]
storage_map[r][0] = None #clear the storage_map for the thunk_c storage_map[r][0] = None #clear the storage_map for the thunk_c
if config.DebugMode.check_preallocated_output:
## Then, try to use different output storages ## Then, try to use different output storages
# TODO: factorize that code with the one for Python above # TODO: factorize that code with the one for Python above
reuse_outputs = {} reuse_outputs = {}
c_cont_outputs = {} c_cont_outputs = {}
f_cont_outputs = {} f_cont_outputs = {}
for r in node.outputs:
r_val = r_vals[r]
reuse_outputs[r] = _lessbroken_deepcopy(r_val)
if isinstance(r.type, TensorType):
c_cont_outputs[r] = numpy.empty(
shape=r_val.shape,
dtype=r_val.dtype,
order='C')
f_cont_outputs[r] = numpy.empty(
shape=r_val.shape,
dtype=r_val.dtype,
order='F')
for out_map in (reuse_outputs, c_cont_outputs, f_cont_outputs):
if len(out_map) == 0:
# All storages are None, no need to test that again
continue
# Copy the inputs over again
for r in node.inputs:
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
# Copy the appropriate output storages
for r in node.outputs:
#storage_map[r][0] = out_map.get(r, None)
if r in out_map:
storage_map[r][0] = out_map[r]
else:
print 'not tensor?', r
try:
thunk_c()
except:
raise_with_op(node)
# Check outputs
for r in node.outputs:
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0], hint='perform output', specific_hint = r.type.value_validity_msg(storage_map[r][0]))
_check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
clobber_dr_vals=False, perform='c',
warn_input_not_reused=False)
_check_viewmap(node, storage_map)
for r in node.outputs:
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
# TODO: indicate it is not a C/Py problem
raise BadCLinkerOutput(r, val_py=r_vals[r], val_c=storage_map[r][0])
# Clear storage map
for r in node.outputs: for r in node.outputs:
storage_map[r][0] = None r_val = r_vals[r]
reuse_outputs[r] = _lessbroken_deepcopy(r_val)
if isinstance(r.type, TensorType):
c_cont_outputs[r] = numpy.empty(
shape=r_val.shape,
dtype=r_val.dtype,
order='C')
f_cont_outputs[r] = numpy.empty(
shape=r_val.shape,
dtype=r_val.dtype,
order='F')
for out_map in (reuse_outputs, c_cont_outputs, f_cont_outputs):
if len(out_map) == 0:
# All storages are None, no need to test that again
continue
# Copy the inputs over again
for r in node.inputs:
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
# Copy the appropriate output storages
for r in node.outputs:
#storage_map[r][0] = out_map.get(r, None)
if r in out_map:
storage_map[r][0] = out_map[r]
else:
print 'not tensor?', r
try:
thunk_c()
except:
raise_with_op(node)
# Check outputs
for r in node.outputs:
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0], hint='perform output', specific_hint = r.type.value_validity_msg(storage_map[r][0]))
_check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
clobber_dr_vals=False, perform='c',
warn_input_not_reused=False)
_check_viewmap(node, storage_map)
for r in node.outputs:
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
# TODO: indicate it is not a C/Py problem
raise BadCLinkerOutput(r, val_py=r_vals[r], val_c=storage_map[r][0])
# Clear storage map
for r in node.outputs:
storage_map[r][0] = None
# print >> sys.stderr, i, "DEBUGMODE thunk_c %100s %50s %30s" % (node, # print >> sys.stderr, i, "DEBUGMODE thunk_c %100s %50s %30s" % (node,
#[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs], #[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs],
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论