提交 028459c3 authored 作者: abergeron's avatar abergeron

Merge pull request #3768 from nouiz/nanguardmode

Don't let scan AllocEmpty cause false alarm by NanGuardMode
...@@ -913,6 +913,23 @@ documentation: ...@@ -913,6 +913,23 @@ documentation:
.. automodule:: theano.misc.doubleop .. automodule:: theano.misc.doubleop
:members: :members:
NanGuardMode and AllocEmpty
---------------------------
NanGuardMode helps users find where in the graph NaNs appear. But
sometimes, we want some variables not to be checked. For example, in
the old GPU back-end, we use a float32 CudaNdarray to store the MRG
random number generator state (they are integers). So if NanGuardMode
checked it, it would generate false positives. Another case is related to
[Gpu]AllocEmpty or some computation on it (like done by Scan).
You can tell NanGuardMode not to check a variable with:
``variable.tag.nan_guard_mode_check``. Also, this tag automatically
follows that variable during optimization. This means if you tag a
variable that gets replaced by an inplace version, it will keep that
tag.
Final Note Final Note
---------- ----------
......
...@@ -199,7 +199,7 @@ def std_fgraph(input_specs, output_specs, accept_inplace=False): ...@@ -199,7 +199,7 @@ def std_fgraph(input_specs, output_specs, accept_inplace=False):
return fgraph, list(map(SymbolicOutput, updates)) return fgraph, list(map(SymbolicOutput, updates))
std_fgraph.features = [gof.toolbox.PreserveNames] std_fgraph.features = [gof.toolbox.PreserveVariableAttributes]
class AliasedMemoryError(Exception): class AliasedMemoryError(Exception):
......
...@@ -416,7 +416,7 @@ def get_mode(orig_string): ...@@ -416,7 +416,7 @@ def get_mode(orig_string):
elif string == 'NanGuardMode': elif string == 'NanGuardMode':
# need to import later to break circular dependency. # need to import later to break circular dependency.
from .nanguardmode import NanGuardMode from .nanguardmode import NanGuardMode
# DebugMode use its own linker. # NanGuardMode use its own linker.
ret = NanGuardMode(True, True, True, optimizer=config.optimizer) ret = NanGuardMode(True, True, True, optimizer=config.optimizer)
else: else:
# This might be required if the string is 'ProfileMode' # This might be required if the string is 'ProfileMode'
......
...@@ -297,12 +297,14 @@ class NanGuardMode(Mode): ...@@ -297,12 +297,14 @@ class NanGuardMode(Mode):
# If the input is the result of computation, then we # If the input is the result of computation, then we
# don't need to check it. It is already done after the # don't need to check it. It is already done after the
# computation. # computation.
if var.owner is not None: if (var.owner is None and
getattr(var.tag, 'nan_guard_mode_check', True)):
do_check_on(x[0], node, fn, True) do_check_on(x[0], node, fn, True)
fn() fn()
outputs = fn.outputs outputs = fn.outputs
for x in outputs: for x, var in zip(outputs, node.outputs):
do_check_on(x[0], node, fn, False) if getattr(var.tag, 'nan_guard_mode_check', True):
do_check_on(x[0], node, fn, False)
wrap_linker = theano.gof.WrapLinker([theano.gof.OpWiseCLinker()], wrap_linker = theano.gof.WrapLinker([theano.gof.OpWiseCLinker()],
nan_check) nan_check)
......
...@@ -455,10 +455,28 @@ class PrintListener(Feature): ...@@ -455,10 +455,28 @@ class PrintListener(Feature):
class PreserveNames(Feature): class PreserveNames(Feature):
"""
This preserves some variable names during optimization.
Deprecated. We need to keep it to allow unpickling.
"""
def on_change_input(self, fgraph, node, i, r, new_r, reason=None):
if r.name is not None and new_r.name is None:
new_r.name = r.name
class PreserveVariableAttributes(Feature):
"""
This preserves some variable attributes and tags during optimization.
"""
def on_change_input(self, fgraph, node, i, r, new_r, reason=None): def on_change_input(self, fgraph, node, i, r, new_r, reason=None):
if r.name is not None and new_r.name is None: if r.name is not None and new_r.name is None:
new_r.name = r.name new_r.name = r.name
if getattr(r.tag, 'nan_guard_mode_check', False) and getattr(
new_r.tag, 'nan_guard_mode_check', False) is False:
new_r.tag.nan_guard_mode_check = r.tag.nan_guard_mode_check
class NoOutputFromInplace(Feature): class NoOutputFromInplace(Feature):
......
...@@ -51,8 +51,8 @@ def mysend(subject, file): ...@@ -51,8 +51,8 @@ def mysend(subject, file):
# Open the files in binary mode. Let the MIMEImage class automatically # Open the files in binary mode. Let the MIMEImage class automatically
# guess the specific image type. # guess the specific image type.
fp = open(file, 'rb') with open(file, 'rb') as fp:
s=fp.read() s=fp.read()
failures=0 failures=0
errors=0 errors=0
ran=False ran=False
...@@ -115,7 +115,6 @@ def mysend(subject, file): ...@@ -115,7 +115,6 @@ def mysend(subject, file):
s = ("Summary of the output:\n\n" + filter_output(open(file)) + s = ("Summary of the output:\n\n" + filter_output(open(file)) +
"\n\nFull output:\n\n" + s) "\n\nFull output:\n\n" + s)
img = MIMEText(s) img = MIMEText(s)
fp.close()
msg.attach(img) msg.attach(img)
# Send the email via our own SMTP server. # Send the email via our own SMTP server.
......
...@@ -2436,7 +2436,7 @@ class GpuReshape(tensor.Reshape, GpuOp): ...@@ -2436,7 +2436,7 @@ class GpuReshape(tensor.Reshape, GpuOp):
""" """
# __hash__, __eq__, __str__ come from tensor.Subtensor # __hash__, __eq__, __str__ come from tensor.Reshape
def make_node(self, x, shp): def make_node(self, x, shp):
x = as_cuda_ndarray_variable(x) x = as_cuda_ndarray_variable(x)
shp = tensor.as_tensor_variable(shp) shp = tensor.as_tensor_variable(shp)
...@@ -3680,6 +3680,7 @@ class GpuAllocEmpty(GpuOp): ...@@ -3680,6 +3680,7 @@ class GpuAllocEmpty(GpuOp):
# The outut can contain nan/inf. output.type is a new # The outut can contain nan/inf. output.type is a new
# instance, so we can do this only for that variable. # instance, so we can do this only for that variable.
output.type.filter_checks_isfinite = False output.type.filter_checks_isfinite = False
output.tag.nan_guard_mode_check = False
return Apply(self, shape, [output]) return Apply(self, shape, [output])
def debug_perform(self, node, inputs, out_): def debug_perform(self, node, inputs, out_):
......
...@@ -1190,14 +1190,16 @@ def local_gpu_incsubtensor(node): ...@@ -1190,14 +1190,16 @@ def local_gpu_incsubtensor(node):
# The IncSubtensor upcast to float32 y, so we do it # The IncSubtensor upcast to float32 y, so we do it
# explicitly to move it to the GPU. # explicitly to move it to the GPU.
y = y.astype('float32') y = y.astype('float32')
ret = GpuIncSubtensor(
return [GpuIncSubtensor(
incsubt.idx_list, incsubt.idx_list,
inplace=incsubt.inplace, inplace=incsubt.inplace,
set_instead_of_inc=incsubt.set_instead_of_inc)( set_instead_of_inc=incsubt.set_instead_of_inc)(
as_cuda_ndarray_variable(x), as_cuda_ndarray_variable(x),
as_cuda_ndarray_variable(y), as_cuda_ndarray_variable(y),
*coords)] *coords)
ret.tag.nan_guard_mode_check = getattr(
host_output.tag, 'nan_guard_mode_check', True)
return [ret]
# Incrementing a float32 x results in a float32 # Incrementing a float32 x results in a float32
# output even if y is float64, so we can downcast # output even if y is float64, so we can downcast
# y to put it on GPU # y to put it on GPU
...@@ -1221,10 +1223,16 @@ def local_gpu_incsubtensor(node): ...@@ -1221,10 +1223,16 @@ def local_gpu_incsubtensor(node):
y = tensor.cast(y, 'float32') y = tensor.cast(y, 'float32')
gpu_y = as_cuda_ndarray_variable(y) gpu_y = as_cuda_ndarray_variable(y)
if go_gpu: if go_gpu:
return [host_from_gpu(GpuIncSubtensor( ret = GpuIncSubtensor(
node.op.idx_list, inplace=node.op.inplace, node.op.idx_list, inplace=node.op.inplace,
set_instead_of_inc=node.op.set_instead_of_inc)( set_instead_of_inc=node.op.set_instead_of_inc)(
gpu_x, gpu_y, *coords))] gpu_x, gpu_y, *coords)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
ret.tag.nan_guard_mode_check = val
ret = host_from_gpu(ret)
ret.tag.nan_guard_mode_check = val
return [ret]
return False return False
...@@ -2532,6 +2540,20 @@ def local_gpu_allocempty(node): ...@@ -2532,6 +2540,20 @@ def local_gpu_allocempty(node):
return False return False
# Don't register by default.
@gof.local_optimizer([GpuAllocEmpty])
def local_gpu_alloc_empty_to_zeros(node):
# We need the exact match as GpuAlloc inherit from GpuAllocEmpty.
if type(node.op) is GpuAllocEmpty:
return [gpu_alloc(theano.tensor.constant(0, dtype='float32'),
*node.inputs)]
optdb.register('local_gpu_alloc_empty_to_zeros',
theano.tensor.opt.in2out(local_gpu_alloc_empty_to_zeros),
# After move to gpu and merge2, before inplace.
49.3,
'alloc_empty_to_zeros',)
def typeInfer(node): def typeInfer(node):
return typeConstructor return typeConstructor
......
...@@ -721,6 +721,7 @@ class GpuAllocEmpty(HideC, Alloc): ...@@ -721,6 +721,7 @@ class GpuAllocEmpty(HideC, Alloc):
output.tag.values_eq_approx = tensor.type.values_eq_approx_always_true output.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
# The outut can contain nan/inf. # The outut can contain nan/inf.
output.type.filter_checks_isfinite = False output.type.filter_checks_isfinite = False
output.tag.nan_guard_mode_check = False
return Apply(self, sh, [output]) return Apply(self, sh, [output])
def debug_perform(self, node, inputs, out_, ctx): def debug_perform(self, node, inputs, out_, ctx):
......
...@@ -300,6 +300,21 @@ def local_gpualloc_memset_0(node): ...@@ -300,6 +300,21 @@ def local_gpualloc_memset_0(node):
return [new_op(*node.inputs)] return [new_op(*node.inputs)]
# Don't register by default.
@gof.local_optimizer([GpuAllocEmpty])
def local_gpua_alloc_empty_to_zeros(node):
if isinstance(node.op, GpuAllocEmpty):
context_name = infer_context_name(*node.inputs)
z = numpy.asarray(0, dtype=node.outputs[0].dtype)
return [GpuAlloc()(as_gpuarray_variable(z, context_name),
*node.inputs)]
optdb.register('local_gpua_alloc_empty_to_zeros',
theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros),
# After move to gpu and merge2, before inplace.
49.3,
'alloc_empty_to_zeros',)
@register_opt() @register_opt()
@local_optimizer([GpuContiguous]) @local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node): def local_gpu_contiguous_gpu_contiguous(node):
...@@ -569,9 +584,13 @@ def local_gpua_subtensor(node, context_name): ...@@ -569,9 +584,13 @@ def local_gpua_subtensor(node, context_name):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.IncSubtensor]) @op_lifter([tensor.IncSubtensor])
def local_gpua_incsubtensor(node, context_name): def local_gpua_incsubtensor(node, context_name):
return GpuIncSubtensor(node.op.idx_list, node.op.inplace, op = GpuIncSubtensor(node.op.idx_list, node.op.inplace,
node.op.set_instead_of_inc, node.op.set_instead_of_inc,
node.op.destroyhandler_tolerate_aliased) node.op.destroyhandler_tolerate_aliased)
ret = op(*node.inputs)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
ret.tag.nan_guard_mode_check = val
return ret
@register_opt('fast_compile') @register_opt('fast_compile')
......
...@@ -620,7 +620,9 @@ def expand_empty(tensor_var, size): ...@@ -620,7 +620,9 @@ def expand_empty(tensor_var, size):
new_shape = [size + shapes[0]] + shapes[1:] new_shape = [size + shapes[0]] + shapes[1:]
empty = tensor.AllocEmpty(tensor_var.dtype)(*new_shape) empty = tensor.AllocEmpty(tensor_var.dtype)(*new_shape)
return tensor.set_subtensor(empty[:shapes[0]], tensor_var) ret = tensor.set_subtensor(empty[:shapes[0]], tensor_var)
ret.tag.nan_guard_mode_check = False
return ret
def equal_computations(xs, ys, in_xs=None, in_ys=None): def equal_computations(xs, ys, in_xs=None, in_ys=None):
......
...@@ -6241,6 +6241,13 @@ class AllocEmpty(gof.Op): ...@@ -6241,6 +6241,13 @@ class AllocEmpty(gof.Op):
# The outut can contain nan/inf. output.type is a new # The outut can contain nan/inf. output.type is a new
# instance, so we can do this only for that variable. # instance, so we can do this only for that variable.
output.type.filter_checks_isfinite = False output.type.filter_checks_isfinite = False
# We can't reuse filter_checks_isfinite as by default it is
# False and it is set to true only in DebugMode.
# We can't set it in the type as other make_node can reuse the type.
# We can't set it in the variable as it isn't copied when we copy
# the variable. So we set it in the tag.
output.tag.nan_guard_mode_check = False
return Apply(self, shape, [output]) return Apply(self, shape, [output])
def debug_perform(self, node, inputs, out_): def debug_perform(self, node, inputs, out_):
......
...@@ -1733,6 +1733,26 @@ def local_useless_alloc(node): ...@@ -1733,6 +1733,26 @@ def local_useless_alloc(node):
return [node.inputs[0]] return [node.inputs[0]]
# Don't register by default.
@gof.local_optimizer([T.AllocEmpty])
def local_alloc_empty_to_zeros(node):
"""This convert AllocEmpty to Alloc of 0.
This help investigate NaN with NanGuardMode. Not registered by
default. To activate it, use the Theano flag
optimizer_including=alloc_empty_to_zeros. This also enable
the GPU version of this optimizations.
"""
if isinstance(node.op, T.AllocEmpty):
return [T.zeros(node.inputs, dtype=node.outputs[0].dtype)]
compile.optdb.register('local_alloc_empty_to_zeros',
in2out(local_alloc_empty_to_zeros),
# After move to gpu and merge2, before inplace.
49.3,
'alloc_empty_to_zeros',)
@register_specialize @register_specialize
@register_canonicalize @register_canonicalize
@gof.local_optimizer([T.shape]) @gof.local_optimizer([T.shape])
...@@ -3043,6 +3063,9 @@ def local_inplace_setsubtensor(node): ...@@ -3043,6 +3063,9 @@ def local_inplace_setsubtensor(node):
set_instead_of_inc=node.op.set_instead_of_inc, set_instead_of_inc=node.op.set_instead_of_inc,
destroyhandler_tolerate_aliased=dta) destroyhandler_tolerate_aliased=dta)
new_node = new_op(*node.inputs) new_node = new_op(*node.inputs)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
new_node.tag.nan_guard_mode_check = val
# Copy stacktrace from original outputs to new outputs. # Copy stacktrace from original outputs to new outputs.
# This is sensible, because the new operation is the # This is sensible, because the new operation is the
# same as the old one, but now with different attributes. # same as the old one, but now with different attributes.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论