Commit bc2f6793 authored by Frederic, committed by Frederic Bastien

Remove false-positive error raised by NanGuardMode due to GpuAllocEmpty

Parent 0c5014bf
......@@ -3680,6 +3680,7 @@ class GpuAllocEmpty(GpuOp):
# The output can contain nan/inf. output.type is a new
# instance, so we can do this only for that variable.
output.type.filter_checks_isfinite = False
output.tag.nan_guard_mode_check = False
return Apply(self, shape, [output])
def perform(self, node, inputs, out_):
......
......@@ -1190,14 +1190,16 @@ def local_gpu_incsubtensor(node):
# The IncSubtensor upcast to float32 y, so we do it
# explicitly to move it to the GPU.
y = y.astype('float32')
return [GpuIncSubtensor(
ret = GpuIncSubtensor(
incsubt.idx_list,
inplace=incsubt.inplace,
set_instead_of_inc=incsubt.set_instead_of_inc)(
as_cuda_ndarray_variable(x),
as_cuda_ndarray_variable(y),
*coords)]
*coords)
ret.tag.nan_guard_mode_check = getattr(
host_output.tag, 'nan_guard_mode_check', True)
return [ret]
# Incrementing a float32 x results in a float32
# output even if y is float64, so we can downcast
# y to put it on GPU
......@@ -1221,10 +1223,16 @@ def local_gpu_incsubtensor(node):
y = tensor.cast(y, 'float32')
gpu_y = as_cuda_ndarray_variable(y)
if go_gpu:
return [host_from_gpu(GpuIncSubtensor(
ret = GpuIncSubtensor(
node.op.idx_list, inplace=node.op.inplace,
set_instead_of_inc=node.op.set_instead_of_inc)(
gpu_x, gpu_y, *coords))]
gpu_x, gpu_y, *coords)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
ret.tag.nan_guard_mode_check = val
ret = host_from_gpu(ret)
ret.tag.nan_guard_mode_check = val
return [ret]
return False
......
......@@ -721,6 +721,7 @@ class GpuAllocEmpty(HideC, Alloc):
output.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
# The output can contain nan/inf.
output.type.filter_checks_isfinite = False
output.tag.nan_guard_mode_check = False
return Apply(self, sh, [output])
def perform(self, node, inputs, out_, ctx):
......
......@@ -569,9 +569,13 @@ def local_gpua_subtensor(node, context_name):
@register_opt('fast_compile')
@op_lifter([tensor.IncSubtensor])
def local_gpua_incsubtensor(node, context_name):
return GpuIncSubtensor(node.op.idx_list, node.op.inplace,
node.op.set_instead_of_inc,
node.op.destroyhandler_tolerate_aliased)
op = GpuIncSubtensor(node.op.idx_list, node.op.inplace,
node.op.set_instead_of_inc,
node.op.destroyhandler_tolerate_aliased)
ret = op(*node.inputs)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
ret.tag.nan_guard_mode_check = val
return ret
@register_opt('fast_compile')
......
......@@ -3076,6 +3076,9 @@ def local_inplace_setsubtensor(node):
set_instead_of_inc=node.op.set_instead_of_inc,
destroyhandler_tolerate_aliased=dta)
new_node = new_op(*node.inputs)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
new_node.tag.nan_guard_mode_check = val
# Copy stacktrace from original outputs to new outputs.
# This is sensible, because the new operation is the
# same as the old one, but now with different attributes.
......
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment