提交 bc2f6793 authored 作者: Frederic's avatar Frederic 提交者: Frederic Bastien

Remove the false error raised by NanGuardMode due to GpuAllocEmpty

上级 0c5014bf
...@@ -3680,6 +3680,7 @@ class GpuAllocEmpty(GpuOp): ...@@ -3680,6 +3680,7 @@ class GpuAllocEmpty(GpuOp):
# The output can contain nan/inf. output.type is a new # The output can contain nan/inf. output.type is a new
# instance, so we can do this only for that variable. # instance, so we can do this only for that variable.
output.type.filter_checks_isfinite = False output.type.filter_checks_isfinite = False
output.tag.nan_guard_mode_check = False
return Apply(self, shape, [output]) return Apply(self, shape, [output])
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
......
...@@ -1190,14 +1190,16 @@ def local_gpu_incsubtensor(node): ...@@ -1190,14 +1190,16 @@ def local_gpu_incsubtensor(node):
# The IncSubtensor upcast to float32 y, so we do it # The IncSubtensor upcast to float32 y, so we do it
# explicitly to move it to the GPU. # explicitly to move it to the GPU.
y = y.astype('float32') y = y.astype('float32')
ret = GpuIncSubtensor(
return [GpuIncSubtensor(
incsubt.idx_list, incsubt.idx_list,
inplace=incsubt.inplace, inplace=incsubt.inplace,
set_instead_of_inc=incsubt.set_instead_of_inc)( set_instead_of_inc=incsubt.set_instead_of_inc)(
as_cuda_ndarray_variable(x), as_cuda_ndarray_variable(x),
as_cuda_ndarray_variable(y), as_cuda_ndarray_variable(y),
*coords)] *coords)
ret.tag.nan_guard_mode_check = getattr(
host_output.tag, 'nan_guard_mode_check', True)
return [ret]
# Incrementing a float32 x results in a float32 # Incrementing a float32 x results in a float32
# output even if y is float64, so we can downcast # output even if y is float64, so we can downcast
# y to put it on GPU # y to put it on GPU
...@@ -1221,10 +1223,16 @@ def local_gpu_incsubtensor(node): ...@@ -1221,10 +1223,16 @@ def local_gpu_incsubtensor(node):
y = tensor.cast(y, 'float32') y = tensor.cast(y, 'float32')
gpu_y = as_cuda_ndarray_variable(y) gpu_y = as_cuda_ndarray_variable(y)
if go_gpu: if go_gpu:
return [host_from_gpu(GpuIncSubtensor( ret = GpuIncSubtensor(
node.op.idx_list, inplace=node.op.inplace, node.op.idx_list, inplace=node.op.inplace,
set_instead_of_inc=node.op.set_instead_of_inc)( set_instead_of_inc=node.op.set_instead_of_inc)(
gpu_x, gpu_y, *coords))] gpu_x, gpu_y, *coords)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
ret.tag.nan_guard_mode_check = val
ret = host_from_gpu(ret)
ret.tag.nan_guard_mode_check = val
return [ret]
return False return False
......
...@@ -721,6 +721,7 @@ class GpuAllocEmpty(HideC, Alloc): ...@@ -721,6 +721,7 @@ class GpuAllocEmpty(HideC, Alloc):
output.tag.values_eq_approx = tensor.type.values_eq_approx_always_true output.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
# The output can contain nan/inf. # The output can contain nan/inf.
output.type.filter_checks_isfinite = False output.type.filter_checks_isfinite = False
output.tag.nan_guard_mode_check = False
return Apply(self, sh, [output]) return Apply(self, sh, [output])
def perform(self, node, inputs, out_, ctx): def perform(self, node, inputs, out_, ctx):
......
...@@ -569,9 +569,13 @@ def local_gpua_subtensor(node, context_name): ...@@ -569,9 +569,13 @@ def local_gpua_subtensor(node, context_name):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.IncSubtensor]) @op_lifter([tensor.IncSubtensor])
def local_gpua_incsubtensor(node, context_name): def local_gpua_incsubtensor(node, context_name):
return GpuIncSubtensor(node.op.idx_list, node.op.inplace, op = GpuIncSubtensor(node.op.idx_list, node.op.inplace,
node.op.set_instead_of_inc, node.op.set_instead_of_inc,
node.op.destroyhandler_tolerate_aliased) node.op.destroyhandler_tolerate_aliased)
ret = op(*node.inputs)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
ret.tag.nan_guard_mode_check = val
return ret
@register_opt('fast_compile') @register_opt('fast_compile')
......
...@@ -3076,6 +3076,9 @@ def local_inplace_setsubtensor(node): ...@@ -3076,6 +3076,9 @@ def local_inplace_setsubtensor(node):
set_instead_of_inc=node.op.set_instead_of_inc, set_instead_of_inc=node.op.set_instead_of_inc,
destroyhandler_tolerate_aliased=dta) destroyhandler_tolerate_aliased=dta)
new_node = new_op(*node.inputs) new_node = new_op(*node.inputs)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
new_node.tag.nan_guard_mode_check = val
# Copy stacktrace from original outputs to new outputs. # Copy stacktrace from original outputs to new outputs.
# This is sensible, because the new operation is the # This is sensible, because the new operation is the
# same as the old one, but now with different attributes. # same as the old one, but now with different attributes.
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论