Commit bc2f6793 authored by Frederic, committed by Frederic Bastien

Remove false-positive error raised by NanGuardMode due to GpuAllocEmpty

Parent 0c5014bf
......@@ -3680,6 +3680,7 @@ class GpuAllocEmpty(GpuOp):
# The output can contain nan/inf. output.type is a new
# instance, so we can do this only for that variable.
output.type.filter_checks_isfinite = False
output.tag.nan_guard_mode_check = False
return Apply(self, shape, [output])
def perform(self, node, inputs, out_):
......
......@@ -1190,14 +1190,16 @@ def local_gpu_incsubtensor(node):
# The IncSubtensor upcast to float32 y, so we do it
# explicitly to move it to the GPU.
y = y.astype('float32')
return [GpuIncSubtensor(
ret = GpuIncSubtensor(
incsubt.idx_list,
inplace=incsubt.inplace,
set_instead_of_inc=incsubt.set_instead_of_inc)(
as_cuda_ndarray_variable(x),
as_cuda_ndarray_variable(y),
*coords)]
*coords)
ret.tag.nan_guard_mode_check = getattr(
host_output.tag, 'nan_guard_mode_check', True)
return [ret]
# Incrementing a float32 x results in a float32
# output even if y is float64, so we can downcast
# y to put it on GPU
......@@ -1221,10 +1223,16 @@ def local_gpu_incsubtensor(node):
y = tensor.cast(y, 'float32')
gpu_y = as_cuda_ndarray_variable(y)
if go_gpu:
return [host_from_gpu(GpuIncSubtensor(
ret = GpuIncSubtensor(
node.op.idx_list, inplace=node.op.inplace,
set_instead_of_inc=node.op.set_instead_of_inc)(
gpu_x, gpu_y, *coords))]
gpu_x, gpu_y, *coords)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
ret.tag.nan_guard_mode_check = val
ret = host_from_gpu(ret)
ret.tag.nan_guard_mode_check = val
return [ret]
return False
......
......@@ -721,6 +721,7 @@ class GpuAllocEmpty(HideC, Alloc):
output.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
# The output can contain nan/inf.
output.type.filter_checks_isfinite = False
output.tag.nan_guard_mode_check = False
return Apply(self, sh, [output])
def perform(self, node, inputs, out_, ctx):
......
......@@ -569,9 +569,13 @@ def local_gpua_subtensor(node, context_name):
@register_opt('fast_compile')
@op_lifter([tensor.IncSubtensor])
def local_gpua_incsubtensor(node, context_name):
return GpuIncSubtensor(node.op.idx_list, node.op.inplace,
node.op.set_instead_of_inc,
node.op.destroyhandler_tolerate_aliased)
op = GpuIncSubtensor(node.op.idx_list, node.op.inplace,
node.op.set_instead_of_inc,
node.op.destroyhandler_tolerate_aliased)
ret = op(*node.inputs)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
ret.tag.nan_guard_mode_check = val
return ret
@register_opt('fast_compile')
......
......@@ -3076,6 +3076,9 @@ def local_inplace_setsubtensor(node):
set_instead_of_inc=node.op.set_instead_of_inc,
destroyhandler_tolerate_aliased=dta)
new_node = new_op(*node.inputs)
val = getattr(node.outputs[0].tag, 'nan_guard_mode_check', True)
new_node.tag.nan_guard_mode_check = val
# Copy stacktrace from original outputs to new outputs.
# This is sensible, because the new operation is the
# same as the old one, but now with different attributes.
......
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment