Merge pull request #6057 from lamblin/fix_nomem_segfault

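Fail if output memory is not allocated: the generated C code now checks the copied output for NULL and takes the failure path (the exception is already set) instead of going on to use a NULL pointer.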

Merge pull request #6057 from lamblin/fix_nomem_segfault
9b044407 · Pascal Lamblin · GitHub · 9ed454d3 · fde1fdf1 · 9b044407
--- a/theano/gpuarray/subtensor.py
+++ b/theano/gpuarray/subtensor.py
@@ -107,7 +107,10 @@ class GpuSubtensor(HideC, Subtensor):
            return """
        Py_XDECREF(%(out)s);
        %(out)s = pygpu_copy(%(inp)s, GA_ANY_ORDER);
-        if (!%(out)s) { %(fail)s }
+        if (!%(out)s) {
+            // Exception already set
+            %(fail)s
+        }
 """ % dict(out=outputs[0], inp=inp, fail=sub['fail'])

        sio = StringIO()
@@ -175,7 +178,7 @@ class GpuSubtensor(HideC, Subtensor):
        return sio.getvalue()

    def c_code_cache_version(self):
-        return (6,)
+        return (8,)


 class GpuIncSubtensor(IncSubtensor):
@@ -732,8 +735,10 @@ class GpuAdvancedIncSubtensor1(Op):
        num_indices = PyArray_SIZE(%(ind)s);
        if (!%(inplace)s) {
          %(out)s = theano_try_copy(%(out)s, %(x)s);
-          if (%(out)s == NULL)
+          if (%(out)s == NULL) {
+            // Exception already set
            %(fail)s
+            }
        } else {
          Py_XDECREF(%(out)s);
          %(out)s = %(x)s;
@@ -789,7 +794,7 @@ class GpuAdvancedIncSubtensor1(Op):
                   set_instead_of_inc=int(self.set_instead_of_inc))

    def c_code_cache_version(self):
-        return (1,)
+        return (3,)


 class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
@@ -839,7 +844,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
        return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out)

    def c_code_cache_version(self):
-        return (9,)
+        return (12,)

    def c_headers(self):
        return ['<numpy_compat.h>', '<gpuarray_helper.h>',
@@ -874,6 +879,7 @@ if (%(inplace)s) {
  %(out)s = theano_try_copy(%(out)s, %(x)s);
 }
 if (!%(out)s) {
+  // Exception already set
  %(fail)s
 }
 if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) {
@@ -900,8 +906,9 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) {
        code = """
 /*
 * This is an atomicAdd that works for doubles since that is not provided
- * natively by cuda.
+ * natively by cuda before arch 6.0.
 */
+#if __CUDA_ARCH__ < 600
 __device__ ga_double atomicAdd(ga_double* address, ga_double val) {
    unsigned long long int* address_as_ull =
                                          (unsigned long long int*)address;
@@ -914,6 +921,7 @@ __device__ ga_double atomicAdd(ga_double* address, ga_double val) {
    } while (assumed != old);
    return __longlong_as_double(old);
 }
+#endif

 __device__ ga_double atomicExch(ga_double *address, ga_double val) {
    return atomicExch((unsigned long long int *)address,

--- a/theano/tensor/subtensor.py
+++ b/theano/tensor/subtensor.py
@@ -1408,6 +1408,10 @@ class IncSubtensor(Op):
        {
            Py_XDECREF(%(z)s);
            %(z)s = %(copy_of_x)s;
+            if (!%(z)s) {
+                // Exception already set
+                %(fail)s
+            }
        }
        """ % locals()

@@ -1458,10 +1462,12 @@ class IncSubtensor(Op):
        """ % locals()
        return (self.decl_view() +
                copy_input_if_necessary +
+                "{" +
                get_zview +
                build_view +
                make_modification +
-                "Py_DECREF(zview);"
+                "Py_DECREF(zview);" +
+                "}"
                )

    def do_type_checking(self, node):
@@ -1477,7 +1483,7 @@ class IncSubtensor(Op):
    def c_code_cache_version(self):
        hv = Subtensor.helper_c_code_cache_version()
        if hv:
-            return (1, hv)
+            return (3, hv)
        else:
            return ()

@@ -1972,6 +1978,10 @@ class AdvancedIncSubtensor1(Op):
        {
            Py_XDECREF(%(out)s);
            %(out)s = %(copy_of_x)s;
+            if (!%(out)s) {
+                // Exception already set
+                %(fail)s
+            }
        }
        if (inplace_increment(%(out)s, (PyObject *)%(idx)s, %(y)s, %(inc_or_set)d)) {
            %(fail)s;
@@ -1980,7 +1990,7 @@ class AdvancedIncSubtensor1(Op):
        """ % locals()

    def c_code_cache_version(self):
-        return (4,)
+        return (6,)

    def perform(self, node, inp, out_):
        # TODO opt to make this inplace