提交 94592494 authored 作者: lamblin's avatar lamblin

Merge pull request #1222 from nouiz/fix_gpu_strides_crash

Fix gpu strides crash
...@@ -2272,9 +2272,17 @@ class GpuSubtensor(GpuOp, tensor.Subtensor): ...@@ -2272,9 +2272,17 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
set_dim='CudaNdarray_set_dim', set_dim='CudaNdarray_set_dim',
set_stride='CudaNdarray_set_stride', set_stride='CudaNdarray_set_stride',
update_flags="", strides_mul=4) update_flags="", strides_mul=4)
finish_view = ""
#For broadcasted dimensions, set the strides to 0
#We can't do this only for the broadcasted dimensions, as it can also
#happen for dimensions of size 0 that are rebroadcasted later.
for idx in range(node.outputs[0].ndim):
finish_view += """
if(CudaNdarray_HOST_DIMS(xview)[%(idx)s]==1)
CudaNdarray_set_stride(xview, %(idx)s, 0);
""" % locals()
finish_view += """
finish_view = """
//Set the base only now //Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview), if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
...@@ -2292,6 +2300,13 @@ class GpuSubtensor(GpuOp, tensor.Subtensor): ...@@ -2292,6 +2300,13 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
return build_view + "{" + get_xview + "}" + finish_view return build_view + "{" + get_xview + "}" + finish_view
def c_code_cache_version(self):
    """Return the cache-version tuple for this op's generated C code."""
    helper_version = self.helper_c_code_cache_version()
    # An empty helper version means the shared helper C code is
    # unversioned, so this op's C code must stay unversioned as well.
    if not helper_version:
        return ()
    return (3, helper_version)
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp): class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
""" """
......
...@@ -166,7 +166,8 @@ CudaNdarray_set_dim(CudaNdarray * self, int idx, int d) ...@@ -166,7 +166,8 @@ CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
{ {
if ((idx >= self->nd) || (idx < 0) || (d < 0)) if ((idx >= self->nd) || (idx < 0) || (d < 0))
{ {
fprintf(stderr, "WARNING: probably bad CudaNdarray_set_dim arguments: %i %i\n", idx, d); fprintf(stderr, "WARNING: probably bad CudaNdarray_set_dim arguments: self->ndim=%i, idx=%i stride=%i\n",
self->nd, idx, d);
} }
if (d != self->host_structure[idx]) if (d != self->host_structure[idx])
......
...@@ -288,7 +288,9 @@ class CudaNdarrayType(Type): ...@@ -288,7 +288,9 @@ class CudaNdarrayType(Type):
//std::cerr << "c_extract " << %(name)s << '\\n'; //std::cerr << "c_extract " << %(name)s << '\\n';
if (%(name)s->nd != %(nd)s) if (%(name)s->nd != %(nd)s)
{ {
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has rank %%i, it was supposed to have rank %(nd)s", %(name)s->nd); PyErr_Format(PyExc_RuntimeError,
"c_extract: Some CudaNdarray has rank %%i, it was supposed to have rank %(nd)s",
%(name)s->nd);
%(name)s = NULL; %(name)s = NULL;
%(fail)s; %(fail)s;
} }
...@@ -299,7 +301,9 @@ class CudaNdarrayType(Type): ...@@ -299,7 +301,9 @@ class CudaNdarrayType(Type):
print >> sio, """ print >> sio, """
if (CudaNdarray_HOST_DIMS(%(name)s)[%(i)s] != 1) if (CudaNdarray_HOST_DIMS(%(name)s)[%(i)s] != 1)
{ {
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has dim %%i on broadcastable dimension %%i", CudaNdarray_HOST_DIMS(%(name)s)[%(i)s], %(i)s); PyErr_Format(PyExc_RuntimeError,
"c_extract: Some CudaNdarray has dim %%i on broadcastable dimension %%i",
CudaNdarray_HOST_DIMS(%(name)s)[%(i)s], %(i)s);
%(name)s = NULL; %(name)s = NULL;
%(fail)s; %(fail)s;
} }
...@@ -309,7 +313,9 @@ class CudaNdarrayType(Type): ...@@ -309,7 +313,9 @@ class CudaNdarrayType(Type):
if (CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s]) if (CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s])
{ {
//std::cerr << "c_extract bad stride detected...\\n"; //std::cerr << "c_extract bad stride detected...\\n";
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has a nonzero stride %%i on a broadcastable dimension %%i", CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s], %(i)s); PyErr_Format(PyExc_RuntimeError,
"c_extract: Some CudaNdarray has a nonzero stride %%i on a broadcastable dimension %%i",
CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s], %(i)s);
%(name)s = NULL; %(name)s = NULL;
%(fail)s; %(fail)s;
} }
......
...@@ -3645,6 +3645,10 @@ def var(input, axis=None, keepdims=False): ...@@ -3645,6 +3645,10 @@ def var(input, axis=None, keepdims=False):
:param keepdims: If this is set to True, the axes which are reduced are :param keepdims: If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option, left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor. the result will broadcast correctly against the original tensor.
:note: It uses the two-pass algorithm for more stable results.
   https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
   There exist other implementations that are even more stable, but probably slower.
""" """
input_ndim = input.type.ndim input_ndim = input.type.ndim
...@@ -3680,6 +3684,10 @@ def std(input, axis=None, keepdims=False): ...@@ -3680,6 +3684,10 @@ def std(input, axis=None, keepdims=False):
With this option, With this option,
the result will broadcast correctly against the the result will broadcast correctly against the
original tensor. original tensor.
:note: It calls ``var``, which uses the two-pass algorithm for more stable results.
   https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
   There exist other implementations that are even more stable, but probably slower.
""" """
return sqrt(var(input=input, axis=axis, keepdims=keepdims)) return sqrt(var(input=input, axis=axis, keepdims=keepdims))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论