提交 94592494 authored 作者: lamblin's avatar lamblin

Merge pull request #1222 from nouiz/fix_gpu_strides_crash

Fix gpu strides crash
...@@ -2272,9 +2272,17 @@ class GpuSubtensor(GpuOp, tensor.Subtensor): ...@@ -2272,9 +2272,17 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
set_dim='CudaNdarray_set_dim', set_dim='CudaNdarray_set_dim',
set_stride='CudaNdarray_set_stride', set_stride='CudaNdarray_set_stride',
update_flags="", strides_mul=4) update_flags="", strides_mul=4)
finish_view = ""
#For broadcasted dimensions, set the strides to 0
#We can't do this only for the broadcasted dimensions, as it can also
#happen for dimensions of size 0 that are rebroadcasted later.
for idx in range(node.outputs[0].ndim):
finish_view += """
if(CudaNdarray_HOST_DIMS(xview)[%(idx)s]==1)
CudaNdarray_set_stride(xview, %(idx)s, 0);
""" % locals()
finish_view += """
finish_view = """
//Set the base only now //Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview), if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
...@@ -2292,6 +2300,13 @@ class GpuSubtensor(GpuOp, tensor.Subtensor): ...@@ -2292,6 +2300,13 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
return build_view + "{" + get_xview + "}" + finish_view return build_view + "{" + get_xview + "}" + finish_view
def c_code_cache_version(self):
    """Return the cache-version tuple for this op's generated C code."""
    helper_version = self.helper_c_code_cache_version()
    # An empty helper version means the shared helper C code is
    # unversioned, so this op's C code must stay unversioned as well.
    if not helper_version:
        return ()
    return (3, helper_version)
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp): class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
""" """
......
...@@ -166,7 +166,8 @@ CudaNdarray_set_dim(CudaNdarray * self, int idx, int d) ...@@ -166,7 +166,8 @@ CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
{ {
if ((idx >= self->nd) || (idx < 0) || (d < 0)) if ((idx >= self->nd) || (idx < 0) || (d < 0))
{ {
fprintf(stderr, "WARNING: probably bad CudaNdarray_set_dim arguments: %i %i\n", idx, d); fprintf(stderr, "WARNING: probably bad CudaNdarray_set_dim arguments: self->ndim=%i, idx=%i stride=%i\n",
self->nd, idx, d);
} }
if (d != self->host_structure[idx]) if (d != self->host_structure[idx])
......
...@@ -288,7 +288,9 @@ class CudaNdarrayType(Type): ...@@ -288,7 +288,9 @@ class CudaNdarrayType(Type):
//std::cerr << "c_extract " << %(name)s << '\\n'; //std::cerr << "c_extract " << %(name)s << '\\n';
if (%(name)s->nd != %(nd)s) if (%(name)s->nd != %(nd)s)
{ {
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has rank %%i, it was supposed to have rank %(nd)s", %(name)s->nd); PyErr_Format(PyExc_RuntimeError,
"c_extract: Some CudaNdarray has rank %%i, it was supposed to have rank %(nd)s",
%(name)s->nd);
%(name)s = NULL; %(name)s = NULL;
%(fail)s; %(fail)s;
} }
...@@ -299,7 +301,9 @@ class CudaNdarrayType(Type): ...@@ -299,7 +301,9 @@ class CudaNdarrayType(Type):
print >> sio, """ print >> sio, """
if (CudaNdarray_HOST_DIMS(%(name)s)[%(i)s] != 1) if (CudaNdarray_HOST_DIMS(%(name)s)[%(i)s] != 1)
{ {
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has dim %%i on broadcastable dimension %%i", CudaNdarray_HOST_DIMS(%(name)s)[%(i)s], %(i)s); PyErr_Format(PyExc_RuntimeError,
"c_extract: Some CudaNdarray has dim %%i on broadcastable dimension %%i",
CudaNdarray_HOST_DIMS(%(name)s)[%(i)s], %(i)s);
%(name)s = NULL; %(name)s = NULL;
%(fail)s; %(fail)s;
} }
...@@ -309,7 +313,9 @@ class CudaNdarrayType(Type): ...@@ -309,7 +313,9 @@ class CudaNdarrayType(Type):
if (CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s]) if (CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s])
{ {
//std::cerr << "c_extract bad stride detected...\\n"; //std::cerr << "c_extract bad stride detected...\\n";
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has a nonzero stride %%i on a broadcastable dimension %%i", CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s], %(i)s); PyErr_Format(PyExc_RuntimeError,
"c_extract: Some CudaNdarray has a nonzero stride %%i on a broadcastable dimension %%i",
CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s], %(i)s);
%(name)s = NULL; %(name)s = NULL;
%(fail)s; %(fail)s;
} }
......
...@@ -3645,6 +3645,10 @@ def var(input, axis=None, keepdims=False): ...@@ -3645,6 +3645,10 @@ def var(input, axis=None, keepdims=False):
:param keepdims: If this is set to True, the axes which are reduced are :param keepdims: If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option, left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor. the result will broadcast correctly against the original tensor.
:note: It uses the two-pass algorithm for more stable results.
   https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
   There exist other implementations that are even more stable, but probably slower.
""" """
input_ndim = input.type.ndim input_ndim = input.type.ndim
...@@ -3680,6 +3684,10 @@ def std(input, axis=None, keepdims=False): ...@@ -3680,6 +3684,10 @@ def std(input, axis=None, keepdims=False):
With this option, With this option,
the result will broadcast correctly against the the result will broadcast correctly against the
original tensor. original tensor.
:note: It calls ``var``, which uses the two-pass algorithm for more stable results.
   https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
   There exist other implementations that are even more stable, but probably slower.
""" """
return sqrt(var(input=input, axis=axis, keepdims=keepdims)) return sqrt(var(input=input, axis=axis, keepdims=keepdims))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论