提交 94592494 authored 作者: lamblin's avatar lamblin

Merge pull request #1222 from nouiz/fix_gpu_strides_crash

Fix gpu strides crash
......@@ -2272,9 +2272,17 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
set_dim='CudaNdarray_set_dim',
set_stride='CudaNdarray_set_stride',
update_flags="", strides_mul=4)
finish_view = ""
#For broadcasted dimensions, set the strides to 0
#We can't do that only for broadcasted dimensions as this can happen for dimensions of size 0,
#That are rebroadcated later.
for idx in range(node.outputs[0].ndim):
finish_view += """
if(CudaNdarray_HOST_DIMS(xview)[%(idx)s]==1)
CudaNdarray_set_stride(xview, %(idx)s, 0);
""" % locals()
finish_view = """
finish_view += """
//Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
......@@ -2292,6 +2300,13 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
return build_view + "{" + get_xview + "}" + finish_view
def c_code_cache_version(self):
hv = self.helper_c_code_cache_version()
# If `helper_c_code_cache_version` is not versioned we do not want to
# have a versioned version of this op's C code.
if len(hv) == 0:
return ()
return (3, hv)
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
"""
......
......@@ -166,7 +166,8 @@ CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
{
if ((idx >= self->nd) || (idx < 0) || (d < 0))
{
fprintf(stderr, "WARNING: probably bad CudaNdarray_set_dim arguments: %i %i\n", idx, d);
fprintf(stderr, "WARNING: probably bad CudaNdarray_set_dim arguments: self->ndim=%i, idx=%i stride=%i\n",
self->nd, idx, d);
}
if (d != self->host_structure[idx])
......
......@@ -288,7 +288,9 @@ class CudaNdarrayType(Type):
//std::cerr << "c_extract " << %(name)s << '\\n';
if (%(name)s->nd != %(nd)s)
{
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has rank %%i, it was supposed to have rank %(nd)s", %(name)s->nd);
PyErr_Format(PyExc_RuntimeError,
"c_extract: Some CudaNdarray has rank %%i, it was supposed to have rank %(nd)s",
%(name)s->nd);
%(name)s = NULL;
%(fail)s;
}
......@@ -299,7 +301,9 @@ class CudaNdarrayType(Type):
print >> sio, """
if (CudaNdarray_HOST_DIMS(%(name)s)[%(i)s] != 1)
{
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has dim %%i on broadcastable dimension %%i", CudaNdarray_HOST_DIMS(%(name)s)[%(i)s], %(i)s);
PyErr_Format(PyExc_RuntimeError,
"c_extract: Some CudaNdarray has dim %%i on broadcastable dimension %%i",
CudaNdarray_HOST_DIMS(%(name)s)[%(i)s], %(i)s);
%(name)s = NULL;
%(fail)s;
}
......@@ -309,7 +313,9 @@ class CudaNdarrayType(Type):
if (CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s])
{
//std::cerr << "c_extract bad stride detected...\\n";
PyErr_Format(PyExc_RuntimeError, "Some CudaNdarray has a nonzero stride %%i on a broadcastable dimension %%i", CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s], %(i)s);
PyErr_Format(PyExc_RuntimeError,
"c_extract: Some CudaNdarray has a nonzero stride %%i on a broadcastable dimension %%i",
CudaNdarray_HOST_STRIDES(%(name)s)[%(i)s], %(i)s);
%(name)s = NULL;
%(fail)s;
}
......
......@@ -3645,6 +3645,10 @@ def var(input, axis=None, keepdims=False):
:param keepdims: If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor.
:note: It use the two-pass algorithm for more stable results.
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
It exist other implementation that are even more stable, but probably slower.
"""
input_ndim = input.type.ndim
......@@ -3680,6 +3684,10 @@ def std(input, axis=None, keepdims=False):
With this option,
the result will broadcast correctly against the
original tensor.
:note: It call var and var use the two-pass algorithm for more stable results.
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
It exist other implementation that are even more stable, but probably slower.
"""
return sqrt(var(input=input, axis=axis, keepdims=keepdims))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论