提交 9b044407 作者: Pascal Lamblin 提交者: GitHub

Merge pull request #6057 from lamblin/fix_nomem_segfault

Fail if output memory not allocated
......@@ -107,7 +107,10 @@ class GpuSubtensor(HideC, Subtensor):
return """
Py_XDECREF(%(out)s);
%(out)s = pygpu_copy(%(inp)s, GA_ANY_ORDER);
if (!%(out)s) { %(fail)s }
if (!%(out)s) {
// Exception already set
%(fail)s
}
""" % dict(out=outputs[0], inp=inp, fail=sub['fail'])
sio = StringIO()
......@@ -175,7 +178,7 @@ class GpuSubtensor(HideC, Subtensor):
return sio.getvalue()
def c_code_cache_version(self):
return (6,)
return (8,)
class GpuIncSubtensor(IncSubtensor):
......@@ -732,8 +735,10 @@ class GpuAdvancedIncSubtensor1(Op):
num_indices = PyArray_SIZE(%(ind)s);
if (!%(inplace)s) {
%(out)s = theano_try_copy(%(out)s, %(x)s);
if (%(out)s == NULL)
if (%(out)s == NULL) {
// Exception already set
%(fail)s
}
} else {
Py_XDECREF(%(out)s);
%(out)s = %(x)s;
......@@ -789,7 +794,7 @@ class GpuAdvancedIncSubtensor1(Op):
set_instead_of_inc=int(self.set_instead_of_inc))
def c_code_cache_version(self):
return (1,)
return (3,)
class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
......@@ -839,7 +844,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out)
def c_code_cache_version(self):
return (9,)
return (12,)
def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray_helper.h>',
......@@ -874,6 +879,7 @@ if (%(inplace)s) {
%(out)s = theano_try_copy(%(out)s, %(x)s);
}
if (!%(out)s) {
// Exception already set
%(fail)s
}
if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) {
......@@ -900,8 +906,9 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) {
code = """
/*
* This is an atomicAdd that works for doubles since that is not provided
* natively by cuda.
* natively by cuda before arch 6.0.
*/
#if __CUDA_ARCH__ < 600
__device__ ga_double atomicAdd(ga_double* address, ga_double val) {
unsigned long long int* address_as_ull =
(unsigned long long int*)address;
......@@ -914,6 +921,7 @@ __device__ ga_double atomicAdd(ga_double* address, ga_double val) {
} while (assumed != old);
return __longlong_as_double(old);
}
#endif
__device__ ga_double atomicExch(ga_double *address, ga_double val) {
return atomicExch((unsigned long long int *)address,
......
......@@ -1408,6 +1408,10 @@ class IncSubtensor(Op):
{
Py_XDECREF(%(z)s);
%(z)s = %(copy_of_x)s;
if (!%(z)s) {
// Exception already set
%(fail)s
}
}
""" % locals()
......@@ -1458,10 +1462,12 @@ class IncSubtensor(Op):
""" % locals()
return (self.decl_view() +
copy_input_if_necessary +
"{" +
get_zview +
build_view +
make_modification +
"Py_DECREF(zview);"
"Py_DECREF(zview);" +
"}"
)
def do_type_checking(self, node):
......@@ -1477,7 +1483,7 @@ class IncSubtensor(Op):
def c_code_cache_version(self):
hv = Subtensor.helper_c_code_cache_version()
if hv:
return (1, hv)
return (3, hv)
else:
return ()
......@@ -1972,6 +1978,10 @@ class AdvancedIncSubtensor1(Op):
{
Py_XDECREF(%(out)s);
%(out)s = %(copy_of_x)s;
if (!%(out)s) {
// Exception already set
%(fail)s
}
}
if (inplace_increment(%(out)s, (PyObject *)%(idx)s, %(y)s, %(inc_or_set)d)) {
%(fail)s;
......@@ -1980,7 +1990,7 @@ class AdvancedIncSubtensor1(Op):
""" % locals()
def c_code_cache_version(self):
return (4,)
return (6,)
def perform(self, node, inp, out_):
# TODO opt to make this inplace
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论