提交 9b044407 作者: Pascal Lamblin 提交者: GitHub

Merge pull request #6057 from lamblin/fix_nomem_segfault

Fail if output memory not allocated
...@@ -107,7 +107,10 @@ class GpuSubtensor(HideC, Subtensor): ...@@ -107,7 +107,10 @@ class GpuSubtensor(HideC, Subtensor):
return """ return """
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = pygpu_copy(%(inp)s, GA_ANY_ORDER); %(out)s = pygpu_copy(%(inp)s, GA_ANY_ORDER);
if (!%(out)s) { %(fail)s } if (!%(out)s) {
// Exception already set
%(fail)s
}
""" % dict(out=outputs[0], inp=inp, fail=sub['fail']) """ % dict(out=outputs[0], inp=inp, fail=sub['fail'])
sio = StringIO() sio = StringIO()
...@@ -175,7 +178,7 @@ class GpuSubtensor(HideC, Subtensor): ...@@ -175,7 +178,7 @@ class GpuSubtensor(HideC, Subtensor):
return sio.getvalue() return sio.getvalue()
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (8,)
class GpuIncSubtensor(IncSubtensor): class GpuIncSubtensor(IncSubtensor):
...@@ -732,8 +735,10 @@ class GpuAdvancedIncSubtensor1(Op): ...@@ -732,8 +735,10 @@ class GpuAdvancedIncSubtensor1(Op):
num_indices = PyArray_SIZE(%(ind)s); num_indices = PyArray_SIZE(%(ind)s);
if (!%(inplace)s) { if (!%(inplace)s) {
%(out)s = theano_try_copy(%(out)s, %(x)s); %(out)s = theano_try_copy(%(out)s, %(x)s);
if (%(out)s == NULL) if (%(out)s == NULL) {
// Exception already set
%(fail)s %(fail)s
}
} else { } else {
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = %(x)s; %(out)s = %(x)s;
...@@ -789,7 +794,7 @@ class GpuAdvancedIncSubtensor1(Op): ...@@ -789,7 +794,7 @@ class GpuAdvancedIncSubtensor1(Op):
set_instead_of_inc=int(self.set_instead_of_inc)) set_instead_of_inc=int(self.set_instead_of_inc))
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (3,)
class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
...@@ -839,7 +844,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, ...@@ -839,7 +844,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out) return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out)
def c_code_cache_version(self): def c_code_cache_version(self):
return (9,) return (12,)
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray_helper.h>', return ['<numpy_compat.h>', '<gpuarray_helper.h>',
...@@ -874,6 +879,7 @@ if (%(inplace)s) { ...@@ -874,6 +879,7 @@ if (%(inplace)s) {
%(out)s = theano_try_copy(%(out)s, %(x)s); %(out)s = theano_try_copy(%(out)s, %(x)s);
} }
if (!%(out)s) { if (!%(out)s) {
// Exception already set
%(fail)s %(fail)s
} }
if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) { if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) {
...@@ -900,8 +906,9 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) { ...@@ -900,8 +906,9 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) {
code = """ code = """
/* /*
* This is an atomicAdd that works for doubles since that is not provided * This is an atomicAdd that works for doubles since that is not provided
* natively by cuda. * natively by cuda before arch 6.0.
*/ */
#if __CUDA_ARCH__ < 600
__device__ ga_double atomicAdd(ga_double* address, ga_double val) { __device__ ga_double atomicAdd(ga_double* address, ga_double val) {
unsigned long long int* address_as_ull = unsigned long long int* address_as_ull =
(unsigned long long int*)address; (unsigned long long int*)address;
...@@ -914,6 +921,7 @@ __device__ ga_double atomicAdd(ga_double* address, ga_double val) { ...@@ -914,6 +921,7 @@ __device__ ga_double atomicAdd(ga_double* address, ga_double val) {
} while (assumed != old); } while (assumed != old);
return __longlong_as_double(old); return __longlong_as_double(old);
} }
#endif
__device__ ga_double atomicExch(ga_double *address, ga_double val) { __device__ ga_double atomicExch(ga_double *address, ga_double val) {
return atomicExch((unsigned long long int *)address, return atomicExch((unsigned long long int *)address,
......
...@@ -1408,6 +1408,10 @@ class IncSubtensor(Op): ...@@ -1408,6 +1408,10 @@ class IncSubtensor(Op):
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = %(copy_of_x)s; %(z)s = %(copy_of_x)s;
if (!%(z)s) {
// Exception already set
%(fail)s
}
} }
""" % locals() """ % locals()
...@@ -1458,10 +1462,12 @@ class IncSubtensor(Op): ...@@ -1458,10 +1462,12 @@ class IncSubtensor(Op):
""" % locals() """ % locals()
return (self.decl_view() + return (self.decl_view() +
copy_input_if_necessary + copy_input_if_necessary +
"{" +
get_zview + get_zview +
build_view + build_view +
make_modification + make_modification +
"Py_DECREF(zview);" "Py_DECREF(zview);" +
"}"
) )
def do_type_checking(self, node): def do_type_checking(self, node):
...@@ -1477,7 +1483,7 @@ class IncSubtensor(Op): ...@@ -1477,7 +1483,7 @@ class IncSubtensor(Op):
def c_code_cache_version(self): def c_code_cache_version(self):
hv = Subtensor.helper_c_code_cache_version() hv = Subtensor.helper_c_code_cache_version()
if hv: if hv:
return (1, hv) return (3, hv)
else: else:
return () return ()
...@@ -1972,6 +1978,10 @@ class AdvancedIncSubtensor1(Op): ...@@ -1972,6 +1978,10 @@ class AdvancedIncSubtensor1(Op):
{ {
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = %(copy_of_x)s; %(out)s = %(copy_of_x)s;
if (!%(out)s) {
// Exception already set
%(fail)s
}
} }
if (inplace_increment(%(out)s, (PyObject *)%(idx)s, %(y)s, %(inc_or_set)d)) { if (inplace_increment(%(out)s, (PyObject *)%(idx)s, %(y)s, %(inc_or_set)d)) {
%(fail)s; %(fail)s;
...@@ -1980,7 +1990,7 @@ class AdvancedIncSubtensor1(Op): ...@@ -1980,7 +1990,7 @@ class AdvancedIncSubtensor1(Op):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (4,) return (6,)
def perform(self, node, inp, out_): def perform(self, node, inp, out_):
# TODO opt to make this inplace # TODO opt to make this inplace
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论