提交 60c3a741 authored 作者: Frederic's avatar Frederic

Fix crash with GpuAdvancedIncSubtensor1 when y is broadcast

上级 2bfe3c82
...@@ -2609,11 +2609,17 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2609,11 +2609,17 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
# CudaNdarray __setitem__ doesn't do broadcast nor support # CudaNdarray __setitem__ doesn't do broadcast nor support
# list of index. # list of index.
if y.ndim == x.ndim: if y.ndim == x.ndim:
assert len(y) == len(idx) if len(y) == 1:
j = 0 # Allow broadcasting of y[0]
for i in idx: y_0 = y[0]
x[i] = y[j] for i in idx:
j += 1 x[i] = y_0
else:
assert len(y) == len(idx)
j = 0
for i in idx:
x[i] = y[j]
j += 1
else: else:
for i in idx: for i in idx:
x[i] = y x[i] = y
...@@ -2639,7 +2645,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2639,7 +2645,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
out[0] = x out[0] = x
def c_code_cache_version(self): def c_code_cache_version(self):
return (4,) return (5,)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
if (self.set_instead_of_inc) or \ if (self.set_instead_of_inc) or \
...@@ -2659,6 +2665,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2659,6 +2665,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
dtype_%(ind)s *p_index; dtype_%(ind)s *p_index;
int num_indices, j; int num_indices, j;
int ret; int ret;
int broadcast_y;
num_indices = PyArray_SIZE(%(ind)s); num_indices = PyArray_SIZE(%(ind)s);
if ((num_indices - 1) > LONG_MAX) { if ((num_indices - 1) > LONG_MAX) {
...@@ -2674,7 +2681,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2674,7 +2681,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
%(out)s = %(x)s; %(out)s = %(x)s;
Py_XINCREF(%(out)s); Py_XINCREF(%(out)s);
} }
broadcast_y = CudaNdarray_DIMS(%(y)s)[0] == 1;
for (j = 0;j < num_indices; j++) { for (j = 0;j < num_indices; j++) {
p_index = (dtype_%(ind)s *)PyArray_GETPTR1(%(ind)s, j); p_index = (dtype_%(ind)s *)PyArray_GETPTR1(%(ind)s, j);
...@@ -2690,11 +2697,20 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2690,11 +2697,20 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
%(fail)s; %(fail)s;
} }
y_rowind_obj = PyInt_FromLong(j);
row_x = CudaNdarray_Subscript((PyObject*)%(out)s, x_rowind_obj); row_x = CudaNdarray_Subscript((PyObject*)%(out)s, x_rowind_obj);
if (row_x == NULL) {
Py_XDECREF(row_x);
Py_XDECREF(x_rowind_obj);
%(fail)s;
}
if (broadcast_y) {
y_rowind_obj = PyInt_FromLong(0);
} else {
y_rowind_obj = PyInt_FromLong(j);
}
row_y = CudaNdarray_Subscript(py_%(y)s, y_rowind_obj); row_y = CudaNdarray_Subscript(py_%(y)s, y_rowind_obj);
if ((row_x == NULL) || (row_y == NULL)) { if (row_y == NULL) {
Py_XDECREF(row_y); Py_XDECREF(row_y);
Py_XDECREF(row_x); Py_XDECREF(row_x);
Py_XDECREF(y_rowind_obj); Py_XDECREF(y_rowind_obj);
......
...@@ -2277,7 +2277,9 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -2277,7 +2277,9 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
} }
else else
{ {
PyErr_SetString(PyExc_IndexError, "index out of bounds"); PyErr_Format(PyExc_IndexError,
"index out of bounds. Asked %d, but size of %d",
d_idx, d_dim);
return NULL; return NULL;
} }
...@@ -2449,7 +2451,9 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -2449,7 +2451,9 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
} }
else else
{ {
PyErr_SetString(PyExc_IndexError, "index out of bounds"); PyErr_Format(PyExc_IndexError,
"index out of bounds. Asked %d for dimensions %d, but size of %d",
d_idx, d, d_dim);
Py_DECREF(rval); Py_DECREF(rval);
return NULL; return NULL;
} }
......
...@@ -914,6 +914,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -914,6 +914,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data_num_init = data_num_init.reshape(data_shape) data_num_init = data_num_init.reshape(data_shape)
inc_shapes = [data_shape[i:] inc_shapes = [data_shape[i:]
for i in xrange(0, len(data_shape) + 1)] for i in xrange(0, len(data_shape) + 1)]
# Test broadcasting of y.
inc_shapes += [(1,) + inc_shapes[-1][1:]]
for inc_shape in inc_shapes: for inc_shape in inc_shapes:
inc_n_dims = len(inc_shape) inc_n_dims = len(inc_shape)
# We copy the numeric value to be 100% sure there is no # We copy the numeric value to be 100% sure there is no
...@@ -928,6 +930,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -928,6 +930,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
# Symbolic variable with rows to be incremented. # Symbolic variable with rows to be incremented.
idx_var = theano.tensor.vector(dtype='int64') idx_var = theano.tensor.vector(dtype='int64')
n_to_inc = rng.randint(data_shape[0]) n_to_inc = rng.randint(data_shape[0])
if (n_to_inc == 1 and
len(inc_shape) > 0 and
inc_shape[0] == 1 and
data_shape[0] > 1):
n_to_inc = 2
# Corresponding numeric variable. # Corresponding numeric variable.
idx_num = rng.randint(0, data_shape[0], n_to_inc) idx_num = rng.randint(0, data_shape[0], n_to_inc)
idx_num = idx_num.astype('int64') idx_num = idx_num.astype('int64')
...@@ -939,7 +946,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -939,7 +946,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
# `data_shape`: what we actually want is the first # `data_shape`: what we actually want is the first
# shape element to be equal to the number of rows to # shape element to be equal to the number of rows to
# increment. # increment.
if len(inc_shape) == len(data_shape): if len(inc_shape) == len(data_shape) and (
len(inc_shapes) == 0 or inc_shape[0] != 1):
inc_shape = (n_to_inc,) + inc_shape[1:] inc_shape = (n_to_inc,) + inc_shape[1:]
inc_size = numpy.product(inc_shape) inc_size = numpy.product(inc_shape)
# Corresponding numeric variable. # Corresponding numeric variable.
...@@ -958,11 +966,19 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -958,11 +966,19 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data_copy = data_num.copy() data_copy = data_num.copy()
for j, idx in enumerate(idx_num): for j, idx in enumerate(idx_num):
if len(inc_shape) == len(data_shape): if len(inc_shape) == len(data_shape):
# Special case where there is no broadcasting. if inc_shape[0] == 1:
if set_instead_of_inc: # Allow broadcasting of y[0]
data_copy[idx] = inc_num[j] inc_num0 = inc_num[0]
if set_instead_of_inc:
data_copy[idx] = inc_num0
else:
data_copy[idx] += inc_num0
else: else:
data_copy[idx] += inc_num[j] # Special case where there is no broadcasting.
if set_instead_of_inc:
data_copy[idx] = inc_num[j]
else:
data_copy[idx] += inc_num[j]
else: else:
if set_instead_of_inc: if set_instead_of_inc:
data_copy[idx] = inc_num data_copy[idx] = inc_num
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论