提交 c19d6714 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add support for no indices in advincsub1 for gpuarrray.

上级 e23c1310
...@@ -608,11 +608,6 @@ class GpuAdvancedIncSubtensor1(Op): ...@@ -608,11 +608,6 @@ class GpuAdvancedIncSubtensor1(Op):
} }
step[0] = 0; step[0] = 0;
num_indices = PyArray_SIZE(%(ind)s); num_indices = PyArray_SIZE(%(ind)s);
if ((num_indices - 1) > LONG_MAX) {
PyErr_Format(PyExc_AssertionError,
"num_indices %%lld exceeds LONG_MAX + 1", (long long)num_indices);
%(fail)s
}
if (!%(inplace)s) { if (!%(inplace)s) {
%(out)s = theano_try_copy(%(out)s, %(x)s); %(out)s = theano_try_copy(%(out)s, %(x)s);
if (%(out)s == NULL) if (%(out)s == NULL)
...@@ -622,42 +617,49 @@ class GpuAdvancedIncSubtensor1(Op): ...@@ -622,42 +617,49 @@ class GpuAdvancedIncSubtensor1(Op):
%(out)s = %(x)s; %(out)s = %(x)s;
Py_INCREF(%(out)s); Py_INCREF(%(out)s);
} }
broadcast_y = PyGpuArray_DIM(%(y)s, 0) == 1; if (num_indices != 0) {
for (j = 0; j < num_indices; j++) { if ((num_indices - 1) > LONG_MAX) {
start[0] = *(dtype_%(ind)s *)PyArray_GETPTR1(%(ind)s, j); PyErr_Format(PyExc_AssertionError,
if (start[0] < 0) "num_indices %%lld exceeds LONG_MAX + 1", (long long)num_indices);
start[0] += PyGpuArray_DIM(%(out)s, 0); %(fail)s
if (start[0] < 0 || start[0] >= PyGpuArray_DIM(%(out)s, 0)) {
PyErr_SetString(PyExc_IndexError, "index out of bounds");
%(fail)s;
} }
row_x = pygpu_index(%(out)s, start, (ssize_t *)PyGpuArray_DIMS(%(out)s), step); broadcast_y = PyGpuArray_DIM(%(y)s, 0) == 1;
if (row_x == NULL) for (j = 0; j < num_indices; j++) {
%(fail)s; start[0] = *(dtype_%(ind)s *)PyArray_GETPTR1(%(ind)s, j);
if (start[0] < 0)
if (broadcast_y) start[0] += PyGpuArray_DIM(%(out)s, 0);
start[0] = 0; if (start[0] < 0 || start[0] >= PyGpuArray_DIM(%(out)s, 0)) {
else PyErr_SetString(PyExc_IndexError, "index out of bounds");
start[0] = j; %(fail)s;
}
row_x = pygpu_index(%(out)s, start, (ssize_t *)PyGpuArray_DIMS(%(out)s), step);
if (row_x == NULL)
%(fail)s;
if (broadcast_y)
start[0] = 0;
else
start[0] = j;
row_y = pygpu_index(%(y)s, start, (ssize_t *)PyGpuArray_DIMS(%(y)s), step);
if (row_y == NULL) {
Py_DECREF(row_x);
%(fail)s;
}
row_y = pygpu_index(%(y)s, start, (ssize_t *)PyGpuArray_DIMS(%(y)s), step); if (%(set_instead_of_inc)s) {
if (row_y == NULL) { ret = GpuArray_setarray(&row_x->ga, &row_y->ga);
} else {
void *args[2];
args[0] = (void *)&row_x->ga;
args[1] = (void *)&row_y->ga;
ret = GpuElemwise_call(iadd, args, GE_BROADCAST);
}
Py_DECREF(row_x); Py_DECREF(row_x);
%(fail)s; Py_DECREF(row_y);
if (ret != GA_NO_ERROR)
PyErr_SetString(PyExc_RuntimeError, "Failed to set/inc elements");
} }
if (%(set_instead_of_inc)s) {
ret = GpuArray_setarray(&row_x->ga, &row_y->ga);
} else {
void *args[2];
args[0] = (void *)&row_x->ga;
args[1] = (void *)&row_y->ga;
ret = GpuElemwise_call(iadd, args, GE_BROADCAST);
}
Py_DECREF(row_x);
Py_DECREF(row_y);
if (ret != GA_NO_ERROR)
PyErr_SetString(PyExc_RuntimeError, "Failed to set/inc elements");
} }
""" % dict(x=inputs[0], y=inputs[1], ind=inputs[2], out=outputs[0], """ % dict(x=inputs[0], y=inputs[1], ind=inputs[2], out=outputs[0],
fail=sub['fail'], inplace=int(self.inplace), fail=sub['fail'], inplace=int(self.inplace),
...@@ -665,7 +667,7 @@ class GpuAdvancedIncSubtensor1(Op): ...@@ -665,7 +667,7 @@ class GpuAdvancedIncSubtensor1(Op):
set_instead_of_inc=int(self.set_instead_of_inc)) set_instead_of_inc=int(self.set_instead_of_inc))
def c_code_cache_version(self): def c_code_cache_version(self):
return (0,) return (1,)
class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论