提交 88d9484e 作者: Frédéric Bastien 提交者: GitHub

Merge pull request #6221 from lamblin/advidx_padshape

Use GE_PADSHAPE flag in GPU IncSubtensor
...@@ -387,7 +387,7 @@ int sub_setarray(GpuArray *dst, GpuArray *src) { ...@@ -387,7 +387,7 @@ int sub_setarray(GpuArray *dst, GpuArray *src) {
void *args[2]; void *args[2];
args[0] = &zview->ga; args[0] = &zview->ga;
args[1] = &%(x)s->ga; args[1] = &%(x)s->ga;
if (GpuElemwise_call(iadd, args, GE_BROADCAST) != GA_NO_ERROR) { if (GpuElemwise_call(iadd, args, GE_BROADCAST | GE_PADSHAPE) != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Error doing inplace add"); PyErr_SetString(PyExc_RuntimeError, "Error doing inplace add");
Py_DECREF(zview); Py_DECREF(zview);
%(fail)s %(fail)s
...@@ -399,7 +399,7 @@ int sub_setarray(GpuArray *dst, GpuArray *src) { ...@@ -399,7 +399,7 @@ int sub_setarray(GpuArray *dst, GpuArray *src) {
parent_version = super(GpuIncSubtensor, self).c_code_cache_version() parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
if not parent_version: if not parent_version:
return return
return parent_version + (9,) return parent_version + (10,)
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1): class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
...@@ -1009,7 +1009,7 @@ class GpuAdvancedIncSubtensor1(Op): ...@@ -1009,7 +1009,7 @@ class GpuAdvancedIncSubtensor1(Op):
void *args[2]; void *args[2];
args[0] = (void *)&row_x->ga; args[0] = (void *)&row_x->ga;
args[1] = (void *)&row_y->ga; args[1] = (void *)&row_y->ga;
ret = GpuElemwise_call(iadd, args, GE_BROADCAST); ret = GpuElemwise_call(iadd, args, GE_BROADCAST | GE_PADSHAPE);
} }
Py_DECREF(row_x); Py_DECREF(row_x);
Py_DECREF(row_y); Py_DECREF(row_y);
...@@ -1031,7 +1031,7 @@ class GpuAdvancedIncSubtensor1(Op): ...@@ -1031,7 +1031,7 @@ class GpuAdvancedIncSubtensor1(Op):
""" % dict(fail=sub['fail'])) """ % dict(fail=sub['fail']))
def c_code_cache_version(self): def c_code_cache_version(self):
return (4,) return (5,)
class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
......
...@@ -545,8 +545,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -545,8 +545,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
t = op(n[:z, :z], m) t = op(n[:z, :z], m)
gn, gm = theano.tensor.grad(theano.tensor.sum(t), [n, m]) gn, gm = theano.tensor.grad(theano.tensor.sum(t), [n, m])
utt.verify_grad(lambda m: op(n[:z, :z], m), [mv]) utt.verify_grad(lambda m: op(n[:z, :z], m), [mv], mode=self.mode)
utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data]) utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data], mode=self.mode)
def test_grad_0d(self): def test_grad_0d(self):
data = np.asarray(rand(2, 3), dtype=self.dtype) data = np.asarray(rand(2, 3), dtype=self.dtype)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论