提交 ebbaae5b，作者：Frederic

Fix GpuAdvancedIncSubtensor1_dev20 with negative index

上级 3f3bf149
......@@ -2974,7 +2974,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
return Apply(self, [x_, y_, ilist_], [x_.type()])
def c_code_cache_version(self):
return (3,)
return (4,)
def c_code(self, node, name, inputs, outputs, sub):
active_device_no = theano.sandbox.cuda.active_device_number()
......@@ -3030,6 +3030,8 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
for(int j = (threadIdx.x); j < numColsX;j += blockDim.x)
{
int x_row = d_indices_arr[i];
if(x_row < 0)
x_row += numRowsX;
int y_row = i;
atomicAdd(&X[(x_row * stridesX0) + (j * stridesX1)], Y[(y_row * stridesY0) + (j * stridesY1)]);
}
......
......@@ -437,7 +437,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def test_ok_list(self):
for data, idx in [(rand(4), [1, 0]),
(rand(4, 5), [2, 3]),
(rand(4, 5), [2, 3, -1]),
(rand(4, 2, 3), [0, 3]),
(rand(4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0]),
(rand(4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0,
......@@ -479,6 +479,16 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
out2 = test_out[0][0]
assert out1 is out2
# test the grad
gn = theano.grad(t.sum(), n)
g = self.function([], gn, op=self.adv_incsub1)
theano.printing.debugprint(g)
utt.verify_grad(lambda m: m[[1, 3]],
[numpy.random.rand(5, 5).astype(self.dtype)])
g_0 = g()
utt.verify_grad(lambda m: m[idx],
[data])
def test_err_invalid_list(self):
n = self.shared(numpy.asarray(5, dtype=self.dtype))
self.assertRaises(TypeError, n.__getitem__, [0, 0])
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请注册或者登录后发表评论