Commit 189b1352 authored by Frédéric Bastien (avatar: Frédéric Bastien)

Merge pull request #4381 from nouiz/set_subtensor1_crash

Fix crash in GpuAdvancedIncSubtensor1 when unbroadcasting the value.
......@@ -2888,7 +2888,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
out[0] = x
def c_code_cache_version(self):
return (7,)
return (8,)
def c_code(self, node, name, inputs, outputs, sub):
if (node.inputs[0].ndim != node.inputs[1].ndim):
......@@ -2961,7 +2961,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
%(fail)s;
}
if (%(set_instead_of_inc)s) {
ret = CudaNdarray_CopyFromCudaNdarray((CudaNdarray *) row_x, (CudaNdarray *) row_y);
ret = CudaNdarray_CopyFromCudaNdarray((CudaNdarray *) row_x, (CudaNdarray *) row_y, 1);
} else {
ret = CudaNdarray_inplace_elemwise(row_x, row_y, IADD);
}
......
......@@ -560,6 +560,25 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
utt.verify_grad(fun, [numpy.random.rand(5, 5).astype(self.dtype),
numpy.random.rand(2, 5).astype(self.dtype)])
# test set_subtensor broadcast
self.dtype = 'float32'
from theano.sandbox.cuda.dnn import dnn_conv
x = tensor.tensor4('x', dtype=self.dtype)
indexes = theano.shared(numpy.int32([1, 2, 3, 4]))
W = self.shared(numpy.random.random(
(10, 10, 3, 3)).astype(self.dtype))
h = x + W
h = tensor.set_subtensor(h[indexes], h[indexes])
g = tensor.grad(h.sum(), W)
N = 2
if theano.config.mode == "FAST_COMPILE" and self.adv_incsub1 is tensor.AdvancedIncSubtensor1:
N = 3
f = self.function([x], g, op=self.adv_incsub1, N=N)
f(numpy.random.random((10, 10, 3, 3)).astype(self.dtype))
def test_adv_sub1_idx_broadcast(self):
# The idx can be a broadcastable vector.
ones = numpy.ones((4, 3), dtype=self.dtype)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论