Commit 29b460a4 authored by Arnaud Bergeron

Properly handle cases where y has to be broadcasted in GpuAdvancedIncSubtensor1.

Parent 40a42060
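For context, the behaviour at issue is the one AdvancedIncSubtensor1 already has on the CPU: when `y` has a single row (or fewer dimensions than `x`), it must be broadcast over every selected row of `x` rather than consumed one row per index. A minimal NumPy sketch of that semantics, with illustrative array names that are not part of the patch:

    import numpy as np

    x = np.zeros((5, 3), dtype='float32')
    idx = np.array([0, 2, 2])                        # repeated indices accumulate
    y_rows = np.ones((3, 3), dtype='float32')        # one row of y per index: no broadcasting needed
    y_broadcast = np.ones((1, 3), dtype='float32')   # a single row: must be broadcast over all of idx

    # Reference semantics for the "increment" case (set_instead_of_inc=False):
    np.add.at(x, idx, y_rows)        # adds y_rows[j] to x[idx[j]] for each j
    np.add.at(x, idx, y_broadcast)   # adds the same single row to every x[idx[j]]
    print(x)

The diff below adjusts `perform` so the GPU op distinguishes these two cases explicitly instead of assuming `y` always carries one row per index.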
@@ -406,49 +406,47 @@ class GpuAdvancedIncSubtensor1(HideC, tensor.AdvancedIncSubtensor1):
x, y, idx = inp
out, = out_
if not self.inplace:
x = x.copy()
out[0] = x
if len(idx) == 0:
return
# Make sure idx is not a GpuArray otherwise we cannot use its content
# to index x and y
if isinstance(idx, gpuarray.GpuArray):
idx = numpy.asarray(idx)
if not self.inplace:
x = x.copy()
if self.set_instead_of_inc:
assert y.ndim <= x.ndim # Should be guaranteed by `make_node`
if y.ndim == x.ndim:
assert len(y) == len(idx)
for (j, i) in enumerate(idx):
x[i] = y[j]
else:
for i in idx:
x[i] = y
else:
# If `y` has as many dimensions as `x`, then we want to iterate
# jointly on `x` and `y`. Otherwise, it means `y` should be
# broadcasted to fill all relevant rows of `x`.
assert y.ndim <= x.ndim # Should be guaranteed by `make_node`
if len(idx) == 0:
pass
# if len(y) == 1, we need to broadcast it.
elif y.ndim == x.ndim and len(y) != 1:
if y.ndim == x.ndim and y.shape[0] != 1:
assert len(y) == len(idx)
if self.set_instead_of_inc:
for (j, i) in enumerate(idx):
x[i] = y[j]
else:
k = self.getInplElemwiseAdditionKernel(x[0], y[0])
for (j, i) in enumerate(idx):
k(x[i], y[j], broadcast=False)
k(x[i], y[j], broadcast=True)
else:
if y.ndim == x.ndim:
# First dim is always 1 in this case.
reshaped_y = y.reshape(y.shape[1:])
else:
nb_dims_to_add = (x.ndim - 1) - y.ndim
reshaped_y = y.reshape((1,)*nb_dims_to_add + y.shape)
k = self.getInplElemwiseAdditionKernel(x[0],
reshaped_y)
if self.set_instead_of_inc:
for i in idx:
x[i] = reshaped_y
else:
k = self.getInplElemwiseAdditionKernel(x[0], reshaped_y)
for i in idx:
k(x[i], reshaped_y, broadcast=True)
out[0] = x
class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
"""Implement AdvancedIncSubtensor1 on the gpu, but use function