提交 a099b54c 作者: Shawn Tan

- Flatten the array-indexed dimensions of `y` as well as `x`

- Modified the test to use multi-dimensional index arrays to make sure this works
上级 d2cd02d0
...@@ -658,14 +658,11 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -658,14 +658,11 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
except Exception: except Exception:
pass pass
x_ = x_.transpose(*transp) x_ = x_.transpose(*transp)
idx_ = ([slice(None)] * p + nidx[p:]) idx_ = ([slice(None)] * p + nidx[p:])
x_ = x_.__getitem__(idx_) x_ = x_.__getitem__(idx_)
# flatten the array-indexed dimensions # flatten the array-indexed dimensions
shape = ((np.prod(x_.shape[0: p]),) + x_flat = x_.reshape((np.prod(x_.shape[0: p]),) + x_.shape[p:])
x_.shape[p:]) y_flat = y.reshape((np.prod(y.shape[0: p]),) + y.shape[p:])
x_flat = x_.reshape(shape)
# build the strides # build the strides
strides = [1] strides = [1]
...@@ -674,11 +671,12 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -674,11 +671,12 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
strides.insert(0, stride) strides.insert(0, stride)
# build the indices and use it # build the indices and use it
take_idx = sum((i * s for i, s in zip(nidx, strides))) take_idx = sum((i * s for i, s in zip(nidx, strides))).flatten()
k = get_iadd(node.inputs[0], node.inputs[1]) k = get_iadd(node.inputs[0], node.inputs[1])
y = pygpu.asarray(y, context=x_flat.context) y_flat = pygpu.asarray(y_flat, context=x_flat.context)
for j, i in enumerate(take_idx): for j, i in enumerate(take_idx):
k(x_flat[i], y[j], broadcast=True) k(x_flat[i], y_flat[j], broadcast=True)
out[0] = x out[0] = x
......
...@@ -78,22 +78,27 @@ class G_subtensorF16(test_subtensor.T_subtensor): ...@@ -78,22 +78,27 @@ class G_subtensorF16(test_subtensor.T_subtensor):
def test_advinc_subtensor(): def test_advinc_subtensor():
shp = (3, 3, 3) x_shp = (8, 8, 8)
y_shp = (2, 2, 8)
shared = gpuarray_shared_constructor shared = gpuarray_shared_constructor
xval = np.arange(np.prod(shp), dtype='float32').reshape(shp) + 1 xval = np.arange(np.prod(x_shp), dtype='float32').reshape(x_shp) + 1
yval = np.arange(np.prod(shp[1:]), dtype='float32').reshape(shp[1:]) yval = np.arange(np.prod(y_shp), dtype='float32').reshape(y_shp)
idx = ([0, 1, 2], [0, 1, 2]) idx = ([[0, 1],
[2, 3]],
[[0, 1],
[2, 3]])
rep = xval.copy()
rep[idx] += yval
x = shared(xval, name='x') x = shared(xval, name='x')
y = tensor.tensor(dtype='float32', y = tensor.tensor(dtype='float32',
broadcastable=(False, False), broadcastable=(False,) * len(yval.shape),
name='y') name='y')
expr = tensor.advanced_inc_subtensor(x, y, *idx) expr = tensor.advanced_inc_subtensor(x, y, *idx)
f = theano.function([y], expr, mode=mode_with_gpu) f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, GpuAdvancedIncSubtensor) assert sum([isinstance(node.op, GpuAdvancedIncSubtensor)
for node in f.maker.fgraph.toposort()]) == 1 for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval) rval = f(yval)
rep = xval.copy()
rep[idx] += yval
assert np.allclose(rval, rep) assert np.allclose(rval, rep)
>>>>>>> Initial additions for `GpuAdvancedIncSubtensor` >>>>>>> Initial additions for `GpuAdvancedIncSubtensor`
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论