Commit 80a1017d authored by Shawn Tan

Modifications according to review comments

Parent 9e5ad298
@@ -1065,7 +1065,8 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1, tensor.AdvancedIncSubtensor])
@op_lifter([tensor.AdvancedIncSubtensor1])
@op_lifter([tensor.AdvancedIncSubtensor])
@register_opt2([tensor.AdvancedIncSubtensor1, tensor.AdvancedIncSubtensor], 'fast_compile')
def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
if isinstance(op, (tensor.AdvancedIncSubtensor1)):
......
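For readers unfamiliar with the registration machinery: op_lifter decorators in theano.gpuarray.opt wrap a function that inspects a host-graph node and returns the equivalent GPU op. A minimal sketch of the dispatch this hunk converges on; the constructor arguments are assumed from the host ops' attributes, not copied from the real source:

import theano.tensor as tensor
from theano.gpuarray.subtensor import (GpuAdvancedIncSubtensor,
                                       GpuAdvancedIncSubtensor1)

# Hedged sketch, not the actual optimizer body: dispatch on the host op
# type and return the matching GPU op.
def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
    if isinstance(op, tensor.AdvancedIncSubtensor1):
        # vector-index case, handled by the specialized 1-d op
        return GpuAdvancedIncSubtensor1(
            inplace=op.inplace, set_instead_of_inc=op.set_instead_of_inc)
    # general advanced-indexing case, handled by the op added in this commit
    return GpuAdvancedIncSubtensor(
        inplace=op.inplace, set_instead_of_inc=op.set_instead_of_inc)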
@@ -602,7 +602,7 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
y = as_gpuarray_variable(y, ctx_name)
return gof.Apply(self, [x, y] + rval.inputs[2:], [otype()])
def perform(self, node, inp, out_, ctx=None):
def perform(self, node, inp, out_):
out, = out_
x = inp[0]
y = inp[1]
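Dropping the unused ctx keyword brings the method in line with Theano's standard Op.perform(self, node, inputs, output_storage) contract. As a reminder of that interface, a tiny self-contained Op (a generic example, unrelated to this class):

import numpy as np
import theano
from theano.gof import Op, Apply

class PlusOne(Op):
    # Minimal Op illustrating the perform() signature the diff adopts.
    def make_node(self, x):
        x = theano.tensor.as_tensor_variable(x)
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        (x,) = inputs
        (out,) = output_storage       # list of one-element storage cells
        out[0] = np.asarray(x + 1, dtype=x.dtype)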
@@ -614,7 +614,7 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
if isinstance(idx[i], gpuarray.GpuArray):
idx[i] = np.asarray(idx[i])
# Copied code from AdvancedSubtensor
# Insert axes for None indexing
nidx = []
nshp = list(x.shape)
for k, i in enumerate(idx):
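The surrounding loop rewrites None entries the way NumPy's np.newaxis works: each None contributes a fresh length-1 axis, so the shape and the index list are updated in step. A standalone NumPy illustration of the same idea, with toy shapes not taken from the source:

import numpy as np

x = np.arange(6).reshape(2, 3)
idx = (None, np.array([0, 1]))   # None behaves like np.newaxis

nidx, nshp = [], list(x.shape)
for k, i in enumerate(idx):
    if i is None:
        nshp.insert(k, 1)        # materialize the new length-1 axis
        nidx.append(slice(None)) # then index it trivially
    else:
        nidx.append(i)

x_ = x.reshape(nshp)             # shape (1, 2, 3)
assert (x_[tuple(nidx)] == x[idx]).all()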
@@ -626,41 +626,30 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
x_ = x.reshape(nshp)
narrays = 0
transp = list(range(x_.ndim))
# Bring array indices to front
transp = []
nidx_ = []
p = 0
ap = 0
for k, i in enumerate(list(nidx)):
if (isinstance(i, np.ndarray) and
i.ndim != 0):
transp.remove(k)
transp.insert(p, k)
ap += k
i = nidx.pop(k)
nidx.insert(p, i)
if isinstance(i, np.ndarray) and i.ndim != 0:
transp.append(k)
nidx_.append(i)
p += 1
narrays += 1
else:
if narrays == 0:
try:
i.__index__()
if ap >= 0:
ap -= 1
narrays = 2
except Exception:
pass
# End of copied code from AdvancedSubtensor
for k, i in enumerate(list(nidx)):
if not (isinstance(i, np.ndarray) and i.ndim != 0):
transp.append(k)
nidx_.append(i)
transp = transp + range(len(transp), x_.ndim)
rtransp = [i for i, _ in sorted(enumerate(transp), key=lambda x:x[1])]
nidx = nidx_
# transp: order to shuffle axes of x so that single dimension
# subarrays are extracted first
# p: number of axes with array indexing
x_ = x_.transpose(*transp)
idx_ = ([slice(None)] * p + nidx[p:])
x_ = x_.__getitem__(idx_)
# flatten the array-indexed dimensions
x_flat = x_.reshape((np.prod(x_.shape[0: p]),) + x_.shape[p:])
# process y so that last axes are the same
if y.shape != (1,):
y_shape_reverse = []
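The transp/rtransp pair is the usual advanced-indexing trick: permute the array-indexed axes to the front, work on that view, then undo the permutation. The inverse permutation is built exactly as in the diff; a quick NumPy check with arbitrary shapes:

import numpy as np

x = np.arange(24).reshape(2, 3, 4)
transp = [2, 0, 1]   # pretend axis 2 carries the array index
# invert the permutation the same way rtransp is built above
rtransp = [i for i, _ in sorted(enumerate(transp), key=lambda t: t[1])]
assert rtransp == [1, 2, 0]
assert (x.transpose(*transp).transpose(*rtransp) == x).all()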
@@ -677,7 +666,6 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
y_shape_reverse.append(int(np.prod(y.shape)))
y_shape = y_shape_reverse[::-1]
assert(np.prod(y_shape) == np.prod(y.shape))
y_flat = y.reshape(y_shape)
else:
y_flat = y[0]
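The reversed-shape loop above collapses y's leading axes so that only the axes matching the sliced x survive, and the assert guarantees the reshape preserves the element count. The same alignment in plain NumPy, with shapes assumed for illustration:

import numpy as np

x_sub = np.empty((6, 10, 5))   # rows selected by the index arrays
y = np.arange(6 * 10 * 5, dtype='float32').reshape(2, 3, 10, 5)

# collapse y's leading axes; only the trailing axes of x_sub remain
y_flat = y.reshape((-1,) + x_sub.shape[1:])
assert y_flat.shape == (6, 10, 5)
assert y_flat.size == y.size   # size-preserving, as the assert checks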
@@ -689,24 +677,32 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
strides.insert(0, stride)
# build the indices and use it
index = idx_[p:] + [slice(None)] * (len(x_flat.shape) - len(idx_[p:]) - 1)
take_idx = sum(i * s for i, s in zip(nidx, strides))
k = get_iadd(node.inputs[0], node.inputs[1])
if x_flat.shape[-len(y_flat.shape):] == y_flat.shape or y_flat.shape == ():
# y_flat has to be broadcast over axes of x_flat[i]
for i in take_idx.flatten():
if len(idx_[p:]) > 0:
x_flat_sub = x_flat[i].__getitem__(index)
else:
x_flat_sub = x_flat[i]
tmp = pygpu.elemwise.elemwise2(
x_flat[i], '+', y_flat, x_flat[i],
x_flat_sub, '+', y_flat, x_flat_sub,
broadcast=True
)
x_flat.__setitem__(i, tmp)
x_flat[i].__setitem__(index, tmp)
else:
# y_flat's first axis corresponds to the first axis of x_flat
k = get_iadd(node.inputs[0], node.inputs[1])
for j, i in enumerate(take_idx.flatten()):
k(x_flat[i], y_flat[j % y_flat.shape[0]], broadcast=True)
# updating the view updates the original
out[0] = x
if len(idx_[p:]) > 0:
x_flat_sub = x_flat[i].__getitem__(index)
else:
x_flat_sub = x_flat[i]
k(x_flat_sub, y_flat[j % y_flat.shape[0]], broadcast=True)
x_ = x_flat.reshape(x_.shape).transpose(*rtransp)
out[0] = x_
class GpuAdvancedIncSubtensor1(Op):
......
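take_idx above is a row-major ravel of the broadcast index arrays: each array is scaled by its axis stride in elements and summed, which is what np.ravel_multi_index computes. The per-row loop then accumulates y into the selected rows; np.add.at is the closest NumPy analogue of that unbuffered in-place add (GPU types and get_iadd are replaced by NumPy here):

import numpy as np

x = np.arange(16.).reshape(4, 4)
rows = np.array([0, 1, 2])
cols = np.array([0, 1, 2])

strides = [4, 1]   # element strides of the two indexed axes
take_idx = rows * strides[0] + cols * strides[1]
assert (take_idx == np.ravel_multi_index((rows, cols), (4, 4))).all()

# unbuffered += over the selected flat positions
x_flat = x.reshape(-1)
np.add.at(x_flat, take_idx, 1.0)
assert (x_flat[[0, 5, 10]] == np.array([1.0, 6.0, 11.0])).all()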
@@ -78,44 +78,23 @@ class G_subtensorF16(test_subtensor.T_subtensor):
def test_advinc_subtensor():
x_shp = (20, 15, 10, 5)
idx = ([[0, 1],
[2, 3]],
[[0, 1],
[2, 3]])
for y_shp in [(2, 2, 10, 5),
(2, 10, 5),
(10, 5), (5,), (1,)]:
shared = gpuarray_shared_constructor
xval = np.arange(np.prod(x_shp), dtype='float32').reshape(x_shp) + 1
yval = np.arange(np.prod(y_shp), dtype='float32').reshape(y_shp) + 1
rep = xval.copy()
rep[idx] += yval
x = shared(xval, name='x')
y = tensor.tensor(dtype='float32',
broadcastable=(False,) * len(yval.shape),
name='y')
expr = tensor.advanced_inc_subtensor(x, y, *idx)
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, GpuAdvancedIncSubtensor)
for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval)
assert np.allclose(rval, rep)
shp = (3, 3, 3)
shared = gpuarray_shared_constructor
xval = np.arange(np.prod(x_shp), dtype='float32').reshape(x_shp) + 1
rep = xval.copy()
rep[idx] += 1.
xval = np.arange(np.prod(shp), dtype='float32').reshape(shp) + 1
yval = np.arange(np.prod(shp[1:]), dtype='float32').reshape(shp[1:])
idx = ([0, 1, 2], [0, 1, 2])
x = shared(xval, name='x')
y = tensor.scalar(dtype='float32',
y = tensor.tensor(dtype='float32',
broadcastable=(False, False),
name='y')
expr = tensor.advanced_inc_subtensor(x, y, *idx)
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, GpuAdvancedIncSubtensor)
for node in f.maker.fgraph.toposort()]) == 1
rval = f(np.float32(1.))
rval = f(yval)
rep = xval.copy()
rep[idx] += yval
assert np.allclose(rval, rep)
>>>>>>> Initial additions for `GpuAdvancedIncSubtensor`
def test_advinc_subtensor1():
......
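The reference values in this test come from NumPy's in-place advanced-index add: rep[idx] += yval broadcasts yval over the (2, 2) grid of selected subarrays, and that is the behaviour GpuAdvancedIncSubtensor must match. A condensed, GPU-free version of the first loop, in pure NumPy for intuition only:

import numpy as np

x_shp = (20, 15, 10, 5)
idx = ([[0, 1], [2, 3]], [[0, 1], [2, 3]])
xval = np.arange(np.prod(x_shp), dtype='float32').reshape(x_shp) + 1

for y_shp in [(2, 2, 10, 5), (2, 10, 5), (10, 5), (5,), (1,)]:
    yval = np.arange(np.prod(y_shp), dtype='float32').reshape(y_shp) + 1
    rep = xval.copy()
    rep[idx] += yval   # yval broadcasts against the (2, 2, 10, 5) selection
    assert rep.shape == x_shp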