提交 f71035bf authored 作者: Shawn Tan

Change optimizer and include more tests

上级 80a1017d
......@@ -1066,36 +1066,39 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1])
@op_lifter([tensor.AdvancedIncSubtensor])
@register_opt2([tensor.AdvancedIncSubtensor1, tensor.AdvancedIncSubtensor], 'fast_compile')
def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
@register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile')
def local_gpua_advanced_incsubtensor1(op, context_name, inputs, outputs):
if isinstance(op, (tensor.AdvancedIncSubtensor1)):
context = get_context(context_name)
# This is disabled on non-cuda contexts
if context.kind != b'cuda':
return None
x, y, ilist = inputs
set_instead_of_inc = op.set_instead_of_inc
compute_capability = int(context.bin_id[-2])
if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0:
x = x.dimshuffle(0, 'x')
y = y.dimshuffle('x', 'x')
ret = GpuAdvancedIncSubtensor1_dev20(
set_instead_of_inc=set_instead_of_inc)(x, y, ilist)
ret = GpuDimShuffle(ret.type.broadcastable, [0])(ret)
return ret
elif compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
return GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)
else:
return GpuAdvancedIncSubtensor1_dev20(
set_instead_of_inc=set_instead_of_inc)
x, y, ilist = inputs
set_instead_of_inc = op.set_instead_of_inc
compute_capability = int(context.bin_id[-2])
if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0:
x = x.dimshuffle(0, 'x')
y = y.dimshuffle('x', 'x')
ret = GpuAdvancedIncSubtensor1_dev20(
set_instead_of_inc=set_instead_of_inc)(x, y, ilist)
ret = GpuDimShuffle(ret.type.broadcastable, [0])(ret)
return ret
elif compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
return GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)
else:
return GpuAdvancedIncSubtensor1_dev20(
set_instead_of_inc=set_instead_of_inc)
elif isinstance(op, (tensor.AdvancedIncSubtensor)):
return GpuAdvancedIncSubtensor()
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor])
@register_opt2([tensor.AdvancedIncSubtensor], 'fast_compile')
def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
return GpuAdvancedIncSubtensor()
@register_inplace()
......
......@@ -78,23 +78,45 @@ class G_subtensorF16(test_subtensor.T_subtensor):
def test_advinc_subtensor():
shp = (3, 3, 3)
x_shp = (20, 15, 10, 5)
shared = gpuarray_shared_constructor
xval = np.arange(np.prod(shp), dtype='float32').reshape(shp) + 1
yval = np.arange(np.prod(shp[1:]), dtype='float32').reshape(shp[1:])
idx = ([0, 1, 2], [0, 1, 2])
x = shared(xval, name='x')
y = tensor.tensor(dtype='float32',
broadcastable=(False, False),
name='y')
expr = tensor.advanced_inc_subtensor(x, y, *idx)
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, GpuAdvancedIncSubtensor)
for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval)
rep = xval.copy()
rep[idx] += yval
assert np.allclose(rval, rep)
def check(idx, y_val, x_val, true):
x = shared(x_val, name='x')
y = tensor.tensor(dtype='float32',
broadcastable=(False,) * len(y_val.shape),
name='y')
sym_idx = [tensor.as_tensor_variable(ix) for ix in idx]
expr = tensor.advanced_inc_subtensor(x, y, *sym_idx)
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, GpuAdvancedIncSubtensor)
for node in f.maker.fgraph.toposort()]) == 1
rval = f(y_val)
assert np.allclose(rval, true)
idxs_y_shp_pairs = [
((0, [1, 3, 5], 1), (3, 5)),
(([1, 2, 4, 8],), (4, 15, 10, 5)),
(([0, 1, 2], 0, [0, 1, 2]), (3, 3, 5)),
(([[0, 1], [2, 3]], [[0, 1], [2, 3]]), (2, 2, 10, 5)),
]
for idx, y_shps in idxs_y_shp_pairs:
for i in range(len(y_shps) - 1):
y_shp = y_shps[i:]
x_val = np.arange(np.prod(x_shp), dtype='float32').reshape(x_shp) + 1
y_val = np.arange(np.prod(y_shp), dtype='float32').reshape(y_shp) + 1
rep = x_val.copy()
try:
rep[idx] += y_val
except ValueError:
continue
yield check, idx, y_val, x_val, rep
x_val = np.arange(np.prod(x_shp), dtype='float32').reshape(x_shp) + 1
y_val = np.array(1).astype(np.float32)
rep = x_val.copy()
rep[idx] += y_val
yield check, idx, y_val, x_val, rep
def test_advinc_subtensor1():
......@@ -157,6 +179,7 @@ def test_advinc_subtensor1_vector_scalar():
name='y')
expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, (GpuAdvancedIncSubtensor1_dev20,
GpuAdvancedIncSubtensor1))
for node in f.maker.fgraph.toposort()]) == 1
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论