提交 e648ddeb authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Update tests for GpuAdvancedSubtensor and disable optimizations

because of a slowdown that is too big
上级 f62366bc
......@@ -1095,9 +1095,11 @@ def local_gpua_advanced_incsubtensor1(op, context_name, inputs, outputs):
set_instead_of_inc=set_instead_of_inc)
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor])
@register_opt2([tensor.AdvancedIncSubtensor], 'fast_compile')
# Do not register this optimization for now, as it slows down the
# execution by a lot in important cases.
# @register_opt('fast_compile')
# @op_lifter([tensor.AdvancedIncSubtensor])
# @register_opt2([tensor.AdvancedIncSubtensor], 'fast_compile')
def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
if not op.set_instead_of_inc:
return GpuAdvancedIncSubtensor()
......
......@@ -621,6 +621,10 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
y = inp[1]
idx = inp[2:]
x = x.copy()
# Get a handle to the GpuElemwise object that will be called.
# It is not necessary to have the right number of dimensions,
# so we just pass symbolic x and y.
iadd = get_iadd(node.inputs[0], node.inputs[1])
# convert all indices to np.array
for i in range(len(idx)):
......@@ -699,15 +703,10 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
else:
val = y_flat[j]
tmp = pygpu.elemwise.elemwise2(
x_flat[i], '+', val, x_flat[i],
broadcast=True,
convert_f16=True
)
x_flat.__setitem__(i, tmp)
iadd(x_flat[i], val, broadcast=True)
else:
k = get_iadd(node.inputs[0], node.inputs[1])
if x_flat.shape[-len(y_flat.shape):] == y_flat.shape or y_flat.shape == ():
if (x_flat.shape[-len(y_flat.shape):] == y_flat.shape or
y_flat.shape == ()):
# y_flat has to be broadcast over axes of x_flat[i]
for i in take_idx.flatten():
......@@ -715,13 +714,7 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
x_flat_sub = x_flat[i].__getitem__(index)
else:
x_flat_sub = x_flat[i]
tmp = pygpu.elemwise.elemwise2(
x_flat_sub, '+', y_flat, x_flat_sub,
broadcast=True,
convert_f16=True
)
x_flat[i].__setitem__(index, tmp)
iadd(x_flat_sub, y_flat, broadcast=True)
else:
# y_flat's first axis corresponds to the first axis of x_flat
for j, i in enumerate(take_idx.flatten()):
......@@ -729,7 +722,7 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
x_flat_sub = x_flat[i].__getitem__(index)
else:
x_flat_sub = x_flat[i]
k(x_flat_sub, y_flat[j % y_flat.shape[0]], broadcast=True)
iadd(x_flat_sub, y_flat[j % y_flat.shape[0]], broadcast=True)
x_ = x_flat.reshape(x_.shape).transpose(*rtransp)
out[0] = x_
......
......@@ -328,8 +328,8 @@ class test_gpuallocdiag(unittest.TestCase):
grad_x = tensor.grad(sum_mtx_x, x)
grad_mtx_x = tensor.grad(sum_mtx_x, mtx_x)
fn_grad_x = theano.function([x], grad_x)
fn_grad_mtx_x = theano.function([x], grad_mtx_x)
fn_grad_x = theano.function([x], grad_x, mode=mode_with_gpu)
fn_grad_mtx_x = theano.function([x], grad_mtx_x, mode=mode_with_gpu)
computed_grad_x = fn_grad_x(np_x)
computed_grad_mtx_x = fn_grad_mtx_x(np_x)
......@@ -342,8 +342,8 @@ class test_gpuallocdiag(unittest.TestCase):
grad_x = tensor.grad(sum_mtx_x, x)
grad_mtx_x = tensor.grad(sum_mtx_x, mtx_x)
fn_grad_x = theano.function([x], grad_x)
fn_grad_mtx_x = theano.function([x], grad_mtx_x)
fn_grad_x = theano.function([x], grad_x, mode=mode_with_gpu)
fn_grad_mtx_x = theano.function([x], grad_mtx_x, mode=mode_with_gpu)
computed_grad_x = fn_grad_x(np_x)
computed_grad_mtx_x = fn_grad_mtx_x(np_x)
......@@ -356,8 +356,8 @@ class test_gpuallocdiag(unittest.TestCase):
grad_x = tensor.grad(sum_mtx_x, x)
grad_mtx_x = tensor.grad(sum_mtx_x, mtx_x)
fn_grad_x = theano.function([x], grad_x)
fn_grad_mtx_x = theano.function([x], grad_mtx_x)
fn_grad_x = theano.function([x], grad_x, mode=mode_with_gpu)
fn_grad_mtx_x = theano.function([x], grad_mtx_x, mode=mode_with_gpu)
computed_grad_x = fn_grad_x(np_x)
computed_grad_mtx_x = fn_grad_mtx_x(np_x)
......
......@@ -402,7 +402,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
s1 = s[newaxis]
assert s1.broadcastable == (True,), s1
vs1, vn3, vn4 = theano.function([s], [s1, n3, n4])(-2.0)
vs1, vn3, vn4 = theano.function([s], [s1, n3, n4], mode=self.mode)(-2.0)
assert np.all(vs1 == [-2.0])
assert np.all(vn3 ==
......@@ -1148,7 +1148,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
m1 = set_subtensor(m[:, i], 0)
m2 = inc_subtensor(m[:, i], 1)
f = theano.function([m, i], [m1, m2])
f = theano.function([m, i], [m1, m2], mode=self.mode)
m_val = rand(3, 5)
i_val = randint_ranged(min=0, max=4, shape=(4,))
......@@ -1173,7 +1173,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
m1 = set_subtensor(m[:, i], 0)
m2 = inc_subtensor(m[:, i], 1)
f = theano.function([m, i], [m1, m2])
f = theano.function([m, i], [m1, m2], mode=self.mode)
m_val = rand(5, 7)
i_val = randint_ranged(min=0, max=6, shape=(4, 2))
......@@ -1208,7 +1208,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
sub_m = m[:, i]
m1 = set_subtensor(sub_m, np.zeros(shp_v))
m2 = inc_subtensor(sub_m, np.ones(shp_v))
f = theano.function([m, i], [m1, m2])
f = theano.function([m, i], [m1, m2], mode=self.mode)
m_val = rand(3, 5)
i_val = randint_ranged(min=0, max=4, shape=shp_i)
......@@ -1245,7 +1245,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
sub_m = m[:, i]
m1 = set_subtensor(sub_m, np.zeros(shp_v))
m2 = inc_subtensor(sub_m, np.ones(shp_v))
f = theano.function([m, i], [m1, m2])
f = theano.function([m, i], [m1, m2], mode=self.mode)
m_val = rand(3, 5)
i_val = randint_ranged(min=0, max=4, shape=shp_i)
......@@ -1267,7 +1267,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def test_take(self):
a = tensor.matrix()
f = theano.function([a], a.take(0, axis=-1), allow_input_downcast=True)
f = theano.function(
[a], a.take(0, axis=-1),
allow_input_downcast=True, mode=self.mode)
f(np.random.normal(0, 1, (30, 4)))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论