提交 e648ddeb authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Update tests for GpuAdvancedSubtensor and disable optimizations

because the slowdown they cause is too large
上级 f62366bc
...@@ -1095,9 +1095,11 @@ def local_gpua_advanced_incsubtensor1(op, context_name, inputs, outputs): ...@@ -1095,9 +1095,11 @@ def local_gpua_advanced_incsubtensor1(op, context_name, inputs, outputs):
set_instead_of_inc=set_instead_of_inc) set_instead_of_inc=set_instead_of_inc)
@register_opt('fast_compile') # Do not register this optimization for now, as it slows down the
@op_lifter([tensor.AdvancedIncSubtensor]) # execution by a lot in important cases.
@register_opt2([tensor.AdvancedIncSubtensor], 'fast_compile') # @register_opt('fast_compile')
# @op_lifter([tensor.AdvancedIncSubtensor])
# @register_opt2([tensor.AdvancedIncSubtensor], 'fast_compile')
def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs): def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
if not op.set_instead_of_inc: if not op.set_instead_of_inc:
return GpuAdvancedIncSubtensor() return GpuAdvancedIncSubtensor()
......
...@@ -621,6 +621,10 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -621,6 +621,10 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
y = inp[1] y = inp[1]
idx = inp[2:] idx = inp[2:]
x = x.copy() x = x.copy()
# Get a handle to the GpuElemwise object that will be called.
# It is not necessary to have the right number of dimensions,
# so we just pass symbolic x and y.
iadd = get_iadd(node.inputs[0], node.inputs[1])
# convert all indices to np.array # convert all indices to np.array
for i in range(len(idx)): for i in range(len(idx)):
...@@ -699,15 +703,10 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -699,15 +703,10 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
else: else:
val = y_flat[j] val = y_flat[j]
tmp = pygpu.elemwise.elemwise2( iadd(x_flat[i], val, broadcast=True)
x_flat[i], '+', val, x_flat[i],
broadcast=True,
convert_f16=True
)
x_flat.__setitem__(i, tmp)
else: else:
k = get_iadd(node.inputs[0], node.inputs[1]) if (x_flat.shape[-len(y_flat.shape):] == y_flat.shape or
if x_flat.shape[-len(y_flat.shape):] == y_flat.shape or y_flat.shape == (): y_flat.shape == ()):
# y_flat has to be broadcast over axes of x_flat[i] # y_flat has to be broadcast over axes of x_flat[i]
for i in take_idx.flatten(): for i in take_idx.flatten():
...@@ -715,13 +714,7 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -715,13 +714,7 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
x_flat_sub = x_flat[i].__getitem__(index) x_flat_sub = x_flat[i].__getitem__(index)
else: else:
x_flat_sub = x_flat[i] x_flat_sub = x_flat[i]
tmp = pygpu.elemwise.elemwise2( iadd(x_flat_sub, y_flat, broadcast=True)
x_flat_sub, '+', y_flat, x_flat_sub,
broadcast=True,
convert_f16=True
)
x_flat[i].__setitem__(index, tmp)
else: else:
# y_flat's first axis corresponds to first axis of x_flat # y_flat's first axis corresponds to first axis of x_flat
for j, i in enumerate(take_idx.flatten()): for j, i in enumerate(take_idx.flatten()):
...@@ -729,7 +722,7 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -729,7 +722,7 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
x_flat_sub = x_flat[i].__getitem__(index) x_flat_sub = x_flat[i].__getitem__(index)
else: else:
x_flat_sub = x_flat[i] x_flat_sub = x_flat[i]
k(x_flat_sub, y_flat[j % y_flat.shape[0]], broadcast=True) iadd(x_flat_sub, y_flat[j % y_flat.shape[0]], broadcast=True)
x_ = x_flat.reshape(x_.shape).transpose(*rtransp) x_ = x_flat.reshape(x_.shape).transpose(*rtransp)
out[0] = x_ out[0] = x_
......
...@@ -328,8 +328,8 @@ class test_gpuallocdiag(unittest.TestCase): ...@@ -328,8 +328,8 @@ class test_gpuallocdiag(unittest.TestCase):
grad_x = tensor.grad(sum_mtx_x, x) grad_x = tensor.grad(sum_mtx_x, x)
grad_mtx_x = tensor.grad(sum_mtx_x, mtx_x) grad_mtx_x = tensor.grad(sum_mtx_x, mtx_x)
fn_grad_x = theano.function([x], grad_x) fn_grad_x = theano.function([x], grad_x, mode=mode_with_gpu)
fn_grad_mtx_x = theano.function([x], grad_mtx_x) fn_grad_mtx_x = theano.function([x], grad_mtx_x, mode=mode_with_gpu)
computed_grad_x = fn_grad_x(np_x) computed_grad_x = fn_grad_x(np_x)
computed_grad_mtx_x = fn_grad_mtx_x(np_x) computed_grad_mtx_x = fn_grad_mtx_x(np_x)
...@@ -342,8 +342,8 @@ class test_gpuallocdiag(unittest.TestCase): ...@@ -342,8 +342,8 @@ class test_gpuallocdiag(unittest.TestCase):
grad_x = tensor.grad(sum_mtx_x, x) grad_x = tensor.grad(sum_mtx_x, x)
grad_mtx_x = tensor.grad(sum_mtx_x, mtx_x) grad_mtx_x = tensor.grad(sum_mtx_x, mtx_x)
fn_grad_x = theano.function([x], grad_x) fn_grad_x = theano.function([x], grad_x, mode=mode_with_gpu)
fn_grad_mtx_x = theano.function([x], grad_mtx_x) fn_grad_mtx_x = theano.function([x], grad_mtx_x, mode=mode_with_gpu)
computed_grad_x = fn_grad_x(np_x) computed_grad_x = fn_grad_x(np_x)
computed_grad_mtx_x = fn_grad_mtx_x(np_x) computed_grad_mtx_x = fn_grad_mtx_x(np_x)
...@@ -356,8 +356,8 @@ class test_gpuallocdiag(unittest.TestCase): ...@@ -356,8 +356,8 @@ class test_gpuallocdiag(unittest.TestCase):
grad_x = tensor.grad(sum_mtx_x, x) grad_x = tensor.grad(sum_mtx_x, x)
grad_mtx_x = tensor.grad(sum_mtx_x, mtx_x) grad_mtx_x = tensor.grad(sum_mtx_x, mtx_x)
fn_grad_x = theano.function([x], grad_x) fn_grad_x = theano.function([x], grad_x, mode=mode_with_gpu)
fn_grad_mtx_x = theano.function([x], grad_mtx_x) fn_grad_mtx_x = theano.function([x], grad_mtx_x, mode=mode_with_gpu)
computed_grad_x = fn_grad_x(np_x) computed_grad_x = fn_grad_x(np_x)
computed_grad_mtx_x = fn_grad_mtx_x(np_x) computed_grad_mtx_x = fn_grad_mtx_x(np_x)
......
...@@ -402,7 +402,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -402,7 +402,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
s1 = s[newaxis] s1 = s[newaxis]
assert s1.broadcastable == (True,), s1 assert s1.broadcastable == (True,), s1
vs1, vn3, vn4 = theano.function([s], [s1, n3, n4])(-2.0) vs1, vn3, vn4 = theano.function([s], [s1, n3, n4], mode=self.mode)(-2.0)
assert np.all(vs1 == [-2.0]) assert np.all(vs1 == [-2.0])
assert np.all(vn3 == assert np.all(vn3 ==
...@@ -1148,7 +1148,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -1148,7 +1148,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
m1 = set_subtensor(m[:, i], 0) m1 = set_subtensor(m[:, i], 0)
m2 = inc_subtensor(m[:, i], 1) m2 = inc_subtensor(m[:, i], 1)
f = theano.function([m, i], [m1, m2]) f = theano.function([m, i], [m1, m2], mode=self.mode)
m_val = rand(3, 5) m_val = rand(3, 5)
i_val = randint_ranged(min=0, max=4, shape=(4,)) i_val = randint_ranged(min=0, max=4, shape=(4,))
...@@ -1173,7 +1173,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -1173,7 +1173,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
m1 = set_subtensor(m[:, i], 0) m1 = set_subtensor(m[:, i], 0)
m2 = inc_subtensor(m[:, i], 1) m2 = inc_subtensor(m[:, i], 1)
f = theano.function([m, i], [m1, m2]) f = theano.function([m, i], [m1, m2], mode=self.mode)
m_val = rand(5, 7) m_val = rand(5, 7)
i_val = randint_ranged(min=0, max=6, shape=(4, 2)) i_val = randint_ranged(min=0, max=6, shape=(4, 2))
...@@ -1208,7 +1208,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -1208,7 +1208,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
sub_m = m[:, i] sub_m = m[:, i]
m1 = set_subtensor(sub_m, np.zeros(shp_v)) m1 = set_subtensor(sub_m, np.zeros(shp_v))
m2 = inc_subtensor(sub_m, np.ones(shp_v)) m2 = inc_subtensor(sub_m, np.ones(shp_v))
f = theano.function([m, i], [m1, m2]) f = theano.function([m, i], [m1, m2], mode=self.mode)
m_val = rand(3, 5) m_val = rand(3, 5)
i_val = randint_ranged(min=0, max=4, shape=shp_i) i_val = randint_ranged(min=0, max=4, shape=shp_i)
...@@ -1245,7 +1245,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -1245,7 +1245,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
sub_m = m[:, i] sub_m = m[:, i]
m1 = set_subtensor(sub_m, np.zeros(shp_v)) m1 = set_subtensor(sub_m, np.zeros(shp_v))
m2 = inc_subtensor(sub_m, np.ones(shp_v)) m2 = inc_subtensor(sub_m, np.ones(shp_v))
f = theano.function([m, i], [m1, m2]) f = theano.function([m, i], [m1, m2], mode=self.mode)
m_val = rand(3, 5) m_val = rand(3, 5)
i_val = randint_ranged(min=0, max=4, shape=shp_i) i_val = randint_ranged(min=0, max=4, shape=shp_i)
...@@ -1267,7 +1267,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -1267,7 +1267,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def test_take(self): def test_take(self):
a = tensor.matrix() a = tensor.matrix()
f = theano.function([a], a.take(0, axis=-1), allow_input_downcast=True) f = theano.function(
[a], a.take(0, axis=-1),
allow_input_downcast=True, mode=self.mode)
f(np.random.normal(0, 1, (30, 4))) f(np.random.normal(0, 1, (30, 4)))
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论