提交 f444bf75 authored 作者: Frederic's avatar Frederic

pep8

上级 74ca96f1
...@@ -15,7 +15,6 @@ from numpy.testing import assert_array_almost_equal ...@@ -15,7 +15,6 @@ from numpy.testing import assert_array_almost_equal
#from numpy.testing import dec #from numpy.testing import dec
#from numpy.testing.noseclasses import KnownFailureTest #from numpy.testing.noseclasses import KnownFailureTest
#from theano.tensor.blas import *
from theano.tensor.blas import (_dot22, _dot22scalar, res_is_a, _as_scalar, from theano.tensor.blas import (_dot22, _dot22scalar, res_is_a, _as_scalar,
_is_real_matrix, _gemm_canonicalize, _is_real_matrix, _gemm_canonicalize,
_factor_canonicalized, Gemm, Gemv, _factor_canonicalized, Gemm, Gemv,
...@@ -184,7 +183,9 @@ class t_gemm(TestCase): ...@@ -184,7 +183,9 @@ class t_gemm(TestCase):
f = theano.function([a, b], updates={s: lr1 * T.dot(a, b) + f = theano.function([a, b], updates={s: lr1 * T.dot(a, b) +
l2_reg * lr2 * s}, l2_reg * lr2 * s},
mode=mode_not_fast_compile).maker.env.toposort() mode=mode_not_fast_compile).maker.env.toposort()
#[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)] #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
# <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
# 2e-06)]
assert len(f) == 1 assert len(f) == 1
assert f[0].op == gemm_inplace assert f[0].op == gemm_inplace
...@@ -192,14 +193,19 @@ class t_gemm(TestCase): ...@@ -192,14 +193,19 @@ class t_gemm(TestCase):
f = theano.function([a, b], updates={s: lr1 * (T.dot(a, b) - f = theano.function([a, b], updates={s: lr1 * (T.dot(a, b) -
l2_reg * s)}, l2_reg * s)},
mode=mode_not_fast_compile).maker.env.toposort() mode=mode_not_fast_compile).maker.env.toposort()
#[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, -2e-06)] #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
# <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
# -2e-06)]
assert len(f) == 1 assert len(f) == 1
assert f[0].op == gemm_inplace assert f[0].op == gemm_inplace
#test factored scalar with merge and neg #test factored scalar with merge and neg
f = theano.function([a,b],updates={s:s-lr1*(s*.0002+T.dot(a,b))}, f = theano.function([a, b],
updates={s: s - lr1 * (s * .0002 + T.dot(a, b))},
mode=mode_not_fast_compile).maker.env.toposort() mode=mode_not_fast_compile).maker.env.toposort()
#[Gemm{inplace}(<TensorType(float64, matrix)>, -0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 0.999998)] #[Gemm{inplace}(<TensorType(float64, matrix)>, -0.01,
# <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
# 0.999998)]
assert len(f) == 1 assert len(f) == 1
assert f[0].op == gemm_inplace assert f[0].op == gemm_inplace
...@@ -291,7 +297,8 @@ class t_gemm(TestCase): ...@@ -291,7 +297,8 @@ class t_gemm(TestCase):
tx.set_value(y_T, borrow=True) tx.set_value(y_T, borrow=True)
f() f()
# test that the transposed version of multiplication gives same answer # test that the transposed version of multiplication gives
# same answer
self.assertTrue(_approx_eq(z_after, tz.get_value(borrow=True).T)) self.assertTrue(_approx_eq(z_after, tz.get_value(borrow=True).T))
t(C, A, B) t(C, A, B)
...@@ -330,12 +337,14 @@ class t_gemm(TestCase): ...@@ -330,12 +337,14 @@ class t_gemm(TestCase):
z_orig = z.copy() z_orig = z.copy()
z_after = numpy.zeros_like(z_orig) z_after = numpy.zeros_like(z_orig)
for i in xrange(3): for i in xrange(3):
z_after[:,:,i] = self._gemm(z[:,:,i], a, x[:,:,i], y[:,:,i], b) z_after[:, :, i] = self._gemm(z[:, :, i], a,
x[:, :, i], y[:, :, i], b)
tz, ta, tx, ty, tb = [shared(p) for p in z, a, x, y, b] tz, ta, tx, ty, tb = [shared(p) for p in z, a, x, y, b]
for i in xrange(3): for i in xrange(3):
f_i = inplace_func([], f_i = inplace_func([],
gemm_inplace(tz[:,:,i], ta, tx[:,:,i], ty[:,:,i], tb), gemm_inplace(tz[:, :, i],
ta, tx[:, :, i], ty[:, :, i], tb),
mode=compile.Mode(optimizer=None, linker=l)) mode=compile.Mode(optimizer=None, linker=l))
for j in xrange(3): for j in xrange(3):
# tz will not _always_ be overwritten, # tz will not _always_ be overwritten,
...@@ -347,30 +356,32 @@ class t_gemm(TestCase): ...@@ -347,30 +356,32 @@ class t_gemm(TestCase):
self.assertTrue( self.assertTrue(
_approx_eq(z_after[:, :, i], _approx_eq(z_after[:, :, i],
tz.get_value(borrow=True)[:,:,i]), tz.get_value(borrow=True)[:, :, i]),
(z_orig[:,:,i], z_after[:,:,i], (z_orig[:, :, i], z_after[:, :, i],
z[:,:,i], z_after[:,:,i] - z[:,:,i])) z[:, :, i], z_after[:, :, i] - z[:, :, i]))
tz_i = gemm_no_inplace(tz[:,:,i], ta, tx[:,:,i], ty[:,:,i], tb) tz_i = gemm_no_inplace(tz[:, :, i], ta, tx[
:, :, i], ty[:, :, i], tb)
g_i = theano.function([], tz_i, g_i = theano.function([], tz_i,
updates={tz:T.set_subtensor(tz[:,:,i], tz_i)}, updates={tz: T.set_subtensor(tz[:, :, i], tz_i)},
mode=compile.Mode(optimizer=None, linker=l)) mode=compile.Mode(optimizer=None, linker=l))
for j in xrange(3): for j in xrange(3):
g_i() g_i()
self.assertTrue( self.assertTrue(
_approx_eq(z_after[:,:,i], _approx_eq(z_after[:, :, i],
tz.get_value(borrow=True)[:,:,i]), tz.get_value(borrow=True)[:, :, i]),
(z_orig[:,:,i], z_after[:,:,i], (z_orig[:, :, i], z_after[:, :, i],
z[:,:,i], z_after[:,:,i] - z[:,:,i])) z[:, :, i], z_after[:, :, i] - z[:, :, i]))
t(C, A, B) t(C, A, B)
t(C.transpose((1,0,2)), A, B) t(C.transpose((1, 0, 2)), A, B)
t(C, A.transpose((1,0,2)), B, dt='float32') t(C, A.transpose((1, 0, 2)), B, dt='float32')
t(C, A, B.transpose((1,0,2))) t(C, A, B.transpose((1, 0, 2)))
t(C.transpose((1,0,2)), A.transpose((1,0,2)), B) t(C.transpose((1, 0, 2)), A.transpose((1, 0, 2)), B)
t(C, A.transpose((1,0,2)), B.transpose((1,0,2)), dt='float32') t(C, A.transpose((1, 0, 2)), B.transpose((1, 0, 2)), dt='float32')
t(C.transpose((1,0,2)), A, B.transpose((1,0,2))) t(C.transpose((1, 0, 2)), A, B.transpose((1, 0, 2)))
t(C.transpose((1,0,2)), A.transpose((1,0,2)), B.transpose((1,0,2)), dt='float32') t(C.transpose((1, 0, 2)), A.transpose((1, 0, 2)), B.transpose((
1, 0, 2)), dt='float32')
def test_res_is_a(): def test_res_is_a():
...@@ -438,8 +449,13 @@ def fail(msg): ...@@ -438,8 +449,13 @@ def fail(msg):
assert False assert False
"""This test suite ensures that Gemm is inserted where it belongs, and that the resulting """This test suite ensures that Gemm is inserted where it belongs, and
functions compute the same things as the originals.""" that the resulting functions compute the same things as the
originals.
"""
def XYZab(): def XYZab():
return T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar() return T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar()
...@@ -496,65 +512,72 @@ def just_gemm(i, o, ishapes=[(4, 3), (3, 5), (4, 5), (), ()], max_graphlen=0): ...@@ -496,65 +512,72 @@ def just_gemm(i, o, ishapes=[(4, 3), (3, 5), (4, 5), (), ()], max_graphlen=0):
def test_gemm_opt0(): def test_gemm_opt0():
"""Many subgraphs whose dots can be eliminated""" """Many subgraphs whose dots can be eliminated"""
X,Y,Z,a,b = XYZab() X, Y, Z, a, b = XYZab()
just_gemm([X,Y,Z,a,b], [T.dot(X,Y) * a + Z * b]) just_gemm([X, Y, Z, a, b], [T.dot(X, Y) * a + Z * b])
just_gemm([X,Y,Z,a,b], [a * T.dot(X,Y) + b * Z]) just_gemm([X, Y, Z, a, b], [a * T.dot(X, Y) + b * Z])
just_gemm([X,Y,Z,a,b], [b * Z + a * T.dot(X,Y)]) just_gemm([X, Y, Z, a, b], [b * Z + a * T.dot(X, Y)])
just_gemm([X,Y,Z,a,b], [T.dot(X,Y) * a - Z * b]) just_gemm([X, Y, Z, a, b], [T.dot(X, Y) * a - Z * b])
just_gemm([X,Y,Z,a,b], [a * T.dot(X,Y) - b * Z]) just_gemm([X, Y, Z, a, b], [a * T.dot(X, Y) - b * Z])
just_gemm([X,Y,Z,a,b], [b * Z - a * T.dot(X,Y)]) just_gemm([X, Y, Z, a, b], [b * Z - a * T.dot(X, Y)])
#with transposes (transposes should be pushed through dot in canonicalize) #with transposes (transposes should be pushed through dot in canonicalize)
just_gemm([X,Y,Z,a,b], [b * Z.T - a * T.dot(Y.T,X.T)]) just_gemm([X, Y, Z, a, b], [b * Z.T - a * T.dot(Y.T, X.T)])
just_gemm([X,Y,Z,a,b], [b * Z.T + a * b * T.dot(X,Y).T]) just_gemm([X, Y, Z, a, b], [b * Z.T + a * b * T.dot(X, Y).T])
just_gemm([X,Y,Z,a,b], [b * Z + a * T.dot(X,Y).T], just_gemm([X, Y, Z, a, b], [b * Z + a * T.dot(X, Y).T],
ishapes=[(5,3), (3,4), (4,5), (), ()]) ishapes=[(5, 3), (3, 4), (4, 5), (), ()])
#with N multiplications instead of just one #with N multiplications instead of just one
just_gemm([X,Y,Z,a,b], [(b * b) * Z * a + (a * a) * T.dot(X,Y) * b]) just_gemm([X, Y, Z, a, b], [(b * b) * Z * a + (a * a) * T.dot(X, Y) * b])
just_gemm([X,Y,Z,a,b], [Z + T.dot(X,Y)]) just_gemm([X, Y, Z, a, b], [Z + T.dot(X, Y)])
just_gemm([X,Y,Z,a,b], [Z*b + T.dot(X,Y)]) just_gemm([X, Y, Z, a, b], [Z * b + T.dot(X, Y)])
just_gemm([X,Y,Z,a,b], [Z + a*b*a*T.dot(X,Y)]) just_gemm([X, Y, Z, a, b], [Z + a * b * a * T.dot(X, Y)])
just_gemm([X,Y,Z,a,b], [(b * b) * Z * a - (a * a) * T.dot(X,Y) * b]) just_gemm([X, Y, Z, a, b], [(b * b) * Z * a - (a * a) * T.dot(X, Y) * b])
just_gemm([X,Y,Z,a,b], [Z - T.dot(X,Y)]) just_gemm([X, Y, Z, a, b], [Z - T.dot(X, Y)])
just_gemm([X,Y,Z,a,b], [Z*b - T.dot(X,Y)]) just_gemm([X, Y, Z, a, b], [Z * b - T.dot(X, Y)])
just_gemm([X,Y,Z,a,b], [Z - a*b*a*T.dot(X,Y)]) just_gemm([X, Y, Z, a, b], [Z - a * b * a * T.dot(X, Y)])
def test_gemm_opt_double_gemm(): def test_gemm_opt_double_gemm():
"""This is the pattern that shows up in the autoencoder""" """This is the pattern that shows up in the autoencoder"""
X,Y,Z,a,b = T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar() X, Y, Z, a, b = T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar()
R, S, c = T.matrix(), T.matrix(), T.scalar() R, S, c = T.matrix(), T.matrix(), T.scalar()
just_gemm([X,Y,Z,a,b, R, S, c], [Z *c + a * T.dot(X,Y) + b * T.dot(R,S).T], just_gemm([X, Y, Z, a, b, R, S, c],
ishapes=[(4,3), (3,5), (4,5), (), (), (5,9), (9,4), ()]) [Z * c + a * T.dot(X, Y) + b * T.dot(R, S).T],
ishapes=[(4, 3), (3, 5), (4, 5), (), (), (5, 9), (9, 4), ()])
ishapes=[(4,3), (3,5), (4,5), (), (), (5,9), (9,4), ()] ishapes = [(4, 3), (3, 5), (4, 5), (), (), (5, 9), (9, 4), ()]
i = [X,Y,Z,a,b, R, S, c] i = [X, Y, Z, a, b, R, S, c]
o = [(a * T.dot(X,Y) o = [(a * T.dot(X, Y)
+ gemm_inplace(Z, b, S.T, R.T, T.constant(1.0).astype(config.floatX)))] + gemm_inplace(Z, b, S.T, R.T, T.constant(1.0).astype(config.floatX)))]
try: try:
f = inplace_func([Param(ii, mutable=True) for ii in i],o, f = inplace_func([Param(ii, mutable=True) for ii in i], o,
mode='FAST_RUN', on_unused_input='ignore') mode='FAST_RUN', on_unused_input='ignore')
for node in f.maker.env.nodes: for node in f.maker.env.nodes:
if node.op == T.dot: raise Failure('dot in graph') if node.op == T.dot:
if node.op == _dot22: raise Failure('_dot22 in graph') raise Failure('dot in graph')
if node.op == _dot22:
raise Failure('_dot22 in graph')
g = inplace_func(i, o, mode=compile.Mode(linker='py', optimizer=None), g = inplace_func(i, o, mode=compile.Mode(linker='py', optimizer=None),
on_unused_input='ignore') on_unused_input='ignore')
#for node in g.maker.env.nodes: #for node in g.maker.env.nodes:
# if node.op == gemm_inplace: raise Failure('gemm_inplace in graph') # if node.op == gemm_inplace: raise Failure('gemm_inplace in graph')
rng = numpy.random.RandomState(unittest_tools.fetch_seed(234)) rng = numpy.random.RandomState(unittest_tools.fetch_seed(234))
r0 = f(*[numpy.asarray(rng.randn(*sh), config.floatX) for sh in ishapes]) r0 = f(*[numpy.asarray(rng.randn(*sh), config.floatX)
for sh in ishapes])
rng = numpy.random.RandomState(unittest_tools.fetch_seed(234)) rng = numpy.random.RandomState(unittest_tools.fetch_seed(234))
r1 = g(*[numpy.asarray(rng.randn(*sh), config.floatX) for sh in ishapes]) r1 = g(*[numpy.asarray(rng.randn(*sh), config.floatX)
for sh in ishapes])
max_abs_err = numpy.max(numpy.abs(r0[0] - r1[0])) max_abs_err = numpy.max(numpy.abs(r0[0] - r1[0]))
eps = 1.0e-8 eps = 1.0e-8
if config.floatX == 'float32': if config.floatX == 'float32':
eps = 1.0e-6 eps = 1.0e-6
if max_abs_err > eps: if max_abs_err > eps:
raise Failure('GEMM is computing the wrong output. max_rel_err =', max_abs_err) raise Failure(
'GEMM is computing the wrong output. max_rel_err =',
max_abs_err)
except Failure: except Failure:
for node in f.maker.env.toposort(): for node in f.maker.env.toposort():
print 'GRAPH', node print 'GRAPH', node
...@@ -562,8 +585,10 @@ def test_gemm_opt_double_gemm(): ...@@ -562,8 +585,10 @@ def test_gemm_opt_double_gemm():
def test_gemm_canonicalize(): def test_gemm_canonicalize():
X,Y,Z,a,b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar('a'), T.scalar('b') X, Y, Z, a, b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar(
R,S,U,c,d = T.matrix('R'), T.matrix('S'), T.matrix('U'), T.scalar('c'), T.scalar('d') 'a'), T.scalar('b')
R, S, U, c, d = T.matrix('R'), T.matrix('S'), T.matrix('U'), T.scalar(
'c'), T.scalar('d')
u = T.row('u') u = T.row('u')
v = T.vector('v') v = T.vector('v')
w = T.col('w') w = T.col('w')
...@@ -592,7 +617,7 @@ def test_gemm_canonicalize(): ...@@ -592,7 +617,7 @@ def test_gemm_canonicalize():
assert can == [(1.0, X), (1.0, Y), (1.0, w)], can assert can == [(1.0, X), (1.0, Y), (1.0, w)], can
can = [] can = []
_gemm_canonicalize(a*X + Y - b*Z*c, 1.0, can, 0) _gemm_canonicalize(a * X + Y - b * Z * c, 1.0, can, 0)
assert can[0] == (a, X) assert can[0] == (a, X)
assert can[1] == (1.0, Y) assert can[1] == (1.0, Y)
assert can[2][0].owner.op == T.mul assert can[2][0].owner.op == T.mul
...@@ -601,7 +626,7 @@ def test_gemm_canonicalize(): ...@@ -601,7 +626,7 @@ def test_gemm_canonicalize():
assert can[2][0].owner.inputs[1] == b assert can[2][0].owner.inputs[1] == b
can = [] can = []
_gemm_canonicalize((-d) * X - (a*X + Y - b*Z*c), 1.0, can, 0) _gemm_canonicalize((-d) * X - (a * X + Y - b * Z * c), 1.0, can, 0)
#print can #print can
assert can[0][0].owner.op == T.neg assert can[0][0].owner.op == T.neg
assert can[0][0].owner.inputs[0] == d assert can[0][0].owner.inputs[0] == d
...@@ -610,14 +635,18 @@ def test_gemm_canonicalize(): ...@@ -610,14 +635,18 @@ def test_gemm_canonicalize():
assert can[1][0].owner.inputs[0] == a assert can[1][0].owner.inputs[0] == a
assert can[2] == (-1.0, Y) assert can[2] == (-1.0, Y)
assert can[3][0].owner.op == T.mul assert can[3][0].owner.op == T.mul
assert can[3][0].owner.inputs == [c,b] assert can[3][0].owner.inputs == [c, b]
def test_gemm_factor(): def test_gemm_factor():
X,Y,Z,a,b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar('a'), T.scalar('b') X, Y, Z, a, b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar(
R,S,U,c,d = T.matrix('R'), T.matrix('S'), T.matrix('U'), T.scalar('c'), T.scalar('d') 'a'), T.scalar('b')
R, S, U, c, d = T.matrix('R'), T.matrix('S'), T.matrix('U'), T.scalar(
'c'), T.scalar('d')
assert [(1.0, X), (1.0, Y)] == _factor_canonicalized([(1.0, X), (1.0, Y)]) assert [(1.0, X), (1.0, Y)] == _factor_canonicalized([(1.0, X), (1.0, Y)])
assert [(2.0, X)] == _factor_canonicalized([(1.0, X),(1.0, X)]) assert [(2.0, X)] == _factor_canonicalized([(1.0, X), (1.0, X)])
def test_upcasting_scalar_nogemm(): def test_upcasting_scalar_nogemm():
# Test that the optimization does not crash when the scale has an incorrect # Test that the optimization does not crash when the scale has an incorrect
...@@ -651,119 +680,137 @@ def test_upcasting_scalar_nogemm(): ...@@ -651,119 +680,137 @@ def test_upcasting_scalar_nogemm():
assert numpy.sum([isinstance(n.op, Gemm) for n in t]) == 0 assert numpy.sum([isinstance(n.op, Gemm) for n in t]) == 0
#theano.printing.debugprint(f, print_type=True) #theano.printing.debugprint(f, print_type=True)
def test_gemm_nested():
X,Y,Z,a,b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar('a'), T.scalar('b')
R,S,U,c,d = T.matrix('R'), T.matrix('S'), T.matrix('U'), T.scalar('c'), T.scalar('d')
just_gemm([X,Y,Z,R,S,U,a,b,c,d], def test_gemm_nested():
[a * Z - b * (c*T.dot(X,Y) + d*Z)], X, Y, Z, a, b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar(
ishapes=[(2,3),(3,4),(2,4),(2,3),(3,4),(2,4),(),(),(),()], 'a'), T.scalar('b')
R, S, U, c, d = T.matrix('R'), T.matrix('S'), T.matrix('U'), T.scalar(
'c'), T.scalar('d')
just_gemm([X, Y, Z, R, S, U, a, b, c, d],
[a * Z - b * (c * T.dot(X, Y) + d * Z)],
ishapes=[(2, 3), (3, 4), (2, 4), (2, 3), (3, 4), (
2, 4), (), (), (), ()],
max_graphlen=1) max_graphlen=1)
#print "---------------------" #print "---------------------"
just_gemm([X,Y,Z,R,S,U,a,b,c,d], just_gemm([X, Y, Z, R, S, U, a, b, c, d],
[a * Z - b * (c*T.dot(X,Y) + d*Z + c*Z)], [a * Z - b * (c * T.dot(X, Y) + d * Z + c * Z)],
ishapes=[(2,3),(3,4),(2,4),(2,3),(3,4),(2,4),(),(),(),()], ishapes=[(2, 3), (3, 4), (2, 4), (2, 3), (3, 4), (
2, 4), (), (), (), ()],
max_graphlen=1) max_graphlen=1)
#print "---------------------" #print "---------------------"
just_gemm([X,Y,Z,R,S,U,a,b,c,d], just_gemm([X, Y, Z, R, S, U, a, b, c, d],
[a * Z - b * (c*T.dot(X,Y) + d*Z + c*U)], [a * Z - b * (c * T.dot(X, Y) + d * Z + c * U)],
ishapes=[(2,3),(3,4),(2,4),(2,3),(3,4),(2,4),(),(),(),()], ishapes=[(2, 3), (3, 4), (2, 4), (2, 3), (3, 4), (
2, 4), (), (), (), ()],
max_graphlen=3) max_graphlen=3)
def test_gemm_opt_wishlist(): def test_gemm_opt_wishlist():
X,Y,Z,a,b = T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar() X, Y, Z, a, b = T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar()
#with >2 additions of the same T.dot(X,Y term #with >2 additions of the same T.dot(X,Y term
just_gemm([X,Y,Z,a,b], [(b * b) * Z * a + (a * a) * T.dot(X,Y) + b * T.dot(X,Y)]) just_gemm([X, Y, Z, a, b], [(b * b) * Z * a + (a * a) * T.dot(X, Y) +
b * T.dot(X, Y)])
just_gemm([X, Y, Z, a, b], [Z + T.dot(X, Y) + T.dot(X, Y)])
just_gemm([X,Y,Z,a,b], [Z + T.dot(X,Y) + T.dot(X,Y)])
def test_gemm_with_vector(): def test_gemm_with_vector():
"""Many subgraphs whose dots can be eliminated. """Many subgraphs whose dots can be eliminated. This adds a
This adds a vector two the previous test, which triggers the long-sought GEMM bug. vector two the previous test, which triggers the long-sought GEMM
bug.
""" """
X,Y,Z,a,b = XYZab() X, Y, Z, a, b = XYZab()
v = T.vector() v = T.vector()
def my_just_gemm(o): def my_just_gemm(o):
i = [X,Y,Z,a,b,v] i = [X, Y, Z, a, b, v]
ishapes = [(4,3), (3,5), (4,5), (), (), (5,)] ishapes = [(4, 3), (3, 5), (4, 5), (), (), (5, )]
rval = just_gemm(i, o, ishapes=ishapes) rval = just_gemm(i, o, ishapes=ishapes)
my_just_gemm([v + T.dot(X,Y) * a + Z * b]) my_just_gemm([v + T.dot(X, Y) * a + Z * b])
my_just_gemm([v + a * T.dot(X,Y) + b * Z]) my_just_gemm([v + a * T.dot(X, Y) + b * Z])
my_just_gemm([v + b * Z + a * T.dot(X,Y)]) my_just_gemm([v + b * Z + a * T.dot(X, Y)])
my_just_gemm([v + T.dot(X,Y) * a - Z * b]) my_just_gemm([v + T.dot(X, Y) * a - Z * b])
my_just_gemm([v + a * T.dot(X,Y) - b * Z]) my_just_gemm([v + a * T.dot(X, Y) - b * Z])
my_just_gemm([v + b * Z - a * T.dot(X,Y)]) my_just_gemm([v + b * Z - a * T.dot(X, Y)])
#with N multiplications instead of just one #with N multiplications instead of just one
my_just_gemm([v + (b * b) * Z * a + (a * a) * T.dot(X,Y) * b]) my_just_gemm([v + (b * b) * Z * a + (a * a) * T.dot(X, Y) * b])
my_just_gemm([v + Z + T.dot(X,Y)]) my_just_gemm([v + Z + T.dot(X, Y)])
my_just_gemm([v + Z*b + T.dot(X,Y)]) my_just_gemm([v + Z * b + T.dot(X, Y)])
my_just_gemm([v + Z + a*b*a*T.dot(X,Y)]) my_just_gemm([v + Z + a * b * a * T.dot(X, Y)])
my_just_gemm([v + (b * b) * Z * a - (a * a) * T.dot(X,Y) * b]) my_just_gemm([v + (b * b) * Z * a - (a * a) * T.dot(X, Y) * b])
my_just_gemm([Z - T.dot(X,Y) + v]) my_just_gemm([Z - T.dot(X, Y) + v])
my_just_gemm([Z*b - T.dot(X,Y) + v]) my_just_gemm([Z * b - T.dot(X, Y) + v])
my_just_gemm([Z - a*b*a*T.dot(X,Y) + v]) my_just_gemm([Z - a * b * a * T.dot(X, Y) + v])
def test_gemm_opt_vector_stuff(): def test_gemm_opt_vector_stuff():
X,Y,Z,a,b = T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar() X, Y, Z, a, b = T.matrix(), T.matrix(), T.matrix(), T.scalar(), T.scalar()
u,v = T.vector(), T.vector() u, v = T.vector(), T.vector()
f = inplace_func([a, u, v], a + T.dot(u,v), mode='FAST_RUN') f = inplace_func([a, u, v], a + T.dot(u, v), mode='FAST_RUN')
if gemm_inplace in [n.op for n in f.maker.env.nodes]: if gemm_inplace in [n.op for n in f.maker.env.nodes]:
raise Failure('gemm_inplace in graph') raise Failure('gemm_inplace in graph')
f = inplace_func([a, u, X,Y], a * u + T.dot(X,Y), mode='FAST_RUN') f = inplace_func([a, u, X, Y], a * u + T.dot(X, Y), mode='FAST_RUN')
if (gemm_inplace in [n.op for n in f.maker.env.nodes]): if (gemm_inplace in [n.op for n in f.maker.env.nodes]):
raise Failure('gemm_inplace in graph') raise Failure('gemm_inplace in graph')
def test_inplace0(): def test_inplace0():
#should fail to insert gemm_inplace because gemm_inplace would create cycles #should fail to insert gemm_inplace because gemm_inplace would
X,Y,Z,a,b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar('a'), T.scalar('b') #create cycles
X, Y, Z, a, b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar(
'a'), T.scalar('b')
R, S, c = T.matrix('R'), T.matrix('S'), T.scalar('c') R, S, c = T.matrix('R'), T.matrix('S'), T.scalar('c')
f = inplace_func([Z, b, R, S], f = inplace_func([Z, b, R, S],
[Z * (Z + b * T.dot(R,S).T)], mode='FAST_RUN') [Z * (Z + b * T.dot(R, S).T)], mode='FAST_RUN')
if (gemm_inplace in [n.op for n in f.maker.env.nodes]): if (gemm_inplace in [n.op for n in f.maker.env.nodes]):
print pp(f.maker.env.outputs[0]) print pp(f.maker.env.outputs[0])
raise Failure('gemm_inplace in graph') raise Failure('gemm_inplace in graph')
assert gemm_no_inplace in [n.op for n in f.maker.env.nodes] assert gemm_no_inplace in [n.op for n in f.maker.env.nodes]
# gemm_inplace should be inserted here, to work in-place on Z*c # gemm_inplace should be inserted here, to work in-place on Z*c
f = inplace_func([X,Y,Z,a,b, R, S, c], f = inplace_func([X, Y, Z, a, b, R, S, c],
[Z * (c*Z + a * T.dot(X,Y) + b * T.dot(R,S).T)], [Z * (c * Z + a * T.dot(X, Y) + b * T.dot(R, S).T)],
mode='FAST_RUN') mode='FAST_RUN')
if (not gemm_inplace in [n.op for n in f.maker.env.nodes]): if (not gemm_inplace in [n.op for n in f.maker.env.nodes]):
theano.printing.debugprint(f) theano.printing.debugprint(f)
raise Failure('no gemm_inplace in graph') raise Failure('no gemm_inplace in graph')
def test_inplace1(): def test_inplace1():
X,Y,Z,a,b = XYZab() X, Y, Z, a, b = XYZab()
# with > 2 terms in the overall addition # with > 2 terms in the overall addition
f = inplace_func([X, Y, Z], f = inplace_func([X, Y, Z],
[Z + Z + T.dot(X,Y)], mode='FAST_RUN') [Z + Z + T.dot(X, Y)], mode='FAST_RUN')
#theano.printing.debugprint(f) #theano.printing.debugprint(f)
# it doesn't work inplace because we didn't mark Z as mutable input # it doesn't work inplace because we didn't mark Z as mutable input
assert [n.op for n in f.maker.env.nodes] == [gemm_no_inplace] assert [n.op for n in f.maker.env.nodes] == [gemm_no_inplace]
def test_dot22(): def test_dot22():
for dtype1 in ['float32', 'float64', 'complex64', 'complex128']: for dtype1 in ['float32', 'float64', 'complex64', 'complex128']:
a = T.matrix(dtype=dtype1) a = T.matrix(dtype=dtype1)
for dtype2 in ['float32', 'float64', 'complex64', 'complex128']: for dtype2 in ['float32', 'float64', 'complex64', 'complex128']:
b = T.matrix(dtype=dtype2) b = T.matrix(dtype=dtype2)
f = theano.function([a,b],T.dot(a,b),mode=mode_blas_opt) f = theano.function([a, b], T.dot(a, b), mode=mode_blas_opt)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
if dtype1 == dtype2: if dtype1 == dtype2:
assert _dot22 in [x.op for x in topo], (dtype1,dtype2) assert _dot22 in [x.op for x in topo], (dtype1, dtype2)
else: else:
assert T.dot in [x.op for x in topo], (dtype1,dtype2) assert T.dot in [x.op for x in topo], (dtype1, dtype2)
rng = numpy.random.RandomState(unittest_tools.fetch_seed()) rng = numpy.random.RandomState(unittest_tools.fetch_seed())
def cmp(a_shp, b_shp): def cmp(a_shp, b_shp):
av=rng.uniform(size=a_shp).astype(dtype1) av = rng.uniform(size=a_shp).astype(dtype1)
bv=rng.uniform(size=b_shp).astype(dtype2) bv = rng.uniform(size=b_shp).astype(dtype2)
f(av,bv) f(av, bv)
cmp((3, 4), (4, 5)) cmp((3, 4), (4, 5))
cmp((0, 4), (4, 5)) cmp((0, 4), (4, 5))
...@@ -772,11 +819,13 @@ def test_dot22(): ...@@ -772,11 +819,13 @@ def test_dot22():
cmp((0, 4), (4, 0)) cmp((0, 4), (4, 0))
cmp((0, 0), (0, 0)) cmp((0, 0), (0, 0))
def test_dot22scalar(): def test_dot22scalar():
## including does not seem to work for 'local_dot_to_dot22' and ## including does not seem to work for 'local_dot_to_dot22' and
## 'local_dot22_to_dot22scalar' ## 'local_dot22_to_dot22scalar'
## TODO: exclude other optimizations in BlasOpt? ## TODO: exclude other optimizations in BlasOpt?
#m = theano.compile.get_default_mode().including('local_dot_to_dot22','local_dot22_to_dot22scalar','specialize') #m = theano.compile.get_default_mode().including('local_dot_to_dot22',
# 'local_dot22_to_dot22scalar','specialize')
#m = theano.compile.get_default_mode().including('BlasOpt', 'specialize') #m = theano.compile.get_default_mode().including('BlasOpt', 'specialize')
rng = numpy.random.RandomState(unittest_tools.fetch_seed()) rng = numpy.random.RandomState(unittest_tools.fetch_seed())
for dtype1 in ['complex64', 'complex128']: for dtype1 in ['complex64', 'complex128']:
...@@ -792,88 +841,111 @@ def test_dot22scalar(): ...@@ -792,88 +841,111 @@ def test_dot22scalar():
def check_dot22scalar(func, len_topo_scalar=-1): def check_dot22scalar(func, len_topo_scalar=-1):
topo = func.maker.env.toposort() topo = func.maker.env.toposort()
ops = [x.op for x in topo] ops = [x.op for x in topo]
dtype4_upcast = theano.scalar.upcast(dtype4, dtype1, dtype2) dtype4_upcast = theano.scalar.upcast(dtype4, dtype1,
dtype2)
if dtype1 == dtype2 == dtype3 == dtype4_upcast: if dtype1 == dtype2 == dtype3 == dtype4_upcast:
if len_topo_scalar>0: if len_topo_scalar > 0:
assert len(topo) == len_topo_scalar assert len(topo) == len_topo_scalar
assert _dot22scalar in ops, (dtype1, dtype2, dtype3, dtype4) assert _dot22scalar in ops, (dtype1, dtype2,
dtype3, dtype4)
elif dtype1 == dtype2 == dtype4_upcast: elif dtype1 == dtype2 == dtype4_upcast:
if not (len_topo_scalar > 0): if not (len_topo_scalar > 0):
assert len(topo) == len_topo_scalar assert len(topo) == len_topo_scalar
assert _dot22scalar in ops, (dtype1, dtype2, dtype3, dtype4) assert _dot22scalar in ops, (dtype1, dtype2,
dtype3, dtype4)
else: else:
# Currently there is a problem of optimization order # Currently there is a problem of
# The constant get upcasted to float64 before we try to merge it # optimization order The constant get
# with the dot22 of float32. So this prevent the merge. # upcasted to float64 before we try to
assert _dot22scalar in ops or _dot22 in ops, (dtype1, dtype2, dtype3, dtype4) # merge it with the dot22 of
# float32. So this prevent the merge.
assert _dot22scalar in ops or _dot22 in ops, (
dtype1, dtype2, dtype3, dtype4)
elif dtype1 == dtype2: elif dtype1 == dtype2:
assert _dot22 in ops, (dtype1, dtype2, dtype3, dtype4) assert _dot22 in ops, (dtype1, dtype2,
dtype3, dtype4)
else: else:
assert T.dot in ops, (dtype1, dtype2, dtype3, dtype4) assert T.dot in ops, (dtype1, dtype2,
dtype3, dtype4)
def cmp(a_shp, b_shp, c_shp, sqr_shp=(5, 5)):
def cmp(a_shp, b_shp, c_shp, sqr_shp=(5,5)): av = rng.uniform(size=a_shp).astype(dtype1)
av=rng.uniform(size=a_shp).astype(dtype1) bv = rng.uniform(size=b_shp).astype(dtype2)
bv=rng.uniform(size=b_shp).astype(dtype2) cv = rng.uniform(size=c_shp).astype(dtype3)
cv=rng.uniform(size=c_shp).astype(dtype3) sv = rng.uniform(size=sqr_shp).astype(dtype1)
sv=rng.uniform(size=sqr_shp).astype(dtype1)
if False: if False:
f = theano.function([a,b],cst*T.dot(a,b),mode=mode_blas_opt) f = theano.function([a, b], cst * T.dot(a, b),
mode=mode_blas_opt)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
check_dot22scalar(f, 1) check_dot22scalar(f, 1)
f(av,bv) f(av, bv)
if True: if True:
f = theano.function([a,b,c],cst*c*T.dot(a,b),mode=mode_blas_opt) f = theano.function([a, b, c],
cst * c * T.dot(a, b),
mode=mode_blas_opt)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
f(av,bv,cv) f(av, bv, cv)
f = theano.function([a,b,c],c * cst*T.dot(a,b),mode=mode_blas_opt) f = theano.function([a, b, c],
c * cst * T.dot(a, b),
mode=mode_blas_opt)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
f(av,bv,cv) f(av, bv, cv)
## Here, canonicalize also seems needed ## Here, canonicalize also seems needed
## TODO: add only the optimizations needed? ## TODO: add only the optimizations needed?
m2 = mode_blas_opt.including('canonicalize') m2 = mode_blas_opt.including('canonicalize')
f = theano.function([a,b,c],cst2 *c * cst*T.dot(a,b),mode=m2) f = theano.function([a, b, c],
cst2 * c * cst * T.dot(a, b),
mode=m2)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
f(av,bv,cv) f(av, bv, cv)
if dtype1 == dtype2 == dtype3: if dtype1 == dtype2 == dtype3:
f = theano.function([a,b,c],c * cst*a*T.dot(a,b),mode=m2) f = theano.function([a, b, c],
c * cst * a * T.dot(a, b),
mode=m2)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
f(sv,sv,sv) f(sv, sv, sv)
f = theano.function([a,b,c],cst*c *a*T.dot(a,b),mode=mode_blas_opt) f = theano.function([a, b, c],
cst * c * a * T.dot(a, b),
mode=mode_blas_opt)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
#currently the canonizer don't always merge all Mul together... #currently the canonizer don't always
# dot22scalar optimizer does not do a recursive search # merge all Mul together... dot22scalar
# therefore, it doesn't find potential matches of the scalar. # optimizer does not do a recursive search
# TODO: combine with the 'canonicalization' that is part of the Gemm optimizer. # therefore, it doesn't find potential
# matches of the scalar. TODO: combine
# with the 'canonicalization' that is part
# of the Gemm optimizer.
# #
# assert _dot22scalar in [x.op for x in topo] # assert _dot22scalar in [x.op for x in topo]
# assert len(topo)==2 # assert len(topo)==2
f(sv,sv,sv) f(sv, sv, sv)
f = theano.function([a,b,c],c * a*cst*T.dot(a,b),mode=m2) f = theano.function([a, b, c],
c * a * cst * T.dot(a, b),
mode=m2)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
check_dot22scalar(f, 2) check_dot22scalar(f, 2)
f(sv,sv,sv) f(sv, sv, sv)
cmp((3,4),(4,5),(3,5)) cmp((3, 4), (4, 5), (3, 5))
cmp((0,4),(4,5),(0,5)) cmp((0, 4), (4, 5), (0, 5))
cmp((3,0),(0,5),(3,5)) cmp((3, 0), (0, 5), (3, 5))
cmp((3,4),(4,0),(3,0),(0,0)) cmp((3, 4), (4, 0), (3, 0), (0, 0))
cmp((0,4),(4,0),(0,0)) cmp((0, 4), (4, 0), (0, 0))
cmp((0,0),(0,0),(0,0)) cmp((0, 0), (0, 0), (0, 0))
def test_dot22scalar_cast(): def test_dot22scalar_cast():
...@@ -897,19 +969,20 @@ def test_dot22scalar_cast(): ...@@ -897,19 +969,20 @@ def test_dot22scalar_cast():
def test_dot_w_self(): def test_dot_w_self():
# This can trigger problems in the optimization because what would normally be a gemm must # This can trigger problems in the optimization because what would
# not be because the output is aliased to one of the inputs. # normally be a gemm must not be because the output is aliased to
# one of the inputs.
A = shared(value=numpy.ones((2,2))) A = shared(value=numpy.ones((2, 2)))
B = T.matrix() B = T.matrix()
p = T.dot(A,A)*B p = T.dot(A, A) * B
grad = T.grad(T.mean(p), A) grad = T.grad(T.mean(p), A)
f = theano.function([B], p, updates={A : A - grad}) f = theano.function([B], p, updates={A: A - grad})
# tests correctness in debugmode # tests correctness in debugmode
f(numpy.asarray([[0,1], [2,3]], dtype=config.floatX)) f(numpy.asarray([[0, 1], [2, 3]], dtype=config.floatX))
############################################################################### ###############################################################################
...@@ -935,8 +1008,9 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -935,8 +1008,9 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
''' Test vector dot matrix ''' ''' Test vector dot matrix '''
rng = numpy.random.RandomState(unittest_tools.fetch_seed()) rng = numpy.random.RandomState(unittest_tools.fetch_seed())
v = theano.shared(numpy.array(rng.uniform(size=(2,)), dtype='float32')) v = theano.shared(numpy.array(rng.uniform(size=(2,)), dtype='float32'))
m = theano.shared(numpy.array(rng.uniform(size=(2,3)), dtype='float32')) m = theano.shared(numpy.array(rng.uniform(size=(2, 3)),
f = theano.function([], theano.dot(v,m), mode=mode_blas_opt) dtype='float32'))
f = theano.function([], theano.dot(v, m), mode=mode_blas_opt)
# Assert that the dot was optimized somehow # Assert that the dot was optimized somehow
self.assertFunctionContains0(f, T.dot) self.assertFunctionContains0(f, T.dot)
...@@ -950,14 +1024,13 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -950,14 +1024,13 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
borrow=True) borrow=True)
assert numpy.allclose(f(), numpy.dot(v.get_value(), m.get_value())) assert numpy.allclose(f(), numpy.dot(v.get_value(), m.get_value()))
def test_dot_mv(self): def test_dot_mv(self):
''' Test matrix dot vector ''' ''' Test matrix dot vector '''
rng = numpy.random.RandomState(unittest_tools.fetch_seed()) rng = numpy.random.RandomState(unittest_tools.fetch_seed())
v = theano.shared(numpy.array(rng.uniform(size=(2,)), dtype='float32')) v = theano.shared(numpy.array(rng.uniform(size=(2,)), dtype='float32'))
m = theano.shared(numpy.array(rng.uniform(size=(3,2)), m = theano.shared(numpy.array(rng.uniform(size=(3, 2)),
dtype='float32')) dtype='float32'))
f = theano.function([], theano.dot(m,v), mode=mode_blas_opt) f = theano.function([], theano.dot(m, v), mode=mode_blas_opt)
# Assert that the dot was optimized somehow # Assert that the dot was optimized somehow
self.assertFunctionContains0(f, T.dot) self.assertFunctionContains0(f, T.dot)
...@@ -975,34 +1048,36 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -975,34 +1048,36 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
def t_gemv1(m_shp): def t_gemv1(m_shp):
''' test vector2+dot(matrix,vector1) ''' ''' test vector2+dot(matrix,vector1) '''
rng = numpy.random.RandomState(unittest_tools.fetch_seed()) rng = numpy.random.RandomState(unittest_tools.fetch_seed())
v1 = theano.shared(numpy.array(rng.uniform(size=(m_shp[1],)), dtype='float32')) v1 = theano.shared(numpy.array(rng.uniform(size=(m_shp[1],)
), dtype='float32'))
v2_orig = numpy.array(rng.uniform(size=(m_shp[0],)), dtype='float32') v2_orig = numpy.array(rng.uniform(size=(m_shp[0],)), dtype='float32')
v2 = theano.shared(v2_orig) v2 = theano.shared(v2_orig)
m = theano.shared(numpy.array(rng.uniform(size=m_shp), dtype='float32')) m = theano.shared(numpy.array(rng.uniform(size=m_shp),
dtype='float32'))
f = theano.function([], v2+theano.dot(m,v1), mode = mode_blas_opt) f = theano.function([], v2 + theano.dot(m, v1), mode=mode_blas_opt)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(f(), assert numpy.allclose(f(),
numpy.dot(m.get_value(), v1.get_value()) + v2_orig) numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
assert len(topo)==1 assert len(topo) == 1
assert isinstance(topo[0].op, Gemv) assert isinstance(topo[0].op, Gemv)
assert topo[0].op.inplace==False assert topo[0].op.inplace == False
#test the inplace version #test the inplace version
g = theano.function([], [], updates={v2:v2+theano.dot(m,v1)} g = theano.function([], [], updates={v2: v2 + theano.dot(m, v1)},
, mode = mode_blas_opt) mode=mode_blas_opt)
# Assert they produce the same output # Assert they produce the same output
g() g()
assert numpy.allclose(v2.get_value(), assert numpy.allclose(v2.get_value(),
numpy.dot(m.get_value(), v1.get_value()) + v2_orig) numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
topo = g.maker.env.toposort() topo = g.maker.env.toposort()
assert len(topo)==1 assert len(topo) == 1
assert isinstance(topo[0].op, Gemv) assert isinstance(topo[0].op, Gemv)
if config.mode != 'FAST_COMPILE': if config.mode != 'FAST_COMPILE':
assert topo[0].op.inplace==True assert topo[0].op.inplace == True
# Do the same tests with a matrix with strides in both dimensions # Do the same tests with a matrix with strides in both dimensions
m.set_value( m.set_value(
...@@ -1016,40 +1091,42 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1016,40 +1091,42 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
numpy.dot(m.get_value(), v1.get_value()) + v2_orig) numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
def test_gemv1(self): def test_gemv1(self):
self.t_gemv1((3,2)) self.t_gemv1((3, 2))
self.t_gemv1((0,2)) self.t_gemv1((0, 2))
self.t_gemv1((3,0)) self.t_gemv1((3, 0))
self.t_gemv1((0,0)) self.t_gemv1((0, 0))
def test_gemv2(self): def test_gemv2(self):
''' test vector2+dot(vector1,matrix) ''' ''' test vector2+dot(vector1,matrix) '''
rng = numpy.random.RandomState(unittest_tools.fetch_seed()) rng = numpy.random.RandomState(unittest_tools.fetch_seed())
v1 = theano.shared(numpy.array(rng.uniform(size=(2,)), dtype='float32')) v1 = theano.shared(numpy.array(rng.uniform(size=(2,)),
dtype='float32'))
v2_orig = numpy.array(rng.uniform(size=(3,)), dtype='float32') v2_orig = numpy.array(rng.uniform(size=(3,)), dtype='float32')
v2 = theano.shared(v2_orig ) v2 = theano.shared(v2_orig)
m = theano.shared(numpy.array(rng.uniform(size=(2,3)), dtype='float32')) m = theano.shared(numpy.array(rng.uniform(size=(2, 3)),
dtype='float32'))
f = theano.function([], v2+theano.dot(v1,m), mode = mode_blas_opt) f = theano.function([], v2 + theano.dot(v1, m), mode=mode_blas_opt)
# Assert they produce the same output # Assert they produce the same output
assert numpy.allclose(f(), assert numpy.allclose(f(),
numpy.dot(v1.get_value(), m.get_value()) + v2.get_value()) numpy.dot(v1.get_value(), m.get_value()) + v2.get_value())
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
assert sum(isinstance(node.op, Gemv) for node in topo)==1 assert sum(isinstance(node.op, Gemv) for node in topo) == 1
assert topo[-1].op.inplace==False assert topo[-1].op.inplace == False
#test the inplace version #test the inplace version
g = theano.function([], [], updates={v2:v2+theano.dot(v1,m)} g = theano.function([], [], updates={v2: v2 + theano.dot(v1, m)},
, mode = mode_blas_opt) mode=mode_blas_opt)
# Assert they produce the same output # Assert they produce the same output
g() g()
assert numpy.allclose(v2.get_value(), assert numpy.allclose(v2.get_value(),
numpy.dot(v1.get_value(), m.get_value()) + v2_orig) numpy.dot(v1.get_value(), m.get_value()) + v2_orig)
topo = g.maker.env.toposort() topo = g.maker.env.toposort()
assert sum(isinstance(node.op, Gemv) for node in topo)==1 assert sum(isinstance(node.op, Gemv) for node in topo) == 1
if config.mode != 'FAST_COMPILE': if config.mode != 'FAST_COMPILE':
assert topo[-1].op.inplace==True assert topo[-1].op.inplace == True
# Do the same tests with a matrix with strides in both dimensions # Do the same tests with a matrix with strides in both dimensions
m.set_value( m.set_value(
...@@ -1074,7 +1151,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1074,7 +1151,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
f = theano.function([A, x, y], z) f = theano.function([A, x, y], z)
# Matrix value # Matrix value
A_val = numpy.ones((5,3), dtype=config.floatX) A_val = numpy.ones((5, 3), dtype=config.floatX)
# Different vector length # Different vector length
ones_3 = numpy.ones(3, dtype=config.floatX) ones_3 = numpy.ones(3, dtype=config.floatX)
ones_4 = numpy.ones(4, dtype=config.floatX) ones_4 = numpy.ones(4, dtype=config.floatX)
...@@ -1098,7 +1175,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1098,7 +1175,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
def matrixmultiply(a, b): def matrixmultiply(a, b):
if len(b.shape) == 1: if len(b.shape) == 1:
b_is_vector = True b_is_vector = True
b = b[:,newaxis] b = b[:, newaxis]
else: else:
b_is_vector = False b_is_vector = False
assert a.shape[1] == b.shape[0] assert a.shape[1] == b.shape[0]
...@@ -1107,8 +1184,8 @@ def matrixmultiply(a, b): ...@@ -1107,8 +1184,8 @@ def matrixmultiply(a, b):
for j in xrange(b.shape[1]): for j in xrange(b.shape[1]):
s = 0 s = 0
for k in xrange(a.shape[1]): for k in xrange(a.shape[1]):
s += a[i,k] * b[k, j] s += a[i, k] * b[k, j]
c[i,j] = s c[i, j] = s
if b_is_vector: if b_is_vector:
c = c.reshape((a.shape[0],)) c = c.reshape((a.shape[0],))
return c return c
...@@ -1118,23 +1195,25 @@ class BaseGemv(object): ...@@ -1118,23 +1195,25 @@ class BaseGemv(object):
mode = mode_blas_opt # can be overridden with self.mode mode = mode_blas_opt # can be overridden with self.mode
shared = staticmethod(theano.shared) shared = staticmethod(theano.shared)
def get_data(self,x_stride=1,y_stride=1): def get_data(self, x_stride=1, y_stride=1):
rng = numpy.random.RandomState(unittest_tools.fetch_seed()) rng = numpy.random.RandomState(unittest_tools.fetch_seed())
mult = array(1, dtype=self.dtype) mult = array(1, dtype=self.dtype)
if self.dtype in [complex64,complex128]: if self.dtype in [complex64, complex128]:
mult = array(1 + 1j, dtype=self.dtype) mult = array(1 + 1j, dtype=self.dtype)
alpha = array(1., dtype=self.dtype) * mult alpha = array(1., dtype=self.dtype) * mult
beta = array(1., dtype=self.dtype) * mult beta = array(1., dtype=self.dtype) * mult
a = rng.randn(3,3).astype(self.dtype) * mult a = rng.randn(3, 3).astype(self.dtype) * mult
x = arange(shape(a)[0]*x_stride,dtype=self.dtype) * mult x = arange(shape(a)[0] * x_stride, dtype=self.dtype) * mult
y = arange(shape(a)[1]*y_stride,dtype=self.dtype) * mult y = arange(shape(a)[1] * y_stride, dtype=self.dtype) * mult
return alpha,beta,a,x,y return alpha, beta, a, x, y
def test_simple(self): def test_simple(self):
alpha, beta, a, x, y = [ self.shared(value) for value in self.get_data() ] alpha, beta, a, x, y = [self.shared(value)
desired_oy = alpha.get_value() * matrixmultiply(a.get_value(),x.get_value()) + beta.get_value() * y.get_value() for value in self.get_data()]
desired_oy = alpha.get_value() * matrixmultiply(a.
get_value(), x.get_value()) + beta.get_value() * y.get_value()
oy = alpha * T.dot(a,x) + beta * y oy = alpha * T.dot(a, x) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1154,7 +1233,7 @@ class BaseGemv(object): ...@@ -1154,7 +1233,7 @@ class BaseGemv(object):
desired_oy = matrixmultiply(a_v, x_v) desired_oy = matrixmultiply(a_v, x_v)
oy = T.dot(a,x) oy = T.dot(a, x)
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1163,15 +1242,15 @@ class BaseGemv(object): ...@@ -1163,15 +1242,15 @@ class BaseGemv(object):
oy_v = oy_func() oy_v = oy_func()
assert_array_almost_equal(desired_oy, oy_v) assert_array_almost_equal(desired_oy, oy_v)
def test_simple_transpose(self): def test_simple_transpose(self):
vs = self.get_data() vs = self.get_data()
alpha_v, beta_v, a_v, x_v, y_v = vs alpha_v, beta_v, a_v, x_v, y_v = vs
alpha, beta, a, x, y = [ self.shared(v) for v in vs ] alpha, beta, a, x, y = [self.shared(v) for v in vs]
desired_oy = alpha_v * matrixmultiply(transpose(a_v),x_v)+beta_v*y_v desired_oy = alpha_v * matrixmultiply(transpose(a_v),
x_v) + beta_v * y_v
oy = alpha * T.dot(a.T,x)+beta*y oy = alpha * T.dot(a.T, x) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1181,13 +1260,13 @@ class BaseGemv(object): ...@@ -1181,13 +1260,13 @@ class BaseGemv(object):
assert_array_almost_equal(desired_oy, oy_v) assert_array_almost_equal(desired_oy, oy_v)
def test_x_stride(self): def test_x_stride(self):
vs = self.get_data(x_stride = 2) vs = self.get_data(x_stride=2)
alpha_v, beta_v, a_v, x_v, y_v = vs alpha_v, beta_v, a_v, x_v, y_v = vs
alpha, beta, a, x, y = [ self.shared(v) for v in vs ] alpha, beta, a, x, y = [self.shared(v) for v in vs]
desired_oy = alpha_v * matrixmultiply(a_v,x_v[::2])+beta_v*y_v desired_oy = alpha_v * matrixmultiply(a_v, x_v[::2]) + beta_v * y_v
oy = alpha * T.dot(a,x[::2])+beta*y oy = alpha * T.dot(a, x[::2]) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1197,13 +1276,14 @@ class BaseGemv(object): ...@@ -1197,13 +1276,14 @@ class BaseGemv(object):
assert_array_almost_equal(desired_oy, oy_v) assert_array_almost_equal(desired_oy, oy_v)
def test_x_stride_transpose(self): def test_x_stride_transpose(self):
vs = self.get_data(x_stride = 2) vs = self.get_data(x_stride=2)
alpha_v, beta_v, a_v, x_v, y_v = vs alpha_v, beta_v, a_v, x_v, y_v = vs
alpha, beta, a, x, y = [ self.shared(v) for v in vs ] alpha, beta, a, x, y = [self.shared(v) for v in vs]
desired_oy = alpha_v * matrixmultiply(transpose(a_v),x_v[::2])+beta_v*y_v desired_oy = alpha_v * matrixmultiply(transpose(a_v), x_v[::
2]) + beta_v * y_v
oy = alpha * T.dot(a.T,x[::2])+beta*y oy = alpha * T.dot(a.T, x[::2]) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1213,13 +1293,13 @@ class BaseGemv(object): ...@@ -1213,13 +1293,13 @@ class BaseGemv(object):
assert_array_almost_equal(desired_oy, oy_v) assert_array_almost_equal(desired_oy, oy_v)
def test_y_stride(self): def test_y_stride(self):
vs = self.get_data(y_stride = 2) vs = self.get_data(y_stride=2)
alpha_v, beta_v, a_v, x_v, y_v = vs alpha_v, beta_v, a_v, x_v, y_v = vs
alpha, beta, a, x, y = [ self.shared(v) for v in vs ] alpha, beta, a, x, y = [self.shared(v) for v in vs]
desired_oy = alpha_v * matrixmultiply(a_v,x_v)+beta_v*y_v[::2] desired_oy = alpha_v * matrixmultiply(a_v, x_v) + beta_v * y_v[::2]
oy = alpha * T.dot(a,x)+beta*y[::2] oy = alpha * T.dot(a, x) + beta * y[::2]
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1229,13 +1309,14 @@ class BaseGemv(object): ...@@ -1229,13 +1309,14 @@ class BaseGemv(object):
assert_array_almost_equal(desired_oy, oy_v) assert_array_almost_equal(desired_oy, oy_v)
def test_y_stride_transpose(self): def test_y_stride_transpose(self):
vs = self.get_data(y_stride = 2) vs = self.get_data(y_stride=2)
alpha_v, beta_v, a_v, x_v, y_v = vs alpha_v, beta_v, a_v, x_v, y_v = vs
alpha, beta, a, x, y = [ self.shared(v) for v in vs ] alpha, beta, a, x, y = [self.shared(v) for v in vs]
desired_oy = alpha_v * matrixmultiply(transpose(a_v),x_v)+beta_v*y_v[::2] desired_oy = alpha_v * matrixmultiply(transpose(a_v),
x_v) + beta_v * y_v[::2]
oy = alpha * T.dot(a.T,x)+beta*y[::2] oy = alpha * T.dot(a.T, x) + beta * y[::2]
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1247,15 +1328,16 @@ class BaseGemv(object): ...@@ -1247,15 +1328,16 @@ class BaseGemv(object):
def test_a_strides(self): def test_a_strides(self):
vs = self.get_data() vs = self.get_data()
alpha_v, beta_v, a_v, x_v, y_v = vs alpha_v, beta_v, a_v, x_v, y_v = vs
alpha, beta, a, x, y = [ self.shared(v) for v in vs ] alpha, beta, a, x, y = [self.shared(v) for v in vs]
a_v = a_v[::-1, ::-1] a_v = a_v[::-1, ::-1]
a.set_value( a.set_value(
a.get_value(borrow=True, return_internal_type=True)[::-1, ::-1], a.get_value(borrow=True,
return_internal_type=True)[::-1, ::-1],
borrow=True) borrow=True)
desired_oy = alpha_v * matrixmultiply(a_v,x_v)+beta_v*y_v desired_oy = alpha_v * matrixmultiply(a_v, x_v) + beta_v * y_v
oy = alpha * T.dot(a,x)+beta*y oy = alpha * T.dot(a, x) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1267,15 +1349,17 @@ class BaseGemv(object): ...@@ -1267,15 +1349,17 @@ class BaseGemv(object):
def test_a_strides_transpose(self): def test_a_strides_transpose(self):
vs = self.get_data() vs = self.get_data()
alpha_v, beta_v, a_v, x_v, y_v = vs alpha_v, beta_v, a_v, x_v, y_v = vs
alpha, beta, a, x, y = [ self.shared(v) for v in vs ] alpha, beta, a, x, y = [self.shared(v) for v in vs]
a_v = a_v[::-1, ::-1] a_v = a_v[::-1, ::-1]
a.set_value( a.set_value(
a.get_value(borrow=True, return_internal_type=True)[::-1, ::-1], a.get_value(borrow=True,
return_internal_type=True)[::-1, ::-1],
borrow=True) borrow=True)
desired_oy = alpha_v * matrixmultiply(transpose(a_v),x_v)+beta_v*y_v desired_oy = alpha_v * matrixmultiply(transpose(a_v),
x_v) + beta_v * y_v
oy = alpha * T.dot(a.T,x)+beta*y oy = alpha * T.dot(a.T, x) + beta * y
oy_func = theano.function([], oy, mode=self.mode) oy_func = theano.function([], oy, mode=self.mode)
...@@ -1340,6 +1424,7 @@ class TestDgemv(TestCase, BaseGemv, unittest_tools.TestOptimizationMixin): ...@@ -1340,6 +1424,7 @@ class TestDgemv(TestCase, BaseGemv, unittest_tools.TestOptimizationMixin):
## Tests for Ger ## Tests for Ger
############################################################################### ###############################################################################
class TestGer_make_node(TestCase): class TestGer_make_node(TestCase):
def setUp(self): def setUp(self):
self.iv = T.tensor(dtype='int32', broadcastable=(False,)) self.iv = T.tensor(dtype='int32', broadcastable=(False,))
...@@ -1447,19 +1532,21 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1447,19 +1532,21 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
""" test local_gemm_to_ger opt""" """ test local_gemm_to_ger opt"""
assert T.blas.local_gemm_to_ger.transform( assert T.blas.local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.a, self.x.dimshuffle(0,'x'), self.A, self.a, self.x.dimshuffle(0, 'x'),
self.y.dimshuffle('x', 0), self.b(0)).owner) self.y.dimshuffle('x', 0), self.b(0)).owner)
def test_b_1_triggers_ger(self): def test_b_1_triggers_ger(self):
""" test local_gemm_to_ger opt""" """ test local_gemm_to_ger opt"""
assert T.blas.local_gemm_to_ger.transform( assert T.blas.local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.a, self.x.dimshuffle(0,'x'), self.A, self.a, self.x.dimshuffle(0, 'x'),
self.y.dimshuffle('x', 0), self.b(1)).owner) self.y.dimshuffle('x', 0), self.b(1)).owner)
def test_b_other_does_not_triggers_ger(self): def test_b_other_does_not_triggers_ger(self):
""" test local_gemm_to_ger opt""" """ test local_gemm_to_ger opt"""
assert not T.blas.local_gemm_to_ger.transform( assert not T.blas.local_gemm_to_ger.transform(
gemm_no_inplace( gemm_no_inplace(
self.A, self.a, self.x.dimshuffle(0,'x'), self.A, self.a, self.x.dimshuffle(0, 'x'),
self.y.dimshuffle('x', 0), self.b(1.5)).owner) self.y.dimshuffle('x', 0), self.b(1.5)).owner)
def test_outer(self): def test_outer(self):
...@@ -1563,6 +1650,7 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1563,6 +1650,7 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
f(numpy.random.rand(4).astype(self.dtype), f(numpy.random.rand(4).astype(self.dtype),
numpy.random.rand(5).astype(self.dtype)) numpy.random.rand(5).astype(self.dtype))
class TestBlasStrides(TestCase): class TestBlasStrides(TestCase):
dtype = 'float64' dtype = 'float64'
shared = staticmethod(tensor._shared) shared = staticmethod(tensor._shared)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论