Commit 6bc14189 authored by nouiz

Merge pull request #147 from jaberg/transpose_dot_opt

Transpose dot opt
......@@ -298,16 +298,24 @@ class Env(utils.object2):
if node == 'output':
r = self.outputs[i]
if not r.type == new_r.type:
raise TypeError("The type of the replacement must be the same as the type of the original Variable.", r, new_r)
raise TypeError("The type of the replacement must be the"
" same as the type of the original Variable.",
r, new_r)
self.outputs[i] = new_r
else:
if node.env is not self:
raise Exception("Cannot operate on %s because it does not belong to this Env" % node)
raise Exception("Cannot operate on %s because it does not"
" belong to this Env" % node)
r = node.inputs[i]
if not r.type == new_r.type:
raise TypeError("The type of the replacement must be the same as the type of the original Variable.", r, new_r)
raise TypeError("The type of the replacement must be the"
" same as the type of the original Variable.",
r, new_r)
node.inputs[i] = new_r
if r is new_r:
return
self.__import_r__([new_r])
self.__add_clients__(new_r, [(node, i)])
prune = self.__remove_clients__(r, [(node, i)], False)
......
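Aside: the replace() logic above amounts to a type-checked swap plus client-list bookkeeping. The toy classes below are a minimal sketch of that pattern only — hypothetical names, not Theano's real Env/Apply/Variable implementation:

    class ToyVar(object):
        def __init__(self, type_):
            self.type = type_

    class ToyNode(object):
        def __init__(self, inputs):
            self.inputs = inputs

    class ToyEnv(object):
        def __init__(self):
            # reverse index: variable -> [(node, input_index), ...] of its readers
            self.clients = {}

        def replace(self, node, i, new_r):
            r = node.inputs[i]
            if r.type != new_r.type:
                raise TypeError("The type of the replacement must be the"
                                " same as the type of the original Variable.",
                                r, new_r)
            node.inputs[i] = new_r
            if r is new_r:
                return
            # keep the reverse index consistent with the edit
            self.clients.setdefault(new_r, []).append((node, i))
            if (node, i) in self.clients.get(r, []):
                self.clients[r].remove((node, i))

    # usage: swap x for y (same type) in a node that reads x
    x, y = ToyVar('float64'), ToyVar('float64')
    node = ToyNode([x])
    ToyEnv().replace(node, 0, y)
    assert node.inputs[0] is y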
......@@ -651,6 +651,7 @@ class PatternSub(LocalOptimizer):
def skip_identities(self, expr):
if self.skip_identities_fn:
return self.skip_identities_fn(expr)
def op_key(self):
return self.op
......
......@@ -137,6 +137,7 @@ class DimShuffle(Op):
d = dict(self.__dict__)
del d['_hashval']
return d
def __setstate__(self, d):
self.__dict__.update(d)
self._rehash()
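The __getstate__/__setstate__ pair above is the standard idiom for objects that cache their own hash: drop the cached value when pickling and recompute it on load. A generic, self-contained sketch of the idiom (illustration only, not the DimShuffle code):

    import pickle

    class HashCached(object):
        def __init__(self, data):
            self.data = data
            self._rehash()

        def _rehash(self):
            self._hashval = hash(self.data)

        def __getstate__(self):
            d = dict(self.__dict__)
            del d['_hashval']  # derivable cache; do not pickle it
            return d

        def __setstate__(self, d):
            self.__dict__.update(d)
            self._rehash()  # rebuild the cache on unpickle

    obj = pickle.loads(pickle.dumps(HashCached('abc')))
    assert obj._hashval == hash('abc')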
......@@ -218,13 +219,11 @@ class DimShuffle(Op):
rval.insert(augm, 1)
return [rval]
def R_op(self, inputs, eval_points):
if None in eval_points:
return [None]
return self.make_node(*eval_points).outputs
def c_code(self, node, name, inp, out, sub):
input, = inp
res, = out
......
......@@ -87,6 +87,9 @@ def broadcast_like(value, template, env, dtype=None):
filled by broadcasting value through it. `value` will be cast as necessary.
"""
value = T.as_tensor_variable(value)
if value.type == template.type:
return value
shape_of = env.shape_feature.shape_of
if template not in shape_of:
raise NotImplementedError('broadcast_like currently requires the template Variable to be in the env already')
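For intuition, the new early return in broadcast_like is a pure fast path: if value already has exactly the template's type, there is nothing to broadcast. A rough NumPy analogue, treating shape-plus-dtype as the "type" (broadcast_like_np is a hypothetical helper, not Theano's implementation):

    import numpy as np

    def broadcast_like_np(value, template):
        value = np.asarray(value, dtype=template.dtype)
        if value.shape == template.shape:
            return value  # same "type": return unchanged, as in the fast path
        return np.broadcast_to(value, template.shape)

    assert broadcast_like_np(1.0, np.zeros((2, 3))).shape == (2, 3)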
......@@ -331,26 +334,31 @@ def local_dimshuffle_lift(node):
else:
return DimShuffle(iinput.type.broadcastable, new_order, inplace).make_node(iinput).outputs
## dot(x,y).T -> dot(y.T, x.T)
# These optimizations "lift" (propagate towards the inputs) DimShuffle
# through dot product. This puts the graph in a more standard shape and
# makes it easier to merge consecutive DimShuffles later.
inplace_matrix_transpose = T.DimShuffle([False, False], [1, 0], inplace=True)
matrix_transpose = T.DimShuffle([False, False], [1, 0], inplace=False)
# The transformation should be applied whether or not the transpose is
# inplace. The newly-introduced transpositions are not inplace; this will be
# taken care of in a later optimization phase.
# First optimization: inplace
local_transposed_dot_inplace = gof.PatternSub(
    (inplace_matrix_transpose, (T.dot, 'x', 'y')),
    (T.dot, (matrix_transpose, 'y'), (matrix_transpose, 'x')))
# Second optimization: not inplace
local_transposed_dot = gof.PatternSub(
    (matrix_transpose, (T.dot, 'x', 'y')),
    (T.dot, (matrix_transpose, 'y'), (matrix_transpose, 'x')))
# Register in the canonization phase only
register_canonicalize(local_transposed_dot_inplace,
                      name='local_transposed_dot_inplace')
register_canonicalize(local_transposed_dot, name='local_transposed_dot')
@register_canonicalize
@gof.local_optimizer([])
def local_lift_transpose_through_dot(node):
    """
    dot(x,y).T -> dot(y.T, x.T)

    These optimizations "lift" (propagate towards the inputs) DimShuffle
    through dot product. This puts the graph in a more standard shape and
    makes it easier to merge consecutive DimShuffles later.

    The transformation should be applied whether or not the transpose is
    inplace. The newly-introduced transpositions are not inplace; this will
    be taken care of in a later optimization phase.
    """
    if not (isinstance(node.op, T.DimShuffle)
            and node.op.new_order == (1, 0)):
        return False
    if not (node.inputs[0].owner and node.inputs[0].owner.op == T.dot):
        return False
    x, y = node.inputs[0].owner.inputs
    if x.ndim == y.ndim == 2:
        return [T.dot(y.T, x.T)]
@gof.local_optimizer([])
def dimshuffle_as_view(node):
......
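The identity behind both the old PatternSub pair and the new local optimizer is plain linear algebra: transposing a matrix product transposes and swaps its operands. A standalone NumPy sanity check (not part of the commit):

    import numpy as np

    x = np.random.rand(2, 3)
    y = np.random.rand(3, 4)
    # dot(x, y).T == dot(y.T, x.T): the transpose is "lifted" through the dot
    assert np.allclose(np.dot(x, y).T, np.dot(y.T, x.T))

Pushing the DimShuffle toward the inputs this way is what later allows consecutive DimShuffles to be merged or cancelled.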
......@@ -11,28 +11,70 @@ from numpy.testing.noseclasses import KnownFailureTest
import theano
import theano.scalar as scal
from theano import compile
from theano import config
from theano import function
from theano import gof
import theano.tensor.opt as opt
from theano.tensor.opt import local_dimshuffle_lift, out2in, local_greedy_distributor, mul_canonizer, local_add_specialize
from theano.tensor.opt import Shape_i
from theano.tensor import scalar, iscalar, dscalar, lscalar, vectors, lvector, fvector, dvector, fmatrix, dmatrix, matrices, fmatrices, dmatrices, Subtensor, as_tensor_variable, Join, join
from theano import tensor #do not use, there is an import * below that hides it
from theano import tensor as T #ugly but works for now...
from theano.tensor import TensorType, inplace
from theano import pprint
from theano import shared
from theano.gof import Env
from theano.gof.python25 import any, all
import theano.tensor.opt as opt
from theano.tensor.opt import (
local_add_specialize,
local_dimshuffle_lift,
local_greedy_distributor,
mul_canonizer,
out2in,
Shape_i,
)
from theano import tensor
from theano import tensor as T
from theano.tensor import scalar, iscalar, lscalar, fscalar, dscalar
from theano.tensor import vector, ivector, lvector, fvector, dvector
from theano.tensor import matrix, imatrix, lmatrix, fmatrix, dmatrix
from theano.tensor import scalars, vectors, matrices, fmatrices, dmatrices
from theano.tensor import (
as_tensor_variable,
inplace,
Join,
join,
Subtensor,
TensorType,
)
from theano.tensor.elemwise import DimShuffle
from theano import pprint, shared
from theano.tests import unittest_tools as utt
from theano import function, compile
mode_opt = theano.config.mode
if mode_opt == 'FAST_COMPILE':
mode_opt = 'FAST_RUN'
mode_opt = theano.compile.mode.get_mode(mode_opt)
ds = lambda x, y: DimShuffle(x.type.broadcastable, y)(x)
dimshuffle_lift = out2in(local_dimshuffle_lift)
_optimizer_stabilize = gof.Query(include=['fast_run'])
_optimizer_stabilize.position_cutoff = 1.51
_optimizer_stabilize = compile.optdb.query(_optimizer_stabilize)
_optimizer_specialize = gof.Query(include=['fast_run'])
_optimizer_specialize.position_cutoff = 2.01
_optimizer_specialize = compile.optdb.query(_optimizer_specialize)
_optimizer_fast_run = gof.Query(include=['fast_run'])
_optimizer_fast_run = compile.optdb.query(_optimizer_fast_run)
def optimize(g, level='fast_run'):
    if level == 'fast_run':
        _optimizer_fast_run.optimize(g)
    elif level == 'specialize':
        _optimizer_specialize.optimize(g)
    elif level == 'stabilize':
        _optimizer_stabilize.optimize(g)
    else:
        raise ValueError(level)
    return g
def inputs(xbc = (0, 0), ybc = (0, 0), zbc = (0, 0)):
x = TensorType(broadcastable = xbc, dtype = 'float64')('x')
y = TensorType(broadcastable = ybc, dtype = 'float64')('y')
......@@ -40,11 +82,7 @@ def inputs(xbc = (0, 0), ybc = (0, 0), zbc = (0, 0)):
return x, y, z
ds = lambda x, y: DimShuffle(x.type.broadcastable, y)(x)
dimshuffle_lift = out2in(local_dimshuffle_lift)
class test_dimshuffle_lift(unittest.TestCase):
def test_double_transpose(self):
x, y, z = inputs()
e = ds(ds(x, (1, 0)), (1, 0))
......@@ -83,8 +121,6 @@ class test_dimshuffle_lift(unittest.TestCase):
def test_add_canonizer_problem0():
#observed in a real graph
n_segments = 10
label = lscalar('label')
segment_labels = label + theano._asarray([0] * n_segments, dtype='int64')
......@@ -92,6 +128,7 @@ def test_add_canonizer_problem0():
r = segment_labels * 5
f = function([label], r)
class test_greedy_distribute(unittest.TestCase):
def test_main(self):
a, b, c, d, x, y, z = matrices('abcdxyz')
......@@ -130,9 +167,7 @@ class test_greedy_distribute(unittest.TestCase):
assert numpy.all(r0 == r2)
class test_canonize(unittest.TestCase):
def test_muldiv(self):
x, y, z = matrices('xyz')
a, b, c, d = matrices('abcd')
......@@ -633,6 +668,7 @@ class test_canonize(unittest.TestCase):
"""
raise SkipTest("Not implemented")
def test_local_merge_abs():
x,y,z = T.matrices('xyz')
x_val = numpy.random.rand(5,5).astype(config.floatX)
......@@ -692,8 +728,8 @@ def test_const_type_in_mul_canonizer():
f2(ival, wval, visbval, hidbval, betaval, aval),
f1(ival, wval, visbval, hidbval, betaval, aval))
class test_fusion(unittest.TestCase):
def do(self, mode, shared_fn, shp, gpu=False, nb_repeat=1, assert_len_topo=True, slice=None):
"""
param shared_fn: if None, will use compile.function
......@@ -1122,6 +1158,7 @@ def test_log1p():
theano.printing.debugprint(f)
assert [node.op for node in f.maker.env.toposort()] == [T.log1p]
def test_log_add():
m = theano.config.mode
if m == 'FAST_COMPILE':
......@@ -1164,6 +1201,7 @@ def test_log_add():
#TODO: (write and) test that the optimization works with Sum in addition to working with Add.
def test_local_useless_subtensor():
x = tensor.matrix('x')
......@@ -1255,7 +1293,6 @@ def test_local_useless_subtensor():
class test_local_subtensor_lift(unittest.TestCase):
def test0(self):
# basic test that the Op works
x = tensor.matrix('x')
......@@ -1420,7 +1457,8 @@ class test_local_subtensor_lift(unittest.TestCase):
assert isinstance(prog[0].op, tensor.Subtensor)
assert isinstance(prog[1].op, tensor.Rebroadcast)
assert (f4(zval) == zval[:,3,0]).all()
class test_local_subtensor_merge(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......@@ -1649,8 +1687,8 @@ class test_local_subtensor_merge(unittest.TestCase):
for s2 in s2r:
f(x_val, b1,e1,s1,b2,e2,s2)
class Test_alloc_zero(unittest.TestCase):
def setUp(self):
mode = theano.compile.mode.get_default_mode()
self.mode = mode.including("local_incsubtensor_of_allocs", "local_setsubtensor_of_allocs", "local_0_dot_x")
......@@ -1797,6 +1835,7 @@ def test_local_fill_useless():
f = function([x,y], T.fill(x,y)*2, mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.mul]
class test_shapeoptimizer(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......@@ -1963,11 +2002,8 @@ class test_assert(unittest.TestCase):
assert len(topo[0].inputs)==3
assert topo[1].op==theano.compile.function_module.deep_copy_op
def test_local_mul_specialize():
    # test a few cases to make sure that the basics are covered
mode = theano.config.mode
if mode == 'FAST_COMPILE':
mode = 'FAST_RUN'
......@@ -2041,11 +2077,8 @@ def speed_local_pow_specialize_range():
if not t2-t1<t3-t2:
print "WARNING WE ARE SLOWER"
def test_local_pow_specialize():
    # test a few cases to make sure that the basics are covered
mode = theano.config.mode
if mode == 'FAST_COMPILE':
mode = 'FAST_RUN'
......@@ -2097,10 +2130,8 @@ def test_local_pow_specialize():
# assert nodes == [T.sqrt, T.inv]  # Why doesn't this work?
assert numpy.allclose(f(val_no0),val_no0**(-.5))
def test_local_pow_specialize_device():
# test that on CPU we use more aggressive optimization
def test_local_pow_specialize_device_more_aggressive_on_cpu():
mode = theano.config.mode
if mode == 'FAST_COMPILE':
mode = 'FAST_RUN'
......@@ -2140,8 +2171,8 @@ def test_local_pow_specialize_device():
assert isinstance(nodes[-1].scalar_op,theano.scalar.basic.Inv)
assert numpy.allclose(f(val_no0),val_no0**(-16))
class T_Rebroadcast(unittest.TestCase):
def test_local_useless_rebroadcast(self):
mode = theano.compile.get_default_mode().including('canonicalize')
v1 = T.vector()
......@@ -2164,6 +2195,7 @@ class T_Rebroadcast(unittest.TestCase):
assert len(rebroadcast_nodes) == 1
assert rebroadcast_nodes[0].op.axis == {0: True}
class T_useless_elemwise(unittest.TestCase):
def setUp(self):
self.mode = theano.compile.get_default_mode().including('canonicalize')
......@@ -2252,6 +2284,7 @@ class T_useless_elemwise(unittest.TestCase):
assert len(topo) == 1
assert topo[0].op == theano.compile.function_module.deep_copy_op
def test_constant_get_stabilized():
"""
Currently Theano enables the constant_folding optimization before the stabilization optimization.
......@@ -2338,6 +2371,7 @@ class T_local_switch_sink(unittest.TestCase):
assert (res == numpy.asarray(self.resm[idx])).sum() == self.resm[idx].size
idx += 1
class T_local_erf(unittest.TestCase):
def setUp(self):
self.mode = theano.compile.mode.get_default_mode().including('canonicalize','fast_run').excluding('gpu','fusion')
......@@ -2422,6 +2456,7 @@ class T_local_erf(unittest.TestCase):
assert isinstance(topo[1].op.scalar_op,scal.Add) or isinstance(topo[1].op.scalar_op,scal.Sub)
print f(val)
class T_local_erfc(unittest.TestCase):
def setUp(self):
self.mode_fusion = theano.compile.mode.get_default_mode().including('canonicalize').including('fast_run').excluding('gpu')
......@@ -2681,6 +2716,7 @@ class test_local_remove_switch_const_cond(unittest.TestCase):
vy = numpy.array([[7,8,9],[10,11,12]], dtype='int64')
assert numpy.all(f(vx,vy) == vy)
class T_local_sum(unittest.TestCase):
def setUp(self):
self.mode = theano.compile.get_default_mode().including('canonicalize')
......@@ -2773,6 +2809,7 @@ class T_local_sum(unittest.TestCase):
finally:
config.warn.sum_sum_bug = backup
class T_local_sum_dimshuffle(unittest.TestCase):
def setUp(self):
self.mode = theano.compile.get_default_mode().including('canonicalize')
......@@ -2844,6 +2881,7 @@ class T_local_sum_dimshuffle(unittest.TestCase):
# test_local_sum_prod_dimshuffle (a * b * c)
# test_local_sum_divprod_dimshuffle ((a * b) / (c * d))
def test_make_vector():
b = T.bscalar()
i = T.iscalar()
......@@ -2927,6 +2965,7 @@ def test_make_vector():
except AssertionError:
pass
def test_local_join_1():
#test for vector
a = tensor.vector('a')
......@@ -2966,6 +3005,7 @@ def test_local_join_1():
assert len([n for n in e if isinstance(n.op, Join)]) == 1
assert f.maker.env.outputs[0].dtype == config.floatX
def test_local_mul_to_neg():
"""
Test that a multiplication by -1 or -1.0 yields the appropriate data type
......@@ -2986,8 +3026,8 @@ def test_local_mul_to_neg():
else:
raise NotImplementedError(config.cast_policy)
def test_local_add_specialize():
# test of non-zero dimension
a = tensor.vector()
s = tensor.add(tensor.zeros_like(a))
......@@ -3006,6 +3046,7 @@ def test_local_add_specialize():
assert transformed
assert transformed[0].type == s.type
def test_local_tensor_scalar_tensor():
dtypes = ['int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64',
......@@ -3027,6 +3068,7 @@ def test_local_tensor_scalar_tensor():
assert len(cast_nodes) == 0
f(0)
def test_local_scalar_tensor_scalar():
dtypes = ['int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64',
......@@ -3048,6 +3090,7 @@ def test_local_scalar_tensor_scalar():
assert len(cast_nodes) == 0
f(0)
def test_local_div_to_inv():
num_len_s = tensor.lscalar('num_len')
denom_s = tensor.scalar('denom')
......@@ -3065,6 +3108,41 @@ def test_local_div_to_inv():
assert out_val.shape == (1, 3)
assert numpy.allclose(out_val, 0.5)
class Test_lift_transpose_through_dot(unittest.TestCase):
def simple_optimize(self, g):
out2in(opt.local_useless_elemwise).optimize(g)
out2in(opt.local_lift_transpose_through_dot).optimize(g)
out2in(opt.local_useless_elemwise).optimize(g)
return g
def test_matrix_matrix(self):
a, b = matrices('ab')
g = self.simple_optimize(Env([a, b], [tensor.dot(a, b).T]))
sg = '[dot(InplaceDimShuffle{1,0}(b), InplaceDimShuffle{1,0}(a))]'
assert str(g) == sg
def test_row_matrix(self):
a = vector('a')
b = matrix('b')
g = optimize(Env(
[a, b],
[tensor.dot(a.dimshuffle('x', 0), b).T]),
level='stabilize')
sg = '[dot(DimShuffle{1,0}(b), DimShuffle{0,x}(a))]'
assert str(g) == sg
def test_matrix_col(self):
a = vector('a')
b = matrix('b')
g = optimize(Env(
[a, b],
[tensor.dot(b, a.dimshuffle(0, 'x')).T]),
level='stabilize')
sg = '[dot(DimShuffle{x,0}(a), DimShuffle{1,0}(b))]'
assert str(g) == sg
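The row and column cases exercised above follow from the same identity once the vector is reshaped to 2d; e.g. for the row case (NumPy sanity check only, not part of the test file):

    import numpy as np

    a = np.random.rand(3)
    b = np.random.rand(3, 4)
    # dot(row(a), b).T == dot(b.T, col(a))
    assert np.allclose(np.dot(a[None, :], b).T,
                       np.dot(b.T, a[:, None]))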
if __name__ == '__main__':
# unittest.main()
test_fusion().tes_memory_leak()