Commit 6bc14189 authored by nouiz

Merge pull request #147 from jaberg/transpose_dot_opt

Transpose dot opt
......@@ -298,16 +298,24 @@ class Env(utils.object2):
if node == 'output':
r = self.outputs[i]
if not r.type == new_r.type:
raise TypeError("The type of the replacement must be the same as the type of the original Variable.", r, new_r)
raise TypeError("The type of the replacement must be the"
" same as the type of the original Variable.",
r, new_r)
self.outputs[i] = new_r
else:
if node.env is not self:
raise Exception("Cannot operate on %s because it does not belong to this Env" % node)
raise Exception("Cannot operate on %s because it does not"
" belong to this Env" % node)
r = node.inputs[i]
if not r.type == new_r.type:
raise TypeError("The type of the replacement must be the same as the type of the original Variable.", r, new_r)
raise TypeError("The type of the replacement must be the"
" same as the type of the original Variable.",
r, new_r)
node.inputs[i] = new_r
if r is new_r:
return
self.__import_r__([new_r])
self.__add_clients__(new_r, [(node, i)])
prune = self.__remove_clients__(r, [(node, i)], False)
......
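Aside: the replace() logic above amounts to a type-checked swap plus client-list bookkeeping. The toy classes below are a minimal sketch of that pattern only — hypothetical names, not Theano's real Env/Apply/Variable implementation:

    class ToyVar(object):
        def __init__(self, type_):
            self.type = type_

    class ToyNode(object):
        def __init__(self, inputs):
            self.inputs = inputs

    class ToyEnv(object):
        def __init__(self):
            # reverse index: variable -> [(node, input_index), ...] of its readers
            self.clients = {}

        def replace(self, node, i, new_r):
            r = node.inputs[i]
            if r.type != new_r.type:
                raise TypeError("The type of the replacement must be the"
                                " same as the type of the original Variable.",
                                r, new_r)
            node.inputs[i] = new_r
            if r is new_r:
                return
            # keep the reverse index consistent with the edit
            self.clients.setdefault(new_r, []).append((node, i))
            if (node, i) in self.clients.get(r, []):
                self.clients[r].remove((node, i))

    # usage: swap x for y (same type) in a node that reads x
    x, y = ToyVar('float64'), ToyVar('float64')
    node = ToyNode([x])
    ToyEnv().replace(node, 0, y)
    assert node.inputs[0] is y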
......@@ -651,6 +651,7 @@ class PatternSub(LocalOptimizer):
def skip_identities(self, expr):
if self.skip_identities_fn:
return self.skip_identities_fn(expr)
def op_key(self):
return self.op
......
......@@ -137,6 +137,7 @@ class DimShuffle(Op):
d = dict(self.__dict__)
del d['_hashval']
return d
def __setstate__(self, d):
self.__dict__.update(d)
self._rehash()
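The __getstate__/__setstate__ pair above is the standard idiom for objects that cache their own hash: drop the cached value when pickling and recompute it on load. A generic, self-contained sketch of the idiom (illustration only, not the DimShuffle code):

    import pickle

    class HashCached(object):
        def __init__(self, data):
            self.data = data
            self._rehash()

        def _rehash(self):
            self._hashval = hash(self.data)

        def __getstate__(self):
            d = dict(self.__dict__)
            del d['_hashval']  # derivable cache; do not pickle it
            return d

        def __setstate__(self, d):
            self.__dict__.update(d)
            self._rehash()  # rebuild the cache on unpickle

    obj = pickle.loads(pickle.dumps(HashCached('abc')))
    assert obj._hashval == hash('abc')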
......@@ -218,13 +219,11 @@ class DimShuffle(Op):
rval.insert(augm, 1)
return [rval]
def R_op(self, inputs, eval_points):
if None in eval_points:
return [None]
return self.make_node(*eval_points).outputs
def c_code(self, node, name, inp, out, sub):
input, = inp
res, = out
......
......@@ -87,6 +87,9 @@ def broadcast_like(value, template, env, dtype=None):
filled by broadcasting value through it. `value` will be cast as necessary.
"""
value = T.as_tensor_variable(value)
if value.type == template.type:
return value
shape_of = env.shape_feature.shape_of
if template not in shape_of:
raise NotImplementedError('broadcast_like currently requires the template Variable to be in the env already')
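For intuition, the new early return in broadcast_like is a pure fast path: if value already has exactly the template's type, there is nothing to broadcast. A rough NumPy analogue, treating shape-plus-dtype as the "type" (broadcast_like_np is a hypothetical helper, not Theano's implementation):

    import numpy as np

    def broadcast_like_np(value, template):
        value = np.asarray(value, dtype=template.dtype)
        if value.shape == template.shape:
            return value  # same "type": return unchanged, as in the fast path
        return np.broadcast_to(value, template.shape)

    assert broadcast_like_np(1.0, np.zeros((2, 3))).shape == (2, 3)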
......@@ -331,26 +334,31 @@ def local_dimshuffle_lift(node):
else:
return DimShuffle(iinput.type.broadcastable, new_order, inplace).make_node(iinput).outputs
## dot(x,y).T -> dot(y.T, x.T)
# These optimizations "lift" (propagate towards the inputs) DimShuffle
# through dot product. This puts the graph in a more standard shape and
# makes it easier to merge consecutive DimShuffles later.
inplace_matrix_transpose = T.DimShuffle([False, False], [1, 0], inplace=True)
matrix_transpose = T.DimShuffle([False, False], [1, 0], inplace=False)
# The transformation should be applied whether or not the transpose is
# inplace. The newly-introduced transpositions are not inplace; this will be
# taken care of in a later optimization phase.
# First optimization: inplace
local_transposed_dot_inplace = gof.PatternSub(
    (inplace_matrix_transpose, (T.dot, 'x', 'y')),
    (T.dot, (matrix_transpose, 'y'), (matrix_transpose, 'x')))
# Second optimization: not inplace
local_transposed_dot = gof.PatternSub(
    (matrix_transpose, (T.dot, 'x', 'y')),
    (T.dot, (matrix_transpose, 'y'), (matrix_transpose, 'x')))
# Register in the canonization phase only
register_canonicalize(local_transposed_dot_inplace,
                      name='local_transposed_dot_inplace')
register_canonicalize(local_transposed_dot, name='local_transposed_dot')
@register_canonicalize
@gof.local_optimizer([])
def local_lift_transpose_through_dot(node):
    """
    dot(x,y).T -> dot(y.T, x.T)

    These optimizations "lift" (propagate towards the inputs) DimShuffle
    through dot product. This puts the graph in a more standard shape and
    makes it easier to merge consecutive DimShuffles later.

    The transformation should be applied whether or not the transpose is
    inplace. The newly-introduced transpositions are not inplace; this will
    be taken care of in a later optimization phase.
    """
    if not (isinstance(node.op, T.DimShuffle)
            and node.op.new_order == (1, 0)):
        return False
    if not (node.inputs[0].owner and node.inputs[0].owner.op == T.dot):
        return False
    x, y = node.inputs[0].owner.inputs
    if x.ndim == y.ndim == 2:
        return [T.dot(y.T, x.T)]
@gof.local_optimizer([])
def dimshuffle_as_view(node):
......
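The identity behind both the old PatternSub pair and the new local optimizer is plain linear algebra: transposing a matrix product transposes and swaps its operands. A standalone NumPy sanity check (not part of the commit):

    import numpy as np

    x = np.random.rand(2, 3)
    y = np.random.rand(3, 4)
    # dot(x, y).T == dot(y.T, x.T): the transpose is "lifted" through the dot
    assert np.allclose(np.dot(x, y).T, np.dot(y.T, x.T))

Pushing the DimShuffle toward the inputs this way is what later allows consecutive DimShuffles to be merged or cancelled.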
......@@ -11,28 +11,70 @@ from numpy.testing.noseclasses import KnownFailureTest
import theano
import theano.scalar as scal
from theano import compile
from theano import config
from theano import function
from theano import gof
import theano.tensor.opt as opt
from theano.tensor.opt import local_dimshuffle_lift, out2in, local_greedy_distributor, mul_canonizer, local_add_specialize
from theano.tensor.opt import Shape_i
from theano.tensor import scalar, iscalar, dscalar, lscalar, vectors, lvector, fvector, dvector, fmatrix, dmatrix, matrices, fmatrices, dmatrices, Subtensor, as_tensor_variable, Join, join
from theano import tensor #do not use, there is an import * below that hides it
from theano import tensor as T #ugly but works for now...
from theano.tensor import TensorType, inplace
from theano import pprint
from theano import shared
from theano.gof import Env
from theano.gof.python25 import any, all
import theano.tensor.opt as opt
from theano.tensor.opt import (
local_add_specialize,
local_dimshuffle_lift,
local_greedy_distributor,
mul_canonizer,
out2in,
Shape_i,
)
from theano import tensor
from theano import tensor as T
from theano.tensor import scalar, iscalar, lscalar, fscalar, dscalar
from theano.tensor import vector, ivector, lvector, fvector, dvector
from theano.tensor import matrix, imatrix, lmatrix, fmatrix, dmatrix
from theano.tensor import scalars, vectors, matrices, fmatrices, dmatrices
from theano.tensor import (
as_tensor_variable,
inplace,
Join,
join,
Subtensor,
TensorType,
)
from theano.tensor.elemwise import DimShuffle
from theano import pprint, shared
from theano.tests import unittest_tools as utt
from theano import function, compile
mode_opt = theano.config.mode
if mode_opt == 'FAST_COMPILE':
mode_opt = 'FAST_RUN'
mode_opt = theano.compile.mode.get_mode(mode_opt)
ds = lambda x, y: DimShuffle(x.type.broadcastable, y)(x)
dimshuffle_lift = out2in(local_dimshuffle_lift)
_optimizer_stabilize = gof.Query(include=['fast_run'])
_optimizer_stabilize.position_cutoff = 1.51
_optimizer_stabilize = compile.optdb.query(_optimizer_stabilize)
_optimizer_specialize = gof.Query(include=['fast_run'])
_optimizer_specialize.position_cutoff = 2.01
_optimizer_specialize = compile.optdb.query(_optimizer_specialize)
_optimizer_fast_run = gof.Query(include=['fast_run'])
_optimizer_fast_run = compile.optdb.query(_optimizer_fast_run)
def optimize(g, level='fast_run'):
    if level == 'fast_run':
        _optimizer_fast_run.optimize(g)
    elif level == 'specialize':
        _optimizer_specialize.optimize(g)
    elif level == 'stabilize':
        _optimizer_stabilize.optimize(g)
    else:
        raise ValueError(level)
    return g
def inputs(xbc = (0, 0), ybc = (0, 0), zbc = (0, 0)):
x = TensorType(broadcastable = xbc, dtype = 'float64')('x')
y = TensorType(broadcastable = ybc, dtype = 'float64')('y')
......@@ -40,11 +82,7 @@ def inputs(xbc = (0, 0), ybc = (0, 0), zbc = (0, 0)):
return x, y, z
ds = lambda x, y: DimShuffle(x.type.broadcastable, y)(x)
dimshuffle_lift = out2in(local_dimshuffle_lift)
class test_dimshuffle_lift(unittest.TestCase):
def test_double_transpose(self):
x, y, z = inputs()
e = ds(ds(x, (1, 0)), (1, 0))
......@@ -83,8 +121,6 @@ class test_dimshuffle_lift(unittest.TestCase):
def test_add_canonizer_problem0():
#observed in a real graph
n_segments = 10
label = lscalar('label')
segment_labels = label + theano._asarray([0] * n_segments, dtype='int64')
......@@ -92,6 +128,7 @@ def test_add_canonizer_problem0():
r = segment_labels * 5
f = function([label], r)
class test_greedy_distribute(unittest.TestCase):
def test_main(self):
a, b, c, d, x, y, z = matrices('abcdxyz')
......@@ -130,9 +167,7 @@ class test_greedy_distribute(unittest.TestCase):
assert numpy.all(r0 == r2)
class test_canonize(unittest.TestCase):
def test_muldiv(self):
x, y, z = matrices('xyz')
a, b, c, d = matrices('abcd')
......@@ -633,6 +668,7 @@ class test_canonize(unittest.TestCase):
"""
raise SkipTest("Not implemented")
def test_local_merge_abs():
x,y,z = T.matrices('xyz')
x_val = numpy.random.rand(5,5).astype(config.floatX)
......@@ -692,8 +728,8 @@ def test_const_type_in_mul_canonizer():
f2(ival, wval, visbval, hidbval, betaval, aval),
f1(ival, wval, visbval, hidbval, betaval, aval))
class test_fusion(unittest.TestCase):
def do(self, mode, shared_fn, shp, gpu=False, nb_repeat=1, assert_len_topo=True, slice=None):
"""
param shared_fn: if None, will use compile.function
......@@ -1122,6 +1158,7 @@ def test_log1p():
theano.printing.debugprint(f)
assert [node.op for node in f.maker.env.toposort()] == [T.log1p]
def test_log_add():
m = theano.config.mode
if m == 'FAST_COMPILE':
......@@ -1164,6 +1201,7 @@ def test_log_add():
#TODO: (write and) test that the optimization works with Sum in addition to working with Add.
def test_local_useless_subtensor():
x = tensor.matrix('x')
......@@ -1255,7 +1293,6 @@ def test_local_useless_subtensor():
class test_local_subtensor_lift(unittest.TestCase):
def test0(self):
# basic test that the Op works
x = tensor.matrix('x')
......@@ -1420,7 +1457,8 @@ class test_local_subtensor_lift(unittest.TestCase):
assert isinstance(prog[0].op, tensor.Subtensor)
assert isinstance(prog[1].op, tensor.Rebroadcast)
assert (f4(zval) == zval[:,3,0]).all()
class test_local_subtensor_merge(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......@@ -1649,8 +1687,8 @@ class test_local_subtensor_merge(unittest.TestCase):
for s2 in s2r:
f(x_val, b1,e1,s1,b2,e2,s2)
class Test_alloc_zero(unittest.TestCase):
def setUp(self):
mode = theano.compile.mode.get_default_mode()
self.mode = mode.including("local_incsubtensor_of_allocs", "local_setsubtensor_of_allocs", "local_0_dot_x")
......@@ -1797,6 +1835,7 @@ def test_local_fill_useless():
f = function([x,y], T.fill(x,y)*2, mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.mul]
class test_shapeoptimizer(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......@@ -1963,11 +2002,8 @@ class test_assert(unittest.TestCase):
assert len(topo[0].inputs)==3
assert topo[1].op==theano.compile.function_module.deep_copy_op
def test_local_mul_specialize():
    # test a few cases to make sure that the basics are covered
mode = theano.config.mode
if mode == 'FAST_COMPILE':
mode = 'FAST_RUN'
......@@ -2041,11 +2077,8 @@ def speed_local_pow_specialize_range():
if not t2-t1<t3-t2:
print "WARNING WE ARE SLOWER"
def test_local_pow_specialize():
    # test a few cases to make sure that the basics are covered
mode = theano.config.mode
if mode == 'FAST_COMPILE':
mode = 'FAST_RUN'
......@@ -2097,10 +2130,8 @@ def test_local_pow_specialize():
# assert nodes == [T.sqrt, T.inv]  # Why doesn't this work?
assert numpy.allclose(f(val_no0),val_no0**(-.5))
def test_local_pow_specialize_device():
# test that on CPU we use more aggressive optimization
def test_local_pow_specialize_device_more_aggressive_on_cpu():
mode = theano.config.mode
if mode == 'FAST_COMPILE':
mode = 'FAST_RUN'
......@@ -2140,8 +2171,8 @@ def test_local_pow_specialize_device():
assert isinstance(nodes[-1].scalar_op,theano.scalar.basic.Inv)
assert numpy.allclose(f(val_no0),val_no0**(-16))
class T_Rebroadcast(unittest.TestCase):
def test_local_useless_rebroadcast(self):
mode = theano.compile.get_default_mode().including('canonicalize')
v1 = T.vector()
......@@ -2164,6 +2195,7 @@ class T_Rebroadcast(unittest.TestCase):
assert len(rebroadcast_nodes) == 1
assert rebroadcast_nodes[0].op.axis == {0: True}
class T_useless_elemwise(unittest.TestCase):
def setUp(self):
self.mode = theano.compile.get_default_mode().including('canonicalize')
......@@ -2252,6 +2284,7 @@ class T_useless_elemwise(unittest.TestCase):
assert len(topo) == 1
assert topo[0].op == theano.compile.function_module.deep_copy_op
def test_constant_get_stabilized():
"""
Currently Theano enables the constant_folding optimization before the stabilization optimization.
......@@ -2338,6 +2371,7 @@ class T_local_switch_sink(unittest.TestCase):
assert (res == numpy.asarray(self.resm[idx])).sum() == self.resm[idx].size
idx += 1
class T_local_erf(unittest.TestCase):
def setUp(self):
self.mode = theano.compile.mode.get_default_mode().including('canonicalize','fast_run').excluding('gpu','fusion')
......@@ -2422,6 +2456,7 @@ class T_local_erf(unittest.TestCase):
assert isinstance(topo[1].op.scalar_op,scal.Add) or isinstance(topo[1].op.scalar_op,scal.Sub)
print f(val)
class T_local_erfc(unittest.TestCase):
def setUp(self):
self.mode_fusion = theano.compile.mode.get_default_mode().including('canonicalize').including('fast_run').excluding('gpu')
......@@ -2681,6 +2716,7 @@ class test_local_remove_switch_const_cond(unittest.TestCase):
vy = numpy.array([[7,8,9],[10,11,12]], dtype='int64')
assert numpy.all(f(vx,vy) == vy)
class T_local_sum(unittest.TestCase):
def setUp(self):
self.mode = theano.compile.get_default_mode().including('canonicalize')
......@@ -2773,6 +2809,7 @@ class T_local_sum(unittest.TestCase):
finally:
config.warn.sum_sum_bug = backup
class T_local_sum_dimshuffle(unittest.TestCase):
def setUp(self):
self.mode = theano.compile.get_default_mode().including('canonicalize')
......@@ -2844,6 +2881,7 @@ class T_local_sum_dimshuffle(unittest.TestCase):
# test_local_sum_prod_dimshuffle (a * b * c)
# test_local_sum_divprod_dimshuffle ((a * b) / (c * d))
def test_make_vector():
b = T.bscalar()
i = T.iscalar()
......@@ -2927,6 +2965,7 @@ def test_make_vector():
except AssertionError:
pass
def test_local_join_1():
#test for vector
a = tensor.vector('a')
......@@ -2966,6 +3005,7 @@ def test_local_join_1():
assert len([n for n in e if isinstance(n.op, Join)]) == 1
assert f.maker.env.outputs[0].dtype == config.floatX
def test_local_mul_to_neg():
"""
Test that a multiplication by -1 or -1.0 yields the appropriate data type
......@@ -2986,8 +3026,8 @@ def test_local_mul_to_neg():
else:
raise NotImplementedError(config.cast_policy)
def test_local_add_specialize():
# test of non-zero dimension
a = tensor.vector()
s = tensor.add(tensor.zeros_like(a))
......@@ -3006,6 +3046,7 @@ def test_local_add_specialize():
assert transformed
assert transformed[0].type == s.type
def test_local_tensor_scalar_tensor():
dtypes = ['int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64',
......@@ -3027,6 +3068,7 @@ def test_local_tensor_scalar_tensor():
assert len(cast_nodes) == 0
f(0)
def test_local_scalar_tensor_scalar():
dtypes = ['int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64',
......@@ -3048,6 +3090,7 @@ def test_local_scalar_tensor_scalar():
assert len(cast_nodes) == 0
f(0)
def test_local_div_to_inv():
num_len_s = tensor.lscalar('num_len')
denom_s = tensor.scalar('denom')
......@@ -3065,6 +3108,41 @@ def test_local_div_to_inv():
assert out_val.shape == (1, 3)
assert numpy.allclose(out_val, 0.5)
class Test_lift_transpose_through_dot(unittest.TestCase):
def simple_optimize(self, g):
out2in(opt.local_useless_elemwise).optimize(g)
out2in(opt.local_lift_transpose_through_dot).optimize(g)
out2in(opt.local_useless_elemwise).optimize(g)
return g
def test_matrix_matrix(self):
a, b = matrices('ab')
g = self.simple_optimize(Env([a, b], [tensor.dot(a, b).T]))
sg = '[dot(InplaceDimShuffle{1,0}(b), InplaceDimShuffle{1,0}(a))]'
assert str(g) == sg
def test_row_matrix(self):
a = vector('a')
b = matrix('b')
g = optimize(Env(
[a, b],
[tensor.dot(a.dimshuffle('x', 0), b).T]),
level='stabilize')
sg = '[dot(DimShuffle{1,0}(b), DimShuffle{0,x}(a))]'
assert str(g) == sg
def test_matrix_col(self):
a = vector('a')
b = matrix('b')
g = optimize(Env(
[a, b],
[tensor.dot(b, a.dimshuffle(0, 'x')).T]),
level='stabilize')
sg = '[dot(DimShuffle{x,0}(a), DimShuffle{1,0}(b))]'
assert str(g) == sg
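The row and column cases exercised above follow from the same identity once the vector is reshaped to 2d; e.g. for the row case (NumPy sanity check only, not part of the test file):

    import numpy as np

    a = np.random.rand(3)
    b = np.random.rand(3, 4)
    # dot(row(a), b).T == dot(b.T, col(a))
    assert np.allclose(np.dot(a[None, :], b).T,
                       np.dot(b.T, a[:, None]))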
if __name__ == '__main__':
# unittest.main()
test_fusion().tes_memory_leak()