Commit a72f34bc authored by carriepl

Merge pull request #2362 from aalmah/ticket_2257

Ticket #2257: tile function with grad support
......@@ -4047,6 +4047,8 @@ def flatten(x, outdim=1):
class Tile(Op):
"""
DEPRECATED: use tile() instead.
Construct an array by repeating the input x according to reps pattern.
Tiles its input according to reps. The length of reps is the number of
......@@ -4069,6 +4071,9 @@ class Tile(Op):
return self.__class__.__name__ + "{ndim=%d}" % self.ndim
def make_node(self, x, reps):
warnings.warn((
"Tile op is deprecated, use tile function instead."),
stacklevel=3)
x = as_tensor_variable(x)
reps = as_tensor_variable(reps)
return gof.Apply(self, [x, reps], [tensor(x.type.dtype, [False] *
......@@ -4139,18 +4144,19 @@ def tile(x, reps, ndim=None):
raise ValueError("if specified, ndim must be equal to both x.ndim and "
"len(reps)")
if not hasattr(tile, 'op'):
tile.op = {}
if ndim is None:
ndim = len(reps)
# backport
# ndim = len(reps) if ndim is None else ndim
# not sure if len(shp) is going to work.
if ndim not in tile.op:
tile.op[ndim] = Tile(ndim)
return tile.op[ndim](x, reps)
reps = list(reps)
shape = [x.shape[i] for i in xrange(ndim)]
alloc_shape = reps + shape
y = alloc(x, *alloc_shape)
shuffle_ind = numpy.arange(ndim*2).reshape(2, ndim)
shuffle_ind = shuffle_ind.transpose().flatten()
y = y.dimshuffle(*shuffle_ind)
new_shapes = [sh*reps[i] for i, sh in enumerate(shape)]
y = y.reshape(new_shapes)
return y
class ARange(Op):
......
......@@ -4831,22 +4831,34 @@ def test_tile():
x_ = rng.randn(2, 4, 3).astype(config.floatX)
assert numpy.all(f(x_) == numpy.tile(x_, (2, 3, 4)))
# Test the four-dimensional case.
x = tensor4()
f = function([x], tile(x, (2, 3, 4, 6)))
x_ = rng.randn(2, 4, 3, 5).astype(config.floatX)
assert numpy.all(f(x_) == numpy.tile(x_, (2, 3, 4, 6)))
def test_tile_grad():
def grad_tile(x, reps, np_x):
    # Helper: tile `x` by `reps`, sum the result to a scalar, and check
    # that the gradient w.r.t. x is numpy.prod(reps) everywhere.  Each
    # input element appears prod(reps) times in the tiled output, and
    # gradients are additive through the tiling operation, so every
    # entry of the gradient must equal the product of the repetitions.
    y = tile(x, reps)
    z = y.sum()
    g = theano.function([x], grad(z, x))
    grad_res = g(np_x)
    # The gradient should be the product of the tiling dimensions
    # (since the gradients are additive through the tiling operation)
    assert numpy.all(grad_res == numpy.prod(reps))
# XXX: It turns out that almost no cases of the tile gradient actually work.
# This is a test that should pass if the proper implementation is filled in.
def test_tile_grad_3d():
# N.B.: we should also use verify_grad in this test.
raise SkipTest() # Remove me when this is implemented.
rng = numpy.random.RandomState(utt.fetch_seed())
w = rng.randn(3, 4, 2)
w_tiled = numpy.tile(w, (2, 3, 4))
x = tensor.tensor3()
c = (as_tensor_variable(w_tiled) * tile(x, (2, 3, 4))).sum()
f = function([x], grad(c, x))
x_ = rng.randn(3, 4, 2)
# The gradient should be w, multiplied by its tiling dimensions (since
# the gradients are additive through the tiling operation)
assert numpy.all(f(x_) == 2 * 3 * 4 * w)
# test vector
grad_tile(vector('x'), [3], rng.randn(5))
# test matrix
grad_tile(matrix('x'), [3, 4], rng.randn(2, 3))
# test tensor3
grad_tile(tensor3('x'), [3, 4, 5], rng.randn(2, 4, 3))
# test tensor4
grad_tile(tensor4('x'), [3, 4, 5, 6], rng.randn(2, 4, 3, 5))
class TestARange(unittest.TestCase):
......@@ -6865,21 +6877,22 @@ class TestInferShape(utt.InferShapeTester):
# [Reshape(ndim)(adtens4, aivec)],
# [adtens4_val, [1, 3, 10, 4]], Reshape)
# Tile
# The Tile op is deprecated, so the tile function no longer uses it;
# here we test the op directly instead.
advec = dvector()
advec_val = rand(5)
aivec_val = [3]
ndim = 1
self._compile_and_check([advec],
[tile(advec, aivec_val, ndim)],
[Tile(ndim)(advec, aivec_val)],
[advec_val], Tile)
admat = dmatrix()
admat_val = rand(2, 4)
aivec_val = [2, 3]
ndim = None
ndim = 2
self._compile_and_check([admat],
[tile(admat, aivec_val)],
[Tile(ndim)(admat, aivec_val)],
[admat_val], Tile)
adtens4 = dtensor4()
......@@ -6887,9 +6900,10 @@ class TestInferShape(utt.InferShapeTester):
aivec_val = [2, 3, 1, 4]
ndim = 4
self._compile_and_check([adtens4],
[tile(adtens4, aivec_val, ndim)],
[Tile(ndim)(adtens4, aivec_val)],
[adtens4_val], Tile)
class TestTensorInstanceMethods(unittest.TestCase):
def setUp(self):
self.vars = matrices('X', 'Y')
......
......@@ -51,6 +51,7 @@ from theano.tensor import (
join,
Subtensor,
TensorType,
Tile,
)
from theano.tensor.elemwise import DimShuffle
from theano.tests import unittest_tools as utt
......@@ -3061,6 +3062,24 @@ class Test_local_useless_alloc(unittest.TestCase):
op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
assert tensor.Alloc not in op_classes
def test2(self):
    # Test that Alloc never gets instantiated during optimization.
    # The 'local_useless_alloc' optimization is excluded so that the
    # Alloc produced by tile() is not removed by that pass.
    mode = mode_opt.excluding('local_useless_alloc')
    x = tensor.matrix('x')
    # tile with a repeat pattern of all ones is a no-op candidate.
    y = tensor.tile(x, (1,)*2)
    f = function([x], [y], mode=mode)
    op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
    print op_classes  # NOTE(review): leftover debug print -- consider removing
    # We are supposed to test that tensor.Alloc is NOT in op_classes,
    # but the proper optimization is not implemented yet, so the graph
    # still contains an Alloc node.  Once the correct optimization is
    # in place, tensor.Alloc should no longer appear in op_classes and
    # the assert below must be inverted.
    assert tensor.Alloc in op_classes
class test_shapeoptimizer(unittest.TestCase):
def setUp(self):
......@@ -3404,6 +3423,8 @@ def test_local_mul_specialize():
class T_Tile(unittest.TestCase):
def test_local_useless_tile(self):
# The Tile op is deprecated, so the tile function no longer uses it;
# here we test the op directly instead.
v = T.vector()
m = T.matrix()
mode = None
......@@ -3412,7 +3433,7 @@ class T_Tile(unittest.TestCase):
for var, data in [(v, [1, 2, 3]), (m, [[1, 2], [3, 4]])]:
# Currently, only a repeat pattern of length == ndim is supported.
for ndim in [var.ndim]: # range(1, var.ndim):
f = theano.function([var], T.tile(var, (1,)*ndim), mode=mode)
f = theano.function([var], Tile(ndim)(var, (1,)*ndim), mode=mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, compile.DeepCopyOp)
......@@ -3422,6 +3443,7 @@ class T_Tile(unittest.TestCase):
# replace it with a DimShuffle to add the extra parameter.
# But it isn't supported for now, so assert that we raise an
# error.
self.assertRaises(ValueError, T.tile, v, (1,)*(v.ndim+1))
# If the repeat parameter is shorter than m.ndim, it should
# pad the repeat pattern with 1 on the left. It is not supported for now.
......@@ -3429,6 +3451,7 @@ class T_Tile(unittest.TestCase):
#topo = f.maker.fgraph.toposort()
#assert len(topo) == 1
#assert isinstance(topo[0].op, DimShuffle)
self.assertRaises(ValueError, T.tile, m, (1,)*(m.ndim-1))
#f = theano.function([var], T.tile(m, (1,)*(m.ndim-1)))
#topo = f.maker.fgraph.toposort()
......
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.