提交 e521b20e — 作者: Pascal Lamblin

Merge pull request #3480 from SinaHonari/issue950

Refactor Flatten
...@@ -6,6 +6,7 @@ import os ...@@ -6,6 +6,7 @@ import os
import shutil import shutil
import stat import stat
import sys import sys
import warnings
import theano import theano
from theano.compat import get_unbound_function from theano.compat import get_unbound_function
...@@ -318,7 +319,7 @@ if cuda_available: ...@@ -318,7 +319,7 @@ if cuda_available:
GpuDimShuffle, GpuCAReduce, GpuReshape, GpuContiguous, GpuDimShuffle, GpuCAReduce, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor, GpuSubtensor, GpuIncSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1, GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1,
GpuFlatten, GpuShape, GpuAlloc, GpuAllocEmpty, GpuSplit, gpu_flatten, GpuFlatten, GpuShape, GpuAlloc, GpuAllocEmpty, GpuSplit,
GpuJoin, fscalar, fvector, fmatrix, frow, fcol, GpuJoin, fscalar, fvector, fmatrix, frow, fcol,
ftensor3, ftensor4, ftensor3, ftensor4,
scalar, vector, matrix, row, col, scalar, vector, matrix, row, col,
......
...@@ -3326,7 +3326,14 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp): ...@@ -3326,7 +3326,14 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
""" """
Implement Flatten on the gpu. Implement Flatten on the gpu.
.. note:: The interface GpuFlatten is deprecated, you should use gpu_flatten.
""" """
def __init__(self):
    # Deprecation shim: the GpuFlatten op class is superseded by the
    # gpu_flatten() helper; warn any caller that still instantiates
    # the class directly.
    # NOTE(review): stacklevel=4 presumably points the warning at the
    # user's call site through the Op-construction wrapper frames --
    # confirm the depth against the actual call chain.
    warnings.warn(
        "GpuFlatten class is deprecated, "
        "please use gpu_flatten method instead.",
        DeprecationWarning,
        stacklevel=4)
def make_node(self, x): def make_node(self, x):
assert isinstance(x.type, CudaNdarrayType) assert isinstance(x.type, CudaNdarrayType)
...@@ -3336,6 +3343,36 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp): ...@@ -3336,6 +3343,36 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
return Apply(self, [x], [out_type()]) return Apply(self, [x], [out_type()])
def gpu_flatten(x, outdim=1):
    """
    Implement flatten on the gpu.

    Reshapes the variable x by keeping the first outdim-1 dimension
    size(s) of x the same, and making the last dimension size of x
    equal to the multiplication of its remaining dimension size(s).

    Parameters
    ----------
    x : theano.tensor.var.TensorVariable
        The variable that should be reshaped.
    outdim : int
        The number of dimensions of the returned variable.

    Returns
    -------
    theano.tensor.var.TensorVariable
        The flattened variable with dimensionality of outdim.

    Raises
    ------
    ValueError
        If outdim is smaller than 1, or greater than x.ndim (any
        variable, even a scalar, can be flattened to outdim 1).
    """
    x = as_cuda_ndarray_variable(x)
    # Validate outdim up front, consistently with theano.tensor.flatten,
    # so the user gets a clear error instead of an opaque GpuReshape
    # failure at graph-build or run time.
    if outdim < 1 or (outdim > 1 and outdim > x.ndim):
        raise ValueError('outdim %s out of bound [1, %d)'
                         % (outdim, x.ndim + 1))
    if outdim > 1:
        # Keep the leading outdim-1 dims; -1 lets reshape infer the
        # size of the collapsed trailing dimension.
        dims = tuple(x.shape[:outdim - 1]) + (-1,)
    else:
        dims = (-1,)
    return GpuReshape(outdim)(x, dims)
class GpuShape(tensor.Shape, GpuOp): class GpuShape(tensor.Shape, GpuOp):
""" """
Implement Shape on the gpu. Implement Shape on the gpu.
......
...@@ -3,7 +3,7 @@ import copy ...@@ -3,7 +3,7 @@ import copy
from theano import Op from theano import Op
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available, GpuOp from theano.sandbox.cuda import cuda_available, GpuOp
from theano.sandbox.cuda.basic_ops import GpuFlatten from theano.sandbox.cuda.basic_ops import gpu_flatten
from theano.tensor.extra_ops import CumsumOp from theano.tensor.extra_ops import CumsumOp
if cuda_available: if cuda_available:
...@@ -453,7 +453,7 @@ def use_gpu_cumsum(node): ...@@ -453,7 +453,7 @@ def use_gpu_cumsum(node):
x = gpu_from_host(x) x = gpu_from_host(x)
if axis is None and x.ndim > 1: if axis is None and x.ndim > 1:
x = GpuFlatten()(x) x = gpu_flatten(x)
# ``gpu_cumsum`` assume array has been flattened if needed. # ``gpu_cumsum`` assume array has been flattened if needed.
if axis is None: if axis is None:
......
...@@ -24,7 +24,8 @@ from theano.sandbox.cuda.basic_ops import ( ...@@ -24,7 +24,8 @@ from theano.sandbox.cuda.basic_ops import (
gpu_eye, gpu_contiguous, gpu_eye, gpu_contiguous,
gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu, gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
GpuContiguous, GpuContiguous,
GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten, GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce,
GpuFlatten, gpu_flatten,
GpuSubtensor, GpuAdvancedSubtensor1, GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty) GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty)
...@@ -152,7 +153,7 @@ cpu_ops_moved_to_gpu = [ ...@@ -152,7 +153,7 @@ cpu_ops_moved_to_gpu = [
tensor.elemwise.All, tensor.elemwise.Any, tensor.elemwise.All, tensor.elemwise.Any,
tensor.elemwise.CAReduceDtype, tensor.elemwise.Sum, tensor.elemwise.CAReduceDtype, tensor.elemwise.Sum,
tensor.elemwise.Prod, tensor.elemwise.ProdWithoutZeros, tensor.elemwise.Prod, tensor.elemwise.ProdWithoutZeros,
tensor.Reshape, tensor.Flatten, tensor.Subtensor, tensor.Reshape, tensor.flatten, tensor.Subtensor,
tensor.AdvancedSubtensor1, tensor.AdvancedIncSubtensor1, tensor.AdvancedSubtensor1, tensor.AdvancedIncSubtensor1,
tensor.IncSubtensor, tensor.Shape, tensor.Join, tensor.IncSubtensor, tensor.Shape, tensor.Join,
tensor.Alloc, tensor.Eye] tensor.Alloc, tensor.Eye]
...@@ -980,14 +981,14 @@ def local_gpu_flatten(node): ...@@ -980,14 +981,14 @@ def local_gpu_flatten(node):
if host_input.owner and \ if host_input.owner and \
isinstance(host_input.owner.op, tensor.Flatten): isinstance(host_input.owner.op, tensor.Flatten):
outdim = host_input.owner.op.outdim outdim = host_input.owner.op.outdim
return [GpuFlatten(outdim)( return [gpu_flatten(host_input.owner.inputs[0], outdim)(
as_cuda_ndarray_variable(host_input.owner.inputs[0]))] as_cuda_ndarray_variable(host_input.owner.inputs[0]))]
if isinstance(node.op, tensor.Flatten): if isinstance(node.op, tensor.Flatten):
x, = node.inputs x, shp= node.inputs
outdim = node.op.outdim outdim = node.op.outdim
if x.owner and isinstance(x.owner.op, HostFromGpu): if x.owner and isinstance(x.owner.op, HostFromGpu):
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
return [host_from_gpu(GpuFlatten(outdim)(gpu_x))] return [host_from_gpu(gpu_flatten(host_input.owner.inputs[0], outdim)(gpu_x))]
return False return False
......
...@@ -307,7 +307,8 @@ def test_flatten(): ...@@ -307,7 +307,8 @@ def test_flatten():
x = cuda.fmatrix('x') x = cuda.fmatrix('x')
f = theano.function([x], x.flatten(), mode=mode_with_gpu) f = theano.function([x], x.flatten(), mode=mode_with_gpu)
assert any([node for node in f.maker.fgraph.toposort() assert any([node for node in f.maker.fgraph.toposort()
if isinstance(node.op, B.GpuFlatten)]) if isinstance(node.op, B.GpuReshape)])
assert theano.tensor.is_flat(x.flatten())
assert len(f([[0., 0.], [0., 0.]]).shape) == 1 assert len(f([[0., 0.], [0., 0.]]).shape) == 1
......
...@@ -4417,7 +4417,7 @@ class Reshape(Op): ...@@ -4417,7 +4417,7 @@ class Reshape(Op):
if ele == -1: if ele == -1:
requ[i] = missing requ[i] = missing
elif crit == 1: # we reshape to -1 elif crit == 1: # we reshape to -1
requ = [mul(*ishapes[0])] requ = [mul(*ishapes[0])] if ishapes[0] else [1]
elif crit > 1: elif crit > 1:
raise ValueError('shape argument to Reshape.perform' raise ValueError('shape argument to Reshape.perform'
' must have at most one entry equal to -1') ' must have at most one entry equal to -1')
...@@ -4511,6 +4511,7 @@ class Flatten(Op): ...@@ -4511,6 +4511,7 @@ class Flatten(Op):
Flattens a tensor to `outdim` dimensions by preserving the leading Flattens a tensor to `outdim` dimensions by preserving the leading
outdim - 1 shape components. outdim - 1 shape components.
.. note:: The interface Flatten(Op) is deprecated, you should use flatten.
""" """
view_map = {0: [0]} view_map = {0: [0]}
...@@ -4518,6 +4519,11 @@ class Flatten(Op): ...@@ -4518,6 +4519,11 @@ class Flatten(Op):
__props__ = ("outdim",) __props__ = ("outdim",)
def __init__(self, outdim=1): def __init__(self, outdim=1):
warnings.warn(
"Flatten class is deprecated, "
"please use flatten method instead.",
DeprecationWarning,
stacklevel=4)
self.outdim = int(outdim) self.outdim = int(outdim)
def __str__(self): def __str__(self):
...@@ -4656,8 +4662,70 @@ class Flatten(Op): ...@@ -4656,8 +4662,70 @@ class Flatten(Op):
""" % locals() """ % locals()
def is_flat(var, outdim=1):
    """
    Check that `var` has exactly `outdim` dimensions.

    Typically used after a call to `flatten`, which keeps the first
    outdim-1 dimension size(s) of a variable intact and collapses all
    remaining dimensions into the last one, so that the result has
    exactly `outdim` dimensions.

    Parameters
    ----------
    var : theano.tensor.var.TensorVariable
        The Theano variable whose dimensionality is checked.
    outdim : int
        The expected number of dimensions of `var`.

    Returns
    -------
    bool
        True when the number of dimensions of `var` equals `outdim`.
    """
    return outdim == var.ndim
def flatten(x, outdim=1):
    """
    Reshape the variable `x` to have exactly `outdim` dimensions.

    The first outdim-1 dimension size(s) of `x` are kept unchanged and
    the last dimension size of the result equals the product of the
    remaining dimension size(s) of `x`.

    Parameters
    ----------
    x : theano.tensor.var.TensorVariable
        The variable that should be reshaped.
    outdim : int
        The number of dimensions of the returned variable.

    Returns
    -------
    theano.tensor.var.TensorVariable
        The flattened variable with dimensionality of outdim.
    """
    # Any input variable can be flattened to have outdim of 1, even a
    # scalar.  Otherwise outdim must be positive and not exceed x.ndim.
    if outdim < 1 or (outdim > 1 and outdim > x.ndim):
        raise ValueError('outdim %s out of bound [1, %d)'
                         % (outdim, x.ndim + 1))

    # Keep the leading outdim-1 dims; -1 makes reshape infer the size
    # of the collapsed trailing dimension.
    target_shape = (tuple(x.shape[:outdim - 1]) + (-1,) if outdim > 1
                    else (-1,))
    result = x.reshape(target_shape)

    # The collapsed trailing dimension is broadcastable only when every
    # dimension it absorbed was broadcastable.
    broadcastable = (x.broadcastable[:outdim - 1] +
                     (python_all(x.broadcastable[outdim - 1:]),))
    result = theano.tensor.addbroadcast(
        result, *[axis for axis in range(outdim) if broadcastable[axis]])
    return result
# class TileGrad(Op): # class TileGrad(Op):
......
...@@ -377,7 +377,7 @@ class T_softplus_opts(unittest.TestCase): ...@@ -377,7 +377,7 @@ class T_softplus_opts(unittest.TestCase):
f = theano.function([x], out, mode=self.m) f = theano.function([x], out, mode=self.m)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 3 assert len(topo) == 3
assert isinstance(topo[0].op, T.Flatten) assert tensor.is_flat(topo[0].outputs[0])
assert isinstance(topo[1].op.scalar_op, assert isinstance(topo[1].op.scalar_op,
theano.tensor.nnet.sigm.ScalarSoftplus) theano.tensor.nnet.sigm.ScalarSoftplus)
assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg) assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
......
...@@ -18,6 +18,7 @@ from nose.plugins.skip import SkipTest ...@@ -18,6 +18,7 @@ from nose.plugins.skip import SkipTest
import numpy import numpy
from numpy.testing import dec, assert_array_equal, assert_allclose from numpy.testing import dec, assert_array_equal, assert_allclose
from distutils.version import LooseVersion from distutils.version import LooseVersion
from functools import partial
import theano import theano
from theano.compat import PY3, exc_message, operator_div from theano.compat import PY3, exc_message, operator_div
...@@ -31,8 +32,8 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as, ...@@ -31,8 +32,8 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
fscalar, zeros_like, sum, tensor3, vector, add, addbroadcast, fscalar, zeros_like, sum, tensor3, vector, add, addbroadcast,
alloc, as_tensor_variable, tensor_from_scalar, ARange, autocast_float, alloc, as_tensor_variable, tensor_from_scalar, ARange, autocast_float,
clip, constant, default, dot, clip, constant, default, dot,
dmatrix, dscalar, dvector, eq, eye, fill, flatten, inverse_permutation, dmatrix, dscalar, dvector, eq, eye, fill, flatten, inverse_permutation, Flatten,
tensor4, permute_row_elements, Flatten, fmatrix, fscalars, grad, tensor4, permute_row_elements, fmatrix, fscalars, grad,
inplace, iscalar, matrix, minimum, matrices, maximum, mul, neq, inplace, iscalar, matrix, minimum, matrices, maximum, mul, neq,
Reshape, row, scalar, scalars, second, smallest, stack, sub, Tensor, Reshape, row, scalar, scalars, second, smallest, stack, sub, Tensor,
tensor_copy, tensordot, TensorType, Tri, tri, tril, triu, unbroadcast, tensor_copy, tensordot, TensorType, Tri, tri, tril, triu, unbroadcast,
...@@ -5147,11 +5148,6 @@ def test_make_column_matrix_broadcastable(): ...@@ -5147,11 +5148,6 @@ def test_make_column_matrix_broadcastable():
def test_flatten_outdimNone(): def test_flatten_outdimNone():
"""Flatten always returns a copy of the array. There is no danger
with in-place operations and thus no need to test it.
"""
a = dmatrix() a = dmatrix()
c = flatten(a) c = flatten(a)
f = inplace_func([a], c) f = inplace_func([a], c)
...@@ -5161,7 +5157,7 @@ def test_flatten_outdimNone(): ...@@ -5161,7 +5157,7 @@ def test_flatten_outdimNone():
f = inplace_func([a], c) f = inplace_func([a], c)
assert numpy.all(f(a_val) == c_val) assert numpy.all(f(a_val) == c_val)
utt.verify_grad(Flatten(), [a_val]) utt.verify_grad(flatten, [a_val])
def test_flatten_scalar(): def test_flatten_scalar():
...@@ -5174,7 +5170,7 @@ def test_flatten_scalar(): ...@@ -5174,7 +5170,7 @@ def test_flatten_scalar():
f = inplace_func([a], c) f = inplace_func([a], c)
assert numpy.all(f(a_val) == c_val) assert numpy.all(f(a_val) == c_val)
# utt.verify_grad(Flatten(), [a_val]) #TODO: fix verify_grd to work on scalars # utt.verify_grad(flatten, [a_val]) #TODO: fix verify_grd to work on scalars
def test_flatten_outdim1(): def test_flatten_outdim1():
...@@ -5187,7 +5183,7 @@ def test_flatten_outdim1(): ...@@ -5187,7 +5183,7 @@ def test_flatten_outdim1():
f = inplace_func([a], c) f = inplace_func([a], c)
assert numpy.all(f(a_val) == c_val) assert numpy.all(f(a_val) == c_val)
utt.verify_grad(Flatten(1), [a_val]) utt.verify_grad(flatten, [a_val])
def test_flatten_outdim2(): def test_flatten_outdim2():
...@@ -5199,7 +5195,8 @@ def test_flatten_outdim2(): ...@@ -5199,7 +5195,8 @@ def test_flatten_outdim2():
f = inplace_func([a], c) f = inplace_func([a], c)
assert numpy.all(f(a_val) == a_val) assert numpy.all(f(a_val) == a_val)
utt.verify_grad(Flatten(2), [a_val]) flatten_2 = partial(flatten, outdim=2)
utt.verify_grad(flatten_2, [a_val])
def test_flatten_outdim2_of_3(): def test_flatten_outdim2_of_3():
...@@ -5213,7 +5210,8 @@ def test_flatten_outdim2_of_3(): ...@@ -5213,7 +5210,8 @@ def test_flatten_outdim2_of_3():
f = inplace_func([a], c) f = inplace_func([a], c)
assert numpy.all(f(a_val) == c_val) assert numpy.all(f(a_val) == c_val)
utt.verify_grad(Flatten(2), [a_val]) flatten_2 = partial(flatten, outdim=2)
utt.verify_grad(flatten_2, [a_val])
def test_flatten_broadcastable(): def test_flatten_broadcastable():
...@@ -5255,6 +5253,37 @@ def test_flatten_outdim_invalid(): ...@@ -5255,6 +5253,37 @@ def test_flatten_outdim_invalid():
pass pass
def test_is_flat():
    """
    Test is_flat on constant and symbolic variables, as well as on
    reshaped (constant- and symbolic-shape) variables, for the given
    outdim.
    """
    # A constant variable is flat iff its ndim matches outdim.
    assert tensor.is_flat(tensor.as_tensor_variable(numpy.zeros((10))))
    assert tensor.is_flat(
        tensor.as_tensor_variable(numpy.zeros((10, 10, 10))), outdim=3)
    assert not tensor.is_flat(
        tensor.as_tensor_variable(numpy.zeros((10, 10, 10))))

    # Symbolic variables.
    assert tensor.is_flat(tensor.vector())
    assert tensor.is_flat(tensor.tensor3(), outdim=3)
    assert not tensor.is_flat(tensor.tensor3())

    # Reshape with a constant target shape.
    inp = tensor.tensor4()
    assert tensor.is_flat(inp.reshape((-1, )))
    assert tensor.is_flat(inp.reshape((10, 10, -1)), outdim=3)
    assert not tensor.is_flat(inp.reshape((10, 10, -1)))

    # Reshape with a symbolic target shape.
    inp = tensor.tensor4()
    assert tensor.is_flat(inp.reshape((tensor.iscalar(), )))
    assert tensor.is_flat(inp.reshape((tensor.iscalar(), ) * 3), outdim=3)
    assert not tensor.is_flat(inp.reshape((tensor.iscalar(), ) * 3))
def test_tile(): def test_tile():
def run_tile(x, x_, reps, use_symbolic_reps): def run_tile(x, x_, reps, use_symbolic_reps):
if use_symbolic_reps: if use_symbolic_reps:
...@@ -7128,24 +7157,29 @@ class TestInferShape(utt.InferShapeTester): ...@@ -7128,24 +7157,29 @@ class TestInferShape(utt.InferShapeTester):
# Flatten # Flatten
atens3 = tensor3() atens3 = tensor3()
atens3_val = rand(4, 5, 3) atens3_val = rand(4, 5, 3)
self._compile_and_check([atens3],
[flatten(atens3, 1)],
[atens3_val], Reshape)
for outdim in (3, 2, 1): for outdim in (3, 2, 1):
self._compile_and_check([atens3], self._compile_and_check([atens3],
[Flatten(outdim)(atens3)], [flatten(atens3, outdim)],
[atens3_val], Flatten) [atens3_val], Reshape)
amat = matrix() amat = matrix()
amat_val = rand(4, 5) amat_val = rand(4, 5)
for outdim in (2, 1): for outdim in (2, 1):
self._compile_and_check([amat], self._compile_and_check([amat],
[Flatten(outdim)(amat)], [flatten(amat, outdim)],
[amat_val], Flatten) [amat_val], Reshape)
avec = vector() avec = vector()
avec_val = rand(4) avec_val = rand(4)
outdim = 1 outdim = 1
self._compile_and_check([avec], self._compile_and_check([avec],
[Flatten(outdim)(avec)], [flatten(avec, outdim)],
[avec_val], Flatten) [avec_val], Reshape,
excluding=['local_useless_reshape'])
# Eye # Eye
aiscal = iscalar() aiscal = iscalar()
......
...@@ -5879,18 +5879,22 @@ def test_local_useless_split(): ...@@ -5879,18 +5879,22 @@ def test_local_useless_split():
def test_local_flatten_lift(): def test_local_flatten_lift():
for i in xrange(1, 4): for i in xrange(1, 4):
op = tensor.Flatten(i)
x = tensor.tensor4() x = tensor.tensor4()
out = op(T.exp(x)) out = tensor.flatten(T.exp(x), i)
assert out.ndim == i assert out.ndim == i
mode = compile.mode.get_default_mode() mode = compile.mode.get_default_mode()
mode = mode.including('local_flatten_lift') mode = mode.including('local_reshape_lift')
f = theano.function([x], out, mode=mode) f = theano.function([x], out, mode=mode)
f(numpy.random.rand(5, 4, 3, 2).astype(config.floatX)) x_np = numpy.random.rand(5, 4, 3, 2).astype(config.floatX)
out_np = f(x_np)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 2 shape_out_np = tuple(x_np.shape[:i-1])+(numpy.prod(x_np.shape[i-1:]),)
assert isinstance(topo[0].op, tensor.Flatten) assert shape_out_np == out_np.shape
assert isinstance(topo[1].op, tensor.Elemwise)
reshape_nodes = [n for n in topo if isinstance(n.op, tensor.Reshape)]
assert (len(reshape_nodes) == 1 and
tensor.is_flat(reshape_nodes[0].outputs[0], outdim=i))
assert isinstance(topo[-1].op, tensor.Elemwise)
class Test_Reshape(unittest.TestCase): class Test_Reshape(unittest.TestCase):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论