提交 dfdcd682 authored 作者: Xavier Bouthillier's avatar Xavier Bouthillier

Merge pull request #3318 from Thrandis/ccw

Numpy-like interface for stack.
......@@ -583,6 +583,20 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
:type n_ones: int
:param n_ones: number of dimensions to be added to `x`
.. function:: shape_padaxis(t, axis)
Reshape `t` by adding 1 at the dimension `axis`. Note that this new
dimension will be broadcastable. To make it non-broadcastable
see the :func:`unbroadcast`.
:type t: any TensorVariable (or compatible)
:param t: variable to be reshaped
:type axis: int
:param axis: axis where to add the new dimension to `t`
.. autofunction:: unbroadcast(x, *axes)
.. autofunction:: addbroadcast(x, *axes)
......@@ -678,6 +692,26 @@ Creating Tensor
except for the main diagonal, whose values are equal to one. The output
will have same dtype as `x`.
.. function:: stack(tensors, axis=0)
Warning: The interface stack(*tensors) is deprecated!
Return a Tensor with the arguments all stacked up into a single tensor
(of rank one greater).
:param tensors: a list or a tuple of one or more tensors of the same rank.
:param axis: the axis along which the tensors will be stacked. Default value is 0.
:returns: A tensor such that rval[0] == tensors[0], rval[1] == tensors[1], etc.
>>> x0 = T.scalar()
>>> x1 = T.scalar()
>>> x2 = T.scalar()
>>> x = T.stack([x0, x1, x2])
>>> x.ndim # x is a vector of length 3.
1
.. function:: stack(*tensors)
Return a Tensor with the arguments all stacked up into a single tensor.
......
......@@ -856,7 +856,7 @@ def local_gpu_careduce(node):
new_in_shp.append(x_shape[i])
new_greduce = GpuCAReduce(new_mask, scalar_op)
reshaped_x = x.reshape(tensor.stack(*new_in_shp))
reshaped_x = x.reshape(tensor.stack(new_in_shp))
gpu_reshaped_x = as_cuda_ndarray_variable(reshaped_x)
reshaped_gpu_inputs = [gpu_reshaped_x]
if new_greduce.supports_c_code(reshaped_gpu_inputs):
......@@ -865,7 +865,7 @@ def local_gpu_careduce(node):
if reduce_reshaped_x.ndim != out.ndim:
rval = reduce_reshaped_x.reshape(
tensor.stack(*shape_of[out]))
tensor.stack(shape_of[out]))
else:
rval = reduce_reshaped_x
else:
......
......@@ -595,7 +595,7 @@ def local_gpua_careduce(node):
dtype=getattr(node.op, 'dtype', None),
acc_dtype=getattr(node.op, 'acc_dtype', None))
reshaped_x = x.reshape(tensor.stack(*new_in_shp))
reshaped_x = x.reshape(tensor.stack(new_in_shp))
gpu_reshaped_x = gpu_from_host(reshaped_x)
gvar = greduce(gpu_reshaped_x)
# We need to have the make node called, otherwise the mask can
......@@ -607,7 +607,7 @@ def local_gpua_careduce(node):
if reduce_reshaped_x.ndim != node.outputs[0].ndim:
unreshaped_reduce = reduce_reshaped_x.reshape(
tensor.stack(*shape_of[node.outputs[0]]))
tensor.stack(shape_of[node.outputs[0]]))
else:
unreshaped_reduce = reduce_reshaped_x
return [unreshaped_reduce]
......
......@@ -3013,8 +3013,8 @@ class HStack(gof.op.Op):
split = tensor.Split(len(inputs))(gz, 1,
tensor.stack(
*[x.shape[1]
for x in inputs]))
[x.shape[1]
for x in inputs]))
if not isinstance(split, list):
split = [split]
......@@ -3094,8 +3094,8 @@ class VStack(HStack):
split = tensor.Split(len(inputs))(gz, 0,
tensor.stack(
*[x.shape[0]
for x in inputs]))
[x.shape[0]
for x in inputs]))
if not isinstance(split, list):
split = [split]
......
......@@ -185,7 +185,7 @@ def as_tensor_variable(x, name=None, ndim=None):
if isinstance(x, (tuple, list)) and python_any(isinstance(xi, Variable)
for xi in x):
try:
return stack(*x)
return stack(x)
except (TypeError, ValueError):
pass
......@@ -1682,7 +1682,7 @@ def smallest(*args):
a, b = args
return switch(a < b, a, b)
else:
return min(stack(*args), axis=0)
return min(stack(args), axis=0)
@constructor
......@@ -1697,7 +1697,7 @@ def largest(*args):
a, b = args
return switch(a > b, a, b)
else:
return max(stack(*args), axis=0)
return max(stack(args), axis=0)
##########################
......@@ -3803,8 +3803,8 @@ class Join(Op):
if 'float' in out_dtype or 'complex' in out_dtype:
# assume that this is differentiable
split = Split(len(tensors))
split_gz = split(gz, axis, stack(*[shape(x)[axis]
for x in tensors]))
split_gz = split(gz, axis, stack([shape(x)[axis]
for x in tensors]))
# If there is only one split, it might not be in a list.
if not isinstance(split_gz, list):
split_gz = [split_gz]
......@@ -3960,16 +3960,78 @@ def shape_padright(t, n_ones=1):
@constructor
def stack(*tensors):
def shape_padaxis(t, axis):
    """Reshape `t` by adding a broadcastable dimension of size 1 at `axis`.

    Parameters
    ----------
    t : any TensorVariable (or compatible)
        Variable to be reshaped; converted via `as_tensor_variable`.
    axis : int
        Position of the new dimension in the result; may be negative.
        Must satisfy -(t.ndim + 1) <= axis < t.ndim + 1, otherwise an
        IndexError is raised.

    See Also
    --------
    shape_padleft
    shape_padright
    Dimshuffle
    """
    _t = as_tensor_variable(t)
    # ndim of the *result* (one more than the input); this is the bound
    # used to validate and normalize `axis`.
    ndim = _t.ndim + 1
    if not -ndim <= axis < ndim:
        msg = 'axis {0} is out of bounds [-{1}, {1})'.format(axis, ndim)
        raise IndexError(msg)
    if axis < 0:
        # Normalize a negative axis to its non-negative equivalent.
        axis += ndim
    # Build a dimshuffle pattern that keeps all existing dimensions in
    # order and inserts a broadcastable ('x') dimension at `axis`.
    pattern = [i for i in xrange(_t.type.ndim)]
    pattern.insert(axis, 'x')
    return DimShuffle(_t.broadcastable, pattern)(_t)
@constructor
def stack(*tensors, **kwargs):
"""Insert the arguments as slices into a tensor of 1 rank greater.
The size in dimension 0 of the result will be equal to the number
The size in dimension `axis` of the result will be equal to the number
of tensors passed.
Note: The interface stack(*tensors) is deprecated; you should use
stack(tensors, axis=0) instead.
Parameters
----------
tensors : list or tuple of tensors
A list of tensors to be stacked.
axis : int
The index of the new axis. Default value is 0.
"""
if len(tensors) == 0:
raise Exception('theano.tensor.stack(*tensors) must have at least'
# ---> Remove this when moving to the new interface:
if not tensors and not kwargs:
raise Exception('theano.tensor.stack(tensors, axis) must have at least'
' one parameter')
if not kwargs and not isinstance(tensors[0], (list, tuple)):
warnings.warn('stack(*tensors) interface is deprecated, use'
' stack(tensors, axis=0) instead.', DeprecationWarning,
stacklevel=3)
axis = 0
elif 'tensors' in kwargs:
tensors = kwargs['tensors']
if 'axis' in kwargs:
axis = kwargs['axis']
else:
axis = 0
else:
if len(tensors) == 2:
axis = tensors[1]
elif 'axis' in kwargs:
axis = kwargs['axis']
else:
axis = 0
tensors = tensors[0]
# <--- Until here.
if len(tensors) == 0:
raise Exception('tensors is empty. You should at least provide one'
' tensor to theano.tensor.stack(tensors, axis).')
# If all tensors are scalars of the same type, call make_vector.
# It makes the graph simpler, by not adding DimShuffles and Rebroadcasts
......@@ -3991,7 +4053,7 @@ def stack(*tensors):
tensors = list(map(as_tensor_variable, tensors))
dtype = scal.upcast(*[i.dtype for i in tensors])
return theano.tensor.opt.MakeVector(dtype)(*tensors)
return join(0, *[shape_padleft(t, 1) for t in tensors])
return join(axis, *[shape_padaxis(t, axis) for t in tensors])
@constructor
......@@ -5662,7 +5724,7 @@ def stacklists(arg):
"""
if isinstance(arg, (tuple, list)):
return stack(*list(map(stacklists, arg)))
return stack(list(map(stacklists, arg)))
else:
return arg
......
......@@ -83,7 +83,7 @@ class Fourier(gof.Op):
list(shape_a[axis.data + 1:]))
else:
l = len(shape_a)
shape_a = tensor.stack(*shape_a)
shape_a = tensor.stack(shape_a)
out_shape = tensor.concatenate((shape_a[0: axis], [n],
shape_a[axis + 1:]))
n_splits = [1] * l
......
......@@ -365,7 +365,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
if len(pre_v_shape) == 0:
v_shape = tensor.constant([], dtype='int32')
else:
v_shape = tensor.stack(*pre_v_shape)
v_shape = tensor.stack(pre_v_shape)
elif shape is None:
# The number of drawn samples will be determined automatically,
......
......@@ -3380,7 +3380,7 @@ class T_Join_and_Split(unittest.TestCase):
a = as_tensor_variable(1)
b = as_tensor_variable(2.0)
c = tensor._shared(numpy.asarray(3.0, dtype=self.floatX))
s = stack(a, b, c)
s = stack([a, b, c])
want = numpy.array([1, 2, 3])
out = self.eval_outputs_and_check_vector([s], opt.MakeVector())
self.assertTrue((out == want).all())
......@@ -3389,7 +3389,7 @@ class T_Join_and_Split(unittest.TestCase):
a = self.shared(numpy.asarray(1., dtype=self.floatX))
b = as_tensor_variable(2.)
c = as_tensor_variable(3.)
s = stack(a, b, c)
s = stack([a, b, c])
want = numpy.array([1, 2, 3])
out = self.eval_outputs_and_check_vector([s])
......@@ -3401,7 +3401,7 @@ class T_Join_and_Split(unittest.TestCase):
to int64"""
a = tensor.scalar('a', dtype=self.floatX)
b = tensor.scalar('b', dtype=self.floatX)
s = stack(a, b, a, b)
s = stack([a, b, a, b])
f = function([a, b], s, mode=self.mode)
val = f(1, 2)
# print val
......@@ -3416,7 +3416,7 @@ class T_Join_and_Split(unittest.TestCase):
even when the scalars don't have the same dtype.'''
a = tensor.iscalar('a')
b = tensor.lscalar('b')
s = stack(a, b, a, b)
s = stack([a, b, a, b])
f = function([a, b], s, mode=self.mode)
val = f(1, 2)
self.assertTrue(numpy.all(val == [1, 2, 1, 2]))
......@@ -3432,7 +3432,7 @@ class T_Join_and_Split(unittest.TestCase):
b = tensor.lscalar('b')
# test when the constant is the first element.
# The first element is used in a special way
s = stack(10, a, b, numpy.int8(3))
s = stack([10, a, b, numpy.int8(3)])
f = function([a, b], s, mode=self.mode)
val = f(1, 2)
self.assertTrue(numpy.all(val == [10, 1, 2, 3]))
......@@ -3441,11 +3441,65 @@ class T_Join_and_Split(unittest.TestCase):
assert len([n for n in topo if isinstance(n, type(self.join_op))]) == 0
assert f.maker.fgraph.outputs[0].dtype == 'int64'
def test_stack_new_interface(self):
"""Test the new numpy-like interface: stack(tensors, axis=0)."""
# Testing against old interface
warnings.simplefilter('always', DeprecationWarning)
a = tensor.imatrix('a')
b = tensor.imatrix('b')
s1 = stack(a, b)
s2 = stack([a, b])
f = function([a, b], [s1, s2], mode=self.mode)
v1, v2 = f([[1, 2]], [[3, 4]])
self.assertTrue(v1.shape == v2.shape)
self.assertTrue(numpy.all(v1 == v2))
# Testing axis parameter
s3 = stack([a, b], 1)
f = function([a, b], s3, mode=self.mode)
v3 = f([[1, 2]], [[3, 4]])
v4 = numpy.array([[[1, 2], [3, 4]]])
self.assertTrue(v3.shape == v4.shape)
self.assertTrue(numpy.all(v3 == v4))
# Testing negative axis
v1 = [[1, 2, 3], [4, 5, 6]]
v2 = [[7, 8, 9], [10, 11, 12]]
s = stack([a, b], axis=-1)
f = function([a, b], s, mode=self.mode)
v = numpy.zeros((2, 3, 2))
v[:,:,0] = v1
v[:,:,1] = v2
out = f(v1, v2)
self.assertTrue(v.shape == out.shape)
self.assertTrue(numpy.all(v == out))
s = stack([a, b], axis=-2)
f = function([a, b], s, mode=self.mode)
v = numpy.zeros((2, 2, 3))
v[:,0,:] = v1
v[:,1,:] = v2
out = f(v1, v2)
self.assertTrue(v.shape == out.shape)
self.assertTrue(numpy.all(v == out))
# Testing out-of-bounds axis
self.assertRaises(IndexError, stack, [a, b], 4)
self.assertRaises(IndexError, stack, [a, b], -4)
# Testing deprecation warning
with warnings.catch_warnings(record=True) as w:
s = stack(a, b)
assert len(w) == 1
assert issubclass(w[-1].category, DeprecationWarning)
with warnings.catch_warnings(record=True) as w:
s = stack([a, b])
s = stack([a, b], 1)
s = stack([a, b], axis=1)
s = stack(tensors=[a, b])
s = stack(tensors=[a, b], axis=1)
assert not w
def test_stack_hessian(self):
# Test the gradient of stack when used in hessian, see gh-1589
a = tensor.dvector('a')
b = tensor.dvector('b')
A = stack(a, b)
A = stack([a, b])
B = A.T.dot(A)
Ha, Hb = hessian(B.sum(), [a, b])
......@@ -3544,7 +3598,7 @@ class T_Join_and_Split(unittest.TestCase):
a = self.shared(numpy.array([1, 2, 3], dtype=self.floatX))
b = as_tensor_variable(numpy.array([7, 8, 9], dtype=self.floatX))
s = stack(a, b)
s = stack([a, b])
want = numpy.array([[1, 2, 3], [7, 8, 9]])
out = self.eval_outputs_and_check_join([s])
self.assertTrue((out == want).all())
......@@ -5971,7 +6025,7 @@ class test_tensordot(unittest.TestCase):
def test_smallest_stack():
sx, sy = dscalar(), dscalar()
rval = inplace_func([sx, sy], stack(sx, sy))(-4.0, -2.0)
rval = inplace_func([sx, sy], stack([sx, sy]))(-4.0, -2.0)
assert type(rval) == numpy.ndarray
assert [-4, -2] == list(rval)
......@@ -6610,13 +6664,13 @@ def test_dimshuffle_duplicate():
class T_get_scalar_constant_value(unittest.TestCase):
def test_get_scalar_constant_value(self):
a = tensor.stack(1, 2, 3)
a = tensor.stack([1, 2, 3])
assert get_scalar_constant_value(a[0]) == 1
assert get_scalar_constant_value(a[1]) == 2
assert get_scalar_constant_value(a[2]) == 3
b = tensor.iscalar()
a = tensor.stack(b, 2, 3)
a = tensor.stack([b, 2, 3])
self.assertRaises(tensor.basic.NotScalarConstantError, get_scalar_constant_value, a[0])
assert get_scalar_constant_value(a[1]) == 2
assert get_scalar_constant_value(a[2]) == 3
......@@ -6624,7 +6678,7 @@ class T_get_scalar_constant_value(unittest.TestCase):
# For now get_scalar_constant_value goes through only MakeVector and Join of
# scalars.
v = tensor.ivector()
a = tensor.stack(v, [2], [3])
a = tensor.stack([v, [2], [3]])
self.assertRaises(tensor.NotScalarConstantError, get_scalar_constant_value, a[0])
self.assertRaises(tensor.NotScalarConstantError, get_scalar_constant_value, a[1])
self.assertRaises(tensor.NotScalarConstantError, get_scalar_constant_value, a[2])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论