Merge pull request #425 from delallea/improved_set_subtensor

Fixed issues with advanced inc/set subtensor in some cases

Merge pull request #425 from delallea/improved_set_subtensor
29d3f9e0 · lamblin · 0f86ecd9 · 3a0b3dfb · 29d3f9e0 · 29d3f9e0
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -5097,6 +5097,7 @@ class AdvancedSubtensor1(Op):

    def __hash__(self):
        return hash(type(self))
+
    def __eq__(self, other):
        return type(self) == type(other)

@@ -5115,7 +5116,7 @@ class AdvancedSubtensor1(Op):
        x, i = inp
        out, = out_
        # Copy always implied by numpy advanced indexing semantic.
-        if out[0] is not None and out[0].shape==(len(i),)+x.shape[1:]:
+        if out[0] is not None and out[0].shape == (len(i),) + x.shape[1:]:
            o = out[0]
        else:
            o = None
@@ -5131,8 +5132,9 @@ class AdvancedSubtensor1(Op):

    def grad(self, inputs, grads):
        gz, = grads
-        assert len(inputs)==2
-        return [advanced_inc_subtensor1(zeros_like(inputs[0]),gz,inputs[1])]+[None]*(len(inputs)-1)
+        assert len(inputs) == 2
+        rval1 = [advanced_inc_subtensor1(zeros_like(inputs[0]), gz, inputs[1])]
+        return rval1 + [None] * (len(inputs) - 1)

    def R_op(self, inputs, eval_points):
        if eval_points[0] is None:
@@ -5141,10 +5143,11 @@ class AdvancedSubtensor1(Op):

    def infer_shape(self, node, ishapes):
        x, ilist = ishapes
-        return [ilist+x[1:]]
+        return [ilist + x[1:]]

 advanced_subtensor1 = AdvancedSubtensor1()

+
 class AdvancedIncSubtensor1(Op):
    """Increments a subtensor using advanced slicing (list of index)"""
    def __init__(self, inplace=False, set_instead_of_inc=False):
@@ -5173,10 +5176,13 @@ class AdvancedIncSubtensor1(Op):
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        if y_.type.ndim > x_.type.ndim:
-            opname = 'increment'
+            if self.set_instead_of_inc:
+                opname = 'set'
+            else:
+                opname = 'increment'
            raise TypeError('cannot %s x subtensor with ndim=%s'
-            ' by y with ndim=%s to x subtensor with ndim=%s '%(
-                opname, x_.type.ndim, y_.type.ndim ))
+            ' by y with ndim=%s to x subtensor with ndim=%s ' % (
+                opname, x_.type.ndim, y_.type.ndim))

        return Apply(self, [x_, y_, ilist_], [x_.type()])

@@ -5186,19 +5192,19 @@ class AdvancedIncSubtensor1(Op):
        out, = out_
        if not self.inplace:
            x = x.copy()
-        # x[idx] += y don't work if the same index is present many times.
-        # It do it only once
-        #  -- Numpy also behaves this way, is it a bug in numpy?
+        # In NumPy, `x[idx] += y` increments each distinct index only once,
+        # even when that index appears several times in `idx`. Since we want
+        # every occurrence to count, we implement our own 'inc' iteration.
        if self.set_instead_of_inc:
-            if y.ndim:
-                for (j,i) in enumerate(idx):
-                    x[i] = y[j]
-            else:
-                for i in idx:
-                    x[i] = y
+            x[idx] = y
        else:
-            if y.ndim:
-                for (j,i) in enumerate(idx):
+            # If `y` has as many dimensions as `x`, then we want to iterate
+            # jointly on `x` and `y`. Otherwise, `y` should be broadcast to
+            # fill all relevant rows of `x`.
+            assert y.ndim <= x.ndim   # Should be guaranteed by `make_node`
+            if y.ndim == x.ndim:
+                assert len(y) == len(idx)
+                for (j, i) in enumerate(idx):
                    x[i] += y[j]
            else:
                for i in idx:
@@ -5215,7 +5221,6 @@ class AdvancedIncSubtensor1(Op):
        return self.make_node(eval_points[0], eval_points[1],
                              *inputs[2:]).outputs

-
    def grad(self, inputs, grads):
        g_output, = grads
        x, y = inputs[:2]
@@ -5228,6 +5233,7 @@ class AdvancedIncSubtensor1(Op):

 advanced_inc_subtensor1 = AdvancedIncSubtensor1()

+
 class AdvancedSubtensor(Op):
    """Return a subtensor copy, using advanced indexing.
    """
@@ -5235,10 +5241,10 @@ class AdvancedSubtensor(Op):
    # AdvancedSubtensor(args)(self, *args),
    # if args contains and advanced indexing pattern

-    def __init__(self, args): #idx_list?
+    def __init__(self, args):  # idx_list?
        # For the moment, __init__ will be passed the whole list of arguments
        #TODO: see what's the best solution
-        self.args = args #?
+        self.args = args  # ?

        #FIXME: do not store variables in the class instance

@@ -5590,6 +5596,7 @@ class TensorDotGrad(Op):

 tensordot_grad = TensorDotGrad

+
 class TensorDot(Op):
    """Compute tensor-tensor products over the given axes.
    See numpy documentation for details.
@@ -5600,21 +5607,23 @@ class TensorDot(Op):
    @classmethod
    def parse_axes(cls, axes):

-        if not numpy.isscalar(axes) and len(axes)!=2:
-            raise ValueError("Axes should be scalar valued or a list/tuple of len 2.")
+        if not numpy.isscalar(axes) and len(axes) != 2:
+            raise ValueError("Axes should be scalar valued or a list/tuple of "
+                             "len 2.")

-        if isinstance(axes,(list,tuple)):
+        if isinstance(axes, (list, tuple)):
            axes_out = []
            # cast axes[0] and axes[1] to tuples
-            for i,a in enumerate(axes):
+            for i, a in enumerate(axes):
                if numpy.isscalar(a):
                    axes_out.append((a,))
                else:
                    axes_out.append(tuple(a))

            # these should be of same length
-            if len(axes_out[0])!=len(axes_out[1]):
-                raise ValueError("Elements of the axes list/tuple need to be of the same size.")
+            if len(axes_out[0]) != len(axes_out[1]):
+                raise ValueError("Elements of the axes list/tuple need to be "
+                                 "of the same size.")

            axes = tuple(axes_out)

@@ -5631,22 +5640,23 @@ class TensorDot(Op):

    def make_node(self, x, y):
        op = self
-        if isinstance(self.axes,int):
-            axes = [range(x.ndim-self.axes,x.ndim),range(self.axes)]
+        if isinstance(self.axes, int):
+            axes = [range(x.ndim - self.axes, x.ndim), range(self.axes)]
            op = TensorDot(axes)

-        axesdim = numpy.size(op.axes)/2
+        axesdim = numpy.size(op.axes) / 2

        x, y = map(as_tensor_variable, [x, y])

        if axesdim > x.type.ndim or axesdim > y.type.ndim:
-            raise TypeError('Cannot sum over more dimensions than input. %i > %i,%i' %
-                    axesdim, x.type.ndim, y.type.ndim)
+            raise TypeError('Cannot sum over more dimensions than input. '
+                            '%i > %i,%i' %
+                            (axesdim, x.type.ndim, y.type.ndim))

-        outdim = x.type.ndim + y.type.ndim - 2*axesdim
+        outdim = x.type.ndim + y.type.ndim - 2 * axesdim
        output = tensor(dtype=scal.upcast(x.dtype, y.dtype),
-                        broadcastable=[False]*outdim);
-        return Apply(op, inputs=[x,y], outputs=[output,])
+                        broadcastable=[False] * outdim)
+        return Apply(op, inputs=[x, y], outputs=[output, ])

    def perform(self, node, inp, out):
        x, y = inp
@@ -5654,7 +5664,8 @@ class TensorDot(Op):
        try:
            z[0] = numpy.asarray(numpy.tensordot(x, y, self.axes))
        except ValueError, e:
-            # The error raised by numpy has no shape information, we mean to add that
+            # The error raised by numpy has no shape information, we mean to
+            # add that.
            e.args = e.args + (x.shape, y.shape, self.axes)
            raise

@@ -5667,13 +5678,15 @@ class TensorDot(Op):
    def __str__(self):
        return "tensordot"

+
 def tensordot(x, y=None, axes=2):
-    if y==None:
-        raise NotImplementedError('The interface to tensordot has changed from '\
-            'tensor.tensordot(axes)(x,y) to tensor.tensordot(x,y,axes). Please '\
-            'modify your code accordingly.')
+    if y is None:
+        raise NotImplementedError(
+                'The interface to tensordot has changed from '
+                'tensor.tensordot(axes)(x,y) to tensor.tensordot(x,y,axes). '
+                'Please modify your code accordingly.')

-    if x.ndim==0 or y.ndim==0:
+    if x.ndim == 0 or y.ndim == 0:
        raise ValueError('Cannot perform tensordot of 0-d inputs.')

    axes = TensorDot.parse_axes(axes)
@@ -5682,16 +5695,16 @@ def tensordot(x, y=None, axes=2):
    if numpy.isscalar(axes):
        if axes >= x.ndim or axes >= y.ndim:
            raise ValueError('axes should be smaller than the dimension of '\
-                    'x and y (x.ndim=%i, y.ndim=%i)' % (x.ndim,y.ndim))
-    elif isinstance(axes, (list,tuple)):
+                    'x and y (x.ndim=%i, y.ndim=%i)' % (x.ndim, y.ndim))
+    elif isinstance(axes, (list, tuple)):

-        if isinstance(axes[0],(list,tuple)) and \
+        if isinstance(axes[0], (list, tuple)) and \
           (len(axes[0]) > x.ndim or (numpy.array(axes[0]) >= x.ndim).any()):
            raise ValueError('axes[0] should be array_like, of length smaller'\
                    ' than the dimension of x (x.ndim=%i, len(axes[0])=%i).' %
                    (x.ndim, len(axes[0])))

-        if isinstance(axes[1],(list,tuple)) and \
+        if isinstance(axes[1], (list, tuple)) and \
           (len(axes[1]) > y.ndim or (numpy.array(axes[1]) >= y.ndim).any()):
            raise ValueError('axes[1] should be array_like, of length smaller'\
                    'than the dimension of y (y.ndim=%i, len(axes[1])=%i).' %

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -2049,6 +2049,7 @@ class T_subtensor(unittest.TestCase):
                    raise
        finally:
            _logger.setLevel(oldlevel)
+
    def test1_err_subslice(self):
        n = self.shared(numpy.ones(3, dtype=self.dtype))
        try:
@@ -2121,6 +2122,7 @@ class T_subtensor(unittest.TestCase):
        tval = f()
        self.assertTrue(tval.shape == ())
        self.assertTrue(tval == 5.0)
+
    def test1_ok_range_infinite(self):
        #Subtensor.debug = True
        n = self.shared(numpy.ones(3, dtype=self.dtype)*5)
@@ -2185,6 +2187,7 @@ class T_subtensor(unittest.TestCase):
                    raise
        finally:
            sys.stderr = old_stderr
+
    def test2_ok_elem(self):
        n = self.shared(numpy.asarray(range(6), dtype=self.dtype).reshape((2,3)))
        t = n[0,2]
@@ -2192,6 +2195,7 @@ class T_subtensor(unittest.TestCase):
        tval = self.eval_output_and_check(t)
        self.assertTrue(tval.shape == ())
        self.assertTrue(numpy.all(tval == 2))
+
    def test2_ok_row(self):
        n = self.shared(numpy.asarray(range(6), dtype=self.dtype).reshape((2,3)))
        t = n[1]
@@ -2404,7 +2408,6 @@ class T_subtensor(unittest.TestCase):
                    assert numpy.all(
                            f(start,stop,step) == v_data[start:stop:step].shape)

-
    def test_slice_canonical_form_0(self):
        start  = tensor.iscalar('b')
        stop   = tensor.iscalar('e')
@@ -2428,7 +2431,6 @@ class T_subtensor(unittest.TestCase):
                    assert numpy.all(t_out == v_out)
                    assert numpy.all(t_out.shape == v_out.shape)

-
    def test_slice_canonical_form_1(self):
        stop   = tensor.iscalar('e')
        step   = tensor.iscalar('s')
@@ -2672,7 +2674,109 @@ class T_subtensor(unittest.TestCase):
        #single element
        utt.verify_grad(
            inc_slice(2, 1),
-            (numpy.asarray([[0, 1],[2, 3],[4, 5.]]), numpy.asarray(9.),))
+            (numpy.asarray([[0, 1], [2, 3], [4, 5.]]), numpy.asarray(9.),))
+
+    def test_advanced_inc_and_set(self):
+        """
+        Test advanced increment and set.
+        """
+        rng = numpy.random.RandomState(seed=utt.fetch_seed())
+        all_inputs_var = []
+        all_inputs_num = []
+        all_outputs_var = []
+        all_outputs_num = []
+        for set_instead_of_inc in (False, True):
+            for inplace in (False, True):
+                for data_shape in ((10,), (4, 5), (1, 2, 3), (4, 5, 6, 7)):
+                    data_n_dims = len(data_shape)
+                    # Symbolic variable to be incremented.
+                    data_var = tensor.tensor(
+                            broadcastable=[False] * data_n_dims,
+                            dtype=self.dtype)
+                    data_size = numpy.product(data_shape)
+                    # Corresponding numeric variable.
+                    data_num_init = numpy.arange(data_size, dtype=self.dtype)
+                    data_num_init = data_num_init.reshape(data_shape)
+                    inc_shapes = [data_shape[i:]
+                                  for i in xrange(0, len(data_shape) + 1)]
+                    for inc_shape in inc_shapes:
+                        inc_n_dims = len(inc_shape)
+                        # We copy the numeric value to be 100% sure there is no
+                        # risk of accidentally sharing it.
+                        data_num = data_num_init.copy()
+                        if inplace:
+                            # We need to copy `data_var` as we do not want
+                            # multiple in-place operations on it.
+                            data_var = deepcopy(data_var)
+                        # Symbolic variable with rows to be incremented.
+                        idx_var = theano.tensor.vector(dtype='int64')
+                        n_to_inc = rng.randint(data_shape[0])
+                        # Corresponding numeric variable.
+                        idx_num = rng.randint(0, data_shape[0], n_to_inc)
+                        idx_num = idx_num.astype('int64')
+                        # Symbolic variable with increment value.
+                        inc_var = tensor.tensor(
+                                broadcastable=[False] * inc_n_dims,
+                                dtype=self.dtype)
+                        # Trick for the case where `inc_shape` is the same as
+                        # `data_shape`: what we actually want is the first
+                        # shape element to be equal to the number of rows to
+                        # increment.
+                        if len(inc_shape) == len(data_shape):
+                            inc_shape = (n_to_inc,) + inc_shape[1:]
+                        inc_size = numpy.product(inc_shape)
+                        # Corresponding numeric variable.
+                        inc_num = rng.uniform(size=inc_size).astype(self.dtype)
+                        inc_num = inc_num.reshape(inc_shape)
+                        # Result of the incrementation.
+                        # (i) Theano
+                        if set_instead_of_inc:
+                            op = set_subtensor
+                        else:
+                            op = inc_subtensor
+                        output = op(data_var[idx_var], inc_var,
+                                    inplace=inplace)
+                        # (ii) NumPy (note that NumPy increments duplicated
+                        # indices only once, so we cannot directly use +=).
+                        data_copy = data_num.copy()
+                        for j, idx in enumerate(idx_num):
+                            if len(inc_shape) == len(data_shape):
+                                # Special case where there is no broadcasting.
+                                if set_instead_of_inc:
+                                    data_copy[idx] = inc_num[j]
+                                else:
+                                    data_copy[idx] += inc_num[j]
+                            else:
+                                if set_instead_of_inc:
+                                    data_copy[idx] = inc_num
+                                else:
+                                    data_copy[idx] += inc_num
+                        # Remember data for the Theano function (see below).
+                        all_inputs_var += [data_var, idx_var, inc_var]
+                        all_inputs_num += [data_num, idx_num, inc_num]
+                        all_outputs_var.append(output)
+                        all_outputs_num.append(data_copy)
+                        if False:  # Enable for debugging purpose.
+                            f = theano.function([data_var, idx_var, inc_var],
+                                                output, accept_inplace=inplace)
+                            if inplace:
+                                # Ensure calling `f` will not alter `data_num`.
+                                data_num = data_num.copy()
+                            f_out = f(data_num.copy(), idx_num, inc_num)
+                            assert numpy.allclose(f_out, data_copy)
+                            if not inplace:
+                                # Sanity check: `data_num` should be intact.
+                                assert (data_num == data_num_init).all()
+
+        # Actual test (we compile a single Theano function to make it faster).
+        f = theano.function(all_inputs_var, all_outputs_var,
+                            accept_inplace=True)
+        f_outs = f(*all_inputs_num)
+        assert len(f_outs) == len(all_outputs_num)
+        for f_out, output_num in izip(f_outs, all_outputs_num):
+            # NB: if this assert fails, it will probably be easier to debug if
+            # you enable the debug code above.
+            assert numpy.allclose(f_out, output_num)


 class TestIncSubtensor1(unittest.TestCase):
@@ -5151,7 +5255,7 @@ class test_broadcast(unittest.TestCase):

 def test_len():
    for shape in [(5,), (3, 4), (7, 4, 6)]:
-        x = tensor.tensor(dtype='floatX', broadcastable=(False,)*len(shape))
+        x = tensor.tensor(dtype='floatX', broadcastable=(False,) * len(shape))
        try:
            len(x)
            assert False, "Expected an error"
@@ -5166,12 +5270,12 @@ def test_mod():
    as Python. That is what we want.
    """
    x, y = fscalars('xy')
-    fn = gof.DualLinker().accept(gof.Env([x,y], [x%y])).make_function()
-    for a,b in ((0,1), (1,1), (0,-1), (1,-1), (-1,-1),
-                (1,2), (-1,2), (1,-2), (-1,-2),
-                (5,3), (-5,3), (5,-3), (-5,-3)
+    fn = gof.DualLinker().accept(gof.Env([x, y], [x % y])).make_function()
+    for a, b in ((0, 1), (1, 1), (0, -1), (1, -1), (-1, -1),
+                (1, 2), (-1, 2), (1, -2), (-1, -2),
+                (5, 3), (-5, 3), (5, -3), (-5, -3)
                ):
-        assert fn(a,b) == a%b, (a,)
+        assert fn(a, b) == a % b, (a,)


 def test_mod_compile():
@@ -5195,14 +5299,14 @@ def test_mod_compile():
    shape = x.shape
    out = tensor.switch(tensor.eq(3 % x.shape[0], 0), y, y[:-1])

-    f = theano.function([x,y],out)
+    f = theano.function([x, y], out)


 def test_unalign():
    if config.floatX == 'float64':
-        dtype="b1,f8"
+        dtype = "b1,f8"
    else:
-        dtype="b1,f4"
+        dtype = "b1,f4"

    a = numpy.empty(1e4, dtype=dtype)['f1']
    b = numpy.empty(1e4, dtype=dtype)['f1']
@@ -5210,24 +5314,25 @@ def test_unalign():
    assert not b.flags.aligned
    a[:] = rand(len(a))
    b[:] = rand(len(b))
-    out_numpy = 2*a + 3*b
+    out_numpy = 2 * a + 3 * b

-    av,bv = tensor.vectors('ab')
-    f = theano.function([av,bv],2*av+3*bv)
+    av, bv = tensor.vectors('ab')
+    f = theano.function([av, bv], 2 * av + 3 * bv)
    f.maker.env.toposort()
    # FAST_COMPILE use the python code that support unaligned data
    # The DebugMode make a copy of the inputs, so they will be aligned.
-    should_raise = theano.config.mode not in ["FAST_COMPILE","DebugMode", "DEBUG_MODE"]
+    should_raise = theano.config.mode not in ["FAST_COMPILE", "DebugMode",
+                                              "DEBUG_MODE"]
    try:
-        out_theano = f(a,b)
+        out_theano = f(a, b)
        assert not a.flags.aligned
        assert not b.flags.aligned
-        assert numpy.allclose(out_numpy,out_theano)
+        assert numpy.allclose(out_numpy, out_theano)
        if should_raise:
            raise Exception("Expected an error from Theano!")
    except NotImplementedError, e:
        if not should_raise:
-            raise Exception("Theano raised an exception when none was expected")
+            raise Exception("Theano raised an unexpected exception")


 def test_dimshuffle_duplicate():