Merge pull request #2029 from abergeron/fix_gpuadvsub1

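Remove the restriction on indexing a broadcastable dimension: the GPU
advanced inc-subtensor ops no longer raise TypeError when the first
dimension of x is broadcastable, and they now accept any 1-d index vector
(including a broadcastable one). The first-dimension broadcastable flag of
the advanced (inc-)subtensor outputs is now taken from the index vector.
Also remove the deprecated `x.type.sparse_grad` interface from
AdvancedSubtensor1.grad, together with its tests.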

Merge pull request #2029 from abergeron/fix_gpuadvsub1
081f64c8 · Pascal Lamblin · 1e51644a · a711ef41 · 081f64c8 · 081f64c8
--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -2439,7 +2439,10 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')

-        return Apply(self, [x_, ilist_], [x_.type()])
+        bcast = (ilist_.broadcastable[0],) + x_.broadcastable[1:]
+        return Apply(self, [x_, ilist_],
+                     [CudaNdarrayType(dtype=x.dtype,
+                                      broadcastable=bcast)()])

    def perform(self, node, inp, out_):
        # This don't work as CudaNdarray_Subscript() don't support it.
@@ -2509,15 +2512,15 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):

        if ilist_.type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
-        if ilist_.type.broadcastable != (False,):
+        if ilist_.type.ndim != 1:
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
-        if x_.type.broadcastable[0]:
-            # the caller should have made a copy of x len(ilist) times
-            raise TypeError('cannot index into a broadcastable dimension')

-        return Apply(self, [x_, y_, ilist_], [x_.type()])
+        bcast = (ilist_.broadcastable[0],) + x_.broadcastable[1:]
+        return Apply(self, [x_, y_, ilist_],
+                     [CudaNdarrayType(dtype=x_.dtype,
+                                      broadcastable=bcast)()])

    # CudaNdarray_Subscript() doesn't support Advanced slicing.
    # But we can't use the parent version that loops on each index
@@ -2678,15 +2681,15 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):

        if ilist_.type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
-        if ilist_.type.broadcastable != (False,):
+        if ilist_.type.ndim != 1:
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
-        if x_.type.broadcastable[0]:
-            # the caller should have made a copy of x len(ilist) times
-            raise TypeError('cannot index into a broadcastable dimension')

-        return Apply(self, [x_, y_, ilist_], [x_.type()])
+        bcast = (ilist_.broadcastable[0],) + x_.broadcastable[1:]
+        return Apply(self, [x_, y_, ilist_],
+                     [CudaNdarrayType(dtype=x_.dtype,
+                                      broadcastable=bcast)()])

    def c_code_cache_version(self):
        return (2,)

--- a/theano/sandbox/cuda/tests/test_basic_ops.py
+++ b/theano/sandbox/cuda/tests/test_basic_ops.py
@@ -986,6 +986,7 @@ class T_subtensor(theano.tensor.tests.test_subtensor.T_subtensor):
                                 # optimized for that case.
                                 ((4, 4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0,
                                                 -1, -2, -3, -4], False),
+                                 ((1, 10), [0, 0], True),
                             ]:
            # If there is not enough memory on the GPU, skip the test
            size_needed = numpy.prod(shape) * (4 + 1)

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -450,22 +450,6 @@ class TestConstructSparseFromList(unittest.TestCase):
        g = theano.grad(sub.sum(), m)
        assert isinstance(g.owner.op, tensor.AdvancedIncSubtensor1)

-        # Test that we create a sparse grad when asked
-        # OLD INTERFACE
-        m = theano.tensor.matrix()
-        sub = m[v]
-        m.type.sparse_grad = True
-        g = theano.grad(sub.sum(), m)
-        assert isinstance(g.owner.op, ConstructSparseFromList)
-
-        # Test that we create a sparse grad when asked
-        # OLD INTERFACE CONSEQUENCE
-        m = theano.tensor.matrix()
-        sub = m[v]
-        sub.type.sparse_grad = True
-        g = theano.grad(sub.sum(), m)
-        assert isinstance(g.owner.op, ConstructSparseFromList)
-
        # Test that we create a sparse grad when asked
        # USER INTERFACE
        m = theano.tensor.matrix()

--- a/theano/tensor/subtensor.py
+++ b/theano/tensor/subtensor.py
@@ -1523,7 +1523,9 @@ class AdvancedSubtensor1(Op):
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
-        return Apply(self, [x_, ilist_], [x_.type()])
+        bcast = (ilist_.broadcastable[0],) + x_.broadcastable[1:]
+        return Apply(self, [x_, ilist_], [TensorType(dtype=x.dtype,
+                                                     broadcastable=bcast)()])

    def perform(self, node, inp, out_):
        x, i = inp
@@ -1565,14 +1567,7 @@ class AdvancedSubtensor1(Op):
        x, ilist = inputs
        gz, = grads
        assert len(inputs) == 2
-        sparse = False
-        if getattr(x.type, 'sparse_grad', False):
-            sparse = True
-            warnings.warn(
-                "DEPRECATION WARNING: AdvancedSubtensor1, you are using"
-                " an old interface to the sparse grad. You should use"
-                " theano.sparse_grad(a_tensor[an_int_vector]). ")
-        if sparse or self.sparse_grad:
+        if self.sparse_grad:
            if x.type.ndim != 2:
                raise TypeError(
                    "AdvancedSubtensor1: you can't take the sparse grad"
@@ -1742,8 +1737,9 @@ class AdvancedIncSubtensor1(Op):
                'cannot %s x subtensor with ndim=%s'
                ' by y with ndim=%s to x subtensor with ndim=%s ' % (
                    opname, x_.type.ndim, y_.type.ndim))
-
-        return Apply(self, [x_, y_, ilist_], [x_.type()])
+        bcast = (ilist_.broadcastable[0],) + x_.broadcastable[1:]
+        return Apply(self, [x_, y_, ilist_], [TensorType(dtype=x.dtype,
+                                                     broadcastable=bcast)()])

    def perform(self, node, inp, out_):
        # TODO opt to make this inplace

--- a/theano/tensor/tests/test_subtensor.py
+++ b/theano/tensor/tests/test_subtensor.py
@@ -500,9 +500,25 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
             self.ignore_topo)]
        assert len(topo_) == 1
        self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
-        self.assertTrue(numpy.allclose(f([0]), ones[0] * 5))
+        f_0 = f([0])
+        self.assertTrue(f_0.shape == (1, 3))
+        self.assertTrue(numpy.allclose(f_0, ones[0] * 5))
+        f_00 = f([0, 0])
+        self.assertTrue(f_00.shape == (2, 3))
+        self.assertTrue(numpy.allclose(f_00, 5))
        self.assertRaises(IndexError, f, [0, 1])

+        # Test the gradient
+        c = t.sum()
+        gn = theano.grad(c, n)
+        g = self.function([idx], gn, op=self.adv_incsub1)
+        g_0 = g([0])
+        self.assertTrue(g_0.shape == (1, 3))
+        self.assertTrue(numpy.allclose(g_0, 1))
+        g_00 = g([0, 0])
+        self.assertTrue(g_00.shape == (1, 3))
+        self.assertTrue(numpy.allclose(g_00, 2))
+
    def test_adv_sub1_idx_broadcast(self):
        # The idx can be a broadcastable vector.
        ones = numpy.ones((4, 3), dtype=self.dtype)
@@ -518,7 +534,18 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
             self.ignore_topo)]
        assert len(topo_) == 1
        self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
-        self.assertTrue(numpy.allclose(f([0]), ones[0] * 5))
+        f_0 = f([0])
+        self.assertTrue(f_0.shape == (1, 3))
+        self.assertTrue(numpy.allclose(f_0, 5))
+
+        # Test the gradient
+        c = t.sum()
+        gn = theano.grad(c, n)
+        g = self.function([idx], gn, op=self.adv_incsub1)
+        g_0 = g([0])
+        self.assertTrue(g_0.shape == (4, 3))
+        self.assertTrue(numpy.allclose(g_0[0], 1))
+        self.assertTrue(numpy.allclose(g_0[1:], 0))

    @attr('slow')
    def test_shape_i_const(self):