提交 834ad203 authored 作者: Frederic's avatar Frederic

Implement the grad for inner dimensions in MaxAndArgMax.

上级 dba5815e
......@@ -1941,32 +1941,36 @@ class MaxAndArgmax(Op):
return [eval_points[0][arange(eval_points[0].shape[0]),
max_pos], None]
def grad(self, inp, grads):
# @warning: This only works if axis is 0, else the max is
# broadcasted wrong in the call to eq.
# @note: This function should work correctly for L{vector}s.
# (x, y), (gz, gw)
# gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy
# gMax * dMax/dx + gArgMax * dArgMax/dx, gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete g_max to x's shape
# when axis=0 the broadcasting mechanism does it automatically
# gMax * dMax/dx + gArgMax * dArgMax/dx,
# gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete
# g_max to x's shape when axis=0 the broadcasting mechanism
# does it automatically
x, axis = inp
g_max, g_max_idx = grads
if not ( axis.data == 0 or axis.data == x.ndim-1):
raise NotImplementedError('MaxAndArgmax gradient with axis corresponding to internal dimension')
if axis.data==0:
g_max_pad = shape_padleft(g_max)
else:
g_max_pad = shape_padright(g_max)
xmax = max(x, axis)
if axis.data==0:
xmax_pad = shape_padleft(xmax)
else:
xmax_pad = shape_padright(xmax)
# Raise g_max and xmax to the same number of dimensions as the input.
pattern = []
out_dim = 0
for i in range(inp[0].ndim):
if i == axis.data:
pattern.append('x')
else:
pattern.append(out_dim)
out_dim += 1
g_max_pad = DimShuffle(g_max.broadcastable, pattern)(g_max)
xmax_pad = DimShuffle(xmax.broadcastable, pattern)(xmax)
# Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad
return g_x, None
def __str__(self):
return self.__class__.__name__
......
......@@ -1561,13 +1561,16 @@ class T_max_and_argmax(unittest.TestCase):
n = as_tensor_variable(data)
def check_grad_max(data, max_grad_data, axis=None):
"""
Why this is needed? verify_grad is not enought?
"""
#This work only for axis in [0,None]
assert axis in [0,None]
z = numpy.zeros_like(data)
z = z.flatten()
argmax=numpy.argmax(data,axis=axis)
if argmax.ndim==0:
z[numpy.argmax(data,axis=axis)]+=1
z[argmax]+=1
else:
for id,v in enumerate(argmax):
z[v*numpy.prod(data.shape[data.ndim-1:axis:-1])+id]+=1
......@@ -1592,6 +1595,14 @@ class T_max_and_argmax(unittest.TestCase):
utt.verify_grad(lambda v: max_and_argmax(v.flatten())[1], [data])
check_grad_max(data,eval_outputs(grad(max_and_argmax(n.flatten())[0],n)))
# Test 4d inner dimensions
data = numpy.random.rand(2, 3, 4, 5)
n = as_tensor_variable(data)
for i in [0, 1, 2, 3]:
utt.verify_grad(lambda v: max_and_argmax(v, axis=[i])[0], [data])
utt.verify_grad(lambda v: max_and_argmax(v, axis=[i])[1], [data])
class T_argmin_argmax(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......
......@@ -3,7 +3,12 @@
Tests for the R operator / L operator
For the list of ops with Rop defined, with or without missing tests, see this file.
For the list of op with r op defined, with or without missing test
see this file: doc/library/tensor/basic.txt
For function to automatically test your Rop implementation, look at
the docstring of the functions: check_mat_rop_lop, check_rop_lop,
check_nondiff_rop,
"""
......@@ -41,7 +46,9 @@ class BreakRop(Op):
break_op = BreakRop()
class test_RopLop(unittest.TestCase):
class RopLop_checker(unittest.TestCase):
""" Don't peform any test, but provide the function to test the
Rop to class that inherit from it."""
def setUp(self):
# Using vectors make things a lot simpler for generating the same
......@@ -56,6 +63,8 @@ class test_RopLop(unittest.TestCase):
5+self.rng.randint(30))
def check_nondiff_rop(self, y):
""" If you op is not differentiable(so you can't define Rop)
test that an error is raised."""
raised = False
try:
tmp = tensor.Rop(y, self.x, self.v)
......@@ -67,6 +76,24 @@ class test_RopLop(unittest.TestCase):
' is not differentiable'))
def check_mat_rop_lop(self, y, out_shape):
""" Test the Rop/Lop when input is a matrix and the output is a vector
:param y: the output variable of the op applied to self.mx
:param out_shape: Used to generate a random tensor
corresponding to the evaluation point of the Rop
(i.e. the tensor with which you multiply the
Jacobian). It should be a tuple of ints.
If the Op has more than 1 input, one of them must be mx; the
others must be shared variables/constants. We will test only
against the input self.mx, so you must call
check_mat_rop_lop/check_rop_lop for the other inputs.
We expect all inputs/outputs have dtype floatX.
If you want to test an out with an output matrix, add a sum
after the Op you want to test.
"""
vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
yv = tensor.Rop(y, self.mx, self.mv)
......@@ -97,9 +124,12 @@ class test_RopLop(unittest.TestCase):
v2 = scan_f(vx,vv)
assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
def check_rop_lop(self, y, out_shape):
"""
As check_mat_rop_lop, except the input is self.x witch is a
vector. The output is still a vector.
"""
# TEST ROP
vx = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX)
vv = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX)
......@@ -138,6 +168,7 @@ class test_RopLop(unittest.TestCase):
assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
class test_RopLop(RopLop_checker):
def test_shape(self):
self.check_nondiff_rop( self.x.shape[0])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论