提交 7cfd2879，作者：Frédéric Bastien，提交者：GitHub

Merge pull request #5486 from affanv14/gradchange

Change grad to L_op
差异被折叠。
......@@ -32,12 +32,12 @@ class Erf(UnaryScalarOp):
else:
super(Erf, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -63,12 +63,12 @@ class Erfc(UnaryScalarOp):
else:
super(Erfc, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -110,12 +110,12 @@ class Erfcx(UnaryScalarOp):
else:
super(Erfcx, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -146,12 +146,12 @@ class Erfinv(UnaryScalarOp):
else:
super(Erfinv, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -179,12 +179,12 @@ class Erfcinv(UnaryScalarOp):
else:
super(Erfcinv, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -216,12 +216,12 @@ class Gamma(UnaryScalarOp):
else:
super(Gamma, self).impl(x)
def grad(self, inputs, gout):
def L_op(self, inputs, outputs, gout):
(x,) = inputs
(gz,) = gout
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -253,12 +253,12 @@ class GammaLn(UnaryScalarOp):
else:
super(GammaLn, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......
......@@ -602,7 +602,7 @@ second dimension
ograds = [x.zeros_like() for x in outs]
ograds[idx] = theano.tensor.ones_like(out)
bgrads = self._bgrad(inputs, ograds)
bgrads = self._bgrad(inputs, outs, ograds)
rop_out = None
for jdx, (inp, eval_point) in enumerate(izip(inputs,
......@@ -636,7 +636,7 @@ second dimension
def L_op(self, inputs, outs, ograds):
# compute grad with respect to broadcasted input
rval = self._bgrad(inputs, ograds)
rval = self._bgrad(inputs, outs, ograds)
# TODO: make sure that zeros are clearly identifiable
# to the gradient.grad method when the outputs have
......@@ -684,7 +684,7 @@ second dimension
return rval
def _bgrad(self, inputs, ograds):
def _bgrad(self, inputs, outputs, ograds):
# returns grad, with respect to broadcasted versions of inputs
with change_flags(compute_test_value='off'):
......@@ -695,7 +695,10 @@ second dimension
scalar_inputs = list(map(as_scalar, inputs))
scalar_ograds = list(map(as_scalar, ograds))
scalar_igrads = self.scalar_op.grad(scalar_inputs, scalar_ograds)
scalar_outputs = self.scalar_op.make_node(
*[get_scalar_type(dtype=i.type.dtype).make_variable()
for i in inputs]).outputs
scalar_igrads = self.scalar_op.L_op(scalar_inputs, scalar_outputs, scalar_ograds)
for igrad in scalar_igrads:
assert igrad is not None, self.scalar_op
......@@ -711,6 +714,8 @@ second dimension
return r
if r in scalar_inputs:
return inputs[scalar_inputs.index(r)]
if r in scalar_outputs:
return outputs[scalar_outputs.index(r)]
if r in scalar_ograds:
return ograds[scalar_ograds.index(r)]
node = r.owner
......
......@@ -100,15 +100,14 @@ class SoftmaxWithBias(gof.Op):
# data type matches.
output_storage[0][0] = e_x.astype(x_dtype, copy=False)
def L_op(self, inp, outputs, grads):
    """Gradient of softmax-with-bias w.r.t. its inputs ``(x, b)``.

    Parameters
    ----------
    inp : sequence of 2 variables
        ``(x, b)`` — the input matrix and the bias vector.
    outputs : sequence of 1 variable
        The forward outputs of this Op; ``outputs[0]`` is the softmax
        result, reused here instead of recomputing it (the point of
        ``L_op`` over the older ``grad`` interface).
    grads : sequence of 1 variable
        ``(g_sm,)`` — gradient of the cost w.r.t. the softmax output.

    Returns
    -------
    list
        ``[dx, db]``; both are disconnected when ``g_sm`` is.
    """
    x, b = inp
    g_sm, = grads
    # Caller explicitly marked the output gradient as disconnected:
    # propagate disconnectedness to both inputs.
    if isinstance(g_sm.type, DisconnectedType):
        return [DisconnectedType()(), DisconnectedType()()]
    # Reuse the already-computed forward output rather than rebuilding
    # softmax_with_bias(x, b).
    dx = softmax_grad(g_sm, outputs[0])
    # Bias gradient: sum the input gradient over the batch axis.
    # NOTE(review): assumes axis 0 is the batch dimension — consistent
    # with the rest of this file, but confirm against the Op's c_code.
    db = tensor.sum(dx, axis=0)
    return dx, db
......@@ -440,18 +439,17 @@ class Softmax(gof.Op):
sm = e_x / e_x.sum(axis=1)[:, None]
output_storage[0][0] = sm
def L_op(self, inp, outputs, grads):
    """Gradient of softmax w.r.t. its single input.

    ``outputs[0]`` is the forward softmax result; reusing it (instead of
    recomputing ``softmax_op(x)`` as the old ``grad`` method did) avoids
    duplicating the forward computation in the gradient graph.

    Parameters
    ----------
    inp : sequence of 1 variable
        ``(x,)`` — the softmax input.
    outputs : sequence of 1 variable
        The forward outputs of this Op.
    grads : sequence of 1 variable
        ``(g_sm,)`` — gradient w.r.t. the softmax output.

    Returns
    -------
    list
        A single-element list with the gradient w.r.t. ``x``.
    """
    x, = inp
    g_sm, = grads
    return [softmax_grad(g_sm, outputs[0])]
def R_op(self, inputs, eval_points):
    """R-operator of softmax.

    The Jacobian of softmax is symmetric, so the R_op coincides with the
    L_op applied to the evaluation points. The forward output needed by
    ``L_op`` is rebuilt here as ``self(*inputs)``.
    """
    # An undefined evaluation point makes the whole R_op undefined.
    if None in eval_points:
        return [None]
    # L_op requires the Op's outputs as its second argument.
    return self.L_op(inputs, [self(*inputs)], eval_points)
def infer_shape(self, node, shape):
    """Softmax is elementwise over rows: output shape equals input shape."""
    return shape
......@@ -1060,7 +1058,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
db_terms.append(db)
if not isinstance(g_sm.type, DisconnectedType):
dx, db = softmax_with_bias.grad((x, b), (g_sm, ))
dx, db = softmax_with_bias.L_op((x, b), [softmax_with_bias(x, b)], (g_sm, ))
dx_terms.append(dx)
db_terms.append(db)
......
......@@ -562,15 +562,14 @@ class Pool(OpenMPOp):
pad, self.ndim)
return [shp]
def grad(self, inp, grads):
x, ws, stride, pad = inp
def L_op(self, inputs, outputs, grads):
x, ws, stride, pad = inputs
gz, = grads
disc = [DisconnectedType()() for i in inp[1:]]
disc = [DisconnectedType()() for i in inputs[1:]]
if self.mode == 'max':
maxout = self(x, ws, stride, pad)
return [MaxPoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border)(
x, maxout, gz, ws=ws, stride=stride, pad=pad)] + disc
x, outputs[0], gz, ws=ws, stride=stride, pad=pad)] + disc
else:
return [AveragePoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border,
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论