Merge pull request #1074 from goodfeli/test_grad

Ready to merge: Adds a unit test of undefined gradients on integers

Merge pull request #1074 from goodfeli/test_grad
17458063 · lamblin · 51290164 · 23ae9a3f · 17458063 · 17458063
--- a/theano/gradient.py
+++ b/theano/gradient.py
@@ -468,7 +468,7 @@ def grad(cost, wrt, consider_constant=None,
                'Ambiguous whether %s should be made into tensor'
                ' or sparse theano variable' % str(type(g_var)))

-        if g_var.type not in [NullType, DisconnectedType] and 'float' \
+        if not isinstance(g_var.type, (NullType, DisconnectedType)) and 'float' \
            not in str(g_var.type.dtype):
            raise TypeError("Gradients must always be NullType, "
                    "DisconnectedType, or continuous, but grad was "
@@ -776,8 +776,42 @@ def _populate_grad_dict(var_to_node_to_idx,
                        input_to_outputs in connection_pattern
                    ]

-            if True in inputs_connected:
-                # At least one input of this op is connected to the cost so we must
+            #List of bools indicating if each output is an integer dtype
+            output_is_int = [hasattr(output.type, 'dtype') and
+                    output.type.dtype in theano.tensor.discrete_dtypes
+                    for output in node.outputs]
+
+            # List of bools indicating if each output gradient is NullType
+            ograd_is_nan = [isinstance(output.type, NullType)
+                    for output in output_grads]
+
+            # List of bools indicating if each input reaches the cost only via NullType output gradients
+            only_connected_to_nan = [(True not in
+                [in_to_out and out_to_cost and not out_nan
+                    for in_to_out, out_to_cost, out_nan in
+                    zip(in_to_outs, outputs_connected, ograd_is_nan)])
+                for in_to_outs in connection_pattern]
+
+            if True not in inputs_connected:
+                # All outputs of this op are disconnected so we can skip
+                # calling the op's grad method and report that the inputs
+                # are disconnected
+                # (The op's grad method could do this too, but this saves the
+                # implementer the trouble of worrying about this case)
+                input_grads = [DisconnectedType()() for ipt in inputs]
+            elif False not in only_connected_to_nan:
+                # All inputs are only connected to nan gradients, so we don't
+                # need to bother calling the grad method. We know the gradient
+                # with respect to all connected inputs is nan.
+                input_grads = []
+                for connected in inputs_connected:
+                    if connected:
+                        input_grads.append(NullType()())
+                    else:
+                        input_grads.append(DisconnectedType()())
+            else:
+                # At least one input of this op is connected to the cost and
+                # not all output gradients are undefined, so we must
                # call the op's grad method

                # Each Op's grad function requires inputs and output_grads
@@ -848,13 +882,6 @@ def _populate_grad_dict(var_to_node_to_idx,
                if len(input_grads) != len(inputs):
                    raise ValueError(("%s returned the wrong number of" +\
                            " gradient terms.") % str(node.op))
-            else:
-                # All outputs of this op are disconnected so we can skip
-                # Calling the op's grad method and report that the inputs
-                # are disconnected
-                # (The op's grad method could do this too, but this saves the
-                # implementer the trouble of worrying about this case)
-                input_grads = [DisconnectedType()() for ipt in inputs]

            # must convert to list in case the op returns a tuple
            # we won't be able to post-process out the Nones if it does that
@@ -862,18 +889,15 @@ def _populate_grad_dict(var_to_node_to_idx,

            # Do type checking on the result

-            #List of bools indicating if each output is an integer dtype
-            output_is_int = [hasattr(output.type, 'dtype') and
-                    output.type.dtype in theano.tensor.discrete_dtypes
-                    for output in node.outputs]

-            #List of bools indicating if each input only has integer outputs
+            # List of bools indicating if each input only has integer outputs
            only_connected_to_int = [(True not in
                [in_to_out and out_to_cost and not out_int
                    for in_to_out, out_to_cost, out_int in
                    zip(in_to_outs, outputs_connected, output_is_int)])
                for in_to_outs in connection_pattern]

+
            for i, term in enumerate(input_grads):

                # Disallow Nones
@@ -898,6 +922,10 @@ def _populate_grad_dict(var_to_node_to_idx,
                                ' returned an integer-valued variable.'
                                ' (Input index %d, dtype %s)' % (i,
                                    term.type.dtype))
+
+                    if only_connected_to_nan[i]:
+                        assert isinstance(term.type, NullType)
+
                    if only_connected_to_int[i]:
                        # This term has only integer outputs and we know
                        # it's not undefined or disconnected

--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
@@ -722,20 +722,19 @@ class Elemwise(Op):
    def _bgrad(self, inputs, ograds):
        # returns grad, with respect to broadcasted versions of inputs

-        # Gradients (especially on the final costs) don't have to be symbolic
-        # e.g., ograds will be [ 1. ] if your objective is c and the output
-        # of the current apply node is c
-        ograds = map(as_tensor_variable, ograds)
-
        prev_setting = theano.config.compute_test_value

        try:

            theano.config.compute_test_value = 'off'

-            scalar_inputs = [Scalar(dtype=t.type.dtype)() for t in inputs]
-            scalar_ograds = [Scalar(dtype=ograd.type.dtype)()
-                    for ograd in ograds]
+            def as_scalar(t):
+                if isinstance(t.type, (NullType, DisconnectedType)):
+                    return t
+                return Scalar(t.type.dtype)()
+
+            scalar_inputs = map(as_scalar, inputs)
+            scalar_ograds = map(as_scalar, ograds)
            scalar_igrads = self.scalar_op.grad(scalar_inputs, scalar_ograds)
            for igrad in scalar_igrads:
                assert igrad is not None

--- a/theano/tests/test_gradient.py
+++ b/theano/tests/test_gradient.py
@@ -517,5 +517,22 @@ def test_known_grads_integers():

    assert np.allclose(g_actual, gv)

+def test_undefined_cost_grad():
+
+        # Tests that if we say the cost is not differentiable via the
+        # known_grads mechanism, it is treated as such by the rest of the
+        # system.
+
+        x = theano.tensor.iscalar()
+        y = theano.tensor.iscalar()
+        cost = x + y
+        assert cost.dtype in theano.tensor.discrete_dtypes
+        try:
+            grads = theano.tensor.grad(cost, [x, y], known_grads = {cost: NullType()() })
+        except theano.gradient.NullTypeGradError:
+            return
+        raise AssertionError("An undefined gradient has been ignored.")
+
+
 if __name__ == '__main__':
    unittest.main()