提交 010aa620 authored 作者: nouiz's avatar nouiz

Merge pull request #885 from goodfeli/undef_grad

Support for undefined gradients
...@@ -639,6 +639,8 @@ class UncomputableOp(Op): ...@@ -639,6 +639,8 @@ class UncomputableOp(Op):
return "Uncomputable{%s,%s}"%(self.exc,self.msg) return "Uncomputable{%s,%s}"%(self.exc,self.msg)
def make_node(self,x): def make_node(self,x):
if x is None:
x = graph.Constant(theano.gof.type.generic,None)
return graph.Apply(self, [x], [x.type()] ) return graph.Apply(self, [x], [x.type()] )
def perform(self, node, inputs, out_storage): def perform(self, node, inputs, out_storage):
......
...@@ -13,10 +13,8 @@ import warnings ...@@ -13,10 +13,8 @@ import warnings
_logger = logging.getLogger('theano.gradient') _logger = logging.getLogger('theano.gradient')
import numpy # for numeric_grad import numpy # for numeric_grad
from collections import deque
import theano import theano
from theano.raise_op import Raise
from theano import gof from theano import gof
from theano.gof import Variable from theano.gof import Variable
...@@ -196,10 +194,10 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True): ...@@ -196,10 +194,10 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
class GradNotImplementedOp(gof.op.UncomputableOp): class GradNotImplementedOp(gof.op.UncomputableOp):
""" A BadGradOp representing a gradient that hasn't been implemented yet. """ An UncomputableOp representing a gradient that hasn't been implemented yet.
""" """
def __init__(self, op, x_pos): def __init__(self, op, x_pos, comment = ""):
""" """
op: A theano op whose grad is not implemented for some input op: A theano op whose grad is not implemented for some input
x_pos: An int, giving the index in the op's input list of x_pos: An int, giving the index in the op's input list of
...@@ -207,6 +205,8 @@ class GradNotImplementedOp(gof.op.UncomputableOp): ...@@ -207,6 +205,8 @@ class GradNotImplementedOp(gof.op.UncomputableOp):
(if op has unimplemented gradients for several inputs, (if op has unimplemented gradients for several inputs,
it must still return a separate UnimplementedGradOp for it must still return a separate UnimplementedGradOp for
each) each)
comment: An optional comment explaining why the gradient isn't
implemented.
""" """
assert isinstance(op, gof.Op) assert isinstance(op, gof.Op)
...@@ -214,11 +214,11 @@ class GradNotImplementedOp(gof.op.UncomputableOp): ...@@ -214,11 +214,11 @@ class GradNotImplementedOp(gof.op.UncomputableOp):
assert x_pos >= 0 assert x_pos >= 0
super(GradNotImplementedOp,self).__init__(NotImplementedError, super(GradNotImplementedOp,self).__init__(NotImplementedError,
"%s does not implement its gradient with respect to input %d" \ "%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos)) % (str(type(op)), x_pos, comment))
def grad_not_implemented(op, x_pos, x): def grad_not_implemented(op, x_pos, x, comment = ""):
""" """
Return an un-computable symbolic variable of type `x.type`. Return an un-computable symbolic variable of type `x.type`.
...@@ -227,9 +227,61 @@ def grad_not_implemented(op, x_pos, x): ...@@ -227,9 +227,61 @@ def grad_not_implemented(op, x_pos, x):
raised indicating that the gradient on the raised indicating that the gradient on the
`x_pos`'th input of `op` has not been implemented. Likewise if `x_pos`'th input of `op` has not been implemented. Likewise if
any call to theano.function involves this variable. any call to theano.function involves this variable.
Optionally adds a comment to the exception explaining why this
gradient is not implemented.
"""
return GradNotImplementedOp(op, x_pos, comment)(x)
class GradUndefinedError(Exception):
""" An exception raised upon attempts to use an undefined gradient.
"""
class GradUndefinedOp(gof.op.UncomputableOp):
""" An UncomputableOp representing a gradient that is mathematically
undefined.
"""
def __init__(self, op, x_pos, comment = ""):
"""
op: A theano op whose grad is mathematically undefined for
some input
x_pos: An int, giving the index in the op's input list of
a variable for which the gradient is undefined
(if op has undefined gradients for several inputs,
it must still return a separate GradUndefinedOp for
each)
comment: An optional comment explaining why the gradient isn't
defined.
"""
assert isinstance(op, gof.Op)
assert isinstance(x_pos, int)
assert x_pos >= 0
super(GradUndefinedOp,self).__init__(GradUndefinedError,
"%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos, comment))
def grad_undefined(op, x_pos, x, comment = ""):
"""
Return an un-computable symbolic variable of type `x.type`.
If any call to tensor.grad results in an expression containing this
un-computable variable, an exception (GradUndefinedError) will be
raised indicating that the gradient on the
`x_pos`'th input of `op` is mathematically undefined. Likewise if
any call to theano.function involves this variable.
Optionally adds a comment to the exception explaining why this
gradient is not defined.
""" """
return GradNotImplementedOp(op, x_pos)(x) return GradUndefinedOp(op, x_pos, comment)(x)
######################## ########################
......
...@@ -24,6 +24,7 @@ from theano.tensor.utils import hash_from_ndarray ...@@ -24,6 +24,7 @@ from theano.tensor.utils import hash_from_ndarray
# We use these exceptions as well. # We use these exceptions as well.
from theano.scalar import ComplexError, IntegerDivisionError from theano.scalar import ComplexError, IntegerDivisionError
import theano.scalar.sharedvar import theano.scalar.sharedvar
from theano.gradient import grad_undefined
### set up the external interface ### set up the external interface
from elemwise import Elemwise, DimShuffle, CAReduce, Sum from elemwise import Elemwise, DimShuffle, CAReduce, Sum
...@@ -2094,7 +2095,7 @@ class Shape(Op): ...@@ -2094,7 +2095,7 @@ class Shape(Op):
return [[len(in_shapes[0])]] return [[len(in_shapes[0])]]
def grad(self, inp, grads): def grad(self, inp, grads):
return [None] return [grad_undefined(self,0,inp[0])]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
return [None] return [None]
...@@ -2335,7 +2336,7 @@ class MaxAndArgmax(Op): ...@@ -2335,7 +2336,7 @@ class MaxAndArgmax(Op):
# Set the grad to the correct position. # Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad g_x = eq(xmax_pad, x) * g_max_pad
return g_x, None return g_x, grad_undefined(self, 1, axis)
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
...@@ -2916,7 +2917,7 @@ class Eye(gof.Op): ...@@ -2916,7 +2917,7 @@ class Eye(gof.Op):
return [out_shape] return [out_shape]
def grad(self, inp, grads): def grad(self, inp, grads):
return [None, None, None] return [ grad_undefined(self,i,inp[i]) for i in xrange(3) ]
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and self.dtype == other.dtype return type(self) == type(other) and self.dtype == other.dtype
...@@ -2926,6 +2927,27 @@ class Eye(gof.Op): ...@@ -2926,6 +2927,27 @@ class Eye(gof.Op):
def eye(n, m=None, k=0, dtype=None): def eye(n, m=None, k=0, dtype=None):
"""Return a 2-D array with ones on the diagonal and zeros elsewhere.
Parameters
----------
n : int
Number of rows in the output.
m : int, optional
    Number of columns in the output. If None, defaults to `n`.
k : int, optional
Index of the diagonal: 0 (the default) refers to the main diagonal,
a positive value refers to an upper diagonal, and a negative value
to a lower diagonal.
dtype : data-type, optional
Data-type of the returned array.
Returns
-------
I : ndarray of shape (N,M)
An array where all elements are equal to zero, except for the `k`-th
diagonal, whose values are equal to one.
"""
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
if m is None: if m is None:
......
...@@ -2,10 +2,10 @@ import theano ...@@ -2,10 +2,10 @@ import theano
from theano.tensor import basic as T from theano.tensor import basic as T
import numpy as N import numpy as N
#from util import strutil #from util import strutil
from theano import printing
from theano.tensor.blas_headers import blas_header_text from theano.tensor.blas_headers import blas_header_text
from theano.tensor.blas import ldflags from theano.tensor.blas import ldflags
from theano.misc import strutil from theano.misc import strutil
from theano.gradient import grad_undefined
#Note: not a true convolution because we don't bother with flipping the kernel #Note: not a true convolution because we don't bother with flipping the kernel
...@@ -36,7 +36,8 @@ from theano.misc import strutil ...@@ -36,7 +36,8 @@ from theano.misc import strutil
# (partial sum_s sum_u sum_v sum_a W[j,a, s,u,v] V[i,dr*p+s,dc*q+u,dt*r+v,a] ) / partial W[j,k,l,m,z]) # (partial sum_s sum_u sum_v sum_a W[j,a, s,u,v] V[i,dr*p+s,dc*q+u,dt*r+v,a] ) / partial W[j,k,l,m,z])
# = partial C / partial W[j,k,l,m,z] = sum_i sum_p sum_q sum_r (partial C /partial H[i,p,q,r,j] ) * V[i,dr*p+k,dc*q+l,dt*r+m,z] # = partial C / partial W[j,k,l,m,z] = sum_i sum_p sum_q sum_r (partial C /partial H[i,p,q,r,j] ) * V[i,dr*p+k,dc*q+l,dt*r+m,z]
#derivatives wrt V unimplemented for now. derivatives wrt dr, dc, dt are undefined since dr, dc, dt are natural numbers. #derivatives wrt V unimplemented for now. derivatives wrt dr, dc, dt are undefined since
#the output function is only defined when dr, dc, dt are natural numbers.
class Conv3D(theano.Op): class Conv3D(theano.Op):
""" 3D "convolution" of multiple filters on a minibatch (does not flip the kernel, moves kernel with a user specified stride) """ """ 3D "convolution" of multiple filters on a minibatch (does not flip the kernel, moves kernel with a user specified stride) """
...@@ -89,7 +90,10 @@ class Conv3D(theano.Op): ...@@ -89,7 +90,10 @@ class Conv3D(theano.Op):
dCdW = T.patternbroadcast(dCdW, W.broadcastable) dCdW = T.patternbroadcast(dCdW, W.broadcastable)
dCdb = T.sum(dCdH, axis=(0,1,2,3)) dCdb = T.sum(dCdH, axis=(0,1,2,3))
dCdb = T.patternbroadcast(dCdb, b.broadcastable) dCdb = T.patternbroadcast(dCdb, b.broadcastable)
dCdd = None #not differentiable, since d is not continuous dCdd = grad_undefined(self,3,inputs[3],
"The gradient of Conv3D with respect to the convolution"+\
" stride is undefined because Conv3D is only defined for"+\
" integer strides.")
if 'name' in dir(dCdH) and dCdH.name is not None: if 'name' in dir(dCdH) and dCdH.name is not None:
dCdH_name = dCdH.name dCdH_name = dCdH.name
......
...@@ -3,12 +3,13 @@ ...@@ -3,12 +3,13 @@
# UNIT TEST # UNIT TEST
# #
import unittest import unittest
import numpy
import theano import theano
from theano import gof from theano import gof
from theano.gradient import * from theano.gradient import grad_sources_inputs
from theano import gradient from theano import gradient
from theano.tensor.nnet.Conv3D import conv3D
from theano import config
def _grad_sources_inputs(*args): def _grad_sources_inputs(*args):
...@@ -265,6 +266,20 @@ def test_unimplemented_grad_func(): ...@@ -265,6 +266,20 @@ def test_unimplemented_grad_func():
except NotImplementedError: except NotImplementedError:
pass pass
def test_undefined_grad_func():
#tests that function compilation catches undefined grads in the graph
a = theano.tensor.vector()
b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
try:
f = theano.function([a],b)
assert 0
    #Note: it's important that the GradUndefinedOp is caught at
#COMPILATION time, not execution time.
    #If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph
except theano.gradient.GradUndefinedError:
pass
def test_unimplemented_grad_grad(): def test_unimplemented_grad_grad():
#tests that unimplemented grads are caught in the grad method #tests that unimplemented grads are caught in the grad method
...@@ -284,6 +299,24 @@ def test_unimplemented_grad_grad(): ...@@ -284,6 +299,24 @@ def test_unimplemented_grad_grad():
except NotImplementedError: except NotImplementedError:
pass pass
def test_undefined_grad_grad():
#tests that undefined grads are caught in the grad method
V = theano.tensor.TensorType(dtype=config.floatX,
broadcastable = (False,False,False,False,False))()
W = theano.tensor.TensorType(dtype=config.floatX,
broadcastable = (False, False, False, False, False))()
b = theano.tensor.vector()
d = theano.tensor.ivector()
Z = conv3D(V,W,b,d)
try:
g = theano.gradient.grad(Z.sum(),d)
assert False
except theano.gradient.GradUndefinedError:
pass
def test_grad_name(): def test_grad_name():
A = theano.tensor.matrix('A') A = theano.tensor.matrix('A')
x = theano.tensor.vector('x') x = theano.tensor.vector('x')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论