提交 010aa620 authored 作者: nouiz's avatar nouiz

Merge pull request #885 from goodfeli/undef_grad

Support for undefined gradients
...@@ -639,6 +639,8 @@ class UncomputableOp(Op): ...@@ -639,6 +639,8 @@ class UncomputableOp(Op):
return "Uncomputable{%s,%s}"%(self.exc,self.msg) return "Uncomputable{%s,%s}"%(self.exc,self.msg)
def make_node(self,x): def make_node(self,x):
if x is None:
x = graph.Constant(theano.gof.type.generic,None)
return graph.Apply(self, [x], [x.type()] ) return graph.Apply(self, [x], [x.type()] )
def perform(self, node, inputs, out_storage): def perform(self, node, inputs, out_storage):
......
...@@ -13,10 +13,8 @@ import warnings ...@@ -13,10 +13,8 @@ import warnings
_logger = logging.getLogger('theano.gradient') _logger = logging.getLogger('theano.gradient')
import numpy # for numeric_grad import numpy # for numeric_grad
from collections import deque
import theano import theano
from theano.raise_op import Raise
from theano import gof from theano import gof
from theano.gof import Variable from theano.gof import Variable
...@@ -196,10 +194,10 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True): ...@@ -196,10 +194,10 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
class GradNotImplementedOp(gof.op.UncomputableOp): class GradNotImplementedOp(gof.op.UncomputableOp):
""" A BadGradOp representing a gradient that hasn't been implemented yet. """ An UncomputableOp representing a gradient that hasn't been implemented yet.
""" """
def __init__(self, op, x_pos): def __init__(self, op, x_pos, comment = ""):
""" """
op: A theano op whose grad is not implemented for some input op: A theano op whose grad is not implemented for some input
x_pos: An int, giving the index in the op's input list of x_pos: An int, giving the index in the op's input list of
...@@ -207,6 +205,8 @@ class GradNotImplementedOp(gof.op.UncomputableOp): ...@@ -207,6 +205,8 @@ class GradNotImplementedOp(gof.op.UncomputableOp):
(if op has unimplemented gradients for several inputs, (if op has unimplemented gradients for several inputs,
it must still return a separate UnimplementedGradOp for it must still return a separate UnimplementedGradOp for
each) each)
comment: An optional comment explaining why the gradient isn't
implemented.
""" """
assert isinstance(op, gof.Op) assert isinstance(op, gof.Op)
...@@ -214,11 +214,11 @@ class GradNotImplementedOp(gof.op.UncomputableOp): ...@@ -214,11 +214,11 @@ class GradNotImplementedOp(gof.op.UncomputableOp):
assert x_pos >= 0 assert x_pos >= 0
super(GradNotImplementedOp,self).__init__(NotImplementedError, super(GradNotImplementedOp,self).__init__(NotImplementedError,
"%s does not implement its gradient with respect to input %d" \ "%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos)) % (str(type(op)), x_pos, comment))
def grad_not_implemented(op, x_pos, x): def grad_not_implemented(op, x_pos, x, comment = ""):
""" """
Return an un-computable symbolic variable of type `x.type`. Return an un-computable symbolic variable of type `x.type`.
...@@ -227,9 +227,61 @@ def grad_not_implemented(op, x_pos, x): ...@@ -227,9 +227,61 @@ def grad_not_implemented(op, x_pos, x):
raised indicating that the gradient on the raised indicating that the gradient on the
`x_pos`'th input of `op` has not been implemented. Likewise if `x_pos`'th input of `op` has not been implemented. Likewise if
any call to theano.function involves this variable. any call to theano.function involves this variable.
Optionally adds a comment to the exception explaining why this
gradient is not implemented.
"""
return GradNotImplementedOp(op, x_pos, comment)(x)
class GradUndefinedError(Exception):
""" An exception raised upon attempts to use an undefined gradient.
"""
class GradUndefinedOp(gof.op.UncomputableOp):
""" An UncomputableOp representing a gradient that is mathematically
undefined.
"""
def __init__(self, op, x_pos, comment = ""):
"""
op: A theano op whose grad is mathematically undefined for
some input
x_pos: An int, giving the index in the op's input list of
a variable for which the gradient is undefined
(if op has undefined gradients for several inputs,
it must still return a separate GradUndefinedOp for
each)
comment: An optional comment explaining why the gradient isn't
defined.
"""
assert isinstance(op, gof.Op)
assert isinstance(x_pos, int)
assert x_pos >= 0
super(GradUndefinedOp,self).__init__(GradUndefinedError,
"%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos, comment))
def grad_undefined(op, x_pos, x, comment = ""):
"""
Return an un-computable symbolic variable of type `x.type`.
If any call to tensor.grad results in an expression containing this
un-computable variable, an exception (GradUndefinedError) will be
raised indicating that the gradient on the
`x_pos`'th input of `op` is mathematically undefined. Likewise if
any call to theano.function involves this variable.
Optionally adds a comment to the exception explaining why this
gradient is not defined.
""" """
return GradNotImplementedOp(op, x_pos)(x) return GradUndefinedOp(op, x_pos, comment)(x)
######################## ########################
......
...@@ -24,6 +24,7 @@ from theano.tensor.utils import hash_from_ndarray ...@@ -24,6 +24,7 @@ from theano.tensor.utils import hash_from_ndarray
# We use these exceptions as well. # We use these exceptions as well.
from theano.scalar import ComplexError, IntegerDivisionError from theano.scalar import ComplexError, IntegerDivisionError
import theano.scalar.sharedvar import theano.scalar.sharedvar
from theano.gradient import grad_undefined
### set up the external interface ### set up the external interface
from elemwise import Elemwise, DimShuffle, CAReduce, Sum from elemwise import Elemwise, DimShuffle, CAReduce, Sum
...@@ -2094,7 +2095,7 @@ class Shape(Op): ...@@ -2094,7 +2095,7 @@ class Shape(Op):
return [[len(in_shapes[0])]] return [[len(in_shapes[0])]]
def grad(self, inp, grads): def grad(self, inp, grads):
return [None] return [grad_undefined(self,0,inp[0])]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
return [None] return [None]
...@@ -2335,7 +2336,7 @@ class MaxAndArgmax(Op): ...@@ -2335,7 +2336,7 @@ class MaxAndArgmax(Op):
# Set the grad to the correct position. # Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad g_x = eq(xmax_pad, x) * g_max_pad
return g_x, None return g_x, grad_undefined(self, 1, axis)
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
...@@ -2916,7 +2917,7 @@ class Eye(gof.Op): ...@@ -2916,7 +2917,7 @@ class Eye(gof.Op):
return [out_shape] return [out_shape]
def grad(self, inp, grads): def grad(self, inp, grads):
return [None, None, None] return [ grad_undefined(self,i,inp[i]) for i in xrange(3) ]
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and self.dtype == other.dtype return type(self) == type(other) and self.dtype == other.dtype
...@@ -2926,6 +2927,27 @@ class Eye(gof.Op): ...@@ -2926,6 +2927,27 @@ class Eye(gof.Op):
def eye(n, m=None, k=0, dtype=None): def eye(n, m=None, k=0, dtype=None):
"""Return a 2-D array with ones on the diagonal and zeros elsewhere.
Parameters
----------
n : int
Number of rows in the output.
m : int, optional
    Number of columns in the output. If None, defaults to `n`.
k : int, optional
Index of the diagonal: 0 (the default) refers to the main diagonal,
a positive value refers to an upper diagonal, and a negative value
to a lower diagonal.
dtype : data-type, optional
Data-type of the returned array.
Returns
-------
I : ndarray of shape (N,M)
An array where all elements are equal to zero, except for the `k`-th
diagonal, whose values are equal to one.
"""
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
if m is None: if m is None:
......
...@@ -2,10 +2,10 @@ import theano ...@@ -2,10 +2,10 @@ import theano
from theano.tensor import basic as T from theano.tensor import basic as T
import numpy as N import numpy as N
#from util import strutil #from util import strutil
from theano import printing
from theano.tensor.blas_headers import blas_header_text from theano.tensor.blas_headers import blas_header_text
from theano.tensor.blas import ldflags from theano.tensor.blas import ldflags
from theano.misc import strutil from theano.misc import strutil
from theano.gradient import grad_undefined
#Note: not a true convolution because we don't bother with flipping the kernel #Note: not a true convolution because we don't bother with flipping the kernel
...@@ -36,7 +36,8 @@ from theano.misc import strutil ...@@ -36,7 +36,8 @@ from theano.misc import strutil
# (partial sum_s sum_u sum_v sum_a W[j,a, s,u,v] V[i,dr*p+s,dc*q+u,dt*r+v,a] ) / partial W[j,k,l,m,z]) # (partial sum_s sum_u sum_v sum_a W[j,a, s,u,v] V[i,dr*p+s,dc*q+u,dt*r+v,a] ) / partial W[j,k,l,m,z])
# = partial C / partial W[j,k,l,m,z] = sum_i sum_p sum_q sum_r (partial C /partial H[i,p,q,r,j] ) * V[i,dr*p+k,dc*q+l,dt*r+m,z] # = partial C / partial W[j,k,l,m,z] = sum_i sum_p sum_q sum_r (partial C /partial H[i,p,q,r,j] ) * V[i,dr*p+k,dc*q+l,dt*r+m,z]
#derivatives wrt V unimplemented for now. derivatives wrt dr, dc, dt are undefined since dr, dc, dt are natural numbers. #derivatives wrt V unimplemented for now. derivatives wrt dr, dc, dt are undefined since
#the output function is only defined when dr, dc, dt are natural numbers.
class Conv3D(theano.Op): class Conv3D(theano.Op):
""" 3D "convolution" of multiple filters on a minibatch (does not flip the kernel, moves kernel with a user specified stride) """ """ 3D "convolution" of multiple filters on a minibatch (does not flip the kernel, moves kernel with a user specified stride) """
...@@ -89,7 +90,10 @@ class Conv3D(theano.Op): ...@@ -89,7 +90,10 @@ class Conv3D(theano.Op):
dCdW = T.patternbroadcast(dCdW, W.broadcastable) dCdW = T.patternbroadcast(dCdW, W.broadcastable)
dCdb = T.sum(dCdH, axis=(0,1,2,3)) dCdb = T.sum(dCdH, axis=(0,1,2,3))
dCdb = T.patternbroadcast(dCdb, b.broadcastable) dCdb = T.patternbroadcast(dCdb, b.broadcastable)
dCdd = None #not differentiable, since d is not continuous dCdd = grad_undefined(self,3,inputs[3],
"The gradient of Conv3D with respect to the convolution"+\
" stride is undefined because Conv3D is only defined for"+\
" integer strides.")
if 'name' in dir(dCdH) and dCdH.name is not None: if 'name' in dir(dCdH) and dCdH.name is not None:
dCdH_name = dCdH.name dCdH_name = dCdH.name
......
...@@ -3,12 +3,13 @@ ...@@ -3,12 +3,13 @@
# UNIT TEST # UNIT TEST
# #
import unittest import unittest
import numpy
import theano import theano
from theano import gof from theano import gof
from theano.gradient import * from theano.gradient import grad_sources_inputs
from theano import gradient from theano import gradient
from theano.tensor.nnet.Conv3D import conv3D
from theano import config
def _grad_sources_inputs(*args): def _grad_sources_inputs(*args):
...@@ -265,6 +266,20 @@ def test_unimplemented_grad_func(): ...@@ -265,6 +266,20 @@ def test_unimplemented_grad_func():
except NotImplementedError: except NotImplementedError:
pass pass
def test_undefined_grad_func():
#tests that function compilation catches undefined grads in the graph
a = theano.tensor.vector()
b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
try:
f = theano.function([a],b)
assert 0
    #Note: it's important that the GradUndefinedOp is caught at
#COMPILATION time, not execution time.
    #If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph
except theano.gradient.GradUndefinedError:
pass
def test_unimplemented_grad_grad(): def test_unimplemented_grad_grad():
#tests that unimplemented grads are caught in the grad method #tests that unimplemented grads are caught in the grad method
...@@ -284,6 +299,24 @@ def test_unimplemented_grad_grad(): ...@@ -284,6 +299,24 @@ def test_unimplemented_grad_grad():
except NotImplementedError: except NotImplementedError:
pass pass
def test_undefined_grad_grad():
#tests that undefined grads are caught in the grad method
V = theano.tensor.TensorType(dtype=config.floatX,
broadcastable = (False,False,False,False,False))()
W = theano.tensor.TensorType(dtype=config.floatX,
broadcastable = (False, False, False, False, False))()
b = theano.tensor.vector()
d = theano.tensor.ivector()
Z = conv3D(V,W,b,d)
try:
g = theano.gradient.grad(Z.sum(),d)
assert False
except theano.gradient.GradUndefinedError:
pass
def test_grad_name(): def test_grad_name():
A = theano.tensor.matrix('A') A = theano.tensor.matrix('A')
x = theano.tensor.vector('x') x = theano.tensor.vector('x')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论