提交 cb94334d 作者: Ian Goodfellow

made unimplemented and undefined grads handled by NaNType

上级 122d7246
...@@ -21,6 +21,7 @@ from theano.gof import Variable ...@@ -21,6 +21,7 @@ from theano.gof import Variable
from theano.gof.python25 import all from theano.gof.python25 import all
import theano.gof.utils import theano.gof.utils
tensor = None tensor = None
from theano.gof.nan_type import NaNType
_msg_retType = 'op.grad(...) returned a non-list' _msg_retType = 'op.grad(...) returned a non-list'
_msg_badlen = 'op.grad(...) returned wrong number of gradients' _msg_badlen = 'op.grad(...) returned wrong number of gradients'
...@@ -193,32 +194,6 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True): ...@@ -193,32 +194,6 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
gmap[r] = g_r gmap[r] = g_r
return gmap return gmap
class GradNotImplementedOp(gof.op.UncomputableOp):
""" An UncomputableOp representing a gradient that hasn't been implemented yet.
"""
def __init__(self, op, x_pos, comment = ""):
"""
op: A theano op whose grad is not implemented for some input
x_pos: An int, giving the index in the op's input list of
a variable for which the gradient is not implemented
(if op has unimplemented gradients for several inputs,
it must still return a separate UnimplementedGradOp for
each)
comment: An optional comment explaining why the gradient isn't
implemented.
"""
assert isinstance(op, gof.Op)
assert isinstance(x_pos, int)
assert x_pos >= 0
super(GradNotImplementedOp,self).__init__(NotImplementedError,
"%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos, comment))
def grad_not_implemented(op, x_pos, x, comment = ""): def grad_not_implemented(op, x_pos, x, comment = ""):
""" """
Return an un-computable symbolic variable of type `x.type`. Return an un-computable symbolic variable of type `x.type`.
...@@ -233,38 +208,9 @@ def grad_not_implemented(op, x_pos, x, comment = ""): ...@@ -233,38 +208,9 @@ def grad_not_implemented(op, x_pos, x, comment = ""):
gradient is not implemented. gradient is not implemented.
""" """
return GradNotImplementedOp(op, x_pos, comment)(x) return NaNType("This variable is NaN because the grad method for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
class GradUndefinedError(Exception): "not implemented.")()
""" An exception raised upon attempts to use an undefined gradient.
"""
class GradUndefinedOp(gof.op.UncomputableOp):
""" An UncomputableOp representing a gradient that is mathematically
undefined.
"""
def __init__(self, op, x_pos, comment = ""):
"""
op: A theano op whose grad is mathematically undefined for
some input
x_pos: An int, giving the index in the op's input list of
a variable for which the gradient is undefined
(if op has undefined gradients for several inputs,
it must still return a separate GradUndefinedOp for
each)
comment: An optional comment explaining why the gradient isn't
defined.
"""
assert isinstance(op, gof.Op)
assert isinstance(x_pos, int)
assert x_pos >= 0
super(GradUndefinedOp,self).__init__(GradUndefinedError,
"%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos, comment))
def grad_undefined(op, x_pos, x, comment = ""): def grad_undefined(op, x_pos, x, comment = ""):
""" """
...@@ -280,7 +226,9 @@ def grad_undefined(op, x_pos, x, comment = ""): ...@@ -280,7 +226,9 @@ def grad_undefined(op, x_pos, x, comment = ""):
gradient is not defined. gradient is not defined.
""" """
return GradUndefinedOp(op, x_pos, comment)(x) return NaNType("This variable is NaN because the gradient for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
"mathematically undefined.")()
...@@ -503,6 +451,11 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore ...@@ -503,6 +451,11 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
if tensor is None: if tensor is None:
from theano import tensor from theano import tensor
if isinstance(cost.type, NaNType):
raise ValueError("Can't differentiate a NaN cost. cost is NaN because "+\
cost.type.why_nan)
if consider_constant is None: if consider_constant is None:
consider_constant = [] consider_constant = []
else: else:
...@@ -593,6 +546,9 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore ...@@ -593,6 +546,9 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
term_dict[node] = node.op.grad(node.inputs, term_dict[node] = node.op.grad(node.inputs,
[access_grad_cache(var) for var in node.outputs]) [access_grad_cache(var) for var in node.outputs])
for i in xrange(len(term_dict[node])): for i in xrange(len(term_dict[node])):
if isinstance(term_dict[node][i].type,NaNType):
raise TypeError("tensor.grad encountered a NaN. "+\
term_dict[node][i].type.why_nan)
if term_dict[node][i] is None: if term_dict[node][i] is None:
term_dict[node][i] = tensor.zeros_like(node.inputs[i]) term_dict[node][i] = tensor.zeros_like(node.inputs[i])
return term_dict[node] return term_dict[node]
......
...@@ -2217,6 +2217,7 @@ class T_argmin_argmax(unittest.TestCase): ...@@ -2217,6 +2217,7 @@ class T_argmin_argmax(unittest.TestCase):
def test_grad_argmin(self): def test_grad_argmin(self):
data = rand(2, 3) data = rand(2, 3)
n = as_tensor_variable(data) n = as_tensor_variable(data)
n.name = 'n'
#test grad of argmin #test grad of argmin
utt.verify_grad(lambda v: argmin(v, axis=-1), [data]) utt.verify_grad(lambda v: argmin(v, axis=-1), [data])
...@@ -2228,7 +2229,11 @@ class T_argmin_argmax(unittest.TestCase): ...@@ -2228,7 +2229,11 @@ class T_argmin_argmax(unittest.TestCase):
utt.verify_grad(lambda v: argmin(v.flatten()), [data]) utt.verify_grad(lambda v: argmin(v.flatten()), [data])
try: try:
grad(argmin(n, axis=-1), n) cost = argmin(n, axis=-1)
cost.name = None
g = grad(cost, n)
from theano.printing import min_informative_str
print min_informative_str(g)
raise Exception('Expected an error') raise Exception('Expected an error')
except TypeError: except TypeError:
pass pass
......
...@@ -6,17 +6,22 @@ import unittest ...@@ -6,17 +6,22 @@ import unittest
import theano import theano
from theano import gof from theano import gof
from theano.gradient import grad_sources_inputs #from theano.gradient import grad_sources_inputs
from theano import gradient from theano import gradient
from theano.tensor.nnet.Conv3D import conv3D from theano.tensor.nnet.Conv3D import conv3D
from theano import config from theano import config
def _grad_sources_inputs(*args): #def _grad_sources_inputs(*args):
# warn_type was introduced after this code, it complains throughout for nothing. # warn_type was introduced after this code, it complains throughout for nothing.
return grad_sources_inputs(warn_type=False, *args) # return grad_sources_inputs(warn_type=False, *args)
class test_grad_sources_inputs(unittest.TestCase): if 0:
#most of these tests are no longer relevant now that grad_sources_inputs is gone
#also, some of our policies about what is allowed or not have changed
#nonetheless, it may be a good idea to resurrect some of these tests and write
#them in terms of tensor.grad directly
class test_grad_sources_inputs(unittest.TestCase):
def test_retNone1(self): def test_retNone1(self):
"""Test that it is not ok to return None from op.grad()""" """Test that it is not ok to return None from op.grad()"""
class retNone(gof.op.Op): class retNone(gof.op.Op):
...@@ -257,13 +262,13 @@ def test_unimplemented_grad_func(): ...@@ -257,13 +262,13 @@ def test_unimplemented_grad_func():
a = theano.tensor.vector() a = theano.tensor.vector()
b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a) b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a)
try: try:
f = theano.function([a], b) f = theano.function([a], b, on_unused_input = 'ignore')
assert 0 assert 0
#Note: it's important that the NotImplementedGradOp is caught #Note: it's important that the NotImplementedGradOp is caught
#at COMPILATION time, not execution time. #at COMPILATION time, not execution time.
#If the uncomputable variable is, for example, multiplied by 0, #If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph. #it could be optimized out of the final graph.
except NotImplementedError: except TypeError:
pass pass
def test_undefined_grad_func(): def test_undefined_grad_func():
...@@ -271,13 +276,13 @@ def test_undefined_grad_func(): ...@@ -271,13 +276,13 @@ def test_undefined_grad_func():
a = theano.tensor.vector() a = theano.tensor.vector()
b = theano.gradient.grad_undefined(theano.tensor.add, 0, a) b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
try: try:
f = theano.function([a],b) f = theano.function([a],b, on_unused_input = 'ignore')
assert 0 assert 0
#Note: it's important that the GradUndefinedOp is cauhgt at #Note: it's important that the GradUndefinedOp is caught at
#COMPILATION time, not execution time. #COMPILATION time, not execution time.
#If the uncomputable variable is, for example, multiplied by 0, #If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph #it could be optimized out of the final graph
except theano.gradient.GradUndefinedError: except TypeError:
pass pass
def test_unimplemented_grad_grad(): def test_unimplemented_grad_grad():
...@@ -296,7 +301,7 @@ def test_unimplemented_grad_grad(): ...@@ -296,7 +301,7 @@ def test_unimplemented_grad_grad():
try: try:
g = theano.gradient.grad(b,a) g = theano.gradient.grad(b,a)
assert False assert False
except NotImplementedError: except TypeError:
pass pass
def test_undefined_grad_grad(): def test_undefined_grad_grad():
...@@ -314,7 +319,7 @@ def test_undefined_grad_grad(): ...@@ -314,7 +319,7 @@ def test_undefined_grad_grad():
try: try:
g = theano.gradient.grad(Z.sum(),d) g = theano.gradient.grad(Z.sum(),d)
assert False assert False
except theano.gradient.GradUndefinedError: except TypeError:
pass pass
def test_grad_name(): def test_grad_name():
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论