提交 cb94334d authored 作者: Ian Goodfellow's avatar Ian Goodfellow

made unimplemented and undefined grads handled by NaNType

上级 122d7246
......@@ -21,6 +21,7 @@ from theano.gof import Variable
from theano.gof.python25 import all
import theano.gof.utils
tensor = None
from theano.gof.nan_type import NaNType
_msg_retType = 'op.grad(...) returned a non-list'
_msg_badlen = 'op.grad(...) returned wrong number of gradients'
......@@ -193,32 +194,6 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
gmap[r] = g_r
return gmap
class GradNotImplementedOp(gof.op.UncomputableOp):
    """An UncomputableOp standing in for a gradient that has not been
    implemented yet.

    Attempting to compile/evaluate a graph containing this op raises
    NotImplementedError (via the UncomputableOp machinery).
    """

    def __init__(self, op, x_pos, comment=""):
        """
        op: the theano op whose gradient is not implemented for one of
            its inputs
        x_pos: index (>= 0) into ``op``'s input list of the variable
            whose gradient is unimplemented. An op with several
            unimplemented gradients must still return a separate
            op of this kind for each such input.
        comment: optional text explaining why the gradient is not
            implemented.
        """
        assert isinstance(op, gof.Op)
        assert isinstance(x_pos, int)
        assert x_pos >= 0
        message = (
            "%s does not implement its gradient with respect to input %d. %s"
            % (str(type(op)), x_pos, comment))
        super(GradNotImplementedOp, self).__init__(NotImplementedError,
                                                   message)
def grad_not_implemented(op, x_pos, x, comment = ""):
"""
Return an un-computable symbolic variable of type `x.type`.
......@@ -233,38 +208,9 @@ def grad_not_implemented(op, x_pos, x, comment = ""):
gradient is not implemented.
"""
return GradNotImplementedOp(op, x_pos, comment)(x)
class GradUndefinedError(Exception):
    """Raised when something attempts to use a gradient that is
    mathematically undefined.
    """
class GradUndefinedOp(gof.op.UncomputableOp):
    """An UncomputableOp representing a gradient that is mathematically
    undefined.

    Attempting to compile/evaluate a graph containing this op raises
    GradUndefinedError (via the UncomputableOp machinery).
    """

    def __init__(self, op, x_pos, comment=""):
        """
        op: a theano op whose gradient is mathematically undefined for
            some input
        x_pos: an int >= 0 giving the index in ``op``'s input list of
            the variable for which the gradient is undefined (if op has
            undefined gradients for several inputs, it must still
            return a separate GradUndefinedOp for each)
        comment: an optional comment explaining why the gradient isn't
            defined.
        """
        assert isinstance(op, gof.Op)
        assert isinstance(x_pos, int)
        assert x_pos >= 0
        # Fix: the previous message was copy-pasted from
        # GradNotImplementedOp and wrongly said the gradient "is not
        # implemented"; this op means the gradient does not exist at all.
        super(GradUndefinedOp, self).__init__(
            GradUndefinedError,
            "The gradient of %s with respect to input %d is "
            "mathematically undefined. %s"
            % (str(type(op)), x_pos, comment))
return NaNType("This variable is NaN because the grad method for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
"not implemented.")()
def grad_undefined(op, x_pos, x, comment = ""):
"""
......@@ -280,7 +226,9 @@ def grad_undefined(op, x_pos, x, comment = ""):
gradient is not defined.
"""
return GradUndefinedOp(op, x_pos, comment)(x)
return NaNType("This variable is NaN because the gradient for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
"mathematically undefined.")()
......@@ -503,6 +451,11 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
if tensor is None:
from theano import tensor
if isinstance(cost.type, NaNType):
raise ValueError("Can't differentiate a NaN cost. cost is NaN because "+\
cost.type.why_nan)
if consider_constant is None:
consider_constant = []
else:
......@@ -593,6 +546,9 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
term_dict[node] = node.op.grad(node.inputs,
[access_grad_cache(var) for var in node.outputs])
for i in xrange(len(term_dict[node])):
if isinstance(term_dict[node][i].type,NaNType):
raise TypeError("tensor.grad encountered a NaN. "+\
term_dict[node][i].type.why_nan)
if term_dict[node][i] is None:
term_dict[node][i] = tensor.zeros_like(node.inputs[i])
return term_dict[node]
......
......@@ -2217,6 +2217,7 @@ class T_argmin_argmax(unittest.TestCase):
def test_grad_argmin(self):
data = rand(2, 3)
n = as_tensor_variable(data)
n.name = 'n'
#test grad of argmin
utt.verify_grad(lambda v: argmin(v, axis=-1), [data])
......@@ -2228,7 +2229,11 @@ class T_argmin_argmax(unittest.TestCase):
utt.verify_grad(lambda v: argmin(v.flatten()), [data])
try:
grad(argmin(n, axis=-1), n)
cost = argmin(n, axis=-1)
cost.name = None
g = grad(cost, n)
from theano.printing import min_informative_str
print min_informative_str(g)
raise Exception('Expected an error')
except TypeError:
pass
......
......@@ -6,17 +6,22 @@ import unittest
import theano
from theano import gof
from theano.gradient import grad_sources_inputs
#from theano.gradient import grad_sources_inputs
from theano import gradient
from theano.tensor.nnet.Conv3D import conv3D
from theano import config
def _grad_sources_inputs(*args):
#def _grad_sources_inputs(*args):
# warn_type was introduced after this code; it complains throughout for nothing.
return grad_sources_inputs(warn_type=False, *args)
# return grad_sources_inputs(warn_type=False, *args)
class test_grad_sources_inputs(unittest.TestCase):
if 0:
#most of these tests are no longer relevant now that grad_sources_inputs is gone
#also, some of our policies about what is allowed or not have changed
#nonetheless, it may be a good idea to resurrect some of these tests and write
#them in terms of tensor.grad directly
class test_grad_sources_inputs(unittest.TestCase):
def test_retNone1(self):
"""Test that it is not ok to return None from op.grad()"""
class retNone(gof.op.Op):
......@@ -257,13 +262,13 @@ def test_unimplemented_grad_func():
a = theano.tensor.vector()
b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a)
try:
f = theano.function([a], b)
f = theano.function([a], b, on_unused_input = 'ignore')
assert 0
#Note: it's important that the NotImplementedGradOp is caught
#at COMPILATION time, not execution time.
#If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph.
except NotImplementedError:
except TypeError:
pass
def test_undefined_grad_func():
......@@ -271,13 +276,13 @@ def test_undefined_grad_func():
a = theano.tensor.vector()
b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
try:
f = theano.function([a],b)
f = theano.function([a],b, on_unused_input = 'ignore')
assert 0
#Note: it's important that the GradUndefinedOp is cauhgt at
#Note: it's important that the GradUndefinedOp is caught at
#COMPILATION time, not execution time.
#If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph
except theano.gradient.GradUndefinedError:
except TypeError:
pass
def test_unimplemented_grad_grad():
......@@ -296,7 +301,7 @@ def test_unimplemented_grad_grad():
try:
g = theano.gradient.grad(b,a)
assert False
except NotImplementedError:
except TypeError:
pass
def test_undefined_grad_grad():
......@@ -314,7 +319,7 @@ def test_undefined_grad_grad():
try:
g = theano.gradient.grad(Z.sum(),d)
assert False
except theano.gradient.GradUndefinedError:
except TypeError:
pass
def test_grad_name():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论