提交 cb94334d authored 作者: Ian Goodfellow's avatar Ian Goodfellow

made unimplemented and undefined grads handled by NaNType

上级 122d7246
......@@ -21,6 +21,7 @@ from theano.gof import Variable
from theano.gof.python25 import all
import theano.gof.utils
tensor = None
from theano.gof.nan_type import NaNType
_msg_retType = 'op.grad(...) returned a non-list'
_msg_badlen = 'op.grad(...) returned wrong number of gradients'
......@@ -193,32 +194,6 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
gmap[r] = g_r
return gmap
class GradNotImplementedOp(gof.op.UncomputableOp):
""" An UncomputableOp representing a gradient that hasn't been implemented yet.
"""
def __init__(self, op, x_pos, comment = ""):
"""
op: A theano op whose grad is not implemented for some input
x_pos: An int, giving the index in the op's input list of
a variable for which the gradient is not implemented
(if op has unimplemented gradients for several inputs,
it must still return a separate UnimplementedGradOp for
each)
comment: An optional comment explaining why the gradient isn't
implemented.
"""
assert isinstance(op, gof.Op)
assert isinstance(x_pos, int)
assert x_pos >= 0
super(GradNotImplementedOp,self).__init__(NotImplementedError,
"%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos, comment))
def grad_not_implemented(op, x_pos, x, comment = ""):
"""
Return an un-computable symbolic variable of type `x.type`.
......@@ -233,38 +208,9 @@ def grad_not_implemented(op, x_pos, x, comment = ""):
gradient is not implemented.
"""
return GradNotImplementedOp(op, x_pos, comment)(x)
class GradUndefinedError(Exception):
""" An exception raised upon attempts to use an undefined gradient.
"""
class GradUndefinedOp(gof.op.UncomputableOp):
""" An UncomputableOp representing a gradient that is mathematically
undefined.
"""
def __init__(self, op, x_pos, comment = ""):
"""
op: A theano op whose grad is mathematically undefined for
some input
x_pos: An int, giving the index in the op's input list of
a variable for which the gradient is undefined
(if op has undefined gradients for several inputs,
it must still return a separate GradUndefinedOp for
each)
comment: An optional comment explaining why the gradient isn't
defined.
"""
assert isinstance(op, gof.Op)
assert isinstance(x_pos, int)
assert x_pos >= 0
super(GradUndefinedOp,self).__init__(GradUndefinedError,
"%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos, comment))
return NaNType("This variable is NaN because the grad method for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
"not implemented.")()
def grad_undefined(op, x_pos, x, comment = ""):
"""
......@@ -280,7 +226,9 @@ def grad_undefined(op, x_pos, x, comment = ""):
gradient is not defined.
"""
return GradUndefinedOp(op, x_pos, comment)(x)
return NaNType("This variable is NaN because the gradient for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
"mathematically undefined.")()
......@@ -503,6 +451,11 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
if tensor is None:
from theano import tensor
if isinstance(cost.type, NaNType):
raise ValueError("Can't differentiate a NaN cost. cost is NaN because "+\
cost.type.why_nan)
if consider_constant is None:
consider_constant = []
else:
......@@ -593,6 +546,9 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
term_dict[node] = node.op.grad(node.inputs,
[access_grad_cache(var) for var in node.outputs])
for i in xrange(len(term_dict[node])):
if isinstance(term_dict[node][i].type,NaNType):
raise TypeError("tensor.grad encountered a NaN. "+\
term_dict[node][i].type.why_nan)
if term_dict[node][i] is None:
term_dict[node][i] = tensor.zeros_like(node.inputs[i])
return term_dict[node]
......
......@@ -2217,6 +2217,7 @@ class T_argmin_argmax(unittest.TestCase):
def test_grad_argmin(self):
data = rand(2, 3)
n = as_tensor_variable(data)
n.name = 'n'
#test grad of argmin
utt.verify_grad(lambda v: argmin(v, axis=-1), [data])
......@@ -2228,7 +2229,11 @@ class T_argmin_argmax(unittest.TestCase):
utt.verify_grad(lambda v: argmin(v.flatten()), [data])
try:
grad(argmin(n, axis=-1), n)
cost = argmin(n, axis=-1)
cost.name = None
g = grad(cost, n)
from theano.printing import min_informative_str
print min_informative_str(g)
raise Exception('Expected an error')
except TypeError:
pass
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论