提交 cb94334d authored 作者: Ian Goodfellow's avatar Ian Goodfellow

made unimplemented and undefined grads handled by NaNType

上级 122d7246
......@@ -21,6 +21,7 @@ from theano.gof import Variable
from theano.gof.python25 import all
import theano.gof.utils
tensor = None
from theano.gof.nan_type import NaNType
_msg_retType = 'op.grad(...) returned a non-list'
_msg_badlen = 'op.grad(...) returned wrong number of gradients'
......@@ -193,32 +194,6 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
gmap[r] = g_r
return gmap
class GradNotImplementedOp(gof.op.UncomputableOp):
""" An UncomputableOp representing a gradient that hasn't been implemented yet.
"""
def __init__(self, op, x_pos, comment = ""):
"""
op: A theano op whose grad is not implemented for some input
x_pos: An int, giving the index in the op's input list of
a variable for which the gradient is not implemented
(if op has unimplemented gradients for several inputs,
it must still return a separate UnimplementedGradOp for
each)
comment: An optional comment explaining why the gradient isn't
implemented.
"""
assert isinstance(op, gof.Op)
assert isinstance(x_pos, int)
assert x_pos >= 0
super(GradNotImplementedOp,self).__init__(NotImplementedError,
"%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos, comment))
def grad_not_implemented(op, x_pos, x, comment = ""):
"""
Return an un-computable symbolic variable of type `x.type`.
......@@ -233,38 +208,9 @@ def grad_not_implemented(op, x_pos, x, comment = ""):
gradient is not implemented.
"""
return GradNotImplementedOp(op, x_pos, comment)(x)
class GradUndefinedError(Exception):
""" An exception raised upon attempts to use an undefined gradient.
"""
class GradUndefinedOp(gof.op.UncomputableOp):
""" An UncomputableOp representing a gradient that is mathematically
undefined.
"""
def __init__(self, op, x_pos, comment = ""):
"""
op: A theano op whose grad is mathematically undefined for
some input
x_pos: An int, giving the index in the op's input list of
a variable for which the gradient is undefined
(if op has undefined gradients for several inputs,
it must still return a separate GradUndefinedOp for
each)
comment: An optional comment explaining why the gradient isn't
defined.
"""
assert isinstance(op, gof.Op)
assert isinstance(x_pos, int)
assert x_pos >= 0
super(GradUndefinedOp,self).__init__(GradUndefinedError,
"%s does not implement its gradient with respect to input %d. %s" \
% (str(type(op)), x_pos, comment))
return NaNType("This variable is NaN because the grad method for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
"not implemented.")()
def grad_undefined(op, x_pos, x, comment = ""):
"""
......@@ -280,7 +226,9 @@ def grad_undefined(op, x_pos, x, comment = ""):
gradient is not defined.
"""
return GradUndefinedOp(op, x_pos, comment)(x)
return NaNType("This variable is NaN because the gradient for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
"mathematically undefined.")()
......@@ -503,6 +451,11 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
if tensor is None:
from theano import tensor
if isinstance(cost.type, NaNType):
raise ValueError("Can't differentiate a NaN cost. cost is NaN because "+\
cost.type.why_nan)
if consider_constant is None:
consider_constant = []
else:
......@@ -593,6 +546,9 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
term_dict[node] = node.op.grad(node.inputs,
[access_grad_cache(var) for var in node.outputs])
# Post-process the gradient terms returned by op.grad for this node.
for i, term in enumerate(term_dict[node]):
    if term is None:
        # None must be handled before anything touches `term.type`:
        # checking the type first raised AttributeError on None.
        # A None term is replaced by zeros shaped like the input.
        term_dict[node][i] = tensor.zeros_like(node.inputs[i])
    elif isinstance(term.type, NaNType):
        # A NaN-typed term marks an unimplemented or undefined
        # gradient; fail here and report the reason stored on the type.
        raise TypeError("tensor.grad encountered a NaN. " +
                        term.type.why_nan)
return term_dict[node]
......
......@@ -2217,6 +2217,7 @@ class T_argmin_argmax(unittest.TestCase):
def test_grad_argmin(self):
data = rand(2, 3)
n = as_tensor_variable(data)
n.name = 'n'
#test grad of argmin
utt.verify_grad(lambda v: argmin(v, axis=-1), [data])
......@@ -2228,7 +2229,11 @@ class T_argmin_argmax(unittest.TestCase):
utt.verify_grad(lambda v: argmin(v.flatten()), [data])
try:
grad(argmin(n, axis=-1), n)
cost = argmin(n, axis=-1)
cost.name = None
g = grad(cost, n)
from theano.printing import min_informative_str
print min_informative_str(g)
raise Exception('Expected an error')
except TypeError:
pass
......
......@@ -6,264 +6,269 @@ import unittest
import theano
from theano import gof
from theano.gradient import grad_sources_inputs
#from theano.gradient import grad_sources_inputs
from theano import gradient
from theano.tensor.nnet.Conv3D import conv3D
from theano import config
def _grad_sources_inputs(*args):
#def _grad_sources_inputs(*args):
# warn_type was introduced after this code, it complains throughout for nothing.
return grad_sources_inputs(warn_type=False, *args)
# return grad_sources_inputs(warn_type=False, *args)
class test_grad_sources_inputs(unittest.TestCase):
def test_retNone1(self):
"""Test that it is not ok to return None from op.grad()"""
class retNone(gof.op.Op):
def make_node(self):
inputs = [gof.generic()]
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
x, = inp
gz, = grads
pass
a = retNone().make_node()
try:
_grad_sources_inputs([(a.out, 1)], None)
except ValueError, e:
self.assertTrue(e[0] is gradient._msg_retType)
return
self.fail()
def test_retNone1_b(self):
"""Test that it is ok to return [None] from op.grad()"""
class retNone(gof.op.Op):
def make_node(self, *inputs):
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
return [None]
i = gof.generic()
a = retNone().make_node(i)
g = _grad_sources_inputs([(a.out, 1)], None)
self.assertTrue(not i in g)
if 0:
#most of these tests are no longer relevant now that grad_sources_inputs is gone
#also, some of our policies about what is allowed or not have changed
#nonetheless, it may be a good idea to resurrect some of these tests and write
#them in terms of tensor.grad directly
class test_grad_sources_inputs(unittest.TestCase):
def test_retNone1(self):
"""Test that it is not ok to return None from op.grad()"""
class retNone(gof.op.Op):
def make_node(self):
inputs = [gof.generic()]
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
x, = inp
gz, = grads
pass
a = retNone().make_node()
try:
_grad_sources_inputs([(a.out, 1)], None)
except ValueError, e:
self.assertTrue(e[0] is gradient._msg_retType)
return
self.fail()
def test_retNone1_b(self):
"""Test that it is ok to return [None] from op.grad()"""
class retNone(gof.op.Op):
def make_node(self, *inputs):
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
return [None]
i = gof.generic()
a = retNone().make_node(i)
g = _grad_sources_inputs([(a.out, 1)], None)
self.assertTrue(not i in g)
def test_wrong_rval_len1(self):
"""Test that it is not ok to return the wrong number of gradients"""
class retNone(gof.op.Op):
def make_node(self, *inputs):
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads):
return [None]
def test_wrong_rval_len1(self):
"""Test that it is not ok to return the wrong number of gradients"""
class retNone(gof.op.Op):
def make_node(self, *inputs):
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads):
return [None]
i = gof.generic()
j = gof.generic()
a1 = retNone().make_node(i)
g = _grad_sources_inputs([(a1.out, 1)], None)
a2 = retNone().make_node(i,j)
try:
g = _grad_sources_inputs([(a2.out, 1)], None)
except ValueError, e:
self.assertTrue(e[0] is gradient._msg_badlen)
return
self.fail()
i = gof.generic()
j = gof.generic()
a1 = retNone().make_node(i)
g = _grad_sources_inputs([(a1.out, 1)], None)
a2 = retNone().make_node(i,j)
try:
g = _grad_sources_inputs([(a2.out, 1)], None)
except ValueError, e:
self.assertTrue(e[0] is gradient._msg_badlen)
return
self.fail()
def test_stop_on_all_none(self):
"""Test that op.grad() is not called when output grads are all None"""
class retNone(gof.op.Op):
def __init__(self, tst):
self.tst = tst
def make_node(self, *inputs):
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads):
self.tst.fail()
def test_stop_on_all_none(self):
"""Test that op.grad() is not called when output grads are all None"""
class retNone(gof.op.Op):
def __init__(self, tst):
self.tst = tst
def make_node(self, *inputs):
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads):
self.tst.fail()
i = gof.generic()
a1 = retNone(self).make_node(i)
g = _grad_sources_inputs([(a1.out, None)], None)
i = gof.generic()
a1 = retNone(self).make_node(i)
g = _grad_sources_inputs([(a1.out, None)], None)
def test_1in_1out(self):
"""Test grad is called correctly for a 1-to-1 op"""
gval = gof.generic()
class O(gof.op.Op):
def make_node(self):
inputs = [gof.generic()]
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
return gval,
a1 = O().make_node()
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.assertTrue(g[a1.inputs[0]] is gval)
def test_1in_1out(self):
"""Test grad is called correctly for a 1-to-1 op"""
gval = gof.generic()
class O(gof.op.Op):
def make_node(self):
inputs = [gof.generic()]
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
return gval,
a1 = O().make_node()
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.assertTrue(g[a1.inputs[0]] is gval)
def test_1in_Nout(self):
"""Test grad is called correctly for a 1-to-many op"""
gval = gof.generic()
class O(gof.op.Op):
def make_node(self):
inputs = [gof.generic()]
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
x, = inp
gz1, gz2 = grads
return gval,
a1 = O().make_node()
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.assertTrue(g[a1.inputs[0]] is gval)
def test_Nin_1out(self):
"""Test grad is called correctly for a many-to-1 op"""
gval0 = gof.generic()
gval1 = gof.generic()
class O(gof.op.Op):
def make_node(self):
inputs = [gof.generic(),gof.generic()]
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
x0, x1 = inp
gz, = grads
return (gval0, gval1)
a1 = O().make_node()
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.assertTrue(g[a1.inputs[0]] is gval0)
self.assertTrue(g[a1.inputs[1]] is gval1)
def test_Nin_Nout(self):
"""Test grad is called correctly for a many-to-many op"""
gval0 = gof.generic()
gval1 = gof.generic()
class O(gof.op.Op):
def make_node(self):
inputs = [gof.generic(),gof.generic()]
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
return gval0, gval1
a1 = O().make_node()
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.assertTrue(g[a1.inputs[0]] is gval0)
self.assertTrue(g[a1.inputs[1]] is gval1)
def test_some_None_ograds(self):
"""Test grad is called when some output gradients are None"""
class O(gof.op.Op):
def __init__(self, tst):
self.tst = tst
def make_node(self, *inputs):
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, g_out):
return [1]
i = gof.generic()
a1 = O(self).make_node(i)
g = grad_sources_inputs([(a1.outputs[0], 1)], None, warn_type=False)
self.assertTrue(g[i] is 1)
def test_1in_Nout(self):
"""Test grad is called correctly for a 1-to-many op"""
gval = gof.generic()
class O(gof.op.Op):
def make_node(self):
inputs = [gof.generic()]
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
x, = inp
gz1, gz2 = grads
return gval,
a1 = O().make_node()
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.assertTrue(g[a1.inputs[0]] is gval)
def test_Nin_1out(self):
"""Test grad is called correctly for a many-to-1 op"""
gval0 = gof.generic()
gval1 = gof.generic()
class O(gof.op.Op):
def make_node(self):
inputs = [gof.generic(),gof.generic()]
outputs = [gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
x0, x1 = inp
gz, = grads
return (gval0, gval1)
a1 = O().make_node()
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.assertTrue(g[a1.inputs[0]] is gval0)
self.assertTrue(g[a1.inputs[1]] is gval1)
def test_Nin_Nout(self):
"""Test grad is called correctly for a many-to-many op"""
gval0 = gof.generic()
gval1 = gof.generic()
class O(gof.op.Op):
def make_node(self):
inputs = [gof.generic(),gof.generic()]
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inp, grads):
return gval0, gval1
a1 = O().make_node()
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.assertTrue(g[a1.inputs[0]] is gval0)
self.assertTrue(g[a1.inputs[1]] is gval1)
def test_some_None_ograds(self):
"""Test grad is called when some output gradients are None"""
class O(gof.op.Op):
def __init__(self, tst):
self.tst = tst
def make_node(self, *inputs):
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, g_out):
return [1]
i = gof.generic()
a1 = O(self).make_node(i)
g = grad_sources_inputs([(a1.outputs[0], 1)], None, warn_type=False)
self.assertTrue(g[i] is 1)
def test_some_None_igrads(self):
"""Test that traversal works properly when an op return some None"""
class O(gof.op.Op):
def __init__(self, tst, grad_ok):
self.tst = tst
self.grad_ok = grad_ok
def make_node(self, *inputs):
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, g_out):
if not self.grad_ok:
self.tst.fail()
else:
return [1, None]
i = gof.generic()
j = gof.generic()
k = gof.generic()
a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(a1.outputs[1], k)
g = grad_sources_inputs([(a2.outputs[0], 1)], None, warn_type=False)
self.assertTrue(g[i] is 1 and j not in g and k not in g)
def test_some_None_igrads(self):
"""Test that traversal works properly when an op return some None"""
class O(gof.op.Op):
def __init__(self, tst, grad_ok):
self.tst = tst
self.grad_ok = grad_ok
def make_node(self, *inputs):
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, g_out):
if not self.grad_ok:
self.tst.fail()
else:
return [1, None]
i = gof.generic()
j = gof.generic()
k = gof.generic()
a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(a1.outputs[1], k)
g = grad_sources_inputs([(a2.outputs[0], 1)], None, warn_type=False)
self.assertTrue(g[i] is 1 and j not in g and k not in g)
a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(k, a1.outputs[1])
g = _grad_sources_inputs([(a2.outputs[0], 1)], None)
self.assertTrue(g[k] is 1 and i not in g and j not in g)
a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(k, a1.outputs[1])
g = _grad_sources_inputs([(a2.outputs[0], 1)], None)
self.assertTrue(g[k] is 1 and i not in g and j not in g)
def test_inputs(self):
"""Test that passing inputs shortens the traversal"""
class O(gof.op.Op):
def __init__(self, tst, grad_ok):
self.tst = tst
self.grad_ok = grad_ok
def make_node(self, *inputs):
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads):
g0, g1 = grads
if not self.grad_ok:
self.tst.fail()
else:
if g1:
return [g0, g0+g1]
def test_inputs(self):
"""Test that passing inputs shortens the traversal"""
class O(gof.op.Op):
def __init__(self, tst, grad_ok):
self.tst = tst
self.grad_ok = grad_ok
def make_node(self, *inputs):
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads):
g0, g1 = grads
if not self.grad_ok:
self.tst.fail()
else:
return [g0, g0]
i = gof.generic()
j = gof.generic()
k = gof.generic()
a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(k,a1.outputs[1])
g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
(a1.outputs[0], 3), (a1.outputs[0], 3)], a1.outputs)
self.assertTrue(g[a2.inputs[0]] == 1)
self.assertTrue(g[a2.inputs[1]] == 5)
self.assertTrue(g[a1.outputs[0]] == 6)
self.assertTrue(g[a1.outputs[1]] == 5)
self.assertTrue(a1.inputs[0] not in g)
self.assertTrue(a1.inputs[1] not in g)
if g1:
return [g0, g0+g1]
else:
return [g0, g0]
i = gof.generic()
j = gof.generic()
k = gof.generic()
a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(k,a1.outputs[1])
g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
(a1.outputs[0], 3), (a1.outputs[0], 3)], a1.outputs)
self.assertTrue(g[a2.inputs[0]] == 1)
self.assertTrue(g[a2.inputs[1]] == 5)
self.assertTrue(g[a1.outputs[0]] == 6)
self.assertTrue(g[a1.outputs[1]] == 5)
self.assertTrue(a1.inputs[0] not in g)
self.assertTrue(a1.inputs[1] not in g)
def test_multiple_sources(self):
"""Test that passing multiple sources works"""
class O(gof.op.Op):
def __init__(self, tst, grad_ok):
self.tst = tst
self.grad_ok = grad_ok
def make_node(self, *inputs):
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads):
g0, g1 = grads
if not self.grad_ok:
self.tst.fail()
else:
if g1:
return [g0, g0+g1]
def test_multiple_sources(self):
"""Test that passing multiple sources works"""
class O(gof.op.Op):
def __init__(self, tst, grad_ok):
self.tst = tst
self.grad_ok = grad_ok
def make_node(self, *inputs):
outputs = [gof.generic(),gof.generic()]
return gof.Apply(self, inputs, outputs)
def grad(self, inputs, grads):
g0, g1 = grads
if not self.grad_ok:
self.tst.fail()
else:
return [g0, g0]
i = gof.generic()
j = gof.generic()
k = gof.generic()
a1 = O(self,True).make_node(i,j)
a2 = O(self,True).make_node(k,a1.outputs[1])
g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
(a1.outputs[0], 3), (a1.outputs[0], 3)], None)
self.assertTrue(g[a2.inputs[0]] == 1)
self.assertTrue(g[a2.inputs[1]] == 5)
self.assertTrue(g[a1.outputs[0]] == 6)
self.assertTrue(g[a1.outputs[1]] == 5)
self.assertTrue(g[a1.inputs[0]] == 6)
self.assertTrue(g[a1.inputs[1]] == 11)
if g1:
return [g0, g0+g1]
else:
return [g0, g0]
i = gof.generic()
j = gof.generic()
k = gof.generic()
a1 = O(self,True).make_node(i,j)
a2 = O(self,True).make_node(k,a1.outputs[1])
g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
(a1.outputs[0], 3), (a1.outputs[0], 3)], None)
self.assertTrue(g[a2.inputs[0]] == 1)
self.assertTrue(g[a2.inputs[1]] == 5)
self.assertTrue(g[a1.outputs[0]] == 6)
self.assertTrue(g[a1.outputs[1]] == 5)
self.assertTrue(g[a1.inputs[0]] == 6)
self.assertTrue(g[a1.inputs[1]] == 11)
def test_unimplemented_grad_func():
#tests that function compilation catches unimplemented grads in the graph
a = theano.tensor.vector()
b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a)
try:
f = theano.function([a], b)
f = theano.function([a], b, on_unused_input = 'ignore')
assert 0
#Note: it's important that the NotImplementedGradOp is caught
#at COMPILATION time, not execution time.
#If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph.
except NotImplementedError:
except TypeError:
pass
def test_undefined_grad_func():
......@@ -271,13 +276,13 @@ def test_undefined_grad_func():
a = theano.tensor.vector()
b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
try:
f = theano.function([a],b)
f = theano.function([a],b, on_unused_input = 'ignore')
assert 0
#Note: it's important that the GradUndefinedOp is cauhgt at
#Note: it's important that the GradUndefinedOp is caught at
#COMPILATION time, not execution time.
#If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph
except theano.gradient.GradUndefinedError:
except TypeError:
pass
def test_unimplemented_grad_grad():
......@@ -296,7 +301,7 @@ def test_unimplemented_grad_grad():
try:
g = theano.gradient.grad(b,a)
assert False
except NotImplementedError:
except TypeError:
pass
def test_undefined_grad_grad():
......@@ -314,7 +319,7 @@ def test_undefined_grad_grad():
try:
g = theano.gradient.grad(Z.sum(),d)
assert False
except theano.gradient.GradUndefinedError:
except TypeError:
pass
def test_grad_name():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论