提交 17458063 authored 作者: lamblin's avatar lamblin

Merge pull request #1074 from goodfeli/test_grad

Ready to merge: Adds a unit test of undefined gradients on integers
......@@ -468,7 +468,7 @@ def grad(cost, wrt, consider_constant=None,
'Ambiguous whether %s should be made into tensor'
' or sparse theano variable' % str(type(g_var)))
if g_var.type not in [NullType, DisconnectedType] and 'float' \
if not isinstance(g_var.type, (NullType, DisconnectedType)) and 'float' \
not in str(g_var.type.dtype):
raise TypeError("Gradients must always be NullType, "
"DisconnectedType, or continuous, but grad was "
......@@ -776,8 +776,42 @@ def _populate_grad_dict(var_to_node_to_idx,
input_to_outputs in connection_pattern
]
if True in inputs_connected:
# At least one input of this op is connected to the cost so we must
#List of bools indicating if each output is an integer dtype
output_is_int = [hasattr(output.type, 'dtype') and
output.type.dtype in theano.tensor.discrete_dtypes
for output in node.outputs]
# List of bools indicating if each output gradient is NullType
ograd_is_nan = [isinstance(output.type, NullType)
for output in output_grads]
# List of bools indicating if each input only has NullType outputs
only_connected_to_nan = [(True not in
[in_to_out and out_to_cost and not out_nan
for in_to_out, out_to_cost, out_nan in
zip(in_to_outs, outputs_connected, ograd_is_nan)])
for in_to_outs in connection_pattern]
if True not in inputs_connected:
# All outputs of this op are disconnected so we can skip
# Calling the op's grad method and report that the inputs
# are disconnected
# (The op's grad method could do this too, but this saves the
# implementer the trouble of worrying about this case)
input_grads = [DisconnectedType()() for ipt in inputs]
elif False not in only_connected_to_nan:
# All inputs are only connected to nan gradients, so we don't
# need to bother calling the grad method. We know the gradient
# with respect to all connected inputs is nan.
input_grads = []
for connected in inputs_connected:
if connected:
input_grads.append(NullType()())
else:
input_grads.append(DisconnectedType()())
else:
# At least one input of this op is connected to the cost and
# not all output gradients are undefined, so we must
# call the op's grad method
# Each Op's grad function requires inputs and output_grads
......@@ -848,13 +882,6 @@ def _populate_grad_dict(var_to_node_to_idx,
if len(input_grads) != len(inputs):
raise ValueError(("%s returned the wrong number of" +\
" gradient terms.") % str(node.op))
else:
# All outputs of this op are disconnected so we can skip
# Calling the op's grad method and report that the inputs
# are disconnected
# (The op's grad method could do this too, but this saves the
# implementer the trouble of worrying about this case)
input_grads = [DisconnectedType()() for ipt in inputs]
# must convert to list in case the op returns a tuple
# we won't be able to post-process out the Nones if it does that
......@@ -862,18 +889,15 @@ def _populate_grad_dict(var_to_node_to_idx,
# Do type checking on the result
#List of bools indicating if each output is an integer dtype
output_is_int = [hasattr(output.type, 'dtype') and
output.type.dtype in theano.tensor.discrete_dtypes
for output in node.outputs]
#List of bools indicating if each input only has integer outputs
# List of bools indicating if each input only has integer outputs
only_connected_to_int = [(True not in
[in_to_out and out_to_cost and not out_int
for in_to_out, out_to_cost, out_int in
zip(in_to_outs, outputs_connected, output_is_int)])
for in_to_outs in connection_pattern]
for i, term in enumerate(input_grads):
# Disallow Nones
......@@ -898,6 +922,10 @@ def _populate_grad_dict(var_to_node_to_idx,
' returned an integer-valued variable.'
' (Input index %d, dtype %s)' % (i,
term.type.dtype))
if only_connected_to_nan[i]:
assert isinstance(term.type, NullType)
if only_connected_to_int[i]:
# This term has only integer outputs and we know
# it's not undefined or disconnected
......
......@@ -722,20 +722,19 @@ class Elemwise(Op):
def _bgrad(self, inputs, ograds):
# returns grad, with respect to broadcasted versions of inputs
# Gradients (especially on the final costs) don't have to be symbolic
# e.g., ograds will be [ 1. ] if your objective is c and the output
# of the current apply node is c
ograds = map(as_tensor_variable, ograds)
prev_setting = theano.config.compute_test_value
try:
theano.config.compute_test_value = 'off'
scalar_inputs = [Scalar(dtype=t.type.dtype)() for t in inputs]
scalar_ograds = [Scalar(dtype=ograd.type.dtype)()
for ograd in ograds]
def as_scalar(t):
if isinstance(t.type, (NullType, DisconnectedType)):
return t
return Scalar(t.type.dtype)()
scalar_inputs = map(as_scalar, inputs)
scalar_ograds = map(as_scalar, ograds)
scalar_igrads = self.scalar_op.grad(scalar_inputs, scalar_ograds)
for igrad in scalar_igrads:
assert igrad is not None
......
......@@ -517,5 +517,22 @@ def test_known_grads_integers():
assert np.allclose(g_actual, gv)
def test_undefined_cost_grad():
    """Check that a cost declared undefined via known_grads is not ignored.

    An integer-valued cost ``lhs + rhs`` is handed to ``theano.tensor.grad``
    together with a ``known_grads`` entry that maps the cost itself to a
    NullType variable. The call is expected to raise
    ``theano.gradient.NullTypeGradError``; if it returns normally instead,
    the test fails with an AssertionError.
    """
    lhs = theano.tensor.iscalar()
    rhs = theano.tensor.iscalar()
    cost = lhs + rhs

    # Sanity check: the cost built from two int scalars is itself discrete.
    assert cost.dtype in theano.tensor.discrete_dtypes

    try:
        # The return value is irrelevant; only the raised error matters.
        theano.tensor.grad(cost, [lhs, rhs],
                           known_grads={cost: NullType()()})
    except theano.gradient.NullTypeGradError:
        # Expected outcome: the undefined gradient was reported.
        pass
    else:
        raise AssertionError("An undefined gradient has been ignored.")
# Invoke unittest's command-line runner when this file is executed directly.
if __name__ == '__main__':
unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论