提交 0ec49804 authored 作者: Ian Goodfellow's avatar Ian Goodfellow

fixed bug where comments did not appear

added DisconnectedType
上级 17562537
...@@ -110,9 +110,16 @@ following methods: ...@@ -110,9 +110,16 @@ following methods:
the gradient with respect to that input computed based on the symbolic gradients with the gradient with respect to that input computed based on the symbolic gradients with
respect to each output. respect to each output.
If the output is not differentiable with respect to any inputs, If the output is not differentiable with respect to an input
then this method should be defined to return ``[None for i in then this method should be defined to return a variable of type
inputs]``. If this method is not defined, then Theano assumes it has been NullType for that input.
If an element of output_gradient is of type theano.gradient.DisconnectedType,
it means that the cost is not a function of this output. If any of the
op's inputs participate in the computation of only disconnected outputs,
then Op.grad should return DisconnectedType variables for those inputs.
If the grad method is not defined, then Theano assumes it has been
forgotten. Symbolic differentiation will fail on a graph that forgotten. Symbolic differentiation will fail on a graph that
includes this Op. includes this Op.
......
...@@ -70,7 +70,7 @@ def grad_not_implemented(op, x_pos, x, comment = ""): ...@@ -70,7 +70,7 @@ def grad_not_implemented(op, x_pos, x, comment = ""):
return NaNType("This variable is NaN because the grad method for " + \ return NaNType("This variable is NaN because the grad method for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \ "input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
" not implemented.")() " not implemented."+comment)()
def grad_undefined(op, x_pos, x, comment = ""): def grad_undefined(op, x_pos, x, comment = ""):
""" """
...@@ -88,7 +88,31 @@ def grad_undefined(op, x_pos, x, comment = ""): ...@@ -88,7 +88,31 @@ def grad_undefined(op, x_pos, x, comment = ""):
return NaNType("This variable is NaN because the gradient for " + \ return NaNType("This variable is NaN because the gradient for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \ "input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
" mathematically undefined.")() " mathematically undefined."+comment)()
class DisconnectedType(theano.gof.type.Type):
    """A type marking a gradient as structurally zero.

    A variable of this type is a symbolic placeholder for the gradient
    of a cost ``c`` with respect to a variable ``x`` when ``c`` is not
    a function of ``x``.  It stands for 0, but conveys the extra
    information that the gradient is 0 *because the variables are
    disconnected*, so callers (e.g. ``grad``) can distinguish
    disconnection from a genuine zero gradient.
    """

    def filter(self, data, strict=False, allow_downcast=None):
        # A DisconnectedType variable must never hold an actual value;
        # it exists only as a symbolic marker.
        raise AssertionError("If you're assigning to a DisconnectedType you're"
                             " doing something wrong. It should only be used as "
                             "symbolic placeholder.")

    # NOTE(review): this name looks like a typo of ``filter_variable``
    # -- confirm against theano.gof.type.Type before renaming, since
    # the base class may dispatch on the correctly-spelled name.
    def fiter_variable(self, other):
        # Was a bare ``raise`` outside any except block, which only
        # produces "RuntimeError: No active exception to re-raise".
        # Raise an explicit, descriptive error instead.
        raise AssertionError("DisconnectedType cannot convert or filter "
                             "other variables; it is only a symbolic "
                             "placeholder.")

    # NOTE(review): no ``self`` parameter -- when invoked as an
    # instance method, ``a`` is bound to the instance. Kept as-is to
    # preserve the call signature; likely intended as a staticmethod.
    def may_share_memory(a, b):
        # Placeholders never carry storage, so they can never alias.
        return False

    def value_eq(a, b, force_same_dtype=True):
        # There are no runtime values to compare (see filter above).
        # Was a bare ``raise``; raise an explicit error instead.
        raise AssertionError("DisconnectedType variables have no values "
                             "to compare.")
######################## ########################
...@@ -378,7 +402,7 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = False, ...@@ -378,7 +402,7 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = False,
#the gradient of the constants is 0 #the gradient of the constants is 0
for const in consider_constant: for const in consider_constant:
grad_dict[const] = const.zeros_like() grad_dict[const] = DisconnectedType()()
#variables that do not influence the cost have zero gradient. #variables that do not influence the cost have zero gradient.
#if wrt is such a variable, populate the grad_dict with this info #if wrt is such a variable, populate the grad_dict with this info
...@@ -400,12 +424,16 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = False, ...@@ -400,12 +424,16 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = False,
raise ValueError("Invalid value for keyword " raise ValueError("Invalid value for keyword "
"'disconnected_inputs', valid values are " "'disconnected_inputs', valid values are "
"'ignore', 'warn' and 'raise'.") "'ignore', 'warn' and 'raise'.")
grad_dict[elem] = elem.zeros_like() grad_dict[elem] = DisconnectedType()()
rval = _populate_grad_dict(var_to_node_to_idx, rval = _populate_grad_dict(var_to_node_to_idx,
grad_dict, wrt, warn_type, grad_dict, wrt, warn_type,
cost.name) cost.name)
for i in xrange(len(rval)):
if isinstance(rval[i].type, DisconnectedType):
rval[i] = wrt[i].zeros_like()
if using_tuple: if using_tuple:
rval = tuple(rval) rval = tuple(rval)
elif not using_list: elif not using_list:
...@@ -468,12 +496,12 @@ def _populate_grad_dict(var_to_node_to_idx,\ ...@@ -468,12 +496,12 @@ def _populate_grad_dict(var_to_node_to_idx,\
grad_dict: a dictionary mapping variables to their gradients grad_dict: a dictionary mapping variables to their gradients
should be populated by grad or grad_sources_inputs should be populated by grad or grad_sources_inputs
grad should set gradients to zeros_like for grad should set gradients to DisconnectedType()() for
variables to be considered constant, set the variables to be considered constant, set the
gradient for the cost variable to g_cost, etc. gradient for the cost variable to g_cost, etc.
both should set the gradient for disconnected both should set the gradient for disconnected
inputs to zeros_like inputs to DisconnectedType()()
wrt: the minimal set of variables that must be included in grad_dict wrt: the minimal set of variables that must be included in grad_dict
...@@ -513,6 +541,11 @@ def _populate_grad_dict(var_to_node_to_idx,\ ...@@ -513,6 +541,11 @@ def _populate_grad_dict(var_to_node_to_idx,\
for i in xrange(len(term_dict[node])): for i in xrange(len(term_dict[node])):
if term_dict[node][i] is None: if term_dict[node][i] is None:
#we don't know what None means. in the past it has been used to
#mean undefined, zero, or disconnected. So for now we assume it is
#zero. Assuming it is zero prevents us from disconnecting NaNs above.
#eventually we should disallow this return type and force all ops
#to return the correct thing
term_dict[node][i] = node.inputs[i].zeros_like() term_dict[node][i] = node.inputs[i].zeros_like()
if warn_type: if warn_type:
...@@ -560,8 +593,8 @@ def _populate_grad_dict(var_to_node_to_idx,\ ...@@ -560,8 +593,8 @@ def _populate_grad_dict(var_to_node_to_idx,\
grad_dict[var].name = '(d%s/d%s)' % (cost_name, var.name) grad_dict[var].name = '(d%s/d%s)' % (cost_name, var.name)
else: else:
#this variable is not connected to the cost in the computational #this variable is not connected to the cost in the computational
#graph so the gradient on it is zero #graph
grad_dict[var] = var.zeros_like() grad_dict[var] = DisconnectedType()()
return grad_dict[var] return grad_dict[var]
...@@ -657,7 +690,7 @@ def grad_sources_inputs(sources, graph_inputs, warn_type = True): ...@@ -657,7 +690,7 @@ def grad_sources_inputs(sources, graph_inputs, warn_type = True):
#according to the flag, possibly raise an error if wrt is disconnected #according to the flag, possibly raise an error if wrt is disconnected
for elem in wrt: for elem in wrt:
if elem not in var_to_node_to_idx and elem not in outputs: if elem not in var_to_node_to_idx and elem not in outputs:
grad_dict[elem] = elem.zeros_like() grad_dict[elem] = DisconnectedType()()
_populate_grad_dict(var_to_node_to_idx, _populate_grad_dict(var_to_node_to_idx,
......
...@@ -25,6 +25,7 @@ from theano.tensor.utils import hash_from_ndarray ...@@ -25,6 +25,7 @@ from theano.tensor.utils import hash_from_ndarray
from theano.scalar import ComplexError, IntegerDivisionError from theano.scalar import ComplexError, IntegerDivisionError
import theano.scalar.sharedvar import theano.scalar.sharedvar
from theano.gradient import grad_undefined from theano.gradient import grad_undefined
from theano.gradient import DisconnectedType
### set up the external interface ### set up the external interface
from elemwise import Elemwise, DimShuffle, CAReduce, Sum from elemwise import Elemwise, DimShuffle, CAReduce, Sum
...@@ -2324,9 +2325,21 @@ class MaxAndArgmax(Op): ...@@ -2324,9 +2325,21 @@ class MaxAndArgmax(Op):
x, axis = inp x, axis = inp
g_max, g_max_idx = grads g_max, g_max_idx = grads
# Check to see if the gradient on max is None g_max_disconnected = isinstance(g_max.type, DisconnectedType)
if g_max is None: g_max_idx_disconnected = isinstance(g_max_idx.type, DisconnectedType)
return None, None
#if the op is totally disconnected, so are its inputs
if g_max_disconnected and g_max_idx_disconnected:
return [ DisconnectedType()(), DisconnectedType()() ]
axis_grad = grad_undefined(self, 1, axis,
"argmax is not defined for non-integer axes so"
" argmax(x, axis+eps) is undefined" )
#if the max is disconnected but the argmax is not,
#the gradient on its inputs is zero
if g_max_disconnected:
return [ x.zeros_like(), axis_grad ]
xmax = max(x, axis) xmax = max(x, axis)
# Raise the g_max and xmax to the same number of dim as the input. # Raise the g_max and xmax to the same number of dim as the input.
...@@ -2346,7 +2359,7 @@ class MaxAndArgmax(Op): ...@@ -2346,7 +2359,7 @@ class MaxAndArgmax(Op):
# Set the grad to the correct position. # Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad g_x = eq(xmax_pad, x) * g_max_pad
return g_x, None return g_x, axis_grad
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论