提交 ea432825 作者: Ian Goodfellow

add spaces at the start of comments

上级 6b3b2ee7
...@@ -309,7 +309,7 @@ def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False, ...@@ -309,7 +309,7 @@ def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False,
if not isinstance(f, (list, tuple)): if not isinstance(f, (list, tuple)):
f = [f] f = [f]
#make copies of f and grads so we don't modify the client's copy # make copies of f and grads so we don't modify the client's copy
f = list(f) f = list(f)
grads = list(eval_points) grads = list(eval_points)
...@@ -417,7 +417,7 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False, ...@@ -417,7 +417,7 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
if consider_constant is None: if consider_constant is None:
consider_constant = [] consider_constant = []
else: else:
#error checking on consider_constant: verify that it is a collection # error checking on consider_constant: verify that it is a collection
# of theano variables # of theano variables
# this is important, if someone accidentally passes a nested data # this is important, if someone accidentally passes a nested data
# structure with theano variables at the leaves, only the root will # structure with theano variables at the leaves, only the root will
...@@ -437,21 +437,21 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False, ...@@ -437,21 +437,21 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
var_to_node_to_idx = _populate_var_to_node_to_idx([cost]) var_to_node_to_idx = _populate_var_to_node_to_idx([cost])
#build a dict mapping var to the gradient of cost with respect to var # build a dict mapping var to the gradient of cost with respect to var
grad_dict = {} grad_dict = {}
#by default, the gradient of the cost is 1 # by default, the gradient of the cost is 1
if g_cost is None: if g_cost is None:
g_cost = tensor.ones_like(cost) g_cost = tensor.ones_like(cost)
grad_dict[cost] = g_cost grad_dict[cost] = g_cost
#the gradient of the constants is 0 # the gradient of the constants is 0
for const in consider_constant: for const in consider_constant:
grad_dict[const] = DisconnectedType()() grad_dict[const] = DisconnectedType()()
#variables that do not influence the cost have zero gradient. # variables that do not influence the cost have zero gradient.
#if wrt is such a variable, populate the grad_dict with this info # if wrt is such a variable, populate the grad_dict with this info
#so that wrt not being in var_to_node_to_idx won't cause an error below # so that wrt not being in var_to_node_to_idx won't cause an error below
#according to the flag, possibly raise an error if wrt is disconnected # according to the flag, possibly raise an error if wrt is disconnected
for elem in wrt: for elem in wrt:
if elem not in var_to_node_to_idx and elem is not cost: if elem not in var_to_node_to_idx and elem is not cost:
message = ("grad method was asked to compute the gradient " message = ("grad method was asked to compute the gradient "
...@@ -504,10 +504,10 @@ def _populate_var_to_node_to_idx(outputs): ...@@ -504,10 +504,10 @@ def _populate_var_to_node_to_idx(outputs):
""" """
#var_to_node_to_idx[var][node] = [i,j] means node has # var_to_node_to_idx[var][node] = [i,j] means node has
#var as input at positions i and j # var as input at positions i and j
var_to_node_to_idx = {} var_to_node_to_idx = {}
#set of variables or nodes that have been added to their parents # set of variables or nodes that have been added to their parents
accounted_for = set([]) accounted_for = set([])
def account_for(var): def account_for(var):
...@@ -568,11 +568,11 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -568,11 +568,11 @@ def _populate_grad_dict(var_to_node_to_idx,
returns: a list of gradients corresponding to wrt returns: a list of gradients corresponding to wrt
""" """
#build a dict mapping node to the terms node contributes to each of # build a dict mapping node to the terms node contributes to each of
#its inputs' gradients # its inputs' gradients
term_dict = {} term_dict = {}
#populate term_dict[node] and return it # populate term_dict[node] and return it
def access_term_cache(node): def access_term_cache(node):
if node not in term_dict: if node not in term_dict:
...@@ -600,8 +600,8 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -600,8 +600,8 @@ def _populate_grad_dict(var_to_node_to_idx,
if False in [isinstance(g.type, DisconnectedType) if False in [isinstance(g.type, DisconnectedType)
for g in output_grads]: for g in output_grads]:
#Some outputs of this op are connected to the cost so we must # Some outputs of this op are connected to the cost so we must
#call the ops grad method # call the ops grad method
input_grads = node.op.grad(inputs, output_grads) input_grads = node.op.grad(inputs, output_grads)
...@@ -613,30 +613,30 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -613,30 +613,30 @@ def _populate_grad_dict(var_to_node_to_idx,
raise ValueError(("%s returned the wrong number of" +\ raise ValueError(("%s returned the wrong number of" +\
" gradient terms.") % str(node.op)) " gradient terms.") % str(node.op))
else: else:
#All outputs of this op are disconnected so we can skip # All outputs of this op are disconnected so we can skip
#Calling the op's grad method and report that the inputs # Calling the op's grad method and report that the inputs
#are disconnected # are disconnected
#(The op's grad method could do this too, but this saves the # (The op's grad method could do this too, but this saves the
#implementer the trouble of worrying about this case) # implementer the trouble of worrying about this case)
input_grads = [DisconnectedType()() for ipt in inputs] input_grads = [DisconnectedType()() for ipt in inputs]
#must convert to list in case the op returns a tuple # must convert to list in case the op returns a tuple
#we won't be able to post-process out the Nones if it does that # we won't be able to post-process out the Nones if it does that
term_dict[node] = list(input_grads) term_dict[node] = list(input_grads)
for i in xrange(len(term_dict[node])): for i in xrange(len(term_dict[node])):
if term_dict[node][i] is None: if term_dict[node][i] is None:
#we don't know what None means. in the past it has been # we don't know what None means. in the past it has been
#used to # used to
#mean undefined, zero, or disconnected. So for now we # mean undefined, zero, or disconnected. So for now we
#assume it is # assume it is
#zero. Assuming it is zero prevents # zero. Assuming it is zero prevents
#us from disconnecting NaNs above. # us from disconnecting NaNs above.
#eventually we should disallow this # eventually we should disallow this
#return type and force all ops # return type and force all ops
#to return the correct thing # to return the correct thing
#raise AssertionError('%s returned None for' +\ # raise AssertionError('%s returned None for' +\
# ' a gradient term, ' # ' a gradient term, '
# 'this is prohibited' % node.op) # 'this is prohibited' % node.op)
term_dict[node][i] = node.inputs[i].zeros_like() term_dict[node][i] = node.inputs[i].zeros_like()
...@@ -652,16 +652,16 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -652,16 +652,16 @@ def _populate_grad_dict(var_to_node_to_idx,
return term_dict[node] return term_dict[node]
#built-in python sum adds an extraneous TensorConstant(0) # built-in python sum adds an extraneous TensorConstant(0)
#we can exploit the knowledge that iterable always has at # we can exploit the knowledge that iterable always has at
#least one element to avoid starting the sum at 0 # least one element to avoid starting the sum at 0
def nonempty_sum(iterable): def nonempty_sum(iterable):
rval = iterable[0] rval = iterable[0]
for elem in iterable[1:]: for elem in iterable[1:]:
rval = rval + elem rval = rval + elem
return rval return rval
#populate grad_dict[var] and return it # populate grad_dict[var] and return it
def access_grad_cache(var): def access_grad_cache(var):
if var not in grad_dict: if var not in grad_dict:
if var in var_to_node_to_idx: if var in var_to_node_to_idx:
...@@ -692,8 +692,8 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -692,8 +692,8 @@ def _populate_grad_dict(var_to_node_to_idx,
if cost_name is not None and var.name is not None: if cost_name is not None and var.name is not None:
grad_dict[var].name = '(d%s/d%s)' % (cost_name, var.name) grad_dict[var].name = '(d%s/d%s)' % (cost_name, var.name)
else: else:
#this variable isn't connected to the cost in the computational # this variable isn't connected to the cost in the computational
#graph # graph
grad_dict[var] = DisconnectedType()() grad_dict[var] = DisconnectedType()()
return grad_dict[var] return grad_dict[var]
...@@ -776,16 +776,16 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True): ...@@ -776,16 +776,16 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
var_to_node_to_idx = _populate_var_to_node_to_idx(outputs) var_to_node_to_idx = _populate_var_to_node_to_idx(outputs)
#build a dict mapping var to the gradient of cost with respect to var # build a dict mapping var to the gradient of cost with respect to var
grad_dict = {} grad_dict = {}
#by default, the gradient of the cost is 1 # by default, the gradient of the cost is 1
for output, output_grad in sources: for output, output_grad in sources:
grad_dict[output] = output_grad grad_dict[output] = output_grad
#variables that do not influence the cost have zero gradient. # variables that do not influence the cost have zero gradient.
#if wrt is such a variable, populate the grad_dict with this info # if wrt is such a variable, populate the grad_dict with this info
#so that wrt not being in var_to_node_to_idx won't cause an error below # so that wrt not being in var_to_node_to_idx won't cause an error below
#according to the flag, possibly raise an error if wrt is disconnected # according to the flag, possibly raise an error if wrt is disconnected
for elem in wrt: for elem in wrt:
if elem not in var_to_node_to_idx and elem not in outputs: if elem not in var_to_node_to_idx and elem not in outputs:
grad_dict[elem] = DisconnectedType()() grad_dict[elem] = DisconnectedType()()
...@@ -793,7 +793,7 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True): ...@@ -793,7 +793,7 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
_populate_grad_dict(var_to_node_to_idx, _populate_grad_dict(var_to_node_to_idx,
grad_dict, wrt, warn_type) grad_dict, wrt, warn_type)
#post-process out the DisconnectedTypes # post-process out the DisconnectedTypes
for key in grad_dict: for key in grad_dict:
if isinstance(grad_dict[key].type, DisconnectedType): if isinstance(grad_dict[key].type, DisconnectedType):
if hasattr(key, 'zeros_like'): if hasattr(key, 'zeros_like'):
...@@ -1091,7 +1091,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1091,7 +1091,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
as_tensor_variable(p).broadcastable)(name='input %i' % i) as_tensor_variable(p).broadcastable)(name='input %i' % i)
for i, p in enumerate(pt)] for i, p in enumerate(pt)]
#fun can be either a function or an actual Op instance # fun can be either a function or an actual Op instance
o_output = fun(*tensor_pt) o_output = fun(*tensor_pt)
if isinstance(o_output, list): if isinstance(o_output, list):
...@@ -1126,7 +1126,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1126,7 +1126,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
cost_fn = function(tensor_pt, cost) cost_fn = function(tensor_pt, cost)
#todo-- determine if this is actually needed # todo-- determine if this is actually needed
g_cost = as_tensor_variable(1.0, name='g_cost') g_cost = as_tensor_variable(1.0, name='g_cost')
if cast_to_output_type: if cast_to_output_type:
g_cost = cast(g_cost, o_output.dtype) g_cost = cast(g_cost, o_output.dtype)
...@@ -1152,7 +1152,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1152,7 +1152,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
raise verify_grad.E_grad(max_arg, max_err_pos, raise verify_grad.E_grad(max_arg, max_err_pos,
max_abs_err, max_rel_err, abs_tol, rel_tol) max_abs_err, max_rel_err, abs_tol, rel_tol)
#get new random projection for next test # get new random projection for next test
if test_num < n_tests - 1: if test_num < n_tests - 1:
t_r.set_value(random_projection(), borrow=True) t_r.set_value(random_projection(), borrow=True)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论