提交 c8f8a276 authored 作者: abergeron's avatar abergeron

Merge pull request #2485 from nouiz/profile

Profile fix.
...@@ -1559,6 +1559,7 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False, ...@@ -1559,6 +1559,7 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
t2 = time.time() t2 = time.time()
if profile: if profile:
profile.compile_time += t2 - t1 profile.compile_time += t2 - t1
profile.nb_nodes = len(fn.maker.fgraph.apply_nodes)
fn.name = name fn.name = name
fn.maker.fgraph.name = name fn.maker.fgraph.name = name
......
...@@ -199,6 +199,11 @@ class ProfileStats(object): ...@@ -199,6 +199,11 @@ class ProfileStats(object):
line_width = config.profiling.output_line_width line_width = config.profiling.output_line_width
nb_nodes = -1
    # The number of nodes in the graph. We need the information
    # separately in case we print the profile when the function wasn't
# executed or if there is lazy operation in the graph.
optimizer_profile = None optimizer_profile = None
# None or tuple (the optimizer, the profile it returned) # None or tuple (the optimizer, the profile it returned)
...@@ -637,7 +642,7 @@ class ProfileStats(object): ...@@ -637,7 +642,7 @@ class ProfileStats(object):
print >> file, ' Time in thunks: %es (%.3f%%)' % ( print >> file, ' Time in thunks: %es (%.3f%%)' % (
local_time, 100 * local_time / self.fct_call_time) local_time, 100 * local_time / self.fct_call_time)
print >> file, ' Total compile time: %es' % self.compile_time print >> file, ' Total compile time: %es' % self.compile_time
print >> file, ' Number of Apply nodes: %s' % len(self.apply_time) print >> file, ' Number of Apply nodes: %d' % self.nb_nodes
print >> file, ' Theano Optimizer time: %es' % self.optimizer_time print >> file, ' Theano Optimizer time: %es' % self.optimizer_time
print >> file, ' Theano validate time: %es' % self.validate_time print >> file, ' Theano validate time: %es' % self.validate_time
print >> file, (' Theano Linker time (includes C,' print >> file, (' Theano Linker time (includes C,'
...@@ -649,6 +654,9 @@ class ProfileStats(object): ...@@ -649,6 +654,9 @@ class ProfileStats(object):
# The validation time is a subset of optimizer_time # The validation time is a subset of optimizer_time
assert self.validate_time < self.optimizer_time assert self.validate_time < self.optimizer_time
    def summary_globals(self, file):
        """Print profiling stats that are global to Theano, not tied to one
        compiled function.

        Currently this reports only the cumulative wall-clock time spent in
        all calls to theano.grad(), accumulated in the module-level counter
        theano.gradient.grad_time.

        :param file: open file-like object the summary line is written to.
        """
        print >> file, 'Time in all call to theano.grad() %es' % theano.gradient.grad_time
def summary_memory(self, file, N=None): def summary_memory(self, file, N=None):
fct_memory = {} # fgraph->dict(node->[outputs size]) fct_memory = {} # fgraph->dict(node->[outputs size])
fct_shapes = {} # fgraph->dict(node->[outputs shapes])) fct_shapes = {} # fgraph->dict(node->[outputs shapes]))
...@@ -1204,6 +1212,7 @@ class ProfileStats(object): ...@@ -1204,6 +1212,7 @@ class ProfileStats(object):
def summary(self, file=sys.stderr, n_ops_to_print=20, def summary(self, file=sys.stderr, n_ops_to_print=20,
n_apply_to_print=20): n_apply_to_print=20):
self.summary_function(file) self.summary_function(file)
self.summary_globals(file)
local_time = sum(self.apply_time.values()) local_time = sum(self.apply_time.values())
if local_time > 0: if local_time > 0:
self.summary_class(file, n_ops_to_print) self.summary_class(file, n_ops_to_print)
......
...@@ -804,7 +804,7 @@ def io_toposort(inputs, outputs, orderings=None): ...@@ -804,7 +804,7 @@ def io_toposort(inputs, outputs, orderings=None):
"""WRITEME """WRITEME
inputs: a list or tuple of Variable instances inputs: a list or tuple of Variable instances
outputs: a list or tuple of Variable instances outputs: a list or tuple of Apply instances
orderings: a dictionary orderings: a dictionary
key: Apply instance key: Apply instance
......
...@@ -10,6 +10,7 @@ __docformat__ = "restructuredtext en" ...@@ -10,6 +10,7 @@ __docformat__ = "restructuredtext en"
import __builtin__ import __builtin__
from itertools import izip from itertools import izip
import logging import logging
import time
import warnings import warnings
_logger = logging.getLogger('theano.gradient') _logger = logging.getLogger('theano.gradient')
...@@ -36,6 +37,8 @@ tensor = None ...@@ -36,6 +37,8 @@ tensor = None
_msg_retType = 'op.grad(...) returned a non-list' _msg_retType = 'op.grad(...) returned a non-list'
grad_time = 0
def format_as(use_list, use_tuple, outputs): def format_as(use_list, use_tuple, outputs):
""" """
...@@ -412,6 +415,7 @@ def grad(cost, wrt, consider_constant=None, ...@@ -412,6 +415,7 @@ def grad(cost, wrt, consider_constant=None,
or Variable in all cases. or Variable in all cases.
""" """
t0 = time.time()
global tensor global tensor
if tensor is None: if tensor is None:
from theano import tensor from theano import tensor
...@@ -483,14 +487,14 @@ def grad(cost, wrt, consider_constant=None, ...@@ -483,14 +487,14 @@ def grad(cost, wrt, consider_constant=None,
if not hasattr(g_var, 'type'): if not hasattr(g_var, 'type'):
raise TypeError('output grads must be theano variables.' raise TypeError('output grads must be theano variables.'
'Ambiguous whether %s should be made into tensor' 'Ambiguous whether %s should be made into tensor'
' or sparse theano variable' % str(type(g_var))) ' or sparse theano variable' % str(type(g_var)))
if (not isinstance(g_var.type, (NullType, DisconnectedType)) and if (not isinstance(g_var.type, (NullType, DisconnectedType)) and
'float' not in str(g_var.type.dtype)): 'float' not in str(g_var.type.dtype)):
raise TypeError("Gradients must always be NullType, " raise TypeError("Gradients must always be NullType, "
"DisconnectedType, or continuous, but grad was " "DisconnectedType, or continuous, but grad was "
"given a known_grad of type "+str(g_var.type)) "given a known_grad of type "+str(g_var.type))
# DO NOT check that these gradients are equal to 0 if var is int # DO NOT check that these gradients are equal to 0 if var is int
# The gradient is allowed to be non-zero on var in that case # The gradient is allowed to be non-zero on var in that case
...@@ -499,12 +503,11 @@ def grad(cost, wrt, consider_constant=None, ...@@ -499,12 +503,11 @@ def grad(cost, wrt, consider_constant=None,
grad_dict[var] = g_var grad_dict[var] = g_var
def handle_disconnected(var): def handle_disconnected(var):
message = ("grad method was asked to compute the gradient " message = ("grad method was asked to compute the gradient "
"with respect to a variable that is not part of " "with respect to a variable that is not part of "
"the computational graph of the cost, or is used " "the computational graph of the cost, or is used "
"only by a non-differentiable operator: %s" % var) "only by a non-differentiable operator: %s" % var)
if disconnected_inputs == 'ignore': if disconnected_inputs == 'ignore':
pass pass
elif disconnected_inputs == 'warn': elif disconnected_inputs == 'warn':
...@@ -513,9 +516,8 @@ def grad(cost, wrt, consider_constant=None, ...@@ -513,9 +516,8 @@ def grad(cost, wrt, consider_constant=None,
raise DisconnectedInputError(message) raise DisconnectedInputError(message)
else: else:
raise ValueError("Invalid value for keyword " raise ValueError("Invalid value for keyword "
"'disconnected_inputs', valid values are " "'disconnected_inputs', valid values are "
"'ignore', 'warn' and 'raise'.") "'ignore', 'warn' and 'raise'.")
# variables that do not influence the cost have zero gradient. # variables that do not influence the cost have zero gradient.
# if wrt is such a variable, populate the grad_dict with this info # if wrt is such a variable, populate the grad_dict with this info
...@@ -540,7 +542,7 @@ def grad(cost, wrt, consider_constant=None, ...@@ -540,7 +542,7 @@ def grad(cost, wrt, consider_constant=None,
assert g.type.dtype in tensor.float_dtypes assert g.type.dtype in tensor.float_dtypes
rval = _populate_grad_dict(var_to_app_to_idx, rval = _populate_grad_dict(var_to_app_to_idx,
grad_dict, wrt, cost_name) grad_dict, wrt, cost_name)
for i in xrange(len(rval)): for i in xrange(len(rval)):
if isinstance(rval[i].type, DisconnectedType): if isinstance(rval[i].type, DisconnectedType):
...@@ -556,8 +558,12 @@ def grad(cost, wrt, consider_constant=None, ...@@ -556,8 +558,12 @@ def grad(cost, wrt, consider_constant=None,
rval = tuple(rval) rval = tuple(rval)
elif not using_list: elif not using_list:
rval, = rval rval, = rval
t1 = time.time()
global grad_time
grad_time += t1 - t0
return rval return rval
def subgraph_grad(wrt, end, start=None, cost=None, details=False): def subgraph_grad(wrt, end, start=None, cost=None, details=False):
''' '''
With respect to `wrt`, computes gradients of cost and/or from With respect to `wrt`, computes gradients of cost and/or from
...@@ -565,7 +571,7 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -565,7 +571,7 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
symbolic digraph. In other words, computes gradients for a symbolic digraph. In other words, computes gradients for a
subgraph of the symbolic theano function. Ignores all disconnected subgraph of the symbolic theano function. Ignores all disconnected
inputs. inputs.
This can be useful when one needs to perform the gradient descent This can be useful when one needs to perform the gradient descent
iteratively (e.g. one layer at a time in an MLP), or when a iteratively (e.g. one layer at a time in an MLP), or when a
particular operation is not differentiable in theano particular operation is not differentiable in theano
...@@ -580,7 +586,7 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -580,7 +586,7 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
process, to variables, could then be fed into another process, to variables, could then be fed into another
subgraph_grad as `start` with any other `cost` (e.g. weight subgraph_grad as `start` with any other `cost` (e.g. weight
decay). decay).
In an MLP, we could use subgraph_grad to iteratively backpropagate: In an MLP, we could use subgraph_grad to iteratively backpropagate:
.. code-block:: python .. code-block:: python
...@@ -611,13 +617,13 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -611,13 +617,13 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
:type wrt: list of variables :type wrt: list of variables
:param wrt: :param wrt:
Gradients are computed with respect to `wrt`. Gradients are computed with respect to `wrt`.
:type end: list of variables :type end: list of variables
:param end: :param end:
Theano variables at which to end gradient descent (they are Theano variables at which to end gradient descent (they are
considered constant in theano.grad). For convenience, the considered constant in theano.grad). For convenience, the
gradients with respect to these variables are also returned. gradients with respect to these variables are also returned.
:type start: dictionary of variables :type start: dictionary of variables
:param start: :param start:
If not None, a dictionary mapping variables to their If not None, a dictionary mapping variables to their
...@@ -625,9 +631,9 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -625,9 +631,9 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
are known. These are used to compute the gradients backwards up are known. These are used to compute the gradients backwards up
to the variables in `end` (they are used as known_grad in to the variables in `end` (they are used as known_grad in
theano.grad). theano.grad).
:type cost: scalar (0-dimensional) variable :type cost: scalar (0-dimensional) variable
:param cost: :param cost:
Additional costs for which to compute the gradients. For Additional costs for which to compute the gradients. For
example, these could be weight decay, an l1 constraint, MSE, example, these could be weight decay, an l1 constraint, MSE,
NLL, etc. May optionally be None if start is provided. Warning NLL, etc. May optionally be None if start is provided. Warning
...@@ -647,10 +653,10 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -647,10 +653,10 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
When True, additionally returns the list of gradients from When True, additionally returns the list of gradients from
`start` and of `cost`, respectively, with respect to `wrt` (not `start` and of `cost`, respectively, with respect to `wrt` (not
`end`). `end`).
:rtype: Tuple of 2 or 4 Lists of Variables :rtype: Tuple of 2 or 4 Lists of Variables
:return: Returns lists of gradients with respect to `wrt` and `end`, :return: Returns lists of gradients with respect to `wrt` and `end`,
respectively. respectively.
.. versionadded:: 0.6.1 .. versionadded:: 0.6.1
...@@ -660,20 +666,20 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -660,20 +666,20 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
assert isinstance(wrt, list) assert isinstance(wrt, list)
if start is not None: if start is not None:
assert isinstance(start, dict) assert isinstance(start, dict)
params = list(set(wrt + end)) params = list(set(wrt + end))
start_grads = None start_grads = None
cost_grads = None cost_grads = None
if start is not None: if start is not None:
start_grads = list( start_grads = list(
theano.grad( theano.grad(
cost=None, wrt=params, known_grads=start, cost=None, wrt=params, known_grads=start,
consider_constant=end, consider_constant=end,
disconnected_inputs='ignore' disconnected_inputs='ignore'
) )
) )
if cost is not None: if cost is not None:
cost_grads = list( cost_grads = list(
theano.grad( theano.grad(
...@@ -682,7 +688,7 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -682,7 +688,7 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
disconnected_inputs='ignore' disconnected_inputs='ignore'
) )
) )
grads = None grads = None
if start is None: if start is None:
grads = cost_grads grads = cost_grads
...@@ -691,18 +697,18 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -691,18 +697,18 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
if cost_grads is not None: if cost_grads is not None:
for i in range(len(grads)): for i in range(len(grads)):
grads[i] += cost_grads[i] grads[i] += cost_grads[i]
pgrads = OrderedDict(zip(params, grads)) pgrads = OrderedDict(zip(params, grads))
# separate wrt from end grads: # separate wrt from end grads:
wrt_grads = list(pgrads[k] for k in wrt) wrt_grads = list(pgrads[k] for k in wrt)
end_grads = list(pgrads[k] for k in end) end_grads = list(pgrads[k] for k in end)
if details: if details:
return wrt_grads, end_grads, start_grads, cost_grads return wrt_grads, end_grads, start_grads, cost_grads
return wrt_grads, end_grads return wrt_grads, end_grads
def _node_to_pattern(node): def _node_to_pattern(node):
""" given an apply node, obtain its connection pattern """ given an apply node, obtain its connection pattern
this is just a wrapper around Op.connection_pattern this is just a wrapper around Op.connection_pattern
...@@ -714,30 +720,31 @@ def _node_to_pattern(node): ...@@ -714,30 +720,31 @@ def _node_to_pattern(node):
connection_pattern = node.op.connection_pattern(node) connection_pattern = node.op.connection_pattern(node)
if not isinstance(connection_pattern, list): if not isinstance(connection_pattern, list):
raise TypeError("Op.connection_pattern should return " + \ raise TypeError(
("list of list of bool, but for Op=%s" % node.op) +\ "Op.connection_pattern should return " +
"got %s with type %s." % (connection_pattern, ("list of list of bool, but for Op=%s" % node.op) +
type(connection_pattern))) "got %s with type %s." % (connection_pattern,
type(connection_pattern)))
if len(connection_pattern) != len(node.inputs): if len(connection_pattern) != len(node.inputs):
raise ValueError('%s.connection_pattern should have %d' % raise ValueError(
(node.op, len(node.inputs)) + ' rows but has %d.' % '%s.connection_pattern should have %d' %
len(connection_pattern)) (node.op, len(node.inputs)) + ' rows but has %d.' %
len(connection_pattern))
for ii, output_pattern in enumerate(connection_pattern): for ii, output_pattern in enumerate(connection_pattern):
if not isinstance(output_pattern, list): if not isinstance(output_pattern, list):
raise TypeError('%s.connection_pattern should return' % raise TypeError(
node.op + ' a list of lists, but element %d' % ii\ '%s.connection_pattern should return' %
+ 'is %s of type %s.' % (output_pattern, node.op + ' a list of lists, but element %d' % ii
type(output_pattern))) + 'is %s of type %s.' % (output_pattern,
type(output_pattern)))
else: else:
connection_pattern = \ connection_pattern = [[True for output in node.outputs]
[[True for output in node.outputs] for ipt in node.inputs]
for ipt in node.inputs]
assert isinstance(connection_pattern, list) assert isinstance(connection_pattern, list)
assert len(connection_pattern) == len(node.inputs) assert len(connection_pattern) == len(node.inputs)
for ii in xrange(len(node.inputs)): for ii in xrange(len(node.inputs)):
assert isinstance(connection_pattern[ii], list) assert isinstance(connection_pattern[ii], list)
assert len(connection_pattern[ii]) == \ assert len(connection_pattern[ii]) == len(node.outputs)
len(node.outputs)
return connection_pattern return connection_pattern
...@@ -792,7 +799,7 @@ def _populate_var_to_app_to_idx(outputs, wrt, consider_constant): ...@@ -792,7 +799,7 @@ def _populate_var_to_app_to_idx(outputs, wrt, consider_constant):
iter(consider_constant) iter(consider_constant)
except TypeError: except TypeError:
raise TypeError('consider_constant must be an iterable collection,' raise TypeError('consider_constant must be an iterable collection,'
' got ' + str(type(consider_constant))) ' got ' + str(type(consider_constant)))
for elem in consider_constant: for elem in consider_constant:
if not isinstance(elem, gof.Variable): if not isinstance(elem, gof.Variable):
raise TypeError('Elements of consider_constant must be ' raise TypeError('Elements of consider_constant must be '
...@@ -951,26 +958,27 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -951,26 +958,27 @@ def _populate_grad_dict(var_to_app_to_idx,
# list of bools indicating if each input is connected to the cost # list of bools indicating if each input is connected to the cost
inputs_connected = [ inputs_connected = [
(True in [input_to_output and output_to_cost for (True in [input_to_output and output_to_cost for
input_to_output, output_to_cost in input_to_output, output_to_cost in
zip(input_to_outputs, outputs_connected)]) for zip(input_to_outputs, outputs_connected)]) for
input_to_outputs in connection_pattern input_to_outputs in connection_pattern
] ]
#List of bools indicating if each output is an integer dtype #List of bools indicating if each output is an integer dtype
output_is_int = [hasattr(output.type, 'dtype') and output_is_int = [hasattr(output.type, 'dtype') and
output.type.dtype in theano.tensor.discrete_dtypes output.type.dtype in theano.tensor.discrete_dtypes
for output in node.outputs] for output in node.outputs]
#List of bools indicating if each output is NullType #List of bools indicating if each output is NullType
ograd_is_nan = [isinstance(output.type, NullType) ograd_is_nan = [isinstance(output.type, NullType)
for output in output_grads] for output in output_grads]
# List of bools indicating if each input only has NullType outputs # List of bools indicating if each input only has NullType outputs
only_connected_to_nan = [(True not in only_connected_to_nan = [
[in_to_out and out_to_cost and not out_nan (True not in
for in_to_out, out_to_cost, out_nan in [in_to_out and out_to_cost and not out_nan
zip(in_to_outs, outputs_connected, ograd_is_nan)]) for in_to_out, out_to_cost, out_nan in
zip(in_to_outs, outputs_connected, ograd_is_nan)])
for in_to_outs in connection_pattern] for in_to_outs in connection_pattern]
if True not in inputs_connected: if True not in inputs_connected:
...@@ -1013,8 +1021,6 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -1013,8 +1021,6 @@ def _populate_grad_dict(var_to_app_to_idx,
inputs = [try_to_copy_if_needed(ipt) for ipt in inputs] inputs = [try_to_copy_if_needed(ipt) for ipt in inputs]
# Build a list of output gradients with the same dtype as # Build a list of output gradients with the same dtype as
# the corresponding output variable. # the corresponding output variable.
# If an output is of a float dtype, we want to cast the # If an output is of a float dtype, we want to cast the
...@@ -1108,10 +1114,11 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -1108,10 +1114,11 @@ def _populate_grad_dict(var_to_app_to_idx,
# Do type checking on the result # Do type checking on the result
# List of bools indicating if each input only has integer outputs # List of bools indicating if each input only has integer outputs
only_connected_to_int = [(True not in only_connected_to_int = [
[in_to_out and out_to_cost and not out_int (True not in
for in_to_out, out_to_cost, out_int in [in_to_out and out_to_cost and not out_int
zip(in_to_outs, outputs_connected, output_is_int)]) for in_to_out, out_to_cost, out_int in
zip(in_to_outs, outputs_connected, output_is_int)])
for in_to_outs in connection_pattern] for in_to_outs in connection_pattern]
for i, term in enumerate(input_grads): for i, term in enumerate(input_grads):
...@@ -1122,13 +1129,14 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -1122,13 +1129,14 @@ def _populate_grad_dict(var_to_app_to_idx,
# used to mean undefined, zero, or disconnected. # used to mean undefined, zero, or disconnected.
# We therefore don't allow it because its usage has become # We therefore don't allow it because its usage has become
# so muddied. # so muddied.
raise TypeError(('%s.grad returned None for' + raise TypeError(
' a gradient term, ' ('%s.grad returned None for' +
'this is prohibited. Instead of None,' ' a gradient term, '
'return zeros_like(input), disconnected_type(),' 'this is prohibited. Instead of None,'
' or a NullType variable such as those made with ' 'return zeros_like(input), disconnected_type(),'
'the grad_undefined or grad_unimplemented helper ' ' or a NullType variable such as those made with '
'functions.') % node.op) 'the grad_undefined or grad_unimplemented helper '
'functions.') % node.op)
# Check that the gradient term for this input has the right shape # Check that the gradient term for this input has the right shape
if hasattr(term, 'shape'): if hasattr(term, 'shape'):
...@@ -1137,18 +1145,18 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -1137,18 +1145,18 @@ def _populate_grad_dict(var_to_app_to_idx,
i_shape = orig_ipt_v.shape i_shape = orig_ipt_v.shape
t_shape = term_v.shape t_shape = term_v.shape
if i_shape != t_shape: if i_shape != t_shape:
raise ValueError("%s.grad returned object of " raise ValueError(
"shape %s as gradient term on input %d " "%s.grad returned object of "
"of shape %s" % (node.op, t_shape, i, "shape %s as gradient term on input %d "
i_shape)) "of shape %s" % (node.op, t_shape, i, i_shape))
if not isinstance(term.type, if not isinstance(term.type,
(NullType, DisconnectedType)): (NullType, DisconnectedType)):
if term.type.dtype not in theano.tensor.float_dtypes: if term.type.dtype not in theano.tensor.float_dtypes:
raise TypeError(str(node.op) + '.grad illegally ' raise TypeError(str(node.op) + '.grad illegally '
' returned an integer-valued variable.' ' returned an integer-valued variable.'
' (Input index %d, dtype %s)' % (i, ' (Input index %d, dtype %s)' % (
term.type.dtype)) i, term.type.dtype))
if only_connected_to_nan[i]: if only_connected_to_nan[i]:
assert isinstance(term.type, NullType) assert isinstance(term.type, NullType)
...@@ -1233,23 +1241,25 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -1233,23 +1241,25 @@ def _populate_grad_dict(var_to_app_to_idx,
term = access_term_cache(node)[idx] term = access_term_cache(node)[idx]
if not isinstance(term, gof.Variable): if not isinstance(term, gof.Variable):
raise TypeError("%s.grad returned %s, expected" raise TypeError(
" Variable instance." % (str(node.op), "%s.grad returned %s, expected"
type(term))) " Variable instance." % (str(node.op),
type(term)))
if isinstance(term.type, NullType): if isinstance(term.type, NullType):
raise NullTypeGradError("tensor.grad " raise NullTypeGradError("tensor.grad "
"encountered a NaN. " + "encountered a NaN. " +
term.type.why_null) term.type.why_null)
#Don't try to sum up DisconnectedType placeholders #Don't try to sum up DisconnectedType placeholders
if isinstance(term.type, DisconnectedType): if isinstance(term.type, DisconnectedType):
continue continue
if hasattr(var, 'ndim') and term.ndim != var.ndim: if hasattr(var, 'ndim') and term.ndim != var.ndim:
raise ValueError(("%s.grad returned a term with" raise ValueError(
" %d dimensions, but %d are required.") % ( ("%s.grad returned a term with"
str(node.op), term.ndim, var.ndim)) " %d dimensions, but %d are required.") % (
str(node.op), term.ndim, var.ndim))
terms.append(term) terms.append(term)
...@@ -1561,12 +1571,13 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1561,12 +1571,13 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
for i, p in enumerate(pt): for i, p in enumerate(pt):
if p.dtype not in ('float32', 'float64'): if p.dtype not in ('float32', 'float64'):
raise TypeError(('verify_grad can work only with floating point ' raise TypeError(
'inputs, but input %i has dtype "%s".') % (i, p.dtype)) ('verify_grad can work only with floating point '
'inputs, but input %i has dtype "%s".') % (i, p.dtype))
_type_tol = dict( # relative error tolerances for different types _type_tol = dict( # relative error tolerances for different types
float32=1e-2, float32=1e-2,
float64=1e-4) float64=1e-4)
if abs_tol is None: if abs_tol is None:
abs_tol = __builtin__.max(_type_tol[str(p.dtype)] for p in pt) abs_tol = __builtin__.max(_type_tol[str(p.dtype)] for p in pt)
...@@ -1593,7 +1604,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1593,7 +1604,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
on_unused_input='ignore') on_unused_input='ignore')
return f return f
tensor_pt = [TensorType( tensor_pt = [
TensorType(
as_tensor_variable(p).dtype, as_tensor_variable(p).dtype,
as_tensor_variable(p).broadcastable)(name='input %i' % i) as_tensor_variable(p).broadcastable)(name='input %i' % i)
for i, p in enumerate(pt)] for i, p in enumerate(pt)]
...@@ -1612,9 +1624,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1612,9 +1624,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
o_fn_out = o_fn(*[p.copy() for p in pt]) o_fn_out = o_fn(*[p.copy() for p in pt])
if isinstance(o_fn_out, tuple) or isinstance(o_fn_out, list): if isinstance(o_fn_out, tuple) or isinstance(o_fn_out, list):
raise TypeError('It seems like you are trying to use verify_grad ' raise TypeError(
'on an op or a function which outputs a list: there should' 'It seems like you are trying to use verify_grad '
' be a single (array-like) output instead') 'on an op or a function which outputs a list: there should'
' be a single (array-like) output instead')
# random_projection should not have elements too small, # random_projection should not have elements too small,
# otherwise too much precision is lost in numerical gradient # otherwise too much precision is lost in numerical gradient
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论