提交 47ab45df authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Convert some docstrings to numpydoc and improve rendering

上级 03970991
...@@ -44,6 +44,7 @@ grad_time = 0 ...@@ -44,6 +44,7 @@ grad_time = 0
def format_as(use_list, use_tuple, outputs): def format_as(use_list, use_tuple, outputs):
""" """
Formats the outputs according to the flags `use_list` and `use_tuple`. Formats the outputs according to the flags `use_list` and `use_tuple`.
If `use_list` is True, `outputs` is returned as a list (if `outputs` If `use_list` is True, `outputs` is returned as a list (if `outputs`
is not a list or a tuple then it is converted into a one-element list). is not a list or a tuple then it is converted into a one-element list).
If `use_tuple` is True, `outputs` is returned as a tuple (if `outputs` If `use_tuple` is True, `outputs` is returned as a tuple (if `outputs`
...@@ -163,20 +164,23 @@ disconnected_type = DisconnectedType() ...@@ -163,20 +164,23 @@ disconnected_type = DisconnectedType()
def Rop(f, wrt, eval_points, disconnected_outputs="raise", def Rop(f, wrt, eval_points, disconnected_outputs="raise",
return_disconnected="zero"): return_disconnected="zero"):
""" """
Computes the R operation on `f` wrt to `wrt` evaluated at points given Computes the R operation on `f` wrt to `wrt` at `eval_points`.
in `eval_points`. Mathematically this stands for the jacobian of `f` wrt
Mathematically this stands for the jacobian of `f` wrt
to `wrt` right multiplied by the eval points. to `wrt` right multiplied by the eval points.
:type f: Variable or list of Variables Parameters
----------
f: :class:`~theano.gof.graph.Variable` or list of Variables
`f` stands for the output of the computational graph to which you `f` stands for the output of the computational graph to which you
want to apply the R operator want to apply the R operator
:type wrt: Variable or list of `Variables`s wrt: :class:`~theano.gof.graph.Variable` or list of Variables
variables for which you compute the R operator of the expression variables for which you compute the R operator of the expression
described by `f` described by `f`
:type eval_points: Variable or list of Variables eval_points: :class:`~theano.gof.graph.Variable` or list of Variables
evaluation points for each of the variables in `wrt` evaluation points for each of the variables in `wrt`
:type disconnected_outputs: str disconnected_outputs: str
Defines the behaviour if some of the variables in `f` are Defines the behaviour if some of the variables in `f`
have no dependency on any of the variables in `wrt` (or if have no dependency on any of the variables in `wrt` (or if
all links are non-differentiable). The possible values are: all links are non-differentiable). The possible values are:
...@@ -184,16 +188,18 @@ def Rop(f, wrt, eval_points, disconnected_outputs="raise", ...@@ -184,16 +188,18 @@ def Rop(f, wrt, eval_points, disconnected_outputs="raise",
- 'warn': consider the gradient zero, and print a warning. - 'warn': consider the gradient zero, and print a warning.
- 'raise': raise DisconnectedInputError. - 'raise': raise DisconnectedInputError.
:type return_disconnected : {'zero', 'None', 'Disconnected'} return_disconnected : {'zero', 'None', 'Disconnected'}
- 'zero' : If wrt[i] is disconnected, return value i will be - 'zero' : If wrt[i] is disconnected, return value i will be
wrt[i].zeros_like() wrt[i].zeros_like()
- 'None' : If wrt[i] is disconnected, return value i will be - 'None' : If wrt[i] is disconnected, return value i will be
None None
- 'Disconnected' : returns variables of type DisconnectedType - 'Disconnected' : returns variables of type DisconnectedType
:rtype: :class:`~theano.gof.Variable` or list/tuple of Variables depending on type of f Returns
:return: symbolic expression such that -------
R_op[i] = sum_j ( d f[i] / d wrt[j]) eval_point[j] :class:`~theano.gof.graph.Variable` or list/tuple of Variables depending on type of f
Symbolic expression such that
R_op[i] = sum_j (d f[i] / d wrt[j]) eval_point[j]
where the indices in that expression are magic multidimensional where the indices in that expression are magic multidimensional
indices that specify both the position within a list and all indices that specify both the position within a list and all
coordinates of the tensor element in the last. coordinates of the tensor element in the last.
...@@ -349,22 +355,27 @@ def Rop(f, wrt, eval_points, disconnected_outputs="raise", ...@@ -349,22 +355,27 @@ def Rop(f, wrt, eval_points, disconnected_outputs="raise",
def Lop(f, wrt, eval_points, consider_constant=None, def Lop(f, wrt, eval_points, consider_constant=None,
disconnected_inputs='raise'): disconnected_inputs='raise'):
""" """
Computes the L operation on `f` wrt to `wrt` evaluated at points given Computes the L operation on `f` wrt to `wrt` at `eval_points`.
in `eval_points`. Mathematically this stands for the jacobian of `f` wrt
Mathematically this stands for the jacobian of `f` wrt
to `wrt` left multiplied by the eval points. to `wrt` left multiplied by the eval points.
:type f: Variable or list of Variables Parameters
----------
f: :class:`~theano.gof.graph.Variable` or list of Variables
`f` stands for the output of the computational graph to which you `f` stands for the output of the computational graph to which you
want to apply the L operator want to apply the L operator
:type wrt: Variable or list of `Variables`s wrt: :class:`~theano.gof.graph.Variable` or list of Variables
variables for which you compute the L operator of the expression variables for which you compute the L operator of the expression
described by `f` described by `f`
:type eval_points: Variable or list of Variables eval_points: :class:`~theano.gof.graph.Variable` or list of Variables
evaluation points for each of the variables in `f` evaluation points for each of the variables in `f`
:rtype: :class:`~theano.gof.Variable` or list/tuple of Variables depending on type of f Returns
:return: symbolic expression such that -------
L_op[j] = sum_i ( d f[i] / d wrt[j]) eval_point[i] :class:`~theano.gof.Variable` or list/tuple of Variables depending on type of f
Symbolic expression such that
L_op[j] = sum_i (d f[i] / d wrt[j]) eval_point[i]
where the indices in that expression are magic multidimensional where the indices in that expression are magic multidimensional
indices that specify both the position within a list and all indices that specify both the position within a list and all
coordinates of the tensor element in the last coordinates of the tensor element in the last
...@@ -414,10 +425,10 @@ def grad(cost, wrt, consider_constant=None, ...@@ -414,10 +425,10 @@ def grad(cost, wrt, consider_constant=None,
Parameters Parameters
---------- ----------
cost : :class:`~theano.gof.Variable` scalar (0-dimensional) tensor variable or None cost : :class:`~theano.gof.graph.Variable` scalar (0-dimensional) tensor variable or None
Value with respect to which we are differentiating. May be Value with respect to which we are differentiating. May be
`None` if known_grads is provided. `None` if known_grads is provided.
wrt : :class:`~theano.gof.Variable` or list of Variables wrt : :class:`~theano.gof.graph.Variable` or list of Variables
term[s] for which we want gradients term[s] for which we want gradients
consider_constant : list of variables consider_constant : list of variables
expressions not to backpropagate through expressions not to backpropagate through
...@@ -453,7 +464,7 @@ def grad(cost, wrt, consider_constant=None, ...@@ -453,7 +464,7 @@ def grad(cost, wrt, consider_constant=None,
Returns Returns
------- -------
variable or list/tuple of variables (matches `wrt`) variable or list/tuple of variables (matches `wrt`)
symbolic expression of gradient of `cost` with respect to each Symbolic expression of gradient of `cost` with respect to each
of the `wrt` terms. If an element of `wrt` is not of the `wrt` terms. If an element of `wrt` is not
differentiable with respect to the output, then a zero differentiable with respect to the output, then a zero
variable is returned. variable is returned.
...@@ -670,32 +681,28 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -670,32 +681,28 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
next_grad = dict(zip(grad_ends[i], next_grad)) next_grad = dict(zip(grad_ends[i], next_grad))
param_grads.extend(param_grad) param_grads.extend(param_grad)
:type wrt: list of variables Parameters
:param wrt: ----------
wrt: list of variables
Gradients are computed with respect to `wrt`. Gradients are computed with respect to `wrt`.
:type end: list of variables end: list of variables
:param end:
Theano variables at which to end gradient descent (they are Theano variables at which to end gradient descent (they are
considered constant in theano.grad). For convenience, the considered constant in theano.grad). For convenience, the
gradients with respect to these variables are also returned. gradients with respect to these variables are also returned.
:type start: dictionary of variables start: dictionary of variables
:param start:
If not None, a dictionary mapping variables to their If not None, a dictionary mapping variables to their
gradients. This is useful when the gradient on some variables gradients. This is useful when the gradient on some variables
is known. These are used to compute the gradients backwards up is known. These are used to compute the gradients backwards up
to the variables in `end` (they are used as known_grad in to the variables in `end` (they are used as known_grad in
theano.grad). theano.grad).
:type cost: :class:`~theano.gof.Variable` scalar (0-dimensional) variable cost: :class:`~theano.gof.Variable` scalar (0-dimensional) variable
:param cost:
Additional costs for which to compute the gradients. For Additional costs for which to compute the gradients. For
example, these could be weight decay, an l1 constraint, MSE, example, these could be weight decay, an l1 constraint, MSE,
NLL, etc. May optionally be None if start is provided. Warning NLL, etc. May optionally be None if start is provided.
: If the gradients of `cost` with respect to any of the `start`
variables is already part of the `start` dictionary, then it may
be counted twice with respect to `wrt` and `end`.
.. warning:: .. warning::
...@@ -703,18 +710,18 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -703,18 +710,18 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
variables is already part of the `start` dictionary, then it variables is already part of the `start` dictionary, then it
may be counted twice with respect to `wrt` and `end`. may be counted twice with respect to `wrt` and `end`.
details: bool
:type details: bool
:param details:
When True, additionally returns the list of gradients from When True, additionally returns the list of gradients from
`start` and of `cost`, respectively, with respect to `wrt` (not `start` and of `cost`, respectively, with respect to `wrt` (not
`end`). `end`).
:rtype: Tuple of 2 or 4 Lists of Variables Returns
-------
:return: Returns lists of gradients with respect to `wrt` and `end`, Tuple of 2 or 4 Lists of Variables
Returns lists of gradients with respect to `wrt` and `end`,
respectively. respectively.
.. versionadded:: 0.7 .. versionadded:: 0.7
''' '''
assert ((cost is not None) or (start is not None)) assert ((cost is not None) or (start is not None))
...@@ -1813,26 +1820,31 @@ verify_grad.E_grad = GradientError ...@@ -1813,26 +1820,31 @@ verify_grad.E_grad = GradientError
def jacobian(expression, wrt, consider_constant=None, def jacobian(expression, wrt, consider_constant=None,
disconnected_inputs='raise'): disconnected_inputs='raise'):
""" """
:type expression: Vector (1-dimensional) Variable Compute the full Jacobian
:type wrt: Variable or list of Variables
:param consider_constant: a list of expressions not to backpropagate Parameters
through ----------
expression: Vector (1-dimensional) :class:`~theano.gof.graph.Variable`
wrt: :class:`~theano.gof.graph.Variable` or list of Variables
consider_constant:
a list of expressions not to backpropagate through
:type disconnected_inputs: string disconnected_inputs: string
:param disconnected_inputs: Defines the behaviour if some of the variables Defines the behaviour if some of the variables
in ``wrt`` are not part of the computational graph computing ``cost`` in ``wrt`` are not part of the computational graph computing ``cost``
(or if all links are non-differentiable). The possible values are: (or if all links are non-differentiable). The possible values are:
- 'ignore': considers that the gradient on these parameters is zero. - 'ignore': considers that the gradient on these parameters is zero.
- 'warn': consider the gradient zero, and print a warning. - 'warn': consider the gradient zero, and print a warning.
- 'raise': raise an exception. - 'raise': raise an exception.
:return: either a instance of Variable or list/tuple of Variables Returns
(depending upon `wrt`) representing the jacobian of `expression` -------
with respect to (elements of) `wrt`. If an element of `wrt` is not :class:`~theano.gof.graph.Variable` or list/tuple of Variables (depending upon `wrt`)
differentiable with respect to the output, then a zero The jacobian of `expression` with respect to (elements of) `wrt`.
variable is returned. The return value is of same type If an element of `wrt` is not differentiable with respect to the
as `wrt`: a list/tuple or TensorVariable in all cases. output, then a zero variable is returned. The return value is
of same type as `wrt`: a list/tuple or TensorVariable in all cases.
""" """
from theano.tensor import arange from theano.tensor import arange
# Check inputs have the right format # Check inputs have the right format
...@@ -1886,27 +1898,29 @@ def jacobian(expression, wrt, consider_constant=None, ...@@ -1886,27 +1898,29 @@ def jacobian(expression, wrt, consider_constant=None,
def hessian(cost, wrt, consider_constant=None, def hessian(cost, wrt, consider_constant=None,
disconnected_inputs='raise'): disconnected_inputs='raise'):
""" """
:type cost: Scalar (0-dimensional) Variable. Parameters
:type wrt: Vector (1-dimensional tensor) 'Variable' or list of ----------
cost: Scalar (0-dimensional) variable.
wrt: Vector (1-dimensional tensor) 'Variable' or list of
vectors (1-dimensional tensors) Variables vectors (1-dimensional tensors) Variables
consider_constant:
:param consider_constant: a list of expressions not to backpropagate a list of expressions not to backpropagate through
through disconnected_inputs: string
Defines the behaviour if some of the variables
:type disconnected_inputs: string
:param disconnected_inputs: Defines the behaviour if some of the variables
in ``wrt`` are not part of the computational graph computing ``cost`` in ``wrt`` are not part of the computational graph computing ``cost``
(or if all links are non-differentiable). The possible values are: (or if all links are non-differentiable). The possible values are:
- 'ignore': considers that the gradient on these parameters is zero. - 'ignore': considers that the gradient on these parameters is zero.
- 'warn': consider the gradient zero, and print a warning. - 'warn': consider the gradient zero, and print a warning.
- 'raise': raise an exception. - 'raise': raise an exception.
:return: either a instance of Variable or list/tuple of Variables Returns
(depending upon `wrt`) representing the Hessian of the `cost` -------
with respect to (elements of) `wrt`. If an element of `wrt` is not :class:`~theano.gof.graph.Variable` or list/tuple of Variables
differentiable with respect to the output, then a zero The Hessian of the `cost` with respect to (elements of) `wrt`.
variable is returned. The return value is of same type If an element of `wrt` is not differentiable with respect to the
as `wrt`: a list/tuple or TensorVariable in all cases. output, then a zero variable is returned. The return value is
of same type as `wrt`: a list/tuple or TensorVariable in all cases.
""" """
from theano.tensor import arange from theano.tensor import arange
# Check inputs have the right format # Check inputs have the right format
...@@ -2034,10 +2048,16 @@ def zero_grad(x): ...@@ -2034,10 +2048,16 @@ def zero_grad(x):
through with a value of zero. In other words, the gradient of through with a value of zero. In other words, the gradient of
the expression is truncated to 0. the expression is truncated to 0.
:param x: A Theano expression whose gradient should be truncated. Parameters
----------
x: :class:`~theano.gof.graph.Variable`
A Theano expression whose gradient should be truncated.
:return: The expression is returned unmodified, but its gradient Returns
is now truncated to 0. -------
:class:`~theano.gof.graph.Variable`
An expression equivalent to ``x``, with its gradient
truncated to 0.
""" """
return zero_grad_(x) return zero_grad_(x)
...@@ -2058,18 +2078,24 @@ undefined_grad_ = UndefinedGrad() ...@@ -2058,18 +2078,24 @@ undefined_grad_ = UndefinedGrad()
def undefined_grad(x): def undefined_grad(x):
""" """
Consider the gradient of this variable undefined and Consider the gradient of this variable undefined.
generate an error message if its gradient is taken.
This will generate an error message if its gradient is taken.
The expression itself is unaffected, but when its gradient is The expression itself is unaffected, but when its gradient is
computed, or the gradient of another expression that this computed, or the gradient of another expression that this
expression is a subexpression of, an error message will be generated expression is a subexpression of, an error message will be generated
specifying such gradient is not defined. specifying such gradient is not defined.
:param x: A Theano expression whose gradient should be undefined. Parameters
----------
x: :class:`~theano.gof.graph.Variable`
A Theano expression whose gradient should be undefined.
:return: The expression is returned unmodified, but its gradient Returns
is now undefined. -------
:class:`~theano.gof.graph.Variable`
An expression equivalent to ``x``, with its gradient undefined.
""" """
return undefined_grad_(x) return undefined_grad_(x)
...@@ -2090,8 +2116,9 @@ disconnected_grad_ = DisconnectedGrad() ...@@ -2090,8 +2116,9 @@ disconnected_grad_ = DisconnectedGrad()
def disconnected_grad(x): def disconnected_grad(x):
""" """
Consider an expression constant when computing gradients, Consider an expression constant when computing gradients.
while effectively not backpropagating through it.
It will effectively not backpropagate through it.
The expression itself is unaffected, but when its gradient is The expression itself is unaffected, but when its gradient is
computed, or the gradient of another expression that this computed, or the gradient of another expression that this
...@@ -2101,11 +2128,17 @@ def disconnected_grad(x): ...@@ -2101,11 +2128,17 @@ def disconnected_grad(x):
has to go through the underlying computational graph related to the has to go through the underlying computational graph related to the
expression. expression.
:param x: A Theano expression whose gradient should not be Parameters
----------
x: :class:`~theano.gof.graph.Variable`
A Theano expression whose gradient should not be
backpropagated through. backpropagated through.
:return: The expression is returned unmodified, but its gradient Returns
is now effectively truncated to 0. -------
:class:`~theano.gof.graph.Variable`
An expression equivalent to ``x``, with its gradient
now effectively truncated to 0.
""" """
return disconnected_grad_(x) return disconnected_grad_(x)
...@@ -2133,22 +2166,27 @@ def grad_clip(x, lower_bound, upper_bound): ...@@ -2133,22 +2166,27 @@ def grad_clip(x, lower_bound, upper_bound):
This is an elemwise operation. This is an elemwise operation.
:param x: the variable we want its gradient inputs clipped Parameters
:param lower_bound: The lower bound of the gradient value ----------
:param upper_bound: The upper bound of the gradient value. x:
The variable whose gradient inputs we want clipped
:examples: lower_bound:
The lower bound of the gradient value
x = theano.tensor.scalar() upper_bound:
The upper bound of the gradient value.
z = theano.tensor.grad(grad_clip(x, -1, 1)**2, x)
z2 = theano.tensor.grad(x**2, x) Examples
--------
f = theano.function([x], outputs = [z, z2]) >>> x = theano.tensor.scalar()
>>> z = theano.tensor.grad(grad_clip(x, -1, 1)**2, x)
print(f(2.0)) # output (1.0, 4.0) >>> z2 = theano.tensor.grad(x**2, x)
>>> f = theano.function([x], outputs = [z, z2])
:note: We register an opt in tensor/opt.py that remove the GradClip. >>> print(f(2.0))
[array(1.0), array(4.0)]
Notes
-----
We register an opt in tensor/opt.py that removes the GradClip.
So it has 0 cost in the forward and only does work in the grad. So it has 0 cost in the forward and only does work in the grad.
""" """
...@@ -2167,21 +2205,25 @@ def grad_scale(x, multiplier): ...@@ -2167,21 +2205,25 @@ def grad_scale(x, multiplier):
""" """
This op scales or inverts the gradient in the backpropagation. This op scales or inverts the gradient in the backpropagation.
:param x: the variable we want its gradient inputs scale Parameters
:param multiplier: scale of the gradient ----------
x:
:examples: The variable whose gradient inputs we want scaled
multiplier:
x = theano.tensor.fscalar() Scale of the gradient
fx = theano.tensor.sin(x)
Examples
fp = theano.tensor.grad(fx, wrt=x) --------
fprime = theano.function([x], fp) >>> x = theano.tensor.fscalar()
print(fprime(2))#-0.416 >>> fx = theano.tensor.sin(x)
>>> fp = theano.tensor.grad(fx, wrt=x)
f_inverse=grad_scale(fx,-1.) >>> fprime = theano.function([x], fp)
fpp = theano.tensor.grad(f_inverse, wrt=x) >>> print(fprime(2)) # doctest: +ELLIPSIS
fpprime = theano.function([x], fpp) -0.416...
print(fpprime(2))#0.416 >>> f_inverse=grad_scale(fx, -1.)
>>> fpp = theano.tensor.grad(f_inverse, wrt=x)
>>> fpprime = theano.function([x], fpp)
>>> print(fpprime(2)) # doctest: +ELLIPSIS
0.416...
""" """
return GradScale(multiplier)(x) return GradScale(multiplier)(x)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论