提交 6304a061 authored 作者: abergeron's avatar abergeron

Merge pull request #3293 from harlouci/numpydoc_tensor

Numpydoc tensor
"""A `Type` and `Op` classes to work with numpy.ndarrays symbolically.""" """A `Type` and `Op` classes to work with numpy.ndarrays symbolically."""
import sys import sys
import warnings import warnings
...@@ -63,8 +62,10 @@ class ShapeError(Exception): ...@@ -63,8 +62,10 @@ class ShapeError(Exception):
def check_equal_numpy(x, y): def check_equal_numpy(x, y):
""" """
Returns True iff x and y are equal (checks the dtype and Return True iff x and y are equal.
shape if x and y are numpy.ndarray instances).
Checks the dtype and shape if x and y are numpy.ndarray instances.
""" """
if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray): if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray):
return (x.dtype == y.dtype and x.shape == y.shape and return (x.dtype == y.dtype and x.shape == y.shape and
...@@ -89,6 +90,7 @@ def constructor(f): ...@@ -89,6 +90,7 @@ def constructor(f):
Make `f` appear as a constructor in the oplist (`gen_oplist`, Make `f` appear as a constructor in the oplist (`gen_oplist`,
doc/oplist.txt). doc/oplist.txt).
""" """
__oplist_constructor_list.append(f) __oplist_constructor_list.append(f)
return f return f
...@@ -107,8 +109,7 @@ if 0: ...@@ -107,8 +109,7 @@ if 0:
# - JB 20100226 # - JB 20100226
def as_cuda_or_tensor_variable(x, name=None, ndim=None): def as_cuda_or_tensor_variable(x, name=None, ndim=None):
""" """
This function do the same as_tensor_variable, but don't Do the same as_tensor_variable, but do not transfer the value on the gpu.
transfert the value on the gpu
""" """
if hasattr(x, '_as_CudaNdarrayVariable'): if hasattr(x, '_as_CudaNdarrayVariable'):
# TODO: pass name and ndim arguments # TODO: pass name and ndim arguments
...@@ -117,29 +118,31 @@ if 0: ...@@ -117,29 +118,31 @@ if 0:
def as_tensor_variable(x, name=None, ndim=None): def as_tensor_variable(x, name=None, ndim=None):
"""Return `x`, transformed into a `TensorType` """Return `x`, transformed into a `TensorType`.
This function is often used by `make_node` methods of `Op` This function is often used by `make_node` methods of `Op` subclasses
subclasses to turn ndarrays, numbers, `Scalar` instances, `Apply` to turn ndarrays, numbers, `Scalar` instances, `Apply` instances and
instances and `TensorType` instances into valid input list `TensorType` instances into valid input list elements.
elements.
:Parameters: Parameters
- `x`: Apply instance, Variable instance, numpy.ndarray, or number ----------
x : Apply instance, Variable instance, numpy.ndarray, or number
This thing will be transformed into a `Variable` in a sensible way. An This thing will be transformed into a `Variable` in a sensible way. An
ndarray argument will not be copied, but a list of numbers will be ndarray argument will not be copied, but a list of numbers will be
copied to make an ndarray. copied to make an ndarray.
- `name`: str or None name : str or None
If a new `Variable` instance is created, it will be named with this If a new `Variable` instance is created, it will be named with this
string. string.
- `ndim`: None or integer ndim : None or integer
Return a Variable with this many dimensions. Raise TypeError if it's Return a Variable with this many dimensions. Raise TypeError if it's
not possible. not possible.
:Exceptions: Raises
- `ValueError`: raised if an `Apply` with more then one output is fetched ------
- `AsTensorError`: raised if `x` cannot be converted to a TensorType ValueError
Variable If an `Apply` with more than one output is fetched.
AsTensorError
If `x` cannot be converted to a TensorType Variable.
""" """
if hasattr(x, '_as_TensorVariable'): if hasattr(x, '_as_TensorVariable'):
...@@ -231,16 +234,18 @@ class NumpyAutocaster(object): ...@@ -231,16 +234,18 @@ class NumpyAutocaster(object):
float32); float32);
- if no data type can represent `x` without loss of precision, then - if no data type can represent `x` without loss of precision, then
the last data type in the tuple will be used. the last data type in the tuple will be used.
"""
def __init__(self, dtypes):
"""
Constructor.
:type dtypes: Tuple of strings.
:param dtypes: The ordered list of preferred data types (only used when Parameters
`config.cast_policy` is set to 'custom', see the `NumpyAutocaster` help ----------
for details). dtypes: tuple of strings
The ordered list of preferred data types (only used when
`config.cast_policy` is set to 'custom', see the `NumpyAutocaster`
help for details).
""" """
def __init__(self, dtypes):
self.dtypes = tuple(dtypes) self.dtypes = tuple(dtypes)
def __call__(self, x): def __call__(self, x):
...@@ -312,17 +317,20 @@ autocast_float = NumpyAutocaster(('float16', 'float32', 'float64')) ...@@ -312,17 +317,20 @@ autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))
# #
class autocast_float_as(object): class autocast_float_as(object):
""" """
Temporarily adjust autocasting behavior.
This class makes it possible to temporarily and locally adjust autocasting This class makes it possible to temporarily and locally adjust autocasting
behavior when `config.cast_policy` is set to 'custom'. behavior when `config.cast_policy` is set to 'custom'.
If `config.cast_policy` is not 'custom', an exception is raised. If `config.cast_policy` is not 'custom', an exception is raised.
This class might be convenient in some code, but it definitely
helps to test the autocasting mechanism.
For example: Examples
--------
>>> with autocast_float_as('float32'): >>> with autocast_float_as('float32'):
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting ... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour >>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
This class might be convenient in some code, but it definitely
helps to test the autocasting mechanism.
""" """
def __init__(self, *dtypes): def __init__(self, *dtypes):
self.dtypes = dtypes self.dtypes = dtypes
...@@ -339,11 +347,14 @@ class autocast_float_as(object): ...@@ -339,11 +347,14 @@ class autocast_float_as(object):
def constant_or_value(x, rtype, name=None, ndim=None, dtype=None): def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
"""Return a symbolic `Constant` with value `x` """Return a symbolic `Constant` with value `x`.
:Exceptions: Raises
- `TypeError`: `x` could not be converted to a numpy.ndarray ------
- `ValueError`: `x` could not be expanded to have ndim dimensions TypeError
`x` could not be converted to a numpy.ndarray.
ValueError
`x` could not be expanded to have ndim dimensions.
""" """
if dtype is not None: if dtype is not None:
...@@ -507,8 +518,13 @@ class EmptyConstantError(NotScalarConstantError): ...@@ -507,8 +518,13 @@ class EmptyConstantError(NotScalarConstantError):
def numpy_scalar(data): def numpy_scalar(data):
""" Return a scalar stored in a numpy ndarray, or raise """ Return a scalar stored in a numpy ndarray.
NotScalarConstantError if the numpy ndarray is not a scalar
Raises
------
NotScalarConstantError
If the numpy ndarray is not a scalar.
""" """
# handle case where data is numpy.array([]) # handle case where data is numpy.array([])
...@@ -536,26 +552,29 @@ get_scalar_constant_value_elemwises = ( ...@@ -536,26 +552,29 @@ get_scalar_constant_value_elemwises = (
def get_scalar_constant_value(orig_v, elemwise=True, def get_scalar_constant_value(orig_v, elemwise=True,
only_process_constants=False): only_process_constants=False):
"""return the constant scalar(0-D) value underlying variable `v` """Return the constant scalar(0-D) value underlying variable `v`.
If v is the output of dimshuffles, fills, allocs, rebroadcasts, If `v` is the output of dimshuffles, fills, allocs, rebroadcasts,
cast, OutputGuard, DeepCopyOp, ScalarFromTensor, ScalarOp, cast, OutputGuard, DeepCopyOp, ScalarFromTensor, ScalarOp, Elemwise
Elemwise and some pattern with Subtensor, and some pattern with Subtensor, this function digs through them.
this function digs through them.
If `v` is not some view of constant scalar data, then raise a If `v` is not some view of constant scalar data, then raise a
NotScalarConstantError. NotScalarConstantError.
:param elemwise: If False, we won't try to go into elemwise. Parameters
So this call is faster. ----------
elemwise : bool
If False, we won't try to go into elemwise. So this call is faster.
only_process_constants : bool
If True, we only attempt to obtain the value of `orig_v` if it's
directly constant and don't try to dig through dimshuffles, fills,
allocs, and other to figure out its value.
:param only_process_constants: If True, we only attempt to obtain Notes
the value of `orig_v` if it's directly constant and don't -----
try to dig through dimshuffles, fills, allocs, and other to figure There may be another function similar to this one in the code,
out its value. but I'm not sure where it is.
:note: There may be another function similar to this one in the
code, but I'm not sure where it is.
""" """
v = orig_v v = orig_v
while True: while True:
...@@ -773,8 +792,14 @@ lscalar = TensorType('int64', ()) ...@@ -773,8 +792,14 @@ lscalar = TensorType('int64', ())
def scalar(name=None, dtype=None): def scalar(name=None, dtype=None):
"""Return a symbolic scalar variable. """Return a symbolic scalar variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable Parameters
----------
dtype: numeric
None means to use theano.config.floatX.
name
A name to attach to this variable.
""" """
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
...@@ -803,8 +828,14 @@ lvector = TensorType('int64', (False, )) ...@@ -803,8 +828,14 @@ lvector = TensorType('int64', (False, ))
def vector(name=None, dtype=None): def vector(name=None, dtype=None):
"""Return a symbolic vector variable. """Return a symbolic vector variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable Parameters
----------
dtype: numeric
None means to use theano.config.floatX.
name
A name to attach to this variable
""" """
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
...@@ -830,8 +861,14 @@ lmatrix = TensorType('int64', (False, False)) ...@@ -830,8 +861,14 @@ lmatrix = TensorType('int64', (False, False))
def matrix(name=None, dtype=None): def matrix(name=None, dtype=None):
"""Return a symbolic matrix variable. """Return a symbolic matrix variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable Parameters
----------
dtype: numeric
None means to use theano.config.floatX.
name
A name to attach to this variable.
""" """
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
...@@ -857,8 +894,14 @@ lrow = TensorType('int64', (True, False)) ...@@ -857,8 +894,14 @@ lrow = TensorType('int64', (True, False))
def row(name=None, dtype=None): def row(name=None, dtype=None):
"""Return a symbolic row variable (ndim=2, broadcastable=[True,False]). """Return a symbolic row variable (ndim=2, broadcastable=[True,False]).
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable Parameters
----------
dtype: numeric type
None means to use theano.config.floatX.
name
A name to attach to this variable.
""" """
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
...@@ -878,8 +921,14 @@ lcol = TensorType('int64', (False, True)) ...@@ -878,8 +921,14 @@ lcol = TensorType('int64', (False, True))
def col(name=None, dtype=None): def col(name=None, dtype=None):
"""Return a symbolic column variable (ndim=2, broadcastable=[False,True]). """Return a symbolic column variable (ndim=2, broadcastable=[False,True]).
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable Parameters
----------
dtype : numeric
None means to use theano.config.floatX.
name
A name to attach to this variable.
""" """
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
...@@ -899,8 +948,14 @@ ltensor3 = TensorType('int64', ((False,) * 3)) ...@@ -899,8 +948,14 @@ ltensor3 = TensorType('int64', ((False,) * 3))
def tensor3(name=None, dtype=None): def tensor3(name=None, dtype=None):
"""Return a symbolic 3-D variable. """Return a symbolic 3-D variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable Parameters
----------
dtype: numeric type
None means to use theano.config.floatX.
name
A name to attach to this variable.
""" """
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
...@@ -922,8 +977,14 @@ ltensor4 = TensorType('int64', ((False,) * 4)) ...@@ -922,8 +977,14 @@ ltensor4 = TensorType('int64', ((False,) * 4))
def tensor4(name=None, dtype=None): def tensor4(name=None, dtype=None):
"""Return a symbolic 4-D variable. """Return a symbolic 4-D variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable Parameters
----------
dtype: numeric type
None means to use theano.config.floatX.
name
A name to attach to this variable.
""" """
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
...@@ -957,6 +1018,7 @@ def _scal_elemwise_with_nfunc(nfunc, nin, nout): ...@@ -957,6 +1018,7 @@ def _scal_elemwise_with_nfunc(nfunc, nin, nout):
**destination** inputs it takes. That is, the function should **destination** inputs it takes. That is, the function should
take nin+nout inputs. nout == 0 means that the numpy function take nin+nout inputs. nout == 0 means that the numpy function
does not take a numpy array argument to put its result in. does not take a numpy array argument to put its result in.
""" """
def construct(symbol): def construct(symbol):
symbolname = symbol.__name__ symbolname = symbol.__name__
...@@ -1183,7 +1245,9 @@ def cast(x, dtype): ...@@ -1183,7 +1245,9 @@ def cast(x, dtype):
class MaxAndArgmax(Op): class MaxAndArgmax(Op):
"""Calculate the max and argmax over a given axis or over all axes. """
Calculate the max and argmax over a given axis or over all axes.
""" """
nin = 2 # tensor, axis nin = 2 # tensor, axis
nout = 2 # max val, max idx nout = 2 # max val, max idx
...@@ -1418,6 +1482,7 @@ def makeKeepDims(x, y, axis): ...@@ -1418,6 +1482,7 @@ def makeKeepDims(x, y, axis):
Reintroduces in y with length one the axes of x which have been left out Reintroduces in y with length one the axes of x which have been left out
in a prior reduction of x. With this option, the resulting tensor will in a prior reduction of x. With this option, the resulting tensor will
broadcast correctly against the original tensor x. broadcast correctly against the original tensor x.
""" """
x = as_tensor_variable(x) x = as_tensor_variable(x)
y = as_tensor_variable(y) y = as_tensor_variable(y)
...@@ -1453,14 +1518,18 @@ def makeKeepDims(x, y, axis): ...@@ -1453,14 +1518,18 @@ def makeKeepDims(x, y, axis):
def max_and_argmax(a, axis=None, keepdims=False): def max_and_argmax(a, axis=None, keepdims=False):
""" """
Returns maximum elements and their indices obtained by iterating over Returns maximum elements and their indices obtained by iterating over
given axis given axis.
When axis is None (the default value), the max is performed When axis is None (the default value), the max is performed
over the flattened tensor. over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in Parameters
----------
keepdims : bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor. will broadcast correctly against the original tensor.
""" """
out, argout = _max_and_argmax(a, axis) out, argout = _max_and_argmax(a, axis)
...@@ -1474,16 +1543,22 @@ def max_and_argmax(a, axis=None, keepdims=False): ...@@ -1474,16 +1543,22 @@ def max_and_argmax(a, axis=None, keepdims=False):
@constructor @constructor
def max(x, axis=None, keepdims=False): def max(x, axis=None, keepdims=False):
""" """
Returns maximum elements obtained by iterating over given axis Returns maximum elements obtained by iterating over given axis.
When axis is None (the default value), the max is performed When axis is None (the default value), the max is performed
over the flattened tensor. over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor. will broadcast correctly against the original tensor.
:note: we return an error as numpy when we reduce a dim with a shape of 0 Notes
-----
We return an error as numpy when we reduce a dim with a shape of 0.
""" """
# We have a choice of implementing this call with the # We have a choice of implementing this call with the
...@@ -1511,14 +1586,18 @@ def max(x, axis=None, keepdims=False): ...@@ -1511,14 +1586,18 @@ def max(x, axis=None, keepdims=False):
@constructor @constructor
def argmax(x, axis=None, keepdims=False): def argmax(x, axis=None, keepdims=False):
""" """
Returns indices of maximum elements obtained by iterating over given axis Returns indices of maximum elements obtained by iterating over given axis.
When axis is None (the default value), the argmax is performed When axis is None (the default value), the argmax is performed
over the flattened tensor. over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in Parameters
----------
keepdims : bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor. will broadcast correctly against the original tensor.
""" """
# In python (using MaxAndArgmax.perform()) this leads to a wasteful # In python (using MaxAndArgmax.perform()) this leads to a wasteful
# implementation that goes through the data twice instead of once # implementation that goes through the data twice instead of once
...@@ -1534,14 +1613,18 @@ def argmax(x, axis=None, keepdims=False): ...@@ -1534,14 +1613,18 @@ def argmax(x, axis=None, keepdims=False):
@constructor @constructor
def min(x, axis=None, keepdims=False): def min(x, axis=None, keepdims=False):
""" """
Returns minimum elements obtained by iterating over given axis Returns minimum elements obtained by iterating over given axis.
When axis is None (the default value), the min is performed When axis is None (the default value), the min is performed
over the flattened tensor. over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor. will broadcast correctly against the original tensor.
""" """
x = as_tensor_variable(x) x = as_tensor_variable(x)
str_x_type = str(x.dtype) str_x_type = str(x.dtype)
...@@ -1555,14 +1638,18 @@ def min(x, axis=None, keepdims=False): ...@@ -1555,14 +1638,18 @@ def min(x, axis=None, keepdims=False):
@constructor @constructor
def argmin(x, axis=None, keepdims=False): def argmin(x, axis=None, keepdims=False):
""" """
Returns indices of minimum elements obtained by iterating over given axis Returns indices of minimum elements obtained by iterating over given axis.
When axis is None (the default value), the argmin is performed When axis is None (the default value), the argmin is performed
over the flattened tensor. over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor. will broadcast correctly against the original tensor.
""" """
x = as_tensor_variable(x) x = as_tensor_variable(x)
str_x_type = str(x.dtype) str_x_type = str(x.dtype)
...@@ -1579,6 +1666,7 @@ def smallest(*args): ...@@ -1579,6 +1666,7 @@ def smallest(*args):
Return the [elementwise] smallest of a variable number of arguments. Return the [elementwise] smallest of a variable number of arguments.
Like python's min. Like python's min.
""" """
if len(args) == 2: if len(args) == 2:
a, b = args a, b = args
...@@ -1593,6 +1681,7 @@ def largest(*args): ...@@ -1593,6 +1681,7 @@ def largest(*args):
Return the [elementwise] largest of a variable number of arguments. Return the [elementwise] largest of a variable number of arguments.
Like python's max. Like python's max.
""" """
if len(args) == 2: if len(args) == 2:
a, b = args a, b = args
...@@ -1647,31 +1736,34 @@ def isinf(a): ...@@ -1647,31 +1736,34 @@ def isinf(a):
def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
""" """
Implements Numpy's ``allclose`` on tensors. Implement Numpy's ``allclose`` on tensors.
``absolute(a - b) <= (atol + rtol * absolute(b))`` ``absolute(a - b) <= (atol + rtol * absolute(b))``
:note: Not a symmetric equation. See Numpy's documentation. Parameters
----------
:param a: input to compare a : tensor
:type a: tensor Input to compare.
b : tensor
:param b: input to compare Input to compare.
:type b: tensor rtol : float
The relative tolerance parameter.
:param rtol: the relative tolerance parameter atol : float
:type rtol: float The absolute tolerance parameter.
equal_nan: bool
Whether to consider nan's in the same place to be close.
:param atol: the absolute tolerance parameter Returns
:type atol: float -------
bool
A boolean value (of type int8 returned by the tensor elementwise `all`
function) whether all elements in a and b are in the tolerance range
defined above.
:param equal_nan: whether to consider nan's in the same place to be close Notes
:type equal_nan: bool -----
Not a symmetric equation. See Numpy's documentation.
:returns: a boolean value (of type int8 returned by the tensor
elementwise `all` function) whether all elements in a and b are in
the tolerance range defined above.
:rtype: int8
""" """
return all(isclose(a, b, rtol, atol, equal_nan)) return all(isclose(a, b, rtol, atol, equal_nan))
...@@ -1687,27 +1779,31 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): ...@@ -1687,27 +1779,31 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
``absolute(a - b) <= (atol + rtol * absolute(b))`` ``absolute(a - b) <= (atol + rtol * absolute(b))``
:note: Not a symmetric equation. See Numpy's documentation. Parameters
----------
:param a: input to compare a : tensor
:type a: tensor Input to compare.
b : tensor
:param b: input to compare Input to compare.
:type b: tensor rtol : float
The relative tolerance parameter.
:param rtol: the relative tolerance parameter atol : float
:type rtol: float The absolute tolerance parameter.
equal_nan : bool
:param atol: the absolute tolerance parameter Whether to consider nan's in the same place to be close
:type atol: float
:param equal_nan: whether to consider nan's in the same place to be close Returns
:type equal_nan: bool -------
int8
A boolean (int8) array where two arrays are element-wise equal
within a tolerance.
:returns: returns a boolean (int8) array where two arrays are element-wise Notes
equal within a tolerance. -----
:rtype: int8 Not a symmetric equation. See Numpy's documentation.
Examples
--------
>>> import theano >>> import theano
>>> import numpy as np >>> import numpy as np
>>> a = theano._asarray([1e10, 1e-7], dtype="float64") >>> a = theano._asarray([1e10, 1e-7], dtype="float64")
...@@ -1738,6 +1834,7 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): ...@@ -1738,6 +1834,7 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
>>> b = theano._asarray([1.0, np.inf], dtype="float64") >>> b = theano._asarray([1.0, np.inf], dtype="float64")
>>> theano.tensor.isclose(a, b).eval() >>> theano.tensor.isclose(a, b).eval()
array([1, 1], dtype==int8) array([1, 1], dtype==int8)
""" """
# close will be an int8 array of 1 where within tolerance # close will be an int8 array of 1 where within tolerance
# and 0 where not within tolerance or there was a nan or inf value. # and 0 where not within tolerance or there was a nan or inf value.
...@@ -2164,8 +2261,8 @@ class Nonzero(gof.Op): ...@@ -2164,8 +2261,8 @@ class Nonzero(gof.Op):
Returns Returns
------- -------
result : matrix matrix
matrix containing the indices of the non-zero elements of a. Matrix containing the indices of the non-zero elements of a.
See Also See Also
-------- --------
...@@ -2220,14 +2317,13 @@ def nonzero(a, return_matrix=False): ...@@ -2220,14 +2317,13 @@ def nonzero(a, return_matrix=False):
---------- ----------
a : array_like a : array_like
Input array. Input array.
return_matrix : bool return_matrix : bool
If True, returns a symbolic matrix. If False, returns a tuple of If True, returns a symbolic matrix. If False, returns a tuple of
arrays. Defaults to False. arrays. Defaults to False.
Returns Returns
------- -------
result : tuple of vectors or matrix tuple of vectors or matrix
See Also See Also
-------- --------
...@@ -2260,7 +2356,7 @@ def flatnonzero(a): ...@@ -2260,7 +2356,7 @@ def flatnonzero(a):
Returns Returns
------- -------
res : vector vector
Output vector, containing the indices of the elements of `a.flatten()` Output vector, containing the indices of the elements of `a.flatten()`
that are non-zero. that are non-zero.
...@@ -2268,6 +2364,7 @@ def flatnonzero(a): ...@@ -2268,6 +2364,7 @@ def flatnonzero(a):
-------- --------
nonzero : Return the indices of the non-zero elements of the input array. nonzero : Return the indices of the non-zero elements of the input array.
nonzero_values : Return the non-zero elements of the input array nonzero_values : Return the non-zero elements of the input array
""" """
if a.ndim == 0: if a.ndim == 0:
raise ValueError('Nonzero only supports non-scalar arrays.') raise ValueError('Nonzero only supports non-scalar arrays.')
...@@ -2299,7 +2396,7 @@ def nonzero_values(a): ...@@ -2299,7 +2396,7 @@ def nonzero_values(a):
Returns Returns
------- -------
res : vector vector
Output vector, containing the non-zero elements of a. Output vector, containing the non-zero elements of a.
See Also See Also
...@@ -2307,6 +2404,7 @@ def nonzero_values(a): ...@@ -2307,6 +2404,7 @@ def nonzero_values(a):
nonzero : Return the indices of the non-zero elements of the input array. nonzero : Return the indices of the non-zero elements of the input array.
flatnonzero : Return the indices of the non-zero elements of the flatnonzero : Return the indices of the non-zero elements of the
flattened input array. flattened input array.
""" """
return a.flatten()[flatnonzero(a)] return a.flatten()[flatnonzero(a)]
...@@ -2362,9 +2460,10 @@ def tri(N, M=None, k=0, dtype=None): ...@@ -2362,9 +2460,10 @@ def tri(N, M=None, k=0, dtype=None):
Returns Returns
------- -------
tri : Array of shape (N, M) Array of shape (N, M)
Array with its lower triangle filled with ones and zero elsewhere; Array with its lower triangle filled with ones and zero elsewhere;
in other words ``T[i,j] == 1`` for ``i <= j + k``, 0 otherwise. in other words ``T[i,j] == 1`` for ``i <= j + k``, 0 otherwise.
""" """
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
...@@ -2390,12 +2489,13 @@ def tril(m, k=0): ...@@ -2390,12 +2489,13 @@ def tril(m, k=0):
Returns Returns
------- -------
tril : array, shape (M, N) array, shape (M, N)
Lower triangle of `m`, of same shape and data-type as `m`. Lower triangle of `m`, of same shape and data-type as `m`.
See Also See Also
-------- --------
triu : same thing, only for the upper triangle triu : Same thing, only for the upper triangle.
""" """
return m * tri(m.shape[0], m.shape[1], k=k, dtype=m.dtype) return m * tri(m.shape[0], m.shape[1], k=k, dtype=m.dtype)
...@@ -2411,7 +2511,8 @@ def triu(m, k=0): ...@@ -2411,7 +2511,8 @@ def triu(m, k=0):
See Also See Also
-------- --------
tril : lower triangle of an array tril : Lower triangle of an array.
""" """
return m * (1 - tri(m.shape[0], m.shape[1], k=k - 1, dtype=m.dtype)) return m * (1 - tri(m.shape[0], m.shape[1], k=k - 1, dtype=m.dtype))
...@@ -2468,9 +2569,10 @@ def eye(n, m=None, k=0, dtype=None): ...@@ -2468,9 +2569,10 @@ def eye(n, m=None, k=0, dtype=None):
Returns Returns
------- -------
I : ndarray of shape (N,M) ndarray of shape (N,M)
An array where all elements are equal to zero, except for the `k`-th An array where all elements are equal to zero, except for the `k`-th
diagonal, whose values are equal to one. diagonal, whose values are equal to one.
""" """
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
...@@ -2485,7 +2587,7 @@ def identity_like(x): ...@@ -2485,7 +2587,7 @@ def identity_like(x):
class Alloc(gof.Op): class Alloc(gof.Op):
"""Create a Tensor from an initial value and a desired shape """Create a Tensor from an initial value and a desired shape.
alloc(value, shape0, shape1, ..., shapeN) alloc(value, shape0, shape1, ..., shapeN)
...@@ -2500,6 +2602,7 @@ class Alloc(gof.Op): ...@@ -2500,6 +2602,7 @@ class Alloc(gof.Op):
This Op is used to replace fill() during optimizations because after shapes This Op is used to replace fill() during optimizations because after shapes
are lifted, the first argument to fill can often be pruned from the graph. are lifted, the first argument to fill can often be pruned from the graph.
""" """
__props__ = () __props__ = ()
...@@ -2642,6 +2745,7 @@ class Alloc(gof.Op): ...@@ -2642,6 +2745,7 @@ class Alloc(gof.Op):
for size mismatches. for size mismatches.
If you always want an Alloc node, call make_node. If you always want an Alloc node, call make_node.
""" """
ret = super(Alloc, self).__call__(val, *shapes, **kwargs) ret = super(Alloc, self).__call__(val, *shapes, **kwargs)
try: try:
...@@ -2709,18 +2813,22 @@ pprint.assign(tensor_copy, printing.IgnorePrinter()) ...@@ -2709,18 +2813,22 @@ pprint.assign(tensor_copy, printing.IgnorePrinter())
@constructor @constructor
def sum(input, axis=None, dtype=None, keepdims=False, acc_dtype=None): def sum(input, axis=None, dtype=None, keepdims=False, acc_dtype=None):
""" """
Computes the sum along the given axis(es) of a tensor `input` Computes the sum along the given axis(es) of a tensor `input`.
When axis is None (the default value), the sum is performed When axis is None (the default value), the sum is performed
over the flattened tensor. over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
For full documentation see ``tensor.elemwise.Sum``. For full documentation see ``tensor.elemwise.Sum``.
In particular please pay attention to the important warning when using In particular please pay attention to the important warning when using
a custom acc_dtype. a custom acc_dtype.
Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
""" """
out = elemwise.Sum(axis=axis, dtype=dtype, acc_dtype=acc_dtype)(input) out = elemwise.Sum(axis=axis, dtype=dtype, acc_dtype=acc_dtype)(input)
...@@ -2736,16 +2844,20 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum')) ...@@ -2736,16 +2844,20 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum'))
def prod(input, axis=None, dtype=None, keepdims=False, acc_dtype=None, def prod(input, axis=None, dtype=None, keepdims=False, acc_dtype=None,
no_zeros_in_input=False): no_zeros_in_input=False):
""" """
Computes the product along the given axis(es) of a tensor `input` Computes the product along the given axis(es) of a tensor `input`.
When axis is None (the default value), the product is performed When axis is None (the default value), the product is performed
over the flattened tensor. over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in For full documentation see ``tensor.elemwise.Prod``.
Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor. will broadcast correctly against the original tensor.
For full documentation see ``tensor.elemwise.Prod``.
""" """
out = elemwise.Prod(axis, dtype=dtype, acc_dtype=acc_dtype, out = elemwise.Prod(axis, dtype=dtype, acc_dtype=acc_dtype,
...@@ -2803,31 +2915,32 @@ class Mean(elemwise.CAReduce): ...@@ -2803,31 +2915,32 @@ class Mean(elemwise.CAReduce):
def mean(input, axis=None, dtype=None, op=False, keepdims=False, def mean(input, axis=None, dtype=None, op=False, keepdims=False,
acc_dtype=None): acc_dtype=None):
""" """
Computes the mean value along the given axis(es) of a tensor `input` Computes the mean value along the given axis(es) of a tensor `input`.
:param axis: compute the mean along this axis of the tensor. Parameters
----------
axis : None or int or (list of int) (see `Sum`)
Compute the mean along this axis of the tensor.
None means all axes (like numpy). None means all axes (like numpy).
:type axis: None or int or (list of int) (see `Sum`) dtype: None or string
Dtype to cast the result of the inner summation into.
:param dtype: dtype to cast the result of the inner summation into.
For instance, by default, a sum of a float32 tensor will be For instance, by default, a sum of a float32 tensor will be
done in float64 (acc_dtype would be float64 by default), done in float64 (acc_dtype would be float64 by default),
but that result will be casted back in float32. but that result will be casted back in float32.
:type dtype: None or string keepdims: bool
If this is set to True, the axes which are reduced are
:param keepdims: If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option, left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor. the result will broadcast correctly against the original tensor.
acc_dtype: None or string
:param acc_dtype: dtype to use for the inner summation. This will not Dtype to use for the inner summation. This will not
necessarily be the dtype of the output (in particular necessarily be the dtype of the output (in particular
if it is a discrete (int/uint) dtype, the output will if it is a discrete (int/uint) dtype, the output will
be in a float type). be in a float type). If None, then we use the same rules as `sum()`.
If None, then we use the same rules as `sum()`.
:type acc_dtype: None or string Notes
-----
For gpu, if you specify dtype=float32, everything will be done on the gpu.
:note: for gpu, if you specify dtype=float32, everything will be done
on the gpu.
""" """
if op: if op:
...@@ -2896,18 +3009,23 @@ def var(input, axis=None, keepdims=False): ...@@ -2896,18 +3009,23 @@ def var(input, axis=None, keepdims=False):
""" """
Computes the variance along the given axis(es) of a tensor `input`. Computes the variance along the given axis(es) of a tensor `input`.
:param axis: Compute the variance along this axis of the tensor. Parameters
----------
axis: None or int or (list of int) (see `Sum`)
Compute the variance along this axis of the tensor.
None means all axes (like numpy). None means all axes (like numpy).
:type axis: None or int or (list of int) (see `Sum`) keepdims : bool
If this is set to True, the axes which are reduced are
:param keepdims: If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option, left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor. the result will broadcast correctly against the original tensor.
:note: It uses the two-pass algorithm for more stable results. Notes
-----
It uses the two-pass algorithm for more stable results.
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
There exist other implementations that are even more stable, but There exist other implementations that are even more stable, but probably
probably slower. slower.
""" """
input_ndim = input.type.ndim input_ndim = input.type.ndim
...@@ -2933,26 +3051,26 @@ def var(input, axis=None, keepdims=False): ...@@ -2933,26 +3051,26 @@ def var(input, axis=None, keepdims=False):
@constructor @constructor
def std(input, axis=None, keepdims=False): def std(input, axis=None, keepdims=False):
""" """
Computes the standard deviation along the given axis(es) Computes the standard deviation along the given axis(es) of a tensor `input`.
of a tensor `input`.
:param axis: Compute the standard deviation along this Parameters
axis of the tensor. ----------
axis : None or int or (list of int) (see `Sum`)
Compute the standard deviation along this axis of the tensor.
None means all axes (like numpy). None means all axes (like numpy).
:type axis: None or int or (list of int) (see `Sum`) keepdims : bool
If this is set to True, the axes which are reduced are left in the
:param keepdims: If this is set to True, the axes result as dimensions with size one. With this option, the result will
which are reduced are broadcast correctly against the original tensor.
left in the result as dimensions with size one.
With this option, Notes
the result will broadcast correctly against the -----
original tensor. It calls `var()` and `var()` uses the two-pass algorithm for more stable
results.
:note: It calls `var()` and `var()` uses the two-pass algorithm for more
stable results.
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
There exist other implementations that are even more stable, but There exist other implementations that are even more stable, but probably
probably slower. slower.
""" """
return sqrt(var(input=input, axis=axis, keepdims=keepdims)) return sqrt(var(input=input, axis=axis, keepdims=keepdims))
...@@ -2960,10 +3078,12 @@ def std(input, axis=None, keepdims=False): ...@@ -2960,10 +3078,12 @@ def std(input, axis=None, keepdims=False):
class Default(gof.Op): class Default(gof.Op):
""" """
Takes an input x and a default value. If the input is not None, a Takes an input x and a default value.
reference to it is returned. If the input is None, a copy of the
default value is returned instead. The input and the default must If the input is not None, a reference to it is returned.
have exactly the same type. If the input is None, a copy of the default value is returned instead.
The input and the default must have exactly the same type.
""" """
view_map = {0: [0]} view_map = {0: [0]}
__props__ = () __props__ = ()
...@@ -2994,15 +3114,13 @@ setdefault = default # legacy ...@@ -2994,15 +3114,13 @@ setdefault = default # legacy
########################## ##########################
@_scal_elemwise_with_nfunc('maximum', 2, 1) @_scal_elemwise_with_nfunc('maximum', 2, 1)
def maximum(x, y): def maximum(x, y):
"""elemwise maximum. See max for the maximum in one tensor """elemwise maximum. See max for the maximum in one tensor"""
"""
# see decorator for function body # see decorator for function body
@_scal_elemwise_with_nfunc('minimum', 2, 1) @_scal_elemwise_with_nfunc('minimum', 2, 1)
def minimum(x, y): def minimum(x, y):
"""elemwise minimum. See min for the minimum in one tensor """elemwise minimum. See min for the minimum in one tensor"""
"""
# see decorator for function body # see decorator for function body
...@@ -3058,6 +3176,7 @@ def ceil_intdiv(a, b): ...@@ -3058,6 +3176,7 @@ def ceil_intdiv(a, b):
Safely compute ceil(float_division(a, b)). Safely compute ceil(float_division(a, b)).
Works for all dtypes, but mostly useful when a and b are int. Works for all dtypes, but mostly useful when a and b are int.
""" """
# If a and b are int with not many significant bits, we could # If a and b are int with not many significant bits, we could
# cast them to float to avoid doing the modulo. We do not know if this # cast them to float to avoid doing the modulo. We do not know if this
...@@ -3099,13 +3218,17 @@ def pow(a, b): ...@@ -3099,13 +3218,17 @@ def pow(a, b):
# So we do not use @scal_elemwise_with_nfunc('clip', 3, 1) # So we do not use @scal_elemwise_with_nfunc('clip', 3, 1)
@_scal_elemwise @_scal_elemwise
def clip(x, min, max): def clip(x, min, max):
"""clip x to be between min and max. """
Clip x to be between min and max.
:note: When `x` is equal to the boundaries, the output is considered Notes
-----
When `x` is equal to the boundaries, the output is considered
to be `x`, so at these points, the gradient of the cost wrt the output to be `x`, so at these points, the gradient of the cost wrt the output
will be propagated to `x`, not to `min` nor `max`. In other words, will be propagated to `x`, not to `min` nor `max`. In other words,
on these points, the gradient wrt `x` will be equal to the gradient wrt on these points, the gradient wrt `x` will be equal to the gradient wrt
the output, and the gradient wrt `min` and `max` will be zero. the output, and the gradient wrt `min` and `max` will be zero.
""" """
# see decorator for function body # see decorator for function body
# for grep: clamp, bound # for grep: clamp, bound
...@@ -3125,14 +3248,16 @@ pprint.assign(pow, printing.OperatorPrinter('**', 1, 'right')) ...@@ -3125,14 +3248,16 @@ pprint.assign(pow, printing.OperatorPrinter('**', 1, 'right'))
def extract_constant(x, elemwise=True): def extract_constant(x, elemwise=True):
''' """
This function is basically a call to tensor.get_scalar_constant_value. The This function is basically a call to tensor.get_scalar_constant_value.
main difference is the behaviour in case of failure. While
The main difference is the behaviour in case of failure. While
get_scalar_constant_value raises an TypeError, this function returns x, get_scalar_constant_value raises an TypeError, this function returns x,
as a tensor if possible. If x is a ScalarVariable from a as a tensor if possible. If x is a ScalarVariable from a
scalar_from_tensor, we remove the conversion. If x is just a scalar_from_tensor, we remove the conversion. If x is just a
ScalarVariable, we convert it to a tensor with tensor_from_scalar. ScalarVariable, we convert it to a tensor with tensor_from_scalar.
'''
"""
try: try:
x = get_scalar_constant_value(x, elemwise=elemwise) x = get_scalar_constant_value(x, elemwise=elemwise)
except NotScalarConstantError: except NotScalarConstantError:
...@@ -3150,8 +3275,7 @@ def transpose(x, axes=None): ...@@ -3150,8 +3275,7 @@ def transpose(x, axes=None):
""" """
Reorder the dimensions of x. (Default: reverse them) Reorder the dimensions of x. (Default: reverse them)
This is a macro around dimshuffle that matches the numpy.transpose This is a macro around dimshuffle that matches the numpy.transpose function.
function.
""" """
if axes is None: if axes is None:
...@@ -3164,18 +3288,33 @@ def transpose(x, axes=None): ...@@ -3164,18 +3288,33 @@ def transpose(x, axes=None):
def batched_dot(x, y): def batched_dot(x, y):
""" """
:param x: A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2)
:param y: A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
This function computes the dot product between the two tensors, by This function computes the dot product between the two tensors, by
iterating over the first dimension using scan. iterating over the first dimension using scan.
Returns a tensor of size e.g. if it is 3D: (dim1, dim3, dim4)
Example: Parameters
----------
x : tensor
A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2).
y : tensor
A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4).
Returns
-------
tensor
A tensor of size e.g. if it is 3D: (dim1, dim3, dim4).
Notes
-----
This is a subset of numpy.einsum, but we do not provide it for now.
But numpy einsum is slower than dot or tensordot:
http://mail.scipy.org/pipermail/numpy-discussion/2012-October/064259.html
Examples
--------
>>> first = tensor.tensor3('first') >>> first = tensor.tensor3('first')
>>> second = tensor.tensor3('second') >>> second = tensor.tensor3('second')
>>> result = batched_dot(first, second) >>> result = batched_dot(first, second)
:note: This is a subset of numpy.einsum, but we do not provide it for now.
But numpy einsum is slower than dot or tensordot:
http://mail.scipy.org/pipermail/numpy-discussion/2012-October/064259.html
""" """
result, updates = theano.scan( result, updates = theano.scan(
fn=lambda x_mat, y_mat: fn=lambda x_mat, y_mat:
...@@ -3188,11 +3327,22 @@ def batched_dot(x, y): ...@@ -3188,11 +3327,22 @@ def batched_dot(x, y):
def batched_tensordot(x, y, axes=2): def batched_tensordot(x, y, axes=2):
""" """
:param x: A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2) Compute the tensordot product.
:param y: A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
:param axes: an integer or array. If an integer, the number of axes A hybrid of batch_dot and tensordot, this function computes the
to sum over. If an array, it must have two array tensordot product between the two tensors, by iterating over the
elements containing the axes to sum over in each tensor. first dimension using scan to perform a sequence of tensordots.
Parameters
----------
x : tensor
A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2)
y : tensor
A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
axes: int or array-like of length 2
If an integer, the number of axes to sum over.
If an array, it must have two array elements containing the axes to sum
over in each tensor.
If an integer i, it is converted to an array containing If an integer i, it is converted to an array containing
the last i dimensions of the first tensor and the first the last i dimensions of the first tensor and the first
...@@ -3206,11 +3356,7 @@ def batched_tensordot(x, y, axes=2): ...@@ -3206,11 +3356,7 @@ def batched_tensordot(x, y, axes=2):
(Remember axes are zero-indexed!) The 2nd axis of a and the (Remember axes are zero-indexed!) The 2nd axis of a and the
3rd axis of b must have the same shape; the same is true for 3rd axis of b must have the same shape; the same is true for
the 3rd axis of a and the 5th axis of b. the 3rd axis of a and the 5th axis of b.
:type axes: int or array-like of length 2
A hybrid of batch_dot and tensordot, this function computes the
tensordot product between the two tensors, by iterating over the
first dimension using scan to perform a sequence of tensordots.
""" """
if isinstance(axes, (list, numpy.ndarray)): if isinstance(axes, (list, numpy.ndarray)):
if isinstance(axes, list): if isinstance(axes, list):
...@@ -3239,20 +3385,17 @@ def split(x, splits_size, n_splits, axis=0): ...@@ -3239,20 +3385,17 @@ def split(x, splits_size, n_splits, axis=0):
class Split(Op): class Split(Op):
"""Partition a `TensorVariable` along some axis. """Partition a `TensorVariable` along some axis.
.. python:: Examples
--------
x = vector() >>> x = vector()
splits = lvector() >>> splits = lvector()
# you have to declare right away how many split_points there will be. You have to declare right away how many split_points there will be.
ra, rb, rc = split(x, splits, n_splits = 3, axis = 0) >>> ra, rb, rc = split(x, splits, n_splits = 3, axis = 0)
>>> f = function([x, splits], [ra, rb, rc])
f = function([x, splits], [ra, rb, rc]) >>> a, b, c = f([0,1,2,3,4,5], [3, 2, 1])
a == [0,1,2]
a, b, c = f([0,1,2,3,4,5], [3, 2, 1]) b == [3, 4]
c == [5]
#a == [0,1,2]
#b == [3, 4]
#c == [5]
""" """
...@@ -3370,6 +3513,7 @@ class Split(Op): ...@@ -3370,6 +3513,7 @@ class Split(Op):
def addbroadcast(x, *axes): def addbroadcast(x, *axes):
""" """
Make the input broadcastable in the specified axes. Make the input broadcastable in the specified axes.
For example, addbroadcast(x, 0) will make the first dimension of For example, addbroadcast(x, 0) will make the first dimension of
x broadcastable. When performing the function, if the length of x broadcastable. When performing the function, if the length of
x along that dimension is not 1, a ValueError will be raised. x along that dimension is not 1, a ValueError will be raised.
...@@ -3377,20 +3521,19 @@ def addbroadcast(x, *axes): ...@@ -3377,20 +3521,19 @@ def addbroadcast(x, *axes):
We apply the opt here not to pollute the graph especially during We apply the opt here not to pollute the graph especially during
the gpu optimization the gpu optimization
Parameters: Parameters
------------ ----------
x : tensor_like x : tensor_like
Input theano tensor. Input theano tensor.
axis : an int or an iterable object such as list or tuple axis : an int or an iterable object such as list or tuple of int values
of int values The dimension along which the tensor x should be broadcastable.
If the length of x along these dimensions is not 1, a ValueError will
be raised.
The dimension along which the tensor x should be Returns
broadcastable. if the length of x along these -------
dimensions is not 1, a ValueError will be raised. tensor
A theano tensor, which is broadcastable along the specified dimensions.
returns:
----------
a theano tensor, which is broadcastable along the specified dimensions.
""" """
rval = Rebroadcast(*[(axis, True) for axis in axes])(x) rval = Rebroadcast(*[(axis, True) for axis in axes])(x)
...@@ -3400,6 +3543,7 @@ def addbroadcast(x, *axes): ...@@ -3400,6 +3543,7 @@ def addbroadcast(x, *axes):
def unbroadcast(x, *axes): def unbroadcast(x, *axes):
""" """
Make the input impossible to broadcast in the specified axes. Make the input impossible to broadcast in the specified axes.
For example, addbroadcast(x, 0) will make the first dimension For example, addbroadcast(x, 0) will make the first dimension
of x broadcastable. When performing the function, if the length of x broadcastable. When performing the function, if the length
of x along that dimension is not 1, a ValueError will be raised. of x along that dimension is not 1, a ValueError will be raised.
...@@ -3407,20 +3551,19 @@ def unbroadcast(x, *axes): ...@@ -3407,20 +3551,19 @@ def unbroadcast(x, *axes):
We apply the opt here not to pollute the graph especially during We apply the opt here not to pollute the graph especially during
the gpu optimization the gpu optimization
Parameters: Parameters
------------ ----------
x : tensor_like x : tensor_like
Input theano tensor. Input theano tensor.
axis : an int or an iterable object such as list or tuple axis : an int or an iterable object such as list or tuple of int values
of int values The dimension along which the tensor x should be unbroadcastable.
If the length of x along these dimensions is not 1, a ValueError will
The dimension along which the tensor x should be be raised.
unbroadcastable. if the length of x along these
dimensions is not 1, a ValueError will be raised.
returns: Returns
---------- -------
a theano tensor, which is unbroadcastable along the specified dimensions. tensor
A theano tensor, which is unbroadcastable along the specified dimensions.
""" """
rval = Rebroadcast(*[(axis, False) for axis in axes])(x) rval = Rebroadcast(*[(axis, False) for axis in axes])(x)
...@@ -3430,7 +3573,8 @@ def unbroadcast(x, *axes): ...@@ -3430,7 +3573,8 @@ def unbroadcast(x, *axes):
def patternbroadcast(x, broadcastable): def patternbroadcast(x, broadcastable):
""" """
Make the input adopt a specific broadcasting pattern. Make the input adopt a specific broadcasting pattern.
broadcastable must be iterable. For example,
Broadcastable must be iterable. For example,
patternbroadcast(x, (True, False)) will make the first patternbroadcast(x, (True, False)) will make the first
dimension of x broadcastable and the second dimension dimension of x broadcastable and the second dimension
not broadcastable, so x will now be a row. not broadcastable, so x will now be a row.
...@@ -3438,21 +3582,20 @@ def patternbroadcast(x, broadcastable): ...@@ -3438,21 +3582,20 @@ def patternbroadcast(x, broadcastable):
We apply the opt here not to pollute the graph especially during the gpu We apply the opt here not to pollute the graph especially during the gpu
optimization. optimization.
Parameters: Parameters
------------ ----------
x : tensor_like x : tensor_like
Input theano tensor. Input theano tensor.
broadcastable : an iterable object such as list or tuple broadcastable : an iterable object such as list or tuple of bool values
of bool values A set of boolean values indicating whether a dimension should be
broadcastable or not. If the length of x along these dimensions is
not 1, a ValueError will be raised.
a set of boolean values indicating whether a dimension Returns
should be broadcastable or not. -------
if the length of x along these dimensions is not 1, tensor
a ValueError will be raised. A theano tensor, which is unbroadcastable along the specified dimensions.
returns:
----------
a theano tensor, which is unbroadcastable along the specified dimensions.
""" """
rval = Rebroadcast(*[(i, broadcastable[i]) rval = Rebroadcast(*[(i, broadcastable[i])
for i in xrange(len(broadcastable))])(x) for i in xrange(len(broadcastable))])(x)
...@@ -3468,31 +3611,39 @@ class Join(Op): ...@@ -3468,31 +3611,39 @@ class Join(Op):
Of course, TensorVariable instances do not have a shape, so this error Of course, TensorVariable instances do not have a shape, so this error
cannot be caught until runtime. See `perform()`. cannot be caught until runtime. See `perform()`.
For joins involving scalar values, see @stack. See Also
--------
stack : For joins involving scalar values
.. python:: Examples
--------
>>> x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix()
>>> u = tensor.vector()
x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix() >>> r = join(0, x, y, z)
u = tensor.vector() >>> c = join(1, x, y, z)
>>> join(2, x, y, z) # WRONG: the axis has to be an index into the shape
>>> join(0, x, u) # WRONG: joined tensors must have the same rank
r = join(0, x, y, z)
c = join(1, x, y, z)
join(2, x, y, z) # WRONG: the axis has to be an index into the shape
join(0, x, u) # WRONG: joined tensors must have the same rank
""" """
check_input = False check_input = False
__props__ = () __props__ = ()
def make_node(self, *axis_and_tensors): def make_node(self, *axis_and_tensors):
""" """
:param axis: an Int or integer-valued Variable Parameters
----------
:param tensors: a variable number (but not zero) of tensors to axis: an Int or integer-valued Variable
tensors
A variable number (but not zero) of tensors to
concatenate along the specified axis. These tensors must have concatenate along the specified axis. These tensors must have
the same shape along all dimensions other than this axis. the same shape along all dimensions other than this axis.
:returns: a symbolic Variable. It has the same ndim as the Returns
input tensors, and the most inclusive dtype. -------
A symbolic Variable
It has the same ndim as the input tensors, and the most inclusive
dtype.
""" """
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:] axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
...@@ -3709,26 +3860,25 @@ class Join(Op): ...@@ -3709,26 +3860,25 @@ class Join(Op):
""" """
Convenience function to concatenate `TensorType`s along the given axis. Convenience function to concatenate `TensorType`s along the given axis.
:Parameters: Parameters
- `tensors` : list of tensors (or list-like) ----------
tensors : list of tensors (or list-like)
A list of tensors to be concatenated along the given axis. A list of tensors to be concatenated along the given axis.
- `axis` : int (symbolic or literal) The shapes of the tensors to be concatenated must be all
identical, except in the dimension (`axis`) on which they are to
be joined.
axis : int (symbolic or literal)
On which dimension should the tensors be joined? The `axis` On which dimension should the tensors be joined? The `axis`
must be a valid index into the shape of the tensors to be must be a valid index into the shape of the tensors to be
concatenated. concatenated.
The `axis` parameter may either be an integer or an object that The `axis` parameter may either be an integer or an object that
can be converted to a scalar using `as_scalar`(`axis`). In the can be converted to a scalar using `as_scalar`(`axis`). In the
former case, the axis is fixed at construction, while in the former case, the axis is fixed at construction, while in the
latter it may vary over time depending on the value of the latter it may vary over time depending on the value of the
`axis` variable. `axis` variable.
The shapes of the tensors to be concatenated must be all """
identical, except in the dimension (`axis`) on which they are to
be joined.
"""
join = Join() join = Join()
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join), pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join),
...@@ -3738,7 +3888,8 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join), ...@@ -3738,7 +3888,8 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join),
def roll(x, shift, axis=None): def roll(x, shift, axis=None):
""" """
Convenience function to roll `TensorType`s along the given axis. Convenience function to roll `TensorType`s along the given axis.
Syntax copies numpy.roll function
Syntax copies numpy.roll function.
Parameters Parameters
---------- ----------
...@@ -3746,15 +3897,16 @@ def roll(x, shift, axis=None): ...@@ -3746,15 +3897,16 @@ def roll(x, shift, axis=None):
Input tensor. Input tensor.
shift : int (symbolic or literal) shift : int (symbolic or literal)
The number of places by which elements are shifted. The number of places by which elements are shifted.
axis : int (symbolic or literal) (optional) axis : int (symbolic or literal), optional
The axis along which elements are shifted. By default, the array The axis along which elements are shifted. By default, the array
is flattened before shifting, after which the original is flattened before shifting, after which the original
shape is restored. shape is restored.
Returns Returns
------- -------
res : tensor tensor
Output tensor, with the same shape as `x`. Output tensor, with the same shape as `x`.
""" """
if axis is None: if axis is None:
if x.ndim > 1: if x.ndim > 1:
...@@ -3780,9 +3932,13 @@ def roll(x, shift, axis=None): ...@@ -3780,9 +3932,13 @@ def roll(x, shift, axis=None):
@constructor @constructor
def shape_padleft(t, n_ones=1): def shape_padleft(t, n_ones=1):
"""Reshape `t` by left-padding the shape with `n_ones` 1s """Reshape `t` by left-padding the shape with `n_ones` 1s.
See Also
--------
shape_padright
Dimshuffle
See also: `shape_padright` and `Dimshuffle`
""" """
_t = as_tensor_variable(t) _t = as_tensor_variable(t)
...@@ -3792,9 +3948,13 @@ def shape_padleft(t, n_ones=1): ...@@ -3792,9 +3948,13 @@ def shape_padleft(t, n_ones=1):
@constructor @constructor
def shape_padright(t, n_ones=1): def shape_padright(t, n_ones=1):
"""Reshape `t` by right-padding the shape with `n_ones` 1s """Reshape `t` by right-padding the shape with `n_ones` 1s.
See Also
--------
shape_padleft
Dimshuffle
See also: `shape_padleft` and `Dimshuffle`
""" """
_t = as_tensor_variable(t) _t = as_tensor_variable(t)
...@@ -3808,6 +3968,7 @@ def stack(*tensors): ...@@ -3808,6 +3968,7 @@ def stack(*tensors):
The size in dimension 0 of the result will be equal to the number The size in dimension 0 of the result will be equal to the number
of tensors passed. of tensors passed.
""" """
if len(tensors) == 0: if len(tensors) == 0:
raise Exception('theano.tensor.stack(*tensors) must have at least' raise Exception('theano.tensor.stack(*tensors) must have at least'
...@@ -3843,9 +4004,10 @@ def concatenate(tensor_list, axis=0): ...@@ -3843,9 +4004,10 @@ def concatenate(tensor_list, axis=0):
This function is similar to `join`, but uses the signature of This function is similar to `join`, but uses the signature of
numpy's concatenate function. numpy's concatenate function.
This function Raises
:Exceptions: ------
- `TypeError` : the tensor_list must be a tuple or list TypeError
The tensor_list must be a tuple or list.
""" """
# Check someone did not make the common mistake to do something like: # Check someone did not make the common mistake to do something like:
...@@ -3863,13 +4025,17 @@ def concatenate(tensor_list, axis=0): ...@@ -3863,13 +4025,17 @@ def concatenate(tensor_list, axis=0):
def get_vector_length(v): def get_vector_length(v):
"""Return the run-time length of a symbolic vector. """Return the run-time length of a symbolic vector.
:Parameters: Parameters
- `v` : A rank-1 TensorType variable. ----------
v
:Exceptions: A rank-1 TensorType variable.
- `TypeError` : `v` hasn't the proper type.
- `ValueError` : No special case applies, the length is not known. Raises
------
TypeError
`v` hasn't the proper type.
ValueError
No special case applies, the length is not known.
In general this is not possible, but for a number of special cases In general this is not possible, but for a number of special cases
the length can be determined at compile / graph-construction time. the length can be determined at compile / graph-construction time.
This function implements these special cases. This function implements these special cases.
...@@ -3909,9 +4075,11 @@ def get_vector_length(v): ...@@ -3909,9 +4075,11 @@ def get_vector_length(v):
def horizontal_stack(*args): def horizontal_stack(*args):
""" """
Horizontally stack two L{TensorType}s. Horizontally stack two L{TensorType}s.
Stack two L{TensorType}s along the second axis (column wise). These Stack two L{TensorType}s along the second axis (column wise). These
L{TensorType}s must have the same shape along all dimensions but the L{TensorType}s must have the same shape along all dimensions but the
second. second.
""" """
# Note: 'horizontal_stack' and 'vertical_stack' do not behave exactly like # Note: 'horizontal_stack' and 'vertical_stack' do not behave exactly like
# Numpy's hstack and vstack functions. This is intended, because Numpy's # Numpy's hstack and vstack functions. This is intended, because Numpy's
...@@ -3937,7 +4105,9 @@ class Reshape(Op): ...@@ -3937,7 +4105,9 @@ class Reshape(Op):
"""Perform a reshape operation of the input x to the new shape shp. """Perform a reshape operation of the input x to the new shape shp.
The number of dimensions to which to reshape to (ndim) must be The number of dimensions to which to reshape to (ndim) must be
known at graph build time.""" known at graph build time.
"""
view_map = {0: [0]} # output 0 is potentially aliased to inputs [0] view_map = {0: [0]} # output 0 is potentially aliased to inputs [0]
_f16_ok = True _f16_ok = True
...@@ -4131,8 +4301,11 @@ def reshape(x, newshape, ndim=None, name=None): ...@@ -4131,8 +4301,11 @@ def reshape(x, newshape, ndim=None, name=None):
class Flatten(Op): class Flatten(Op):
""" """
Flatten a tensor.
Flattens a tensor to `outdim` dimensions by preserving the leading Flattens a tensor to `outdim` dimensions by preserving the leading
outdim - 1 shape components. outdim - 1 shape components.
""" """
view_map = {0: [0]} view_map = {0: [0]}
...@@ -4305,16 +4478,19 @@ def flatten(x, outdim=1): ...@@ -4305,16 +4478,19 @@ def flatten(x, outdim=1):
class Tile(Op): class Tile(Op):
""" """
DEPRECATED: use tile() instead.
Construct an array by repeating the input x according to reps pattern. Construct an array by repeating the input x according to reps pattern.
.. note:: Deprecated
Use tile() instead.
Tiles its input according to reps. The length of reps is the number of Tiles its input according to reps. The length of reps is the number of
dimension of x and contains the number of times to tile x in each dimension of x and contains the number of times to tile x in each
dimension. dimension.
:see: `numpy.tile See Also
<http://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html>`_ --------
numpy.tile : http://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html
""" """
__props__ = ("ndim",) __props__ = ("ndim",)
...@@ -4377,13 +4553,15 @@ class Tile(Op): ...@@ -4377,13 +4553,15 @@ class Tile(Op):
def tile(x, reps, ndim=None): def tile(x, reps, ndim=None):
""" """
Tile input array `x` according to `reps`. See the docstring of `numpy.tile` Tile input array `x` according to `reps`.
for details.
See the docstring of `numpy.tile` for details.
Currently, x.ndim and len(reps) must be equal, and, if specified, 'ndim' Currently, x.ndim and len(reps) must be equal, and, if specified, 'ndim'
must be equal to both. must be equal to both.
TODO: expand this. TODO: expand this.
""" """
try: try:
...@@ -4420,6 +4598,7 @@ class ARange(Op): ...@@ -4420,6 +4598,7 @@ class ARange(Op):
"""Create an array containing evenly spaced values within a given interval. """Create an array containing evenly spaced values within a given interval.
Parameters and behaviour are the same as numpy.arange(). Parameters and behaviour are the same as numpy.arange().
""" """
__props__ = ("dtype",) __props__ = ("dtype",)
...@@ -4550,13 +4729,13 @@ class _nd_grid(object): ...@@ -4550,13 +4729,13 @@ class _nd_grid(object):
to their numpy equivalents. to their numpy equivalents.
Parameters Parameters
========== ----------
sparse : boolean, optional, default=True sparse : boolean, optional, default=True
Specifying False leads to the equivalent of numpy's mgrid Specifying False leads to the equivalent of numpy's mgrid functionality.
functionality. Specifying True leads to the equivalent of ogrid. Specifying True leads to the equivalent of ogrid.
Examples Examples
======== --------
>>> a = T.mgrid[0:5, 0:3] >>> a = T.mgrid[0:5, 0:3]
>>> a[0].eval() >>> a[0].eval()
array([[0, 0, 0], array([[0, 0, 0],
...@@ -4570,7 +4749,6 @@ class _nd_grid(object): ...@@ -4570,7 +4749,6 @@ class _nd_grid(object):
[0, 1, 2], [0, 1, 2],
[0, 1, 2], [0, 1, 2],
[0, 1, 2]], dtype=int8) [0, 1, 2]], dtype=int8)
>>> b = T.ogrid[0:5, 0:3] >>> b = T.ogrid[0:5, 0:3]
>>> b[0].eval() >>> b[0].eval()
array([[0], array([[0],
...@@ -4580,6 +4758,7 @@ class _nd_grid(object): ...@@ -4580,6 +4758,7 @@ class _nd_grid(object):
[4]], dtype=int8) [4]], dtype=int8)
>>> b[1].eval() >>> b[1].eval()
array([[0, 1, 2, 3]], dtype=int8) array([[0, 1, 2, 3]], dtype=int8)
""" """
def __init__(self, sparse=False): def __init__(self, sparse=False):
...@@ -4689,11 +4868,19 @@ class PermuteRowElements(Op): ...@@ -4689,11 +4868,19 @@ class PermuteRowElements(Op):
The terminal case is reached when the current tensors are vector, The terminal case is reached when the current tensors are vector,
then the permutation contained in y is applied to x. then the permutation contained in y is applied to x.
:param x: The input tensor, on which the permutation is applied Parameters
:param y: Tensor containing the permutations to apply ----------
:param out: Tensor storing the output result x : tensor
:param curdim: Counter of the current depth of recursion The input tensor, on which the permutation is applied.
:param inverse: Wether to apply permutations or their inverse y : tensor
Tensor containing the permutations to apply.
out : tensor
Tensor storing the output result.
curdim : int
Counter of the current depth of recursion.
inverse
Whether to apply permutations or their inverse.
""" """
if len(x.shape) == 1: if len(x.shape) == 1:
# Numpy advanced indexing works in this case # Numpy advanced indexing works in this case
...@@ -4817,7 +5004,9 @@ def permute_row_elements(x, y, inverse=0): ...@@ -4817,7 +5004,9 @@ def permute_row_elements(x, y, inverse=0):
def inverse_permutation(perm): def inverse_permutation(perm):
"""Computes the inverse of permutations. """Computes the inverse of permutations.
Each row of input should contain a permutation of the first integers. Each row of input should contain a permutation of the first integers.
""" """
return permute_row_elements( return permute_row_elements(
arange(perm.shape[-1], dtype=perm.dtype), arange(perm.shape[-1], dtype=perm.dtype),
...@@ -4840,14 +5029,14 @@ class Dot(Op): ...@@ -4840,14 +5029,14 @@ class Dot(Op):
equivalent to matrix multiplication. For two vectors, this is the inner equivalent to matrix multiplication. For two vectors, this is the inner
product. product.
:note: matrix-matrix products are sometimes optimized to Dot22 or Gemm ops. Notes
(see tensor.blas) -----
Matrix-matrix products are sometimes optimized to Dot22 or Gemm ops
:note: vector-vector products are sometimes optimized to Ger or CGer. (see (see tensor.blas).
tensor.blas) Vector-vector products are sometimes optimized to Ger or CGer (see
tensor.blas).
:note: matrix-vector products are sometimes optimized to Gemv, CGemv (see Matrix-vector products are sometimes optimized to Gemv, CGemv (see
tensor.blas) tensor.blas).
""" """
__props__ = () __props__ = ()
...@@ -5031,10 +5220,12 @@ pprint.assign(_dot, printing.OperatorPrinter(printing.special['middle_dot'], ...@@ -5031,10 +5220,12 @@ pprint.assign(_dot, printing.OperatorPrinter(printing.special['middle_dot'],
def dot(a, b): def dot(a, b):
""" """
Computes the dot product of two variables. For two matrices, this is Computes the dot product of two variables.
equivalent to matrix multiplication. For two vectors, this is the inner
product. When one variable is a scalar, this is like elementwise For two matrices, this is equivalent to matrix multiplication.
multiplication. For N dimensions, this is a sum product over the last axis For two vectors, this is the inner product.
When one variable is a scalar, this is like elementwise multiplication.
For N dimensions, this is a sum product over the last axis
of the first array and the second-to-last axis of the second array: of the first array and the second-to-last axis of the second array:
dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m]) dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
...@@ -5054,14 +5245,14 @@ def dot(a, b): ...@@ -5054,14 +5245,14 @@ def dot(a, b):
3. If both a and b have either 1 or 2 dimensions, it calls Theano's 3. If both a and b have either 1 or 2 dimensions, it calls Theano's
Dot op on a and b. Dot op on a and b.
:note: matrix-matrix products are sometimes optimized to Dot22 or Gemm ops. Notes
(see tensor.blas) -----
Matrix-matrix products are sometimes optimized to Dot22 or Gemm ops
:note: vector-vector products are sometimes optimized to Ger or CGer. (see (see tensor.blas).
tensor.blas) Vector-vector products are sometimes optimized to Ger or CGer (see
tensor.blas).
:note: matrix-vector products are sometimes optimized to Gemv, CGemv (see Matrix-vector products are sometimes optimized to Gemv, CGemv (see
tensor.blas) tensor.blas).
""" """
a, b = as_tensor_variable(a), as_tensor_variable(b) a, b = as_tensor_variable(a), as_tensor_variable(b)
...@@ -5080,20 +5271,23 @@ def dot(a, b): ...@@ -5080,20 +5271,23 @@ def dot(a, b):
def tensordot(a, b, axes=2): def tensordot(a, b, axes=2):
""" """
Given two tensors a and b,tensordot computes a generalized dot product over Compute a generalized dot product over provided axes.
Given two tensors a and b, tensordot computes a generalized dot product over
the provided axes. Theano's implementation reduces all expressions to the provided axes. Theano's implementation reduces all expressions to
matrix or vector dot products and is based on code from Tijmen Tieleman's matrix or vector dot products and is based on code from Tijmen Tieleman's
gnumpy (http://www.cs.toronto.edu/~tijmen/gnumpy.html). gnumpy (http://www.cs.toronto.edu/~tijmen/gnumpy.html).
:param a: the first tensor variable Parameters
:type a: symbolic tensor ----------
a : symbolic tensor
:param b: the second tensor variable The first tensor variable.
:type b: symbolic tensor b: symbolic tensor
The second tensor variable
:param axes: an integer or array. If an integer, the number of axes axes : int or array-like of length 2
to sum over. If an array, it must have two array If an integer, the number of axes to sum over.
elements containing the axes to sum over in each tensor. If an array, it must have two array elements containing the axes
to sum over in each tensor.
Note that the default value of 2 is not guaranteed to work Note that the default value of 2 is not guaranteed to work
for all values of a and b, and an error will be raised if for all values of a and b, and an error will be raised if
...@@ -5113,13 +5307,16 @@ def tensordot(a, b, axes=2): ...@@ -5113,13 +5307,16 @@ def tensordot(a, b, axes=2):
(Remember axes are zero-indexed!) The 2nd axis of a and the (Remember axes are zero-indexed!) The 2nd axis of a and the
3rd axis of b must have the same shape; the same is true for 3rd axis of b must have the same shape; the same is true for
the 3rd axis of a and the 1st axis of b. the 3rd axis of a and the 1st axis of b.
:type axes: int or array-like of length 2
:returns: a tensor with shape equal to the concatenation of a's shape Returns
-------
symbolic tensor
A tensor with shape equal to the concatenation of a's shape
(less any dimensions that were summed over) and b's shape (less any dimensions that were summed over) and b's shape
(less any dimensions that were summed over). (less any dimensions that were summed over).
:rtype: symbolic tensor
Examples
--------
It may be helpful to consider an example to see what tensordot does. It may be helpful to consider an example to see what tensordot does.
Theano's implementation is identical to NumPy's. Here a has shape (2, 3, 4) Theano's implementation is identical to NumPy's. Here a has shape (2, 3, 4)
and b has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] -- and b has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] --
...@@ -5127,29 +5324,30 @@ def tensordot(a, b, axes=2): ...@@ -5127,29 +5324,30 @@ def tensordot(a, b, axes=2):
are compatible. The resulting tensor will have shape (2, 5, 6) -- the are compatible. The resulting tensor will have shape (2, 5, 6) -- the
dimensions that are not being summed: dimensions that are not being summed:
a = np.random.random((2,3,4)) >>> a = np.random.random((2,3,4))
b = np.random.random((5,6,4,3)) >>> b = np.random.random((5,6,4,3))
#tensordot #tensordot
c = np.tensordot(a, b, [[1,2],[3,2]]) >>> c = np.tensordot(a, b, [[1,2],[3,2]])
#loop replicating tensordot #loop replicating tensordot
a0, a1, a2 = a.shape >>> a0, a1, a2 = a.shape
b0, b1, _, _ = b.shape >>> b0, b1, _, _ = b.shape
cloop = np.zeros((a0,b0,b1)) >>> cloop = np.zeros((a0,b0,b1))
#loop over non-summed indices -- these exist #loop over non-summed indices -- these exist
#in the tensor product. #in the tensor product.
for i in range(a0): >>> for i in range(a0):
for j in range(b0): ... for j in range(b0):
for k in range(b1): ... for k in range(b1):
#loop over summed indices -- these don't exist ... #loop over summed indices -- these don't exist
#in the tensor product. ... #in the tensor product.
for l in range(a1): ... for l in range(a1):
for m in range(a2): ... for m in range(a2):
cloop[i,j,k] += a[i,l,m] * b[j,k,m,l] ... cloop[i,j,k] += a[i,l,m] * b[j,k,m,l]
np.allclose(c, cloop) #true >>> np.allclose(c, cloop)
True
This specific implementation avoids a loop by transposing a and b such that This specific implementation avoids a loop by transposing a and b such that
the summed axes of a are last and the summed axes of b are first. The the summed axes of a are last and the summed axes of b are first. The
...@@ -5160,12 +5358,16 @@ def tensordot(a, b, axes=2): ...@@ -5160,12 +5358,16 @@ def tensordot(a, b, axes=2):
In an extreme case, no axes may be specified. The resulting tensor In an extreme case, no axes may be specified. The resulting tensor
will have shape equal to the concatenation of the shapes of a and b: will have shape equal to the concatenation of the shapes of a and b:
c = np.tensordot(a, b, 0) >>> c = np.tensordot(a, b, 0)
print(a.shape) #(2,3,4) >>> print(a.shape)
print(b.shape) #(5,6,4,3) (2, 3, 4)
print(c.shape) #(2,3,4,5,6,4,3) >>> print(b.shape)
(5, 6, 4, 3)
>>> print(c.shape)
(2, 3, 4, 5, 6, 4, 3)
See the documentation of numpy.tensordot for more examples. See the documentation of numpy.tensordot for more examples.
""" """
a, b = as_tensor_variable(a), as_tensor_variable(b) a, b = as_tensor_variable(a), as_tensor_variable(b)
...@@ -5275,6 +5477,7 @@ def outer(x, y): ...@@ -5275,6 +5477,7 @@ def outer(x, y):
"""Return vector-vector outer product. """Return vector-vector outer product.
If an input isn't a vector, we flatten it first. If an input isn't a vector, we flatten it first.
""" """
if x.ndim != 1: if x.ndim != 1:
x = x.flatten() x = x.flatten()
...@@ -5310,9 +5513,16 @@ del x ...@@ -5310,9 +5513,16 @@ del x
class Diagonal(Op): class Diagonal(Op):
"""Return specified diagonals. """Return specified diagonals.
:param x: A tensor variable with x.ndim >= 2. Parameters
----------
x
A tensor variable with x.ndim >= 2.
Returns
-------
vector
A vector representing the diagonal elements.
:return: A vector representing the diagonal elements.
""" """
__props__ = ("offset", "axis1", "axis2") __props__ = ("offset", "axis1", "axis2")
...@@ -5402,6 +5612,8 @@ def stacklists(arg): ...@@ -5402,6 +5612,8 @@ def stacklists(arg):
This function can create a tensor from a shaped list of scalars: This function can create a tensor from a shaped list of scalars:
Examples
--------
>>> from theano.tensor import stacklists, scalars, matrices >>> from theano.tensor import stacklists, scalars, matrices
>>> from theano import function >>> from theano import function
>>> a, b, c, d = scalars('abcd') >>> a, b, c, d = scalars('abcd')
...@@ -5421,6 +5633,7 @@ def stacklists(arg): ...@@ -5421,6 +5633,7 @@ def stacklists(arg):
>>> x = ones((4, 4), 'float32') >>> x = ones((4, 4), 'float32')
>>> f(x, x, x, x).shape >>> f(x, x, x, x).shape
(2, 2, 4, 4) (2, 2, 4, 4)
""" """
if isinstance(arg, (tuple, list)): if isinstance(arg, (tuple, list)):
return stack(*list(map(stacklists, arg))) return stack(*list(map(stacklists, arg)))
...@@ -5434,12 +5647,18 @@ def ptp(a, axis=None): ...@@ -5434,12 +5647,18 @@ def ptp(a, axis=None):
The name of the function comes from the acronym for peak to peak. The name of the function comes from the acronym for peak to peak.
:param a : Input tensor. Parameters
----------
a
Input tensor.
axis
Axis along which to find the peaks. By default, flatten the array.
:param axis : Axis along which to find the peaks. By default, Returns
flatten the array. -------
array
A new array holding the result.
:return : A new array holding the result.
""" """
a = as_tensor_variable(a) a = as_tensor_variable(a)
...@@ -5495,28 +5714,36 @@ def choose(a, choices, out=None, mode='raise'): ...@@ -5495,28 +5714,36 @@ def choose(a, choices, out=None, mode='raise'):
negative integers are mapped to 0; values greater than n-1 are mapped negative integers are mapped to 0; values greater than n-1 are mapped
to n-1; and then the new array is constructed as above. to n-1; and then the new array is constructed as above.
:Parameter: *a* - int array Parameters
----------
a : int array
This array must contain integers in [0, n-1], where n is the number of This array must contain integers in [0, n-1], where n is the number of
choices, unless mode=wrap or mode=clip, in which cases any integers choices, unless mode=wrap or mode=clip, in which cases any integers
are permissible. are permissible.
:Parameter: *choices* - sequence of arrays choices : sequence of arrays
Choice arrays. a and all of the choices must be broadcastable to Choice arrays. a and all of the choices must be broadcastable to
the same shape. If choices is itself an array (not recommended), the same shape. If choices is itself an array (not recommended),
then its outermost dimension (i.e., the one corresponding to then its outermost dimension (i.e., the one corresponding to
choices.shape[0]) is taken as defining the ``sequence``. choices.shape[0]) is taken as defining the ``sequence``.
:Parameter: *out* - array, optional out : array, optional
If provided, the result will be inserted into this array. If provided, the result will be inserted into this array.
It should be of the appropriate shape and dtype. It should be of the appropriate shape and dtype.
:Parameter: *mode* - {``raise`` (default), ``wrap``, ``clip``}, optional mode : {``raise`` (default), ``wrap``, ``clip``}, optional
Specifies how indices outside [0, n-1] will be treated: Specifies how indices outside [0, n-1] will be treated:
``raise`` : an exception is raised ``raise`` : an exception is raised
``wrap`` : value becomes value mod n ``wrap`` : value becomes value mod n
``clip`` : values < 0 are mapped to 0, values > n-1 are mapped to n-1 ``clip`` : values < 0 are mapped to 0, values > n-1 are mapped to n-1
:Returns: merged_array - array
Returns
-------
merged_array : array
The merged result. The merged result.
:Raises:
Raises
------
ValueError - shape mismatch ValueError - shape mismatch
If a and each choice array are not all broadcastable to the same shape. If a and each choice array are not all broadcastable to the same shape.
""" """
# This is done to keep the same function signature then NumPy. # This is done to keep the same function signature then NumPy.
assert out is None assert out is None
...@@ -5609,6 +5836,7 @@ class Choose(Op): ...@@ -5609,6 +5836,7 @@ class Choose(Op):
class AllocEmpty(gof.Op): class AllocEmpty(gof.Op):
"""Implement Alloc on the cpu, but without initializing memory.""" """Implement Alloc on the cpu, but without initializing memory."""
__props__ = ("dtype",) __props__ = ("dtype",)
# specify the type of the data # specify the type of the data
......
...@@ -17,10 +17,12 @@ There are four kinds of BLAS Ops in Theano: ...@@ -17,10 +17,12 @@ There are four kinds of BLAS Ops in Theano:
- C-based (blas_c) - C-based (blas_c)
- CUDA-based (theano.sandbox.cuda.blas) - CUDA-based (theano.sandbox.cuda.blas)
:note: Unfortunately (because it's confusing) this file currently contains Ops Notes
that contain both Python and C versions. I think it would be better to -----
move the C implementations to blas_c so that this file is pure Python. Unfortunately (because it's confusing) this file currently contains Ops
-JB that contain both Python and C versions. I think it would be better to
move the C implementations to blas_c so that this file is pure Python.
-JB
Ops Ops
...@@ -121,7 +123,6 @@ Specialize Gemm to Gemv ...@@ -121,7 +123,6 @@ Specialize Gemm to Gemv
If arguments to GEMM are dimshuffled vectors, then we can use GEMV If arguments to GEMM are dimshuffled vectors, then we can use GEMV
instead. This optimization is `local_gemm_to_gemv`. instead. This optimization is `local_gemm_to_gemv`.
""" """
from __future__ import print_function from __future__ import print_function
import copy import copy
...@@ -359,7 +360,9 @@ class Gemv(Op): ...@@ -359,7 +360,9 @@ class Gemv(Op):
x, y are vectors x, y are vectors
alpha, beta are scalars alpha, beta are scalars
output is a vector that can be inplace on y output is a vector that can be inplace on y
""" """
__props__ = ("inplace",) __props__ = ("inplace",)
def __init__(self, inplace): def __init__(self, inplace):
...@@ -443,12 +446,13 @@ class Ger(Op): ...@@ -443,12 +446,13 @@ class Ger(Op):
for matrix A, scalar alpha, vectors x and y. for matrix A, scalar alpha, vectors x and y.
This interface to GER allows non-destructive operation on A via the This interface to GER allows non-destructive operation on A via the
`destructive` `destructive` argument to the constructor.
argument to the constructor.
:TODO: Create better classes ScipyGer and CGer that inherit from this class :TODO: Create better classes ScipyGer and CGer that inherit from this class
and override the make_thunk() method to use Scipy and C respectively. and override the make_thunk() method to use Scipy and C respectively.
""" """
__props__ = ("destructive",) __props__ = ("destructive",)
def __init__(self, destructive): def __init__(self, destructive):
...@@ -508,16 +512,22 @@ def ldflags(libs=True, flags=False, libs_dir=False, include_dir=False): ...@@ -508,16 +512,22 @@ def ldflags(libs=True, flags=False, libs_dir=False, include_dir=False):
It returns a list of libraries against which an Op's object file It returns a list of libraries against which an Op's object file
should be linked to benefit from a BLAS implementation. should be linked to benefit from a BLAS implementation.
:type libs: bool, defaults to True Parameters
:param libs: extract flags starting with "-l" ----------
:type libs_dir: bool, defaults to False libs : bool, optional
:param libs_dir: extract flags starting with "-L" Extract flags starting with "-l" (the default is True).
:type include_dir: bool, defaults to False libs_dir : bool, optional
:param include_dir: extract flags starting with "-I" Extract flags starting with "-L" (the default is False).
:type flags: bool, defaults to False include_dir : bool, optional
:param flags: extract all the other flags Extract flags starting with "-I" (the default is False).
:rtype: list of strings flags: bool, optional
:returns: extracted flags Extract all the other flags (the default is False).
Returns
-------
list of strings
Extracted flags.
""" """
ldflags_str = theano.config.blas.ldflags ldflags_str = theano.config.blas.ldflags
return _ldflags(ldflags_str=ldflags_str, return _ldflags(ldflags_str=ldflags_str,
...@@ -533,19 +543,25 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir): ...@@ -533,19 +543,25 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir):
Depending on the options, different type of flags will be kept. Depending on the options, different type of flags will be kept.
:type ldflags_str: string Parameters
:param ldflags_str: the string to process. Typically, this will ----------
be the content of `theano.config.blas.ldflags` ldflags_str : string
:type libs: bool The string to process. Typically, this will be the content of
:param libs: extract flags starting with "-l" `theano.config.blas.ldflags`.
:type libs_dir: bool libs : bool
:param libs_dir: extract flags starting with "-L" Extract flags starting with "-l".
:type include_dir: bool flags: bool
:param include_dir: extract flags starting with "-I" Extract all the other flags.
:type flags: bool libs_dir: bool
:param flags: extract all the other flags Extract flags starting with "-L".
:rtype: list of strings include_dir: bool
:returns: extracted flags Extract flags starting with "-I".
Returns
-------
list of strings
Extracted flags.
""" """
rval = [] rval = []
if libs_dir: if libs_dir:
...@@ -598,10 +614,12 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir): ...@@ -598,10 +614,12 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir):
class GemmRelated(Op): class GemmRelated(Op):
"""Base class for Gemm and Dot22 """Base class for Gemm and Dot22.
This class provides a kind of templated gemm Op. This class provides a kind of templated gemm Op.
""" """
__props__ = () __props__ = ()
def c_support_code(self): def c_support_code(self):
...@@ -915,7 +933,7 @@ class GemmRelated(Op): ...@@ -915,7 +933,7 @@ class GemmRelated(Op):
class Gemm(GemmRelated): class Gemm(GemmRelated):
"""In-place version of matrix-matrix multiplication (with accumulation): """In-place version of matrix-matrix multiplication (with accumulation).
When a and b are scalars and x, y, and z are matrices, then When a and b are scalars and x, y, and z are matrices, then
...@@ -936,6 +954,7 @@ class Gemm(GemmRelated): ...@@ -936,6 +954,7 @@ class Gemm(GemmRelated):
optimized linear algebra operations.) optimized linear algebra operations.)
""" """
E_rank = 'gemm only works for rank 2' E_rank = 'gemm only works for rank 2'
E_scalar = 'gemm requires scalar argument' E_scalar = 'gemm requires scalar argument'
E_z_uniq = 'argument z aliased to x or y' # TODO: justify / delete this E_z_uniq = 'argument z aliased to x or y' # TODO: justify / delete this
...@@ -1430,9 +1449,10 @@ def _factor_canonicalized(lst): ...@@ -1430,9 +1449,10 @@ def _factor_canonicalized(lst):
def _gemm_from_factored_list(lst): def _gemm_from_factored_list(lst):
"""Returns None, or a list to replace node.outputs
""" """
Returns None, or a list to replace node.outputs.
"""
lst2 = [] lst2 = []
# Remove the tuple that can't be cast correctly. # Remove the tuple that can't be cast correctly.
# This can happen when we try to cast a complex to a real # This can happen when we try to cast a complex to a real
...@@ -1524,7 +1544,7 @@ def _gemm_from_node2(node): ...@@ -1524,7 +1544,7 @@ def _gemm_from_node2(node):
class GemmOptimizer(Optimizer): class GemmOptimizer(Optimizer):
"""Graph optimizer for inserting Gemm operations""" """Graph optimizer for inserting Gemm operations."""
def __init__(self): def __init__(self):
Optimizer.__init__(self) Optimizer.__init__(self)
self.warned = False self.warned = False
...@@ -1645,8 +1665,11 @@ class GemmOptimizer(Optimizer): ...@@ -1645,8 +1665,11 @@ class GemmOptimizer(Optimizer):
class Dot22(GemmRelated): class Dot22(GemmRelated):
"""Compute a matrix-matrix product. """Compute a matrix-matrix product.
This is a specialization of the more general Dot()
This is a specialization of the more general Dot().
""" """
def make_node(self, x, y): def make_node(self, x, y):
dtypes = ('float32', 'float64', 'complex64', 'complex128') dtypes = ('float32', 'float64', 'complex64', 'complex128')
if x.type.ndim != 2 or x.type.dtype not in dtypes: if x.type.ndim != 2 or x.type.dtype not in dtypes:
...@@ -1780,8 +1803,7 @@ def local_inplace_ger(node): ...@@ -1780,8 +1803,7 @@ def local_inplace_ger(node):
@local_optimizer([gemm_no_inplace]) @local_optimizer([gemm_no_inplace])
def local_gemm_to_gemv(node): def local_gemm_to_gemv(node):
"""GEMM acting on row or column matrices -> GEMV """GEMM acting on row or column matrices -> GEMV."""
"""
if node.op == gemm_no_inplace: if node.op == gemm_no_inplace:
z, a, x, y, b = node.inputs z, a, x, y, b = node.inputs
if z.broadcastable == x.broadcastable == (True, False): if z.broadcastable == x.broadcastable == (True, False):
...@@ -1794,8 +1816,7 @@ def local_gemm_to_gemv(node): ...@@ -1794,8 +1816,7 @@ def local_gemm_to_gemv(node):
@local_optimizer([gemm_no_inplace]) @local_optimizer([gemm_no_inplace])
def local_gemm_to_ger(node): def local_gemm_to_ger(node):
"""GEMM computing an outer-product -> GER """GEMM computing an outer-product -> GER."""
"""
if node.op == gemm_no_inplace: if node.op == gemm_no_inplace:
z, a, x, y, b = node.inputs z, a, x, y, b = node.inputs
if x.broadcastable[1] and y.broadcastable[0]: if x.broadcastable[1] and y.broadcastable[0]:
...@@ -1825,8 +1846,7 @@ def local_gemm_to_ger(node): ...@@ -1825,8 +1846,7 @@ def local_gemm_to_ger(node):
# working # working
@local_optimizer([_dot22]) @local_optimizer([_dot22])
def local_dot22_to_ger_or_gemv(node): def local_dot22_to_ger_or_gemv(node):
"""dot22 computing an outer-product -> GER """dot22 computing an outer-product -> GER."""
"""
if node.op == _dot22: if node.op == _dot22:
x, y = node.inputs x, y = node.inputs
xb = x.broadcastable xb = x.broadcastable
...@@ -1904,11 +1924,14 @@ optdb.register('InplaceBlasOpt', ...@@ -1904,11 +1924,14 @@ optdb.register('InplaceBlasOpt',
class Dot22Scalar(GemmRelated): class Dot22Scalar(GemmRelated):
"""Compute a matrix-matrix product. """Compute a matrix-matrix product.
This is a specialization of the more general Dot() This is a specialization of the more general Dot()
Used to call optimized gemm implementation. Used to call optimized gemm implementation.
Also used to generate a gemm later. Also used to generate a gemm later.
compute scalar*dot(x,y) compute scalar*dot(x,y).
""" """
def make_node(self, x, y, a): def make_node(self, x, y, a):
if a.ndim != 0: if a.ndim != 0:
raise TypeError(Gemm.E_scalar, a) raise TypeError(Gemm.E_scalar, a)
...@@ -1996,25 +2019,27 @@ _dot22scalar = Dot22Scalar() ...@@ -1996,25 +2019,27 @@ _dot22scalar = Dot22Scalar()
@local_optimizer([T.mul]) @local_optimizer([T.mul])
def local_dot22_to_dot22scalar(node): def local_dot22_to_dot22scalar(node):
""" """
:note: Previous attempts to alter this optimization to replace dot22 with Notes
-----
Previous attempts to alter this optimization to replace dot22 with
gemm instead of dot22scalar resulted in some Scan nodes being gemm instead of dot22scalar resulted in some Scan nodes being
duplicated and the ScanSaveMem optimization never running on them, duplicated and the ScanSaveMem optimization never running on them,
resulting in highly increased memory usage. Until this issue is resulting in highly increased memory usage. Until this issue is
resolved, this optimization should keep using dot22scalar instead of resolved, this optimization should keep using dot22scalar instead of
gemm. gemm.
:note: we upcast the scalar if after the multiplication with the We upcast the scalar if after the multiplication with the dot this give
dot this give the same type. the same type.
.. note: We execute this optimizer after the gemm optimizer. This We execute this optimizer after the gemm optimizer. This
allow to give more priority to gemm that give more speed up allow to give more priority to gemm that give more speed up
then this optimizer, but allow the gemm optimizer to ignore then this optimizer, but allow the gemm optimizer to ignore
this op. this op.
TODO: support when we can reorder the mul to generate a TODO: support when we can reorder the mul to generate a
dot22scalar or fix the canonizer to merge them(1 mul with multiple dot22scalar or fix the canonizer to merge them(1 mul with multiple
inputs) inputs)
""" """
if node.op != T.mul: if node.op != T.mul:
return False return False
...@@ -2102,7 +2127,6 @@ def local_dot22_to_dot22scalar(node): ...@@ -2102,7 +2127,6 @@ def local_dot22_to_dot22scalar(node):
return [T.mul(_dot22scalar(d.owner.inputs[0], return [T.mul(_dot22scalar(d.owner.inputs[0],
d.owner.inputs[1], a), *o)] d.owner.inputs[1], a), *o)]
# must happen after gemm as the gemm optimizer don't understant # must happen after gemm as the gemm optimizer don't understant
# dot22scalar and gemm give more speed up then dot22scalar # dot22scalar and gemm give more speed up then dot22scalar
blas_optdb.register('local_dot22_to_dot22scalar', blas_optdb.register('local_dot22_to_dot22scalar',
......
""" Header text for the C and Fortran BLAS interfaces. """ Header text for the C and Fortran BLAS interfaces.
There is no standard name or location for this header, so we just insert it There is no standard name or location for this header, so we just insert it
ourselves into the C code ourselves into the C code.
""" """
import logging import logging
import textwrap import textwrap
...@@ -32,6 +33,7 @@ def detect_macos_sdot_bug(): ...@@ -32,6 +33,7 @@ def detect_macos_sdot_bug():
detected. Its value is returned by the function detected. Its value is returned by the function
- detect_macos_sdot_bug.fix_works will be set to True if the fix was - detect_macos_sdot_bug.fix_works will be set to True if the fix was
attempted, and succeeded. attempted, and succeeded.
""" """
_logger.debug('Starting detection of bug in Mac OS BLAS sdot_ routine') _logger.debug('Starting detection of bug in Mac OS BLAS sdot_ routine')
if detect_macos_sdot_bug.tested: if detect_macos_sdot_bug.tested:
......
...@@ -62,7 +62,30 @@ class DimShuffle(Op): ...@@ -62,7 +62,30 @@ class DimShuffle(Op):
dimension and a numerical index represents the dimension of the same dimension and a numerical index represents the dimension of the same
rank in the tensor passed to perform. rank in the tensor passed to perform.
Examples: Parameters
----------
input_broadcastable
The expected broadcastable pattern of the input
new_order
A list representing the relationship between the input's
dimensions and the output's dimensions. Each element of the
list can either be an index or 'x'. Indices must be encoded
as python integers, not theano symbolic integers.
inplace : bool, optional
If True, the output will be a view of the input.
If False (default), the output will be a copy of the input.
If j = new_order[i] is an index, the output's ith dimension
will be the input's jth dimension.
If new_order[i] is 'x', the output's ith dimension will
be 1 and Broadcast operations will be allowed to do broadcasting
over that dimension.
If input.broadcastable[i] == False then i must be found in new_order.
Broadcastable dimensions, on the other hand, can be discarded.
Extended Summary
----------------
DimShuffle((False, False, False), ['x', 2, 'x', 0, 1]) DimShuffle((False, False, False), ['x', 2, 'x', 0, 1])
This op will only work on 3d tensors with no broadcastable This op will only work on 3d tensors with no broadcastable
...@@ -81,7 +104,7 @@ class DimShuffle(Op): ...@@ -81,7 +104,7 @@ class DimShuffle(Op):
If the tensor has shape (1, 20), the resulting tensor will have shape If the tensor has shape (1, 20), the resulting tensor will have shape
(20, ). (20, ).
More examples: More examples :
DimShuffle((), ['x']) -> make a 0d (scalar) into a 1d vector DimShuffle((), ['x']) -> make a 0d (scalar) into a 1d vector
DimShuffle((False, False), [0, 1]) -> identity DimShuffle((False, False), [0, 1]) -> identity
DimShuffle((False, False), [1, 0]) -> inverts the 1st and 2nd dimensions DimShuffle((False, False), [1, 0]) -> inverts the 1st and 2nd dimensions
...@@ -96,33 +119,13 @@ class DimShuffle(Op): ...@@ -96,33 +119,13 @@ class DimShuffle(Op):
The reordering of the dimensions can be done in numpy with the The reordering of the dimensions can be done in numpy with the
transpose function. transpose function.
Adding, subtracting dimensions can be done with reshape. Adding, subtracting dimensions can be done with reshape.
""" """
_f16_ok = True _f16_ok = True
check_input = False check_input = False
def __init__(self, input_broadcastable, new_order, inplace=False): def __init__(self, input_broadcastable, new_order, inplace=False):
"""
Usage: DimShuffle(input_broadcastable, new_order, inplace = False)
- input_broadcastable: the expected broadcastable pattern of the
input
- new_order: a list representing the relationship between the
input's dimensions and the output's dimensions. Each
element of the list can either be an index or 'x'.
Indices must be encoded as python integers, not
theano symbolic integers.
- inplace: if True, the output will be a view of the input.
If False, the output will be a copy of the input.
If j = new_order[i] is an index, the output's ith dimension
will be the input's jth dimension.
If new_order[i] is 'x', the output's ith dimension will
be 1 and Broadcast operations will be allowed to do broadcasting
over that dimension.
If input.broadcastable[i] == False then i must be found in new_order.
Broadcastable dimensions, on the other hand, can be discarded.
"""
input_broadcastable = tuple(input_broadcastable) input_broadcastable = tuple(input_broadcastable)
self.input_broadcastable = input_broadcastable self.input_broadcastable = input_broadcastable
new_order = tuple(new_order) new_order = tuple(new_order)
...@@ -456,7 +459,26 @@ class Elemwise(OpenMPOp): ...@@ -456,7 +459,26 @@ class Elemwise(OpenMPOp):
be the same as the corresponding input type (see the doc of be the same as the corresponding input type (see the doc of
scalar.ScalarOp to get help about controlling the output type) scalar.ScalarOp to get help about controlling the output type)
Examples: Parameters
-----------
scalar_op
An instance of a subclass of scalar.ScalarOp which works uniquely
on scalars.
inplace_pattern
A dictionary that maps the index of an output to the
index of an input so the output is calculated inplace using
the input's storage. (Just like destroymap, but without the lists.)
nfunc_spec
Either None or a tuple of three elements,
(nfunc_name, nin, nout) such that getattr(numpy, nfunc_name)
implements this operation, takes nin inputs and nout outputs.
Note that nin cannot always be inferred from the scalar op's
own nin field because that value is sometimes 0 (meaning a
variable number of inputs), whereas the numpy function may
not have varargs.
Examples
--------
Elemwise(add) # represents + on tensors (x + y) Elemwise(add) # represents + on tensors (x + y)
Elemwise(add, {0 : 0}) # represents the += operation (x += y) Elemwise(add, {0 : 0}) # represents the += operation (x += y)
Elemwise(add, {0 : 1}) # represents += on the second argument (y += x) Elemwise(add, {0 : 1}) # represents += on the second argument (y += x)
...@@ -466,26 +488,11 @@ class Elemwise(OpenMPOp): ...@@ -466,26 +488,11 @@ class Elemwise(OpenMPOp):
# second dimension # second dimension
Elemwise(int_div)(rand(1, 5), rand(10, 1)) # the output has size (10, 5) Elemwise(int_div)(rand(1, 5), rand(10, 1)) # the output has size (10, 5)
Elemwise(log)(rand(3, 4, 5)) Elemwise(log)(rand(3, 4, 5))
""" """
def __init__(self, scalar_op, inplace_pattern=None, name=None, def __init__(self, scalar_op, inplace_pattern=None, name=None,
nfunc_spec=None, openmp=None): nfunc_spec=None, openmp=None):
"""
Usage: Elemwise(scalar_op, inplace_pattern = {})
* scalar_op: an instance of a subclass of scalar.ScalarOp which works
uniquely on scalars
* inplace_pattern: a dictionary that maps the index of an output to the
index of an input so the output is calculated inplace using
the input's storage. (Just like destroymap, but without the lists.)
* nfunc_spec: either None or a tuple of three elements,
(nfunc_name, nin, nout) such that getattr(numpy, nfunc_name)
implements this operation, takes nin inputs and nout outputs.
Note that nin cannot always be inferred from the scalar op's
own nin field because that value is sometimes 0 (meaning a
variable number of inputs), whereas the numpy function may
not have varargs.
"""
if inplace_pattern is None: if inplace_pattern is None:
inplace_pattern = {} inplace_pattern = {}
self.name = name self.name = name
...@@ -1252,7 +1259,18 @@ class CAReduce(Op): ...@@ -1252,7 +1259,18 @@ class CAReduce(Op):
dimensions. It will contain the variable of accumulating all values dimensions. It will contain the variable of accumulating all values
over the reduced dimensions using the specified scalar op. over the reduced dimensions using the specified scalar op.
Examples: Parameters
----------
scalar_op
A binary scalar op with only one output.
It must be commutative and associative.
axis
- The dimension along which we want to reduce
- List of dimensions that we want to reduce
- If None, all dimensions are reduced
Examples
--------
CAReduce(add) -> sum (ie, acts like the numpy sum operation) CAReduce(add) -> sum (ie, acts like the numpy sum operation)
CAReduce(mul) -> product CAReduce(mul) -> product
CAReduce(maximum) -> max CAReduce(maximum) -> max
...@@ -1270,18 +1288,10 @@ class CAReduce(Op): ...@@ -1270,18 +1288,10 @@ class CAReduce(Op):
operation represented by the reduction must be both commutative operation represented by the reduction must be both commutative
and associative (eg add, multiply, maximum, binary or/and/xor - but not and associative (eg add, multiply, maximum, binary or/and/xor - but not
subtract, divide or power). subtract, divide or power).
"""
def __init__(self, scalar_op, axis=None):
""" """
Usage: CAReduce(scalar_op, axis = None)
* scalar_op: a binary scalar op with only one output. def __init__(self, scalar_op, axis=None):
It must be commutative and associative.
* axis: - the dimension along which we want to reduce
- list of dimensions that we want to reduce
- if None, all dimensions are reduced
"""
if scalar_op.nin not in [-1, 2] or scalar_op.nout != 1: if scalar_op.nin not in [-1, 2] or scalar_op.nout != 1:
raise NotImplementedError(( raise NotImplementedError((
"CAReduce only supports binary functions with a single " "CAReduce only supports binary functions with a single "
...@@ -1656,8 +1666,10 @@ class All(CAReduce): ...@@ -1656,8 +1666,10 @@ class All(CAReduce):
""" Applies `bitwise and` to all the values of a tensor along the """ Applies `bitwise and` to all the values of a tensor along the
specified axis(es). specified axis(es).
Equivalent to CAReduce(scalar.and_, axis=axis) Equivalent to CAReduce(scalar.and_, axis=axis).
""" """
def __init__(self, axis=None): def __init__(self, axis=None):
CAReduce.__init__(self, scalar.and_, axis) CAReduce.__init__(self, scalar.and_, axis)
...@@ -1686,8 +1698,10 @@ class Any(CAReduce): ...@@ -1686,8 +1698,10 @@ class Any(CAReduce):
""" Applies `bitwise or` to all the values of a tensor along the """ Applies `bitwise or` to all the values of a tensor along the
specified axis(es). specified axis(es).
Equivalent to CAReduce(scalar.or_, axis=axis) Equivalent to CAReduce(scalar.or_, axis=axis).
""" """
def __init__(self, axis=None): def __init__(self, axis=None):
CAReduce.__init__(self, scalar.or_, axis) CAReduce.__init__(self, scalar.or_, axis)
...@@ -1727,22 +1741,21 @@ class CAReduceDtype(CAReduce): ...@@ -1727,22 +1741,21 @@ class CAReduceDtype(CAReduce):
If no dtype is provided, one will be inferred so as not to lose If no dtype is provided, one will be inferred so as not to lose
too much precision. too much precision.
"""
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None): Parameters
""" ----------
Usage: CAReduceDtype(scalar_op, axis=None, dtype=None, acc_dtype=None) scalar_op
A binary scalar op with only one output.
:param scalar_op: a binary scalar op with only one output.
It must be commutative and associative. It must be commutative and associative.
:param axis: - the dimension along which we want to reduce axis
- the dimension along which we want to reduce
- list of dimensions that we want to reduce - list of dimensions that we want to reduce
- if None, all dimensions are reduced - if None, all dimensions are reduced
:param dtype: The dtype of the returned dtype
tensor. If None, then we use the default dtype which is the same The dtype of the returned tensor. If None, then we use the default
as the input tensor's dtype except when: dtype which is the same as the input tensor's dtype except when:
- the input dtype is a signed integer of precision < 64 bit, in - the input dtype is a signed integer of precision < 64 bit, in
which case we use int64 which case we use int64
- the input dtype is an unsigned integer of precision < 64 bit, in - the input dtype is an unsigned integer of precision < 64 bit, in
...@@ -1752,7 +1765,8 @@ class CAReduceDtype(CAReduce): ...@@ -1752,7 +1765,8 @@ class CAReduceDtype(CAReduce):
uses the default machine integer while we always use 64 bit uses the default machine integer while we always use 64 bit
integers to avoid platform-dependent behavior). integers to avoid platform-dependent behavior).
:param acc_dtype: The dtype of the internal accumulator. acc_dtype
The dtype of the internal accumulator.
If None (default), we use the dtype in the list below, If None (default), we use the dtype in the list below,
or the input dtype if its precision is higher: or the input dtype if its precision is higher:
- for int dtypes, we use at least int64; - for int dtypes, we use at least int64;
...@@ -1761,6 +1775,8 @@ class CAReduceDtype(CAReduce): ...@@ -1761,6 +1775,8 @@ class CAReduceDtype(CAReduce):
- for complex dtypes, we use at least complex128. - for complex dtypes, we use at least complex128.
""" """
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
CAReduce.__init__(self, scalar_op, axis=axis) CAReduce.__init__(self, scalar_op, axis=axis)
self.dtype = dtype self.dtype = dtype
self.acc_dtype = acc_dtype self.acc_dtype = acc_dtype
...@@ -1888,17 +1904,16 @@ class Sum(CAReduceDtype): ...@@ -1888,17 +1904,16 @@ class Sum(CAReduceDtype):
Equivalent to CAReduceDtype(scalar.add, axis=axis, dtype=dtype), Equivalent to CAReduceDtype(scalar.add, axis=axis, dtype=dtype),
with the difference that this defines the gradient of sum wrt its with the difference that this defines the gradient of sum wrt its
tensor input. tensor input.
"""
def __init__(self, axis=None, dtype=None, acc_dtype=None):
"""
Constructor.
:param axis: Axis(es) along which the tensor should be summed Parameters
----------
axis
Axis(es) along which the tensor should be summed
(use None to sum over all axes, and a list or tuple to sum along more (use None to sum over all axes, and a list or tuple to sum along more
than one axis). than one axis).
:param dtype: The dtype of the internal accumulator and returned dtype
The dtype of the internal accumulator and returned
tensor. If None, then we use the default dtype which is the same as the tensor. If None, then we use the default dtype which is the same as the
input tensor's dtype except when: input tensor's dtype except when:
- the input dtype is a signed integer of precision < 64 bit, in - the input dtype is a signed integer of precision < 64 bit, in
...@@ -1907,14 +1922,18 @@ class Sum(CAReduceDtype): ...@@ -1907,14 +1922,18 @@ class Sum(CAReduceDtype):
which case we use uint64 which case we use uint64
This value does not depend on the value of "acc_dtype". This value does not depend on the value of "acc_dtype".
:param acc_dtype: The dtype of the internal accumulator. acc_dtype
The dtype of the internal accumulator.
If None (default), we use the dtype in the list below, If None (default), we use the dtype in the list below,
or the input dtype if its precision is higher: or the input dtype if its precision is higher:
- for int dtypes, we use at least int64; - for int dtypes, we use at least int64;
- for uint dtypes, we use at least uint64; - for uint dtypes, we use at least uint64;
- for float dtypes, we use at least float64; - for float dtypes, we use at least float64;
- for complex dtypes, we use at least complex128. - for complex dtypes, we use at least complex128.
""" """
def __init__(self, axis=None, dtype=None, acc_dtype=None):
CAReduceDtype.__init__(self, scalar.add, axis=axis, CAReduceDtype.__init__(self, scalar.add, axis=axis,
dtype=dtype, acc_dtype=acc_dtype) dtype=dtype, acc_dtype=acc_dtype)
...@@ -1960,7 +1979,9 @@ class Prod(CAReduceDtype): ...@@ -1960,7 +1979,9 @@ class Prod(CAReduceDtype):
Equivalent to CAReduce(scalar.prod, axis = axis), with the Equivalent to CAReduce(scalar.prod, axis = axis), with the
difference that this defines the gradient of prod wrt its tensor difference that this defines the gradient of prod wrt its tensor
input. input.
""" """
def __init__(self, axis=None, dtype=None, acc_dtype=None, def __init__(self, axis=None, dtype=None, acc_dtype=None,
no_zeros_in_input=False): no_zeros_in_input=False):
CAReduceDtype.__init__(self, scalar.mul, axis=axis, CAReduceDtype.__init__(self, scalar.mul, axis=axis,
...@@ -1982,7 +2003,7 @@ class Prod(CAReduceDtype): ...@@ -1982,7 +2003,7 @@ class Prod(CAReduceDtype):
hash(self.no_zeros_in_input)) hash(self.no_zeros_in_input))
def grad(self, inp, grads): def grad(self, inp, grads):
''' """
The grad of this Op could be very easy, if it is was not for the case The grad of this Op could be very easy, if it is was not for the case
where zeros are present in a given "group" (ie. elements reduced where zeros are present in a given "group" (ie. elements reduced
together to form the product). together to form the product).
...@@ -2026,7 +2047,8 @@ class Prod(CAReduceDtype): ...@@ -2026,7 +2047,8 @@ class Prod(CAReduceDtype):
I do this by first counting the number of zeros in each group (see I do this by first counting the number of zeros in each group (see
the "T.eq()" bits), then taking this or that behavior (see T.switch) the "T.eq()" bits), then taking this or that behavior (see T.switch)
based on the result of this count. based on the result of this count.
'''
"""
prod_in, = inp prod_in, = inp
gz, = grads gz, = grads
......
...@@ -5,8 +5,8 @@ import theano ...@@ -5,8 +5,8 @@ import theano
def make_declare(loop_orders, dtypes, sub): def make_declare(loop_orders, dtypes, sub):
""" """
Produce code to declare all necessary variables. Produce code to declare all necessary variables.
"""
"""
decl = "" decl = ""
for i, (loop_order, dtype) in enumerate(zip(loop_orders, dtypes)): for i, (loop_order, dtype) in enumerate(zip(loop_orders, dtypes)):
var = sub['lv%i' % i] # input name corresponding to ith loop variable var = sub['lv%i' % i] # input name corresponding to ith loop variable
...@@ -117,8 +117,11 @@ def make_checks(loop_orders, dtypes, sub): ...@@ -117,8 +117,11 @@ def make_checks(loop_orders, dtypes, sub):
def make_alloc(loop_orders, dtype, sub, fortran='0'): def make_alloc(loop_orders, dtype, sub, fortran='0'):
"""Generate C code to allocate outputs. """Generate C code to allocate outputs.
:param fortran: a string included in the generated code. If it Parameters
evaludate to non-zero, an ndarray in fortran order will be ----------
fortran : str
A string included in the generated code. If it
evaluate to non-zero, an ndarray in fortran order will be
created, otherwise it will be c order. created, otherwise it will be c order.
""" """
...@@ -179,24 +182,23 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None): ...@@ -179,24 +182,23 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None):
Make a nested loop over several arrays and associate specific code Make a nested loop over several arrays and associate specific code
to each level of nesting. to each level of nesting.
@type loop_orders: list of N tuples of length M. Parameters
@param loop_orders: Each value of each ----------
tuple can be either the index of a dimension to loop over or loop_orders : list of N tuples of length M
the letter 'x' which means there is no looping to be done Each value of each tuple can be either the index of a dimension to
loop over or the letter 'x' which means there is no looping to be done
over that variable at that point (in other words we broadcast over that variable at that point (in other words we broadcast
over that dimension). If an entry is an integer, it will become over that dimension). If an entry is an integer, it will become
an alias of the entry of that rank. an alias of the entry of that rank.
loop_tasks : list of M+1 pieces of code
@type loop_tasks: list of M+1 pieces of code. The ith loop_task is a pair of strings, the first
@param loop_tasks: The ith loop_task is a pair of strings, the first
string is code to be executed before the ith loop starts, the second string is code to be executed before the ith loop starts, the second
one contains code to be executed just before going to the next element one contains code to be executed just before going to the next element
of the ith dimension. of the ith dimension.
The last element if loop_tasks is a single string, containing code The last element if loop_tasks is a single string, containing code
to be executed at the very end. to be executed at the very end.
sub : dictionary
@type sub: a dictionary. Maps 'lv#' to a suitable variable name.
@param sub: Maps 'lv#' to a suitable variable name.
The 'lvi' variable corresponds to the ith element of loop_orders. The 'lvi' variable corresponds to the ith element of loop_orders.
""" """
...@@ -244,8 +246,9 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None): ...@@ -244,8 +246,9 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None):
return "{%s}" % s return "{%s}" % s
def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, openmp=None): def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub,
'''A bit like make_loop, but when only the inner-most loop executes code. openmp=None):
"""A bit like make_loop, but when only the inner-most loop executes code.
All the loops will be reordered so that the loops over the output tensor All the loops will be reordered so that the loops over the output tensor
are executed with memory access as contiguous as possible. are executed with memory access as contiguous as possible.
...@@ -253,7 +256,8 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op ...@@ -253,7 +256,8 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op
will be on its rows; if it's f_contiguous, it will be on its columns. will be on its rows; if it's f_contiguous, it will be on its columns.
The output tensor's index among the loop variables is indicated by olv_index. The output tensor's index among the loop variables is indicated by olv_index.
'''
"""
# Number of variables # Number of variables
nvars = len(init_loop_orders) nvars = len(init_loop_orders)
...@@ -338,6 +342,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op ...@@ -338,6 +342,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op
Returns a list containing a C expression representing the Returns a list containing a C expression representing the
stride for each dimension of the ith variable, in the stride for each dimension of the ith variable, in the
specified loop_order. specified loop_order.
""" """
var = sub["lv%i" % i] var = sub["lv%i" % i]
r = [] r = []
...@@ -463,25 +468,25 @@ def make_loop_careduce(loop_orders, dtypes, loop_tasks, sub): ...@@ -463,25 +468,25 @@ def make_loop_careduce(loop_orders, dtypes, loop_tasks, sub):
Make a nested loop over several arrays and associate specific code Make a nested loop over several arrays and associate specific code
to each level of nesting. to each level of nesting.
@type loop_orders: list of N tuples of length M. Parameters
@param loop_orders: Each value of each ----------
tuple can be either the index of a dimension to loop over or loop_orders : list of N tuples of length M
the letter 'x' which means there is no looping to be done Each value of each tuple can be either the index of a dimension to
loop over or the letter 'x' which means there is no looping to be done
over that variable at that point (in other words we broadcast over that variable at that point (in other words we broadcast
over that dimension). If an entry is an integer, it will become over that dimension). If an entry is an integer, it will become
an alias of the entry of that rank. an alias of the entry of that rank.
loop_tasks : list of M+1 pieces of code
@type loop_tasks: list of M+1 pieces of code. The ith loop_task is a pair of strings, the first
@param loop_tasks: The ith loop_task is a pair of strings, the first
string is code to be executed before the ith loop starts, the second string is code to be executed before the ith loop starts, the second
one contains code to be executed just before going to the next element one contains code to be executed just before going to the next element
of the ith dimension. of the ith dimension.
The last element if loop_tasks is a single string, containing code The last element if loop_tasks is a single string, containing code
to be executed at the very end. to be executed at the very end.
sub: dictionary
@type sub: a dictionary. Maps 'lv#' to a suitable variable name.
@param sub: Maps 'lv#' to a suitable variable name.
The 'lvi' variable corresponds to the ith element of loop_orders. The 'lvi' variable corresponds to the ith element of loop_orders.
""" """
def loop_over(preloop, code, indices, i): def loop_over(preloop, code, indices, i):
......
...@@ -14,8 +14,9 @@ tensor = basic ...@@ -14,8 +14,9 @@ tensor = basic
class CpuContiguous(theano.Op): class CpuContiguous(theano.Op):
""" """
Check to see if the input is c-contiguous, Check to see if the input is c-contiguous,
if it is, do nothing, else return a contiguous array if it is, do nothing, else return a contiguous array.
""" """
__props__ = () __props__ = ()
view_map = {0: [0]} view_map = {0: [0]}
...@@ -171,12 +172,16 @@ def cumsum(x, axis=None): ...@@ -171,12 +172,16 @@ def cumsum(x, axis=None):
Wraping of numpy.cumsum. Wraping of numpy.cumsum.
:param x: Input tensor variable. Parameters
----------
:param axis: The axis along which the cumulative sum is computed. x
Input tensor variable.
axis
The axis along which the cumulative sum is computed.
The default (None) is to compute the cumsum over the flattened array. The default (None) is to compute the cumsum over the flattened array.
.. versionadded:: 0.7 .. versionadded:: 0.7
""" """
return CumsumOp(axis=axis)(x) return CumsumOp(axis=axis)(x)
...@@ -291,18 +296,24 @@ def cumprod(x, axis=None): ...@@ -291,18 +296,24 @@ def cumprod(x, axis=None):
Wraping of numpy.cumprod. Wraping of numpy.cumprod.
:param x: Input tensor variable. Parameters
----------
x
Input tensor variable.
:param axis: The axis along which the cumulative product is computed. axis
The axis along which the cumulative product is computed.
The default (None) is to compute the cumprod over the flattened array. The default (None) is to compute the cumprod over the flattened array.
.. versionadded:: 0.7 .. versionadded:: 0.7
""" """
return CumprodOp(axis=axis)(x) return CumprodOp(axis=axis)(x)
class DiffOp(theano.Op): class DiffOp(theano.Op):
# See function diff for docstring # See function diff for docstring
__props__ = ("n", "axis") __props__ = ("n", "axis")
def __init__(self, n=1, axis=-1): def __init__(self, n=1, axis=-1):
...@@ -354,23 +365,29 @@ def diff(x, n=1, axis=-1): ...@@ -354,23 +365,29 @@ def diff(x, n=1, axis=-1):
along the given axis, higher order differences are calculated by along the given axis, higher order differences are calculated by
using diff recursively. Wraping of numpy.diff. using diff recursively. Wraping of numpy.diff.
:param x: Input tensor variable. Parameters
----------
x
Input tensor variable.
:param n: The number of times values are differenced, default is 1. n
The number of times values are differenced, default is 1.
:param axis: The axis along which the difference is taken, axis
default is the last axis. The axis along which the difference is taken, default is the last axis.
.. versionadded:: 0.6 .. versionadded:: 0.6
""" """
return DiffOp(n=n, axis=axis)(x) return DiffOp(n=n, axis=axis)(x)
class BinCountOp(theano.Op): class BinCountOp(theano.Op):
""" """
DEPRECATED: use bincount() instead. .. note:: Deprecated
Use bincount() instead.
See function bincount for docstring.
See function bincount for docstring
""" """
compatible_type = ('int8', 'int16', 'int32', 'int64', compatible_type = ('int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64') 'uint8', 'uint16', 'uint32', 'uint64')
...@@ -473,17 +490,19 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False): ...@@ -473,17 +490,19 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False):
specified the input array is weighted by it, i.e. if a value n specified the input array is weighted by it, i.e. if a value n
is found at position i, out[n] += weight[i] instead of out[n] += 1. is found at position i, out[n] += weight[i] instead of out[n] += 1.
:param x: 1 dimension, nonnegative ints Parameters
----------
:param weights: array of the same shape as x with corresponding weights. x : 1 dimension, nonnegative ints
weights : array of the same shape as x with corresponding weights.
Optional. Optional.
:param minlength: A minimum number of bins for the output array. minlength : A minimum number of bins for the output array.
Optional. Optional.
:param assert_nonneg: A flag that inserts an assert_op to check if assert_nonneg : A flag that inserts an assert_op to check if
every input x is nonnegative. every input x is nonnegative.
Optional. Optional.
.. versionadded:: 0.6 .. versionadded:: 0.6
""" """
compatible_type = ('int8', 'int16', 'int32', 'int64', compatible_type = ('int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32') 'uint8', 'uint16', 'uint32')
...@@ -520,18 +539,25 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False): ...@@ -520,18 +539,25 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False):
def squeeze(x): def squeeze(x):
"""Remove broadcastable dimensions from """
the shape of an array. Remove broadcastable dimensions from the shape of an array.
It returns the input array, but with the It returns the input array, but with the
broadcastable dimensions removed. This is broadcastable dimensions removed. This is
always `x` itself or a view into `x`. always `x` itself or a view into `x`.
:param x: Input data, tensor variable. .. versionadded:: 0.6
:return: `x` without its broadcastable dimensions. Parameters
----------
x
Input data, tensor variable.
Returns
-------
object
`x` without its broadcastable dimensions.
.. versionadded:: 0.6
""" """
view = x.dimshuffle([i for i in range(x.ndim) view = x.dimshuffle([i for i in range(x.ndim)
if not x.broadcastable[i]]) if not x.broadcastable[i]])
...@@ -539,20 +565,28 @@ def squeeze(x): ...@@ -539,20 +565,28 @@ def squeeze(x):
def compress(condition, x, axis=None): def compress(condition, x, axis=None):
"""Return selected slices of an array along given axis. """
Return selected slices of an array along given axis.
It returns the input tensor, but with selected slices along a given axis It returns the input tensor, but with selected slices along a given axis
retained. If no axis is provided, the tensor is flattened retained. If no axis is provided, the tensor is flattened.
Corresponds to numpy.compress Corresponds to numpy.compress
:param x: Input data, tensor variable .. versionadded:: 0.7
:param condition: 1 dimensional array of non-zero and zero values Parameters
corresponding to indices of slices along a selected axis ----------
x
Input data, tensor variable.
condition
1 dimensional array of non-zero and zero values
corresponding to indices of slices along a selected axis.
:return: `x` with selected slices Returns
-------
object
`x` with selected slices.
.. versionadded:: 0.7
""" """
indices = theano.tensor.basic.flatnonzero(condition) indices = theano.tensor.basic.flatnonzero(condition)
return x.take(indices, axis=axis) return x.take(indices, axis=axis)
...@@ -560,6 +594,7 @@ def compress(condition, x, axis=None): ...@@ -560,6 +594,7 @@ def compress(condition, x, axis=None):
class RepeatOp(theano.Op): class RepeatOp(theano.Op):
# See the repeat function for docstring # See the repeat function for docstring
__props__ = ("axis",) __props__ = ("axis",)
def __init__(self, axis=None): def __init__(self, axis=None):
...@@ -678,14 +713,19 @@ def repeat(x, repeats, axis=None): ...@@ -678,14 +713,19 @@ def repeat(x, repeats, axis=None):
The number of repetitions for each element is `repeat`. The number of repetitions for each element is `repeat`.
`repeats` is broadcasted to fit the length of the given `axis`. `repeats` is broadcasted to fit the length of the given `axis`.
:param x: Input data, tensor variable. Parameters
:param repeats: int, scalar or tensor variable. ----------
x
Input data, tensor variable.
repeats : int, scalar or tensor variable
axis : int, optional
:param axis: int, optional. See Also
--------
:see: :func:`tensor.tile <tensor.tile>` tensor.tile
.. versionadded:: 0.6 .. versionadded:: 0.6
""" """
repeats = tensor.as_tensor_variable(repeats) repeats = tensor.as_tensor_variable(repeats)
...@@ -763,21 +803,27 @@ bartlett_ = Bartlett() ...@@ -763,21 +803,27 @@ bartlett_ = Bartlett()
# I create a function only to have the doc show well. # I create a function only to have the doc show well.
def bartlett(M): def bartlett(M):
"""An instance of this class returns the Bartlett spectral window in the """
An instance of this class returns the Bartlett spectral window in the
time-domain. The Bartlett window is very similar to a triangular window, time-domain. The Bartlett window is very similar to a triangular window,
except that the end points are at zero. It is often used in signal except that the end points are at zero. It is often used in signal
processing for tapering a signal, without generating too much ripple in processing for tapering a signal, without generating too much ripple in
the frequency domain. the frequency domain.
:param M: (integer scalar) Number of points in the output .. versionadded:: 0.6
window. If zero or less, an empty vector is returned.
:return: (vector of doubles) The triangular window, with the Parameters
maximum value normalized to one (the value one appears only if ----------
the number of samples is odd), with the first and last samples M : integer scalar
equal to zero. Number of points in the output window. If zero or less,
an empty vector is returned.
.. versionadded:: 0.6 Returns
-------
vector of doubles
The triangular window, with the maximum value normalized to one
(the value one appears only if the number of samples is odd), with
the first and last samples equal to zero.
""" """
return bartlett_(M) return bartlett_(M)
...@@ -823,8 +869,10 @@ class FillDiagonal(gof.Op): ...@@ -823,8 +869,10 @@ class FillDiagonal(gof.Op):
def grad(self, inp, cost_grad): def grad(self, inp, cost_grad):
""" """
Note: The gradient is currently implemented for matrices Notes
only. -----
The gradient is currently implemented for matrices only.
""" """
a, val = inp a, val = inp
grad = cost_grad[0] grad = cost_grad[0]
...@@ -843,15 +891,25 @@ fill_diagonal_ = FillDiagonal() ...@@ -843,15 +891,25 @@ fill_diagonal_ = FillDiagonal()
# I create a function only to have the doc show well. # I create a function only to have the doc show well.
def fill_diagonal(a, val): def fill_diagonal(a, val):
""" Returns a copy of an array with all """
Returns a copy of an array with all
elements of the main diagonal set to a specified scalar value. elements of the main diagonal set to a specified scalar value.
:param a: Rectangular array of at least two dimensions. .. versionadded:: 0.6
:param val: Scalar value to fill the diagonal whose type must be
Parameters
----------
a
Rectangular array of at least two dimensions.
val
Scalar value to fill the diagonal whose type must be
compatible with that of array 'a' (i.e. 'val' cannot be viewed compatible with that of array 'a' (i.e. 'val' cannot be viewed
as an upcast of 'a'). as an upcast of 'a').
:return: An array identical to 'a' except that its main diagonal Returns
-------
array
An array identical to 'a' except that its main diagonal
is filled with scalar 'val'. (For an array 'a' with a.ndim >= is filled with scalar 'val'. (For an array 'a' with a.ndim >=
2, the main diagonal is the list of locations a[i, i, ..., i] 2, the main diagonal is the list of locations a[i, i, ..., i]
(i.e. with indices all identical).) (i.e. with indices all identical).)
...@@ -859,7 +917,8 @@ def fill_diagonal(a, val): ...@@ -859,7 +917,8 @@ def fill_diagonal(a, val):
Support rectangular matrix and tensor with more than 2 dimensions Support rectangular matrix and tensor with more than 2 dimensions
if the later have all dimensions are equals. if the later have all dimensions are equals.
.. versionadded:: 0.6
""" """
return fill_diagonal_(a, val) return fill_diagonal_(a, val)
...@@ -902,13 +961,16 @@ class FillDiagonalOffset(gof.Op): ...@@ -902,13 +961,16 @@ class FillDiagonalOffset(gof.Op):
height, width = a.shape height, width = a.shape
""" """
Note: The fill_diagonal only support rectangular matrix. The output Notes
-----
The fill_diagonal only support rectangular matrix. The output
of tall matrix is "wrapped", which is an option in numpy 1.9.0 of tall matrix is "wrapped", which is an option in numpy 1.9.0
but was regarded as a bug in numpy 1.6.2. Here I implement the but was regarded as a bug in numpy 1.6.2. Here I implement the
fill_diagonal_offset with unwrapped output, so fill_diagonal_offset fill_diagonal_offset with unwrapped output, so fill_diagonal_offset
supports tall matrix.(This make a little difference between the output supports tall matrix.(This make a little difference between the output
of fill_diagonal and fill_diagonal_offset only in the case of tall of fill_diagonal and fill_diagonal_offset only in the case of tall
matrix) matrix)
""" """
if offset >= 0: if offset >= 0:
start = offset start = offset
...@@ -925,8 +987,9 @@ class FillDiagonalOffset(gof.Op): ...@@ -925,8 +987,9 @@ class FillDiagonalOffset(gof.Op):
def grad(self, inp, cost_grad): def grad(self, inp, cost_grad):
""" """
Note: The gradient is currently implemented for matrices Notes
only. -----
The gradient is currently implemented for matrices only.
""" """
a, val, offset = inp a, val, offset = inp
grad = cost_grad[0] grad = cost_grad[0]
...@@ -972,29 +1035,47 @@ def fill_diagonal_offset(a, val, offset): ...@@ -972,29 +1035,47 @@ def fill_diagonal_offset(a, val, offset):
Returns a copy of an array with all Returns a copy of an array with all
elements of the main diagonal set to a specified scalar value. elements of the main diagonal set to a specified scalar value.
:param a: Rectangular array of two dimensions. Parameters
:param val: Scalar value to fill the diagonal whose type must be ----------
a
Rectangular array of two dimensions.
val
Scalar value to fill the diagonal whose type must be
compatible with that of array 'a' (i.e. 'val' cannot be viewed compatible with that of array 'a' (i.e. 'val' cannot be viewed
as an upcast of 'a'). as an upcast of 'a').
:param offset: Scalar value Offset of the diagonal from the main offset
Scalar value Offset of the diagonal from the main
diagonal. Can be positive or negative integer. diagonal. Can be positive or negative integer.
:return: An array identical to 'a' except that its offset diagonal
Returns
-------
array
An array identical to 'a' except that its offset diagonal
is filled with scalar 'val'. The output is unwrapped. is filled with scalar 'val'. The output is unwrapped.
""" """
return fill_diagonal_offset_(a, val, offset) return fill_diagonal_offset_(a, val, offset)
def to_one_hot(y, nb_class, dtype=None): def to_one_hot(y, nb_class, dtype=None):
"""Return a matrix where each row correspond to the one hot """
Return a matrix where each row correspond to the one hot
encoding of each element in y. encoding of each element in y.
:param y: A vector of integer value between 0 and nb_class - 1. Parameters
:param nb_class: The number of class in y. ----------
:param dtype: The dtype of the returned matrix. Default floatX. y
A vector of integer value between 0 and nb_class - 1.
:return: A matrix of shape (y.shape[0], nb_class), where each nb_class : int
row ``i`` is the one hot encoding of the corresponding ``y[i]`` The number of class in y.
value. dtype : data-type
The dtype of the returned matrix. Default floatX.
Returns
-------
object
A matrix of shape (y.shape[0], nb_class), where each row ``i`` is
the one hot encoding of the corresponding ``y[i]`` value.
""" """
ret = theano.tensor.zeros((y.shape[0], nb_class), ret = theano.tensor.zeros((y.shape[0], nb_class),
...@@ -1006,11 +1087,10 @@ def to_one_hot(y, nb_class, dtype=None): ...@@ -1006,11 +1087,10 @@ def to_one_hot(y, nb_class, dtype=None):
class Unique(theano.Op): class Unique(theano.Op):
""" """
Wraps numpy.unique. Wraps numpy.unique. This op is not implemented on the GPU.
This op is not implemented on the GPU.
Examples Examples
======== --------
>>> import numpy as np >>> import numpy as np
>>> x = theano.tensor.vector() >>> x = theano.tensor.vector()
...@@ -1022,7 +1102,9 @@ class Unique(theano.Op): ...@@ -1022,7 +1102,9 @@ class Unique(theano.Op):
>>> g = theano.function([y], Unique(True, True, False)(y)) >>> g = theano.function([y], Unique(True, True, False)(y))
>>> g([[1, 1, 1.0], (2, 3, 3.0)]) >>> g([[1, 1, 1.0], (2, 3, 3.0)])
[array([ 1., 2., 3.]), array([0, 3, 4]), array([0, 0, 0, 1, 2, 2])] [array([ 1., 2., 3.]), array([0, 3, 4]), array([0, 0, 0, 1, 2, 2])]
""" """
__props__ = ("return_index", "return_inverse", "return_counts") __props__ = ("return_index", "return_inverse", "return_counts")
def __init__(self, return_index=False, return_inverse=False, def __init__(self, return_index=False, return_inverse=False,
......
...@@ -11,13 +11,18 @@ import theano ...@@ -11,13 +11,18 @@ import theano
class LoadFromDisk(Op): class LoadFromDisk(Op):
""" """
An operation to load an array from disk An operation to load an array from disk.
See Also See Also
--------
load load
@note: Non-differentiable. Notes
-----
Non-differentiable.
""" """
__props__ = ("dtype", "broadcastable", "mmap_mode") __props__ = ("dtype", "broadcastable", "mmap_mode")
def __init__(self, dtype, broadcastable, mmap_mode=None): def __init__(self, dtype, broadcastable, mmap_mode=None):
...@@ -53,18 +58,26 @@ def load(path, dtype, broadcastable, mmap_mode=None): ...@@ -53,18 +58,26 @@ def load(path, dtype, broadcastable, mmap_mode=None):
""" """
Load an array from an .npy file. Load an array from an .npy file.
:param path: A Generic symbolic variable, that will contain a string Parameters
:param dtype: The data type of the array to be read. ----------
:param broadcastable: The broadcastable pattern of the loaded array, path
for instance, (False,) for a vector, (False, True) for a column, A Generic symbolic variable, that will contain a string
dtype : data-type
The data type of the array to be read.
broadcastable
The broadcastable pattern of the loaded array, for instance,
(False,) for a vector, (False, True) for a column,
(False, False) for a matrix. (False, False) for a matrix.
:param mmap_mode: How the file will be loaded. None means that the mmap_mode
How the file will be loaded. None means that the
data will be copied into an array in memory, 'c' means that the file data will be copied into an array in memory, 'c' means that the file
will be mapped into virtual memory, so only the parts that are will be mapped into virtual memory, so only the parts that are
needed will be actually read from disk and put into memory. needed will be actually read from disk and put into memory.
Other modes supported by numpy.load ('r', 'r+', 'w+') cannot Other modes supported by numpy.load ('r', 'r+', 'w+') cannot
be supported by Theano. be supported by Theano.
Examples
--------
>>> from theano import * >>> from theano import *
>>> path = Variable(Generic()) >>> path = Variable(Generic())
>>> x = tensor.load(path, 'int64', (False,)) >>> x = tensor.load(path, 'int64', (False,))
...@@ -72,6 +85,7 @@ def load(path, dtype, broadcastable, mmap_mode=None): ...@@ -72,6 +85,7 @@ def load(path, dtype, broadcastable, mmap_mode=None):
>>> fn = function([path], y) >>> fn = function([path], y)
>>> fn("stored-array.npy") >>> fn("stored-array.npy")
array([0, 2, 4, 6, 8], dtype=int64) array([0, 2, 4, 6, 8], dtype=int64)
""" """
return LoadFromDisk(dtype, broadcastable, mmap_mode)(path) return LoadFromDisk(dtype, broadcastable, mmap_mode)(path)
...@@ -91,14 +105,19 @@ else: ...@@ -91,14 +105,19 @@ else:
class MPIRecv(Op): class MPIRecv(Op):
""" """
An operation to asynchronously receive an array to a remote host using MPI An operation to asynchronously receive an array to a remote host using MPI.
See Also See Also
--------
MPIRecv MPIRecv
MPIWait MPIWait
@note: Non-differentiable. Notes
-----
Non-differentiable.
""" """
__props__ = ("source", "tag", "shape", "dtype") __props__ = ("source", "tag", "shape", "dtype")
def __init__(self, source, tag, shape, dtype): def __init__(self, source, tag, shape, dtype):
...@@ -134,13 +153,18 @@ class MPIRecv(Op): ...@@ -134,13 +153,18 @@ class MPIRecv(Op):
class MPIRecvWait(Op): class MPIRecvWait(Op):
""" """
An operation to wait on a previously received array using MPI An operation to wait on a previously received array using MPI.
See Also See Also
--------
MPIRecv MPIRecv
@note: Non-differentiable. Notes
-----
Non-differentiable.
""" """
__props__ = ("tag",) __props__ = ("tag",)
def __init__(self, tag): def __init__(self, tag):
...@@ -168,14 +192,19 @@ class MPIRecvWait(Op): ...@@ -168,14 +192,19 @@ class MPIRecvWait(Op):
class MPISend(Op): class MPISend(Op):
""" """
An operation to asynchronously Send an array to a remote host using MPI An operation to asynchronously Send an array to a remote host using MPI.
See Also See Also
--------
MPIRecv MPIRecv
MPISendWait MPISendWait
@note: Non-differentiable. Notes
-----
Non-differentiable.
""" """
__props__ = ("dest", "tag") __props__ = ("dest", "tag")
def __init__(self, dest, tag): def __init__(self, dest, tag):
...@@ -202,12 +231,16 @@ class MPISend(Op): ...@@ -202,12 +231,16 @@ class MPISend(Op):
class MPISendWait(Op): class MPISendWait(Op):
""" """
An operation to wait on a previously sent array using MPI An operation to wait on a previously sent array using MPI.
See Also: See Also
--------
MPISend MPISend
@note: Non-differentiable. Notes
-----
Non-differentiable.
""" """
__props__ = ("tag",) __props__ = ("tag",)
...@@ -227,35 +260,35 @@ class MPISendWait(Op): ...@@ -227,35 +260,35 @@ class MPISendWait(Op):
def isend(var, dest, tag): def isend(var, dest, tag):
""" """
Non blocking send Non blocking send.
""" """
return MPISend(dest, tag)(var) return MPISend(dest, tag)(var)
def send(var, dest, tag): def send(var, dest, tag):
""" """
blocking send Blocking send.
""" """
return MPISendWait(tag)(*isend(var, dest, tag)) return MPISendWait(tag)(*isend(var, dest, tag))
def irecv(shape, dtype, source, tag): def irecv(shape, dtype, source, tag):
""" """
non-blocking receive Non-blocking receive.
""" """
return MPIRecv(source, tag, shape, dtype)() return MPIRecv(source, tag, shape, dtype)()
def recv(shape, dtype, source, tag): def recv(shape, dtype, source, tag):
""" """
blocking receive Blocking receive.
""" """
return MPIRecvWait(tag)(*irecv(shape, dtype, source, tag)) return MPIRecvWait(tag)(*irecv(shape, dtype, source, tag))
# Ordering keys for scheduling # Ordering keys for scheduling
def mpi_send_wait_key(a): def mpi_send_wait_key(a):
""" Wait as long as possible on Waits, Start Send/Recvs early """ """Wait as long as possible on Waits, Start Send/Recvs early."""
if isinstance(a.op, (MPIRecvWait, MPISendWait)): if isinstance(a.op, (MPIRecvWait, MPISendWait)):
return 1 return 1
if isinstance(a.op, (MPIRecv, MPISend)): if isinstance(a.op, (MPIRecv, MPISend)):
...@@ -264,7 +297,7 @@ def mpi_send_wait_key(a): ...@@ -264,7 +297,7 @@ def mpi_send_wait_key(a):
def mpi_tag_key(a): def mpi_tag_key(a):
""" Break MPI ties by using the variable tag - prefer lower tags first """ """Break MPI ties by using the variable tag - prefer lower tags first."""
if isinstance(a.op, (MPISend, MPIRecv, MPIRecvWait, MPISendWait)): if isinstance(a.op, (MPISend, MPIRecv, MPIRecvWait, MPISendWait)):
return a.op.tag return a.op.tag
else: else:
......
...@@ -17,17 +17,18 @@ logger = logging.getLogger(__name__) ...@@ -17,17 +17,18 @@ logger = logging.getLogger(__name__)
class MatrixPinv(Op): class MatrixPinv(Op):
"""Computes the pseudo-inverse of a matrix :math:`A`. """Computes the pseudo-inverse of a matrix :math:`A`.
The pseudo-inverse of a matrix A, denoted :math:`A^+`, is The pseudo-inverse of a matrix :math:`A`, denoted :math:`A^+`, is
defined as: "the matrix that 'solves' [the least-squares problem] defined as: "the matrix that 'solves' [the least-squares problem]
:math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then :math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
:math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`. :math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.
Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity matrix. Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity matrix.
This method is not faster then `matrix_inverse`. Its strength comes from This method is not faster than `matrix_inverse`. Its strength comes from
that it works for non-square matrices. that it works for non-square matrices.
If you have a square matrix though, `matrix_inverse` can be both more If you have a square matrix though, `matrix_inverse` can be both more
exact and faster to compute. Also this op does not get optimized into a exact and faster to compute. Also this op does not get optimized into a
solve op. solve op.
""" """
__props__ = () __props__ = ()
...@@ -55,8 +56,11 @@ class MatrixInverse(Op): ...@@ -55,8 +56,11 @@ class MatrixInverse(Op):
matrix :math:`A_{inv}` such that the dot product :math:`A \cdot A_{inv}` matrix :math:`A_{inv}` such that the dot product :math:`A \cdot A_{inv}`
and :math:`A_{inv} \cdot A` equals the identity matrix :math:`I`. and :math:`A_{inv} \cdot A` equals the identity matrix :math:`I`.
:note: When possible, the call to this op will be optimized to the call Notes
-----
When possible, the call to this op will be optimized to the call
of ``solve``. of ``solve``.
""" """
__props__ = () __props__ = ()
...@@ -82,7 +86,7 @@ class MatrixInverse(Op): ...@@ -82,7 +86,7 @@ class MatrixInverse(Op):
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``. Using the `matrix cookbook ``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_, <http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
once can deduce that the relation corresponds to one can deduce that the relation corresponds to
.. math:: (X^{-1} \cdot V^{T} \cdot X^{-1})^T. .. math:: (X^{-1} \cdot V^{T} \cdot X^{-1})^T.
...@@ -101,7 +105,7 @@ class MatrixInverse(Op): ...@@ -101,7 +105,7 @@ class MatrixInverse(Op):
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``. Using the `matrix cookbook ``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_, <http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
once can deduce that the relation corresponds to one can deduce that the relation corresponds to
.. math:: X^{-1} \cdot V \cdot X^{-1}. .. math:: X^{-1} \cdot V \cdot X^{-1}.
...@@ -120,11 +124,12 @@ matrix_inverse = MatrixInverse() ...@@ -120,11 +124,12 @@ matrix_inverse = MatrixInverse()
def matrix_dot(*args): def matrix_dot(*args):
""" Shorthand for product between several dots """ Shorthand for product between several dots.
Given :math:`N` matrices :math:`A_0, A_1, .., A_N`, ``matrix_dot`` will Given :math:`N` matrices :math:`A_0, A_1, .., A_N`, ``matrix_dot`` will
generate the matrix product between all in the given order, namely generate the matrix product between all in the given order, namely
:math:`A_0 \cdot A_1 \cdot A_2 \cdot .. \cdot A_N`. :math:`A_0 \cdot A_1 \cdot A_2 \cdot .. \cdot A_N`.
""" """
rval = args[0] rval = args[0]
for a in args[1:]: for a in args[1:]:
...@@ -163,10 +168,14 @@ alloc_diag = AllocDiag() ...@@ -163,10 +168,14 @@ alloc_diag = AllocDiag()
class ExtractDiag(Op): class ExtractDiag(Op):
""" Return the diagonal of a matrix. """Return the diagonal of a matrix.
Notes
-----
Works on the GPU.
:note: work on the GPU.
""" """
__props__ = ("view",) __props__ = ("view",)
def __init__(self, view=False): def __init__(self, view=False):
...@@ -246,14 +255,18 @@ def trace(X): ...@@ -246,14 +255,18 @@ def trace(X):
""" """
Returns the sum of diagonal elements of matrix X. Returns the sum of diagonal elements of matrix X.
:note: work on GPU since 0.6rc4. Notes
-----
Works on GPU since 0.6rc4.
""" """
return extract_diag(X).sum() return extract_diag(X).sum()
class Det(Op): class Det(Op):
"""Matrix determinant """
Input should be a square matrix Matrix determinant. Input should be a square matrix.
""" """
__props__ = () __props__ = ()
...@@ -287,9 +300,11 @@ det = Det() ...@@ -287,9 +300,11 @@ det = Det()
class Eig(Op): class Eig(Op):
"""Compute the eigenvalues and right eigenvectors of a square array. """
Compute the eigenvalues and right eigenvectors of a square array.
""" """
_numop = staticmethod(numpy.linalg.eig) _numop = staticmethod(numpy.linalg.eig)
__props__ = () __props__ = ()
...@@ -317,6 +332,7 @@ class Eigh(Eig): ...@@ -317,6 +332,7 @@ class Eigh(Eig):
Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix. Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
""" """
_numop = staticmethod(numpy.linalg.eigh) _numop = staticmethod(numpy.linalg.eigh)
__props__ = ('UPLO',) __props__ = ('UPLO',)
...@@ -363,6 +379,7 @@ class Eigh(Eig): ...@@ -363,6 +379,7 @@ class Eigh(Eig):
.. math:: \frac{\partial\,v_{kn}} .. math:: \frac{\partial\,v_{kn}}
{\partial a_{ij}} = {\partial a_{ij}} =
\sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m} \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
""" """
x, = inputs x, = inputs
w, v = self(x) w, v = self(x)
...@@ -383,9 +400,11 @@ def _zero_disconnected(outputs, grads): ...@@ -383,9 +400,11 @@ def _zero_disconnected(outputs, grads):
class EighGrad(Op): class EighGrad(Op):
"""Gradient of an eigensystem of a Hermitian matrix. """
Gradient of an eigensystem of a Hermitian matrix.
""" """
__props__ = ('UPLO',) __props__ = ('UPLO',)
def __init__(self, UPLO='L'): def __init__(self, UPLO='L'):
...@@ -414,6 +433,7 @@ class EighGrad(Op): ...@@ -414,6 +433,7 @@ class EighGrad(Op):
""" """
Implements the "reverse-mode" gradient for the eigensystem of Implements the "reverse-mode" gradient for the eigensystem of
a square matrix. a square matrix.
""" """
x, w, v, W, V = inputs x, w, v, W, V = inputs
N = x.shape[0] N = x.shape[0]
...@@ -453,10 +473,13 @@ def eigh(a, UPLO='L'): ...@@ -453,10 +473,13 @@ def eigh(a, UPLO='L'):
class QRFull(Op): class QRFull(Op):
""" """
Full QR Decomposition. Full QR Decomposition.
Computes the QR decomposition of a matrix. Computes the QR decomposition of a matrix.
Factor the matrix a as qr, where q is orthonormal Factor the matrix a as qr, where q is orthonormal
and r is upper-triangular. and r is upper-triangular.
""" """
_numop = staticmethod(numpy.linalg.qr) _numop = staticmethod(numpy.linalg.qr)
__props__ = ('mode',) __props__ = ('mode',)
...@@ -484,9 +507,12 @@ class QRFull(Op): ...@@ -484,9 +507,12 @@ class QRFull(Op):
class QRIncomplete(Op): class QRIncomplete(Op):
""" """
Incomplete QR Decomposition. Incomplete QR Decomposition.
Computes the QR decomposition of a matrix. Computes the QR decomposition of a matrix.
Factor the matrix a as qr and return a single matrix. Factor the matrix a as qr and return a single matrix.
""" """
_numop = staticmethod(numpy.linalg.qr) _numop = staticmethod(numpy.linalg.qr)
__props__ = ('mode',) __props__ = ('mode',)
...@@ -513,15 +539,12 @@ def qr(a, mode="full"): ...@@ -513,15 +539,12 @@ def qr(a, mode="full"):
Factor the matrix a as qr, where q Factor the matrix a as qr, where q
is orthonormal and r is upper-triangular. is orthonormal and r is upper-triangular.
:type a: Parameters
array_like, shape (M, N) ----------
:param a: a : array_like, shape (M, N)
Matrix to be factored. Matrix to be factored.
:type mode: mode : {'reduced', 'complete', 'r', 'raw', 'full', 'economic'}, optional
one of 'reduced', 'complete', 'r', 'raw', 'full' and
'economic', optional
:keyword mode:
If K = min(M, N), then If K = min(M, N), then
'reduced' 'reduced'
...@@ -558,19 +581,18 @@ def qr(a, mode="full"): ...@@ -558,19 +581,18 @@ def qr(a, mode="full"):
both doing the same thing in the new numpy version but only both doing the same thing in the new numpy version but only
full works on the old previous numpy version. full works on the old previous numpy version.
:rtype q: Returns
matrix of float or complex, optional -------
:return q: q : matrix of float or complex, optional
A matrix with orthonormal columns. When mode = 'complete' the A matrix with orthonormal columns. When mode = 'complete' the
result is an orthogonal/unitary matrix depending on whether or result is an orthogonal/unitary matrix depending on whether or
not a is real/complex. The determinant may be either +/- 1 in not a is real/complex. The determinant may be either +/- 1 in
that case. that case.
r : matrix of float or complex, optional
:rtype r:
matrix of float or complex, optional
:return r:
The upper-triangular matrix. The upper-triangular matrix.
""" """
x = [[2, 1], [3, 4]] x = [[2, 1], [3, 4]]
if isinstance(numpy.linalg.qr(x, mode), tuple): if isinstance(numpy.linalg.qr(x, mode), tuple):
return QRFull(mode)(a) return QRFull(mode)(a)
...@@ -579,13 +601,10 @@ def qr(a, mode="full"): ...@@ -579,13 +601,10 @@ def qr(a, mode="full"):
class SVD(Op): class SVD(Op):
# See doc in the docstring of the function just after this class.
_numop = staticmethod(numpy.linalg.svd)
__props__ = ('full_matrices', 'compute_uv')
def __init__(self, full_matrices=True, compute_uv=True):
""" """
Parameters
----------
full_matrices : bool, optional full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N), If True (default), u and v have the shapes (M, M) and (N, N),
respectively. respectively.
...@@ -594,7 +613,14 @@ class SVD(Op): ...@@ -594,7 +613,14 @@ class SVD(Op):
compute_uv : bool, optional compute_uv : bool, optional
Whether or not to compute u and v in addition to s. Whether or not to compute u and v in addition to s.
True by default. True by default.
""" """
# See doc in the docstring of the function just after this class.
_numop = staticmethod(numpy.linalg.svd)
__props__ = ('full_matrices', 'compute_uv')
def __init__(self, full_matrices=True, compute_uv=True):
self.full_matrices = full_matrices self.full_matrices = full_matrices
self.compute_uv = compute_uv self.compute_uv = compute_uv
...@@ -619,18 +645,21 @@ def svd(a, full_matrices=1, compute_uv=1): ...@@ -619,18 +645,21 @@ def svd(a, full_matrices=1, compute_uv=1):
""" """
This function performs the SVD on CPU. This function performs the SVD on CPU.
:type full_matrices: bool, optional Parameters
:param full_matrices: ----------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N), If True (default), u and v have the shapes (M, M) and (N, N),
respectively. respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively, Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N). where K = min(M, N).
:type compute_uv: bool, optional compute_uv : bool, optional
:param compute_uv:
Whether or not to compute u and v in addition to s. Whether or not to compute u and v in addition to s.
True by default. True by default.
:returns: U, V and D matrices. Returns
-------
U, V, D : matrices
""" """
return SVD(full_matrices, compute_uv)(a) return SVD(full_matrices, compute_uv)(a)
......
...@@ -44,8 +44,13 @@ from theano.gradient import grad_undefined ...@@ -44,8 +44,13 @@ from theano.gradient import grad_undefined
# the output function is only defined when dr, dc, dt are natural numbers. # the output function is only defined when dr, dc, dt are natural numbers.
class Conv3D(theano.Op): class Conv3D(theano.Op):
""" 3D `convolution` of multiple filters on a minibatch """
:note: does not flip the kernel, moves kernel with a user specified stride 3D `convolution` of multiple filters on a minibatch.
Notes
-----
Does not flip the kernel, moves kernel with a user specified stride.
""" """
__props__ = () __props__ = ()
...@@ -54,10 +59,17 @@ class Conv3D(theano.Op): ...@@ -54,10 +59,17 @@ class Conv3D(theano.Op):
def make_node(self, V, W, b, d): def make_node(self, V, W, b, d):
""" """
:param V: Visible unit, input(batch,row,column,time,in channel) Parameters
:param W: Weights, filter(out channel,row,column,time,in channel) ----------
:param b: bias, shape == (W.shape[0],) V
:param d: strides when moving the filter over the input(dx,dy,dt) Visible unit, input(batch,row,column,time,in channel)
W
Weights, filter(out channel,row,column,time,in channel)
b
bias, shape == (W.shape[0],)
d
strides when moving the filter over the input(dx,dy,dt)
""" """
V_ = T.as_tensor_variable(V) V_ = T.as_tensor_variable(V)
...@@ -539,28 +551,39 @@ _conv3D = Conv3D() ...@@ -539,28 +551,39 @@ _conv3D = Conv3D()
def conv3D(V, W, b, d): def conv3D(V, W, b, d):
""" """
3D "convolution" of multiple filters on a minibatch 3D "convolution" of multiple filters on a minibatch.
(does not flip the kernel, moves kernel with a user specified stride)
:param V: Visible unit, input. (does not flip the kernel, moves kernel with a user specified stride)
dimensions: (batch, row, column, time, in channel)
:param W: Weights, filter.
dimensions: (out channel, row, column, time ,in channel)
:param b: bias, shape == (W.shape[0],)
:param d: strides when moving the filter over the input(dx, dy, dt)
:note: The order of dimensions does not correspond to the one in `conv2d`. Parameters
----------
V
Visible unit, input.
Dimensions: (batch, row, column, time, in channel).
W
Weights, filter.
Dimensions: (out channel, row, column, time ,in channel).
b
Bias, shape == (W.shape[0],).
d
Strides when moving the filter over the input(dx, dy, dt).
Notes
-----
The order of dimensions does not correspond to the one in `conv2d`.
This is for optimization. This is for optimization.
:note: The GPU implementation is very slow. You should use The GPU implementation is very slow. You should use
:func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` or :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` for a :func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` for a
GPU graph instead. GPU graph instead.
:see: Someone made a script that shows how to swap the axes See Also
--------
Someone made a script that shows how to swap the axes
between both 3d convolution implementations in Theano. See between both 3d convolution implementations in Theano. See
the last `attachment the last `attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_
<https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_.
""" """
return _conv3D(V, W, b, d) return _conv3D(V, W, b, d)
......
...@@ -13,7 +13,11 @@ from theano.gradient import DisconnectedType ...@@ -13,7 +13,11 @@ from theano.gradient import DisconnectedType
# than visiting each weight gradient element once and passing through whole video # than visiting each weight gradient element once and passing through whole video
class ConvGrad3D(theano.Op): class ConvGrad3D(theano.Op):
""" Gradient of Conv3D with respect to W """ """
Gradient of Conv3D with respect to W.
"""
__props__ = () __props__ = ()
def c_code_cache_version(self): def c_code_cache_version(self):
......
...@@ -11,7 +11,12 @@ from theano.gradient import DisconnectedType ...@@ -11,7 +11,12 @@ from theano.gradient import DisconnectedType
class ConvTransp3D(theano.Op): class ConvTransp3D(theano.Op):
""" "Transpose" of Conv3D (Conv3D implements multiplication by an implicitly defined matrix W. This implements multiplication by its transpose) """ """
"Transpose" of Conv3D (Conv3D implements multiplication by an implicitly
defined matrix W. This implements multiplication by its transpose).
"""
__props__ = () __props__ = ()
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -19,10 +24,17 @@ class ConvTransp3D(theano.Op): ...@@ -19,10 +24,17 @@ class ConvTransp3D(theano.Op):
def make_node(self, W, b, d, H, RShape=None): def make_node(self, W, b, d, H, RShape=None):
""" """
:param W: Weights, filter Parameters
:param b: bias, shape == (W.shape[0],) ----------
:param d: strides when moving the filter over the input W
:param H: The output of Conv3D Weights, filter
b
Bias, shape == (W.shape[0],).
d
Strides when moving the filter over the input.
H
The output of Conv3D.
""" """
W_ = T.as_tensor_variable(W) W_ = T.as_tensor_variable(W)
b_ = T.as_tensor_variable(b) b_ = T.as_tensor_variable(b)
......
...@@ -36,46 +36,41 @@ _logger = logging.getLogger("theano.tensor.nnet.conv") ...@@ -36,46 +36,41 @@ _logger = logging.getLogger("theano.tensor.nnet.conv")
def conv2d(input, filters, image_shape=None, filter_shape=None, def conv2d(input, filters, image_shape=None, filter_shape=None,
border_mode='valid', subsample=(1, 1), **kargs): border_mode='valid', subsample=(1, 1), **kargs):
"""This function will build the symbolic graph for convolving a stack of """
This function will build the symbolic graph for convolving a stack of
input images with a set of filters. The implementation is modelled after input images with a set of filters. The implementation is modelled after
Convolutional Neural Networks (CNN). It is simply a wrapper to the ConvOp Convolutional Neural Networks (CNN). It is simply a wrapper to the ConvOp
but provides a much cleaner interface. but provides a much cleaner interface.
:type input: symbolic 4D tensor Parameters
:param input: mini-batch of feature map stacks, of shape ----------
input : symbolic 4D tensor
Mini-batch of feature map stacks, of shape
(batch size, stack size, nb row, nb col) (batch size, stack size, nb row, nb col)
see the optional parameter image_shape see the optional parameter image_shape
filters: symbolic 4D tensor
:type filters: symbolic 4D tensor Set of filters used in CNN layer of shape
:param filters: set of filters used in CNN layer of shape
(nb filters, stack size, nb row, nb col) (nb filters, stack size, nb row, nb col)
see the optional parameter filter_shape see the optional parameter filter_shape
border_mode : {'valid', 'full'}
:param border_mode: 'valid'only apply filter to complete patches of the image. Generates
'valid'-- only apply filter to complete patches of the image. Generates output of shape: image_shape - filter_shape + 1.
output of shape: image_shape - filter_shape + 1 'full' zero-pads image to multiple of filter shape to generate output
'full' -- zero-pads image to multiple of filter shape to generate output of shape: image_shape + filter_shape - 1.
of shape: image_shape + filter_shape - 1 subsample: tuple of len 2
Factor by which to subsample the output. Also called strides elsewhere.
:type subsample: tuple of len 2 image_shape: None, tuple/list of len 4 of int, None or Constant variable
:param subsample: factor by which to subsample the output. The shape of the input parameter.
Also called strides elsewhere. Optional, used for optimization like loop unrolling
You can put None for any element of the list to tell that this element
:type image_shape: None, tuple/list of len 4 of int, None or is not constant.
Constant variable filter_shape : None, tuple/list of len 4 of int, None or Constant variable
:param image_shape: The shape of the input parameter.
Optional, used for optimization like loop unrolling Optional, used for optimization like loop unrolling
You can put None for any element of the list You can put None for any element of the list
to tell that this element is not constant. to tell that this element is not constant.
:type filter_shape: None, tuple/list of len 4 of int, None or kwargs
Constant variable Kwargs are passed onto ConvOp. Can be used to set the following:
:param filter_shape: Optional, used for optimization like loop unrolling unroll_batch, unroll_kern, unroll_patch, openmp (see ConvOp doc).
You can put None for any element of the list
to tell that this element is not constant.
:param kwargs: kwargs are passed onto ConvOp.
Can be used to set the following:
unroll_batch, unroll_kern, unroll_patch,
openmp (see ConvOp doc)
openmp: By default have the same value as openmp: By default have the same value as
config.openmp. For small image, filter, config.openmp. For small image, filter,
...@@ -88,9 +83,11 @@ def conv2d(input, filters, image_shape=None, filter_shape=None, ...@@ -88,9 +83,11 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
grow the batch size to 10, it is faster grow the batch size to 10, it is faster
with openmp on a core 2 duo. with openmp on a core 2 duo.
:rtype: symbolic 4D tensor Returns
:return: set of feature maps generated by convolutional layer. Tensor is -------
of shape (batch size, nb filters, output row, output col) symbolic 4D tensor
Set of feature maps generated by convolutional layer. Tensor is
of shape (batch size, nb filters, output row, output col).
""" """
...@@ -171,6 +168,97 @@ class ConvOp(OpenMPOp): ...@@ -171,6 +168,97 @@ class ConvOp(OpenMPOp):
output[b,k,:,:] = \sum_i input[b,i,:,:] * filter[k,i,:,:] \forall b,k output[b,k,:,:] = \sum_i input[b,i,:,:] * filter[k,i,:,:] \forall b,k
where b is the mini-batch index, k the filter index and * is the where b is the mini-batch index, k the filter index and * is the
convolution operator. convolution operator.
The constructor initializes a ConvOp with given output_mode (full/valid).
All other parameters are optional and are only used to generate more
optimized c code, or to enable graph optimizers to optimally replace the
ConvOp.
NOTES ON OPTIMIZATION:
There are two types of optimization. The first is the selection of the
fastest algo when bsize and nkern are provided with imshp and kshp.
By default we try to select the fastest version. You can specify it
with the unroll_batch, unroll_kern, and unroll_patch parameter.
The second type of optimization is hardcoding some dimensions into the
code when all shape are know.
This make a significant difference for the 'full' output_mode.
Sometimes, the fastest implementation on x86-64 uses
{unroll_batch=4, unroll_kern=4, unroll_patch=False}
with all other shape parameters being provided.
For optimizing other architectures, see:
Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance
Matrix Multiplication, (mr x nr). ACM Transactions on Mathematical
Software, May 2008.
Figure 12: (mr x nr). For x86 use 2x4, itanium 8x8, etc.
Parameters
----------
output_mode : {'valid', 'full'}
'valid' gives an output smaller then the image.
'full' gives an output bigger then the image.
See 'border_mode' in conv2d's doc.
Optional parameters: (will generate more optimal c code)
imshp : tuple of len 2 or 3: 2 for 2d image, 3 for a stack of 2d images.
Stacksize, nb image row, nb image col.
kshp : tuple of len 2
Nb kernel row, nb kernel col.
nkern : int
The number of kernel.
bsize : int
The size of the minibatch.
dx : int
Patch stride rows.
dy : int
Patch stride cols
Params which select the version of code used:
unroll_patch : bool
Use a version of c_code that unroll the patch loop that don't
request all shape information to work, but if all shape information
are present, will use it to hardcode the value in the code for
faster code.
unroll_batch : int
Use a version of c_code that unroll the batch (by unroll_batch)
and the nkern (by unroll_kern) loop. The size must by a multiple
of bsize or nkern respectively.
unroll_kern : int
Use a version of c_code that unroll the batch
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
must by a multiple of bsize or nkern respectively.
verbose : int
Passed to GpuConv.
version: int or str
Passed to GpuConv, if version='no_fft', fft
optimization will be desactivated at the op level.
direction_hint: {'forward', 'bprop weights', 'bprop inputs'}
Passed to GpuConv, used by graph optimizers to aid algorithm choice.
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
imshp_logical
Default None. None value is equivalent to imshp value.
When imshp_logical != imshp, it tell we need to insert 0 in
the image before we do the convolution. For example, when dx==dy==2
and the image is [[1, 2], [3, 4]], we should make as if the image
was [[1, 0, 2, 0], [0, 0, 0, 0], [3, 0, 4, 0], [0, 0, 0, 0]].
Our python code insert the zero, but the c code optimize it.
imshp_logical != imshp when taking the grad again the weights or
the image when the output_mode is full and `dx != 1` or `dy != 1`.
kshp_logical
Idem but for kshp and used for the grad again the
weights when the output_mode is valid and `dx != 1` or `dy != 1`.
kshp_logical_top_aligned
Used in the same case. Default to True.
Set to False in the grad again the weight when the
output_mode is full.
""" """
__attrnames = ['imshp', 'kshp', 'nkern', 'bsize', 'dx', 'dy', 'out_mode', __attrnames = ['imshp', 'kshp', 'nkern', 'bsize', 'dx', 'dy', 'out_mode',
...@@ -257,10 +345,20 @@ class ConvOp(OpenMPOp): ...@@ -257,10 +345,20 @@ class ConvOp(OpenMPOp):
with kernels of shape "kshp". Accepts symbolic or integer shapes. with kernels of shape "kshp". Accepts symbolic or integer shapes.
Propagates `None`s (for unknown shapes). Propagates `None`s (for unknown shapes).
:param inshp: (rows,cols) of input image Parameters
:param kshp: (rows,cols) of filters ----------
:param mode: 'valid' or 'full' (see 'border_mode' in conv2d's doc) inshp
:return: (rows,cols) of output image (rows,cols) of input image.
kshp
(rows,cols) of filters.
mode: {'valid', 'full'}
See 'border_mode' in conv2d's doc.
Returns
-------
object
(rows,cols) of output image.
""" """
# The formula would be ceil((i + s * k - s * 1) / float(d)), # The formula would be ceil((i + s * k - s * 1) / float(d)),
# with s=1 for mode=='full' and s=-1 for mode=='valid'. # with s=1 for mode=='full' and s=-1 for mode=='valid'.
...@@ -284,92 +382,6 @@ class ConvOp(OpenMPOp): ...@@ -284,92 +382,6 @@ class ConvOp(OpenMPOp):
version=-1, version=-1,
direction_hint='forward', direction_hint='forward',
openmp=None): openmp=None):
"""
Initializes a ConvOp with given output_mode (full/valid). All other
parameters are optional and are only used to generate more optimized c
code, or to enable graph optimizers to optimally replace the ConvOp.
NOTES ON OPTIMIZATION:
Their is two type of optimization. The first is the selection of the
fastest algo when bsize and nkern are probided with imshp and kshp.
By default we try to select the fastest version. You can specify it
with the unroll_batch, unroll_kern, and unroll_patch parameter.
The second type of optimization is hardcoding some dimensions into the
code when all shape are know.
This make a significant difference for the 'full' output_mode.
Some times, the fastest implementation on x86-64 uses
{unroll_batch=4, unroll_kern=4, unroll_patch=False}
with all other shape parameters being provided.
For optimizing other architectures, see:
Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance
Matrix Multiplication, (mr x nr). ACM Transactions on Mathematical
Software, May 2008.
Figure 12: (mr x nr). For x86 use 2x4, itanium 8x8, etc.
:type output_mode: string
:param output_mode: 'valid' -- gives an output smaller then the image
'full' -- gives an output bigger then the image
Optional parameters: (will generate more optimal c code)
:type imshp: tuple of len 2 or 3: 2 for 2d image,
3 for a stack of 2d images.
:param imshp: (stacksize, nb image row, nb image col)
:type kshp: tuple of len 2
:param kshp: (nb kernel row, nb kernel col)
:type nkern: int
:param nkern: the number of kernel
:type bsize: int
:param bsize: the size of the minibatch
:type dx: int
:param dx: patch stride rows
:type dy: int
:param dy: patch stride cols
Params which select the version of code used:
:type unroll_patch: bool
:param unroll_patch: use a version of c_code that unroll the patch loop
that don't request all shape information to work, but if all shape
information are present, will
use it to hardcode the value in the code for faster code.
:type unroll_batch:int
:param unroll_batch: use a version of c_code that unroll the batch
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
must by a multiple of bsize or nkern respectively.
:type unroll_kern:int
:param unroll_kern: use a version of c_code that unroll the batch
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
must by a multiple of bsize or nkern
respectively.
:type verbose: int
:param verbose: passed to GpuConv
:type version: int or str
:param version: passed to GpuConv, if version='no_fft', fft
optimization will be desactivated at the op level.
:param direction_hint: 'forward', 'bprop weights' or 'bprop inputs'.
Passed to GpuConv, used by graph optimizers to aid algorithm choice
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
:param imshp_logical: Default None. None value is equivalent to imshp
value. When imshp_logical != imshp, it tell we need to insert 0 in
the image before we do the convolution. For example, when dx==dy==2
and the image is [[1, 2], [3, 4]], we should make as if the image
was [[1, 0, 2, 0], [0, 0, 0, 0], [3, 0, 4, 0], [0, 0, 0, 0]].
Our python code insert the zero, but the c code optimize it.
imshp_logical != imshp when taking the grad again the weights or
the image when the output_mode is full and `dx != 1` or `dy != 1`.
:param kshp_logical: idem but for kshp and used for the grad again the
weights when the output_mode is valid and `dx != 1` or `dy != 1`.
:param kshp_logical_top_aligned: Used in the same case.Default to True.
Set to False in the grad again the weight when the
output_mode is full.
"""
# Deactivate fft_optimization at the op level if specified # Deactivate fft_optimization at the op level if specified
if version == "no_fft": if version == "no_fft":
self.fft_opt = False self.fft_opt = False
...@@ -587,7 +599,10 @@ class ConvOp(OpenMPOp): ...@@ -587,7 +599,10 @@ class ConvOp(OpenMPOp):
for a in self.__attrnames) + "}" for a in self.__attrnames) + "}"
def flops(self, inputs, outputs): def flops(self, inputs, outputs):
""" Useful with the hack in profilemode to print the MFlops""" """
Useful with the hack in profilemode to print the MFlops.
"""
images, kerns = inputs images, kerns = inputs
out, = outputs out, = outputs
assert images[1] == kerns[1] assert images[1] == kerns[1]
...@@ -608,8 +623,13 @@ class ConvOp(OpenMPOp): ...@@ -608,8 +623,13 @@ class ConvOp(OpenMPOp):
def make_node(self, inputs, kerns): def make_node(self, inputs, kerns):
# TODO: find a way to make ConvOp work for N-D (after NIPS09) # TODO: find a way to make ConvOp work for N-D (after NIPS09)
""" """
inputs - 4 dim: batches x stacksize x rows x cols Parameters
kerns - 4 dim: nkern x stackidx x rows x cols ----------
inputs
4 dim: batches x stacksize x rows x cols.
kerns
4 dim: nkern x stackidx x rows x cols.
""" """
_inputs = as_tensor_variable(inputs) _inputs = as_tensor_variable(inputs)
_kerns = as_tensor_variable(kerns) _kerns = as_tensor_variable(kerns)
...@@ -655,7 +675,8 @@ class ConvOp(OpenMPOp): ...@@ -655,7 +675,8 @@ class ConvOp(OpenMPOp):
def perform(self, node, inp, out): def perform(self, node, inp, out):
""" """
By default if len(img2d.shape)==3, we By default if len(img2d.shape)==3, we TODO
""" """
img2d, filtersflipped = inp img2d, filtersflipped = inp
z, = out z, = out
...@@ -1818,7 +1839,9 @@ Py_XDECREF(img2d); ...@@ -1818,7 +1839,9 @@ Py_XDECREF(img2d);
def gen_conv_code_unroll_batch_kern(d, unroll_bsize=1, unroll_ksize=1): def gen_conv_code_unroll_batch_kern(d, unroll_bsize=1, unroll_ksize=1):
""" c_code for ConvOp that unroll the batch size loop """
c_code for ConvOp that unroll the batch size loop.
""" """
assert unroll_bsize > 0 and unroll_ksize > 0 assert unroll_bsize > 0 and unroll_ksize > 0
if "unroll_bsize" in d or "unroll_ksize" in d or "unroll_iter" in d or "unroll_biter" in d or "unroll_kiter" in d: if "unroll_bsize" in d or "unroll_ksize" in d or "unroll_iter" in d or "unroll_biter" in d or "unroll_kiter" in d:
......
...@@ -6,10 +6,13 @@ import theano.sandbox.cuda as cuda ...@@ -6,10 +6,13 @@ import theano.sandbox.cuda as cuda
def get_diagonal_subtensor_view(x, i0, i1): def get_diagonal_subtensor_view(x, i0, i1):
"""Helper function for DiagonalSubtensor and """
IncDiagonalSubtensor Helper function for DiagonalSubtensor and IncDiagonalSubtensor.
Notes
-----
It returns a partial view of x, not a partial copy.
:note: it return a partial view of x, not a partial copy.
""" """
# We have to cast i0 and i0 to int because python 2.4 (and maybe later) # We have to cast i0 and i0 to int because python 2.4 (and maybe later)
# do not support indexing with 0-dim, 'int*' ndarrays. # do not support indexing with 0-dim, 'int*' ndarrays.
...@@ -27,13 +30,24 @@ def get_diagonal_subtensor_view(x, i0, i1): ...@@ -27,13 +30,24 @@ def get_diagonal_subtensor_view(x, i0, i1):
class DiagonalSubtensor(Op): class DiagonalSubtensor(Op):
"""Return a form a nd diagonal subtensor. """
Return a form a nd diagonal subtensor.
:param x: n-d tensor
:param i0: axis index in x Parameters
:param i1: axis index in x ----------
:note: Work on the GPU. x
n-d tensor
i0
Axis index in x
i1
Axis index in x
Notes
-----
Work on the GPU.
Extended summary
----------------
``x`` is some n-dimensional tensor, but this Op only deals with a ``x`` is some n-dimensional tensor, but this Op only deals with a
matrix-shaped slice, using axes i0 and i1. Without loss of matrix-shaped slice, using axes i0 and i1. Without loss of
generality, suppose that ``i0`` picks out our ``row`` dimension, generality, suppose that ``i0`` picks out our ``row`` dimension,
...@@ -73,6 +87,7 @@ class DiagonalSubtensor(Op): ...@@ -73,6 +87,7 @@ class DiagonalSubtensor(Op):
see what's necessary at that point. see what's necessary at that point.
""" """
__props__ = ("inplace",) __props__ = ("inplace",)
def __str__(self): def __str__(self):
...@@ -111,8 +126,10 @@ diagonal_subtensor = DiagonalSubtensor(False) ...@@ -111,8 +126,10 @@ diagonal_subtensor = DiagonalSubtensor(False)
class IncDiagonalSubtensor(Op): class IncDiagonalSubtensor(Op):
""" """
The gradient of DiagonalSubtensor The gradient of DiagonalSubtensor.
""" """
__props__ = ("inplace",) __props__ = ("inplace",)
def __str__(self): def __str__(self):
...@@ -153,26 +170,39 @@ inc_diagonal_subtensor = IncDiagonalSubtensor(False) ...@@ -153,26 +170,39 @@ inc_diagonal_subtensor = IncDiagonalSubtensor(False)
def conv3d(signals, filters, def conv3d(signals, filters,
signals_shape=None, filters_shape=None, signals_shape=None, filters_shape=None,
border_mode='valid'): border_mode='valid'):
"""Convolve spatio-temporal filters with a movie. """
Convolve spatio-temporal filters with a movie.
It flips the filters. It flips the filters.
:param signals: timeseries of images whose pixels have color channels. Parameters
shape: [Ns, Ts, C, Hs, Ws] ----------
:param filters: spatio-temporal filters signals
shape: [Nf, Tf, C, Hf, Wf] Timeseries of images whose pixels have color channels.
:param signals_shape: None or a tuple/list with the shape of signals Shape: [Ns, Ts, C, Hs, Ws].
:param filters_shape: None or a tuple/list with the shape of filters filters
:param border_mode: The only one tested is 'valid'. Spatio-temporal filters.
Shape: [Nf, Tf, C, Hf, Wf].
:note: Another way to define signals: (batch, time, in channel, row, column) signals_shape
None or a tuple/list with the shape of signals.
filters_shape
None or a tuple/list with the shape of filters.
border_mode
The only one tested is 'valid'.
Notes
-----
Another way to define signals: (batch, time, in channel, row, column)
Another way to define filters: (out channel,time,in channel, row, column) Another way to define filters: (out channel,time,in channel, row, column)
:note: For the GPU, you can use this implementation or
For the GPU, you can use this implementation or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`. :func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`.
:see: Someone made a script that shows how to swap the axes between See Also
--------
Someone made a script that shows how to swap the axes between
both 3d convolution implementations in Theano. See the last both 3d convolution implementations in Theano. See the last
`attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_. `attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_
""" """
...@@ -264,7 +294,8 @@ def conv3d(signals, filters, ...@@ -264,7 +294,8 @@ def conv3d(signals, filters,
def make_gpu_optimizer(op, to_gpu): def make_gpu_optimizer(op, to_gpu):
"""This function create optimizer that move some inputs to the GPU """
This function create optimizer that move some inputs to the GPU
for op that work on both CPU and GPU. for op that work on both CPU and GPU.
The op object is created by calling op(), so good default value The op object is created by calling op(), so good default value
...@@ -272,8 +303,12 @@ def make_gpu_optimizer(op, to_gpu): ...@@ -272,8 +303,12 @@ def make_gpu_optimizer(op, to_gpu):
We suppose the same op work with CPU and GPU inputs. We suppose the same op work with CPU and GPU inputs.
:param op: the op that support GPU inputs Parameters
:param to_gpu: a list of op inputs that are moved to the GPU. ----------
op
The op that support GPU inputs.
to_gpu
A list of op inputs that are moved to the GPU.
""" """
@theano.gof.local_optimizer([op, cuda.gpu_from_host]) @theano.gof.local_optimizer([op, cuda.gpu_from_host])
...@@ -281,6 +316,7 @@ def make_gpu_optimizer(op, to_gpu): ...@@ -281,6 +316,7 @@ def make_gpu_optimizer(op, to_gpu):
""" """
op(host_from_gpu()) -> host_from_gpu(op) op(host_from_gpu()) -> host_from_gpu(op)
gpu_from_host(op) -> op(gpu_from_host) gpu_from_host(op) -> op(gpu_from_host)
""" """
if isinstance(node.op, op): if isinstance(node.op, op):
# op(host_from_gpu()) -> host_from_gpu(op) # op(host_from_gpu()) -> host_from_gpu(op)
...@@ -314,7 +350,7 @@ if cuda.cuda_available: ...@@ -314,7 +350,7 @@ if cuda.cuda_available:
@theano.gof.local_optimizer([DiagonalSubtensor, IncDiagonalSubtensor]) @theano.gof.local_optimizer([DiagonalSubtensor, IncDiagonalSubtensor])
def local_inplace_DiagonalSubtensor(node): def local_inplace_DiagonalSubtensor(node):
""" also work for IncDiagonalSubtensor """ """Also work for IncDiagonalSubtensor."""
if (isinstance(node.op, (DiagonalSubtensor, IncDiagonalSubtensor)) and if (isinstance(node.op, (DiagonalSubtensor, IncDiagonalSubtensor)) and
not node.op.inplace): not node.op.inplace):
new_op = node.op.__class__(inplace=True) new_op = node.op.__class__(inplace=True)
......
...@@ -13,23 +13,29 @@ from theano.gradient import grad_undefined ...@@ -13,23 +13,29 @@ from theano.gradient import grad_undefined
class Images2Neibs(Op): class Images2Neibs(Op):
__props__ = ("mode",)
def __init__(self, mode='valid'):
""" """
:type mode: str
:param mode: Possible values: Parameters
----------
mode : {'valid', 'ignore_borders', 'wrap_centered'}
'valid': Requires an input that is a multiple of the 'valid': Requires an input that is a multiple of the
pooling factor (in each direction) pooling factor (in each direction).
'ignore_borders': Same as valid, but will ignore the borders 'ignore_borders': Same as valid, but will ignore the borders
if the shape(s) of the input if the shape(s) of the input is not a multiple of the pooling
is not a multiple of the pooling factor(s) factor(s).
'wrap_centered' : ?? TODO comment 'wrap_centered' : ?? TODO comment
:return:
Returns
-------
object
Reshapes the input as a 2D tensor where each row is an Reshapes the input as a 2D tensor where each row is an
pooling example pooling example.
""" """
__props__ = ("mode",)
def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered', 'ignore_borders']: if mode not in ['valid', 'wrap_centered', 'ignore_borders']:
raise NotImplementedError("Only the mode valid, ignore_borders" raise NotImplementedError("Only the mode valid, ignore_borders"
" and wrap_centered have been" " and wrap_centered have been"
...@@ -46,20 +52,22 @@ class Images2Neibs(Op): ...@@ -46,20 +52,22 @@ class Images2Neibs(Op):
def make_node(self, ten4, neib_shape, neib_step=None): def make_node(self, ten4, neib_shape, neib_step=None):
""" """
:param ten4: a list of lists of images Parameters
ten4 is of shape (list 1 dim, list 2 dim, ----------
row, col) ten4 : a list of lists of images
:param neib_shape: (r,c) where r is the height of the neighborhood ten4 is of shape (list 1 dim, list 2 dim, row, col).
in rows and c is the width of the neighborhood neib_shape
in columns (r,c) where r is the height of the neighborhood in rows and c is
:param neib_step: (dr,dc) where dr is the number of rows to the width of the neighborhood in columns.
skip between patch and dc is the number of neib_step
columns. When None, this is the same as (dr,dc) where dr is the number of rows to skip between patch and dc
neib_shape(patch are disjoint) is the number of columns. When None, this is the same as neib_shape
(patch are disjoint).
output:
a 2D matrix, written using the following pattern Returns
-------
matrix
A 2D matrix, written using the following pattern
idx = 0 idx = 0
for i in xrange(list 1 dim) for i in xrange(list 1 dim)
for j in xrange(list 2 dim) for j in xrange(list 2 dim)
...@@ -68,9 +76,10 @@ class Images2Neibs(Op): ...@@ -68,9 +76,10 @@ class Images2Neibs(Op):
output[idx,:] output[idx,:]
= flattened version of ten4[i,j,l:l+r,k:k+c] = flattened version of ten4[i,j,l:l+r,k:k+c]
idx += 1 idx += 1
(note: the op isn't necessarily implemented internally with these .. note:: The op isn't necessarily implemented internally with these
for loops, they're just the easiest way to describe the output for loops, they're just the easiest way to describe the output
pattern) pattern.
""" """
ten4 = T.as_tensor_variable(ten4) ten4 = T.as_tensor_variable(ten4)
neib_shape = T.as_tensor_variable(neib_shape) neib_shape = T.as_tensor_variable(neib_shape)
...@@ -420,61 +429,46 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): ...@@ -420,61 +429,46 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
""" """
Function :func:`images2neibs <theano.sandbox.neighbours.images2neibs>` Function :func:`images2neibs <theano.sandbox.neighbours.images2neibs>`
allows to apply a sliding window operation to a tensor containing allows to apply a sliding window operation to a tensor containing
images images or other two-dimensional objects.
or other two-dimensional objects. The sliding window operation loops over points in input data and stores
The sliding window operation loops a rectangular neighbourhood of each point.
over points in input data and stores a rectangular neighbourhood of It is possible to assign a step of selecting patches (parameter `neib_step`).
each point.
It is possible to assign a step of selecting patches (parameter Parameters
`neib_step`). ----------
ten4 : A 4d tensor-like
:param ten4: A 4-dimensional tensor which represents A 4-dimensional tensor which represents a list of lists of images.
a list of lists of images.a list of lists of images. It should have shape (list 1 dim, list 2 dim, row, col). The first
It should have shape (list 1 dim, list 2 dim, two dimensions can be useful to store different channels and batches.
row, col). The first two dimensions can be neib_shape : A 1d tensor-like of 2 values
useful to store different channels and batches. A tuple containing two values: height and width of the neighbourhood.
:type ten4: A 4d tensor-like. It should have shape (r,c) where r is the height of the neighborhood
:param neib_shape: A tuple containing two in rows and c is the width of the neighborhood in columns.
values: height and width of the neighbourhood. neib_step : A 1d tensor-like of 2 values
It should have shape (r,c) where r is the height of the (dr,dc) where dr is the number of rows to skip between patch and dc is
neighborhood in rows and c is the width of the neighborhood the number of columns. The parameter should be a tuple of two elements:
in columns number of rows and number of columns to skip each iteration.
:type neib_shape: A 1d tensor-like of 2 values. Basically, when the step is 1, the neighbourhood of every first element
:param neib_step: (dr,dc) where dr is the number of rows to is taken and every possible rectangular subset is returned.
skip between patch and dc is the number of By default it is equal to `neib_shape` in other words, the patches are
columns. The parameter should be a tuple of two elements: disjoint. When the step is greater than `neib_shape`, some elements are
number omitted. When None, this is the same as neib_shape (patch are disjoint).
of rows and number of columns to skip each iteration.
Basically, when the step is 1, the neighbourhood of every
first element is taken and every possible rectangular
subset is returned. By default it is equal to
`neib_shape` in other words, the
patches are disjoint. When the step is greater than
`neib_shape`, some elements are omitted. When None, this
is the same as
neib_shape(patch are disjoint)
.. note:: Currently the step size should be chosen in the way that the .. note:: Currently the step size should be chosen in the way that the
corresponding dimension :math:`i` (width or height) is equal to corresponding dimension :math:`i` (width or height) is equal to
:math:`n * step\_size_i + neib\_shape_i` for some :math:`n` :math:`n * step\_size_i + neib\_shape_i` for some :math:`n`
:type neib_step: A 1d tensor-like of 2 values. mode : {'valid', 'ignore_borders', 'wrap_centered}
:param mode:
Possible values:
``valid`` ``valid``
Requires an input that is a multiple of the Requires an input that is a multiple of the
pooling factor (in each direction) pooling factor (in each direction).
``ignore_borders`` ``ignore_borders``
Same as valid, but will ignore the borders Same as valid, but will ignore the borders if the shape(s) of
if the shape(s) of the input the input is not a multiple of the pooling factor(s).
is not a multiple of the pooling factor(s)
``wrap_centered`` ``wrap_centered``
?? TODO comment ?? TODO comment
:type mode: str Returns
:return: -------
object
Reshapes the input as a 2D tensor where each row is an Reshapes the input as a 2D tensor where each row is an
pooling example. Pseudo-code of the output: pooling example. Pseudo-code of the output:
...@@ -493,7 +487,8 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): ...@@ -493,7 +487,8 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
these for loops, they're just the easiest way to describe the these for loops, they're just the easiest way to describe the
output pattern. output pattern.
Example: Examples
--------
.. code-block:: python .. code-block:: python
...@@ -512,6 +507,7 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): ...@@ -512,6 +507,7 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
.. note:: The underlying code will construct a 2D tensor of disjoint .. note:: The underlying code will construct a 2D tensor of disjoint
patches 5x5. The output has shape 4x25. patches 5x5. The output has shape 4x25.
""" """
return Images2Neibs(mode)(ten4, neib_shape, neib_step) return Images2Neibs(mode)(ten4, neib_shape, neib_step)
...@@ -524,18 +520,28 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'): ...@@ -524,18 +520,28 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
the output of :func:`images2neibs <theano.sandbox.neigbours.neibs2images>` the output of :func:`images2neibs <theano.sandbox.neigbours.neibs2images>`
and reconstructs its input. and reconstructs its input.
:param neibs: matrix like the one obtained by Parameters
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` ----------
:param neib_shape: `neib_shape` that was used in neibs: matrix
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` Like the one obtained by
:param original_shape: original shape of the 4d tensor given to :func:`images2neibs <theano.sandbox.neigbours.neibs2images>`.
neib_shape
`neib_shape` that was used in
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`.
original_shape
Original shape of the 4d tensor given to
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` :func:`images2neibs <theano.sandbox.neigbours.neibs2images>`
:return: Reconstructs the input of Returns
-------
object
Reconstructs the input of
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`, :func:`images2neibs <theano.sandbox.neigbours.neibs2images>`,
a 4d tensor of shape `original_shape`. a 4d tensor of shape `original_shape`.
.. note:: Currently, the function doesn't support tensors created with Notes
-----
Currently, the function doesn't support tensors created with
`neib_step` different from default value. This means that it may be `neib_step` different from default value. This means that it may be
impossible to compute the gradient of a variable gained by impossible to compute the gradient of a variable gained by
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` w.r.t. :func:`images2neibs <theano.sandbox.neigbours.neibs2images>` w.r.t.
...@@ -543,6 +549,8 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'): ...@@ -543,6 +549,8 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` for :func:`images2neibs <theano.sandbox.neigbours.neibs2images>` for
gradient computation. gradient computation.
Examples
--------
Example, which uses a tensor gained in example for Example, which uses a tensor gained in example for
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`: :func:`images2neibs <theano.sandbox.neigbours.neibs2images>`:
...@@ -555,6 +563,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'): ...@@ -555,6 +563,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
im_new_val = inv_window(neibs_val) im_new_val = inv_window(neibs_val)
.. note:: The code will output the initial image array. .. note:: The code will output the initial image array.
""" """
neibs = T.as_tensor_variable(neibs) neibs = T.as_tensor_variable(neibs)
neib_shape = T.as_tensor_variable(neib_shape) neib_shape = T.as_tensor_variable(neib_shape)
......
"""Provides neural-network specific Ops. """
Provides neural-network specific Ops.
:note: TODO: factor this out into a neural-network toolbox. Notes
-----
TODO: factor this out into a neural-network toolbox.
:note: We register all optimization with the gpu tag as we don't We register all optimization with the gpu tag as we don't
implement all the intermediate case on the GPU (in particular implement all the intermediate case on the GPU (in particular
AdvancedSubtensor). So to make sure it run well on the gpu with AdvancedSubtensor). So to make sure it run well on the gpu with
fast_compile, we register them as needed for the GPU. This can be fast_compile, we register them as needed for the GPU. This can be
revisited later when all the intermediate part are on the GPU. revisited later when all the intermediate part are on the GPU.
""" """
import logging import logging
...@@ -38,13 +41,16 @@ class SoftmaxWithBias(gof.Op): ...@@ -38,13 +41,16 @@ class SoftmaxWithBias(gof.Op):
""" """
An L{Op} for the output of neural-net multiclass classifiers. An L{Op} for the output of neural-net multiclass classifiers.
@type x: is a matrix of floats (32 or 64) Attributes
@type b: is a [row] vector of floats (32 or 64), ----------
length is number of cols in x x : a matrix of floats (32 or 64)
b : a [row] vector of floats (32 or 64), length is number of cols in x
This L{Op}'s output is softmax(x+b). This L{Op}'s output is softmax(x+b).
softmax(x[i]) is the i'th distribution over len(x[i]) options. softmax(x[i]) is the i'th distribution over len(x[i]) options.
""" """
nin = 2 nin = 2
nout = 1 nout = 1
__props__ = () __props__ = ()
...@@ -270,7 +276,11 @@ softmax_with_bias = SoftmaxWithBias() ...@@ -270,7 +276,11 @@ softmax_with_bias = SoftmaxWithBias()
class SoftmaxGrad(gof.Op): class SoftmaxGrad(gof.Op):
"""Gradient wrt x of the Softmax Op""" """
Gradient wrt x of the Softmax Op.
"""
nin = 2 nin = 2
nout = 1 nout = 1
__props__ = () __props__ = ()
...@@ -391,6 +401,7 @@ class Softmax(gof.Op): ...@@ -391,6 +401,7 @@ class Softmax(gof.Op):
\\frac{e^{\mathbf{x}_j}}{\sum_{k=1}^K e^{\mathbf{x}_k}}` \\frac{e^{\mathbf{x}_j}}{\sum_{k=1}^K e^{\mathbf{x}_k}}`
where :math:`K` is the total number of neurons in the layer. This where :math:`K` is the total number of neurons in the layer. This
activation function gets applied row-wise. activation function gets applied row-wise.
""" """
nin = 1 nin = 1
...@@ -584,7 +595,9 @@ def softmax(c): ...@@ -584,7 +595,9 @@ def softmax(c):
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_op]) @gof.local_optimizer([softmax_op])
def local_softmax_with_bias(node): def local_softmax_with_bias(node):
"""Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias) """
Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias).
""" """
if node.op == softmax_op: if node.op == softmax_op:
x, = node.inputs x, = node.inputs
...@@ -789,15 +802,19 @@ if 0: ...@@ -789,15 +802,19 @@ if 0:
class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
"""A special compound L{Op} for the output of neural-net classifiers. """
A special compound L{Op} for the output of neural-net classifiers.
:type x: is a matrix of floats (32 or 64) Parameters
:type b: is a [row] vector of floats (32 or 64), ----------
length is number of cols in x x : a matrix of floats (32 or 64)
:type y_idx: a [column] vector of int (32 or 64), b : a [row] vector of floats (32 or 64), length is number of cols in x
length is number of rows in x y_idx : a [column] vector of int (32 or 64), length is number of rows in x
:returns: row-wise NLL, softmax(x+b), row-wise argmax of (x+b) Returns
-------
object
row-wise NLL, softmax(x+b), row-wise argmax of (x+b).
@precondition: every entry in y_idx is a valid (non-negative) @precondition: every entry in y_idx is a valid (non-negative)
column index into x column index into x
...@@ -816,6 +833,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -816,6 +833,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
i'th example. i'th example.
""" """
nin = 3 nin = 3
nout = 3 nout = 3
__props__ = () __props__ = ()
...@@ -846,7 +864,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -846,7 +864,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
return Apply(self, [x, b, y_idx], [nll, sm, am]) return Apply(self, [x, b, y_idx], [nll, sm, am])
def perform(self, node, input_storage, output_storage): def perform(self, node, input_storage, output_storage):
"""The math, where x is an input vector, and t is a target index: """
The math, where x is an input vector, and t is a target index:
softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j])) softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j]))
nll(x,t) = -log(softmax(x)[t]) nll(x,t) = -log(softmax(x)[t])
...@@ -1037,12 +1056,15 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -1037,12 +1056,15 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
class CrossentropySoftmax1HotWithBiasDx(gof.Op): class CrossentropySoftmax1HotWithBiasDx(gof.Op):
"""
Gradient wrt x of the CrossentropySoftmaxArgmax1HotWithBias Op.
"""
nin = 3 nin = 3
nout = 1 nout = 1
__props__ = () __props__ = ()
"""Gradient wrt x of the CrossentropySoftmaxArgmax1HotWithBias Op"""
def make_node(self, dy, sm, y_idx, **kwargs): def make_node(self, dy, sm, y_idx, **kwargs):
dy = tensor.as_tensor_variable(dy) dy = tensor.as_tensor_variable(dy)
sm = tensor.as_tensor_variable(sm) sm = tensor.as_tensor_variable(sm)
...@@ -1217,15 +1239,19 @@ def crossentropy_softmax_1hot(x, y_idx, **kwargs): ...@@ -1217,15 +1239,19 @@ def crossentropy_softmax_1hot(x, y_idx, **kwargs):
def crossentropy_softmax_max_and_argmax_1hot_with_bias(x, b, y_idx, **kwargs): def crossentropy_softmax_max_and_argmax_1hot_with_bias(x, b, y_idx, **kwargs):
""" """
@return: The cross-entropy, the softmax output, the max probability, Returns
and the argmax index -------
object
The cross-entropy, the softmax output, the max probability,
and the argmax index.
@todo: Since we are recomputing the argmax, TODO: Since we are recomputing the argmax,
we might as well assert that it is correct. we might as well assert that it is correct.
@todo: Make this entire function TODO: Make this entire function
unnecessary? e.g. CrossentropySoftmaxArgmax1HotWithBias should return unnecessary? e.g. CrossentropySoftmaxArgmax1HotWithBias should return
the appropriate information (i.e. the max probability)? the appropriate information (i.e. the max probability)?
""" """
(xent, softmax) = crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs) (xent, softmax) = crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
(max_pr, argmax) = tensor.max_and_argmax(softmax, axis=-1) (max_pr, argmax) = tensor.max_and_argmax(softmax, axis=-1)
...@@ -1262,16 +1288,17 @@ crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad() ...@@ -1262,16 +1288,17 @@ crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
class CrossentropyCategorical1Hot(gof.Op): class CrossentropyCategorical1Hot(gof.Op):
"""
"""Compute the cross entropy between a coding distribution and Compute the cross entropy between a coding distribution and
a true distribution of the form [0, 0, ... 0, 1, 0, ..., 0] a true distribution of the form [0, 0, ... 0, 1, 0, ..., 0].
.. math:: .. math::
y[i] = - \log(coding_dist[i, one_of_n[i]]) y[i] = - \log(coding_dist[i, one_of_n[i]])
Notes
:note: In the case that the coding distribution is the output of a -----
In the case that the coding distribution is the output of a
softmax, an application of this Op will probably be optimized softmax, an application of this Op will probably be optimized
away in favour of one with a C implementation. away in favour of one with a C implementation.
...@@ -1280,11 +1307,15 @@ class CrossentropyCategorical1Hot(gof.Op): ...@@ -1280,11 +1307,15 @@ class CrossentropyCategorical1Hot(gof.Op):
def make_node(self, coding_dist, true_one_of_n): def make_node(self, coding_dist, true_one_of_n):
""" """
:type coding_dist: dense matrix Parameters
----------
coding_dist : dense matrix
true_one_of_n : lvector
:type true_one_of_n: lvector Returns
-------
dvector
:rtype: dvector
""" """
_coding_dist = tensor.as_tensor_variable(coding_dist) _coding_dist = tensor.as_tensor_variable(coding_dist)
_true_one_of_n = tensor.as_tensor_variable(true_one_of_n) _true_one_of_n = tensor.as_tensor_variable(true_one_of_n)
...@@ -1332,10 +1363,13 @@ crossentropy_categorical_1hot = CrossentropyCategorical1Hot() ...@@ -1332,10 +1363,13 @@ crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
@gof.optimizer @gof.optimizer
def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph): def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
"""This is a stabilization optimization """
This is a stabilization optimization.
:note: not a local optimization because we are replacing outputs Notes
from several nodes at once -----
Not a local optimization because we are replacing outputs
from several nodes at once.
""" """
...@@ -1362,16 +1396,19 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph): ...@@ -1362,16 +1396,19 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
@gof.optimizer @gof.optimizer
def crossentropy_to_crossentropy_with_softmax(fgraph): def crossentropy_to_crossentropy_with_softmax(fgraph):
"""This is a stabilization optimization that is more general then """
crossentropy_to_crossentropy_with_softmax_with_bias This is a stabilization optimization that is more general than
crossentropy_to_crossentropy_with_softmax_with_bias.
It must be executed after local_softmax_with_bias optimization in It must be executed after local_softmax_with_bias optimization in
specialize specialize.
:todo: This is a stabilization optimization! How can this be done more cleanly? TODO : This is a stabilization optimization! How can this be done more cleanly?
:note: not a local optimization because we are replacing outputs Notes
from several nodes at once -----
Not a local optimization because we are replacing outputs from several
nodes at once.
""" """
...@@ -1460,11 +1497,13 @@ def local_argmax_pushdown(node): ...@@ -1460,11 +1497,13 @@ def local_argmax_pushdown(node):
def _check_rows_is_arange_len_labels(rows, labels): def _check_rows_is_arange_len_labels(rows, labels):
'''Check that 'rows' is the same node as T.arange(labels.shape[0]) """
Check that 'rows' is the same node as T.arange(labels.shape[0]).
Also considers the case where labels.shape[0] is constant and equal Also considers the case where labels.shape[0] is constant and equal
to 1, and T.arange(labels.shape[0]) has been constant-folded into 0. to 1, and T.arange(labels.shape[0]) has been constant-folded into 0.
'''
"""
if labels.owner and hasattr(labels.owner.fgraph, 'shape_feature'): if labels.owner and hasattr(labels.owner.fgraph, 'shape_feature'):
shape_of = labels.owner.fgraph.shape_feature.shape_of shape_of = labels.owner.fgraph.shape_feature.shape_of
...@@ -1795,10 +1834,11 @@ def graph_merge_softmax_with_crossentropy_softmax(node): ...@@ -1795,10 +1834,11 @@ def graph_merge_softmax_with_crossentropy_softmax(node):
@gof.local_optimizer([CrossentropySoftmax1HotWithBiasDx]) @gof.local_optimizer([CrossentropySoftmax1HotWithBiasDx])
def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node): def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
""" """
Replaces a CrossentropySoftmax1HotWithBiasDx op, whose incoming gradient is Replace a CrossentropySoftmax1HotWithBiasDx op, whose incoming gradient is
an `alloc` of a scalar variable or one that has either broadcastable or an `alloc` of a scalar variable or one that has either broadcastable or
matching dimensions with the output variable, by one that skips the matching dimensions with the output variable, by one that skips the
intermediate `alloc`. intermediate `alloc`.
""" """
if isinstance(node.op, CrossentropySoftmax1HotWithBiasDx): if isinstance(node.op, CrossentropySoftmax1HotWithBiasDx):
dy, sm, y_idx = node.inputs dy, sm, y_idx = node.inputs
...@@ -1850,30 +1890,38 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node): ...@@ -1850,30 +1890,38 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
def binary_crossentropy(output, target): def binary_crossentropy(output, target):
""" """
Compute the crossentropy of binary random variables Compute the crossentropy of binary random variables.
output and target are each expectations of binary random
Output and target are each expectations of binary random
variables; target may be exactly 0 or 1 but output must variables; target may be exactly 0 or 1 but output must
lie strictly between 0 and 1. lie strictly between 0 and 1.
@note: we could use the x log y op to support output=0
@ and output=1. The gradient would still be undefined though. Notes
@note: We do not sum, crossentropy is computed by component. -----
@todo: Rewrite as a scalar, and then broadcast to tensor. We could use the x log y op to support output=0 and output=1.
The gradient would still be undefined though.
We do not sum, crossentropy is computed by component.
TODO : Rewrite as a scalar, and then broadcast to tensor.
""" """
return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output)) return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output))
def categorical_crossentropy(coding_dist, true_dist): def categorical_crossentropy(coding_dist, true_dist):
""" """
WARNING: THIS FUNCTION IS UNNECESSARILY POLYMORPHIC. Return the cross-entropy between an approximating distribution and a true
We ultimately don't want the polymorphism, and will move this function to pylearn.algorithms.cost. distribution.
The 1hot version will be removed.
The length of the documentation here is a form of code smell.
Return the cross-entropy between an approximating distribution and a true distribution .. warning:: THIS FUNCTION IS UNNECESSARILY POLYMORPHIC.
We ultimately don't want the polymorphism, and will move this function
to pylearn.algorithms.cost. The 1hot version will be removed.
The length of the documentation here is a form of code smell.
The cross entropy between two probability distributions measures the average number of bits The cross entropy between two probability distributions measures the average
needed to identify an event from a set of possibilities, if a coding scheme is used based number of bits needed to identify an event from a set of possibilities, if a
on a given probability distribution q, rather than the "true" distribution p. coding scheme is used based on a given probability distribution q, rather
than the "true" distribution p.
Mathematically it is defined as follows: Mathematically it is defined as follows:
...@@ -1881,20 +1929,25 @@ def categorical_crossentropy(coding_dist, true_dist): ...@@ -1881,20 +1929,25 @@ def categorical_crossentropy(coding_dist, true_dist):
H(p,q) = - \sum_x p(x) \log(q(x)) H(p,q) = - \sum_x p(x) \log(q(x))
:type coding_dist: a dense matrix. Parameters
:param coding_dist: Each slice along axis represents one distribution. ----------
coding_dist : a dense matrix
:type true_dist: a dense matrix or sparse matrix or integer vector. Each slice along axis represents one distribution.
:param coding_dist: In the case of a matrix argument, each slice along axis represents one true_dist : a dense matrix or sparse matrix or integer vector
distribution. In the case of an integer vector argument, each element represents the In the case of a matrix argument, each slice along axis represents one
position of the '1' in a 1-of-N encoding. distribution. In the case of an integer vector argument, each element
represents the position of the '1' in a 1-of-N encoding.
:type axis: int
:param axis: the dimension over which each distribution runs. (1 for row distributions, 0 Returns
for column distributions) -------
tensor of rank one-less-than `coding_dist`
:rtype: tensor of rank one-less-than `coding_dist` The cross entropy between each coding and true distribution.
:returns: the cross entropy between each coding and true distribution.
Notes
-----
axis : int
The dimension over which each distribution runs
(1 for row distributions, 0 for column distributions).
""" """
if true_dist.ndim == coding_dist.ndim: if true_dist.ndim == coding_dist.ndim:
...@@ -2036,21 +2089,25 @@ def relu(x, alpha=0): ...@@ -2036,21 +2089,25 @@ def relu(x, alpha=0):
""" """
Compute the element-wise rectified linear activation function. Compute the element-wise rectified linear activation function.
:type x: symbolic tensor Parameters
:param x: Tensor to compute the activation function for. ----------
x : symbolic tensor
:type alpha: scalar or tensor, optional Tensor to compute the activation function for.
:param alpha: Slope for negative input, usually between 0 and 1. The alpha : scalar or tensor, optional
default value of 0 will lead to the standard rectifier, 1 will lead to Slope for negative input, usually between 0 and 1. The default value
of 0 will lead to the standard rectifier, 1 will lead to
a linear activation function, and any value in between will give a a linear activation function, and any value in between will give a
leaky rectifier. A shared variable (broadcastable against `x`) will leaky rectifier. A shared variable (broadcastable against `x`) will
result in a parameterized rectifier with learnable slope(s). result in a parameterized rectifier with learnable slope(s).
:rtype: symbolic tensor Returns
:return: element-wise rectifier applied to `x` -------
symbolic tensor
Element-wise rectifier applied to `x`.
.. note:: This is numerically equivalent to Notes
``T.switch(x > 0, x, alpha * x)`` -----
This is numerically equivalent to ``T.switch(x > 0, x, alpha * x)``
(or ``T.maximum(x, alpha * x)`` for ``alpha < 1``), but uses a faster (or ``T.maximum(x, alpha * x)`` for ``alpha < 1``), but uses a faster
formulation or an optimized Op, so we encourage to use this function. formulation or an optimized Op, so we encourage to use this function.
......
"""Ops and optimizations: sigmoid, softplus """
Ops and optimizations: sigmoid, softplus.
These functions implement special cases of exp and log to improve numerical
stability.
These functions implement special cases of exp and log to improve numerical stability.
""" """
from __future__ import print_function from __future__ import print_function
...@@ -25,6 +28,7 @@ from theano.tensor import elemwise, opt, NotScalarConstantError ...@@ -25,6 +28,7 @@ from theano.tensor import elemwise, opt, NotScalarConstantError
class ScalarSigmoid(scalar.UnaryScalarOp): class ScalarSigmoid(scalar.UnaryScalarOp):
""" """
This is just speed opt. Not for stability. This is just speed opt. Not for stability.
""" """
@staticmethod @staticmethod
def st_impl(x): def st_impl(x):
...@@ -126,7 +130,8 @@ class ScalarSigmoid(scalar.UnaryScalarOp): ...@@ -126,7 +130,8 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
@staticmethod @staticmethod
def gen_graph(): def gen_graph():
""" """
This method was used to generate the graph: sigmoid_prec.png in the doc This method was used to generate the graph: sigmoid_prec.png in the doc.
""" """
data = numpy.arange(-15, 15, .1) data = numpy.arange(-15, 15, .1)
val = 1 / (1 + numpy.exp(-data)) val = 1 / (1 + numpy.exp(-data))
...@@ -173,6 +178,7 @@ pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid')) ...@@ -173,6 +178,7 @@ pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
class UltraFastScalarSigmoid(scalar.UnaryScalarOp): class UltraFastScalarSigmoid(scalar.UnaryScalarOp):
""" """
This is just speed opt. Not for stability. This is just speed opt. Not for stability.
""" """
@staticmethod @staticmethod
def st_impl(x): def st_impl(x):
...@@ -245,7 +251,7 @@ def local_ultra_fast_sigmoid(node): ...@@ -245,7 +251,7 @@ def local_ultra_fast_sigmoid(node):
When enabled, change all sigmoid to ultra_fast_sigmoid. When enabled, change all sigmoid to ultra_fast_sigmoid.
For example do mode.including('local_ultra_fast_sigmoid') For example do mode.including('local_ultra_fast_sigmoid')
or use the Theano flag optimizer_including=local_ultra_fast_sigmoid or use the Theano flag optimizer_including=local_ultra_fast_sigmoid.
This speeds up the sigmoid op by using an approximation. This speeds up the sigmoid op by using an approximation.
...@@ -269,11 +275,12 @@ theano.compile.optdb['uncanonicalize'].register("local_ultra_fast_sigmoid", ...@@ -269,11 +275,12 @@ theano.compile.optdb['uncanonicalize'].register("local_ultra_fast_sigmoid",
def hard_sigmoid(x): def hard_sigmoid(x):
"""An approximation of sigmoid. """
An approximation of sigmoid.
More approximate and faster than ultra_fast_sigmoid. More approximate and faster than ultra_fast_sigmoid.
Approx in 3 parts: 0, scaled linear, 1 Approx in 3 parts: 0, scaled linear, 1.
Removing the slope and shift does not make it faster. Removing the slope and shift does not make it faster.
...@@ -375,7 +382,13 @@ logsigm_to_softplus = gof.PatternSub( ...@@ -375,7 +382,13 @@ logsigm_to_softplus = gof.PatternSub(
def _is_1(expr): def _is_1(expr):
"""rtype bool. True iff expr is a constant close to 1 """
Returns
-------
bool
True iff expr is a constant close to 1.
""" """
try: try:
v = opt.get_scalar_constant_value(expr) v = opt.get_scalar_constant_value(expr)
...@@ -405,8 +418,13 @@ opt.register_stabilize(log1pexp_to_softplus, name='log1pexp_to_softplus') ...@@ -405,8 +418,13 @@ opt.register_stabilize(log1pexp_to_softplus, name='log1pexp_to_softplus')
def is_1pexp(t): def is_1pexp(t):
""" """
Returns
-------
object
If 't' is of the form (1+exp(x)), return (False, x). If 't' is of the form (1+exp(x)), return (False, x).
Else return None. Else return None.
""" """
if t.owner and t.owner.op == tensor.add: if t.owner and t.owner.op == tensor.add:
scalars, scalar_inputs, nonconsts = \ scalars, scalar_inputs, nonconsts = \
...@@ -449,11 +467,18 @@ def is_exp(var): ...@@ -449,11 +467,18 @@ def is_exp(var):
""" """
Match a variable with either of the `exp(x)` or `-exp(x)` patterns. Match a variable with either of the `exp(x)` or `-exp(x)` patterns.
:param var: The Variable to analyze. Parameters
----------
var
The Variable to analyze.
Returns
-------
tuple
A pair (b, x) with `b` a boolean set to True if `var` is of the
form `-exp(x)` and False if `var` is of the form `exp(x)`. If `var`
cannot be cast into either form, then return `None`.
:return: A pair (b, x) with `b` a boolean set to True if `var` is of the
form `-exp(x)` and False if `var` is of the form `exp(x)`. If `var` cannot
be cast into either form, then return `None`.
""" """
neg = False neg = False
neg_info = is_neg(var) neg_info = is_neg(var)
...@@ -468,10 +493,17 @@ def is_mul(var): ...@@ -468,10 +493,17 @@ def is_mul(var):
""" """
Match a variable with `x * y * z * ...`. Match a variable with `x * y * z * ...`.
:param var: The Variable to analyze. Parameters
----------
var
The Variable to analyze.
:return: A list [x, y, z, ...] if `var` is of the form `x * y * z * ...`, Returns
-------
object
A list [x, y, z, ...] if `var` is of the form `x * y * z * ...`,
or None if `var` cannot be cast into this form. or None if `var` cannot be cast into this form.
""" """
if var.owner and var.owner.op == tensor.mul: if var.owner and var.owner.op == tensor.mul:
return var.owner.inputs return var.owner.inputs
...@@ -504,9 +536,16 @@ def is_neg(var): ...@@ -504,9 +536,16 @@ def is_neg(var):
""" """
Match a variable with the `-x` pattern. Match a variable with the `-x` pattern.
:param var: The Variable to analyze. Parameters
----------
var
The Variable to analyze.
Returns
-------
object
`x` if `var` is of the form `-x`, or None otherwise.
:return: `x` if `var` is of the form `-x`, or None otherwise.
""" """
apply = var.owner apply = var.owner
if not apply: if not apply:
...@@ -538,8 +577,10 @@ def is_neg(var): ...@@ -538,8 +577,10 @@ def is_neg(var):
@opt.register_stabilize @opt.register_stabilize
@gof.local_optimizer([tensor.true_div]) @gof.local_optimizer([tensor.true_div])
def local_exp_over_1_plus_exp(node): def local_exp_over_1_plus_exp(node):
"""exp(x)/(1+exp(x)) -> sigm(x) """
exp(x)/(1+exp(x)) -> sigm(x)
c/(1+exp(x)) -> c*sigm(-x) c/(1+exp(x)) -> c*sigm(-x)
""" """
# this optimization should be done for numerical stability # this optimization should be done for numerical stability
# so we don't care to check client counts # so we don't care to check client counts
...@@ -585,20 +626,28 @@ def parse_mul_tree(root): ...@@ -585,20 +626,28 @@ def parse_mul_tree(root):
""" """
Parse a tree of multiplications starting at the given root. Parse a tree of multiplications starting at the given root.
:param root: The variable at the root of the tree. Parameters
----------
:return: A tree where each non-leaf node corresponds to a multiplication root
in the computation of `root`, represented by the list of its inputs. Each The variable at the root of the tree.
input is a pair [n, x] with `n` a boolean value indicating whether
Returns
-------
object
A tree where each non-leaf node corresponds to a multiplication
in the computation of `root`, represented by the list of its inputs.
Each input is a pair [n, x] with `n` a boolean value indicating whether
sub-tree `x` should be negated. sub-tree `x` should be negated.
Examples: Examples
--------
x * y -> [False, [[False, x], [False, y]]] x * y -> [False, [[False, x], [False, y]]]
-(x * y) -> [True, [[False, x], [False, y]]] -(x * y) -> [True, [[False, x], [False, y]]]
-x * y -> [False, [[True, x], [False, y]]] -x * y -> [False, [[True, x], [False, y]]]
-x -> [True, x] -x -> [True, x]
(x * y) * -z -> [False, [[False, [[False, x], [False, y]]], (x * y) * -z -> [False, [[False, [[False, x], [False, y]]],
[True, z]]] [True, z]]]
""" """
# Is it a multiplication? # Is it a multiplication?
mul_info = is_mul(root) mul_info = is_mul(root)
...@@ -619,29 +668,36 @@ def parse_mul_tree(root): ...@@ -619,29 +668,36 @@ def parse_mul_tree(root):
def replace_leaf(arg, leaves, new_leaves, op, neg): def replace_leaf(arg, leaves, new_leaves, op, neg):
""" """
Attempts to replace a leaf of a multiplication tree. Attempt to replace a leaf of a multiplication tree.
We search for a leaf in `leaves` whose argument is `arg`, and if we find We search for a leaf in `leaves` whose argument is `arg`, and if we find
one, we remove it from `leaves` and add to `new_leaves` a leaf with one, we remove it from `leaves` and add to `new_leaves` a leaf with
argument `arg` and variable `op(arg)`. argument `arg` and variable `op(arg)`.
:param arg: The argument of the leaf we are looking for. Parameters
----------
:param leaves: List of leaves to look into. Each leaf should be a pair arg
The argument of the leaf we are looking for.
leaves
List of leaves to look into. Each leaf should be a pair
(x, l) with `x` the argument of the Op found in the leaf, and `l` the (x, l) with `x` the argument of the Op found in the leaf, and `l` the
actual leaf as found in a multiplication tree output by `parse_mul_tree` actual leaf as found in a multiplication tree output by `parse_mul_tree`
(i.e. a pair [boolean, variable]). (i.e. a pair [boolean, variable]).
new_leaves
:param new_leaves: If a replacement occurred, then the leaf is removed from If a replacement occurred, then the leaf is removed from `leaves`
`leaves` and added to the list `new_leaves` (after being modified by `op`). and added to the list `new_leaves` (after being modified by `op`).
op
:param op: A function that, when applied to `arg`, returns the Variable A function that, when applied to `arg`, returns the Variable
we want to replace the original leaf variable with. we want to replace the original leaf variable with.
neg : bool
:param neg: If True, then the boolean value associated to the leaf should If True, then the boolean value associated to the leaf should
be swapped. If False, then this value should remain unchanged. be swapped. If False, then this value should remain unchanged.
:return: True if a replacement occurred, or False otherwise. Returns
-------
bool
True if a replacement occurred, or False otherwise.
""" """
for idx, x in enumerate(leaves): for idx, x in enumerate(leaves):
if x[0] == arg: if x[0] == arg:
...@@ -657,12 +713,19 @@ def simplify_mul(tree): ...@@ -657,12 +713,19 @@ def simplify_mul(tree):
""" """
Simplify a multiplication tree. Simplify a multiplication tree.
:param tree: A multiplication tree (as output by `parse_mul_tree`). Parameters
----------
:return: A multiplication tree computing the same output as `tree` but tree
without useless multiplications by 1 nor -1 (identified by leaves of the A multiplication tree (as output by `parse_mul_tree`).
form [False, None] or [True, None] respectively). Useless multiplications
Returns
-------
object
A multiplication tree computing the same output as `tree` but without
useless multiplications by 1 nor -1 (identified by leaves of the form
[False, None] or [True, None] respectively). Useless multiplications
(with less than two inputs) are also removed from the tree. (with less than two inputs) are also removed from the tree.
""" """
neg, inputs = tree neg, inputs = tree
if isinstance(inputs, list): if isinstance(inputs, list):
...@@ -694,12 +757,18 @@ def compute_mul(tree): ...@@ -694,12 +757,18 @@ def compute_mul(tree):
Compute the Variable that is the output of a multiplication tree. Compute the Variable that is the output of a multiplication tree.
This is the inverse of the operation performed by `parse_mul_tree`, i.e. This is the inverse of the operation performed by `parse_mul_tree`, i.e.
compute_mul(parse_mul_tree(tree)) == tree compute_mul(parse_mul_tree(tree)) == tree.
Parameters
----------
tree
A multiplication tree (as output by `parse_mul_tree`).
:param tree: A multiplication tree (as output by `parse_mul_tree`). Returns
-------
object
A Variable that computes the multiplication represented by the tree.
:return: A Variable that computes the multiplication represented by the
tree.
""" """
neg, inputs = tree neg, inputs = tree
if inputs is None: if inputs is None:
...@@ -727,32 +796,38 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None, ...@@ -727,32 +796,38 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None,
by replacing matching pairs (exp, sigmoid) with the desired optimized by replacing matching pairs (exp, sigmoid) with the desired optimized
version. version.
:param tree: The sub-tree to operate on. Parameters
----------
:exp_x: List of arguments x so that `exp(x)` exists somewhere in the whole tree
The sub-tree to operate on.
exp_x
List of arguments x so that `exp(x)` exists somewhere in the whole
multiplication tree. Each argument is a pair (x, leaf) with `x` the multiplication tree. Each argument is a pair (x, leaf) with `x` the
argument of the exponential, and `leaf` the corresponding leaf in the argument of the exponential, and `leaf` the corresponding leaf in the
multiplication tree (of the form [n, exp(x)] -- see `parse_mul_tree`). multiplication tree (of the form [n, exp(x)] -- see `parse_mul_tree`).
If None, this argument is initialized to an empty list. If None, this argument is initialized to an empty list.
exp_minus_x
Similar to `exp_x`, but for `exp(-x)`.
sigm_x
Similar to `exp_x`, but for `sigmoid(x)`.
sigm_minus_x
Similar to `exp_x`, but for `sigmoid(-x)`.
parent
Parent of `tree` (None if `tree` is the global root).
child_idx
Index of `tree` in its parent's inputs (None if `tree` is the global
root).
full_tree
The global multiplication tree (should not be set except by recursive
calls to this function). Used for debugging only.
Returns
-------
bool
True if a modification was performed somewhere in the whole multiplication
tree, or False otherwise.
:param exp_minus_x: Similar to `exp_x`, but for `exp(-x)`.
:param sigm_x: Similar to `exp_x`, but for `sigmoid(x)`.
:param sigm_minus_x: Similar to `exp_x`, but for `sigmoid(-x)`.
:param parent: Parent of `tree` (None if `tree` is the global root).
:param child_idx: Index of `tree` in its parent's inputs (None if `tree` is
the global root).
:param full_tree: The global multiplication tree (should not be set except
by recursive calls to this function). Used for debugging only.
:return: True if a modification was performed somewhere in the whole
multiplication tree, or False otherwise.
""" """
if exp_x is None: if exp_x is None:
exp_x = [] exp_x = []
if exp_minus_x is None: if exp_minus_x is None:
...@@ -836,6 +911,7 @@ def local_sigm_times_exp(node): ...@@ -836,6 +911,7 @@ def local_sigm_times_exp(node):
""" """
exp(x) * sigm(-x) -> sigm(x) exp(x) * sigm(-x) -> sigm(x)
exp(-x) * sigm(x) -> sigm(-x) exp(-x) * sigm(x) -> sigm(-x)
""" """
# Bail early if it is not a multiplication. # Bail early if it is not a multiplication.
if node.op != tensor.mul: if node.op != tensor.mul:
...@@ -859,6 +935,7 @@ def local_sigm_times_exp(node): ...@@ -859,6 +935,7 @@ def local_sigm_times_exp(node):
def local_inv_1_plus_exp(node): def local_inv_1_plus_exp(node):
""" """
1/(1+exp(x)) -> sigm(-x) 1/(1+exp(x)) -> sigm(-x)
""" """
# this optimization should be done for numerical stability # this optimization should be done for numerical stability
# so we don't care to check client counts # so we don't care to check client counts
...@@ -883,6 +960,7 @@ def local_inv_1_plus_exp(node): ...@@ -883,6 +960,7 @@ def local_inv_1_plus_exp(node):
def local_1msigmoid(node): def local_1msigmoid(node):
""" """
1-sigm(x) -> sigm(-x) 1-sigm(x) -> sigm(-x)
""" """
if node.op == tensor.sub: if node.op == tensor.sub:
sub_l, sub_r = node.inputs sub_l, sub_r = node.inputs
......
""" """
Tensor optimizations addressing the ops in basic.py Tensor optimizations addressing the ops in basic.py.
""" """
from __future__ import print_function from __future__ import print_function
# TODO: intelligent merge for mul/add # TODO: intelligent merge for mul/add
...@@ -68,15 +68,20 @@ def copy_stack_trace(from_var, to_var): ...@@ -68,15 +68,20 @@ def copy_stack_trace(from_var, to_var):
Copies the stack trace from one or more tensor variables to Copies the stack trace from one or more tensor variables to
one or more tensor variables. one or more tensor variables.
:param from_var: tensor variable or list of tensor variables to Parameters
copy stack traces from. ----------
:param to_var: tensor variable or list of tensor variables to from_var
copy stack traces to. Tensor variable or list of tensor variables to copy stack traces from.
to_var
Tensor variable or list of tensor variables to copy stack traces to.
.. note:: The stacktrace is assumed to be of the form of a list of lists Notes
-----
The stacktrace is assumed to be of the form of a list of lists
of tuples. Each tuple contains the filename, line number, function name of tuples. Each tuple contains the filename, line number, function name
and so on. Each list of tuples contains the truples belonging to a and so on. Each list of tuples contains the truples belonging to a
particular variable. particular variable.
""" """
# Store stack traces from from_var # Store stack traces from from_var
...@@ -151,11 +156,20 @@ def _fill_chain(new_out, orig_inputs): ...@@ -151,11 +156,20 @@ def _fill_chain(new_out, orig_inputs):
def encompasses_broadcastable(b1, b2): def encompasses_broadcastable(b1, b2):
""" """
Returns True if the broadcastable patterns b1 and b2 are such that b2 is
Parameters
----------
b1
The broadcastable attribute of a tensor type.
b2
The broadcastable attribute of a tensor type.
Returns
-------
bool
True if the broadcastable patterns b1 and b2 are such that b2 is
broadcasted to b1's shape and not the opposite. broadcasted to b1's shape and not the opposite.
:param b1: the broadcastable attribute of a tensor type
:param b2: the broadcastable attribute of a tensor type
""" """
if len(b1) < len(b2): if len(b1) < len(b2):
return False return False
...@@ -184,7 +198,8 @@ def scalarconsts_rest(inputs): ...@@ -184,7 +198,8 @@ def scalarconsts_rest(inputs):
def broadcast_like(value, template, fgraph, dtype=None): def broadcast_like(value, template, fgraph, dtype=None):
"""Return a Variable with the same shape and dtype as the template, """
Return a Variable with the same shape and dtype as the template,
filled by broadcasting value through it. `value` will be cast as filled by broadcasting value through it. `value` will be cast as
necessary. necessary.
...@@ -240,9 +255,11 @@ def inplace_elemwise_optimizer_op(OP): ...@@ -240,9 +255,11 @@ def inplace_elemwise_optimizer_op(OP):
see if it can operate inplace on that input. If so, makes the see if it can operate inplace on that input. If so, makes the
change and go to the next output or Broadcast Op. change and go to the next output or Broadcast Op.
Examples: Examples
--------
x + y + z -> x += y += z x + y + z -> x += y += z
(x + y) * (x * y) -> (x += y) *= (x * y) or (x + y) *= (x *= y) (x + y) * (x * y) -> (x += y) *= (x * y) or (x + y) *= (x *= y)
""" """
# We should not validate too often as this takes too much time to # We should not validate too often as this takes too much time to
# execute! # execute!
...@@ -507,6 +524,7 @@ def local_dimshuffle_lift(node): ...@@ -507,6 +524,7 @@ def local_dimshuffle_lift(node):
After this transform, clusters of Elemwise operations are After this transform, clusters of Elemwise operations are
void of DimShuffle operations. void of DimShuffle operations.
""" """
op = node.op op = node.op
if not isinstance(op, DimShuffle): if not isinstance(op, DimShuffle):
...@@ -556,6 +574,7 @@ def local_lift_transpose_through_dot(node): ...@@ -556,6 +574,7 @@ def local_lift_transpose_through_dot(node):
The transformation should be apply whether or not the transpose is The transformation should be apply whether or not the transpose is
inplace. The newly-introduced transpositions are not inplace, this will inplace. The newly-introduced transpositions are not inplace, this will
be taken care of in a later optimization phase. be taken care of in a later optimization phase.
""" """
if not (isinstance(node.op, T.DimShuffle) and node.op.new_order == (1, 0)): if not (isinstance(node.op, T.DimShuffle) and node.op.new_order == (1, 0)):
return False return False
...@@ -639,11 +658,12 @@ def local_scalar_tensor_scalar(node): ...@@ -639,11 +658,12 @@ def local_scalar_tensor_scalar(node):
class MakeVector(T.Op): class MakeVector(T.Op):
"""Concatenate a number of scalars together into a vector """Concatenate a number of scalars together into a vector.
This is a simple version of stack() that introduces far less cruft This is a simple version of stack() that introduces far less cruft
into the graph. Should work with 0 inputs. The constant_folding into the graph. Should work with 0 inputs. The constant_folding
optimization will remove it. optimization will remove it.
""" """
__props__ = ("dtype",) __props__ = ("dtype",)
...@@ -755,7 +775,7 @@ T.pprint.assign(lambda pstate, r: r.owner and ...@@ -755,7 +775,7 @@ T.pprint.assign(lambda pstate, r: r.owner and
class ShapeFeature(object): class ShapeFeature(object):
"""Graph optimizer for removing all calls to shape() """Graph optimizer for removing all calls to shape().
This optimizer replaces all Shapes and Subtensors of Shapes with This optimizer replaces all Shapes and Subtensors of Shapes with
Shape_i and MakeVector Ops. Shape_i and MakeVector Ops.
...@@ -791,7 +811,6 @@ class ShapeFeature(object): ...@@ -791,7 +811,6 @@ class ShapeFeature(object):
For example the infer_shape for a matrix-matrix product would accept For example the infer_shape for a matrix-matrix product would accept
input_shapes=((x0,x1), (y0,y1)) and return ((x0, y1),). input_shapes=((x0,x1), (y0,y1)) and return ((x0, y1),).
Inferring the shape of internal nodes in the graph is important Inferring the shape of internal nodes in the graph is important
for doing size-driven optimizations. If we know how big various for doing size-driven optimizations. If we know how big various
intermediate results will be, we can estimate the cost of many Ops intermediate results will be, we can estimate the cost of many Ops
...@@ -800,8 +819,8 @@ class ShapeFeature(object): ...@@ -800,8 +819,8 @@ class ShapeFeature(object):
In cases where you cannot figure out the shape, raise a ShapeError. In cases where you cannot figure out the shape, raise a ShapeError.
.. note:: Notes
-----
Right now there is only the ConvOp that could really take Right now there is only the ConvOp that could really take
advantage of this shape inference, but it is worth it even advantage of this shape inference, but it is worth it even
just for the ConvOp. All that's necessary to do shape just for the ConvOp. All that's necessary to do shape
...@@ -842,7 +861,7 @@ class ShapeFeature(object): ...@@ -842,7 +861,7 @@ class ShapeFeature(object):
""" """
def shape_ir(self, i, r): def shape_ir(self, i, r):
"""Return symbolic r.shape[i] for tensor variable r, int i""" """Return symbolic r.shape[i] for tensor variable r, int i."""
if hasattr(r.type, "broadcastable") and r.type.broadcastable[i]: if hasattr(r.type, "broadcastable") and r.type.broadcastable[i]:
return self.lscalar_one return self.lscalar_one
else: else:
...@@ -855,7 +874,7 @@ class ShapeFeature(object): ...@@ -855,7 +874,7 @@ class ShapeFeature(object):
return s return s
def shape_tuple(self, r): def shape_tuple(self, r):
"""Return a tuple of symbolic shape vars for tensor variable r""" """Return a tuple of symbolic shape vars for tensor variable r."""
if not hasattr(r, 'ndim'): if not hasattr(r, 'ndim'):
# This happen for NoneConst. # This happen for NoneConst.
return None return None
...@@ -867,6 +886,7 @@ class ShapeFeature(object): ...@@ -867,6 +886,7 @@ class ShapeFeature(object):
This function is used for Ops that don't implement infer_shape. This function is used for Ops that don't implement infer_shape.
Ops that do implement infer_shape should use the i_shapes parameter, Ops that do implement infer_shape should use the i_shapes parameter,
but this default implementation ignores it. but this default implementation ignores it.
""" """
rval = [] rval = []
for r in node.outputs: for r in node.outputs:
...@@ -880,6 +900,7 @@ class ShapeFeature(object): ...@@ -880,6 +900,7 @@ class ShapeFeature(object):
"""Return a symbolic integer scalar for the shape element s_i. """Return a symbolic integer scalar for the shape element s_i.
The s_i argument was produced by the infer_shape() of an Op subclass. The s_i argument was produced by the infer_shape() of an Op subclass.
""" """
# unpack the s_i that the Op returned # unpack the s_i that the Op returned
assert s_i is not None assert s_i is not None
...@@ -933,8 +954,11 @@ class ShapeFeature(object): ...@@ -933,8 +954,11 @@ class ShapeFeature(object):
def set_shape(self, r, s): def set_shape(self, r, s):
"""Assign the shape `s` to previously un-shaped variable `r`. """Assign the shape `s` to previously un-shaped variable `r`.
:type r: a variable Parameters
:type s: None or a tuple of symbolic integers ----------
r : a variable
s : None or a tuple of symbolic integers
""" """
assert r not in self.shape_of, 'r already in shape_of' assert r not in self.shape_of, 'r already in shape_of'
if s is None: if s is None:
...@@ -972,11 +996,12 @@ class ShapeFeature(object): ...@@ -972,11 +996,12 @@ class ShapeFeature(object):
self.shape_of_reverse_index.setdefault(sv, set()).add(r) self.shape_of_reverse_index.setdefault(sv, set()).add(r)
def update_shape(self, r, other_r): def update_shape(self, r, other_r):
'''Replace shape of r by shape of other_r. """Replace shape of r by shape of other_r.
If, on some dimensions, the shape of other_r is not informative, If, on some dimensions, the shape of other_r is not informative,
keep the shape of r on those dimensions. keep the shape of r on those dimensions.
'''
"""
# other_r should already have a shape # other_r should already have a shape
assert other_r in self.shape_of, ('other_r not in shape_of', other_r) assert other_r in self.shape_of, ('other_r not in shape_of', other_r)
other_shape = self.shape_of[other_r] other_shape = self.shape_of[other_r]
...@@ -1303,8 +1328,7 @@ class ShapeFeature(object): ...@@ -1303,8 +1328,7 @@ class ShapeFeature(object):
class ShapeOptimizer(Optimizer): class ShapeOptimizer(Optimizer):
"""Optimizer that serves to add ShapeFeature as an fgraph feature. """Optimizer that serves to add ShapeFeature as an fgraph feature."""
"""
def __init__(self): def __init__(self):
Optimizer.__init__(self) Optimizer.__init__(self)
...@@ -1392,6 +1416,7 @@ def local_useless_alloc(node): ...@@ -1392,6 +1416,7 @@ def local_useless_alloc(node):
If the input type is the same as the output type (dtype and broadcast) If the input type is the same as the output type (dtype and broadcast)
there is no change in the shape of the input. So this is just a simple copy there is no change in the shape of the input. So this is just a simple copy
of the input. This is not needed. of the input. This is not needed.
""" """
if node.op == T.alloc: if node.op == T.alloc:
if node.inputs[0].type == node.outputs[0].type: if node.inputs[0].type == node.outputs[0].type:
...@@ -1438,14 +1463,15 @@ def local_track_shape_i(node): ...@@ -1438,14 +1463,15 @@ def local_track_shape_i(node):
@gof.local_optimizer([Subtensor, AdvancedSubtensor1]) @gof.local_optimizer([Subtensor, AdvancedSubtensor1])
def local_subtensor_make_vector(node): def local_subtensor_make_vector(node):
""" """
replace all subtensor(make_vector) like: Replace all subtensor(make_vector) like:
[a,b,c][0] -> a [a,b,c][0] -> a
[a,b,c][0:2] -> [a,b] [a,b,c][0:2] -> [a,b]
replace all AdvancedSubtensor1(make_vector) like: Replace all AdvancedSubtensor1(make_vector) like:
[a,b,c][[0,2]] -> [a,c] [a,b,c][[0,2]] -> [a,c]
we can do this for constant indexes We can do this for constant indexes.
""" """
x = node.inputs[0] x = node.inputs[0]
if not x.owner or x.owner.op != make_vector: if not x.owner or x.owner.op != make_vector:
...@@ -1514,7 +1540,6 @@ def local_subtensor_make_vector(node): ...@@ -1514,7 +1540,6 @@ def local_subtensor_make_vector(node):
@gof.local_optimizer([T.Elemwise]) @gof.local_optimizer([T.Elemwise])
def local_useless_elemwise(node): def local_useless_elemwise(node):
""" """
eq(x,x) -> 1 eq(x,x) -> 1
neq(x,x) -> 0 neq(x,x) -> 0
mul(x) -> x mul(x) -> x
...@@ -1559,8 +1584,7 @@ def local_useless_elemwise(node): ...@@ -1559,8 +1584,7 @@ def local_useless_elemwise(node):
@register_specialize @register_specialize
@gof.local_optimizer([T.Elemwise]) @gof.local_optimizer([T.Elemwise])
def local_alloc_unary(node): def local_alloc_unary(node):
"""unary(alloc(x, shp)) -> alloc(unary(x), shp) """unary(alloc(x, shp)) -> alloc(unary(x), shp)"""
"""
if isinstance(node.op, T.Elemwise) and len(node.inputs) == 1: if isinstance(node.op, T.Elemwise) and len(node.inputs) == 1:
a = node.inputs[0] a = node.inputs[0]
if a.owner and isinstance(a.owner.op, T.Alloc): if a.owner and isinstance(a.owner.op, T.Alloc):
...@@ -1587,6 +1611,7 @@ def local_cast_cast(node): ...@@ -1587,6 +1611,7 @@ def local_cast_cast(node):
dtype1 == dtype2 dtype1 == dtype2
TODO: the base dtype is the same (int, uint, float, complex) TODO: the base dtype is the same (int, uint, float, complex)
and the first cast cause an upcast. and the first cast cause an upcast.
""" """
if (not isinstance(node.op, T.Elemwise) or if (not isinstance(node.op, T.Elemwise) or
not isinstance(node.op.scalar_op, scalar.Cast)): not isinstance(node.op.scalar_op, scalar.Cast)):
...@@ -1607,9 +1632,9 @@ def local_cast_cast(node): ...@@ -1607,9 +1632,9 @@ def local_cast_cast(node):
def local_func_inv(node): def local_func_inv(node):
""" """
Check for two consecutive operations that are functional inverses Check for two consecutive operations that are functional inverses
and remove them from the function graph and remove them from the function graph.
"""
"""
inv_pairs = ( inv_pairs = (
(basic.Deg2Rad, basic.Rad2Deg), (basic.Deg2Rad, basic.Rad2Deg),
(basic.Cosh, basic.ArcCosh), (basic.Cosh, basic.ArcCosh),
...@@ -1641,9 +1666,9 @@ def local_func_inv(node): ...@@ -1641,9 +1666,9 @@ def local_func_inv(node):
def is_inverse_pair(node_op, prev_op, inv_pair): def is_inverse_pair(node_op, prev_op, inv_pair):
""" """
Given two consecutive operations, check if they are the Given two consecutive operations, check if they are the
provided pair of inverse functions provided pair of inverse functions.
"""
"""
node_is_op0 = isinstance(node_op, inv_pair[0]) node_is_op0 = isinstance(node_op, inv_pair[0])
node_is_op1 = isinstance(node_op, inv_pair[1]) node_is_op1 = isinstance(node_op, inv_pair[1])
prev_is_op0 = isinstance(prev_op, inv_pair[0]) prev_is_op0 = isinstance(prev_op, inv_pair[0])
...@@ -1659,20 +1684,24 @@ class Assert(T.Op): ...@@ -1659,20 +1684,24 @@ class Assert(T.Op):
Returns the first parameter if the condition is true, otherwise, triggers Returns the first parameter if the condition is true, otherwise, triggers
AssertionError. AssertionError.
Example: Notes
T = theano.tensor -----
x = T.vector('x')
assert_op = T.opt.Assert()
func = theano.function([x], assert_op(x, x.size<2))
Notes:
This Op is a debugging feature. It can be removed from the graph This Op is a debugging feature. It can be removed from the graph
because of optimizations, and can hide some possible optimizations to because of optimizations, and can hide some possible optimizations to
the optimizer. Specifically, removing happens if it can be determined the optimizer. Specifically, removing happens if it can be determined
that condition will always be true. Also, the output of the Op must be that condition will always be true. Also, the output of the Op must be
used in the function computing the graph, but it doesn't have to be used in the function computing the graph, but it doesn't have to be
returned. returned.
Examples
--------
T = theano.tensor
x = T.vector('x')
assert_op = T.opt.Assert()
func = theano.function([x], assert_op(x, x.size<2))
""" """
__props__ = ('msg',) __props__ = ('msg',)
view_map = {0: [0]} view_map = {0: [0]}
...@@ -1770,7 +1799,9 @@ def local_remove_all_assert(node): ...@@ -1770,7 +1799,9 @@ def local_remove_all_assert(node):
"""An optimization disabled by default that removes all asserts from """An optimization disabled by default that removes all asserts from
the graph. the graph.
:note: See the :ref:`unsafe` section to know how to enable it. Notes
-----
See the :ref:`unsafe` section to know how to enable it.
""" """
if not isinstance(node.op, Assert): if not isinstance(node.op, Assert):
...@@ -1804,11 +1835,12 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP): ...@@ -1804,11 +1835,12 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
BROADCAST CONDITION: the condition is that the one input that are BROADCAST CONDITION: the condition is that the one input that are
not to be optimized to have the same broadcast pattern as the not to be optimized to have the same broadcast pattern as the
output output.
We can change the alloc by a dimshuffle as the elemwise We can change the alloc by a dimshuffle as the elemwise
already have the shape info. The dimshuffle will be faster already have the shape info. The dimshuffle will be faster
to exec to exec.
""" """
if not isinstance(node.op, ElemwiseOP): if not isinstance(node.op, ElemwiseOP):
return False return False
...@@ -1969,6 +2001,7 @@ def local_upcast_elemwise_constant_inputs(node): ...@@ -1969,6 +2001,7 @@ def local_upcast_elemwise_constant_inputs(node):
those Ops do implicit upcasting anyway. those Ops do implicit upcasting anyway.
Rationale: it helps merge things like (1-x) and (1.0 - x). Rationale: it helps merge things like (1-x) and (1.0 - x).
""" """
if len(node.outputs) > 1: if len(node.outputs) > 1:
return return
...@@ -2033,7 +2066,8 @@ def local_upcast_elemwise_constant_inputs(node): ...@@ -2033,7 +2066,8 @@ def local_upcast_elemwise_constant_inputs(node):
@register_specialize @register_specialize
@gof.local_optimizer([IncSubtensor]) @gof.local_optimizer([IncSubtensor])
def local_useless_inc_subtensor(node): def local_useless_inc_subtensor(node):
"""Remove IncSubtensor, when we overwrite the full inputs with the """
Remove IncSubtensor, when we overwrite the full inputs with the
new value. new value.
""" """
...@@ -2082,6 +2116,7 @@ def local_set_to_inc_subtensor(node): ...@@ -2082,6 +2116,7 @@ def local_set_to_inc_subtensor(node):
""" """
AdvancedIncSubtensor1(x, x[ilist]+other, ilist, set_instead_of_inc=True) -> AdvancedIncSubtensor1(x, x[ilist]+other, ilist, set_instead_of_inc=True) ->
AdvancedIncSubtensor1(x, other, ilist, set_instead_of_inc=False) AdvancedIncSubtensor1(x, other, ilist, set_instead_of_inc=False)
""" """
if (isinstance(node.op, AdvancedIncSubtensor1) and if (isinstance(node.op, AdvancedIncSubtensor1) and
node.op.set_instead_of_inc and node.op.set_instead_of_inc and
...@@ -2144,6 +2179,7 @@ def local_useless_subtensor(node): ...@@ -2144,6 +2179,7 @@ def local_useless_subtensor(node):
AdvancedSubtensor1 case, the full input is taken when the indices are AdvancedSubtensor1 case, the full input is taken when the indices are
equivalent to `arange(0, input.shape[0], 1)` using either an explicit equivalent to `arange(0, input.shape[0], 1)` using either an explicit
list/vector or the ARange op. list/vector or the ARange op.
""" """
# This optimization needs ShapeOpt and fgraph.shape_feature # This optimization needs ShapeOpt and fgraph.shape_feature
if not hasattr(node.fgraph, 'shape_feature'): if not hasattr(node.fgraph, 'shape_feature'):
...@@ -2261,6 +2297,7 @@ def local_subtensor_lift(node): ...@@ -2261,6 +2297,7 @@ def local_subtensor_lift(node):
elemwise(x,...)[idx] -> elemwise(x[idx],...) elemwise(x,...)[idx] -> elemwise(x[idx],...)
when x,... are broadcasted scalar or not broadcasted at all when x,... are broadcasted scalar or not broadcasted at all
rebroadcast(x)[idx] => rebroadcast(x[idx]) rebroadcast(x)[idx] => rebroadcast(x[idx])
""" """
if isinstance(node.op, Subtensor): if isinstance(node.op, Subtensor):
u = node.inputs[0] u = node.inputs[0]
...@@ -2327,7 +2364,7 @@ def local_subtensor_lift(node): ...@@ -2327,7 +2364,7 @@ def local_subtensor_lift(node):
def merge_two_slices(slice1, len1, slice2, len2): def merge_two_slices(slice1, len1, slice2, len2):
''' """
This function merges two slices into a single slice. The code works on This function merges two slices into a single slice. The code works on
the assumption that: the assumption that:
a) slice1 is actually a slice and not an index, while slice2 a) slice1 is actually a slice and not an index, while slice2
...@@ -2340,7 +2377,7 @@ def merge_two_slices(slice1, len1, slice2, len2): ...@@ -2340,7 +2377,7 @@ def merge_two_slices(slice1, len1, slice2, len2):
the two consecutive slices. the two consecutive slices.
``len1`` is the length of the tensor **before** applying the first slice, ``len1`` is the length of the tensor **before** applying the first slice,
while ``len2`` is the length **after** applying the first slice. while ``len2`` is the length **after** applying the first slice.
''' """
list_opt = [local_abs_merge, local_mul_switch_sink, list_opt = [local_abs_merge, local_mul_switch_sink,
local_upcast_elemwise_constant_inputs, local_upcast_elemwise_constant_inputs,
local_remove_switch_const_cond, constant_folding] local_remove_switch_const_cond, constant_folding]
...@@ -2466,6 +2503,7 @@ def local_subtensor_merge(node): ...@@ -2466,6 +2503,7 @@ def local_subtensor_merge(node):
Refactored optimization to deal with all cases of tensor merging. Refactored optimization to deal with all cases of tensor merging.
Given a subgraph of the form Subtensor(Subtensor(u)), the optimization Given a subgraph of the form Subtensor(Subtensor(u)), the optimization
expresses all slices in a canonical form, and then merges them together. expresses all slices in a canonical form, and then merges them together.
""" """
if isinstance(node.op, Subtensor): if isinstance(node.op, Subtensor):
...@@ -2601,7 +2639,8 @@ def local_subtensor_of_dot(node): ...@@ -2601,7 +2639,8 @@ def local_subtensor_of_dot(node):
idxs_a is the first A.ndim-1 entries of idxs, idxs_a is the first A.ndim-1 entries of idxs,
and idxs_b is the remaining entries of idxs (if any), and idxs_b is the remaining entries of idxs (if any),
modified to skip the second-to-last dimension of B modified to skip the second-to-last dimension of B
(because dot sums over this dimension) (because dot sums over this dimension).
""" """
if not isinstance(node.op, Subtensor): if not isinstance(node.op, Subtensor):
return return
...@@ -2715,7 +2754,8 @@ compile.optdb.register('pre_local_IncSubtensor_serialize', ...@@ -2715,7 +2754,8 @@ compile.optdb.register('pre_local_IncSubtensor_serialize',
@gof.local_optimizer([IncSubtensor], inplace=True) @gof.local_optimizer([IncSubtensor], inplace=True)
def local_inplace_setsubtensor(node): def local_inplace_setsubtensor(node):
""" """
Also work for GpuIncSubtensor Also work for GpuIncSubtensor.
""" """
if isinstance(node.op, IncSubtensor) and not node.op.inplace: if isinstance(node.op, IncSubtensor) and not node.op.inplace:
new_op = node.op.__class__( new_op = node.op.__class__(
...@@ -2734,7 +2774,10 @@ compile.optdb.register('local_inplace_setsubtensor', ...@@ -2734,7 +2774,10 @@ compile.optdb.register('local_inplace_setsubtensor',
@gof.local_optimizer([AdvancedIncSubtensor1], inplace=True) @gof.local_optimizer([AdvancedIncSubtensor1], inplace=True)
def local_inplace_incsubtensor1(node): def local_inplace_incsubtensor1(node):
""" also work for GpuAdvancedIncSubtensor1 """ """
Also work for GpuAdvancedIncSubtensor1.
"""
if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace: if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace:
new_op = node.op.clone_inplace() new_op = node.op.clone_inplace()
new_node = new_op(*node.inputs) new_node = new_op(*node.inputs)
...@@ -2756,6 +2799,7 @@ compile.optdb.register('local_inplace_incsubtensor1', ...@@ -2756,6 +2799,7 @@ compile.optdb.register('local_inplace_incsubtensor1',
def local_incsubtensor_of_zeros(node): def local_incsubtensor_of_zeros(node):
""" """
IncSubtensor(x, zeros, idx) -> x IncSubtensor(x, zeros, idx) -> x
""" """
if (isinstance(node.op, (IncSubtensor, if (isinstance(node.op, (IncSubtensor,
AdvancedIncSubtensor, AdvancedIncSubtensor,
...@@ -2784,6 +2828,7 @@ def local_setsubtensor_of_constants(node): ...@@ -2784,6 +2828,7 @@ def local_setsubtensor_of_constants(node):
SetSubtensor(x, x[idx], idx) -> x SetSubtensor(x, x[idx], idx) -> x
when x is constant or alloc. when x is constant or alloc.
""" """
if isinstance(node.op, IncSubtensor) and node.op.set_instead_of_inc: if isinstance(node.op, IncSubtensor) and node.op.set_instead_of_inc:
x = node.inputs[0] x = node.inputs[0]
...@@ -2813,12 +2858,14 @@ def local_setsubtensor_of_constants(node): ...@@ -2813,12 +2858,14 @@ def local_setsubtensor_of_constants(node):
@register_stabilize @register_stabilize
@gof.local_optimizer([AdvancedSubtensor1]) @gof.local_optimizer([AdvancedSubtensor1])
def local_adv_sub1_adv_inc_sub1(node): def local_adv_sub1_adv_inc_sub1(node):
"""Optimize the possible AdvSub1(AdvIncSub1(...), ...) """Optimize the possible AdvSub1(AdvIncSub1(...), ...).
AdvancedSubtensor1(AdvancedIncSubtensor1(0s, y, idx), idx) -> y AdvancedSubtensor1(AdvancedIncSubtensor1(0s, y, idx), idx) -> y
AdvancedSubtensor1(AdvancedSetSubtensor1(x, y, idx), idx) -> y AdvancedSubtensor1(AdvancedSetSubtensor1(x, y, idx), idx) -> y
:note: This opt add AssertOp. Otherwise, it would remove shape and Notes
-----
This opt add AssertOp. Otherwise, it would remove shape and
index error. If you want to get rid of them, see the index error. If you want to get rid of them, see the
:ref:`unsafe_optimization` section. :ref:`unsafe_optimization` section.
...@@ -2862,6 +2909,7 @@ def local_useless_inc_subtensor_alloc(node): ...@@ -2862,6 +2909,7 @@ def local_useless_inc_subtensor_alloc(node):
Replaces an [Advanced]IncSubtensor[1], whose increment is an `alloc` of Replaces an [Advanced]IncSubtensor[1], whose increment is an `alloc` of
a fully or partially broadcastable variable, by one that skips the a fully or partially broadcastable variable, by one that skips the
intermediate `alloc` where possible. intermediate `alloc` where possible.
""" """
if isinstance(node.op, (IncSubtensor, if isinstance(node.op, (IncSubtensor,
AdvancedIncSubtensor, AdvancedIncSubtensor,
...@@ -2962,7 +3010,8 @@ def local_useless_inc_subtensor_alloc(node): ...@@ -2962,7 +3010,8 @@ def local_useless_inc_subtensor_alloc(node):
@gof.local_optimizer([T.Rebroadcast]) @gof.local_optimizer([T.Rebroadcast])
def local_useless_rebroadcast(node): def local_useless_rebroadcast(node):
""" """
Remove Rebroadcast if id does not actually change the broadcasting pattern Remove Rebroadcast if id does not actually change the broadcasting pattern.
""" """
if isinstance(node.op, T.Rebroadcast): if isinstance(node.op, T.Rebroadcast):
x = node.inputs[0] x = node.inputs[0]
...@@ -2992,6 +3041,7 @@ def local_rebroadcast_lift(node): ...@@ -2992,6 +3041,7 @@ def local_rebroadcast_lift(node):
Rebroadcast(Elemwise(x)) => Elemwise(Rebroadcast(x)) Rebroadcast(Elemwise(x)) => Elemwise(Rebroadcast(x))
Rebroadcast(Rebroadcast(x)) => Rebroadcast(x) Rebroadcast(Rebroadcast(x)) => Rebroadcast(x)
""" """
op = node.op op = node.op
if not isinstance(op, T.Rebroadcast): if not isinstance(op, T.Rebroadcast):
...@@ -3023,8 +3073,14 @@ def apply_rebroadcast_opt(rval): ...@@ -3023,8 +3073,14 @@ def apply_rebroadcast_opt(rval):
Apply as many times as required the optimization local_useless_rebroadcast Apply as many times as required the optimization local_useless_rebroadcast
and local_rebroadcast_lift. and local_rebroadcast_lift.
:param rval: a Variable Parameters
:return: a Variable (the same if no optimization can be applied) ----------
rval: a Variable
Returns
-------
A Variable (the same if no optimization can be applied)
""" """
changed = True changed = True
...@@ -3056,6 +3112,7 @@ def local_join_1(node): ...@@ -3056,6 +3112,7 @@ def local_join_1(node):
"""Join(i, x) => x """Join(i, x) => x
Remove Join() when only one element is joined. Remove Join() when only one element is joined.
""" """
if not isinstance(node.op, T.Join): if not isinstance(node.op, T.Join):
return return
...@@ -3070,7 +3127,8 @@ def local_join_1(node): ...@@ -3070,7 +3127,8 @@ def local_join_1(node):
def local_join_empty(node): def local_join_empty(node):
"""Join(i, x, y, empty) => Join(i, x, y) """Join(i, x, y, empty) => Join(i, x, y)
remove empty inputs to joins. The empty inputs can be anywhere. Remove empty inputs to joins. The empty inputs can be anywhere.
""" """
if not isinstance(node.op, T.Join): if not isinstance(node.op, T.Join):
return return
...@@ -3147,6 +3205,7 @@ def local_remove_switch_const_cond(node): ...@@ -3147,6 +3205,7 @@ def local_remove_switch_const_cond(node):
T.switch(cond,left,right) --> T.switch(cond,left,right) -->
if cond is constant and cond == 0: right if cond is constant and cond == 0: right
if cond is constant and cond != 0: left if cond is constant and cond != 0: left
""" """
if (isinstance(node.op, T.Elemwise) and if (isinstance(node.op, T.Elemwise) and
isinstance(node.op.scalar_op, scalar.basic.Switch)): isinstance(node.op.scalar_op, scalar.basic.Switch)):
...@@ -3183,7 +3242,9 @@ def local_mul_switch_sink(node): ...@@ -3183,7 +3242,9 @@ def local_mul_switch_sink(node):
This is useful because A and B may not be numerically stable and give This is useful because A and B may not be numerically stable and give
NaN or inf values for cases where the switch returns 0. NaN or inf values for cases where the switch returns 0.
With this optimization T.grad(T.switch(...)) has the right behavior. With this optimization T.grad(T.switch(...)) has the right behavior.
Exemple:
Examples
--------
x -> f(x) x -> f(x)
x -> g(x) x -> g(x)
y = T.switch(cond,f(x),g(x)) y = T.switch(cond,f(x),g(x))
...@@ -3193,6 +3254,7 @@ def local_mul_switch_sink(node): ...@@ -3193,6 +3254,7 @@ def local_mul_switch_sink(node):
T.grad(y,x) -> switch(cond,grad(f(x),x), 0) + switch(cond,0,grad(g(x),x)) T.grad(y,x) -> switch(cond,grad(f(x),x), 0) + switch(cond,0,grad(g(x),x))
This will be particularly useful for the lazyif because we skip This will be particularly useful for the lazyif because we skip
an entire part of the graph. an entire part of the graph.
""" """
if node.op != T.mul: if node.op != T.mul:
return False return False
...@@ -3234,6 +3296,7 @@ def local_div_switch_sink(node): ...@@ -3234,6 +3296,7 @@ def local_div_switch_sink(node):
This is useful because A may not be numerically stable and give This is useful because A may not be numerically stable and give
NaN or inf values for cases where the switch returns 0. NaN or inf values for cases where the switch returns 0.
See local_mul_switch_sink for more details. See local_mul_switch_sink for more details.
""" """
if (node.op != T.true_div and node.op != T.int_div): if (node.op != T.true_div and node.op != T.int_div):
return False return False
...@@ -3308,6 +3371,7 @@ def local_useless_split(node): ...@@ -3308,6 +3371,7 @@ def local_useless_split(node):
""" Split{n_splits=1}(x, y) -> x """ Split{n_splits=1}(x, y) -> x
Remove Split with only 1 split. Remove Split with only 1 split.
""" """
if isinstance(node.op, T.Split): if isinstance(node.op, T.Split):
if node.op.len_splits == 1: if node.op.len_splits == 1:
...@@ -3329,6 +3393,7 @@ def local_flatten_lift(node): ...@@ -3329,6 +3393,7 @@ def local_flatten_lift(node):
This optimization is needed by optimization This optimization is needed by optimization
nnet/sigm.py:log1msigm_to_softplus to get applied when there is a flatten. nnet/sigm.py:log1msigm_to_softplus to get applied when there is a flatten.
""" """
if (isinstance(node.op, T.Flatten) and if (isinstance(node.op, T.Flatten) and
node.inputs[0].owner and node.inputs[0].owner and
...@@ -3347,6 +3412,7 @@ def local_flatten_lift(node): ...@@ -3347,6 +3412,7 @@ def local_flatten_lift(node):
def local_reshape_chain(node): def local_reshape_chain(node):
""" """
Reshape(Reshape(shape1),shape2) -> Reshape(shape2) Reshape(Reshape(shape1),shape2) -> Reshape(shape2)
""" """
if not opt.check_chain(node, T.Reshape, T.Reshape): if not opt.check_chain(node, T.Reshape, T.Reshape):
return False return False
...@@ -3378,6 +3444,7 @@ def local_reshape_lift(node): ...@@ -3378,6 +3444,7 @@ def local_reshape_lift(node):
This optimization is needed by optimization This optimization is needed by optimization
nnet/sigm.py:log1msigm_to_softplus to get applied when there is a reshape. nnet/sigm.py:log1msigm_to_softplus to get applied when there is a reshape.
""" """
if (isinstance(node.op, T.Reshape) and if (isinstance(node.op, T.Reshape) and
node.inputs[0].owner and node.inputs[0].owner and
...@@ -3526,15 +3593,20 @@ class Canonizer(gof.LocalOptimizer): ...@@ -3526,15 +3593,20 @@ class Canonizer(gof.LocalOptimizer):
Usage: Canonizer(main, inverse, reciprocal, calculate) Usage: Canonizer(main, inverse, reciprocal, calculate)
* main: a suitable Op class that is commutative, associative and Parameters
----------
main
A suitable Op class that is commutative, associative and
takes one to an arbitrary number of inputs, e.g. add or takes one to an arbitrary number of inputs, e.g. add or
mul mul
* inverse: an Op class such that inverse(main(x, y), y) == x inverse
An Op class such that inverse(main(x, y), y) == x
e.g. sub or true_div e.g. sub or true_div
* reciprocal: a function such that main(x, reciprocal(y)) == reciprocal
inverse(x, y) e.g. neg or inv A function such that main(x, reciprocal(y)) == inverse(x, y)
e.g. neg or inv
* calculate: function that takes a list of numpy.ndarray instances calculate
Function that takes a list of numpy.ndarray instances
for the numerator, another list for the denumerator, for the numerator, another list for the denumerator,
and calculates inverse(main(*num), main(*denum)). It and calculates inverse(main(*num), main(*denum)). It
takes a keyword argument, aslist. If True, the value takes a keyword argument, aslist. If True, the value
...@@ -3545,7 +3617,8 @@ class Canonizer(gof.LocalOptimizer): ...@@ -3545,7 +3617,8 @@ class Canonizer(gof.LocalOptimizer):
The variable is a local_optimizer. It is best used with a TopoOptimizer in The variable is a local_optimizer. It is best used with a TopoOptimizer in
in_to_out order. in_to_out order.
Examples: Examples
--------
T = theano.tensor T = theano.tensor
add_canonizer = Canonizer(T.add, T.sub, T.neg, add_canonizer = Canonizer(T.add, T.sub, T.neg,
lambda n, d: sum(n) - sum(d)) lambda n, d: sum(n) - sum(d))
...@@ -3563,6 +3636,7 @@ class Canonizer(gof.LocalOptimizer): ...@@ -3563,6 +3636,7 @@ class Canonizer(gof.LocalOptimizer):
2 * x / 2 -> x 2 * x / 2 -> x
x * y * z -> Elemwise(T.mul){x,y,z} #only one pass over the memory. x * y * z -> Elemwise(T.mul){x,y,z} #only one pass over the memory.
!-> Elemwise(T.mul){x,Elemwise(T.mul){y,z}} !-> Elemwise(T.mul){x,Elemwise(T.mul){y,z}}
""" """
def __init__(self, main, inverse, reciprocal, calculate, def __init__(self, main, inverse, reciprocal, calculate,
...@@ -3747,8 +3821,13 @@ class Canonizer(gof.LocalOptimizer): ...@@ -3747,8 +3821,13 @@ class Canonizer(gof.LocalOptimizer):
@staticmethod @staticmethod
def get_constant(v): def get_constant(v):
""" """
Returns a numeric constant if v is a Constant or, well, a
Returns
-------
object
A numeric constant if v is a Constant or, well, a
numeric constant. If v is a plain Variable, returns None. numeric constant. If v is a plain Variable, returns None.
""" """
if isinstance(v, Variable): if isinstance(v, Variable):
try: try:
...@@ -3762,6 +3841,7 @@ class Canonizer(gof.LocalOptimizer): ...@@ -3762,6 +3841,7 @@ class Canonizer(gof.LocalOptimizer):
""" """
Shorthand for: Shorthand for:
self.simplify_constants(*self.simplify_factors(num, denum)) self.simplify_constants(*self.simplify_factors(num, denum))
""" """
rval = self.simplify_constants(*self.simplify_factors(num, denum), rval = self.simplify_constants(*self.simplify_factors(num, denum),
out_type=out_type) out_type=out_type)
...@@ -3781,6 +3861,7 @@ class Canonizer(gof.LocalOptimizer): ...@@ -3781,6 +3861,7 @@ class Canonizer(gof.LocalOptimizer):
[x], [x] -> [], [] [x], [x] -> [], []
[x, y], [x] -> [y], [] [x, y], [x] -> [y], []
[a, b], [c, d] -> [a, b], [c, d] [a, b], [c, d] -> [a, b], [c, d]
""" """
for v in list(num): for v in list(num):
if v in denum: if v in denum:
...@@ -3790,18 +3871,22 @@ class Canonizer(gof.LocalOptimizer): ...@@ -3790,18 +3871,22 @@ class Canonizer(gof.LocalOptimizer):
def simplify_constants(self, orig_num, orig_denum, out_type=None): def simplify_constants(self, orig_num, orig_denum, out_type=None):
""" """
Find all constants and put them together into a single constant.
Finds all constants in orig_num and orig_denum (using Finds all constants in orig_num and orig_denum (using
get_constant) and puts them together into a single get_constant) and puts them together into a single
constant. The constant is inserted as the first element of the constant. The constant is inserted as the first element of the
numerator. If the constant is the neutral element, it is numerator. If the constant is the neutral element, it is
removed from the numerator. Examples: removed from the numerator.
Examples
--------
Let main be multiplication: Let main be multiplication:
[2, 3, x], [] -> [6, x], [] [2, 3, x], [] -> [6, x], []
[x, y, 2], [4, z] -> [0.5, x, y], [z] [x, y, 2], [4, z] -> [0.5, x, y], [z]
[x, 2, y], [z, 2] -> [x, y], [z] [x, 2, y], [z, 2] -> [x, y], [z]
""" """
# Lists representing the numerator and denumerator # Lists representing the numerator and denumerator
...@@ -3969,13 +4054,15 @@ register_canonicalize(local_neg_to_mul) ...@@ -3969,13 +4054,15 @@ register_canonicalize(local_neg_to_mul)
@register_specialize @register_specialize
@gof.local_optimizer([T.Sum, T.elemwise.Prod]) @gof.local_optimizer([T.Sum, T.elemwise.Prod])
def local_sum_prod_mul_by_scalar(node): def local_sum_prod_mul_by_scalar(node):
"""sum(scalar * smth) -> scalar * sum(smth) """
sum(scalar * smth) -> scalar * sum(smth)
sum(-smth) -> -sum(smth) sum(-smth) -> -sum(smth)
or or
prod(scalar * smth) -> scalar ** size(smth) * prod(smth) prod(scalar * smth) -> scalar ** size(smth) * prod(smth)
prod(-smth) -> -1 ** size(smth) * prod(smth) prod(-smth) -> -1 ** size(smth) * prod(smth)
""" """
# TODO: if the the thing inside the Sum is a division, # TODO: if the the thing inside the Sum is a division,
# we should get at the numerator.... # we should get at the numerator....
...@@ -4040,8 +4127,11 @@ def local_elemwise_sub_zeros(node): ...@@ -4040,8 +4127,11 @@ def local_elemwise_sub_zeros(node):
@register_specialize @register_specialize
@gof.local_optimizer([T.Sum]) @gof.local_optimizer([T.Sum])
def local_sum_div_dimshuffle(node): def local_sum_div_dimshuffle(node):
'''sum(a / dimshuffle{...}(b), axis=l) -> sum(a, axis={...}) / b, """
if dimension l of the DimShuffle is 'x'.''' sum(a / dimshuffle{...}(b), axis=l) -> sum(a, axis={...}) / b,
if dimension l of the DimShuffle is 'x'.
"""
# TODO: extend it to product, and quotient of products # TODO: extend it to product, and quotient of products
# It does not make much sense now to extend it to the case where the # It does not make much sense now to extend it to the case where the
...@@ -4128,8 +4218,10 @@ def local_sum_div_dimshuffle(node): ...@@ -4128,8 +4218,10 @@ def local_sum_div_dimshuffle(node):
@register_canonicalize @register_canonicalize
@gof.local_optimizer([T.Sum, T.elemwise.Prod]) @gof.local_optimizer([T.Sum, T.elemwise.Prod])
def local_sum_prod_all_to_none(node): def local_sum_prod_all_to_none(node):
"""Sum{0,1,...N} -> Sum{} or """
Sum{0,1,...N} -> Sum{} or
Prod{0,1,...N} -> Prod{} Prod{0,1,...N} -> Prod{}
""" """
if isinstance(node.op, T.Sum) or isinstance(node.op, T.elemwise.Prod): if isinstance(node.op, T.Sum) or isinstance(node.op, T.elemwise.Prod):
opt_type = T.Sum if isinstance(node.op, T.Sum) else T.elemwise.Prod opt_type = T.Sum if isinstance(node.op, T.Sum) else T.elemwise.Prod
...@@ -4148,6 +4240,7 @@ def local_op_of_op(node): ...@@ -4148,6 +4240,7 @@ def local_op_of_op(node):
Prod(Prod()) -> single Prod() Prod(Prod()) -> single Prod()
or or
Sum(Sum()) -> single Sum() Sum(Sum()) -> single Sum()
""" """
if isinstance(node.op, T.elemwise.Prod) or isinstance(node.op, T.Sum): if isinstance(node.op, T.elemwise.Prod) or isinstance(node.op, T.Sum):
opt_type = T.Sum if isinstance(node.op, T.Sum) else T.elemwise.Prod opt_type = T.Sum if isinstance(node.op, T.Sum) else T.elemwise.Prod
...@@ -4219,14 +4312,16 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any, ...@@ -4219,14 +4312,16 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any,
@register_uncanonicalize # Needed for MaxAndArgmax -> CAReduce @register_uncanonicalize # Needed for MaxAndArgmax -> CAReduce
@gof.local_optimizer(ALL_REDUCE) @gof.local_optimizer(ALL_REDUCE)
def local_reduce_join(node): def local_reduce_join(node):
"""Reduce{scalar.op}(Join(axis=0, a, b), axis=0) -> Elemwise{scalar.op}(a, b) """
Reduce{scalar.op}(Join(axis=0, a, b), axis=0) -> Elemwise{scalar.op}(a, b)
:note: supported scalar.op are Maximum, Mimimum in some cases and Notes
Add and Mul in all cases. -----
Supported scalar.op are Maximum, Mimimum in some cases and Add and Mul in
all cases.
:note: Currently we must reduce on axis 0. It is probably Currently we must reduce on axis 0. It is probably extensible to the case
extensible to the case where we join and reduce on the same where we join and reduce on the same set of axis.
set of axis.
""" """
if (isinstance(node.op, T.CAReduce) and if (isinstance(node.op, T.CAReduce) and
...@@ -4312,7 +4407,7 @@ def local_cut_useless_reduce(node): ...@@ -4312,7 +4407,7 @@ def local_cut_useless_reduce(node):
@register_specialize @register_specialize
@gof.local_optimizer(ALL_REDUCE) @gof.local_optimizer(ALL_REDUCE)
def local_reduce_broadcastable(node): def local_reduce_broadcastable(node):
"""Remove reduction over broadcastable dimensions""" """Remove reduction over broadcastable dimensions."""
if isinstance(node.op, T.CAReduce): if isinstance(node.op, T.CAReduce):
reduced, = node.inputs reduced, = node.inputs
odtype = node.outputs[0].dtype odtype = node.outputs[0].dtype
...@@ -4351,9 +4446,11 @@ def local_reduce_broadcastable(node): ...@@ -4351,9 +4446,11 @@ def local_reduce_broadcastable(node):
@register_specialize @register_specialize
@gof.local_optimizer([T.Sum, T.elemwise.Prod]) @gof.local_optimizer([T.Sum, T.elemwise.Prod])
def local_opt_alloc(node): def local_opt_alloc(node):
""" sum(alloc(constant,shapes...)) => constant*prod(shapes) """
sum(alloc(constant,shapes...)) => constant*prod(shapes)
or or
prod(alloc(constant,shapes...)) => constant**prod(shapes) prod(alloc(constant,shapes...)) => constant**prod(shapes)
""" """
if isinstance(node.op, T.Sum) or isinstance(node.op, T.elemwise.Prod): if isinstance(node.op, T.Sum) or isinstance(node.op, T.elemwise.Prod):
node_inps, = node.inputs node_inps, = node.inputs
...@@ -4406,9 +4503,11 @@ def local_neg_neg(node): ...@@ -4406,9 +4503,11 @@ def local_neg_neg(node):
@register_specialize @register_specialize
@gof.local_optimizer([T.neg]) @gof.local_optimizer([T.neg])
def local_neg_div_neg(node): def local_neg_div_neg(node):
"""- (-a / b) -> a / b """
- (-a / b) -> a / b
Also performs - (c / b) -> ((-c) / b) when c is a scalar constant. Also performs - (c / b) -> ((-c) / b) when c is a scalar constant.
""" """
if node.op == T.neg: if node.op == T.neg:
if node.inputs[0].owner and node.inputs[0].owner.op == T.true_div: if node.inputs[0].owner and node.inputs[0].owner.op == T.true_div:
...@@ -4427,8 +4526,10 @@ def local_neg_div_neg(node): ...@@ -4427,8 +4526,10 @@ def local_neg_div_neg(node):
@gof.local_optimizer([T.mul]) @gof.local_optimizer([T.mul])
def local_mul_zero(node): def local_mul_zero(node):
"""As part of canonicalization, we replace multiplication by zero """
As part of canonicalization, we replace multiplication by zero
with zero. with zero.
""" """
if node.op == T.mul: if node.op == T.mul:
otype = node.outputs[0].type otype = node.outputs[0].type
...@@ -4489,10 +4590,12 @@ register_canonicalize(local_pow_canonicalize) ...@@ -4489,10 +4590,12 @@ register_canonicalize(local_pow_canonicalize)
@register_specialize @register_specialize
@gof.local_optimizer([T.mul]) @gof.local_optimizer([T.mul])
def local_mul_to_sqr(node): def local_mul_to_sqr(node):
"""x*x -> sqr(x) """
x*x -> sqr(x)
This is faster on the GPU when memory fetching is a big part of This is faster on the GPU when memory fetching is a big part of
the computation time. the computation time.
""" """
if node.op == T.mul: if node.op == T.mul:
if len(node.inputs) == 2: if len(node.inputs) == 2:
...@@ -4620,7 +4723,8 @@ def local_pow_specialize_device(node): ...@@ -4620,7 +4723,8 @@ def local_pow_specialize_device(node):
@gof.local_optimizer([T.mul]) @gof.local_optimizer([T.mul])
def local_mul_specialize(node): def local_mul_specialize(node):
"""Remove special-case constants from mul arguments and useless neg in inputs. """
Remove special-case constants from mul arguments and useless neg in inputs.
mul(-1, x) -> neg(x) mul(-1, x) -> neg(x)
mul(1, x, y) -> mul(x, y) mul(1, x, y) -> mul(x, y)
...@@ -4629,6 +4733,7 @@ def local_mul_specialize(node): ...@@ -4629,6 +4733,7 @@ def local_mul_specialize(node):
This is not done if we would add more nodes in the graph, like with: This is not done if we would add more nodes in the graph, like with:
mul(-1, x, y) -/-> neg(mul(x, y)) mul(-1, x, y) -/-> neg(mul(x, y))
""" """
# here, we are past the point of canonicalization, so we don't # here, we are past the point of canonicalization, so we don't
# want to put in un-necessary fills. # want to put in un-necessary fills.
...@@ -4766,8 +4871,9 @@ local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'X_over_absX') ...@@ -4766,8 +4871,9 @@ local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'X_over_absX')
@gof.local_optimizer([T.abs_]) @gof.local_optimizer([T.abs_])
def local_abs_lift(node): def local_abs_lift(node):
""" """
move the abs toward the input. This is needed for Move the abs toward the input.
check_for_x_over_absX to apply in more case.
This is needed for check_for_x_over_absX to apply in more case.
""" """
if node.op == T.abs_ and node.inputs[0].owner: if node.op == T.abs_ and node.inputs[0].owner:
...@@ -4783,7 +4889,7 @@ def local_abs_lift(node): ...@@ -4783,7 +4889,7 @@ def local_abs_lift(node):
@gof.local_optimizer([T.mul, T.true_div]) @gof.local_optimizer([T.mul, T.true_div])
def local_abs_merge(node): def local_abs_merge(node):
""" """
merge abs generated by local_abs_lift when the canonizer don't Merge abs generated by local_abs_lift when the canonizer don't
need it anymore need it anymore
""" """
...@@ -4968,6 +5074,8 @@ def attempt_distribution(factor, num, denum, out_type): ...@@ -4968,6 +5074,8 @@ def attempt_distribution(factor, num, denum, out_type):
@gof.local_optimizer([T.mul, T.true_div, T.inv]) @gof.local_optimizer([T.mul, T.true_div, T.inv])
def local_greedy_distributor(node): def local_greedy_distributor(node):
""" """
Optimize by reducing the number of multiplications and/or divisions.
This optimization tries to apply distributivity of multiplication This optimization tries to apply distributivity of multiplication
to addition in order to reduce the number of multiplications to addition in order to reduce the number of multiplications
and/or divisions that must be done. The algorithm weighs division and/or divisions that must be done. The algorithm weighs division
...@@ -4985,6 +5093,7 @@ def local_greedy_distributor(node): ...@@ -4985,6 +5093,7 @@ def local_greedy_distributor(node):
This optimization aims to reduce computational cost. It may also This optimization aims to reduce computational cost. It may also
increase numerical stability, e.g. when x and/or y tend to 0 in increase numerical stability, e.g. when x and/or y tend to 0 in
example 1. example 1.
""" """
out = node.outputs[0] out = node.outputs[0]
...@@ -5083,7 +5192,13 @@ def constant_folding(node): ...@@ -5083,7 +5192,13 @@ def constant_folding(node):
def _is_1(expr): def _is_1(expr):
"""rtype bool. True iff expr is a constant close to 1 """
Returns
-------
bool
True iff expr is a constant close to 1.
""" """
try: try:
v = get_scalar_constant_value(expr) v = get_scalar_constant_value(expr)
...@@ -5093,7 +5208,13 @@ def _is_1(expr): ...@@ -5093,7 +5208,13 @@ def _is_1(expr):
def _is_minus1(expr): def _is_minus1(expr):
"""rtype bool. True iff expr is a constant close to -1 """
Returns
-------
bool
True iff expr is a constant close to -1.
""" """
try: try:
v = get_scalar_constant_value(expr) v = get_scalar_constant_value(expr)
...@@ -5103,13 +5224,19 @@ def _is_minus1(expr): ...@@ -5103,13 +5224,19 @@ def _is_minus1(expr):
def get_clients(node): def get_clients(node):
"Used by erf/erfc opt to track less frequent op" """
Used by erf/erfc opt to track less frequent op.
"""
return [c for c, i in node.outputs[0].clients return [c for c, i in node.outputs[0].clients
if c != "output"] if c != "output"]
def get_clients2(node): def get_clients2(node):
"Used by erf/erfc opt to track less frequent op" """
Used by erf/erfc opt to track less frequent op.
"""
l = [] l = []
for c, i in node.outputs[0].clients: for c, i in node.outputs[0].clients:
if c != "output": if c != "output":
...@@ -5622,9 +5749,12 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32, ...@@ -5622,9 +5749,12 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
""" """
We parametrize it to make it work for Elemwise and GpuElemwise op. We parametrize it to make it work for Elemwise and GpuElemwise op.
:param OP: GpuElemwise or Elemwise class (the one that we want to fuse) Parameters
----------
:param max_input_fct: a function that returns the maximum number of inputs OP
GpuElemwise or Elemwise class (the one that we want to fuse)
max_input_fct
A function that returns the maximum number of inputs
that this elemwise can take (useful for GpuElemwise). that this elemwise can take (useful for GpuElemwise).
GPU kernel currently has a limit of 256 bytes for GPU kernel currently has a limit of 256 bytes for
the size of all parameters passed to it. As currently the size of all parameters passed to it. As currently
...@@ -5634,6 +5764,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32, ...@@ -5634,6 +5764,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
On the CPU we limit to 32 input variables On the CPU we limit to 32 input variables
since that is the maximum numpy support. since that is the maximum numpy support.
""" """
if maker is None: if maker is None:
def maker(node, scalar_op): def maker(node, scalar_op):
...@@ -5647,6 +5778,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32, ...@@ -5647,6 +5778,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
For mixed dtype, we let the Composite op do the cast. It lets the C For mixed dtype, we let the Composite op do the cast. It lets the C
compiler do the cast. compiler do the cast.
The number of dimensions is validated at call time by theano itself. The number of dimensions is validated at call time by theano itself.
""" """
# META TODO: PUT THESE THINGS IN TRAC, NOT TODO NOTES!! # META TODO: PUT THESE THINGS IN TRAC, NOT TODO NOTES!!
# TODO: use broadcast flag? # TODO: use broadcast flag?
...@@ -5862,7 +5994,7 @@ local_elemwise_fusion = local_elemwise_fusion_op(T.Elemwise, ...@@ -5862,7 +5994,7 @@ local_elemwise_fusion = local_elemwise_fusion_op(T.Elemwise,
class FusionOptimizer(Optimizer): class FusionOptimizer(Optimizer):
"""Graph optimizer for Fusion of elemwise operations""" """Graph optimizer for Fusion of elemwise operations."""
def __init__(self, local_optimizer): def __init__(self, local_optimizer):
Optimizer.__init__(self) Optimizer.__init__(self)
self.optimizer = local_optimizer self.optimizer = local_optimizer
......
...@@ -28,8 +28,8 @@ problem. ...@@ -28,8 +28,8 @@ problem.
Also, we should make the fgraph refuse optimization that break the Also, we should make the fgraph refuse optimization that break the
canonization of the graph in the optimizations phases where the graph is canonization of the graph in the optimizations phases where the graph is
supposed to be canonical. supposed to be canonical.
"""
"""
# TODO: intelligent merge for mul/add # TODO: intelligent merge for mul/add
# TODO: 0*x -> 0 # TODO: 0*x -> 0
import logging import logging
...@@ -72,12 +72,15 @@ def local_max_and_argmax(node): ...@@ -72,12 +72,15 @@ def local_max_and_argmax(node):
@gof.local_optimizer([T.neg]) @gof.local_optimizer([T.neg])
def local_max_to_min(node): def local_max_to_min(node):
""" """
change -(max(-x)) to min Change -(max(-x)) to min.
This is tested in tensor/tests/test_basic.py:test_min_max This is tested in tensor/tests/test_basic.py:test_min_max.
:note: we don't need an opt that will do the reverse as by default Notes
-----
We don't need an opt that will do the reverse as by default
the interface put only MaxAndArgmax into the graph. the interface put only MaxAndArgmax into the graph.
""" """
if node.op == T.neg and node.inputs[0].owner: if node.op == T.neg and node.inputs[0].owner:
max = node.inputs[0] max = node.inputs[0]
......
...@@ -19,7 +19,8 @@ __docformat__ = "restructuredtext en" ...@@ -19,7 +19,8 @@ __docformat__ = "restructuredtext en"
class RandomStateType(gof.Type): class RandomStateType(gof.Type):
"""A Type wrapper for numpy.random.RandomState """
A Type wrapper for numpy.random.RandomState.
The reason this exists (and `Generic` doesn't suffice) is that The reason this exists (and `Generic` doesn't suffice) is that
RandomState objects that would appear to be equal do not compare RandomState objects that would appear to be equal do not compare
...@@ -99,35 +100,36 @@ random_state_type = RandomStateType() ...@@ -99,35 +100,36 @@ random_state_type = RandomStateType()
class RandomFunction(gof.Op): class RandomFunction(gof.Op):
"""Op that draws random numbers from a numpy.random.RandomState object
""" """
__props__ = ("fn", "outtype", "inplace", "ndim_added") Op that draws random numbers from a numpy.random.RandomState object.
def __init__(self, fn, outtype, inplace=False, ndim_added=0): Parameters
""" ----------
:param fn: a member function of numpy.random.RandomState fn : string or function reference
A member function of numpy.random.RandomState. A string will
be interpreted as the name of a member function of
numpy.random.RandomState.
Technically, any function with a signature like the ones in Technically, any function with a signature like the ones in
numpy.random.RandomState will do. This function must accept numpy.random.RandomState will do. This function must accept
the shape (sometimes called size) of the output as the last the shape (sometimes called size) of the output as the last
positional argument. positional argument.
outtype
:type fn: string or function reference. A string will The theano Type of the output.
be interpreted as the name of a member function of args
numpy.random.RandomState. A list of default arguments for the function
kwargs
:param outtype: the theano Type of the output
:param args: a list of default arguments for the function
:param kwargs:
If the 'inplace' key is there, its value will be used to If the 'inplace' key is there, its value will be used to
determine if the op operates inplace or not. determine if the op operates inplace or not.
If the 'ndim_added' key is there, its value indicates how If the 'ndim_added' key is there, its value indicates how
many more dimensions this op will add to the output, in many more dimensions this op will add to the output, in
addition to the shape's dimensions (used in multinomial and addition to the shape's dimensions (used in multinomial and
permutation). permutation).
""" """
__props__ = ("fn", "outtype", "inplace", "ndim_added")
def __init__(self, fn, outtype, inplace=False, ndim_added=0):
self.__setstate__([fn, outtype, inplace, ndim_added]) self.__setstate__([fn, outtype, inplace, ndim_added])
def __getstate__(self): def __getstate__(self):
...@@ -151,10 +153,13 @@ class RandomFunction(gof.Op): ...@@ -151,10 +153,13 @@ class RandomFunction(gof.Op):
def make_node(self, r, shape, *args): def make_node(self, r, shape, *args):
""" """
:param r: a numpy.random.RandomState instance, or a Variable of Type Parameters
----------
r
A numpy.random.RandomState instance, or a Variable of Type
RandomStateType that will contain a RandomState instance. RandomStateType that will contain a RandomState instance.
shape
:param shape: an lvector with a shape defining how many samples An lvector with a shape defining how many samples
to draw. In the case of scalar distributions, it is the shape to draw. In the case of scalar distributions, it is the shape
of the tensor output by this Op. In that case, at runtime, the of the tensor output by this Op. In that case, at runtime, the
value associated with this lvector must have a length equal to value associated with this lvector must have a length equal to
...@@ -163,16 +168,16 @@ class RandomFunction(gof.Op): ...@@ -163,16 +168,16 @@ class RandomFunction(gof.Op):
len(self.outtype), is equal to len(shape)+self.ndim_added. len(self.outtype), is equal to len(shape)+self.ndim_added.
The special case where len(shape) == 0 means that the smallest The special case where len(shape) == 0 means that the smallest
shape compatible with the argument's shape will be used. shape compatible with the argument's shape will be used.
args
:param args: the values associated with these variables will The values associated with these variables will be passed to the
be passed to the RandomState function during perform as extra RandomState function during perform as extra "*args"-style
"*args"-style arguments. These should be castable to variables arguments. These should be castable to variables of Type TensorType.
of Type TensorType.
Returns
:rtype: Apply -------
Apply
:return: Apply with two outputs. The first output is a Apply with two outputs. The first output is a gof.generic Variable
gof.generic Variable from which to draw further random numbers. from which to draw further random numbers.
The second output is the outtype() instance holding the random The second output is the outtype() instance holding the random
draw. draw.
...@@ -289,12 +294,15 @@ def _infer_ndim_bcast(ndim, shape, *args): ...@@ -289,12 +294,15 @@ def _infer_ndim_bcast(ndim, shape, *args):
""" """
Infer the number of dimensions from the shape or the other arguments. Infer the number of dimensions from the shape or the other arguments.
:rtype: (int, variable, tuple) triple, where the variable is an integer Returns
vector, and the tuple contains Booleans. -------
:returns: the first element returned is the inferred number of dimensions. (int, variable, tuple) triple, where the variable is an integer vector,
The second element is the shape inferred (combining symbolic and constant and the tuple contains Booleans
informations from shape and args). The first element returned is the inferred number of dimensions.
The second element is the shape inferred (combining symbolic and
constant informations from shape and args).
The third element is a broadcasting pattern corresponding to that shape. The third element is a broadcasting pattern corresponding to that shape.
""" """
# Find the minimum value of ndim required by the *args # Find the minimum value of ndim required by the *args
...@@ -390,7 +398,7 @@ def _infer_ndim_bcast(ndim, shape, *args): ...@@ -390,7 +398,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
def _generate_broadcasting_indices(out_shape, *shapes): def _generate_broadcasting_indices(out_shape, *shapes):
''' """
Return indices over each shape that broadcast them to match out_shape. Return indices over each shape that broadcast them to match out_shape.
The first returned list is equivalent to numpy.ndindex(out_shape), The first returned list is equivalent to numpy.ndindex(out_shape),
...@@ -400,7 +408,8 @@ def _generate_broadcasting_indices(out_shape, *shapes): ...@@ -400,7 +408,8 @@ def _generate_broadcasting_indices(out_shape, *shapes):
The shapes should have the same length as out_shape. If they are longer, The shapes should have the same length as out_shape. If they are longer,
the right-most dimensions are ignored. the right-most dimensions are ignored.
'''
"""
all_shapes = (out_shape,) + shapes all_shapes = (out_shape,) + shapes
# Will contain the return value: a list of indices for each argument # Will contain the return value: a list of indices for each argument
ret_indices = [[()] for shape in all_shapes] ret_indices = [[()] for shape in all_shapes]
...@@ -447,6 +456,7 @@ def uniform(random_state, size=None, low=0.0, high=1.0, ndim=None, dtype=None): ...@@ -447,6 +456,7 @@ def uniform(random_state, size=None, low=0.0, high=1.0, ndim=None, dtype=None):
If dtype is not specified, it will be inferred from the dtype of If dtype is not specified, it will be inferred from the dtype of
low and high, but will be at least as precise as floatX. low and high, but will be at least as precise as floatX.
""" """
low = tensor.as_tensor_variable(low) low = tensor.as_tensor_variable(low)
high = tensor.as_tensor_variable(high) high = tensor.as_tensor_variable(high)
...@@ -471,6 +481,7 @@ def normal(random_state, size=None, avg=0.0, std=1.0, ndim=None, dtype=None): ...@@ -471,6 +481,7 @@ def normal(random_state, size=None, avg=0.0, std=1.0, ndim=None, dtype=None):
If dtype is not specified, it will be inferred from the dtype of If dtype is not specified, it will be inferred from the dtype of
avg and std, but will be at least as precise as floatX. avg and std, but will be at least as precise as floatX.
""" """
avg = tensor.as_tensor_variable(avg) avg = tensor.as_tensor_variable(avg)
std = tensor.as_tensor_variable(std) std = tensor.as_tensor_variable(std)
...@@ -493,6 +504,7 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None, ...@@ -493,6 +504,7 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
If size is None, the output shape will be determined by the shapes If size is None, the output shape will be determined by the shapes
of n and prob. of n and prob.
""" """
if prob is not None: if prob is not None:
p = prob p = prob
...@@ -514,12 +526,13 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None, ...@@ -514,12 +526,13 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
def random_integers_helper(random_state, low, high, size): def random_integers_helper(random_state, low, high, size):
''' """
Helper function to draw random integers. Helper function to draw random integers.
This is a generalization of numpy.random.random_integers to the case where This is a generalization of numpy.random.random_integers to the case where
low and high are tensors. low and high are tensors.
'''
"""
# Figure out the output shape # Figure out the output shape
if size is not None: if size is not None:
out_ndim = len(size) out_ndim = len(size)
...@@ -570,6 +583,7 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None, ...@@ -570,6 +583,7 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None,
If size is None, the output shape will be determined by the shapes If size is None, the output shape will be determined by the shapes
of low and high. of low and high.
""" """
low = tensor.as_tensor_variable(low) low = tensor.as_tensor_variable(low)
high = tensor.as_tensor_variable(high) high = tensor.as_tensor_variable(high)
...@@ -580,11 +594,13 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None, ...@@ -580,11 +594,13 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None,
def choice_helper(random_state, a, replace, p, size): def choice_helper(random_state, a, replace, p, size):
"""Helper function to draw random numbers using numpy's choice function. """
Helper function to draw random numbers using numpy's choice function.
This is a generalization of numpy.random.choice that coerces This is a generalization of numpy.random.choice that coerces
`replace` to a bool and replaces `p` with None when p is a vector `replace` to a bool and replaces `p` with None when p is a vector
of 0 elements. of 0 elements.
""" """
if a.ndim > 1: if a.ndim > 1:
raise ValueError('a.ndim (%i) must be 0 or 1' % a.ndim) raise ValueError('a.ndim (%i) must be 0 or 1' % a.ndim)
...@@ -608,6 +624,7 @@ def choice(random_state, size=None, a=2, replace=True, p=None, ndim=None, ...@@ -608,6 +624,7 @@ def choice(random_state, size=None, a=2, replace=True, p=None, ndim=None,
may be a plain integer to supplement the missing information. may be a plain integer to supplement the missing information.
If size is None, a scalar will be returned. If size is None, a scalar will be returned.
""" """
# numpy.random.choice is only available for numpy versions >= 1.7 # numpy.random.choice is only available for numpy versions >= 1.7
major, minor, _ = numpy.version.short_version.split('.') major, minor, _ = numpy.version.short_version.split('.')
...@@ -631,17 +648,21 @@ def poisson(random_state, size=None, lam=1.0, ndim=None, dtype='int64'): ...@@ -631,17 +648,21 @@ def poisson(random_state, size=None, lam=1.0, ndim=None, dtype='int64'):
""" """
Draw samples from a Poisson distribution. Draw samples from a Poisson distribution.
The Poisson distribution is the limit of the Binomial distribution for large N. The Poisson distribution is the limit of the Binomial distribution for
large N.
:param lam: float or ndarray-like of the same shape as size parameter Parameters
----------
lam : float or ndarray-like of the same shape as size parameter
Expectation of interval, should be >= 0. Expectation of interval, should be >= 0.
size: int or tuple of ints, optional
Output shape. If the given shape is, e.g., (m, n, k), then m * n * k
samples are drawn.
dtype
The dtype of the return value (which will represent counts).
:param size: int or tuple of ints, optional size or ndim must be given.
Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn.
:param dtype: the dtype of the return value (which will represent counts)
size or ndim must be given
""" """
lam = tensor.as_tensor_variable(lam) lam = tensor.as_tensor_variable(lam)
...@@ -653,7 +674,8 @@ def poisson(random_state, size=None, lam=1.0, ndim=None, dtype='int64'): ...@@ -653,7 +674,8 @@ def poisson(random_state, size=None, lam=1.0, ndim=None, dtype='int64'):
def permutation_helper(random_state, n, shape): def permutation_helper(random_state, n, shape):
"""Helper function to generate permutations from integers. """
Helper function to generate permutations from integers.
permutation_helper(random_state, n, (1,)) will generate a permutation of permutation_helper(random_state, n, (1,)) will generate a permutation of
integers 0..n-1. integers 0..n-1.
...@@ -666,6 +688,7 @@ def permutation_helper(random_state, n, shape): ...@@ -666,6 +688,7 @@ def permutation_helper(random_state, n, shape):
This is a generalization of numpy.random.permutation to tensors. This is a generalization of numpy.random.permutation to tensors.
Otherwise it behaves the same. Otherwise it behaves the same.
""" """
# n should be a 0-dimension array # n should be a 0-dimension array
assert n.shape == () assert n.shape == ()
...@@ -688,17 +711,20 @@ def permutation_helper(random_state, n, shape): ...@@ -688,17 +711,20 @@ def permutation_helper(random_state, n, shape):
def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'): def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
""" """
Returns permutations of the integers between 0 and n-1, as many times Return permutations of the integers between 0 and n-1.
as required by size. For instance, if size=(p,q), p*q permutations
will be generated, and the output shape will be (p,q,n), because each Returns them as many times as required by size. For instance, if size=(p,q),
permutation is of size n. p*q permutations will be generated, and the output shape will be (p,q,n),
because each permutation is of size n.
Theano tries to infer the number of dimensions from the length of Theano tries to infer the number of dimensions from the length of
the size argument and the shape of n, but you may always specify it the size argument and the shape of n, but you may always specify it
with the `ndim` parameter. with the `ndim` parameter.
:note: Notes
-----
Note that the output will then be of dimension ndim+1. Note that the output will then be of dimension ndim+1.
""" """
if size is None or size == (): if size is None or size == ():
if not(ndim is None or ndim == 1): if not(ndim is None or ndim == 1):
...@@ -718,12 +744,13 @@ def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'): ...@@ -718,12 +744,13 @@ def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
def multinomial_helper(random_state, n, pvals, size): def multinomial_helper(random_state, n, pvals, size):
''' """
Helper function drawing from multinomial distributions. Helper function drawing from multinomial distributions.
This is a generalization of numpy.random.multinomial to the case where This is a generalization of numpy.random.multinomial to the case where
n and pvals are tensors. n and pvals are tensors.
'''
"""
# Figure out the shape if it's None # Figure out the shape if it's None
# Note: the output ndim will be ndim+1, because the multinomial # Note: the output ndim will be ndim+1, because the multinomial
# adds a dimension. The length of that dimension is pvals.shape[-1]. # adds a dimension. The length of that dimension is pvals.shape[-1].
...@@ -791,31 +818,40 @@ def multinomial_helper(random_state, n, pvals, size): ...@@ -791,31 +818,40 @@ def multinomial_helper(random_state, n, pvals, size):
def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5], def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
ndim=None, dtype='int64'): ndim=None, dtype='int64'):
"""Sample from one or more multinomial distributions defined by """
Sample from one or more multinomial distributions defined by
one-dimensional slices in pvals. one-dimensional slices in pvals.
:param pvals: a tensor of shape "nmulti+(L,)" describing each multinomial Parameters
----------
pvals
A tensor of shape "nmulti+(L,)" describing each multinomial
distribution. This tensor must have the property that distribution. This tensor must have the property that
numpy.allclose(pvals.sum(axis=-1), 1) is true. numpy.allclose(pvals.sum(axis=-1), 1) is true.
size
:param size: a vector of shape information for the output; this can also A vector of shape information for the output; this can also
specify the "nmulti" part of pvals' shape. A -1 in the k'th position specify the "nmulti" part of pvals' shape. A -1 in the k'th position
from the right means to borrow the k'th position from the from the right means to borrow the k'th position from the
right in nmulti. (See examples below.) right in nmulti. (See examples below.)
Default ``None`` means size=nmulti. Default ``None`` means size=nmulti.
n
:param n: the number of experiments to simulate for each The number of experiments to simulate for each
multinomial. This can be a scalar, or tensor, it will be multinomial. This can be a scalar, or tensor, it will be
broadcasted to have shape "nmulti". broadcasted to have shape "nmulti".
dtype
The dtype of the return value (which will represent counts)
:param dtype: the dtype of the return value (which will represent counts) Returns
-------
:returns: tensor of len(size)+1 dimensions, and shape[-1]==L, with tensor
Tensor of len(size)+1 dimensions, and shape[-1]==L, with
the specified ``dtype``, with the experiment counts. See the specified ``dtype``, with the experiment counts. See
examples to understand the shape of the return value, which is examples to understand the shape of the return value, which is
derived from both size and pvals.shape. In return value rval, derived from both size and pvals.shape. In return value rval,
"numpy.allclose(rval.sum(axis=-1), n)" will be true. "numpy.allclose(rval.sum(axis=-1), n)" will be true.
Extended Summary
----------------
For example, to simulate n experiments from each multinomial in a batch of For example, to simulate n experiments from each multinomial in a batch of
size B: size B:
...@@ -881,8 +917,8 @@ class RandomStreamsBase(object): ...@@ -881,8 +917,8 @@ class RandomStreamsBase(object):
return the number of successes. return the number of successes.
If the size argument is ambiguous on the number of dimensions, If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing ndim may be a plain integer to supplement the missing information.
information.
""" """
if prob is not None: if prob is not None:
p = prob p = prob
...@@ -895,8 +931,8 @@ class RandomStreamsBase(object): ...@@ -895,8 +931,8 @@ class RandomStreamsBase(object):
distribution between low and high. distribution between low and high.
If the size argument is ambiguous on the number of dimensions, If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing ndim may be a plain integer to supplement the missing information.
information.
""" """
return self.gen(uniform, size, low, high, ndim=ndim, dtype=dtype) return self.gen(uniform, size, low, high, ndim=ndim, dtype=dtype)
...@@ -906,8 +942,8 @@ class RandomStreamsBase(object): ...@@ -906,8 +942,8 @@ class RandomStreamsBase(object):
the specified standard deviation (std). the specified standard deviation (std).
If the size argument is ambiguous on the number of dimensions, If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing ndim may be a plain integer to supplement the missing information.
information.
""" """
return self.gen(normal, size, avg, std, ndim=ndim, dtype=dtype) return self.gen(normal, size, avg, std, ndim=ndim, dtype=dtype)
...@@ -917,8 +953,8 @@ class RandomStreamsBase(object): ...@@ -917,8 +953,8 @@ class RandomStreamsBase(object):
Sample a random integer between low and high, both inclusive. Sample a random integer between low and high, both inclusive.
If the size argument is ambiguous on the number of dimensions, If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing ndim may be a plain integer to supplement the missing information.
information.
""" """
return self.gen(random_integers, size, low, high, ndim=ndim, return self.gen(random_integers, size, low, high, ndim=ndim,
dtype=dtype) dtype=dtype)
...@@ -926,13 +962,14 @@ class RandomStreamsBase(object): ...@@ -926,13 +962,14 @@ class RandomStreamsBase(object):
def choice(self, size=None, a=2, replace=True, p=None, ndim=None, def choice(self, size=None, a=2, replace=True, p=None, ndim=None,
dtype='int64'): dtype='int64'):
""" """
Choose values from `a` with or without replacement. `a` can be a 1-D Choose values from `a` with or without replacement.
array or a positive scalar. If `a` is a scalar, the samples are drawn
from the range 0,...,a-1. `a` can be a 1-D array or a positive scalar.
If `a` is a scalar, the samples are drawn from the range 0,...,a-1.
If the size argument is ambiguous on the number of dimensions, If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing ndim may be a plain integer to supplement the missing information.
information.
""" """
return self.gen(choice, size, a, replace, p, ndim=ndim, dtype=dtype) return self.gen(choice, size, a, replace, p, ndim=ndim, dtype=dtype)
...@@ -940,27 +977,32 @@ class RandomStreamsBase(object): ...@@ -940,27 +977,32 @@ class RandomStreamsBase(object):
""" """
Draw samples from a Poisson distribution. Draw samples from a Poisson distribution.
The Poisson distribution is the limit of the Binomial distribution for large N. The Poisson distribution is the limit of the Binomial distribution for
large N.
If the size argument is ambiguous on the number of dimensions, If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing ndim may be a plain integer to supplement the missing information.
information.
""" """
return self.gen(poisson, size, lam, ndim=ndim, dtype=dtype) return self.gen(poisson, size, lam, ndim=ndim, dtype=dtype)
def permutation(self, size=None, n=1, ndim=None, dtype='int64'): def permutation(self, size=None, n=1, ndim=None, dtype='int64'):
""" """
Returns permutations of the integers between 0 and n-1, as many times Return permutations of the integers between 0 and n-1.
as required by size. For instance, if size=(p,q), p*q permutations
will be generated, and the output shape will be (p,q,n), because each Returns them as many times as required by size. For instance,
if size=(p,q), p*q permutations will be generated,
and the output shape will be (p,q,n), because each
permutation is of size n. permutation is of size n.
Theano tries to infer the number of dimensions from the length Theano tries to infer the number of dimensions from the length
of the size argument and the shape of n, but you may always of the size argument and the shape of n, but you may always
specify it with the `ndim` parameter. specify it with the `ndim` parameter.
.. note:: Notes
-----
Note that the output will then be of dimension ndim+1. Note that the output will then be of dimension ndim+1.
""" """
return self.gen(permutation, size, n, ndim=ndim, dtype=dtype) return self.gen(permutation, size, n, ndim=ndim, dtype=dtype)
...@@ -976,16 +1018,20 @@ class RandomStreamsBase(object): ...@@ -976,16 +1018,20 @@ class RandomStreamsBase(object):
of the size argument and the shapes of n and pvals, but you may of the size argument and the shapes of n and pvals, but you may
always specify it with the `ndim` parameter. always specify it with the `ndim` parameter.
.. note:: Notes
-----
Note that the output will then be of dimension ndim+1. Note that the output will then be of dimension ndim+1.
""" """
return self.gen(multinomial, size, n, pvals, ndim=ndim, dtype=dtype) return self.gen(multinomial, size, n, pvals, ndim=ndim, dtype=dtype)
def shuffle_row_elements(self, input): def shuffle_row_elements(self, input):
"""Return a variable with every row (rightmost index) shuffled. """
Return a variable with every row (rightmost index) shuffled.
This uses permutation random variable internally, available via This uses permutation random variable internally, available via
the ``.permutation`` attribute of the return value. the ``.permutation`` attribute of the return value.
""" """
perm = self.permutation(size=input.shape[:-1], n=input.shape[-1], perm = self.permutation(size=input.shape[:-1], n=input.shape[-1],
ndim=input.ndim - 1) ndim=input.ndim - 1)
......
"""Define RandomStreams, providing random number variables for Theano
graphs.
""" """
Define RandomStreams, providing random number variables for Theano
graphs.
"""
import copy import copy
import numpy import numpy
...@@ -20,7 +21,10 @@ class RandomStateSharedVariable(SharedVariable): ...@@ -20,7 +21,10 @@ class RandomStateSharedVariable(SharedVariable):
@shared_constructor @shared_constructor
def randomstate_constructor(value, name=None, strict=False, def randomstate_constructor(value, name=None, strict=False,
allow_downcast=None, borrow=False): allow_downcast=None, borrow=False):
"""SharedVariable Constructor for RandomState""" """
SharedVariable Constructor for RandomState.
"""
if not isinstance(value, numpy.random.RandomState): if not isinstance(value, numpy.random.RandomState):
raise TypeError raise TypeError
if not borrow: if not borrow:
...@@ -37,20 +41,20 @@ class RandomStreams(raw_random.RandomStreamsBase): ...@@ -37,20 +41,20 @@ class RandomStreams(raw_random.RandomStreamsBase):
""" """
Module component with similar interface to numpy.random Module component with similar interface to numpy.random
(numpy.random.RandomState) (numpy.random.RandomState)
Parameters
----------
seed: None or int
A default seed to initialize the RandomState
instances after build. See `RandomStreamsInstance.__init__`
for more details.
""" """
def updates(self): def updates(self):
return list(self.state_updates) return list(self.state_updates)
def __init__(self, seed=None): def __init__(self, seed=None):
"""
:type seed: None or int
:param seed: a default seed to initialize the RandomState
instances after build. See `RandomStreamsInstance.__init__`
for more details.
"""
super(RandomStreams, self).__init__() super(RandomStreams, self).__init__()
# A list of pairs of the form (input_r, output_r). This will be # A list of pairs of the form (input_r, output_r). This will be
# over-ridden by the module instance to contain stream generators. # over-ridden by the module instance to contain stream generators.
...@@ -62,14 +66,18 @@ class RandomStreams(raw_random.RandomStreamsBase): ...@@ -62,14 +66,18 @@ class RandomStreams(raw_random.RandomStreamsBase):
self.gen_seedgen = numpy.random.RandomState(seed) self.gen_seedgen = numpy.random.RandomState(seed)
def seed(self, seed=None): def seed(self, seed=None):
"""Re-initialize each random stream """
Re-initialize each random stream.
:param seed: each random stream will be assigned a unique
state that depends deterministically on this value.
:type seed: None or integer in range 0 to 2**30 Parameters
----------
seed : None or integer in range 0 to 2**30
Each random stream will be assigned a unique state that depends
deterministically on this value.
:rtype: None Returns
-------
None
""" """
if seed is None: if seed is None:
...@@ -82,54 +90,72 @@ class RandomStreams(raw_random.RandomStreamsBase): ...@@ -82,54 +90,72 @@ class RandomStreams(raw_random.RandomStreamsBase):
borrow=True) borrow=True)
def __getitem__(self, item): def __getitem__(self, item):
"""Retrieve the numpy RandomState instance associated with a """
particular stream Retrieve the numpy RandomState instance associated with a particular
stream.
:param item: a variable of type RandomStateType, associated Parameters
with this RandomStream ----------
item
A variable of type RandomStateType, associated
with this RandomStream.
:rtype: numpy RandomState (or None, before initialize) Returns
-------
numpy RandomState (or None, before initialize)
:note: This is kept for compatibility with Notes
`tensor.randomstreams.RandomStreams`. The simpler syntax -----
``item.rng.get_value()`` is also valid. This is kept for compatibility with `tensor.randomstreams.RandomStreams`.
The simpler syntax ``item.rng.get_value()`` is also valid.
""" """
return item.get_value(borrow=True) return item.get_value(borrow=True)
def __setitem__(self, item, val): def __setitem__(self, item, val):
"""Set the numpy RandomState instance associated with a """
particular stream Set the numpy RandomState instance associated with a particular stream.
:param item: a variable of type RandomStateType, associated Parameters
with this RandomStream ----------
item
A variable of type RandomStateType, associated with this
RandomStream.
:param val: the new value val : numpy RandomState
:type val: numpy RandomState The new value.
:rtype: None Returns
-------
None
:note: This is kept for compatibility with Notes
`tensor.randomstreams.RandomStreams`. The simpler syntax -----
``item.rng.set_value(val)`` is also valid. This is kept for compatibility with `tensor.randomstreams.RandomStreams`.
The simpler syntax ``item.rng.set_value(val)`` is also valid.
""" """
item.set_value(val, borrow=True) item.set_value(val, borrow=True)
def gen(self, op, *args, **kwargs): def gen(self, op, *args, **kwargs):
"""Create a new random stream in this container. """
Create a new random stream in this container.
:param op: a RandomFunction instance to
Parameters
:param args: interpreted by `op` ----------
op
:param kwargs: interpreted by `op` A RandomFunction instance to
args
:returns: The symbolic random draw part of op()'s return Interpreted by `op`.
value. This function stores the updated RandomStateType kwargs
Variable for use at `build` time. Interpreted by `op`.
:rtype: TensorVariable Returns
-------
Tensor Variable
The symbolic random draw part of op()'s return value.
This function stores the updated RandomStateType Variable
for use at `build` time.
""" """
seed = int(self.gen_seedgen.randint(2 ** 30)) seed = int(self.gen_seedgen.randint(2 ** 30))
......
...@@ -8,9 +8,12 @@ from theano.compile import shared_constructor, SharedVariable ...@@ -8,9 +8,12 @@ from theano.compile import shared_constructor, SharedVariable
def load_shared_variable(val): def load_shared_variable(val):
"""This function is only here to keep some pickles loading """
This function is only here to keep some pickles loading
after a failed fix done in August 2011. after a failed fix done in August 2011.
It can be removed after sufficient time has passed.""" It can be removed after sufficient time has passed.
"""
return tensor_constructor(val) return tensor_constructor(val)
...@@ -22,13 +25,15 @@ class TensorSharedVariable(_tensor_py_operators, SharedVariable): ...@@ -22,13 +25,15 @@ class TensorSharedVariable(_tensor_py_operators, SharedVariable):
@shared_constructor @shared_constructor
def tensor_constructor(value, name=None, strict=False, allow_downcast=None, def tensor_constructor(value, name=None, strict=False, allow_downcast=None,
borrow=False, broadcastable=None): borrow=False, broadcastable=None):
"""SharedVariable Constructor for TensorType """
SharedVariable Constructor for TensorType.
:note: Regarding the inference of the broadcastable pattern... Notes
-----
Regarding the inference of the broadcastable pattern...
The default is to assume that the value might be resized in any The default is to assume that the value might be resized in any
dimension, so the default broadcastable is dimension, so the default broadcastable is ``(False,)*len(value.shape)``.
``(False,)*len(value.shape)``. The optional `broadcastable` The optional `broadcastable` argument will override this default.
argument will override this default.
""" """
if not isinstance(value, numpy.ndarray): if not isinstance(value, numpy.ndarray):
...@@ -61,11 +66,14 @@ class ScalarSharedVariable(_tensor_py_operators, SharedVariable): ...@@ -61,11 +66,14 @@ class ScalarSharedVariable(_tensor_py_operators, SharedVariable):
@shared_constructor @shared_constructor
def scalar_constructor(value, name=None, strict=False, allow_downcast=None, def scalar_constructor(value, name=None, strict=False, allow_downcast=None,
borrow=False): borrow=False):
"""SharedVariable constructor for scalar values. Default: int64 or float64. """
SharedVariable constructor for scalar values. Default: int64 or float64.
:note: We implement this using 0-d tensors for now. Notes
-----
We implement this using 0-d tensors for now.
:note: We ignore the borrow parameter as we convert ``value`` to an We ignore the borrow parameter as we convert ``value`` to an
ndarray (this is a new object). This respects the semantic of ndarray (this is a new object). This respects the semantic of
borrow, as it is a hint to Theano that we can reuse it. borrow, as it is a hint to Theano that we can reuse it.
......
""" """
Contains a wrapper function for tensor.nnet.ConvOp, which can be used to perform Contains a wrapper function for tensor.nnet.ConvOp, which can be used to perform
generic 2D convolution. generic 2D convolution.
"""
"""
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import warnings import warnings
...@@ -25,20 +25,29 @@ def conv2d(input, filters, image_shape=None, filter_shape=None, ...@@ -25,20 +25,29 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
Shape parameters are optional and will result in faster execution. Shape parameters are optional and will result in faster execution.
:type input: dmatrix of dtensor3 Parameters
:param input: symbolic variable for images to be filtered ----------
:type filters: dmatrix of dtensor3 input : dmatrix of dtensor3
:param filters: symbolic variable containing filter values Symbolic variable for images to be filtered.
:param border_mode: 'valid' or 'full'. see scipy.signal.convolve2d filters : dmatrix of dtensor3
:param subsample: factor by which to subsample output Symbolic variable containing filter values.
:type image_shape: tuple of length 2 or 3 border_mode: {'valid', 'full'}
:param image_shape: ([number images,] image height, image width) See scipy.signal.convolve2d.
:type filter_shape: tuple of length 2 or 3 subsample
:param filter_shape: ([number filters,] filter height, filter width) Factor by which to subsample output.
:param kwargs: see theano.tensor.nnet.conv.conv2d image_shape : tuple of length 2 or 3
:rtype: symbolic 2D,3D or 4D tensor ([number images,] image height, image width).
:return: tensor of filtered images, with shape filter_shape : tuple of length 2 or 3
([number images,] [number filters,] image height, image width) ([number filters,] filter height, filter width).
kwargs
See theano.tensor.nnet.conv.conv2d.
Returns
-------
symbolic 2D,3D or 4D tensor
Tensor of filtered images, with shape
([number images,] [number filters,] image height, image width).
""" """
assert input.ndim in (2, 3) assert input.ndim in (2, 3)
assert filters.ndim in (2, 3) assert filters.ndim in (2, 3)
......
""" Ops for downsampling images. """
Ops for downsampling images.
Planned: Planned:
DownsampleFactorMax, DownsampleAvg, DownsampleSoftmax. DownsampleFactorMax, DownsampleAvg, DownsampleSoftmax.
...@@ -29,12 +30,14 @@ def max_pool_2d_same_size(input, patch_size): ...@@ -29,12 +30,14 @@ def max_pool_2d_same_size(input, patch_size):
keeping only the maximum values. The output has the same dimensions as keeping only the maximum values. The output has the same dimensions as
the input. the input.
:type input: 4-D theano tensor of input images. Parameters
:param input: input images. Max pooling will be done over the 2 last ----------
dimensions. input : 4-D theano tensor of input images
:type patch_size: tuple of length 2 Input images. Max pooling will be done over the 2 last dimensions.
:param patch_size: size of the patch (patch height, patch width). patch_size : tuple of length 2
Size of the patch (patch height, patch width).
(2,2) will retain only one non-zero value per patch of 4 values. (2,2) will retain only one non-zero value per patch of 4 values.
""" """
output = DownsampleFactorMax(patch_size, True)(input) output = DownsampleFactorMax(patch_size, True)(input)
outs = MaxPoolGrad(patch_size, True)(input, output, output) outs = MaxPoolGrad(patch_size, True)(input, output, output)
...@@ -48,29 +51,29 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0), ...@@ -48,29 +51,29 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
the specified factor, by keeping only the maximum value of non-overlapping the specified factor, by keeping only the maximum value of non-overlapping
patches of size (ds[0],ds[1]) patches of size (ds[0],ds[1])
:type input: N-D theano tensor of input images. Parameters
:param input: input images. Max pooling will be done over the 2 last ----------
dimensions. input : N-D theano tensor of input images
:type ds: tuple of length 2 Input images. Max pooling will be done over the 2 last dimensions.
:param ds: factor by which to downscale (vertical ds, horizontal ds). ds : tuple of length 2
Factor by which to downscale (vertical ds, horizontal ds).
(2,2) will halve the image in each dimension. (2,2) will halve the image in each dimension.
:type ignore_border: bool ignore_border : bool
:param ignore_border: When True, (5,5) input with ds=(2,2) When True, (5,5) input with ds=(2,2) will generate a (2,2) output.
will generate a (2,2) output. (3,3) otherwise. (3,3) otherwise.
:type st: tuple of lenght 2 st : tuple of lenght 2
:param st: stride size, which is the number of shifts Stride size, which is the number of shifts over rows/cols to get the
over rows/cols to get the the next pool region. next pool region. If st is None, it is considered equal to ds
if st is None, it is considered equal to ds (no overlap on pooling regions).
(no overlap on pooling regions) padding : tuple of two ints
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins, of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins. and pad_w is the size of the left and right margins.
:type padding: tuple of two ints mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
:param mode: 'max', 'sum', 'average_inc_pad' or 'average_exc_pad'.
Operation executed on each window. `max` and `sum` always exclude Operation executed on each window. `max` and `sum` always exclude
the padding in the computation. `average` gives you the choice to the padding in the computation. `average` gives you the choice to
include or exclude it. include or exclude it.
:type mode: string
""" """
if input.ndim < 2: if input.ndim < 2:
raise NotImplementedError('max_pool_2d requires a dimension >= 2') raise NotImplementedError('max_pool_2d requires a dimension >= 2')
...@@ -104,44 +107,69 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0), ...@@ -104,44 +107,69 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
class DownsampleFactorMax(Op): class DownsampleFactorMax(Op):
"""For N-dimensional tensors, consider that the last two """
dimensions span images. This Op downsamples these images by For N-dimensional tensors, consider that the last two dimensions span
taking the max, sum or average over different patch. images. This Op downsamples these images by taking the max, sum or average
over different patch.
The constructor takes the max, sum or average or different input patches.
Parameters
----------
ds : list or tuple of two ints
Downsample factor over rows and column.
ds indicates the pool region size.
ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col of partial
downsampling (False) or ignore it (True).
st : list or tuple of two ints or None
Stride size, which is the number of shifts over rows/cols to get the
next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding: tuple of two ints
(pad_h, pad_w), pad zeros to extend beyond four borders of the images,
pad_h is the size of the top and bottom margins, and pad_w is the size
of the left and right margins.
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
('average_inc_pad' excludes the padding from the count,
'average_exc_pad' include it)
""" """
__props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode') __props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
@staticmethod @staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)): def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)):
"""Return the shape of the output from this op, for input of given """
Return the shape of the output from this op, for input of given
shape and flags. shape and flags.
:param imgshape: the shape of a tensor of images. The last two elements Parameters
are interpreted as the number of rows, and the number of cols. ----------
:type imgshape: tuple, list, or similar of integer or imgshape : tuple, list, or similar of integer or scalar Theano variable
scalar Theano variable. The shape of a tensor of images. The last two elements are
interpreted as the number of rows, and the number of cols.
:param ds: downsample factor over rows and columns ds : list or tuple of two ints
this parameter indicates the size of the pooling region Downsample factor over rows and columns this parameter indicates
:type ds: list or tuple of two ints the size of the pooling region.
st : list or tuple of two ints
:param st: the stride size. This is the distance between the pooling The stride size. This is the distance between the pooling regions.
regions. If it's set to None, in which case it equlas ds. If it's set to None, it equals ds.
:type st: list or tuple of two ints ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col of
:param ignore_border: if ds doesn't divide imgshape, do we include an partial downsampling (False) or ignore it (True).
extra row/col of partial downsampling (False) or ignore it (True). padding : tuple of two ints
:type ignore_border: bool (pad_h, pad_w), pad zeros to extend beyond four borders
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins, of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins. and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
:rtype: list Returns
:returns: the shape of the output from this op, for input of given -------
shape. This will have the same length as imgshape, but with last list
two elements reduced as per the downsampling & ignore_border flags. The shape of the output from this op, for input of given shape.
This will have the same length as imgshape, but with last two
elements reduced as per the downsampling & ignore_border flags.
""" """
if len(imgshape) < 2: if len(imgshape) < 2:
raise TypeError('imgshape must have at least two elements ' raise TypeError('imgshape must have at least two elements '
...@@ -190,33 +218,6 @@ class DownsampleFactorMax(Op): ...@@ -190,33 +218,6 @@ class DownsampleFactorMax(Op):
def __init__(self, ds, ignore_border=False, st=None, padding=(0, 0), def __init__(self, ds, ignore_border=False, st=None, padding=(0, 0),
mode='max'): mode='max'):
""" Take the max, sum or average or different input patches.
:param ds: downsample factor over rows and column.
ds indicates the pool region size.
:type ds: list or tuple of two ints
:param ignore_border: if ds doesn't divide imgshape, do we include
an extra row/col of partial downsampling (False) or
ignore it (True).
:type ignore_border: bool
: param st: stride size, which is the number of shifts
over rows/cols to get the the next pool region.
if st is None, it is considered equal to ds
(no overlap on pooling regions)
: type st: list or tuple of two ints or None
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
:param mode: 'max', 'sum', 'average_inc_pad', 'average_exc_pad'.
('average_inc_pad' excludes the padding from the count,
'average_exc_pad' include it)
"""
self.ds = tuple(ds) self.ds = tuple(ds)
if not all([isinstance(d, int) for d in ds]): if not all([isinstance(d, int) for d in ds]):
raise ValueError( raise ValueError(
...@@ -876,35 +877,36 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -876,35 +877,36 @@ class DownsampleFactorMaxGradGrad(Op):
@staticmethod @staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)): def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)):
"""Return the shape of the output from this op, for input of given """
Return the shape of the output from this op, for input of given
shape and flags. shape and flags.
:param imgshape: the shape of a tensor of images. The last two elements Parameters
----------
imgshape : tuple, list, or similar of integer or scalar Theano variable
The shape of a tensor of images. The last two elements
are interpreted as the number of rows, and the number of cols. are interpreted as the number of rows, and the number of cols.
:type imgshape: tuple, list, or similar of integer or ds : list or tuple of two ints
scalar Theano variable. Downsample factor over rows and columns; this parameter indicates the
size of the pooling region.
:param ds: downsample factor over rows and columns st : list or tuple of two ints
this parameter indicates the size of the pooling region The stride size. This is the distance between the pooling regions.
:type ds: list or tuple of two ints If it's set to None, it equals ds.
ignore_border : bool
:param st: the stride size. This is the distance between the pooling If ds doesn't divide imgshape, do we include an
regions. If it's set to None, in which case it equlas ds.
:type st: list or tuple of two ints
:param ignore_border: if ds doesn't divide imgshape, do we include an
extra row/col of partial downsampling (False) or ignore it (True). extra row/col of partial downsampling (False) or ignore it (True).
:type ignore_border: bool padding : tuple of two ints
(pad_h, pad_w), pad zeros to extend beyond four borders
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins, of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins. and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
:rtype: list Returns
:returns: the shape of the output from this op, for input of given -------
shape. This will have the same length as imgshape, but with last list
two elements reduced as per the downsampling & ignore_border flags. The shape of the output from this op, for input of given shape.
This will have the same length as imgshape, but with last two
elements reduced as per the downsampling & ignore_border flags.
""" """
if len(imgshape) < 2: if len(imgshape) < 2:
raise TypeError('imgshape must have at least two elements ' raise TypeError('imgshape must have at least two elements '
......
...@@ -31,9 +31,10 @@ MATRIX_STRUCTURES = ( ...@@ -31,9 +31,10 @@ MATRIX_STRUCTURES = (
class Cholesky(Op): class Cholesky(Op):
""" """
Return a triangular matrix square root of positive semi-definite `x` Return a triangular matrix square root of positive semi-definite `x`.
L = cholesky(X, lower=True) implies dot(L, L.T) == X.
L = cholesky(X, lower=True) implies dot(L, L.T) == X
""" """
# TODO: inplace # TODO: inplace
# TODO: for specific dtypes # TODO: for specific dtypes
...@@ -90,9 +91,12 @@ class CholeskyGrad(Op): ...@@ -90,9 +91,12 @@ class CholeskyGrad(Op):
return Apply(self, [x, l, dz], [x.type()]) return Apply(self, [x, l, dz], [x.type()])
def perform(self, node, inputs, outputs): def perform(self, node, inputs, outputs):
"""Implements the "reverse-mode" gradient [1]_ for the """
Implements the "reverse-mode" gradient [1]_ for the
Cholesky factorization of a positive-definite matrix. Cholesky factorization of a positive-definite matrix.
References
----------
.. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm". .. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
Journal of Computational and Graphical Statistics, Journal of Computational and Graphical Statistics,
Vol. 4, No. 2 (Jun.,1995), pp. 134-147 Vol. 4, No. 2 (Jun.,1995), pp. 134-147
...@@ -133,7 +137,10 @@ class CholeskyGrad(Op): ...@@ -133,7 +137,10 @@ class CholeskyGrad(Op):
class Solve(Op): class Solve(Op):
"""Solve a system of linear equations""" """
Solve a system of linear equations.
"""
__props__ = ('A_structure', 'lower', 'overwrite_A', 'overwrite_b') __props__ = ('A_structure', 'lower', 'overwrite_A', 'overwrite_b')
...@@ -195,7 +202,9 @@ solve = Solve() # general solve ...@@ -195,7 +202,9 @@ solve = Solve() # general solve
class Eigvalsh(Op): class Eigvalsh(Op):
"""Generalized eigenvalues of a Hermetian positive definite eigensystem """
Generalized eigenvalues of a Hermitian positive definite eigensystem.
""" """
__props__ = ('lower',) __props__ = ('lower',)
...@@ -243,8 +252,10 @@ class Eigvalsh(Op): ...@@ -243,8 +252,10 @@ class Eigvalsh(Op):
class EigvalshGrad(Op): class EigvalshGrad(Op):
"""Gradient of generalized eigenvalues of a Hermetian positive definite """
eigensystem Gradient of generalized eigenvalues of a Hermitian positive definite
eigensystem.
""" """
# Note: This Op (EigvalshGrad), should be removed and replaced with a graph # Note: This Op (EigvalshGrad), should be removed and replaced with a graph
...@@ -303,18 +314,25 @@ def eigvalsh(a, b, lower=True): ...@@ -303,18 +314,25 @@ def eigvalsh(a, b, lower=True):
def kron(a, b): def kron(a, b):
""" Kronecker product """ Kronecker product.
Same as scipy.linalg.kron(a, b). Same as scipy.linalg.kron(a, b).
:note: numpy.kron(a, b) != scipy.linalg.kron(a, b)! Parameters
----------
a: array_like
b: array_like
Returns
-------
array_like with a.ndim + b.ndim - 2 dimensions
Notes
-----
numpy.kron(a, b) != scipy.linalg.kron(a, b)!
They don't have the same shape and order when They don't have the same shape and order when
a.ndim != b.ndim != 2. a.ndim != b.ndim != 2.
:param a: array_like
:param b: array_like
:return: array_like with a.ndim + b.ndim - 2 dimensions.
""" """
a = tensor.as_tensor_variable(a) a = tensor.as_tensor_variable(a)
b = tensor.as_tensor_variable(b) b = tensor.as_tensor_variable(b)
...@@ -336,7 +354,9 @@ def kron(a, b): ...@@ -336,7 +354,9 @@ def kron(a, b):
class Expm(Op): class Expm(Op):
"""Compute the matrix exponential of a square array """
Compute the matrix exponential of a square array.
""" """
__props__ = () __props__ = ()
...@@ -365,7 +385,9 @@ class Expm(Op): ...@@ -365,7 +385,9 @@ class Expm(Op):
class ExpmGrad(Op): class ExpmGrad(Op):
"""Gradient of the matrix exponential of a square array. """
Gradient of the matrix exponential of a square array.
""" """
__props__ = () __props__ = ()
......
...@@ -5,7 +5,8 @@ from theano.tensor.basic import mul, arange ...@@ -5,7 +5,8 @@ from theano.tensor.basic import mul, arange
class SortOp(theano.Op): class SortOp(theano.Op):
""" """
This class is a wrapper for numpy sort function This class is a wrapper for numpy sort function.
""" """
__props__ = ("kind", "order") __props__ = ("kind", "order")
...@@ -62,12 +63,15 @@ class SortOp(theano.Op): ...@@ -62,12 +63,15 @@ class SortOp(theano.Op):
return index_val return index_val
def __get_argsort_indices(self, a, axis): def __get_argsort_indices(self, a, axis):
"""Calculates indices which can be used to reverse """
sorting operation of "a" tensor along "axis" Calculate indices which can be used to reverse sorting operation of
"a" tensor along "axis".
returns: Returns
-------
1d array if axis is None 1d array if axis is None
list of length len(a.shape) otherwise list of length len(a.shape) otherwise
""" """
# The goal is to get gradient wrt input from gradient # The goal is to get gradient wrt input from gradient
...@@ -99,24 +103,26 @@ class SortOp(theano.Op): ...@@ -99,24 +103,26 @@ class SortOp(theano.Op):
def sort(a, axis=-1, kind='quicksort', order=None): def sort(a, axis=-1, kind='quicksort', order=None):
""" """
Return a sorted copy of an array.
Parameters
----------
a : Tensor a : Tensor
Tensor to be sorted Tensor to be sorted
axis : Tensor axis : Tensor
Axis along which to sort. If None, the array is Axis along which to sort. If None, the array is flattened before
flattened before sorting. sorting.
kind : {'quicksort', 'mergesort', 'heapsort'}, optional kind : {'quicksort', 'mergesort', 'heapsort'}, optional
Sorting algorithm. Default is 'quicksort'. Sorting algorithm. Default is 'quicksort'.
order : list, optional order : list, optional
When `a` is a structured array, this argument specifies which When `a` is a structured array, this argument specifies which
fields to compare first, second, and so on. This list does not fields to compare first, second, and so on. This list does not
need to include all of the fields. need to include all of the fields.
Returns
-------
array
A sorted copy of an array.
""" """
if axis is None: if axis is None:
a = a.flatten() a = a.flatten()
...@@ -126,7 +132,8 @@ def sort(a, axis=-1, kind='quicksort', order=None): ...@@ -126,7 +132,8 @@ def sort(a, axis=-1, kind='quicksort', order=None):
class ArgSortOp(theano.Op): class ArgSortOp(theano.Op):
""" """
This class is a wrapper for numpy argsort function This class is a wrapper for numpy argsort function.
""" """
__props__ = ("kind", "order") __props__ = ("kind", "order")
...@@ -196,6 +203,7 @@ def argsort(a, axis=-1, kind='quicksort', order=None): ...@@ -196,6 +203,7 @@ def argsort(a, axis=-1, kind='quicksort', order=None):
specified by the kind keyword. It returns an array of indices of specified by the kind keyword. It returns an array of indices of
the same shape as a that index data along the given axis in sorted the same shape as a that index data along the given axis in sorted
order. order.
""" """
if axis is None: if axis is None:
a = a.flatten() a = a.flatten()
......
...@@ -39,6 +39,7 @@ sparse_module_ref = None ...@@ -39,6 +39,7 @@ sparse_module_ref = None
class AdvancedIndexingError(TypeError): class AdvancedIndexingError(TypeError):
""" """
Raised when Subtensor is asked to perform advanced indexing. Raised when Subtensor is asked to perform advanced indexing.
""" """
def __init__(self, *args): def __init__(self, *args):
...@@ -52,6 +53,7 @@ class AdvancedIndexingError(TypeError): ...@@ -52,6 +53,7 @@ class AdvancedIndexingError(TypeError):
def make_constant(args): def make_constant(args):
""" """
Convert python litterals to theano constants in subtensor arguments. Convert python litterals to theano constants in subtensor arguments.
""" """
def conv(a): def conv(a):
if a is None: if a is None:
...@@ -68,13 +70,14 @@ def make_constant(args): ...@@ -68,13 +70,14 @@ def make_constant(args):
def get_idx_list(inputs, idx_list, get_count=False): def get_idx_list(inputs, idx_list, get_count=False):
''' """
Given a list of inputs to the subtensor and its idx_list, reorders Given a list of inputs to the subtensor and its idx_list, reorders
the inputs according to the idx list to get the right values. the inputs according to the idx list to get the right values.
If get_counts=True, instead returns the number of inputs consumed If get_counts=True, instead returns the number of inputs consumed
during this process. during this process.
'''
"""
# The number of indices # The number of indices
n = len(inputs) - 1 n = len(inputs) - 1
...@@ -102,14 +105,15 @@ def get_idx_list(inputs, idx_list, get_count=False): ...@@ -102,14 +105,15 @@ def get_idx_list(inputs, idx_list, get_count=False):
def get_canonical_form_slice(theslice, length): def get_canonical_form_slice(theslice, length):
''' """
Given a slice [start:stop:step] transform it into a canonical form Given a slice [start:stop:step] transform it into a canonical form
that respects the conventions imposed by python and numpy. that respects the conventions imposed by python and numpy.
In a canonical form a slice is represented by a canonical form slice, In a canonical form a slice is represented by a canonical form slice,
in which 0 <= start <= stop <= length and step > 0, and a flag which says in which 0 <= start <= stop <= length and step > 0, and a flag which says
if the resulting set of numbers needs to be reversed or not. if the resulting set of numbers needs to be reversed or not.
'''
"""
from theano.tensor import switch, lt, ge, sgn from theano.tensor import switch, lt, ge, sgn
if isinstance(theslice, slice): if isinstance(theslice, slice):
...@@ -252,7 +256,8 @@ def get_canonical_form_slice(theslice, length): ...@@ -252,7 +256,8 @@ def get_canonical_form_slice(theslice, length):
class Subtensor(Op): class Subtensor(Op):
"""Return a subtensor view """
Return a subtensor view.
The inputs array is the tensor x, followed by scalar integer types. The inputs array is the tensor x, followed by scalar integer types.
TODO: WRITEME: how are the scalar integer variables formatted? TODO: WRITEME: how are the scalar integer variables formatted?
...@@ -297,12 +302,16 @@ class Subtensor(Op): ...@@ -297,12 +302,16 @@ class Subtensor(Op):
@staticmethod @staticmethod
def collapse(idxs, cond): def collapse(idxs, cond):
""" """
Parameters
----------
idxs : a list of indices or slices.
cond : a callable that returns a bool
idxs: a list of indices or slices. Returns
cond: a callable that returns a bool -------
list
returns: idxs, with the slices flattened out into a list. idxs, with the slices flattened out into a list.
if cond is true for an entry, does not flatten it. If cond is true for an entry, does not flatten it.
""" """
ret = [] ret = []
...@@ -323,12 +332,14 @@ class Subtensor(Op): ...@@ -323,12 +332,14 @@ class Subtensor(Op):
@staticmethod @staticmethod
def convert(entry, slice_ok=True): def convert(entry, slice_ok=True):
""" """
Change references to Variables into references to Types.
The "idx_list" field is unique to each Subtensor instance. The "idx_list" field is unique to each Subtensor instance.
It is not unique to each Apply node, so it should not refer to It is not unique to each Apply node, so it should not refer to
specific Variables. This method changes references to Variables specific Variables.
into references to Types.
TODO: WRITEME: This method also accepts "entry" already being a Type; TODO: WRITEME: This method also accepts "entry" already being a Type;
when would that happen? when would that happen?
""" """
invalid_scal_types = [scal.float64, scal.float32, scal.float16] invalid_scal_types = [scal.float64, scal.float32, scal.float16]
scal_types = [scal.int64, scal.int32, scal.int16, scal.int8] scal_types = [scal.int64, scal.int32, scal.int16, scal.int8]
...@@ -389,18 +400,25 @@ class Subtensor(Op): ...@@ -389,18 +400,25 @@ class Subtensor(Op):
only_process_constants=False): only_process_constants=False):
""" """
Return the idx_list with constant inputs replaced by their Return the idx_list with constant inputs replaced by their
python scalar equivalent. May raise python scalar equivalent.
`theano.tensor.NotScalarConstantError` if the idx contains May raise `theano.tensor.NotScalarConstantError` if the idx contains
non-constant entries. non-constant entries.
If allow_partial is True, then entries that are not constant If allow_partial is True, then entries that are not constant will
will stay as their input variable rather than raising an stay as their input variable rather than raising an exception.
exception.
None entries are always left as-is. None entries are always left as-is.
Example usage (where v, a are appropriately typed theano variables): Parameters
----------
only_process_constants
If True, we only attempt to obtain the value of an index/slice if
it's directly constant and don't try to dig through dimshuffles,
fills, allocs, and other to figure out its value.
Examples
--------
Example usage where v, a are appropriately typed theano variables :
>>> b = a[v, 1:3] >>> b = a[v, 1:3]
>>> b.owner.op.idx_list >>> b.owner.op.idx_list
(Scalar(int64), slice(Scalar(int64), Scalar(int64), None)) (Scalar(int64), slice(Scalar(int64), Scalar(int64), None))
...@@ -409,10 +427,6 @@ class Subtensor(Op): ...@@ -409,10 +427,6 @@ class Subtensor(Op):
>>> b.owner.op.get_constant_idx(b.owner.inputs) >>> b.owner.op.get_constant_idx(b.owner.inputs)
NotScalarConstantError: v NotScalarConstantError: v
:param only_process_constants: If True, we only attempt to obtain
the value of an index/slice if it's directly constant and don't
try to dig through dimshuffles, fills, allocs, and other to figure
out its value.
""" """
real_idx = get_idx_list(inputs, self.idx_list) real_idx = get_idx_list(inputs, self.idx_list)
...@@ -451,8 +465,13 @@ class Subtensor(Op): ...@@ -451,8 +465,13 @@ class Subtensor(Op):
def make_node(self, x, *inputs): def make_node(self, x, *inputs):
""" """
x: the tensor to take a subtensor of Parameters
inputs: a list of theano Scalars ----------
x
The tensor to take a subtensor of.
inputs
A list of theano Scalars.
""" """
x = theano.tensor.as_tensor_variable(x) x = theano.tensor.as_tensor_variable(x)
inputs = tuple(self.my_as_scalar(a) for a in inputs) inputs = tuple(self.my_as_scalar(a) for a in inputs)
...@@ -607,8 +626,8 @@ class Subtensor(Op): ...@@ -607,8 +626,8 @@ class Subtensor(Op):
@staticmethod @staticmethod
def default_helper_c_code_args(): def default_helper_c_code_args():
""" """
Returns a dictionary of default arguments to Returns a dictionary of default arguments to helper_c_code.
helper_c_code
""" """
return {"c_prefix": "PyArray", return {"c_prefix": "PyArray",
...@@ -622,7 +641,8 @@ class Subtensor(Op): ...@@ -622,7 +641,8 @@ class Subtensor(Op):
The parameters c_prefix are there to allow reusing this The parameters c_prefix are there to allow reusing this
function on PyArray and CudaNdarray object. function on PyArray and CudaNdarray object.
This fct take as input the x, This fct take as input the x.
""" """
default_args = Subtensor.default_helper_c_code_args() default_args = Subtensor.default_helper_c_code_args()
...@@ -986,16 +1006,25 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor), ...@@ -986,16 +1006,25 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor),
def set_subtensor(x, y, inplace=False, def set_subtensor(x, y, inplace=False,
tolerate_inplace_aliasing=False): tolerate_inplace_aliasing=False):
"""Return x with the given subtensor overwritten by y. """
Return x with the given subtensor overwritten by y.
Example: To replicate the numpy expression "r[10:] = 5", type Parameters
----------
x
Symbolic variable for the lvalue of = operation.
y
Symbolic variable for the rvalue of = operation.
tolerate_inplace_aliasing
See inc_subtensor for documentation.
Examples
--------
To replicate the numpy expression "r[10:] = 5", type
>>> r = ivector() >>> r = ivector()
>>> new_r = set_subtensor(r[10:], 5) >>> new_r = set_subtensor(r[10:], 5)
:param x: symbolic variable for the lvalue of = operation
:param y: symbolic variable for the rvalue of = operation
:param tolerate_inplace_aliasing: see inc_subtensor for documentation.
""" """
return inc_subtensor(x, y, inplace, set_instead_of_inc=True, return inc_subtensor(x, y, inplace, set_instead_of_inc=True,
tolerate_inplace_aliasing=tolerate_inplace_aliasing) tolerate_inplace_aliasing=tolerate_inplace_aliasing)
...@@ -1003,22 +1032,32 @@ def set_subtensor(x, y, inplace=False, ...@@ -1003,22 +1032,32 @@ def set_subtensor(x, y, inplace=False,
def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False, def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
tolerate_inplace_aliasing=False): tolerate_inplace_aliasing=False):
"""Return x with the given subtensor incremented by y. """
Return x with the given subtensor incremented by y.
:param x: the symbolic result of a Subtensor operation.
:param y: the amount by which to increment ths subtensor in question Parameters
:param inplace: Don't use. Theano will do it when possible. ----------
:param set_instead_of_inc: If True, do a set_subtensor instead. x
:param tolerate_inplace_aliasing: allow x and y to be views of a single The symbolic result of a Subtensor operation.
underlying array even while working inplace. For correct results, y
x and y must not be overlapping views; if they overlap, the result The amount by which to increment the subtensor in question.
of this Op will generally be incorrect. This value has no effect if inplace
inplace=False. Don't use. Theano will do it when possible.
set_instead_of_inc
Example: To replicate the numpy expression "r[10:] += 5", type If True, do a set_subtensor instead.
tolerate_inplace_aliasing:
Allow x and y to be views of a single underlying array even while
working inplace. For correct results, x and y must not be overlapping
views; if they overlap, the result of this Op will generally be
incorrect. This value has no effect if inplace=False.
Examples
--------
To replicate the numpy expression "r[10:] += 5", type
>>> r = ivector() >>> r = ivector()
>>> new_r = inc_subtensor(r[10:], 5) >>> new_r = inc_subtensor(r[10:], 5)
""" """
# First of all, y cannot have a higher dimension than x, # First of all, y cannot have a higher dimension than x,
# nor have non-broadcastable dimensions where x is broadcastable. # nor have non-broadcastable dimensions where x is broadcastable.
...@@ -1159,7 +1198,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False, ...@@ -1159,7 +1198,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
class IncSubtensor(Op): class IncSubtensor(Op):
"""Increment a subtensor. """
Increment a subtensor.
This is like numpy's This is like numpy's
...@@ -1167,8 +1207,12 @@ class IncSubtensor(Op): ...@@ -1167,8 +1207,12 @@ class IncSubtensor(Op):
It is used internally to implement the gradient on SubTensor. It is used internally to implement the gradient on SubTensor.
:param set_instead_of_inc: if True set the subtensor to the value instead Parameters
of incrementing it by that value. ----------
set_instead_of_inc
If True set the subtensor to the value instead of incrementing it by
that value.
""" """
check_input = False check_input = False
...@@ -1225,9 +1269,14 @@ class IncSubtensor(Op): ...@@ -1225,9 +1269,14 @@ class IncSubtensor(Op):
def make_node(self, x, y, *inputs): def make_node(self, x, y, *inputs):
""" """
x: the tensor to increment Parameters
y: the value to increment by ----------
x
The tensor to increment.
y
The value to increment by.
inputs: TODO WRITEME inputs: TODO WRITEME
""" """
x, y = map(theano.tensor.as_tensor_variable, [x, y]) x, y = map(theano.tensor.as_tensor_variable, [x, y])
if y.ndim > x.ndim: if y.ndim > x.ndim:
...@@ -1411,8 +1460,10 @@ class IncSubtensor(Op): ...@@ -1411,8 +1460,10 @@ class IncSubtensor(Op):
) )
def do_type_checking(self, node): def do_type_checking(self, node):
""" Should raise NotImplementedError if c_code does not support """
Should raise NotImplementedError if c_code does not support
the types involved in this node. the types involved in this node.
""" """
if not isinstance(node.inputs[0].type, theano.tensor.TensorType): if not isinstance(node.inputs[0].type, theano.tensor.TensorType):
...@@ -1427,13 +1478,19 @@ class IncSubtensor(Op): ...@@ -1427,13 +1478,19 @@ class IncSubtensor(Op):
def copy_of_x(self, x): def copy_of_x(self, x):
""" """
:param x: a string giving the name of a C variable Parameters
pointing to an array ----------
x
A string giving the name of a C variable pointing to an array.
:return: C code expression to make a copy of x Returns
-------
object
C code expression to make a copy of x.
Base class uses PyArrayObject *, subclasses may override for Base class uses PyArrayObject *, subclasses may override for
different types of arrays. different types of arrays.
""" """
# Parameters of PyArrary_FromAny are: # Parameters of PyArrary_FromAny are:
# array # array
...@@ -1448,12 +1505,16 @@ class IncSubtensor(Op): ...@@ -1448,12 +1505,16 @@ class IncSubtensor(Op):
def make_view_array(self, x, view_ndim): def make_view_array(self, x, view_ndim):
""" """
:param x: a string identifying an array to be viewed Parameters
:param view_ndim: a string specifying the number of dimensions ----------
to have in the view x
A string identifying an array to be viewed.
view_ndim
A string specifying the number of dimensions to have in the view.
This doesn't need to actually set up the view with the right indexing;
we'll do that manually later.
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
""" """
return """Py_INCREF(PyArray_DESCR(%(x)s)); return """Py_INCREF(PyArray_DESCR(%(x)s));
...@@ -1471,22 +1532,35 @@ class IncSubtensor(Op): ...@@ -1471,22 +1532,35 @@ class IncSubtensor(Op):
""" % locals() """ % locals()
def get_helper_c_code_args(self): def get_helper_c_code_args(self):
""" Return a dictionary of arguments to pass to helper_c_code.""" """
Return a dictionary of arguments to pass to helper_c_code.
"""
return Subtensor.default_helper_c_code_args() return Subtensor.default_helper_c_code_args()
def copy_into(self, view, source): def copy_into(self, view, source):
""" """
view: string, C code expression for an array Parameters
source: string, C code expression for an array ----------
view : string
C code expression for an array.
source : string
C code expression for an array.
Returns
-------
object
C code expression to copy source into view, and 0 on success.
returns a C code expression to copy source into view, and
return 0 on success
""" """
return """PyArray_CopyInto(%(view)s, %(source)s)""" % locals() return """PyArray_CopyInto(%(view)s, %(source)s)""" % locals()
def add_to_zview(self, name, x, fail): def add_to_zview(self, name, x, fail):
""" Return C code to add x to zview. Should DECREF zview if the """
add fails.""" Return C code to add x to zview. Should DECREF zview if the
add fails.
"""
return """ return """
PyArrayObject * add_rval = (PyArrayObject*)PyNumber_InPlaceAdd( PyArrayObject * add_rval = (PyArrayObject*)PyNumber_InPlaceAdd(
...@@ -1551,11 +1625,13 @@ class IncSubtensor(Op): ...@@ -1551,11 +1625,13 @@ class IncSubtensor(Op):
def _sum_grad_over_bcasted_dims(x, gx): def _sum_grad_over_bcasted_dims(x, gx):
"""Sum of gx over dimensions to reproduce x.broadcastable. """
Sum of gx over dimensions to reproduce x.broadcastable.
This is useful to sum gradients over certain dimensions when This is useful to sum gradients over certain dimensions when
x has been broadcasted, and we need to sum the gradient contributions x has been broadcasted, and we need to sum the gradient contributions
over all duplications. over all duplications.
""" """
if gx.broadcastable != x.broadcastable: if gx.broadcastable != x.broadcastable:
x_dim_added = gx.ndim - x.ndim x_dim_added = gx.ndim - x.ndim
...@@ -1592,7 +1668,10 @@ def _sum_grad_over_bcasted_dims(x, gx): ...@@ -1592,7 +1668,10 @@ def _sum_grad_over_bcasted_dims(x, gx):
class AdvancedSubtensor1(Op): class AdvancedSubtensor1(Op):
"""Implement x[ilist] where ilist is a vector of integers.""" """
Implement x[ilist] where ilist is a vector of integers.
"""
# sparse_grad doesn't go in here since it only affects the output # sparse_grad doesn't go in here since it only affects the output
# of the grad() method. # of the grad() method.
__props__ = () __props__ = ()
...@@ -1777,7 +1856,11 @@ advanced_subtensor1 = AdvancedSubtensor1() ...@@ -1777,7 +1856,11 @@ advanced_subtensor1 = AdvancedSubtensor1()
class AdvancedIncSubtensor1(Op): class AdvancedIncSubtensor1(Op):
"""Increments a subtensor using advanced slicing (list of index)""" """
Increments a subtensor using advanced slicing (list of index).
"""
__props__ = ('inplace', 'set_instead_of_inc') __props__ = ('inplace', 'set_instead_of_inc')
def __init__(self, inplace=False, set_instead_of_inc=False): def __init__(self, inplace=False, set_instead_of_inc=False):
...@@ -1828,13 +1911,19 @@ class AdvancedIncSubtensor1(Op): ...@@ -1828,13 +1911,19 @@ class AdvancedIncSubtensor1(Op):
def copy_of_x(self, x): def copy_of_x(self, x):
""" """
:param x: a string giving the name of a C variable Parameters
pointing to an array ----------
x : string
Gives the name of a C variable pointing to an array.
:return: C code expression to make a copy of x Returns
-------
object
C code expression to make a copy of x.
Base class uses PyArrayObject *, subclasses may override for Base class uses PyArrayObject *, subclasses may override for
different types of arrays. different types of arrays.
""" """
# Parameters of PyArrary_FromAny are: # Parameters of PyArrary_FromAny are:
# array # array
...@@ -1994,6 +2083,7 @@ def adv_index_broadcastable_pattern(a, idx): ...@@ -1994,6 +2083,7 @@ def adv_index_broadcastable_pattern(a, idx):
For this, we make a fake ndarray and a fake idx and call use ask numpy For this, we make a fake ndarray and a fake idx and call use ask numpy
the output. From this, we find the output broadcast pattern. the output. From this, we find the output broadcast pattern.
""" """
def replace_slice(v): def replace_slice(v):
...@@ -2021,8 +2111,11 @@ def adv_index_broadcastable_pattern(a, idx): ...@@ -2021,8 +2111,11 @@ def adv_index_broadcastable_pattern(a, idx):
class AdvancedSubtensor(Op): class AdvancedSubtensor(Op):
"""Return a subtensor copy, using advanced indexing.
""" """
Return a subtensor copy, using advanced indexing.
"""
# Should be used by __getitem__ and __getslice__, as follow: # Should be used by __getitem__ and __getslice__, as follow:
# AdvancedSubtensor()(self, *args), # AdvancedSubtensor()(self, *args),
# if args contains and advanced indexing pattern # if args contains and advanced indexing pattern
...@@ -2094,13 +2187,16 @@ advanced_subtensor = AdvancedSubtensor() ...@@ -2094,13 +2187,16 @@ advanced_subtensor = AdvancedSubtensor()
class AdvancedIncSubtensor(Op): class AdvancedIncSubtensor(Op):
"""Increments a subtensor using advanced indexing. """
Increments a subtensor using advanced indexing.
:note: We need the numpy.inplace_increment() function currently Notes
numpy's PR 326 to be able to make an inplace version of this -----
op. We need the numpy.inplace_increment() function currently
numpy's PR 326 to be able to make an inplace version of this op.
""" """
__props__ = ("inplace", "set_instead_of_inc") __props__ = ("inplace", "set_instead_of_inc")
def __init__(self, inplace=False, set_instead_of_inc=False): def __init__(self, inplace=False, set_instead_of_inc=False):
......
...@@ -12,30 +12,35 @@ _logger = logging.getLogger("theano.tensor.type") ...@@ -12,30 +12,35 @@ _logger = logging.getLogger("theano.tensor.type")
class TensorType(Type): class TensorType(Type):
"""Symbolic `Type` representing a numpy.ndarray value."""
filter_checks_isfinite = False
"""
When this is True, strict filtering rejects data containing NaN or
Inf entries. (Used in `DebugMode`)
""" """
Symbolic `Type` representing a numpy.ndarray value.
def __init__(self, dtype, broadcastable, name=None, sparse_grad=False): Initialize self.dtype and self.broadcastable.
"""Initialize self.dtype and self.broadcastable.
:Parameters: Parameters
- `dtype`: str corresponding to numpy dtype (e.g., 'int64') ----------
dtype: str
Corresponding to numpy dtype (e.g., 'int64')
The value (ndarray) associated to a `Variable` of this `Type` will The value (ndarray) associated to a `Variable` of this `Type` will
have this dtype. have this dtype.
- `broadcastable`: tuple, list, or array of boolean values broadcastable: tuple, list, or array of boolean values
This argument serves two purposes. First, the True elements of this This argument serves two purposes. First, the True elements of this
list indicate the dimensions where the shape of an associated value list indicate the dimensions where the shape of an associated value
must be 1. Secondly, the length of this list is the number of must be 1. Secondly, the length of this list is the number of
dimensions that an associated value must have. See dimensions that an associated value must have. See
:doc:`broadcasting` for an explanation of how this list is used. doc:`broadcasting` for an explanation of how this list is used.
- `name`: str name : str
Optional name for this type. Optional name for this type.
""" """
filter_checks_isfinite = False
"""
When this is True, strict filtering rejects data containing NaN or
Inf entries. (Used in `DebugMode`)
"""
def __init__(self, dtype, broadcastable, name=None, sparse_grad=False):
self.dtype = str(dtype) self.dtype = str(dtype)
if self.dtype == 'floatX': if self.dtype == 'floatX':
self.dtype = config.floatX self.dtype = config.floatX
...@@ -56,6 +61,7 @@ class TensorType(Type): ...@@ -56,6 +61,7 @@ class TensorType(Type):
""" """
Return a copy of the type optionally with a new dtype or Return a copy of the type optionally with a new dtype or
broadcastable pattern. broadcastable pattern.
""" """
if dtype is None: if dtype is None:
dtype = self.dtype dtype = self.dtype
...@@ -65,11 +71,13 @@ class TensorType(Type): ...@@ -65,11 +71,13 @@ class TensorType(Type):
sparse_grad=self.sparse_grad) sparse_grad=self.sparse_grad)
def filter(self, data, strict=False, allow_downcast=None): def filter(self, data, strict=False, allow_downcast=None):
"""Convert `data` to something which can be associated to a """
Convert `data` to something which can be associated to a
`TensorVariable`. `TensorVariable`.
This function is not meant to be called in user code. It is for This function is not meant to be called in user code. It is for
`Linker` instances to use when running a compiled graph. `Linker` instances to use when running a compiled graph.
""" """
# Explicit error message when one accidentally uses a Variable as # Explicit error message when one accidentally uses a Variable as
# input (typical mistake, especially with shared variables). # input (typical mistake, especially with shared variables).
...@@ -191,11 +199,13 @@ class TensorType(Type): ...@@ -191,11 +199,13 @@ class TensorType(Type):
return data return data
def filter_variable(self, other, allow_convert=True): def filter_variable(self, other, allow_convert=True):
"""Convert a symbolic Variable into a TensorType, if compatible. """
Convert a symbolic Variable into a TensorType, if compatible.
For the moment, only a TensorType or CudaNdarrayType will be For the moment, only a TensorType or CudaNdarrayType will be
converted, provided they have the same number of dimensions, converted, provided they have the same number of dimensions,
broadcastable pattern, and dtype. broadcastable pattern, and dtype.
""" """
if hasattr(other, '_as_TensorVariable'): if hasattr(other, '_as_TensorVariable'):
other = other._as_TensorVariable() other = other._as_TensorVariable()
...@@ -230,10 +240,12 @@ class TensorType(Type): ...@@ -230,10 +240,12 @@ class TensorType(Type):
return "value is valid" return "value is valid"
def dtype_specs(self): def dtype_specs(self):
"""Return a tuple (python type, c type, numpy typenum) that corresponds """
Return a tuple (python type, c type, numpy typenum) that corresponds
to self.dtype. to self.dtype.
This function is used internally as part of C code generation. This function is used internally as part of C code generation.
""" """
# TODO: add more type correspondances for e.g. int32, int64, float32, # TODO: add more type correspondances for e.g. int32, int64, float32,
# complex64, etc. # complex64, etc.
...@@ -261,7 +273,10 @@ class TensorType(Type): ...@@ -261,7 +273,10 @@ class TensorType(Type):
return scal.get_scalar_type(dtype=self.dtype) return scal.get_scalar_type(dtype=self.dtype)
def __eq__(self, other): def __eq__(self, other):
"""Compare True iff other is the same kind of TensorType""" """
Compare True iff other is the same kind of TensorType.
"""
return type(self) == type(other) and other.dtype == self.dtype \ return type(self) == type(other) and other.dtype == self.dtype \
and other.broadcastable == self.broadcastable and other.broadcastable == self.broadcastable
...@@ -305,14 +320,19 @@ class TensorType(Type): ...@@ -305,14 +320,19 @@ class TensorType(Type):
def values_eq_approx(a, b, allow_remove_inf=False, allow_remove_nan=False, def values_eq_approx(a, b, allow_remove_inf=False, allow_remove_nan=False,
rtol=None, atol=None): rtol=None, atol=None):
""" """
:param allow_remove_inf: If True, when there is an inf in a, Parameters
we allow any value in b in that position. ----------
Event -inf allow_remove_inf
:param allow_remove_nan: If True, when there is a nan in a, If True, when there is an inf in a, we allow any value in b in
we allow any value in b in that position. that position. Event -inf
Event +-inf allow_remove_nan
:param rtol: relative tolerance, passed to _allclose If True, when there is a nan in a, we allow any value in b in
:param atol: absolute tolerance, passed to _allclose that position. Event +-inf
rtol
Relative tolerance, passed to _allclose.
atol
Absolute tolerance, passed to _allclose.
""" """
if isinstance(a, numpy.ndarray) and isinstance(b, numpy.ndarray): if isinstance(a, numpy.ndarray) and isinstance(b, numpy.ndarray):
if a.shape != b.shape: if a.shape != b.shape:
...@@ -389,7 +409,8 @@ class TensorType(Type): ...@@ -389,7 +409,8 @@ class TensorType(Type):
ndim = property(lambda self: len(self.broadcastable), ndim = property(lambda self: len(self.broadcastable),
doc="number of dimensions") doc="number of dimensions")
"""Number of dimensions """
Number of dimensions.
This read-only property is the preferred way to get the number of This read-only property is the preferred way to get the number of
dimensions of a `TensorType`. dimensions of a `TensorType`.
...@@ -397,12 +418,15 @@ class TensorType(Type): ...@@ -397,12 +418,15 @@ class TensorType(Type):
""" """
def make_variable(self, name=None): def make_variable(self, name=None):
"""Return a `TensorVariable` of this type """
Return a `TensorVariable` of this type.
:Parameters: Parameters
- `name`: str ----------
name : str
A pretty name to identify this `Variable` when printing and A pretty name to identify this `Variable` when printing and
debugging debugging
""" """
return self.Variable(self, name=name) return self.Variable(self, name=name)
...@@ -430,7 +454,10 @@ class TensorType(Type): ...@@ -430,7 +454,10 @@ class TensorType(Type):
# "TensorType{%s, %s}" % (str(self.dtype), str(self.broadcastable)) # "TensorType{%s, %s}" % (str(self.dtype), str(self.broadcastable))
def c_declare(self, name, sub, check_input=True): def c_declare(self, name, sub, check_input=True):
"""Override `CLinkerType.c_declare` """ """
Override `CLinkerType.c_declare`.
"""
if(check_input): if(check_input):
check = """ check = """
typedef %(dtype)s dtype_%(name)s; typedef %(dtype)s dtype_%(name)s;
...@@ -444,13 +471,19 @@ class TensorType(Type): ...@@ -444,13 +471,19 @@ class TensorType(Type):
return declaration + check return declaration + check
def c_init(self, name, sub): def c_init(self, name, sub):
"""Override `CLinkerType.c_init` """ """
Override `CLinkerType.c_init`.
"""
return """ return """
%(name)s = NULL; %(name)s = NULL;
""" % dict(sub, name=name, type_num=self.dtype_specs()[2]) """ % dict(sub, name=name, type_num=self.dtype_specs()[2])
def c_extract(self, name, sub, check_input=True): def c_extract(self, name, sub, check_input=True):
"""Override `CLinkerType.c_extract` """ """
Override `CLinkerType.c_extract`.
"""
if(check_input): if(check_input):
check = """ check = """
%(name)s = NULL; %(name)s = NULL;
...@@ -509,7 +542,10 @@ class TensorType(Type): ...@@ -509,7 +542,10 @@ class TensorType(Type):
""" % dict(sub, name=name, type_num=self.dtype_specs()[2]) """ % dict(sub, name=name, type_num=self.dtype_specs()[2])
def c_cleanup(self, name, sub): def c_cleanup(self, name, sub):
"""Override `CLinkerType.c_cleanup` """ """
Override `CLinkerType.c_cleanup`.
"""
return """ return """
if (%(name)s) { if (%(name)s) {
Py_XDECREF(%(name)s); Py_XDECREF(%(name)s);
...@@ -517,7 +553,10 @@ class TensorType(Type): ...@@ -517,7 +553,10 @@ class TensorType(Type):
""" % locals() """ % locals()
def c_sync(self, name, sub): def c_sync(self, name, sub):
"""Override `CLinkerType.c_sync` """ """
Override `CLinkerType.c_sync`.
"""
fail = sub['fail'] fail = sub['fail']
type_num = self.dtype_specs()[2] type_num = self.dtype_specs()[2]
return """ return """
...@@ -558,7 +597,10 @@ class TensorType(Type): ...@@ -558,7 +597,10 @@ class TensorType(Type):
""" % locals() """ % locals()
def c_headers(self): def c_headers(self):
"""Override `CLinkerObject.c_headers` """ """
Override `CLinkerObject.c_headers`.
"""
return scal.get_scalar_type(self.dtype).c_headers() return scal.get_scalar_type(self.dtype).c_headers()
def c_libraries(self): def c_libraries(self):
...@@ -568,7 +610,10 @@ class TensorType(Type): ...@@ -568,7 +610,10 @@ class TensorType(Type):
return scal.get_scalar_type(self.dtype).c_compile_args() return scal.get_scalar_type(self.dtype).c_compile_args()
def c_support_code(self): def c_support_code(self):
"""Override `CLinkerObject.c_support_code` """ """
Override `CLinkerObject.c_support_code`.
"""
return scal.get_scalar_type(self.dtype).c_support_code() return scal.get_scalar_type(self.dtype).c_support_code()
def c_init_code(self): def c_init_code(self):
...@@ -584,6 +629,7 @@ class TensorType(Type): ...@@ -584,6 +629,7 @@ class TensorType(Type):
def value_zeros(self, shape): def value_zeros(self, shape):
""" """
Create an numpy ndarray full of 0 values. Create an numpy ndarray full of 0 values.
""" """
return numpy.zeros(shape, dtype=self.dtype) return numpy.zeros(shape, dtype=self.dtype)
...@@ -604,17 +650,33 @@ class TensorType(Type): ...@@ -604,17 +650,33 @@ class TensorType(Type):
``get_size()`` will be called on the output of this function ``get_size()`` will be called on the output of this function
when printing the memory profile. when printing the memory profile.
:param obj: The object that this Type represents during execution Parameters
:return: Python object that ``self.get_size()`` understands ----------
obj
The object that this Type represents during execution.
Returns
-------
object
Python object that ``self.get_size()`` understands.
""" """
return obj.shape return obj.shape
def get_size(self, shape_info): def get_size(self, shape_info):
""" Number of bytes taken by the object represented by shape_info. """
Number of bytes taken by the object represented by shape_info.
Parameters
----------
shape_info
The output of the call to get_shape_info().
Returns
-------
int
The number of bytes taken by the object described by ``shape_info``.
:param shape_info: the output of the call to get_shape_info()
:return: the number of bytes taken by the object described by
``shape_info``.
""" """
if shape_info: if shape_info:
return numpy.prod(shape_info) * numpy.dtype(self.dtype).itemsize return numpy.prod(shape_info) * numpy.dtype(self.dtype).itemsize
......
...@@ -105,6 +105,7 @@ SliceType.Constant = SliceConstant ...@@ -105,6 +105,7 @@ SliceType.Constant = SliceConstant
class NoneTypeT(Generic): class NoneTypeT(Generic):
""" """
Inherit from Generic to have c code working. Inherit from Generic to have c code working.
""" """
def filter(self, x, strict=False, allow_downcast=None): def filter(self, x, strict=False, allow_downcast=None):
......
...@@ -6,7 +6,8 @@ from theano.gof.utils import hash_from_code ...@@ -6,7 +6,8 @@ from theano.gof.utils import hash_from_code
def hash_from_ndarray(data): def hash_from_ndarray(data):
"""Return a hash from an ndarray """
Return a hash from an ndarray.
It takes care of the data, shapes, strides and dtype. It takes care of the data, shapes, strides and dtype.
...@@ -32,23 +33,31 @@ def hash_from_ndarray(data): ...@@ -32,23 +33,31 @@ def hash_from_ndarray(data):
def shape_of_variables(fgraph, input_shapes): def shape_of_variables(fgraph, input_shapes):
""" """
Compute the numeric shape of all intermediate variables given input shapes Compute the numeric shape of all intermediate variables given input shapes.
Inputs: Parameters
fgraph - the theano.FunctionGraph in question ----------
input_shapes - a dict mapping input to shape fgraph
The theano.FunctionGraph in question.
input_shapes : dict
A dict mapping input to shape.
Outputs: Returns
shapes - a dict mapping variable to shape -------
shapes : dict
A dict mapping variable to shape
WARNING : This modifies the fgraph. Not pure. .. warning:: This modifies the fgraph. Not pure.
Examples
--------
>>> import theano >>> import theano
>>> x = theano.tensor.matrix('x') >>> x = theano.tensor.matrix('x')
>>> y = x[512:]; y.name = 'y' >>> y = x[512:]; y.name = 'y'
>>> fgraph = theano.FunctionGraph([x], [y], clone=False) >>> fgraph = theano.FunctionGraph([x], [y], clone=False)
>>> shape_of_variables(fgraph, {x: (1024, 1024)}) >>> shape_of_variables(fgraph, {x: (1024, 1024)})
{y: (512, 1024), x: (1024, 1024)} {y: (512, 1024), x: (1024, 1024)}
""" """
if not hasattr(fgraph, 'shape_feature'): if not hasattr(fgraph, 'shape_feature'):
......
...@@ -22,8 +22,9 @@ def equal_slices(s1, s2): ...@@ -22,8 +22,9 @@ def equal_slices(s1, s2):
class AsTensorError(TypeError): class AsTensorError(TypeError):
"""Raised when as_tensor_variable isn't able to create a """
TensorVariable. Raised when as_tensor_variable isn't able to create a TensorVariable.
""" """
pass pass
...@@ -254,8 +255,11 @@ class _tensor_py_operators: ...@@ -254,8 +255,11 @@ class _tensor_py_operators:
def transpose(self, *axes): def transpose(self, *axes):
""" """
Return `tensor.transpose(self, axes)`
or `tensor.transpose(self, axes[0])` Returns
-------
object
`tensor.transpose(self, axes)` or `tensor.transpose(self, axes[0])`.
If only one `axes` argument is provided and it is iterable, then it is If only one `axes` argument is provided and it is iterable, then it is
assumed to be the entire axes tuple, and passed intact to assumed to be the entire axes tuple, and passed intact to
...@@ -298,16 +302,18 @@ class _tensor_py_operators: ...@@ -298,16 +302,18 @@ class _tensor_py_operators:
def reshape(self, shape, ndim=None): def reshape(self, shape, ndim=None):
"""Return a reshaped view/copy of this variable. """Return a reshaped view/copy of this variable.
:param shape: something that can be converted to a symbolic vector of Parameters
integers ----------
shape
Something that can be converted to a symbolic vector of integers.
ndim
The length of the shape. Passing None here means for
Theano to try and guess the length of `shape`.
:param ndim: the length of the shape. Passing None here means for .. warning:: This has a different signature than numpy's
theano to try and guess the length of `shape`.
* warning-- this has a different signature than numpy's
ndarray.reshape! ndarray.reshape!
in numpy you do not need to wrap the shape arguments In numpy you do not need to wrap the shape arguments
in a tuple, in theano you do need to in a tuple, in theano you do need to.
""" """
...@@ -323,21 +329,29 @@ class _tensor_py_operators: ...@@ -323,21 +329,29 @@ class _tensor_py_operators:
Reorder the dimensions of this variable, optionally inserting Reorder the dimensions of this variable, optionally inserting
broadcasted dimensions. broadcasted dimensions.
:param pattern: list/tuple of int mixed with 'x' for broadcastable Parameters
dimensions ----------
pattern
List/tuple of int mixed with 'x' for broadcastable dimensions.
Examples
--------
For example, to create a 3D view of a [2D] matrix, call For example, to create a 3D view of a [2D] matrix, call
``dimshuffle([0,'x',1])``. This will create a 3D view such that the ``dimshuffle([0,'x',1])``. This will create a 3D view such that the
middle dimension is an implicit broadcasted dimension. To do the same middle dimension is an implicit broadcasted dimension. To do the same
thing on the transpose of that matrix, call thing on the transpose of that matrix, call ``dimshuffle([1, 'x', 0])``.
``dimshuffle([1, 'x', 0])``.
Notes
-----
This function supports the pattern passed as a tuple, or as a This function supports the pattern passed as a tuple, or as a
variable-length argument (e.g. ``a.dimshuffle(pattern)`` is equivalent variable-length argument (e.g. ``a.dimshuffle(pattern)`` is equivalent
to ``a.dimshuffle(*pattern)`` where ``pattern`` is a list/tuple of ints to ``a.dimshuffle(*pattern)`` where ``pattern`` is a list/tuple of ints
mixed with 'x' characters). mixed with 'x' characters).
For more information, see `DimShuffle`. See Also
--------
DimShuffle
""" """
if (len(pattern) == 1) and (isinstance(pattern[0], (list, tuple))): if (len(pattern) == 1) and (isinstance(pattern[0], (list, tuple))):
pattern = pattern[0] pattern = pattern[0]
...@@ -524,13 +538,17 @@ class _tensor_py_operators: ...@@ -524,13 +538,17 @@ class _tensor_py_operators:
"""The rank of this tensor.""" """The rank of this tensor."""
broadcastable = property(lambda self: self.type.broadcastable) broadcastable = property(lambda self: self.type.broadcastable)
"""The broadcastable signature of this tensor. """
The broadcastable signature of this tensor.
See Also
--------
broadcasting
See :doc:`broadcasting` for details.
""" """
dtype = property(lambda self: self.type.dtype) dtype = property(lambda self: self.type.dtype)
""" The dtype of this tensor. """ """The dtype of this tensor."""
# extra pseudo-operator symbols # extra pseudo-operator symbols
def __dot__(left, right): def __dot__(left, right):
...@@ -542,13 +560,13 @@ class _tensor_py_operators: ...@@ -542,13 +560,13 @@ class _tensor_py_operators:
dot = __dot__ dot = __dot__
def sum(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): def sum(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
"""See `theano.tensor.sum`""" """See `theano.tensor.sum`."""
return theano.tensor.basic.sum(self, axis=axis, return theano.tensor.basic.sum(self, axis=axis,
dtype=dtype, keepdims=keepdims, dtype=dtype, keepdims=keepdims,
acc_dtype=acc_dtype) acc_dtype=acc_dtype)
def prod(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): def prod(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
"""See `theano.tensor.prod`""" """See `theano.tensor.prod`."""
return theano.tensor.basic.prod(self, axis=axis, return theano.tensor.basic.prod(self, axis=axis,
dtype=dtype, keepdims=keepdims, dtype=dtype, keepdims=keepdims,
acc_dtype=acc_dtype) acc_dtype=acc_dtype)
...@@ -564,49 +582,49 @@ class _tensor_py_operators: ...@@ -564,49 +582,49 @@ class _tensor_py_operators:
theano.tensor.basic.abs_(self), L).sum(axis=axis), 1.0 / L) theano.tensor.basic.abs_(self), L).sum(axis=axis), 1.0 / L)
def mean(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): def mean(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
"""See `theano.tensor.mean`""" """See `theano.tensor.mean`."""
return theano.tensor.basic.mean(self, axis=axis, return theano.tensor.basic.mean(self, axis=axis,
dtype=dtype, keepdims=keepdims, dtype=dtype, keepdims=keepdims,
acc_dtype=acc_dtype) acc_dtype=acc_dtype)
def var(self, axis=None, keepdims=False): def var(self, axis=None, keepdims=False):
"""See `theano.tensor.var`""" """See `theano.tensor.var`."""
return theano.tensor.basic.var(self, axis, keepdims=keepdims) return theano.tensor.basic.var(self, axis, keepdims=keepdims)
def std(self, axis=None, keepdims=False): def std(self, axis=None, keepdims=False):
"""See `theano.tensor.std`""" """See `theano.tensor.std`."""
return theano.tensor.basic.std(self, axis, keepdims=keepdims) return theano.tensor.basic.std(self, axis, keepdims=keepdims)
def min(self, axis=None, keepdims=False): def min(self, axis=None, keepdims=False):
"""See `theano.tensor.min`""" """See `theano.tensor.min`."""
return theano.tensor.basic.min(self, axis, keepdims=keepdims) return theano.tensor.basic.min(self, axis, keepdims=keepdims)
def max(self, axis=None, keepdims=False): def max(self, axis=None, keepdims=False):
"""See `theano.tensor.max`""" """See `theano.tensor.max`."""
return theano.tensor.basic.max(self, axis, keepdims=keepdims) return theano.tensor.basic.max(self, axis, keepdims=keepdims)
def argmin(self, axis=None, keepdims=False): def argmin(self, axis=None, keepdims=False):
"""See `theano.tensor.argmin`""" """See `theano.tensor.argmin`."""
return theano.tensor.basic.argmin(self, axis, keepdims=keepdims) return theano.tensor.basic.argmin(self, axis, keepdims=keepdims)
def argmax(self, axis=None, keepdims=False): def argmax(self, axis=None, keepdims=False):
"""See `theano.tensor.argmax`""" """See `theano.tensor.argmax`."""
return theano.tensor.basic.argmax(self, axis, keepdims=keepdims) return theano.tensor.basic.argmax(self, axis, keepdims=keepdims)
def nonzero(self, return_matrix=False): def nonzero(self, return_matrix=False):
"""See `theano.tensor.nonzero`""" """See `theano.tensor.nonzero`."""
return theano.tensor.basic.nonzero(self, return_matrix=return_matrix) return theano.tensor.basic.nonzero(self, return_matrix=return_matrix)
def nonzero_values(self): def nonzero_values(self):
"""See `theano.tensor.nonzero_values`""" """See `theano.tensor.nonzero_values`."""
return theano.tensor.basic.nonzero_values(self) return theano.tensor.basic.nonzero_values(self)
def sort(self, axis=-1, kind='quicksort', order=None): def sort(self, axis=-1, kind='quicksort', order=None):
"""See `theano.tensor.sort`""" """See `theano.tensor.sort`."""
return theano.tensor.sort(self, axis, kind, order) return theano.tensor.sort(self, axis, kind, order)
def argsort(self, axis=-1, kind='quicksort', order=None): def argsort(self, axis=-1, kind='quicksort', order=None):
"""See `theano.tensor.argsort`""" """See `theano.tensor.argsort`."""
return theano.tensor.argsort(self, axis, kind, order) return theano.tensor.argsort(self, axis, kind, order)
def clip(self, a_min, a_max): def clip(self, a_min, a_max):
...@@ -614,17 +632,17 @@ class _tensor_py_operators: ...@@ -614,17 +632,17 @@ class _tensor_py_operators:
return theano.tensor.basic.clip(self, a_min, a_max) return theano.tensor.basic.clip(self, a_min, a_max)
def conj(self): def conj(self):
"""See `theano.tensor.conj`""" """See `theano.tensor.conj`."""
return theano.tensor.basic.conj(self) return theano.tensor.basic.conj(self)
conjugate = conj conjugate = conj
def repeat(self, repeats, axis=None): def repeat(self, repeats, axis=None):
"""See `theano.tensor.repeat`""" """See `theano.tensor.repeat`."""
return theano.tensor.extra_ops.repeat(self, repeats, axis) return theano.tensor.extra_ops.repeat(self, repeats, axis)
def round(self, mode="half_away_from_zero"): def round(self, mode="half_away_from_zero"):
"""See `theano.tensor.round`""" """See `theano.tensor.round`."""
return theano.tensor.basic.round(self, mode) return theano.tensor.basic.round(self, mode)
def trace(self): def trace(self):
...@@ -646,12 +664,13 @@ class _tensor_py_operators: ...@@ -646,12 +664,13 @@ class _tensor_py_operators:
return theano.tensor.extra_ops.cumprod(self, axis) return theano.tensor.extra_ops.cumprod(self, axis)
def ptp(self, axis=None): def ptp(self, axis=None):
"""see 'theano.tensor.ptp'""" """See 'theano.tensor.ptp'."""
return theano.tensor.ptp(self, axis) return theano.tensor.ptp(self, axis)
def swapaxes(self, axis1, axis2): def swapaxes(self, axis1, axis2):
"""Return 'tensor.swapaxes(self, axis1, axis2) """
Return 'tensor.swapaxes(self, axis1, axis2).
If a matrix is provided with the right axes, its transpose If a matrix is provided with the right axes, its transpose
will be returned. will be returned.
...@@ -660,32 +679,38 @@ class _tensor_py_operators: ...@@ -660,32 +679,38 @@ class _tensor_py_operators:
return theano.tensor.basic.swapaxes(self, axis1, axis2) return theano.tensor.basic.swapaxes(self, axis1, axis2)
def fill(self, value): def fill(self, value):
"""Fill inputted tensor with the assigned value""" """Fill inputted tensor with the assigned value."""
return theano.tensor.basic.fill(self, value) return theano.tensor.basic.fill(self, value)
def choose(self, a, choices, out=None, mode='raise'): def choose(self, a, choices, out=None, mode='raise'):
"""Construct an array from an index array and a set of arrays to choose from.""" """
Construct an array from an index array and a set of arrays to choose
from.
"""
return theano.tensor.basic.choose(self, a, choices, out=None, return theano.tensor.basic.choose(self, a, choices, out=None,
mode='raise') mode='raise')
def squeeze(self): def squeeze(self):
"""Remove broadcastable dimensions from """
the shape of an array. Remove broadcastable dimensions from the shape of an array.
It returns the input array, but with the broadcastable dimensions
removed. This is always `x` itself or a view into `x`.
It returns the input array, but with the
broadcastable dimensions removed. This is
always `x` itself or a view into `x`.
""" """
return theano.tensor.extra_ops.squeeze(self) return theano.tensor.extra_ops.squeeze(self)
def compress(self, a, axis=None): def compress(self, a, axis=None):
"""Return selected slices only """Return selected slices only."""
"""
return theano.tensor.extra_ops.compress(self, a, axis=axis) return theano.tensor.extra_ops.compress(self, a, axis=axis)
class TensorVariable(_tensor_py_operators, Variable): class TensorVariable(_tensor_py_operators, Variable):
"""Subclass to add the tensor operators to the basic `Variable` class.""" """
Subclass to add the tensor operators to the basic `Variable` class.
"""
def __init__(self, type, owner=None, index=None, name=None): def __init__(self, type, owner=None, index=None, name=None):
super(TensorVariable, self).__init__(type, owner=owner, super(TensorVariable, self).__init__(type, owner=owner,
...@@ -721,9 +746,11 @@ TensorType.Variable = TensorVariable ...@@ -721,9 +746,11 @@ TensorType.Variable = TensorVariable
class TensorConstantSignature(tuple): class TensorConstantSignature(tuple):
"""A Signature object for comparing TensorConstant instances """
A Signature object for comparing TensorConstant instances.
An instance is a pair: (Type instance, ndarray). An instance is a pair: (Type instance, ndarray).
""" """
def __eq__(self, other): def __eq__(self, other):
if type(self) != type(other): if type(self) != type(other):
...@@ -814,6 +841,7 @@ class TensorConstant(_tensor_py_operators, Constant): ...@@ -814,6 +841,7 @@ class TensorConstant(_tensor_py_operators, Constant):
"""Subclass to add the tensor operators to the basic `Constant` class. """Subclass to add the tensor operators to the basic `Constant` class.
To create a TensorConstant, use the `constant` function in this module. To create a TensorConstant, use the `constant` function in this module.
""" """
def __init__(self, type, data, name=None): def __init__(self, type, data, name=None):
Constant.__init__(self, type, data, name) Constant.__init__(self, type, data, name)
......
...@@ -7,6 +7,7 @@ from theano import scalar ...@@ -7,6 +7,7 @@ from theano import scalar
class XlogX(scalar.UnaryScalarOp): class XlogX(scalar.UnaryScalarOp):
""" """
Compute X * log(X), with special case 0 log(0) = 0. Compute X * log(X), with special case 0 log(0) = 0.
""" """
@staticmethod @staticmethod
def st_impl(x): def st_impl(x):
...@@ -39,6 +40,7 @@ xlogx = Elemwise(scalar_xlogx, name='xlogx') ...@@ -39,6 +40,7 @@ xlogx = Elemwise(scalar_xlogx, name='xlogx')
class XlogY0(scalar.BinaryScalarOp): class XlogY0(scalar.BinaryScalarOp):
""" """
Compute X * log(Y), with special case 0 log(0) = 0. Compute X * log(Y), with special case 0 log(0) = 0.
""" """
@staticmethod @staticmethod
def st_impl(x, y): def st_impl(x, y):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论