Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
6304a061
提交
6304a061
authored
8月 20, 2015
作者:
abergeron
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3293 from harlouci/numpydoc_tensor
Numpydoc tensor
上级
5e536853
8e88a292
隐藏空白字符变更
内嵌
并排
正在显示
31 个修改的文件
包含
3112 行增加
和
1991 行删除
+3112
-1991
basic.py
theano/tensor/basic.py
+677
-449
blas.py
theano/tensor/blas.py
+82
-58
blas_headers.py
theano/tensor/blas_headers.py
+3
-1
elemwise.py
theano/tensor/elemwise.py
+173
-151
elemwise_cgen.py
theano/tensor/elemwise_cgen.py
+49
-44
extra_ops.py
theano/tensor/extra_ops.py
+154
-72
io.py
theano/tensor/io.py
+69
-36
nlinalg.py
theano/tensor/nlinalg.py
+78
-49
Conv3D.py
theano/tensor/nnet/Conv3D.py
+49
-26
ConvGrad3D.py
theano/tensor/nnet/ConvGrad3D.py
+5
-1
ConvTransp3D.py
theano/tensor/nnet/ConvTransp3D.py
+17
-5
conv.py
theano/tensor/nnet/conv.py
+169
-146
conv3d2d.py
theano/tensor/nnet/conv3d2d.py
+68
-32
neighbours.py
theano/tensor/nnet/neighbours.py
+109
-100
nnet.py
theano/tensor/nnet/nnet.py
+145
-88
sigm.py
theano/tensor/nnet/sigm.py
+155
-77
opt.py
theano/tensor/opt.py
+267
-135
opt_uncanonicalize.py
theano/tensor/opt_uncanonicalize.py
+8
-5
raw_random.py
theano/tensor/raw_random.py
+155
-109
shared_randomstreams.py
theano/tensor/shared_randomstreams.py
+74
-48
sharedvar.py
theano/tensor/sharedvar.py
+20
-12
conv.py
theano/tensor/signal/conv.py
+24
-15
downsample.py
theano/tensor/signal/downsample.py
+103
-101
slinalg.py
theano/tensor/slinalg.py
+41
-19
sort.py
theano/tensor/sort.py
+24
-16
subtensor.py
theano/tensor/subtensor.py
+186
-90
type.py
theano/tensor/type.py
+110
-48
type_other.py
theano/tensor/type_other.py
+1
-0
utils.py
theano/tensor/utils.py
+17
-8
var.py
theano/tensor/var.py
+78
-50
xlogx.py
theano/tensor/xlogx.py
+2
-0
没有找到文件。
theano/tensor/basic.py
浏览文件 @
6304a061
"""`Type` and `Op` classes to work with numpy.ndarrays symbolically."""
"""`Type` and `Op` classes to work with numpy.ndarrays symbolically."""
import
sys
import
sys
import
warnings
import
warnings
...
@@ -63,8 +62,10 @@ class ShapeError(Exception):
...
@@ -63,8 +62,10 @@ class ShapeError(Exception):
def
check_equal_numpy
(
x
,
y
):
def
check_equal_numpy
(
x
,
y
):
"""
"""
Returns True iff x and y are equal (checks the dtype and
Return True iff x and y are equal.
shape if x and y are numpy.ndarray instances).
Checks the dtype and shape if x and y are numpy.ndarray instances.
"""
"""
if
isinstance
(
x
,
numpy
.
ndarray
)
and
isinstance
(
y
,
numpy
.
ndarray
):
if
isinstance
(
x
,
numpy
.
ndarray
)
and
isinstance
(
y
,
numpy
.
ndarray
):
return
(
x
.
dtype
==
y
.
dtype
and
x
.
shape
==
y
.
shape
and
return
(
x
.
dtype
==
y
.
dtype
and
x
.
shape
==
y
.
shape
and
...
@@ -89,6 +90,7 @@ def constructor(f):
...
@@ -89,6 +90,7 @@ def constructor(f):
Make `f` appear as a constructor in the oplist (`gen_oplist`,
Make `f` appear as a constructor in the oplist (`gen_oplist`,
doc/oplist.txt).
doc/oplist.txt).
"""
"""
__oplist_constructor_list
.
append
(
f
)
__oplist_constructor_list
.
append
(
f
)
return
f
return
f
...
@@ -107,8 +109,7 @@ if 0:
...
@@ -107,8 +109,7 @@ if 0:
# - JB 20100226
# - JB 20100226
def
as_cuda_or_tensor_variable
(
x
,
name
=
None
,
ndim
=
None
):
def
as_cuda_or_tensor_variable
(
x
,
name
=
None
,
ndim
=
None
):
"""
"""
This function do the same as_tensor_variable, but don't
Do the same as as_tensor_variable, but do not transfer the value to the gpu.
transfert the value on the gpu
"""
"""
if
hasattr
(
x
,
'_as_CudaNdarrayVariable'
):
if
hasattr
(
x
,
'_as_CudaNdarrayVariable'
):
# TODO: pass name and ndim arguments
# TODO: pass name and ndim arguments
...
@@ -117,29 +118,31 @@ if 0:
...
@@ -117,29 +118,31 @@ if 0:
def
as_tensor_variable
(
x
,
name
=
None
,
ndim
=
None
):
def
as_tensor_variable
(
x
,
name
=
None
,
ndim
=
None
):
"""Return `x`, transformed into a `TensorType`
"""Return `x`, transformed into a `TensorType`.
This function is often used by `make_node` methods of `Op`
This function is often used by `make_node` methods of `Op` subclasses
subclasses to turn ndarrays, numbers, `Scalar` instances, `Apply`
to turn ndarrays, numbers, `Scalar` instances, `Apply` instances and
instances and `TensorType` instances into valid input list
`TensorType` instances into valid input list elements.
elements.
Parameters
:Parameters:
----------
- `x`: Apply instance, Variable instance, numpy.ndarray, or number
x : Apply instance, Variable instance, numpy.ndarray, or number
This thing will be transformed into a `Variable` in a sensible way. An
This thing will be transformed into a `Variable` in a sensible way. An
ndarray argument will not be copied, but a list of numbers will be
ndarray argument will not be copied, but a list of numbers will be
copied to make an ndarray.
copied to make an ndarray.
- `name`: str or None
name : str or None
If a new `Variable` instance is created, it will be named with this
If a new `Variable` instance is created, it will be named with this
string.
string.
- `ndim`: None or integer
ndim : None or integer
Return a Variable with this many dimensions. Raise TypeError if it's
Return a Variable with this many dimensions. Raise TypeError if it's
not possible.
not possible.
:Exceptions:
Raises
- `ValueError`: raised if an `Apply` with more then one output is fetched
------
- `AsTensorError`: raised if `x` cannot be converted to a TensorType
ValueError
Variable
If an `Apply` with more than one output is fetched.
AsTensorError
If `x` cannot be converted to a TensorType Variable.
"""
"""
if
hasattr
(
x
,
'_as_TensorVariable'
):
if
hasattr
(
x
,
'_as_TensorVariable'
):
...
@@ -231,16 +234,18 @@ class NumpyAutocaster(object):
...
@@ -231,16 +234,18 @@ class NumpyAutocaster(object):
float32);
float32);
- if no data type can represent `x` without loss of precision, then
- if no data type can represent `x` without loss of precision, then
the last data type in the tuple will be used.
the last data type in the tuple will be used.
Parameters
----------
dtypes: tuple of strings
The ordered list of preferred data types (only used when
`config.cast_policy` is set to 'custom', see the `NumpyAutocaster`
help for details).
"""
"""
def
__init__
(
self
,
dtypes
):
"""
Constructor.
:type dtypes: Tuple of strings.
def
__init__
(
self
,
dtypes
):
:param dtypes: The ordered list of preferred data types (only used when
`config.cast_policy` is set to 'custom', see the `NumpyAutocaster` help
for details).
"""
self
.
dtypes
=
tuple
(
dtypes
)
self
.
dtypes
=
tuple
(
dtypes
)
def
__call__
(
self
,
x
):
def
__call__
(
self
,
x
):
...
@@ -312,17 +317,20 @@ autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))
...
@@ -312,17 +317,20 @@ autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))
#
#
class
autocast_float_as
(
object
):
class
autocast_float_as
(
object
):
"""
"""
Temporarily adjust autocasting behavior.
This class makes it possible to temporarily and locally adjust autocasting
This class makes it possible to temporarily and locally adjust autocasting
behavior when `config.cast_policy` is set to 'custom'.
behavior when `config.cast_policy` is set to 'custom'.
If `config.cast_policy` is not 'custom', an exception is raised.
If `config.cast_policy` is not 'custom', an exception is raised.
This class might be convenient in some code, but it definitely
helps to test the autocasting mechanism.
For example:
Examples
--------
>>> with autocast_float_as('float32'):
>>> with autocast_float_as('float32'):
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
This class might be convenient in some code, but it definitely
helps to test the autocasting mechanism.
"""
"""
def
__init__
(
self
,
*
dtypes
):
def
__init__
(
self
,
*
dtypes
):
self
.
dtypes
=
dtypes
self
.
dtypes
=
dtypes
...
@@ -339,11 +347,14 @@ class autocast_float_as(object):
...
@@ -339,11 +347,14 @@ class autocast_float_as(object):
def
constant_or_value
(
x
,
rtype
,
name
=
None
,
ndim
=
None
,
dtype
=
None
):
def
constant_or_value
(
x
,
rtype
,
name
=
None
,
ndim
=
None
,
dtype
=
None
):
"""Return a symbolic `Constant` with value `x`
"""Return a symbolic `Constant` with value `x`
.
:Exceptions:
Raises
- `TypeError`: `x` could not be converted to a numpy.ndarray
------
- `ValueError`: `x` could not be expanded to have ndim dimensions
TypeError
`x` could not be converted to a numpy.ndarray.
ValueError
`x` could not be expanded to have ndim dimensions.
"""
"""
if
dtype
is
not
None
:
if
dtype
is
not
None
:
...
@@ -507,8 +518,13 @@ class EmptyConstantError(NotScalarConstantError):
...
@@ -507,8 +518,13 @@ class EmptyConstantError(NotScalarConstantError):
def
numpy_scalar
(
data
):
def
numpy_scalar
(
data
):
""" Return a scalar stored in a numpy ndarray, or raise
""" Return a scalar stored in a numpy ndarray.
NotScalarConstantError if the numpy ndarray is not a scalar
Raises
------
NotScalarConstantError
If the numpy ndarray is not a scalar.
"""
"""
# handle case where data is numpy.array([])
# handle case where data is numpy.array([])
...
@@ -536,26 +552,29 @@ get_scalar_constant_value_elemwises = (
...
@@ -536,26 +552,29 @@ get_scalar_constant_value_elemwises = (
def
get_scalar_constant_value
(
orig_v
,
elemwise
=
True
,
def
get_scalar_constant_value
(
orig_v
,
elemwise
=
True
,
only_process_constants
=
False
):
only_process_constants
=
False
):
"""
return the constant scalar(0-D) value underlying variable `v`
"""
Return the constant scalar(0-D) value underlying variable `v`.
If v is the output of dimshuffles, fills, allocs, rebroadcasts,
If `v` is the output of dimshuffles, fills, allocs, rebroadcasts,
cast, OutputGuard, DeepCopyOp, ScalarFromTensor, ScalarOp,
cast, OutputGuard, DeepCopyOp, ScalarFromTensor, ScalarOp, Elemwise
Elemwise and some pattern with Subtensor,
and some pattern with Subtensor, this function digs through them.
this function digs through them.
If `v` is not some view of constant scalar data, then raise a
If `v` is not some view of constant scalar data, then raise a
NotScalarConstantError.
NotScalarConstantError.
:param elemwise: If False, we won't try to go into elemwise.
Parameters
So this call is faster.
----------
elemwise : bool
If False, we won't try to go into elemwise. So this call is faster.
only_process_constants : bool
If True, we only attempt to obtain the value of `orig_v` if it's
directly constant and don't try to dig through dimshuffles, fills,
allocs, and other ops to figure out its value.
:param only_process_constants: If True, we only attempt to obtain
Notes
the value of `orig_v` if it's directly constant and don't
-----
try to dig through dimshuffles, fills, allocs, and other to figure
There may be another function similar to this one in the code,
out its value
.
but I'm not sure where it is
.
:note: There may be another function similar to this one in the
code, but I'm not sure where it is.
"""
"""
v
=
orig_v
v
=
orig_v
while
True
:
while
True
:
...
@@ -773,8 +792,14 @@ lscalar = TensorType('int64', ())
...
@@ -773,8 +792,14 @@ lscalar = TensorType('int64', ())
def
scalar
(
name
=
None
,
dtype
=
None
):
def
scalar
(
name
=
None
,
dtype
=
None
):
"""Return a symbolic scalar variable.
"""Return a symbolic scalar variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
Parameters
----------
dtype: numeric
None means to use theano.config.floatX.
name
A name to attach to this variable.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -803,8 +828,14 @@ lvector = TensorType('int64', (False, ))
...
@@ -803,8 +828,14 @@ lvector = TensorType('int64', (False, ))
def
vector
(
name
=
None
,
dtype
=
None
):
def
vector
(
name
=
None
,
dtype
=
None
):
"""Return a symbolic vector variable.
"""Return a symbolic vector variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
Parameters
----------
dtype: numeric
None means to use theano.config.floatX.
name
A name to attach to this variable.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -830,8 +861,14 @@ lmatrix = TensorType('int64', (False, False))
...
@@ -830,8 +861,14 @@ lmatrix = TensorType('int64', (False, False))
def
matrix
(
name
=
None
,
dtype
=
None
):
def
matrix
(
name
=
None
,
dtype
=
None
):
"""Return a symbolic matrix variable.
"""Return a symbolic matrix variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
Parameters
----------
dtype: numeric
None means to use theano.config.floatX.
name
A name to attach to this variable.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -857,8 +894,14 @@ lrow = TensorType('int64', (True, False))
...
@@ -857,8 +894,14 @@ lrow = TensorType('int64', (True, False))
def
row
(
name
=
None
,
dtype
=
None
):
def
row
(
name
=
None
,
dtype
=
None
):
"""Return a symbolic row variable (ndim=2, broadcastable=[True,False]).
"""Return a symbolic row variable (ndim=2, broadcastable=[True,False]).
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
Parameters
----------
dtype: numeric type
None means to use theano.config.floatX.
name
A name to attach to this variable.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -878,8 +921,14 @@ lcol = TensorType('int64', (False, True))
...
@@ -878,8 +921,14 @@ lcol = TensorType('int64', (False, True))
def
col
(
name
=
None
,
dtype
=
None
):
def
col
(
name
=
None
,
dtype
=
None
):
"""Return a symbolic column variable (ndim=2, broadcastable=[False,True]).
"""Return a symbolic column variable (ndim=2, broadcastable=[False,True]).
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
Parameters
----------
dtype : numeric
None means to use theano.config.floatX.
name
A name to attach to this variable.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -899,8 +948,14 @@ ltensor3 = TensorType('int64', ((False,) * 3))
...
@@ -899,8 +948,14 @@ ltensor3 = TensorType('int64', ((False,) * 3))
def
tensor3
(
name
=
None
,
dtype
=
None
):
def
tensor3
(
name
=
None
,
dtype
=
None
):
"""Return a symbolic 3-D variable.
"""Return a symbolic 3-D variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
Parameters
----------
dtype: numeric type
None means to use theano.config.floatX.
name
A name to attach to this variable.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -922,8 +977,14 @@ ltensor4 = TensorType('int64', ((False,) * 4))
...
@@ -922,8 +977,14 @@ ltensor4 = TensorType('int64', ((False,) * 4))
def
tensor4
(
name
=
None
,
dtype
=
None
):
def
tensor4
(
name
=
None
,
dtype
=
None
):
"""Return a symbolic 4-D variable.
"""Return a symbolic 4-D variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
Parameters
----------
dtype: numeric type
None means to use theano.config.floatX.
name
A name to attach to this variable.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -957,6 +1018,7 @@ def _scal_elemwise_with_nfunc(nfunc, nin, nout):
...
@@ -957,6 +1018,7 @@ def _scal_elemwise_with_nfunc(nfunc, nin, nout):
**destination** inputs it takes. That is, the function should
**destination** inputs it takes. That is, the function should
take nin+nout inputs. nout == 0 means that the numpy function
take nin+nout inputs. nout == 0 means that the numpy function
does not take a numpy array argument to put its result in.
does not take a numpy array argument to put its result in.
"""
"""
def
construct
(
symbol
):
def
construct
(
symbol
):
symbolname
=
symbol
.
__name__
symbolname
=
symbol
.
__name__
...
@@ -1183,7 +1245,9 @@ def cast(x, dtype):
...
@@ -1183,7 +1245,9 @@ def cast(x, dtype):
class
MaxAndArgmax
(
Op
):
class
MaxAndArgmax
(
Op
):
"""Calculate the max and argmax over a given axis or over all axes.
"""
Calculate the max and argmax over a given axis or over all axes.
"""
"""
nin
=
2
# tensor, axis
nin
=
2
# tensor, axis
nout
=
2
# max val, max idx
nout
=
2
# max val, max idx
...
@@ -1418,6 +1482,7 @@ def makeKeepDims(x, y, axis):
...
@@ -1418,6 +1482,7 @@ def makeKeepDims(x, y, axis):
Reintroduces in y with length one the axes of x which have been left out
Reintroduces in y with length one the axes of x which have been left out
in a prior reduction of x. With this option, the resulting tensor will
in a prior reduction of x. With this option, the resulting tensor will
broadcast correctly against the original tensor x.
broadcast correctly against the original tensor x.
"""
"""
x
=
as_tensor_variable
(
x
)
x
=
as_tensor_variable
(
x
)
y
=
as_tensor_variable
(
y
)
y
=
as_tensor_variable
(
y
)
...
@@ -1453,14 +1518,18 @@ def makeKeepDims(x, y, axis):
...
@@ -1453,14 +1518,18 @@ def makeKeepDims(x, y, axis):
def
max_and_argmax
(
a
,
axis
=
None
,
keepdims
=
False
):
def
max_and_argmax
(
a
,
axis
=
None
,
keepdims
=
False
):
"""
"""
Returns maximum elements and their indices obtained by iterating over
Returns maximum elements and their indices obtained by iterating over
given axis
given axis
.
When axis is None (the default value), the max is performed
When axis is None (the default value), the max is performed
over the flattened tensor.
over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in
Parameters
----------
keepdims : bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
will broadcast correctly against the original tensor.
"""
"""
out
,
argout
=
_max_and_argmax
(
a
,
axis
)
out
,
argout
=
_max_and_argmax
(
a
,
axis
)
...
@@ -1474,16 +1543,22 @@ def max_and_argmax(a, axis=None, keepdims=False):
...
@@ -1474,16 +1543,22 @@ def max_and_argmax(a, axis=None, keepdims=False):
@constructor
@constructor
def
max
(
x
,
axis
=
None
,
keepdims
=
False
):
def
max
(
x
,
axis
=
None
,
keepdims
=
False
):
"""
"""
Returns maximum elements obtained by iterating over given axis
Returns maximum elements obtained by iterating over given axis
.
When axis is None (the default value), the max is performed
When axis is None (the default value), the max is performed
over the flattened tensor.
over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in
Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
will broadcast correctly against the original tensor.
:note: we return an error as numpy when we reduce a dim with a shape of 0
Notes
-----
We return an error as numpy when we reduce a dim with a shape of 0.
"""
"""
# We have a choice of implementing this call with the
# We have a choice of implementing this call with the
...
@@ -1511,14 +1586,18 @@ def max(x, axis=None, keepdims=False):
...
@@ -1511,14 +1586,18 @@ def max(x, axis=None, keepdims=False):
@constructor
@constructor
def
argmax
(
x
,
axis
=
None
,
keepdims
=
False
):
def
argmax
(
x
,
axis
=
None
,
keepdims
=
False
):
"""
"""
Returns indices of maximum elements obtained by iterating over given axis
Returns indices of maximum elements obtained by iterating over given axis
.
When axis is None (the default value), the argmax is performed
When axis is None (the default value), the argmax is performed
over the flattened tensor.
over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in
Parameters
----------
keepdims : bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
will broadcast correctly against the original tensor.
"""
"""
# In python (using MaxAndArgmax.perform()) this leads to a wasteful
# In python (using MaxAndArgmax.perform()) this leads to a wasteful
# implementation that goes through the data twice instead of once
# implementation that goes through the data twice instead of once
...
@@ -1534,14 +1613,18 @@ def argmax(x, axis=None, keepdims=False):
...
@@ -1534,14 +1613,18 @@ def argmax(x, axis=None, keepdims=False):
@constructor
@constructor
def
min
(
x
,
axis
=
None
,
keepdims
=
False
):
def
min
(
x
,
axis
=
None
,
keepdims
=
False
):
"""
"""
Returns minimum elements obtained by iterating over given axis
Returns minimum elements obtained by iterating over given axis
.
When axis is None (the default value), the min is performed
When axis is None (the default value), the min is performed
over the flattened tensor.
over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in
Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
will broadcast correctly against the original tensor.
"""
"""
x
=
as_tensor_variable
(
x
)
x
=
as_tensor_variable
(
x
)
str_x_type
=
str
(
x
.
dtype
)
str_x_type
=
str
(
x
.
dtype
)
...
@@ -1555,14 +1638,18 @@ def min(x, axis=None, keepdims=False):
...
@@ -1555,14 +1638,18 @@ def min(x, axis=None, keepdims=False):
@constructor
@constructor
def
argmin
(
x
,
axis
=
None
,
keepdims
=
False
):
def
argmin
(
x
,
axis
=
None
,
keepdims
=
False
):
"""
"""
Returns indices of minimum elements obtained by iterating over given axis
Returns indices of minimum elements obtained by iterating over given axis
.
When axis is None (the default value), the argmin is performed
When axis is None (the default value), the argmin is performed
over the flattened tensor.
over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in
Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
will broadcast correctly against the original tensor.
"""
"""
x
=
as_tensor_variable
(
x
)
x
=
as_tensor_variable
(
x
)
str_x_type
=
str
(
x
.
dtype
)
str_x_type
=
str
(
x
.
dtype
)
...
@@ -1579,6 +1666,7 @@ def smallest(*args):
...
@@ -1579,6 +1666,7 @@ def smallest(*args):
Return the [elementwise] smallest of a variable number of arguments.
Return the [elementwise] smallest of a variable number of arguments.
Like python's min.
Like python's min.
"""
"""
if
len
(
args
)
==
2
:
if
len
(
args
)
==
2
:
a
,
b
=
args
a
,
b
=
args
...
@@ -1593,6 +1681,7 @@ def largest(*args):
...
@@ -1593,6 +1681,7 @@ def largest(*args):
Return the [elementwise] largest of a variable number of arguments.
Return the [elementwise] largest of a variable number of arguments.
Like python's max.
Like python's max.
"""
"""
if
len
(
args
)
==
2
:
if
len
(
args
)
==
2
:
a
,
b
=
args
a
,
b
=
args
...
@@ -1647,31 +1736,34 @@ def isinf(a):
...
@@ -1647,31 +1736,34 @@ def isinf(a):
def
allclose
(
a
,
b
,
rtol
=
1.e-5
,
atol
=
1.e-8
,
equal_nan
=
False
):
def
allclose
(
a
,
b
,
rtol
=
1.e-5
,
atol
=
1.e-8
,
equal_nan
=
False
):
"""
"""
Implement
s
Numpy's ``allclose`` on tensors.
Implement Numpy's ``allclose`` on tensors.
``absolute(a - b) <= (atol + rtol * absolute(b))``
``absolute(a - b) <= (atol + rtol * absolute(b))``
:note: Not a symmetric equation. See Numpy's documentation.
Parameters
----------
:param a: input to compare
a : tensor
:type a: tensor
Input to compare.
b : tensor
:param b: input to compare
Input to compare.
:type b: tensor
rtol : float
The relative tolerance parameter.
:param rtol: the relative tolerance parameter
atol : float
:type rtol: float
The absolute tolerance parameter.
equal_nan: bool
Whether to consider nan's in the same place to be close.
:param atol: the absolute tolerance parameter
Returns
:type atol: float
-------
bool
A boolean value (of type int8 returned by the tensor elementwise `all`
function) whether all elements in a and b are in the tolerance range
defined above.
:param equal_nan: whether to consider nan's in the same place to be close
Notes
:type equal_nan: bool
-----
Not a symmetric equation. See Numpy's documentation.
:returns: a boolean value (of type int8 returned by the tensor
elementwise `all` function) whether all elements in a and b are in
the tolerance range defined above.
:rtype: int8
"""
"""
return
all
(
isclose
(
a
,
b
,
rtol
,
atol
,
equal_nan
))
return
all
(
isclose
(
a
,
b
,
rtol
,
atol
,
equal_nan
))
...
@@ -1680,34 +1772,38 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
...
@@ -1680,34 +1772,38 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
"""
"""
Implements Numpy's ``isclose`` on tensors.
Implements Numpy's ``isclose`` on tensors.
The tolerance values are positive, typically very small numbers.
The
The tolerance values are positive, typically very small numbers. The
relative difference (`rtol` * abs(`b`)) and the absolute difference
relative difference (`rtol` * abs(`b`)) and the absolute difference
`atol` are added together to compare against the absolute difference
`atol` are added together to compare against the absolute difference
between `a` and `b`.
between `a` and `b`.
``absolute(a - b) <= (atol + rtol * absolute(b))``
``absolute(a - b) <= (atol + rtol * absolute(b))``
:note: Not a symmetric equation. See Numpy's documentation.
Parameters
----------
:param a: input to compare
a : tensor
:type a: tensor
Input to compare.
b : tensor
:param b: input to compare
Input to compare.
:type b: tensor
rtol : float
The relative tolerance parameter.
:param rtol: the relative tolerance parameter
atol : float
:type rtol: float
The absolute tolerance parameter.
equal_nan : bool
:param atol: the absolute tolerance parameter
Whether to consider nan's in the same place to be close.
:type atol: float
:param equal_nan: whether to consider nan's in the same place to be close
Returns
:type equal_nan: bool
-------
int8
A boolean (int8) array where two arrays are element-wise equal
within a tolerance.
:returns: returns a boolean (int8) array where two arrays are element-wise
Notes
equal within a tolerance.
-----
:rtype: int8
Not a symmetric equation. See Numpy's documentation.
Examples
--------
>>> import theano
>>> import theano
>>> import numpy as np
>>> import numpy as np
>>> a = theano._asarray([1e10, 1e-7], dtype="float64")
>>> a = theano._asarray([1e10, 1e-7], dtype="float64")
...
@@ -1738,6 +1834,7 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
...
@@ -1738,6 +1834,7 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
>>> b = theano._asarray([1.0, np.inf], dtype="float64")
>>> b = theano._asarray([1.0, np.inf], dtype="float64")
>>> theano.tensor.isclose(a, b).eval()
>>> theano.tensor.isclose(a, b).eval()
array([1, 1], dtype=int8)
array([1, 1], dtype=int8)
"""
"""
# close will be an int8 array of 1 where within tolerance
# close will be an int8 array of 1 where within tolerance
# and 0 where not within tolerance or there was a nan or inf value.
# and 0 where not within tolerance or there was a nan or inf value.
...
@@ -2164,8 +2261,8 @@ class Nonzero(gof.Op):
...
@@ -2164,8 +2261,8 @@ class Nonzero(gof.Op):
Returns
Returns
-------
-------
result :
matrix
matrix
m
atrix containing the indices of the non-zero elements of a.
M
atrix containing the indices of the non-zero elements of a.
See Also
See Also
--------
--------
...
@@ -2220,14 +2317,13 @@ def nonzero(a, return_matrix=False):
...
@@ -2220,14 +2317,13 @@ def nonzero(a, return_matrix=False):
----------
----------
a : array_like
a : array_like
Input array.
Input array.
return_matrix : bool
return_matrix : bool
If True, returns a symbolic matrix. If False, returns a tuple of
If True, returns a symbolic matrix. If False, returns a tuple of
arrays. Defaults to False.
arrays. Defaults to False.
Returns
Returns
-------
-------
result :
tuple of vectors or matrix
tuple of vectors or matrix
See Also
See Also
--------
--------
...
@@ -2260,7 +2356,7 @@ def flatnonzero(a):
...
@@ -2260,7 +2356,7 @@ def flatnonzero(a):
Returns
Returns
-------
-------
res :
vector
vector
Output vector, containing the indices of the elements of `a.flatten()`
Output vector, containing the indices of the elements of `a.flatten()`
that are non-zero.
that are non-zero.
...
@@ -2268,6 +2364,7 @@ def flatnonzero(a):
...
@@ -2268,6 +2364,7 @@ def flatnonzero(a):
--------
--------
nonzero : Return the indices of the non-zero elements of the input array.
nonzero : Return the indices of the non-zero elements of the input array.
nonzero_values : Return the non-zero elements of the input array
nonzero_values : Return the non-zero elements of the input array
"""
"""
if
a
.
ndim
==
0
:
if
a
.
ndim
==
0
:
raise
ValueError
(
'Nonzero only supports non-scalar arrays.'
)
raise
ValueError
(
'Nonzero only supports non-scalar arrays.'
)
...
@@ -2299,7 +2396,7 @@ def nonzero_values(a):
...
@@ -2299,7 +2396,7 @@ def nonzero_values(a):
Returns
Returns
-------
-------
res :
vector
vector
Output vector, containing the non-zero elements of a.
Output vector, containing the non-zero elements of a.
See Also
See Also
...
@@ -2307,6 +2404,7 @@ def nonzero_values(a):
...
@@ -2307,6 +2404,7 @@ def nonzero_values(a):
nonzero : Return the indices of the non-zero elements of the input array.
nonzero : Return the indices of the non-zero elements of the input array.
flatnonzero : Return the indices of the non-zero elements of the
flatnonzero : Return the indices of the non-zero elements of the
flattened input array.
flattened input array.
"""
"""
return
a
.
flatten
()[
flatnonzero
(
a
)]
return
a
.
flatten
()[
flatnonzero
(
a
)]
...
@@ -2362,9 +2460,10 @@ def tri(N, M=None, k=0, dtype=None):
...
@@ -2362,9 +2460,10 @@ def tri(N, M=None, k=0, dtype=None):
Returns
Returns
-------
-------
tri :
Array of shape (N, M)
Array of shape (N, M)
Array with its lower triangle filled with ones and zero elsewhere;
Array with its lower triangle filled with ones and zero elsewhere;
in other words ``T[i,j] == 1`` for ``j <= i + k``, 0 otherwise.
in other words ``T[i,j] == 1`` for ``j <= i + k``, 0 otherwise.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -2390,12 +2489,13 @@ def tril(m, k=0):
...
@@ -2390,12 +2489,13 @@ def tril(m, k=0):
Returns
Returns
-------
-------
tril :
array, shape (M, N)
array, shape (M, N)
Lower triangle of `m`, of same shape and data-type as `m`.
Lower triangle of `m`, of same shape and data-type as `m`.
See Also
See Also
--------
--------
triu : same thing, only for the upper triangle
triu : Same thing, only for the upper triangle.
"""
"""
return
m
*
tri
(
m
.
shape
[
0
],
m
.
shape
[
1
],
k
=
k
,
dtype
=
m
.
dtype
)
return
m
*
tri
(
m
.
shape
[
0
],
m
.
shape
[
1
],
k
=
k
,
dtype
=
m
.
dtype
)
...
@@ -2411,7 +2511,8 @@ def triu(m, k=0):
...
@@ -2411,7 +2511,8 @@ def triu(m, k=0):
See Also
See Also
--------
--------
tril : lower triangle of an array
tril : Lower triangle of an array.
"""
"""
return
m
*
(
1
-
tri
(
m
.
shape
[
0
],
m
.
shape
[
1
],
k
=
k
-
1
,
dtype
=
m
.
dtype
))
return
m
*
(
1
-
tri
(
m
.
shape
[
0
],
m
.
shape
[
1
],
k
=
k
-
1
,
dtype
=
m
.
dtype
))
...
@@ -2456,21 +2557,22 @@ def eye(n, m=None, k=0, dtype=None):
...
@@ -2456,21 +2557,22 @@ def eye(n, m=None, k=0, dtype=None):
Parameters
Parameters
----------
----------
n : int
n : int
Number of rows in the output.
Number of rows in the output.
m : int, optional
m : int, optional
Number of columns in the output. If None, defaults to `n`.
Number of columns in the output. If None, defaults to `n`.
k : int, optional
k : int, optional
Index of the diagonal: 0 (the default) refers to the main diagonal,
Index of the diagonal: 0 (the default) refers to the main diagonal,
a positive value refers to an upper diagonal, and a negative value
a positive value refers to an upper diagonal, and a negative value
to a lower diagonal.
to a lower diagonal.
dtype : data-type, optional
dtype : data-type, optional
Data-type of the returned array.
Data-type of the returned array.
Returns
Returns
-------
-------
I : ndarray of shape (N,M)
ndarray of shape (N,M)
An array where all elements are equal to zero, except for the `k`-th
An array where all elements are equal to zero, except for the `k`-th
diagonal, whose values are equal to one.
diagonal, whose values are equal to one.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -2485,7 +2587,7 @@ def identity_like(x):
...
@@ -2485,7 +2587,7 @@ def identity_like(x):
class
Alloc
(
gof
.
Op
):
class
Alloc
(
gof
.
Op
):
"""Create a Tensor from an initial value and a desired shape
"""Create a Tensor from an initial value and a desired shape
.
alloc(value, shape0, shape1, ..., shapeN)
alloc(value, shape0, shape1, ..., shapeN)
...
@@ -2500,6 +2602,7 @@ class Alloc(gof.Op):
...
@@ -2500,6 +2602,7 @@ class Alloc(gof.Op):
This Op is used to replace fill() during optimizations because after shapes
This Op is used to replace fill() during optimizations because after shapes
are lifted, the first argument to fill can often be pruned from the graph.
are lifted, the first argument to fill can often be pruned from the graph.
"""
"""
__props__
=
()
__props__
=
()
...
@@ -2642,6 +2745,7 @@ class Alloc(gof.Op):
...
@@ -2642,6 +2745,7 @@ class Alloc(gof.Op):
for size mismatches.
for size mismatches.
If you always want an Alloc node, call make_node.
If you always want an Alloc node, call make_node.
"""
"""
ret
=
super
(
Alloc
,
self
)
.
__call__
(
val
,
*
shapes
,
**
kwargs
)
ret
=
super
(
Alloc
,
self
)
.
__call__
(
val
,
*
shapes
,
**
kwargs
)
try
:
try
:
...
@@ -2709,18 +2813,22 @@ pprint.assign(tensor_copy, printing.IgnorePrinter())
...
@@ -2709,18 +2813,22 @@ pprint.assign(tensor_copy, printing.IgnorePrinter())
@constructor
@constructor
def
sum
(
input
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
):
def
sum
(
input
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
):
"""
"""
Computes the sum along the given axis(es) of a tensor `input`
Computes the sum along the given axis(es) of a tensor `input`
.
When axis is None (the default value), the sum is performed
When axis is None (the default value), the sum is performed
over the flattened tensor.
over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
For full documentation see ``tensor.elemwise.Sum``.
For full documentation see ``tensor.elemwise.Sum``.
In particular please pay attention to the important warning when using
In particular please pay attention to the important warning when using
a custom acc_dtype.
a custom acc_dtype.
Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
"""
"""
out
=
elemwise
.
Sum
(
axis
=
axis
,
dtype
=
dtype
,
acc_dtype
=
acc_dtype
)(
input
)
out
=
elemwise
.
Sum
(
axis
=
axis
,
dtype
=
dtype
,
acc_dtype
=
acc_dtype
)(
input
)
...
@@ -2736,16 +2844,20 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum'))
...
@@ -2736,16 +2844,20 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum'))
def
prod
(
input
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
,
def
prod
(
input
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
,
no_zeros_in_input
=
False
):
no_zeros_in_input
=
False
):
"""
"""
Computes the product along the given axis(es) of a tensor `input`
Computes the product along the given axis(es) of a tensor `input`
.
When axis is None (the default value), the product is performed
When axis is None (the default value), the product is performed
over the flattened tensor.
over the flattened tensor.
keepdims: If this is set to True, the axes which are reduced are left in
For full documentation see ``tensor.elemwise.Prod``.
Parameters
----------
keepdims: bool
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the result
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
will broadcast correctly against the original tensor.
For full documentation see ``tensor.elemwise.Prod``.
"""
"""
out
=
elemwise
.
Prod
(
axis
,
dtype
=
dtype
,
acc_dtype
=
acc_dtype
,
out
=
elemwise
.
Prod
(
axis
,
dtype
=
dtype
,
acc_dtype
=
acc_dtype
,
...
@@ -2803,31 +2915,32 @@ class Mean(elemwise.CAReduce):
...
@@ -2803,31 +2915,32 @@ class Mean(elemwise.CAReduce):
def
mean
(
input
,
axis
=
None
,
dtype
=
None
,
op
=
False
,
keepdims
=
False
,
def
mean
(
input
,
axis
=
None
,
dtype
=
None
,
op
=
False
,
keepdims
=
False
,
acc_dtype
=
None
):
acc_dtype
=
None
):
"""
"""
Computes the mean value along the given axis(es) of a tensor `input`
Computes the mean value along the given axis(es) of a tensor `input`
.
:param axis: compute the mean along this axis of the tensor.
Parameters
None means all axes (like numpy).
----------
:type axis: None or int or (list of int) (see `Sum`)
axis : None or int or (list of int) (see `Sum`)
Compute the mean along this axis of the tensor.
:param dtype: dtype to cast the result of the inner summation into.
None means all axes (like numpy).
dtype: None or string
Dtype to cast the result of the inner summation into.
For instance, by default, a sum of a float32 tensor will be
For instance, by default, a sum of a float32 tensor will be
done in float64 (acc_dtype would be float64 by default),
done in float64 (acc_dtype would be float64 by default),
but that result will be cast back to float32.
but that result will be cast back to float32.
:type dtype: None or string
keepdims: bool
If this is set to True, the axes which are reduced are
:param keepdims: If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option,
left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor.
the result will broadcast correctly against the original tensor.
acc_dtype: None or string
Dtype to use for the inner summation. This will not
necessarily be the dtype of the output (in particular
if it is a discrete (int/uint) dtype, the output will
be in a float type). If None, then we use the same rules as `sum()`.
:param acc_dtype: dtype to use for the inner summation. This will not
Notes
necessarily be the dtype of the output (in particular
-----
if it is a discrete (int/uint) dtype, the output will
For gpu, if you specify dtype=float32, everything will be done on the gpu.
be in a float type).
If None, then we use the same rules as `sum()`.
:type acc_dtype: None or string
:note: for gpu, if you specify dtype=float32, everything will be done
on the gpu.
"""
"""
if
op
:
if
op
:
...
@@ -2896,18 +3009,23 @@ def var(input, axis=None, keepdims=False):
...
@@ -2896,18 +3009,23 @@ def var(input, axis=None, keepdims=False):
"""
"""
Computes the variance along the given axis(es) of a tensor `input`.
Computes the variance along the given axis(es) of a tensor `input`.
:param axis: Compute the variance along this axis of the tensor.
Parameters
None means all axes (like numpy).
----------
:type axis: None or int or (list of int) (see `Sum`)
axis: None or int or (list of int) (see `Sum`)
Compute the variance along this axis of the tensor.
:param keepdims: If this is set to True, the axes which are reduced are
None means all axes (like numpy).
keepdims : bool
If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option,
left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor.
the result will broadcast correctly against the original tensor.
:note: It uses the two-pass algorithm for more stable results.
Notes
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
-----
There exist other implementations that are even more stable, but
It uses the two-pass algorithm for more stable results.
probably slower.
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
There exist other implementations that are even more stable, but probably
slower.
"""
"""
input_ndim
=
input
.
type
.
ndim
input_ndim
=
input
.
type
.
ndim
...
@@ -2933,26 +3051,26 @@ def var(input, axis=None, keepdims=False):
...
@@ -2933,26 +3051,26 @@ def var(input, axis=None, keepdims=False):
@constructor
@constructor
def
std
(
input
,
axis
=
None
,
keepdims
=
False
):
def
std
(
input
,
axis
=
None
,
keepdims
=
False
):
"""
"""
Computes the standard deviation along the given axis(es)
Computes the standard deviation along the given axis(es) of a tensor `input`.
of a tensor `input`.
:param axis: Compute the standard deviation along this
Parameters
axis of the tensor.
----------
None means all axes (like numpy).
axis : None or int or (list of int) (see `Sum`)
:type axis: None or int or (list of int) (see `Sum`)
Compute the standard deviation along this axis of the tensor.
None means all axes (like numpy).
keepdims : bool
If this is set to True, the axes which are reduced are left in the
result as dimensions with size one. With this option, the result will
broadcast correctly against the original tensor.
:param keepdims: If this is set to True, the axes
Notes
which are reduced are
-----
left in the result as dimensions with size one.
It calls `var()` and `var()` uses the two-pass algorithm for more stable
With this option,
results.
the result will broadcast correctly against the
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
original tensor.
There exist other implementations that are even more stable, but probably
slower.
:note: It calls `var()` and `var()` uses the two-pass algorithm for more
stable results.
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
There exist other implementations that are even more stable, but
probably slower.
"""
"""
return
sqrt
(
var
(
input
=
input
,
axis
=
axis
,
keepdims
=
keepdims
))
return
sqrt
(
var
(
input
=
input
,
axis
=
axis
,
keepdims
=
keepdims
))
...
@@ -2960,10 +3078,12 @@ def std(input, axis=None, keepdims=False):
...
@@ -2960,10 +3078,12 @@ def std(input, axis=None, keepdims=False):
class
Default
(
gof
.
Op
):
class
Default
(
gof
.
Op
):
"""
"""
Takes an input x and a default value. If the input is not None, a
Takes an input x and a default value.
reference to it is returned. If the input is None, a copy of the
default value is returned instead. The input and the default must
If the input is not None, a reference to it is returned.
have exactly the same type.
If the input is None, a copy of the default value is returned instead.
The input and the default must have exactly the same type.
"""
"""
view_map
=
{
0
:
[
0
]}
view_map
=
{
0
:
[
0
]}
__props__
=
()
__props__
=
()
...
@@ -2994,15 +3114,13 @@ setdefault = default # legacy
...
@@ -2994,15 +3114,13 @@ setdefault = default # legacy
##########################
##########################
@_scal_elemwise_with_nfunc
(
'maximum'
,
2
,
1
)
@_scal_elemwise_with_nfunc
(
'maximum'
,
2
,
1
)
def
maximum
(
x
,
y
):
def
maximum
(
x
,
y
):
"""elemwise maximum. See max for the maximum in one tensor
"""elemwise maximum. See max for the maximum in one tensor"""
"""
# see decorator for function body
# see decorator for function body
@_scal_elemwise_with_nfunc
(
'minimum'
,
2
,
1
)
@_scal_elemwise_with_nfunc
(
'minimum'
,
2
,
1
)
def
minimum
(
x
,
y
):
def
minimum
(
x
,
y
):
"""elemwise minimum. See min for the minimum in one tensor
"""elemwise minimum. See min for the minimum in one tensor"""
"""
# see decorator for function body
# see decorator for function body
...
@@ -3058,6 +3176,7 @@ def ceil_intdiv(a, b):
...
@@ -3058,6 +3176,7 @@ def ceil_intdiv(a, b):
Safely compute ceil(float_division(a, b)).
Safely compute ceil(float_division(a, b)).
Works for all dtypes, but mostly useful when a and b are int.
Works for all dtypes, but mostly useful when a and b are int.
"""
"""
# If a and b are int with not many significant bits, we could
# If a and b are int with not many significant bits, we could
# cast them to float to avoid doing the modulo. We do not know if this
# cast them to float to avoid doing the modulo. We do not know if this
...
@@ -3099,13 +3218,17 @@ def pow(a, b):
...
@@ -3099,13 +3218,17 @@ def pow(a, b):
# So we do not use @scal_elemwise_with_nfunc('clip', 3, 1)
# So we do not use @scal_elemwise_with_nfunc('clip', 3, 1)
@_scal_elemwise
@_scal_elemwise
def
clip
(
x
,
min
,
max
):
def
clip
(
x
,
min
,
max
):
"""clip x to be between min and max.
"""
Clip x to be between min and max.
Notes
-----
When `x` is equal to the boundaries, the output is considered
to be `x`, so at these points, the gradient of the cost wrt the output
will be propagated to `x`, not to `min` nor `max`. In other words,
on these points, the gradient wrt `x` will be equal to the gradient wrt
the output, and the gradient wrt `min` and `max` will be zero.
:note: When `x` is equal to the boundaries, the output is considered
to be `x`, so at these points, the gradient of the cost wrt the output
will be propagated to `x`, not to `min` nor `max`. In other words,
on these points, the gradient wrt `x` will be equal to the gradient wrt
the output, and the gradient wrt `min` and `max` will be zero.
"""
"""
# see decorator for function body
# see decorator for function body
# for grep: clamp, bound
# for grep: clamp, bound
...
@@ -3125,14 +3248,16 @@ pprint.assign(pow, printing.OperatorPrinter('**', 1, 'right'))
...
@@ -3125,14 +3248,16 @@ pprint.assign(pow, printing.OperatorPrinter('**', 1, 'right'))
def
extract_constant
(
x
,
elemwise
=
True
):
def
extract_constant
(
x
,
elemwise
=
True
):
'''
"""
This function is basically a call to tensor.get_scalar_constant_value. The
This function is basically a call to tensor.get_scalar_constant_value.
main difference is the behaviour in case of failure. While
get_scalar_constant_value raises a TypeError, this function returns x,
The main difference is the behaviour in case of failure. While
as a tensor if possible. If x is a ScalarVariable from a
get_scalar_constant_value raises a TypeError, this function returns x,
scalar_from_tensor, we remove the conversion. If x is just a
as a tensor if possible. If x is a ScalarVariable from a
ScalarVariable, we convert it to a tensor with tensor_from_scalar.
scalar_from_tensor, we remove the conversion. If x is just a
'''
ScalarVariable, we convert it to a tensor with tensor_from_scalar.
"""
try
:
try
:
x
=
get_scalar_constant_value
(
x
,
elemwise
=
elemwise
)
x
=
get_scalar_constant_value
(
x
,
elemwise
=
elemwise
)
except
NotScalarConstantError
:
except
NotScalarConstantError
:
...
@@ -3150,8 +3275,7 @@ def transpose(x, axes=None):
...
@@ -3150,8 +3275,7 @@ def transpose(x, axes=None):
"""
"""
Reorder the dimensions of x. (Default: reverse them)
Reorder the dimensions of x. (Default: reverse them)
This is a macro around dimshuffle that matches the numpy.transpose
This is a macro around dimshuffle that matches the numpy.transpose function.
function.
"""
"""
if
axes
is
None
:
if
axes
is
None
:
...
@@ -3164,18 +3288,33 @@ def transpose(x, axes=None):
...
@@ -3164,18 +3288,33 @@ def transpose(x, axes=None):
def
batched_dot
(
x
,
y
):
def
batched_dot
(
x
,
y
):
"""
"""
:param x: A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2)
:param y: A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
This function computes the dot product between the two tensors, by
This function computes the dot product between the two tensors, by
iterating over the first dimension using scan.
iterating over the first dimension using scan.
Returns a tensor of size e.g. if it is 3D: (dim1, dim3, dim4)
Example:
Parameters
----------
x : tensor
A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2).
y : tensor
A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4).
Returns
-------
tensor
A tensor of size e.g. if it is 3D: (dim1, dim3, dim4).
Notes
-----
This is a subset of numpy.einsum, but we do not provide it for now.
But numpy einsum is slower than dot or tensordot:
http://mail.scipy.org/pipermail/numpy-discussion/2012-October/064259.html
Examples
--------
>>> first = tensor.tensor3('first')
>>> first = tensor.tensor3('first')
>>> second = tensor.tensor3('second')
>>> second = tensor.tensor3('second')
>>> result = batched_dot(first, second)
>>> result = batched_dot(first, second)
:note: This is a subset of numpy.einsum, but we do not provide it for now.
But numpy einsum is slower than dot or tensordot:
http://mail.scipy.org/pipermail/numpy-discussion/2012-October/064259.html
"""
"""
result
,
updates
=
theano
.
scan
(
result
,
updates
=
theano
.
scan
(
fn
=
lambda
x_mat
,
y_mat
:
fn
=
lambda
x_mat
,
y_mat
:
...
@@ -3188,11 +3327,22 @@ def batched_dot(x, y):
...
@@ -3188,11 +3327,22 @@ def batched_dot(x, y):
def
batched_tensordot
(
x
,
y
,
axes
=
2
):
def
batched_tensordot
(
x
,
y
,
axes
=
2
):
"""
"""
:param x: A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2)
Compute the tensordot product.
:param y: A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
:param axes: an integer or array. If an integer, the number of axes
A hybrid of batched_dot and tensordot, this function computes the
to sum over. If an array, it must have two array
tensordot product between the two tensors, by iterating over the
elements containing the axes to sum over in each tensor.
first dimension using scan to perform a sequence of tensordots.
Parameters
----------
x : tensor
A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2)
y : tensor
A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
axes: int or array-like of length 2
If an integer, the number of axes to sum over.
If an array, it must have two array elements containing the axes to sum
over in each tensor.
If an integer i, it is converted to an array containing
If an integer i, it is converted to an array containing
the last i dimensions of the first tensor and the first
the last i dimensions of the first tensor and the first
...
@@ -3206,11 +3356,7 @@ def batched_tensordot(x, y, axes=2):
...
@@ -3206,11 +3356,7 @@ def batched_tensordot(x, y, axes=2):
(Remember axes are zero-indexed!) The 2nd axis of a and the
(Remember axes are zero-indexed!) The 2nd axis of a and the
3rd axis of b must have the same shape; the same is true for
3rd axis of b must have the same shape; the same is true for
the 3rd axis of a and the 5th axis of b.
the 3rd axis of a and the 5th axis of b.
:type axes: int or array-like of length 2
A hybrid of batched_dot and tensordot, this function computes the
tensordot product between the two tensors, by iterating over the
first dimension using scan to perform a sequence of tensordots.
"""
"""
if
isinstance
(
axes
,
(
list
,
numpy
.
ndarray
)):
if
isinstance
(
axes
,
(
list
,
numpy
.
ndarray
)):
if
isinstance
(
axes
,
list
):
if
isinstance
(
axes
,
list
):
...
@@ -3239,20 +3385,17 @@ def split(x, splits_size, n_splits, axis=0):
...
@@ -3239,20 +3385,17 @@ def split(x, splits_size, n_splits, axis=0):
class
Split
(
Op
):
class
Split
(
Op
):
"""Partition a `TensorVariable` along some axis.
"""Partition a `TensorVariable` along some axis.
.. python::
Examples
--------
x = vector()
>>> x = vector()
splits = lvector()
>>> splits = lvector()
# you have to declare right away how many split_points there will be.
You have to declare right away how many split_points there will be.
ra, rb, rc = split(x, splits, n_splits = 3, axis = 0)
>>> ra, rb, rc = split(x, splits, n_splits = 3, axis = 0)
>>> f = function([x, splits], [ra, rb, rc])
f = function([x, splits], [ra, rb, rc])
>>> a, b, c = f([0,1,2,3,4,5], [3, 2, 1])
a == [0,1,2]
a, b, c = f([0,1,2,3,4,5], [3, 2, 1])
b == [3, 4]
c == [5]
#a == [0,1,2]
#b == [3, 4]
#c == [5]
"""
"""
...
@@ -3370,6 +3513,7 @@ class Split(Op):
...
@@ -3370,6 +3513,7 @@ class Split(Op):
def
addbroadcast
(
x
,
*
axes
):
def
addbroadcast
(
x
,
*
axes
):
"""
"""
Make the input broadcastable in the specified axes.
Make the input broadcastable in the specified axes.
For example, addbroadcast(x, 0) will make the first dimension of
For example, addbroadcast(x, 0) will make the first dimension of
x broadcastable. When performing the function, if the length of
x broadcastable. When performing the function, if the length of
x along that dimension is not 1, a ValueError will be raised.
x along that dimension is not 1, a ValueError will be raised.
...
@@ -3377,20 +3521,19 @@ def addbroadcast(x, *axes):
...
@@ -3377,20 +3521,19 @@ def addbroadcast(x, *axes):
We apply the opt here not to pollute the graph especially during
We apply the opt here not to pollute the graph especially during
the gpu optimization
the gpu optimization
Parameters:
Parameters
------------
x : tensor_like
Input theano tensor.
axis : an int or an iterable object such as list or tuple
of int values
The dimension along which the tensor x should be
broadcastable. if the length of x along these
dimensions is not 1, a ValueError will be raised.
returns:
----------
----------
a theano tensor, which is broadcastable along the specified dimensions.
x : tensor_like
Input theano tensor.
axis : an int or an iterable object such as list or tuple of int values
The dimension along which the tensor x should be broadcastable.
If the length of x along these dimensions is not 1, a ValueError will
be raised.
Returns
-------
tensor
A theano tensor, which is broadcastable along the specified dimensions.
"""
"""
rval
=
Rebroadcast
(
*
[(
axis
,
True
)
for
axis
in
axes
])(
x
)
rval
=
Rebroadcast
(
*
[(
axis
,
True
)
for
axis
in
axes
])(
x
)
...
@@ -3400,6 +3543,7 @@ def addbroadcast(x, *axes):
...
@@ -3400,6 +3543,7 @@ def addbroadcast(x, *axes):
def
unbroadcast
(
x
,
*
axes
):
def
unbroadcast
(
x
,
*
axes
):
"""
"""
Make the input impossible to broadcast in the specified axes.
Make the input impossible to broadcast in the specified axes.
For example, unbroadcast(x, 0) will make the first dimension
For example, unbroadcast(x, 0) will make the first dimension
of x non-broadcastable. When performing the function, if the length
of x non-broadcastable. When performing the function, if the length
of x along that dimension is not 1, a ValueError will be raised.
of x along that dimension is not 1, a ValueError will be raised.
...
@@ -3407,20 +3551,19 @@ def unbroadcast(x, *axes):
...
@@ -3407,20 +3551,19 @@ def unbroadcast(x, *axes):
We apply the opt here not to pollute the graph especially during
We apply the opt here not to pollute the graph especially during
the gpu optimization
the gpu optimization
Parameters:
Parameters
------------
x : tensor_like
Input theano tensor.
axis : an int or an iterable object such as list or tuple
of int values
The dimension along which the tensor x should be
unbroadcastable. if the length of x along these
dimensions is not 1, a ValueError will be raised.
returns:
----------
----------
a theano tensor, which is unbroadcastable along the specified dimensions.
x : tensor_like
Input theano tensor.
axis : an int or an iterable object such as list or tuple of int values
The dimension along which the tensor x should be unbroadcastable.
If the length of x along these dimensions is not 1, a ValueError will
be raised.
Returns
-------
tensor
A theano tensor, which is unbroadcastable along the specified dimensions.
"""
"""
rval
=
Rebroadcast
(
*
[(
axis
,
False
)
for
axis
in
axes
])(
x
)
rval
=
Rebroadcast
(
*
[(
axis
,
False
)
for
axis
in
axes
])(
x
)
...
@@ -3430,7 +3573,8 @@ def unbroadcast(x, *axes):
...
@@ -3430,7 +3573,8 @@ def unbroadcast(x, *axes):
def
patternbroadcast
(
x
,
broadcastable
):
def
patternbroadcast
(
x
,
broadcastable
):
"""
"""
Make the input adopt a specific broadcasting pattern.
Make the input adopt a specific broadcasting pattern.
broadcastable must be iterable. For example,
Broadcastable must be iterable. For example,
patternbroadcast(x, (True, False)) will make the first
patternbroadcast(x, (True, False)) will make the first
dimension of x broadcastable and the second dimension
dimension of x broadcastable and the second dimension
not broadcastable, so x will now be a row.
not broadcastable, so x will now be a row.
...
@@ -3438,21 +3582,20 @@ def patternbroadcast(x, broadcastable):
...
@@ -3438,21 +3582,20 @@ def patternbroadcast(x, broadcastable):
We apply the opt here not to pollute the graph especially during the gpu
We apply the opt here not to pollute the graph especially during the gpu
optimization.
optimization.
Parameters:
Parameters
------------
----------
x : tensor_like
x : tensor_like
Input theano tensor.
Input theano tensor.
broadcastable : an iterable object such as list or tuple
broadcastable : an iterable object such as list or tuple of bool values
of bool values
A set of boolean values indicating whether a dimension should be
broadcastable or not. If the length of x along these dimensions is
not 1, a ValueError will be raised.
a set of boolean values indicating whether a dimension
Returns
should be broadcastable or not.
-------
if the length of x along these dimensions is not 1,
tensor
a ValueError will be raised
.
A theano tensor, which has the broadcasting pattern given by `broadcastable`
.
returns:
----------
a theano tensor, which has the broadcasting pattern given by `broadcastable`.
"""
"""
rval
=
Rebroadcast
(
*
[(
i
,
broadcastable
[
i
])
rval
=
Rebroadcast
(
*
[(
i
,
broadcastable
[
i
])
for
i
in
xrange
(
len
(
broadcastable
))])(
x
)
for
i
in
xrange
(
len
(
broadcastable
))])(
x
)
...
@@ -3468,31 +3611,39 @@ class Join(Op):
...
@@ -3468,31 +3611,39 @@ class Join(Op):
Of course, TensorVariable instances do not have a shape, so this error
Of course, TensorVariable instances do not have a shape, so this error
cannot be caught until runtime. See `perform()`.
cannot be caught until runtime. See `perform()`.
For joins involving scalar values, see @stack.
See Also
--------
stack : For joins involving scalar values
.. python::
Examples
--------
>>> x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix()
>>> u = tensor.vector()
x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix()
>>> r = join(0, x, y, z)
u = tensor.vector()
>>> c = join(1, x, y, z)
>>> join(2, x, y, z) # WRONG: the axis has to be an index into the shape
>>> join(0, x, u) # WRONG: joined tensors must have the same rank
r = join(0, x, y, z)
c = join(1, x, y, z)
join(2, x, y, z) # WRONG: the axis has to be an index into the shape
join(0, x, u) # WRONG: joined tensors must have the same rank
"""
"""
check_input
=
False
check_input
=
False
__props__
=
()
__props__
=
()
def
make_node
(
self
,
*
axis_and_tensors
):
def
make_node
(
self
,
*
axis_and_tensors
):
"""
"""
:param axis: an Int or integer-valued Variable
Parameters
----------
:param tensors: a variable number (but not zero) of tensors to
axis: an Int or integer-valued Variable
concatenate along the specified axis. These tensors must have
tensors
the same shape along all dimensions other than this axis.
A variable number (but not zero) of tensors to
concatenate along the specified axis. These tensors must have
:returns: a symbolic Variable. It has the same ndim as the
the same shape along all dimensions other than this axis.
input tensors, and the most inclusive dtype.
Returns
-------
A symbolic Variable
It has the same ndim as the input tensors, and the most inclusive
dtype.
"""
"""
axis
,
tensors
=
axis_and_tensors
[
0
],
axis_and_tensors
[
1
:]
axis
,
tensors
=
axis_and_tensors
[
0
],
axis_and_tensors
[
1
:]
...
@@ -3709,26 +3860,25 @@ class Join(Op):
...
@@ -3709,26 +3860,25 @@ class Join(Op):
"""
"""
Convenience function to concatenate `TensorType`s along the given axis.
Convenience function to concatenate `TensorType`s along the given axis.
:Parameters:
Parameters
- `tensors` : list of tensors (or list-like)
----------
A list of tensors to be concatenated along the given axis.
tensors : list of tensors (or list-like)
- `axis` : int (symbolic or literal)
A list of tensors to be concatenated along the given axis.
The shapes of the tensors to be concatenated must be all
On which dimension should the tensors be joined? The `axis`
identical, except in the dimension (`axis`) on which they are to
must be a valid index into the shape of the tensors to be
be joined.
concatenated.
axis : int (symbolic or literal)
On which dimension should the tensors be joined? The `axis`
The `axis` parameter may either be an integer or an object that
must be a valid index into the shape of the tensors to be
can be converted to a scalar using `as_scalar`(`axis`). In the
concatenated.
former case, the axis is fixed at construction, while in the
The `axis` parameter may either be an integer or an object that
latter it may vary over time depending on the value of the
can be converted to a scalar using `as_scalar`(`axis`). In the
`axis` variable.
former case, the axis is fixed at construction, while in the
latter it may vary over time depending on the value of the
`axis` variable.
The shapes of the tensors to be concatenated must be all
"""
identical, except in the dimension (`axis`) on which they are to
be joined.
"""
join
=
Join
()
join
=
Join
()
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
Join
),
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
Join
),
...
@@ -3738,7 +3888,8 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join),
...
@@ -3738,7 +3888,8 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join),
def
roll
(
x
,
shift
,
axis
=
None
):
def
roll
(
x
,
shift
,
axis
=
None
):
"""
"""
Convenience function to roll `TensorType`s along the given axis.
Convenience function to roll `TensorType`s along the given axis.
Syntax copies numpy.roll function
Syntax copies numpy.roll function.
Parameters
Parameters
----------
----------
...
@@ -3746,15 +3897,16 @@ def roll(x, shift, axis=None):
...
@@ -3746,15 +3897,16 @@ def roll(x, shift, axis=None):
Input tensor.
Input tensor.
shift : int (symbolic or literal)
shift : int (symbolic or literal)
The number of places by which elements are shifted.
The number of places by which elements are shifted.
axis : int (symbolic or literal)
(optional)
axis : int (symbolic or literal)
, optional
The axis along which elements are shifted. By default, the array
The axis along which elements are shifted. By default, the array
is flattened before shifting, after which the original
is flattened before shifting, after which the original
shape is restored.
shape is restored.
Returns
Returns
-------
-------
res :
tensor
tensor
Output tensor, with the same shape as `x`.
Output tensor, with the same shape as `x`.
"""
"""
if
axis
is
None
:
if
axis
is
None
:
if
x
.
ndim
>
1
:
if
x
.
ndim
>
1
:
...
@@ -3780,9 +3932,13 @@ def roll(x, shift, axis=None):
...
@@ -3780,9 +3932,13 @@ def roll(x, shift, axis=None):
@constructor
@constructor
def
shape_padleft
(
t
,
n_ones
=
1
):
def
shape_padleft
(
t
,
n_ones
=
1
):
"""Reshape `t` by left-padding the shape with `n_ones` 1s
"""Reshape `t` by left-padding the shape with `n_ones` 1s.
See Also
--------
shape_padright
Dimshuffle
See also: `shape_padright` and `Dimshuffle`
"""
"""
_t
=
as_tensor_variable
(
t
)
_t
=
as_tensor_variable
(
t
)
...
@@ -3792,9 +3948,13 @@ def shape_padleft(t, n_ones=1):
...
@@ -3792,9 +3948,13 @@ def shape_padleft(t, n_ones=1):
@constructor
@constructor
def
shape_padright
(
t
,
n_ones
=
1
):
def
shape_padright
(
t
,
n_ones
=
1
):
"""Reshape `t` by right-padding the shape with `n_ones` 1s
"""Reshape `t` by right-padding the shape with `n_ones` 1s.
See Also
--------
shape_padleft
Dimshuffle
See also: `shape_padleft` and `Dimshuffle`
"""
"""
_t
=
as_tensor_variable
(
t
)
_t
=
as_tensor_variable
(
t
)
...
@@ -3808,6 +3968,7 @@ def stack(*tensors):
...
@@ -3808,6 +3968,7 @@ def stack(*tensors):
The size in dimension 0 of the result will be equal to the number
The size in dimension 0 of the result will be equal to the number
of tensors passed.
of tensors passed.
"""
"""
if
len
(
tensors
)
==
0
:
if
len
(
tensors
)
==
0
:
raise
Exception
(
'theano.tensor.stack(*tensors) must have at least'
raise
Exception
(
'theano.tensor.stack(*tensors) must have at least'
...
@@ -3843,9 +4004,10 @@ def concatenate(tensor_list, axis=0):
...
@@ -3843,9 +4004,10 @@ def concatenate(tensor_list, axis=0):
This function is similar to `join`, but uses the signature of
This function is similar to `join`, but uses the signature of
numpy's concatenate function.
numpy's concatenate function.
This function
Raises
:Exceptions:
------
- `TypeError` : the tensor_list must be a tuple or list
TypeError
The tensor_list must be a tuple or list.
"""
"""
# Check someone did not make the common mistake to do something like:
# Check someone did not make the common mistake to do something like:
...
@@ -3863,16 +4025,20 @@ def concatenate(tensor_list, axis=0):
...
@@ -3863,16 +4025,20 @@ def concatenate(tensor_list, axis=0):
def
get_vector_length
(
v
):
def
get_vector_length
(
v
):
"""Return the run-time length of a symbolic vector.
"""Return the run-time length of a symbolic vector.
:Parameters:
Parameters
- `v` : A rank-1 TensorType variable.
----------
v
:Exceptions:
A rank-1 TensorType variable.
- `TypeError` : `v` hasn't the proper type.
- `ValueError` : No special case applies, the length is not known.
In general this is not possible, but for a number of special cases
Raises
the length can be determined at compile / graph-construction time.
------
This function implements these special cases.
TypeError
`v` does not have the proper type.
ValueError
No special case applies, the length is not known.
In general this is not possible, but for a number of special cases
the length can be determined at compile / graph-construction time.
This function implements these special cases.
"""
"""
v
=
as_tensor_variable
(
v
)
v
=
as_tensor_variable
(
v
)
...
@@ -3909,9 +4075,11 @@ def get_vector_length(v):
...
@@ -3909,9 +4075,11 @@ def get_vector_length(v):
def
horizontal_stack
(
*
args
):
def
horizontal_stack
(
*
args
):
"""
"""
Horizontally stack two L{TensorType}s.
Horizontally stack two L{TensorType}s.
Stack two L{TensorType}s along the second axis (column wise). These
Stack two L{TensorType}s along the second axis (column wise). These
L{TensorType}s must have the same shape along all dimensions but the
L{TensorType}s must have the same shape along all dimensions but the
second.
second.
"""
"""
# Note: 'horizontal_stack' and 'vertical_stack' do not behave exactly like
# Note: 'horizontal_stack' and 'vertical_stack' do not behave exactly like
# Numpy's hstack and vstack functions. This is intended, because Numpy's
# Numpy's hstack and vstack functions. This is intended, because Numpy's
...
@@ -3937,7 +4105,9 @@ class Reshape(Op):
...
@@ -3937,7 +4105,9 @@ class Reshape(Op):
"""Perform a reshape operation of the input x to the new shape shp.
"""Perform a reshape operation of the input x to the new shape shp.
The number of dimensions to reshape to (ndim) must be
The number of dimensions to reshape to (ndim) must be
known at graph build time."""
known at graph build time.
"""
view_map
=
{
0
:
[
0
]}
# output 0 is potentially aliased to inputs [0]
view_map
=
{
0
:
[
0
]}
# output 0 is potentially aliased to inputs [0]
_f16_ok
=
True
_f16_ok
=
True
...
@@ -4131,8 +4301,11 @@ def reshape(x, newshape, ndim=None, name=None):
...
@@ -4131,8 +4301,11 @@ def reshape(x, newshape, ndim=None, name=None):
class
Flatten
(
Op
):
class
Flatten
(
Op
):
"""
"""
Flatten a tensor.
Flattens a tensor to `outdim` dimensions by preserving the leading
Flattens a tensor to `outdim` dimensions by preserving the leading
outdim - 1 shape components.
outdim - 1 shape components.
"""
"""
view_map
=
{
0
:
[
0
]}
view_map
=
{
0
:
[
0
]}
...
@@ -4305,16 +4478,19 @@ def flatten(x, outdim=1):
...
@@ -4305,16 +4478,19 @@ def flatten(x, outdim=1):
class
Tile
(
Op
):
class
Tile
(
Op
):
"""
"""
DEPRECATED: use tile() instead.
Construct an array by repeating the input x according to reps pattern.
Construct an array by repeating the input x according to reps pattern.
.. note:: Deprecated
Use tile() instead.
Tiles its input according to reps. The length of reps is the number of
Tiles its input according to reps. The length of reps is the number of
dimensions of x and contains the number of times to tile x in each
dimensions of x and contains the number of times to tile x in each
dimension.
dimension.
:see: `numpy.tile
See Also
<http://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html>`_
--------
numpy.tile : http://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html
"""
"""
__props__
=
(
"ndim"
,)
__props__
=
(
"ndim"
,)
...
@@ -4377,13 +4553,15 @@ class Tile(Op):
...
@@ -4377,13 +4553,15 @@ class Tile(Op):
def
tile
(
x
,
reps
,
ndim
=
None
):
def
tile
(
x
,
reps
,
ndim
=
None
):
"""
"""
Tile input array `x` according to `reps`. See the docstring of `numpy.tile`
Tile input array `x` according to `reps`.
for details.
See the docstring of `numpy.tile` for details.
Currently, x.ndim and len(reps) must be equal, and, if specified, 'ndim'
Currently, x.ndim and len(reps) must be equal, and, if specified, 'ndim'
must be equal to both.
must be equal to both.
TODO: expand this.
TODO: expand this.
"""
"""
try
:
try
:
...
@@ -4420,6 +4598,7 @@ class ARange(Op):
...
@@ -4420,6 +4598,7 @@ class ARange(Op):
"""Create an array containing evenly spaced values within a given interval.
"""Create an array containing evenly spaced values within a given interval.
Parameters and behaviour are the same as numpy.arange().
Parameters and behaviour are the same as numpy.arange().
"""
"""
__props__
=
(
"dtype"
,)
__props__
=
(
"dtype"
,)
...
@@ -4550,13 +4729,13 @@ class _nd_grid(object):
...
@@ -4550,13 +4729,13 @@ class _nd_grid(object):
to their numpy equivalents.
to their numpy equivalents.
Parameters
Parameters
==========
----------
sparse : boolean, optional, default=False
sparse : boolean, optional, default=False
Specifying False leads to the equivalent of numpy's mgrid
Specifying False leads to the equivalent of numpy's mgrid functionality.
functionality.
Specifying True leads to the equivalent of ogrid.
Specifying True leads to the equivalent of ogrid.
Examples
Examples
========
--------
>>> a = T.mgrid[0:5, 0:3]
>>> a = T.mgrid[0:5, 0:3]
>>> a[0].eval()
>>> a[0].eval()
array([[0, 0, 0],
array([[0, 0, 0],
...
@@ -4570,7 +4749,6 @@ class _nd_grid(object):
...
@@ -4570,7 +4749,6 @@ class _nd_grid(object):
[0, 1, 2],
[0, 1, 2],
[0, 1, 2],
[0, 1, 2],
[0, 1, 2]], dtype=int8)
[0, 1, 2]], dtype=int8)
>>> b = T.ogrid[0:5, 0:3]
>>> b = T.ogrid[0:5, 0:3]
>>> b[0].eval()
>>> b[0].eval()
array([[0],
array([[0],
...
@@ -4580,6 +4758,7 @@ class _nd_grid(object):
...
@@ -4580,6 +4758,7 @@ class _nd_grid(object):
[4]], dtype=int8)
[4]], dtype=int8)
>>> b[1].eval()
>>> b[1].eval()
array([[0, 1, 2, 3]], dtype=int8)
array([[0, 1, 2, 3]], dtype=int8)
"""
"""
def
__init__
(
self
,
sparse
=
False
):
def
__init__
(
self
,
sparse
=
False
):
...
@@ -4689,11 +4868,19 @@ class PermuteRowElements(Op):
...
@@ -4689,11 +4868,19 @@ class PermuteRowElements(Op):
The terminal case is reached when the current tensors are vector,
The terminal case is reached when the current tensors are vector,
then the permutation contained in y is applied to x.
then the permutation contained in y is applied to x.
:param x: The input tensor, on which the permutation is applied
Parameters
:param y: Tensor containing the permutations to apply
----------
:param out: Tensor storing the output result
x : tensor
:param curdim: Counter of the current depth of recursion
The input tensor, on which the permutation is applied.
:param inverse: Whether to apply permutations or their inverse
y : tensor
Tensor containing the permutations to apply.
out : tensor
Tensor storing the output result.
curdim : int
Counter of the current depth of recursion.
inverse
Whether to apply permutations or their inverse.
"""
"""
if
len
(
x
.
shape
)
==
1
:
if
len
(
x
.
shape
)
==
1
:
# Numpy advanced indexing works in this case
# Numpy advanced indexing works in this case
...
@@ -4817,7 +5004,9 @@ def permute_row_elements(x, y, inverse=0):
...
@@ -4817,7 +5004,9 @@ def permute_row_elements(x, y, inverse=0):
def
inverse_permutation
(
perm
):
def
inverse_permutation
(
perm
):
"""Computes the inverse of permutations.
"""Computes the inverse of permutations.
Each row of input should contain a permutation of the first integers.
Each row of input should contain a permutation of the first integers.
"""
"""
return
permute_row_elements
(
return
permute_row_elements
(
arange
(
perm
.
shape
[
-
1
],
dtype
=
perm
.
dtype
),
arange
(
perm
.
shape
[
-
1
],
dtype
=
perm
.
dtype
),
...
@@ -4840,14 +5029,14 @@ class Dot(Op):
...
@@ -4840,14 +5029,14 @@ class Dot(Op):
equivalent to matrix multiplication. For two vectors, this is the inner
equivalent to matrix multiplication. For two vectors, this is the inner
product.
product.
:note: matrix-matrix products are sometimes optimized to Dot22 or Gemm ops.
Notes
(see tensor.blas)
-----
Matrix-matrix products are sometimes optimized to Dot22 or Gemm ops
:note: vector-vector products are sometimes optimized to Ger or CGer. (see
(see tensor.blas).
tensor.blas)
Vector-vector products are sometimes optimized to Ger or CGer (see
tensor.blas).
:note: m
atrix-vector products are sometimes optimized to Gemv, CGemv (see
M
atrix-vector products are sometimes optimized to Gemv, CGemv (see
tensor.blas)
tensor.blas)
.
"""
"""
__props__
=
()
__props__
=
()
...
@@ -5031,10 +5220,12 @@ pprint.assign(_dot, printing.OperatorPrinter(printing.special['middle_dot'],
...
@@ -5031,10 +5220,12 @@ pprint.assign(_dot, printing.OperatorPrinter(printing.special['middle_dot'],
def
dot
(
a
,
b
):
def
dot
(
a
,
b
):
"""
"""
Computes the dot product of two variables. For two matrices, this is
Computes the dot product of two variables.
equivalent to matrix multiplication. For two vectors, this is the inner
product. When one variable is a scalar, this is like elementwise
For two matrices, this is equivalent to matrix multiplication.
multiplication. For N dimensions, this is a sum product over the last axis
For two vectors, this is the inner product.
When one variable is a scalar, this is like elementwise multiplication.
For N dimensions, this is a sum product over the last axis
of the first array and the second-to-last axis of the second array:
of the first array and the second-to-last axis of the second array:
dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
...
@@ -5054,14 +5245,14 @@ def dot(a, b):
...
@@ -5054,14 +5245,14 @@ def dot(a, b):
3. If both a and b have either 1 or 2 dimensions, it calls Theano's
3. If both a and b have either 1 or 2 dimensions, it calls Theano's
Dot op on a and b.
Dot op on a and b.
:note: matrix-matrix products are sometimes optimized to Dot22 or Gemm ops.
Notes
(see tensor.blas)
-----
Matrix-matrix products are sometimes optimized to Dot22 or Gemm ops
:note: vector-vector products are sometimes optimized to Ger or CGer. (see
(see tensor.blas).
tensor.blas)
Vector-vector products are sometimes optimized to Ger or CGer (see
tensor.blas).
:note: m
atrix-vector products are sometimes optimized to Gemv, CGemv (see
M
atrix-vector products are sometimes optimized to Gemv, CGemv (see
tensor.blas)
tensor.blas)
.
"""
"""
a
,
b
=
as_tensor_variable
(
a
),
as_tensor_variable
(
b
)
a
,
b
=
as_tensor_variable
(
a
),
as_tensor_variable
(
b
)
...
@@ -5080,46 +5271,52 @@ def dot(a, b):
...
@@ -5080,46 +5271,52 @@ def dot(a, b):
def
tensordot
(
a
,
b
,
axes
=
2
):
def
tensordot
(
a
,
b
,
axes
=
2
):
"""
"""
Given two tensors a and b,tensordot computes a generalized dot product over
Compute a generalized dot product over provided axes.
Given two tensors a and b, tensordot computes a generalized dot product over
the provided axes. Theano's implementation reduces all expressions to
the provided axes. Theano's implementation reduces all expressions to
matrix or vector dot products and is based on code from Tijmen Tieleman's
matrix or vector dot products and is based on code from Tijmen Tieleman's
gnumpy (http://www.cs.toronto.edu/~tijmen/gnumpy.html).
gnumpy (http://www.cs.toronto.edu/~tijmen/gnumpy.html).
:param a: the first tensor variable
Parameters
:type a: symbolic tensor
----------
a: symbolic tensor
:param b: the second tensor variable
The first tensor variable.
:type b: symbolic tensor
b: symbolic tensor
The second tensor variable
:param axes: an integer or array. If an integer, the number of axes
axes: int or array-like of length 2
to sum over. If an array, it must have two array
If an integer, the number of axes to sum over.
elements containing the axes to sum over in each tensor.
If an array, it must have two array elements containing the axes
to sum over in each tensor.
Note that the default value of 2 is not guaranteed to work
for all values of a and b, and an error will be raised if
Note that the default value of 2 is not guaranteed to work
that is the case. The reason for keeping the default is to
for all values of a and b, and an error will be raised if
maintain the same signature as numpy's tensordot function
that is the case. The reason for keeping the default is to
(and np.tensordot raises analogous errors for non-compatible
maintain the same signature as numpy's tensordot function
inputs).
(and np.tensordot raises analogous errors for non-compatible
inputs).
If an integer i, it is converted to an array containing
the last i dimensions of the first tensor and the first
i dimensions of the second tensor:
axes = [list(range(a.ndim - i, a.ndim)), list(range(i))]
If an array, its two elements must contain compatible axes
of the two tensors. For example, [[1, 2], [2, 0]] means sum
over the 2nd and 3rd axes of a and the 3rd and 1st axes of b.
(Remember axes are zero-indexed!) The 2nd axis of a and the
3rd axis of b must have the same shape; the same is true for
the 3rd axis of a and the 1st axis of b.
:type axes: int or array-like of length 2
:returns: a tensor with shape equal to the concatenation of a's shape
(less any dimensions that were summed over) and b's shape
(less any dimensions that were summed over).
:rtype: symbolic tensor
If an integer i, it is converted to an array containing
the last i dimensions of the first tensor and the first
i dimensions of the second tensor:
axes = [list(range(a.ndim - i, a.ndim)), list(range(i))]
If an array, its two elements must contain compatible axes
of the two tensors. For example, [[1, 2], [2, 0]] means sum
over the 2nd and 3rd axes of a and the 3rd and 1st axes of b.
(Remember axes are zero-indexed!) The 2nd axis of a and the
3rd axis of b must have the same shape; the same is true for
the 3rd axis of a and the 1st axis of b.
Returns
-------
symbolic tensor
A tensor with shape equal to the concatenation of a's shape
(less any dimensions that were summed over) and b's shape
(less any dimensions that were summed over).
Examples
--------
It may be helpful to consider an example to see what tensordot does.
It may be helpful to consider an example to see what tensordot does.
Theano's implementation is identical to NumPy's. Here a has shape (2, 3, 4)
Theano's implementation is identical to NumPy's. Here a has shape (2, 3, 4)
and b has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] --
and b has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] --
...
@@ -5127,29 +5324,30 @@ def tensordot(a, b, axes=2):
...
@@ -5127,29 +5324,30 @@ def tensordot(a, b, axes=2):
are compatible. The resulting tensor will have shape (2, 5, 6) -- the
are compatible. The resulting tensor will have shape (2, 5, 6) -- the
dimensions that are not being summed:
dimensions that are not being summed:
a = np.random.random((2,3,4))
>>>
a = np.random.random((2,3,4))
b = np.random.random((5,6,4,3))
>>>
b = np.random.random((5,6,4,3))
#tensordot
#tensordot
c = np.tensordot(a, b, [[1,2],[3,2]])
>>>
c = np.tensordot(a, b, [[1,2],[3,2]])
#loop replicating tensordot
#loop replicating tensordot
a0, a1, a2 = a.shape
>>>
a0, a1, a2 = a.shape
b0, b1, _, _ = b.shape
>>>
b0, b1, _, _ = b.shape
cloop = np.zeros((a0,b0,b1))
>>>
cloop = np.zeros((a0,b0,b1))
#loop over non-summed indices -- these exist
#loop over non-summed indices -- these exist
#in the tensor product.
#in the tensor product.
for i in range(a0):
>>>
for i in range(a0):
for j in range(b0):
...
for j in range(b0):
for k in range(b1):
...
for k in range(b1):
#loop over summed indices -- these don't exist
...
#loop over summed indices -- these don't exist
#in the tensor product.
...
#in the tensor product.
for l in range(a1):
...
for l in range(a1):
for m in range(a2):
...
for m in range(a2):
cloop[i,j,k] += a[i,l,m] * b[j,k,m,l]
...
cloop[i,j,k] += a[i,l,m] * b[j,k,m,l]
np.allclose(c, cloop) #true
>>> np.allclose(c, cloop)
True
This specific implementation avoids a loop by transposing a and b such that
This specific implementation avoids a loop by transposing a and b such that
the summed axes of a are last and the summed axes of b are first. The
the summed axes of a are last and the summed axes of b are first. The
...
@@ -5160,12 +5358,16 @@ def tensordot(a, b, axes=2):
...
@@ -5160,12 +5358,16 @@ def tensordot(a, b, axes=2):
In an extreme case, no axes may be specified. The resulting tensor
In an extreme case, no axes may be specified. The resulting tensor
will have shape equal to the concatenation of the shapes of a and b:
will have shape equal to the concatenation of the shapes of a and b:
c = np.tensordot(a, b, 0)
>>> c = np.tensordot(a, b, 0)
print(a.shape) #(2,3,4)
>>> print(a.shape)
print(b.shape) #(5,6,4,3)
(2, 3, 4)
print(c.shape) #(2,3,4,5,6,4,3)
>>> print(b.shape)
(5, 6, 4, 3)
>>> print(c.shape)
(2, 3, 4, 5, 6, 4, 3)
See the documentation of numpy.tensordot for more examples.
See the documentation of numpy.tensordot for more examples.
"""
"""
a
,
b
=
as_tensor_variable
(
a
),
as_tensor_variable
(
b
)
a
,
b
=
as_tensor_variable
(
a
),
as_tensor_variable
(
b
)
...
@@ -5275,6 +5477,7 @@ def outer(x, y):
...
@@ -5275,6 +5477,7 @@ def outer(x, y):
"""Return vector-vector outer product.
"""Return vector-vector outer product.
If an input isn't a vector, we flatten it first.
If an input isn't a vector, we flatten it first.
"""
"""
if
x
.
ndim
!=
1
:
if
x
.
ndim
!=
1
:
x
=
x
.
flatten
()
x
=
x
.
flatten
()
...
@@ -5310,9 +5513,16 @@ del x
...
@@ -5310,9 +5513,16 @@ del x
class
Diagonal
(
Op
):
class
Diagonal
(
Op
):
"""Return specified diagonals.
"""Return specified diagonals.
:param x: A tensor variable with x.ndim >= 2.
Parameters
----------
x
A tensor variable with x.ndim >= 2.
Returns
-------
vector
A vector representing the diagonal elements.
:return: A vector representing the diagonal elements.
"""
"""
__props__
=
(
"offset"
,
"axis1"
,
"axis2"
)
__props__
=
(
"offset"
,
"axis1"
,
"axis2"
)
...
@@ -5402,6 +5612,8 @@ def stacklists(arg):
...
@@ -5402,6 +5612,8 @@ def stacklists(arg):
This function can create a tensor from a shaped list of scalars:
This function can create a tensor from a shaped list of scalars:
Examples
--------
>>> from theano.tensor import stacklists, scalars, matrices
>>> from theano.tensor import stacklists, scalars, matrices
>>> from theano import function
>>> from theano import function
>>> a, b, c, d = scalars('abcd')
>>> a, b, c, d = scalars('abcd')
...
@@ -5421,6 +5633,7 @@ def stacklists(arg):
...
@@ -5421,6 +5633,7 @@ def stacklists(arg):
>>> x = ones((4, 4), 'float32')
>>> x = ones((4, 4), 'float32')
>>> f(x, x, x, x).shape
>>> f(x, x, x, x).shape
(2, 2, 4, 4)
(2, 2, 4, 4)
"""
"""
if
isinstance
(
arg
,
(
tuple
,
list
)):
if
isinstance
(
arg
,
(
tuple
,
list
)):
return
stack
(
*
list
(
map
(
stacklists
,
arg
)))
return
stack
(
*
list
(
map
(
stacklists
,
arg
)))
...
@@ -5434,12 +5647,18 @@ def ptp(a, axis=None):
...
@@ -5434,12 +5647,18 @@ def ptp(a, axis=None):
The name of the function comes from the acronym for peak to peak.
The name of the function comes from the acronym for peak to peak.
:param a : Input tensor.
Parameters
----------
a
Input tensor.
axis
Axis along which to find the peaks. By default, flatten the array.
:param axis : Axis along which to find the peaks. By default,
Returns
flatten the array.
-------
array
A new array holding the result.
:return : A new array holding the result.
"""
"""
a
=
as_tensor_variable
(
a
)
a
=
as_tensor_variable
(
a
)
...
@@ -5495,28 +5714,36 @@ def choose(a, choices, out=None, mode='raise'):
...
@@ -5495,28 +5714,36 @@ def choose(a, choices, out=None, mode='raise'):
negative integers are mapped to 0; values greater than n-1 are mapped
negative integers are mapped to 0; values greater than n-1 are mapped
to n-1; and then the new array is constructed as above.
to n-1; and then the new array is constructed as above.
:Parameter: *a* - int array
Parameters
----------
a : int array
This array must contain integers in [0, n-1], where n is the number of
This array must contain integers in [0, n-1], where n is the number of
choices, unless mode=wrap or mode=clip, in which cases any integers
choices, unless mode=wrap or mode=clip, in which cases any integers
are permissible.
are permissible.
:Parameter: *choices* -
sequence of arrays
choices :
sequence of arrays
Choice arrays. a and all of the choices must be broadcastable to
Choice arrays. a and all of the choices must be broadcastable to
the same shape. If choices is itself an array (not recommended),
the same shape. If choices is itself an array (not recommended),
then its outermost dimension (i.e., the one corresponding to
then its outermost dimension (i.e., the one corresponding to
choices.shape[0]) is taken as defining the ``sequence``.
choices.shape[0]) is taken as defining the ``sequence``.
:Parameter: *out* -
array, optional
out :
array, optional
If provided, the result will be inserted into this array.
If provided, the result will be inserted into this array.
It should be of the appropriate shape and dtype.
It should be of the appropriate shape and dtype.
:Parameter: *mode* -
{``raise`` (default), ``wrap``, ``clip``}, optional
mode :
{``raise`` (default), ``wrap``, ``clip``}, optional
Specifies how indices outside [0, n-1] will be treated:
Specifies how indices outside [0, n-1] will be treated:
``raise`` : an exception is raised
``raise`` : an exception is raised
``wrap`` : value becomes value mod n
``wrap`` : value becomes value mod n
``clip`` : values < 0 are mapped to 0, values > n-1 are mapped to n-1
``clip`` : values < 0 are mapped to 0, values > n-1 are mapped to n-1
:Returns: merged_array - array
Returns
-------
merged_array - array
The merged result.
The merged result.
:Raises:
ValueError - shape mismatch
Raises
------
ValueError - shape mismatch
If a and each choice array are not all broadcastable to the same shape.
If a and each choice array are not all broadcastable to the same shape.
"""
"""
# This is done to keep the same function signature as NumPy.
# This is done to keep the same function signature as NumPy.
assert
out
is
None
assert
out
is
None
...
@@ -5609,6 +5836,7 @@ class Choose(Op):
...
@@ -5609,6 +5836,7 @@ class Choose(Op):
class
AllocEmpty
(
gof
.
Op
):
class
AllocEmpty
(
gof
.
Op
):
"""Implement Alloc on the cpu, but without initializing memory."""
"""Implement Alloc on the cpu, but without initializing memory."""
__props__
=
(
"dtype"
,)
__props__
=
(
"dtype"
,)
# specify the type of the data
# specify the type of the data
...
...
theano/tensor/blas.py
浏览文件 @
6304a061
...
@@ -17,10 +17,12 @@ There are four kinds of BLAS Ops in Theano:
...
@@ -17,10 +17,12 @@ There are four kinds of BLAS Ops in Theano:
- C-based (blas_c)
- C-based (blas_c)
- CUDA-based (theano.sandbox.cuda.blas)
- CUDA-based (theano.sandbox.cuda.blas)
:note: Unfortunately (because it's confusing) this file currently contains Ops
Notes
that contain both Python and C versions. I think it would be better to
-----
move the C implementations to blas_c so that this file is pure Python.
Unfortunately (because it's confusing) this file currently contains Ops
-JB
that contain both Python and C versions. I think it would be better to
move the C implementations to blas_c so that this file is pure Python.
-JB
Ops
Ops
...
@@ -121,7 +123,6 @@ Specialize Gemm to Gemv
...
@@ -121,7 +123,6 @@ Specialize Gemm to Gemv
If arguments to GEMM are dimshuffled vectors, then we can use GEMV
If arguments to GEMM are dimshuffled vectors, then we can use GEMV
instead. This optimization is `local_gemm_to_gemv`.
instead. This optimization is `local_gemm_to_gemv`.
"""
"""
from
__future__
import
print_function
from
__future__
import
print_function
import
copy
import
copy
...
@@ -359,7 +360,9 @@ class Gemv(Op):
...
@@ -359,7 +360,9 @@ class Gemv(Op):
x, y are vectors
x, y are vectors
alpha, beta are scalars
alpha, beta are scalars
output is a vector that can be inplace on y
output is a vector that can be inplace on y
"""
"""
__props__
=
(
"inplace"
,)
__props__
=
(
"inplace"
,)
def
__init__
(
self
,
inplace
):
def
__init__
(
self
,
inplace
):
...
@@ -443,12 +446,13 @@ class Ger(Op):
...
@@ -443,12 +446,13 @@ class Ger(Op):
for matrix A, scalar alpha, vectors x and y.
for matrix A, scalar alpha, vectors x and y.
This interface to GER allows non-destructive operation on A via the
This interface to GER allows non-destructive operation on A via the
`destructive`
`destructive` argument to the constructor.
argument to the constructor.
:TODO: Create better classes ScipyGer and CGer that inherit from this class
:TODO: Create better classes ScipyGer and CGer that inherit from this class
and override the make_thunk() method to use Scipy and C respectively.
and override the make_thunk() method to use Scipy and C respectively.
"""
"""
__props__
=
(
"destructive"
,)
__props__
=
(
"destructive"
,)
def
__init__
(
self
,
destructive
):
def
__init__
(
self
,
destructive
):
...
@@ -508,16 +512,22 @@ def ldflags(libs=True, flags=False, libs_dir=False, include_dir=False):
...
@@ -508,16 +512,22 @@ def ldflags(libs=True, flags=False, libs_dir=False, include_dir=False):
It returns a list of libraries against which an Op's object file
It returns a list of libraries against which an Op's object file
should be linked to benefit from a BLAS implementation.
should be linked to benefit from a BLAS implementation.
:type libs: bool, defaults to True
Parameters
:param libs: extract flags starting with "-l"
----------
:type libs_dir: bool, defaults to False
libs : bool, optional
:param libs_dir: extract flags starting with "-L"
Extract flags starting with "-l" (the default is True).
:type include_dir: bool, defaults to False
libs_dir : bool, optional
:param include_dir: extract flags starting with "-I"
Extract flags starting with "-L" (the default is False).
:type flags: bool, defaults to False
include_dir : bool, optional
:param flags: extract all the other flags
Extract flags starting with "-I" (the default is False).
:rtype: list of strings
flags: bool, optional
:returns: extracted flags
Extract all the other flags (the default is False).
Returns
-------
list of strings
Extracted flags.
"""
"""
ldflags_str
=
theano
.
config
.
blas
.
ldflags
ldflags_str
=
theano
.
config
.
blas
.
ldflags
return
_ldflags
(
ldflags_str
=
ldflags_str
,
return
_ldflags
(
ldflags_str
=
ldflags_str
,
...
@@ -533,19 +543,25 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir):
...
@@ -533,19 +543,25 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir):
Depending on the options, different type of flags will be kept.
Depending on the options, different type of flags will be kept.
:type ldflags_str: string
Parameters
:param ldflags_str: the string to process. Typically, this will
----------
be the content of `theano.config.blas.ldflags`
ldflags_str : string
:type libs: bool
The string to process. Typically, this will be the content of
:param libs: extract flags starting with "-l"
`theano.config.blas.ldflags`.
:type libs_dir: bool
libs : bool
:param libs_dir: extract flags starting with "-L"
Extract flags starting with "-l".
:type include_dir: bool
flags: bool
:param include_dir: extract flags starting with "-I"
Extract all the other flags.
:type flags: bool
libs_dir: bool
:param flags: extract all the other flags
Extract flags starting with "-L".
:rtype: list of strings
include_dir: bool
:returns: extracted flags
Extract flags starting with "-I".
Returns
-------
list of strings
Extracted flags.
"""
"""
rval
=
[]
rval
=
[]
if
libs_dir
:
if
libs_dir
:
...
@@ -598,10 +614,12 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir):
...
@@ -598,10 +614,12 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir):
class
GemmRelated
(
Op
):
class
GemmRelated
(
Op
):
"""Base class for Gemm and Dot22
"""Base class for Gemm and Dot22
.
This class provides a kind of templated gemm Op.
This class provides a kind of templated gemm Op.
"""
"""
__props__
=
()
__props__
=
()
def
c_support_code
(
self
):
def
c_support_code
(
self
):
...
@@ -915,7 +933,7 @@ class GemmRelated(Op):
...
@@ -915,7 +933,7 @@ class GemmRelated(Op):
class
Gemm
(
GemmRelated
):
class
Gemm
(
GemmRelated
):
"""In-place version of matrix-matrix multiplication (with accumulation)
:
"""In-place version of matrix-matrix multiplication (with accumulation)
.
When a and b are scalars and x, y, and z are matrices, then
When a and b are scalars and x, y, and z are matrices, then
...
@@ -936,6 +954,7 @@ class Gemm(GemmRelated):
...
@@ -936,6 +954,7 @@ class Gemm(GemmRelated):
optimized linear algebra operations.)
optimized linear algebra operations.)
"""
"""
E_rank
=
'gemm only works for rank 2'
E_rank
=
'gemm only works for rank 2'
E_scalar
=
'gemm requires scalar argument'
E_scalar
=
'gemm requires scalar argument'
E_z_uniq
=
'argument z aliased to x or y'
# TODO: justify / delete this
E_z_uniq
=
'argument z aliased to x or y'
# TODO: justify / delete this
...
@@ -1430,9 +1449,10 @@ def _factor_canonicalized(lst):
...
@@ -1430,9 +1449,10 @@ def _factor_canonicalized(lst):
def
_gemm_from_factored_list
(
lst
):
def
_gemm_from_factored_list
(
lst
):
"""Returns None, or a list to replace node.outputs
"""
"""
Returns None, or a list to replace node.outputs.
"""
lst2
=
[]
lst2
=
[]
# Remove the tuple that can't be cast correctly.
# Remove the tuple that can't be cast correctly.
# This can happen when we try to cast a complex to a real
# This can happen when we try to cast a complex to a real
...
@@ -1524,7 +1544,7 @@ def _gemm_from_node2(node):
...
@@ -1524,7 +1544,7 @@ def _gemm_from_node2(node):
class
GemmOptimizer
(
Optimizer
):
class
GemmOptimizer
(
Optimizer
):
"""Graph optimizer for inserting Gemm operations"""
"""Graph optimizer for inserting Gemm operations
.
"""
def
__init__
(
self
):
def
__init__
(
self
):
Optimizer
.
__init__
(
self
)
Optimizer
.
__init__
(
self
)
self
.
warned
=
False
self
.
warned
=
False
...
@@ -1645,8 +1665,11 @@ class GemmOptimizer(Optimizer):
...
@@ -1645,8 +1665,11 @@ class GemmOptimizer(Optimizer):
class
Dot22
(
GemmRelated
):
class
Dot22
(
GemmRelated
):
"""Compute a matrix-matrix product.
"""Compute a matrix-matrix product.
This is a specialization of the more general Dot()
This is a specialization of the more general Dot().
"""
"""
def
make_node
(
self
,
x
,
y
):
def
make_node
(
self
,
x
,
y
):
dtypes
=
(
'float32'
,
'float64'
,
'complex64'
,
'complex128'
)
dtypes
=
(
'float32'
,
'float64'
,
'complex64'
,
'complex128'
)
if
x
.
type
.
ndim
!=
2
or
x
.
type
.
dtype
not
in
dtypes
:
if
x
.
type
.
ndim
!=
2
or
x
.
type
.
dtype
not
in
dtypes
:
...
@@ -1780,8 +1803,7 @@ def local_inplace_ger(node):
...
@@ -1780,8 +1803,7 @@ def local_inplace_ger(node):
@local_optimizer
([
gemm_no_inplace
])
@local_optimizer
([
gemm_no_inplace
])
def
local_gemm_to_gemv
(
node
):
def
local_gemm_to_gemv
(
node
):
"""GEMM acting on row or column matrices -> GEMV
"""GEMM acting on row or column matrices -> GEMV."""
"""
if
node
.
op
==
gemm_no_inplace
:
if
node
.
op
==
gemm_no_inplace
:
z
,
a
,
x
,
y
,
b
=
node
.
inputs
z
,
a
,
x
,
y
,
b
=
node
.
inputs
if
z
.
broadcastable
==
x
.
broadcastable
==
(
True
,
False
):
if
z
.
broadcastable
==
x
.
broadcastable
==
(
True
,
False
):
...
@@ -1794,8 +1816,7 @@ def local_gemm_to_gemv(node):
...
@@ -1794,8 +1816,7 @@ def local_gemm_to_gemv(node):
@local_optimizer
([
gemm_no_inplace
])
@local_optimizer
([
gemm_no_inplace
])
def
local_gemm_to_ger
(
node
):
def
local_gemm_to_ger
(
node
):
"""GEMM computing an outer-product -> GER
"""GEMM computing an outer-product -> GER."""
"""
if
node
.
op
==
gemm_no_inplace
:
if
node
.
op
==
gemm_no_inplace
:
z
,
a
,
x
,
y
,
b
=
node
.
inputs
z
,
a
,
x
,
y
,
b
=
node
.
inputs
if
x
.
broadcastable
[
1
]
and
y
.
broadcastable
[
0
]:
if
x
.
broadcastable
[
1
]
and
y
.
broadcastable
[
0
]:
...
@@ -1825,8 +1846,7 @@ def local_gemm_to_ger(node):
...
@@ -1825,8 +1846,7 @@ def local_gemm_to_ger(node):
# working
# working
@local_optimizer
([
_dot22
])
@local_optimizer
([
_dot22
])
def
local_dot22_to_ger_or_gemv
(
node
):
def
local_dot22_to_ger_or_gemv
(
node
):
"""dot22 computing an outer-product -> GER
"""dot22 computing an outer-product -> GER."""
"""
if
node
.
op
==
_dot22
:
if
node
.
op
==
_dot22
:
x
,
y
=
node
.
inputs
x
,
y
=
node
.
inputs
xb
=
x
.
broadcastable
xb
=
x
.
broadcastable
...
@@ -1904,11 +1924,14 @@ optdb.register('InplaceBlasOpt',
...
@@ -1904,11 +1924,14 @@ optdb.register('InplaceBlasOpt',
class
Dot22Scalar
(
GemmRelated
):
class
Dot22Scalar
(
GemmRelated
):
"""Compute a matrix-matrix product.
"""Compute a matrix-matrix product.
This is a specialization of the more general Dot()
This is a specialization of the more general Dot()
Used to call optimized gemm implementation.
Used to call optimized gemm implementation.
Also used to generate a gemm later.
Also used to generate a gemm later.
compute scalar*dot(x,y)
compute scalar*dot(x,y).
"""
"""
def
make_node
(
self
,
x
,
y
,
a
):
def
make_node
(
self
,
x
,
y
,
a
):
if
a
.
ndim
!=
0
:
if
a
.
ndim
!=
0
:
raise
TypeError
(
Gemm
.
E_scalar
,
a
)
raise
TypeError
(
Gemm
.
E_scalar
,
a
)
...
@@ -1996,25 +2019,27 @@ _dot22scalar = Dot22Scalar()
...
@@ -1996,25 +2019,27 @@ _dot22scalar = Dot22Scalar()
@local_optimizer
([
T
.
mul
])
@local_optimizer
([
T
.
mul
])
def
local_dot22_to_dot22scalar
(
node
):
def
local_dot22_to_dot22scalar
(
node
):
"""
"""
:note: Previous attempts to alter this optimization to replace dot22 with
Notes
gemm instead of dot22scalar resulted in some Scan nodes being
-----
duplicated and the ScanSaveMem optimization never running on them,
Previous attempts to alter this optimization to replace dot22 with
resulting in highly increased memory usage. Until this issue is
gemm instead of dot22scalar resulted in some Scan nodes being
resolved, this optimization should keep using dot22scalar instead of
duplicated and the ScanSaveMem optimization never running on them,
gemm.
resulting in highly increased memory usage. Until this issue is
resolved, this optimization should keep using dot22scalar instead of
:note: we upcast the scalar if after the multiplication with the
gemm.
dot this give the same type.
We upcast the scalar if after the multiplication with the dot this give
.. note: We execute this optimizer after the gemm optimizer. This
the same type.
allow to give more priority to gemm that give more speed up
then this optimizer, but allow the gemm optimizer to ignore
We execute this optimizer after the gemm optimizer. This
this op.
allow to give more priority to gemm that give more speed up
then this optimizer, but allow the gemm optimizer to ignore
this op.
TODO: support when we can reorder the mul to generate a
TODO: support when we can reorder the mul to generate a
dot22scalar or fix the canonizer to merge them(1 mul with multiple
dot22scalar or fix the canonizer to merge them(1 mul with multiple
inputs)
inputs)
"""
"""
if
node
.
op
!=
T
.
mul
:
if
node
.
op
!=
T
.
mul
:
return
False
return
False
...
@@ -2102,7 +2127,6 @@ def local_dot22_to_dot22scalar(node):
...
@@ -2102,7 +2127,6 @@ def local_dot22_to_dot22scalar(node):
return
[
T
.
mul
(
_dot22scalar
(
d
.
owner
.
inputs
[
0
],
return
[
T
.
mul
(
_dot22scalar
(
d
.
owner
.
inputs
[
0
],
d
.
owner
.
inputs
[
1
],
a
),
*
o
)]
d
.
owner
.
inputs
[
1
],
a
),
*
o
)]
# must happen after gemm as the gemm optimizer don't understant
# must happen after gemm as the gemm optimizer don't understant
# dot22scalar and gemm give more speed up then dot22scalar
# dot22scalar and gemm give more speed up then dot22scalar
blas_optdb
.
register
(
'local_dot22_to_dot22scalar'
,
blas_optdb
.
register
(
'local_dot22_to_dot22scalar'
,
...
...
theano/tensor/blas_headers.py
浏览文件 @
6304a061
""" Header text for the C and Fortran BLAS interfaces.
""" Header text for the C and Fortran BLAS interfaces.
There is no standard name or location for this header, so we just insert it
There is no standard name or location for this header, so we just insert it
ourselves into the C code
ourselves into the C code.
"""
"""
import
logging
import
logging
import
textwrap
import
textwrap
...
@@ -32,6 +33,7 @@ def detect_macos_sdot_bug():
...
@@ -32,6 +33,7 @@ def detect_macos_sdot_bug():
detected. Its value is returned by the function
detected. Its value is returned by the function
- detect_macos_sdot_bug.fix_works will be set to True if the fix was
- detect_macos_sdot_bug.fix_works will be set to True if the fix was
attempted, and succeeded.
attempted, and succeeded.
"""
"""
_logger
.
debug
(
'Starting detection of bug in Mac OS BLAS sdot_ routine'
)
_logger
.
debug
(
'Starting detection of bug in Mac OS BLAS sdot_ routine'
)
if
detect_macos_sdot_bug
.
tested
:
if
detect_macos_sdot_bug
.
tested
:
...
...
theano/tensor/elemwise.py
浏览文件 @
6304a061
...
@@ -62,67 +62,70 @@ class DimShuffle(Op):
...
@@ -62,67 +62,70 @@ class DimShuffle(Op):
dimension and a numerical index represents the dimension of the same
dimension and a numerical index represents the dimension of the same
rank in the tensor passed to perform.
rank in the tensor passed to perform.
Examples:
Parameters
DimShuffle((False, False, False), ['x', 2, 'x', 0, 1])
----------
input_broadcastable
This op will only work on 3d tensors with no broadcastable
The expected broadcastable pattern of the input
dimensions. The first dimension will be broadcastable,
new_order
then we will have the third dimension of the input tensor as
A list representing the relationship between the input's
the second of the resulting tensor, etc. If the tensor has
dimensions and the output's dimensions. Each element of the
shape (20, 30, 40), the resulting tensor will have dimensions
list can either be an index or 'x'. Indices must be encoded
(1, 40, 1, 20, 30). (AxBxC tensor is mapped to 1xCx1xAxB tensor)
as python integers, not theano symbolic integers.
inplace : bool, optional
DimShuffle((True, False), [1])
If True, the output will be a view of the input.
If False (default), the output will be a copy of the input.
This op will only work on 2d tensors with the first dimension
broadcastable.
If j = new_order[i] is an index, the output's ith dimension
The second dimension of the input tensor will be the first dimension of
will be the input's jth dimension.
the resulting tensor.
If new_order[i] is 'x', the output's ith dimension will
If the tensor has shape (1, 20), the resulting tensor will have shape
be 1 and Broadcast operations will be allowed to do broadcasting
(20, ).
over that dimension.
More examples:
If input.broadcastable[i] == False then i must be found in new_order.
DimShuffle((), ['x']) -> make a 0d (scalar) into a 1d vector
Broadcastable dimensions, on the other hand, can be discarded.
DimShuffle((False, False), [0, 1]) -> identity
DimShuffle((False, False), [1, 0]) -> inverts the 1st and 2nd dimensions
Extended Summary
DimShuffle((False,), ['x', 0]) -> make a row out
----------------
of a 1d vector (N to 1xN)
DimShuffle((False, False, False), ['x', 2, 'x', 0, 1])
DimShuffle((False,), [0, 'x']) -> make a column
out of a 1d vector (N to Nx1)
This op will only work on 3d tensors with no broadcastable
DimShuffle((False, False, False), [2, 0, 1]) -> AxBxC to CxAxB
dimensions. The first dimension will be broadcastable,
DimShuffle((False, False), [0, 'x', 1]) -> AxB to Ax1xB
then we will have the third dimension of the input tensor as
DimShuffle((False, False), [1, 'x', 0]) -> AxB to Bx1xA
the second of the resulting tensor, etc. If the tensor has
shape (20, 30, 40), the resulting tensor will have dimensions
(1, 40, 1, 20, 30). (AxBxC tensor is mapped to 1xCx1xAxB tensor)
DimShuffle((True, False), [1])
This op will only work on 2d tensors with the first dimension
broadcastable.
The second dimension of the input tensor will be the first dimension of
the resulting tensor.
If the tensor has shape (1, 20), the resulting tensor will have shape
(20, ).
More examples :
DimShuffle((), ['x']) -> make a 0d (scalar) into a 1d vector
DimShuffle((False, False), [0, 1]) -> identity
DimShuffle((False, False), [1, 0]) -> inverts the 1st and 2nd dimensions
DimShuffle((False,), ['x', 0]) -> make a row out
of a 1d vector (N to 1xN)
DimShuffle((False,), [0, 'x']) -> make a column
out of a 1d vector (N to Nx1)
DimShuffle((False, False, False), [2, 0, 1]) -> AxBxC to CxAxB
DimShuffle((False, False), [0, 'x', 1]) -> AxB to Ax1xB
DimShuffle((False, False), [1, 'x', 0]) -> AxB to Bx1xA
The reordering of the dimensions can be done in numpy with the
The reordering of the dimensions can be done in numpy with the
transpose function.
transpose function.
Adding, subtracting dimensions can be done with reshape.
Adding, subtracting dimensions can be done with reshape.
"""
"""
_f16_ok
=
True
_f16_ok
=
True
check_input
=
False
check_input
=
False
def
__init__
(
self
,
input_broadcastable
,
new_order
,
inplace
=
False
):
def
__init__
(
self
,
input_broadcastable
,
new_order
,
inplace
=
False
):
"""
Usage: DimShuffle(input_broadcastable, new_order, inplace = False)
- input_broadcastable: the expected broadcastable pattern of the
input
- new_order: a list representing the relationship between the
input's dimensions and the output's dimensions. Each
element of the list can either be an index or 'x'.
Indices must be encoded as python integers, not
theano symbolic integers.
- inplace: if True, the output will be a view of the input.
If False, the output will be a copy of the input.
If j = new_order[i] is an index, the output's ith dimension
will be the input's jth dimension.
If new_order[i] is 'x', the output's ith dimension will
be 1 and Broadcast operations will be allowed to do broadcasting
over that dimension.
If input.broadcastable[i] == False then i must be found in new_order.
Broadcastable dimensions, on the other hand, can be discarded.
"""
input_broadcastable
=
tuple
(
input_broadcastable
)
input_broadcastable
=
tuple
(
input_broadcastable
)
self
.
input_broadcastable
=
input_broadcastable
self
.
input_broadcastable
=
input_broadcastable
new_order
=
tuple
(
new_order
)
new_order
=
tuple
(
new_order
)
...
@@ -456,36 +459,40 @@ class Elemwise(OpenMPOp):
...
@@ -456,36 +459,40 @@ class Elemwise(OpenMPOp):
be the same as the corresponding input type (see the doc of
be the same as the corresponding input type (see the doc of
scalar.ScalarOp to get help about controlling the output type)
scalar.ScalarOp to get help about controlling the output type)
Examples:
Parameters
Elemwise(add) # represents + on tensors (x + y)
-----------
Elemwise(add, {0 : 0}) # represents the += operation (x += y)
scalar_op
Elemwise(add, {0 : 1}) # represents += on the second argument (y += x)
An instance of a subclass of scalar.ScalarOp which works uniquely
Elemwise(mul)(rand(10, 5), rand(1, 5)) # the second input is completed
on scalars.
# along the first dimension to match the first input
inplace_pattern
Elemwise(true_div)(rand(10, 5), rand(10, 1)) # same but along the
A dictionary that maps the index of an output to the
# second dimension
index of an input so the output is calculated inplace using
Elemwise(int_div)(rand(1, 5), rand(10, 1)) # the output has size (10, 5)
the input's storage. (Just like destroymap, but without the lists.)
Elemwise(log)(rand(3, 4, 5))
nfunc_spec
Either None or a tuple of three elements,
(nfunc_name, nin, nout) such that getattr(numpy, nfunc_name)
implements this operation, takes nin inputs and nout outputs.
Note that nin cannot always be inferred from the scalar op's
own nin field because that value is sometimes 0 (meaning a
variable number of inputs), whereas the numpy function may
not have varargs.
Examples
--------
Elemwise(add) # represents + on tensors (x + y)
Elemwise(add, {0 : 0}) # represents the += operation (x += y)
Elemwise(add, {0 : 1}) # represents += on the second argument (y += x)
Elemwise(mul)(rand(10, 5), rand(1, 5)) # the second input is completed
# along the first dimension to match the first input
Elemwise(true_div)(rand(10, 5), rand(10, 1)) # same but along the
# second dimension
Elemwise(int_div)(rand(1, 5), rand(10, 1)) # the output has size (10, 5)
Elemwise(log)(rand(3, 4, 5))
"""
"""
def
__init__
(
self
,
scalar_op
,
inplace_pattern
=
None
,
name
=
None
,
def
__init__
(
self
,
scalar_op
,
inplace_pattern
=
None
,
name
=
None
,
nfunc_spec
=
None
,
openmp
=
None
):
nfunc_spec
=
None
,
openmp
=
None
):
"""
Usage: Elemwise(scalar_op, inplace_pattern = {})
* scalar_op: an instance of a subclass of scalar.ScalarOp which works
uniquely on scalars
* inplace_pattern: a dictionary that maps the index of an output to the
index of an input so the output is calculated inplace using
the input's storage. (Just like destroymap, but without the lists.)
* nfunc_spec: either None or a tuple of three elements,
(nfunc_name, nin, nout) such that getattr(numpy, nfunc_name)
implements this operation, takes nin inputs and nout outputs.
Note that nin cannot always be inferred from the scalar op's
own nin field because that value is sometimes 0 (meaning a
variable number of inputs), whereas the numpy function may
not have varargs.
"""
if
inplace_pattern
is
None
:
if
inplace_pattern
is
None
:
inplace_pattern
=
{}
inplace_pattern
=
{}
self
.
name
=
name
self
.
name
=
name
...
@@ -1252,14 +1259,25 @@ class CAReduce(Op):
...
@@ -1252,14 +1259,25 @@ class CAReduce(Op):
dimensions. It will contain the variable of accumulating all values
dimensions. It will contain the variable of accumulating all values
over the reduced dimensions using the specified scalar op.
over the reduced dimensions using the specified scalar op.
Examples:
Parameters
CAReduce(add) -> sum (ie, acts like the numpy sum operation)
----------
CAReduce(mul) -> product
scalar_op
CAReduce(maximum) -> max
A binary scalar op with only one output.
CAReduce(minimum) -> min
It must be commutative and associative.
CAReduce(or_) -> any # not lazy
axis
CAReduce(and_) -> all # not lazy
- The dimension along which we want to reduce
CAReduce(xor) -> a bit at 1 tell that there was an odd number of bit at
- List of dimensions that we want to reduce
- If None, all dimensions are reduced
Examples
--------
CAReduce(add) -> sum (ie, acts like the numpy sum operation)
CAReduce(mul) -> product
CAReduce(maximum) -> max
CAReduce(minimum) -> min
CAReduce(or_) -> any # not lazy
CAReduce(and_) -> all # not lazy
CAReduce(xor) -> a bit at 1 tell that there was an odd number of bit at
that position that where 1.
that position that where 1.
0 it was an even number ...
0 it was an even number ...
...
@@ -1270,18 +1288,10 @@ class CAReduce(Op):
...
@@ -1270,18 +1288,10 @@ class CAReduce(Op):
operation represented by the reduction must be both commutative
operation represented by the reduction must be both commutative
and associative (eg add, multiply, maximum, binary or/and/xor - but not
and associative (eg add, multiply, maximum, binary or/and/xor - but not
subtract, divide or power).
subtract, divide or power).
"""
"""
def
__init__
(
self
,
scalar_op
,
axis
=
None
):
def
__init__
(
self
,
scalar_op
,
axis
=
None
):
"""
Usage: CAReduce(scalar_op, axis = None)
* scalar_op: a binary scalar op with only one output.
It must be commutative and associative.
* axis: - the dimension along which we want to reduce
- list of dimensions that we want to reduce
- if None, all dimensions are reduced
"""
if
scalar_op
.
nin
not
in
[
-
1
,
2
]
or
scalar_op
.
nout
!=
1
:
if
scalar_op
.
nin
not
in
[
-
1
,
2
]
or
scalar_op
.
nout
!=
1
:
raise
NotImplementedError
((
raise
NotImplementedError
((
"CAReduce only supports binary functions with a single "
"CAReduce only supports binary functions with a single "
...
@@ -1656,8 +1666,10 @@ class All(CAReduce):
...
@@ -1656,8 +1666,10 @@ class All(CAReduce):
""" Applies `bitwise and` to all the values of a tensor along the
""" Applies `bitwise and` to all the values of a tensor along the
specified axis(es).
specified axis(es).
Equivalent to CAReduce(scalar.and_, axis=axis)
Equivalent to CAReduce(scalar.and_, axis=axis).
"""
"""
def
__init__
(
self
,
axis
=
None
):
def
__init__
(
self
,
axis
=
None
):
CAReduce
.
__init__
(
self
,
scalar
.
and_
,
axis
)
CAReduce
.
__init__
(
self
,
scalar
.
and_
,
axis
)
...
@@ -1686,8 +1698,10 @@ class Any(CAReduce):
...
@@ -1686,8 +1698,10 @@ class Any(CAReduce):
""" Applies `bitwise or` to all the values of a tensor along the
""" Applies `bitwise or` to all the values of a tensor along the
specified axis(es).
specified axis(es).
Equivalent to CAReduce(scalar.or_, axis=axis)
Equivalent to CAReduce(scalar.or_, axis=axis).
"""
"""
def
__init__
(
self
,
axis
=
None
):
def
__init__
(
self
,
axis
=
None
):
CAReduce
.
__init__
(
self
,
scalar
.
or_
,
axis
)
CAReduce
.
__init__
(
self
,
scalar
.
or_
,
axis
)
...
@@ -1727,40 +1741,42 @@ class CAReduceDtype(CAReduce):
...
@@ -1727,40 +1741,42 @@ class CAReduceDtype(CAReduce):
If no dtype is provided, one will be inferred so as not to lose
If no dtype is provided, one will be inferred so as not to lose
too much precision.
too much precision.
Parameters
----------
scalar_op
A binary scalar op with only one output.
It must be commutative and associative.
axis
- the dimension along which we want to reduce
- list of dimensions that we want to reduce
- if None, all dimensions are reduced
dtype
The dtype of the returned tensor. If None, then we use the default
dtype which is the same as the input tensor's dtype except when:
- the input dtype is a signed integer of precision < 64 bit, in
which case we use int64
- the input dtype is an unsigned integer of precision < 64 bit, in
which case we use uint64
This default dtype does _not_ depend on the value of "acc_dtype".
This behavior is similar in spirit to that of numpy (except numpy
uses the default machine integer while we always use 64 bit
integers to avoid platform-dependent behavior).
acc_dtype
The dtype of the internal accumulator.
If None (default), we use the dtype in the list below,
or the input dtype if its precision is higher:
- for int dtypes, we use at least int64;
- for uint dtypes, we use at least uint64;
- for float dtypes, we use at least float64;
- for complex dtypes, we use at least complex128.
"""
"""
def
__init__
(
self
,
scalar_op
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
):
def
__init__
(
self
,
scalar_op
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
):
"""
Usage: CAReduceDtype(scalar_op, axis=None, dtype=None, acc_dtype=None)
:param scalar_op: a binary scalar op with only one output.
It must be commutative and associative.
:param axis: - the dimension along which we want to reduce
- list of dimensions that we want to reduce
- if None, all dimensions are reduced
:param dtype: The dtype of the returned
tensor. If None, then we use the default dtype which is the same
as the input tensor's dtype except when:
- the input dtype is a signed integer of precision < 64 bit, in
which case we use int64
- the input dtype is an unsigned integer of precision < 64 bit, in
which case we use uint64
This default dtype does _not_ depend on the value of "acc_dtype".
This behavior is similar in spirit to that of numpy (except numpy
uses the default machine integer while we always use 64 bit
integers to avoid platform-dependent behavior).
:param acc_dtype: The dtype of the internal accumulator.
If None (default), we use the dtype in the list below,
or the input dtype if its precision is higher:
- for int dtypes, we use at least int64;
- for uint dtypes, we use at least uint64;
- for float dtypes, we use at least float64;
- for complex dtypes, we use at least complex128.
"""
CAReduce
.
__init__
(
self
,
scalar_op
,
axis
=
axis
)
CAReduce
.
__init__
(
self
,
scalar_op
,
axis
=
axis
)
self
.
dtype
=
dtype
self
.
dtype
=
dtype
self
.
acc_dtype
=
acc_dtype
self
.
acc_dtype
=
acc_dtype
...
@@ -1888,33 +1904,36 @@ class Sum(CAReduceDtype):
...
@@ -1888,33 +1904,36 @@ class Sum(CAReduceDtype):
Equivalent to CAReduceDtype(scalar.add, axis=axis, dtype=dtype),
Equivalent to CAReduceDtype(scalar.add, axis=axis, dtype=dtype),
with the difference that this defines the gradient of sum wrt its
with the difference that this defines the gradient of sum wrt its
tensor input.
tensor input.
"""
def
__init__
(
self
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
):
Parameters
"""
----------
Constructor.
axis
Axis(es) along which the tensor should be summed
:param axis: Axis(es) along which the tensor should be summed
(use None to sum over all axes, and a list or tuple to sum along more
(use None to sum over all axes, and a list or tuple to sum along more
than one axis).
than one axis).
:param dtype: The dtype of the internal accumulator and returned
dtype
The dtype of the internal accumulator and returned
tensor. If None, then we use the default dtype which is the same as the
tensor. If None, then we use the default dtype which is the same as the
input tensor's dtype except when:
input tensor's dtype except when:
- the input dtype is a signed integer of precision < 64 bit, in
- the input dtype is a signed integer of precision < 64 bit, in
which case we use int64
which case we use int64
- the input dtype is an unsigned integer of precision < 64 bit, in
- the input dtype is an unsigned integer of precision < 64 bit, in
which case we use uint64
which case we use uint64
This value does not depend on the value of "acc_dtype".
This value does not depend on the value of "acc_dtype".
:param acc_dtype: The dtype of the internal accumulator.
acc_dtype
If None (default), we use the dtype in the list below,
The dtype of the internal accumulator.
or the input dtype if its precision is higher:
If None (default), we use the dtype in the list below,
- for int dtypes, we use at least int64;
or the input dtype if its precision is higher:
- for uint dtypes, we use at least uint64;
- for int dtypes, we use at least int64;
- for float dtypes, we use at least float64;
- for uint dtypes, we use at least uint64;
- for complex dtypes, we use at least complex128.
- for float dtypes, we use at least float64;
"""
- for complex dtypes, we use at least complex128.
"""
def
__init__
(
self
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
):
CAReduceDtype
.
__init__
(
self
,
scalar
.
add
,
axis
=
axis
,
CAReduceDtype
.
__init__
(
self
,
scalar
.
add
,
axis
=
axis
,
dtype
=
dtype
,
acc_dtype
=
acc_dtype
)
dtype
=
dtype
,
acc_dtype
=
acc_dtype
)
...
@@ -1960,7 +1979,9 @@ class Prod(CAReduceDtype):
...
@@ -1960,7 +1979,9 @@ class Prod(CAReduceDtype):
Equivalent to CAReduce(scalar.prod, axis = axis), with the
Equivalent to CAReduce(scalar.prod, axis = axis), with the
difference that this defines the gradient of prod wrt its tensor
difference that this defines the gradient of prod wrt its tensor
input.
input.
"""
"""
def
__init__
(
self
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
,
def
__init__
(
self
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
,
no_zeros_in_input
=
False
):
no_zeros_in_input
=
False
):
CAReduceDtype
.
__init__
(
self
,
scalar
.
mul
,
axis
=
axis
,
CAReduceDtype
.
__init__
(
self
,
scalar
.
mul
,
axis
=
axis
,
...
@@ -1982,7 +2003,7 @@ class Prod(CAReduceDtype):
...
@@ -1982,7 +2003,7 @@ class Prod(CAReduceDtype):
hash
(
self
.
no_zeros_in_input
))
hash
(
self
.
no_zeros_in_input
))
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
'''
"""
The grad of this Op could be very easy, if it is was not for the case
The grad of this Op could be very easy, if it is was not for the case
where zeros are present in a given "group" (ie. elements reduced
where zeros are present in a given "group" (ie. elements reduced
together to form the product).
together to form the product).
...
@@ -2026,7 +2047,8 @@ class Prod(CAReduceDtype):
...
@@ -2026,7 +2047,8 @@ class Prod(CAReduceDtype):
I do this by first counting the number of zeros in each group (see
I do this by first counting the number of zeros in each group (see
the "T.eq()" bits), then taking this or that behavior (see T.switch)
the "T.eq()" bits), then taking this or that behavior (see T.switch)
based on the result of this count.
based on the result of this count.
'''
"""
prod_in
,
=
inp
prod_in
,
=
inp
gz
,
=
grads
gz
,
=
grads
...
...
theano/tensor/elemwise_cgen.py
浏览文件 @
6304a061
...
@@ -5,8 +5,8 @@ import theano
...
@@ -5,8 +5,8 @@ import theano
def
make_declare
(
loop_orders
,
dtypes
,
sub
):
def
make_declare
(
loop_orders
,
dtypes
,
sub
):
"""
"""
Produce code to declare all necessary variables.
Produce code to declare all necessary variables.
"""
"""
decl
=
""
decl
=
""
for
i
,
(
loop_order
,
dtype
)
in
enumerate
(
zip
(
loop_orders
,
dtypes
)):
for
i
,
(
loop_order
,
dtype
)
in
enumerate
(
zip
(
loop_orders
,
dtypes
)):
var
=
sub
[
'lv
%
i'
%
i
]
# input name corresponding to ith loop variable
var
=
sub
[
'lv
%
i'
%
i
]
# input name corresponding to ith loop variable
...
@@ -117,8 +117,11 @@ def make_checks(loop_orders, dtypes, sub):
...
@@ -117,8 +117,11 @@ def make_checks(loop_orders, dtypes, sub):
def
make_alloc
(
loop_orders
,
dtype
,
sub
,
fortran
=
'0'
):
def
make_alloc
(
loop_orders
,
dtype
,
sub
,
fortran
=
'0'
):
"""Generate C code to allocate outputs.
"""Generate C code to allocate outputs.
:param fortran: a string included in the generated code. If it
Parameters
evaludate to non-zero, an ndarray in fortran order will be
----------
fortran : str
A string included in the generated code. If it
evaluate to non-zero, an ndarray in fortran order will be
created, otherwise it will be c order.
created, otherwise it will be c order.
"""
"""
...
@@ -179,25 +182,24 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None):
...
@@ -179,25 +182,24 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None):
Make a nested loop over several arrays and associate specific code
Make a nested loop over several arrays and associate specific code
to each level of nesting.
to each level of nesting.
@type loop_orders: list of N tuples of length M.
Parameters
@param loop_orders: Each value of each
----------
tuple can be either the index of a dimension to loop over or
loop_orders : list of N tuples of length M
the letter 'x' which means there is no looping to be done
Each value of each tuple can be either the index of a dimension to
over that variable at that point (in other words we broadcast
loop over or the letter 'x' which means there is no looping to be done
over that dimension). If an entry is an integer, it will become
over that variable at that point (in other words we broadcast
an alias of the entry of that rank.
over that dimension). If an entry is an integer, it will become
an alias of the entry of that rank.
@type loop_tasks: list of M+1 pieces of code.
loop_tasks : list of M+1 pieces of code
@param loop_tasks: The ith loop_task is a pair of strings, the first
The ith loop_task is a pair of strings, the first
string is code to be executed before the ith loop starts, the second
string is code to be executed before the ith loop starts, the second
one contains code to be executed just before going to the next element
one contains code to be executed just before going to the next element
of the ith dimension.
of the ith dimension.
The last element if loop_tasks is a single string, containing code
The last element if loop_tasks is a single string, containing code
to be executed at the very end.
to be executed at the very end.
sub : dictionary
@type sub: a dictionary.
Maps 'lv#' to a suitable variable name.
@param sub: Maps 'lv#' to a suitable variable name.
The 'lvi' variable corresponds to the ith element of loop_orders.
The 'lvi' variable corresponds to the ith element of loop_orders.
"""
"""
def
loop_over
(
preloop
,
code
,
indices
,
i
):
def
loop_over
(
preloop
,
code
,
indices
,
i
):
...
@@ -244,8 +246,9 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None):
...
@@ -244,8 +246,9 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None):
return
"{
%
s}"
%
s
return
"{
%
s}"
%
s
def
make_reordered_loop
(
init_loop_orders
,
olv_index
,
dtypes
,
inner_task
,
sub
,
openmp
=
None
):
def
make_reordered_loop
(
init_loop_orders
,
olv_index
,
dtypes
,
inner_task
,
sub
,
'''A bit like make_loop, but when only the inner-most loop executes code.
openmp
=
None
):
"""A bit like make_loop, but when only the inner-most loop executes code.
All the loops will be reordered so that the loops over the output tensor
All the loops will be reordered so that the loops over the output tensor
are executed with memory access as contiguous as possible.
are executed with memory access as contiguous as possible.
...
@@ -253,7 +256,8 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op
...
@@ -253,7 +256,8 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op
will be on its rows; if it's f_contiguous, it will be on its columns.
will be on its rows; if it's f_contiguous, it will be on its columns.
The output tensor's index among the loop variables is indicated by olv_index.
The output tensor's index among the loop variables is indicated by olv_index.
'''
"""
# Number of variables
# Number of variables
nvars
=
len
(
init_loop_orders
)
nvars
=
len
(
init_loop_orders
)
...
@@ -338,6 +342,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op
...
@@ -338,6 +342,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op
Returns a list containing a C expression representing the
Returns a list containing a C expression representing the
stride for each dimension of the ith variable, in the
stride for each dimension of the ith variable, in the
specified loop_order.
specified loop_order.
"""
"""
var
=
sub
[
"lv
%
i"
%
i
]
var
=
sub
[
"lv
%
i"
%
i
]
r
=
[]
r
=
[]
...
@@ -463,25 +468,25 @@ def make_loop_careduce(loop_orders, dtypes, loop_tasks, sub):
...
@@ -463,25 +468,25 @@ def make_loop_careduce(loop_orders, dtypes, loop_tasks, sub):
Make a nested loop over several arrays and associate specific code
Make a nested loop over several arrays and associate specific code
to each level of nesting.
to each level of nesting.
@type loop_orders: list of N tuples of length M.
Parameters
@param loop_orders: Each value of each
----------
tuple can be either the index of a dimension to loop over or
loop_orders : list of N tuples of length M
the letter 'x' which means there is no looping to be done
Each value of each tuple can be either the index of a dimension to
over that variable at that point (in other words we broadcast
loop over or the letter 'x' which means there is no looping to be done
over that dimension). If an entry is an integer, it will become
over that variable at that point (in other words we broadcast
an alias of the entry of that rank.
over that dimension). If an entry is an integer, it will become
an alias of the entry of that rank.
@type loop_tasks: list of M+1 pieces of code.
loop_tasks : list of M+1 pieces of code
@param loop_tasks:
The ith loop_task is a pair of strings, the first
The ith loop_task is a pair of strings, the first
string is code to be executed before the ith loop starts, the second
string is code to be executed before the ith loop starts, the second
one contains code to be executed just before going to the next element
one contains code to be executed just before going to the next element
of the ith dimension.
of the ith dimension.
The last element if loop_tasks is a single string, containing code
The last element if loop_tasks is a single string, containing code
to be executed at the very end.
to be executed at the very end.
sub: dictionary
@type sub: a dictionary
.
Maps 'lv#' to a suitable variable name
.
@param sub: Maps 'lv#' to a suitable variable name
.
The 'lvi' variable corresponds to the ith element of loop_orders
.
The 'lvi' variable corresponds to the ith element of loop_orders.
"""
"""
def
loop_over
(
preloop
,
code
,
indices
,
i
):
def
loop_over
(
preloop
,
code
,
indices
,
i
):
...
...
theano/tensor/extra_ops.py
浏览文件 @
6304a061
...
@@ -14,8 +14,9 @@ tensor = basic
...
@@ -14,8 +14,9 @@ tensor = basic
class
CpuContiguous
(
theano
.
Op
):
class
CpuContiguous
(
theano
.
Op
):
"""
"""
Check to see if the input is c-contiguous,
Check to see if the input is c-contiguous,
if it is, do nothing, else return a contiguous array
if it is, do nothing, else return a contiguous array
.
"""
"""
__props__
=
()
__props__
=
()
view_map
=
{
0
:
[
0
]}
view_map
=
{
0
:
[
0
]}
...
@@ -171,12 +172,16 @@ def cumsum(x, axis=None):
...
@@ -171,12 +172,16 @@ def cumsum(x, axis=None):
Wraping of numpy.cumsum.
Wraping of numpy.cumsum.
:param x: Input tensor variable.
Parameters
----------
:param axis: The axis along which the cumulative sum is computed.
x
Input tensor variable.
axis
The axis along which the cumulative sum is computed.
The default (None) is to compute the cumsum over the flattened array.
The default (None) is to compute the cumsum over the flattened array.
.. versionadded:: 0.7
.. versionadded:: 0.7
"""
"""
return
CumsumOp
(
axis
=
axis
)(
x
)
return
CumsumOp
(
axis
=
axis
)(
x
)
...
@@ -291,18 +296,24 @@ def cumprod(x, axis=None):
...
@@ -291,18 +296,24 @@ def cumprod(x, axis=None):
Wraping of numpy.cumprod.
Wraping of numpy.cumprod.
:param x: Input tensor variable.
Parameters
----------
x
Input tensor variable.
:param axis: The axis along which the cumulative product is computed.
axis
The axis along which the cumulative product is computed.
The default (None) is to compute the cumprod over the flattened array.
The default (None) is to compute the cumprod over the flattened array.
.. versionadded:: 0.7
.. versionadded:: 0.7
"""
"""
return
CumprodOp
(
axis
=
axis
)(
x
)
return
CumprodOp
(
axis
=
axis
)(
x
)
class
DiffOp
(
theano
.
Op
):
class
DiffOp
(
theano
.
Op
):
# See function diff for docstring
# See function diff for docstring
__props__
=
(
"n"
,
"axis"
)
__props__
=
(
"n"
,
"axis"
)
def
__init__
(
self
,
n
=
1
,
axis
=-
1
):
def
__init__
(
self
,
n
=
1
,
axis
=-
1
):
...
@@ -354,23 +365,29 @@ def diff(x, n=1, axis=-1):
...
@@ -354,23 +365,29 @@ def diff(x, n=1, axis=-1):
along the given axis, higher order differences are calculated by
along the given axis, higher order differences are calculated by
using diff recursively. Wraping of numpy.diff.
using diff recursively. Wraping of numpy.diff.
:param x: Input tensor variable.
Parameters
----------
x
Input tensor variable.
:param n: The number of times values are differenced, default is 1.
n
The number of times values are differenced, default is 1.
:param axis: The axis along which the difference is taken,
axis
default is the last axis.
The axis along which the difference is taken,
default is the last axis.
.. versionadded:: 0.6
.. versionadded:: 0.6
"""
"""
return
DiffOp
(
n
=
n
,
axis
=
axis
)(
x
)
return
DiffOp
(
n
=
n
,
axis
=
axis
)(
x
)
class
BinCountOp
(
theano
.
Op
):
class
BinCountOp
(
theano
.
Op
):
"""
"""
DEPRECATED: use bincount() instead.
.. note:: Deprecated
Use bincount() instead.
See function bincount for docstring.
See function bincount for docstring
"""
"""
compatible_type
=
(
'int8'
,
'int16'
,
'int32'
,
'int64'
,
compatible_type
=
(
'int8'
,
'int16'
,
'int32'
,
'int64'
,
'uint8'
,
'uint16'
,
'uint32'
,
'uint64'
)
'uint8'
,
'uint16'
,
'uint32'
,
'uint64'
)
...
@@ -473,17 +490,19 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False):
...
@@ -473,17 +490,19 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False):
specified the input array is weighted by it, i.e. if a value n
specified the input array is weighted by it, i.e. if a value n
is found at position i, out[n] += weight[i] instead of out[n] += 1.
is found at position i, out[n] += weight[i] instead of out[n] += 1.
:param x: 1 dimension, nonnegative ints
Parameters
----------
:param weights: array of the same shape as x with corresponding weights.
x : 1 dimension, nonnegative ints
weights : array of the same shape as x with corresponding weights.
Optional.
Optional.
:param minlength
: A minimum number of bins for the output array.
minlength
: A minimum number of bins for the output array.
Optional.
Optional.
:param assert_nonneg
: A flag that inserts an assert_op to check if
assert_nonneg
: A flag that inserts an assert_op to check if
every input x is nonnegative.
every input x is nonnegative.
Optional.
Optional.
.. versionadded:: 0.6
.. versionadded:: 0.6
"""
"""
compatible_type
=
(
'int8'
,
'int16'
,
'int32'
,
'int64'
,
compatible_type
=
(
'int8'
,
'int16'
,
'int32'
,
'int64'
,
'uint8'
,
'uint16'
,
'uint32'
)
'uint8'
,
'uint16'
,
'uint32'
)
...
@@ -520,18 +539,25 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False):
...
@@ -520,18 +539,25 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False):
def
squeeze
(
x
):
def
squeeze
(
x
):
"""
Remove broadcastable dimensions from
"""
the shape of an array.
Remove broadcastable dimensions from
the shape of an array.
It returns the input array, but with the
It returns the input array, but with the
broadcastable dimensions removed. This is
broadcastable dimensions removed. This is
always `x` itself or a view into `x`.
always `x` itself or a view into `x`.
:param x: Input data, tensor variable.
.. versionadded:: 0.6
Parameters
----------
x
Input data, tensor variable.
:return: `x` without its broadcastable dimensions.
Returns
-------
object
`x` without its broadcastable dimensions.
.. versionadded:: 0.6
"""
"""
view
=
x
.
dimshuffle
([
i
for
i
in
range
(
x
.
ndim
)
view
=
x
.
dimshuffle
([
i
for
i
in
range
(
x
.
ndim
)
if
not
x
.
broadcastable
[
i
]])
if
not
x
.
broadcastable
[
i
]])
...
@@ -539,20 +565,28 @@ def squeeze(x):
...
@@ -539,20 +565,28 @@ def squeeze(x):
def
compress
(
condition
,
x
,
axis
=
None
):
def
compress
(
condition
,
x
,
axis
=
None
):
"""Return selected slices of an array along given axis.
"""
Return selected slices of an array along given axis.
It returns the input tensor, but with selected slices along a given axis
It returns the input tensor, but with selected slices along a given axis
retained. If no axis is provided, the tensor is flattened
retained. If no axis is provided, the tensor is flattened
.
Corresponds to numpy.compress
Corresponds to numpy.compress
:param x: Input data, tensor variable
.. versionadded:: 0.7
:param condition: 1 dimensional array of non-zero and zero values
Parameters
corresponding to indices of slices along a selected axis
----------
x
Input data, tensor variable.
condition
1 dimensional array of non-zero and zero values
corresponding to indices of slices along a selected axis.
:return: `x` with selected slices
Returns
-------
object
`x` with selected slices.
.. versionadded:: 0.7
"""
"""
indices
=
theano
.
tensor
.
basic
.
flatnonzero
(
condition
)
indices
=
theano
.
tensor
.
basic
.
flatnonzero
(
condition
)
return
x
.
take
(
indices
,
axis
=
axis
)
return
x
.
take
(
indices
,
axis
=
axis
)
...
@@ -560,6 +594,7 @@ def compress(condition, x, axis=None):
...
@@ -560,6 +594,7 @@ def compress(condition, x, axis=None):
class
RepeatOp
(
theano
.
Op
):
class
RepeatOp
(
theano
.
Op
):
# See the repeat function for docstring
# See the repeat function for docstring
__props__
=
(
"axis"
,)
__props__
=
(
"axis"
,)
def
__init__
(
self
,
axis
=
None
):
def
__init__
(
self
,
axis
=
None
):
...
@@ -678,14 +713,19 @@ def repeat(x, repeats, axis=None):
...
@@ -678,14 +713,19 @@ def repeat(x, repeats, axis=None):
The number of repetitions for each element is `repeat`.
The number of repetitions for each element is `repeat`.
`repeats` is broadcasted to fit the length of the given `axis`.
`repeats` is broadcasted to fit the length of the given `axis`.
:param x: Input data, tensor variable.
Parameters
:param repeats: int, scalar or tensor variable.
----------
x
:param axis: int, optional.
Input data, tensor variable.
repeats : int, scalar or tensor variable
axis : int, optional
:see: :func:`tensor.tile <tensor.tile>`
See Also
--------
tensor.tile
.. versionadded:: 0.6
.. versionadded:: 0.6
"""
"""
repeats
=
tensor
.
as_tensor_variable
(
repeats
)
repeats
=
tensor
.
as_tensor_variable
(
repeats
)
...
@@ -763,21 +803,27 @@ bartlett_ = Bartlett()
...
@@ -763,21 +803,27 @@ bartlett_ = Bartlett()
# I create a function only to have the doc show well.
# I create a function only to have the doc show well.
def
bartlett
(
M
):
def
bartlett
(
M
):
"""An instance of this class returns the Bartlett spectral window in the
"""
An instance of this class returns the Bartlett spectral window in the
time-domain. The Bartlett window is very similar to a triangular window,
time-domain. The Bartlett window is very similar to a triangular window,
except that the end points are at zero. It is often used in signal
except that the end points are at zero. It is often used in signal
processing for tapering a signal, without generating too much ripple in
processing for tapering a signal, without generating too much ripple in
the frequency domain.
the frequency domain.
:param M: (integer scalar) Number of points in the output
.. versionadded:: 0.6
window. If zero or less, an empty vector is returned.
:return: (vector of doubles) The triangular window, with the
Parameters
maximum value normalized to one (the value one appears only if
----------
the number of samples is odd), with the first and last samples
M : integer scalar
equal to zero.
Number of points in the output window. If zero or less,
an empty vector is returned.
.. versionadded:: 0.6
Returns
-------
vector of doubles
The triangular window, with the maximum value normalized to one
(the value one appears only if the number of samples is odd), with
the first and last samples equal to zero.
"""
"""
return
bartlett_
(
M
)
return
bartlett_
(
M
)
...
@@ -823,8 +869,10 @@ class FillDiagonal(gof.Op):
...
@@ -823,8 +869,10 @@ class FillDiagonal(gof.Op):
def
grad
(
self
,
inp
,
cost_grad
):
def
grad
(
self
,
inp
,
cost_grad
):
"""
"""
Note: The gradient is currently implemented for matrices
Notes
only.
-----
The gradient is currently implemented for matrices only.
"""
"""
a
,
val
=
inp
a
,
val
=
inp
grad
=
cost_grad
[
0
]
grad
=
cost_grad
[
0
]
...
@@ -843,15 +891,25 @@ fill_diagonal_ = FillDiagonal()
...
@@ -843,15 +891,25 @@ fill_diagonal_ = FillDiagonal()
# I create a function only to have the doc show well.
# I create a function only to have the doc show well.
def
fill_diagonal
(
a
,
val
):
def
fill_diagonal
(
a
,
val
):
""" Returns a copy of an array with all
"""
Returns a copy of an array with all
elements of the main diagonal set to a specified scalar value.
elements of the main diagonal set to a specified scalar value.
:param a: Rectangular array of at least two dimensions.
.. versionadded:: 0.6
:param val: Scalar value to fill the diagonal whose type must be
Parameters
----------
a
Rectangular array of at least two dimensions.
val
Scalar value to fill the diagonal whose type must be
compatible with that of array 'a' (i.e. 'val' cannot be viewed
compatible with that of array 'a' (i.e. 'val' cannot be viewed
as an upcast of 'a').
as an upcast of 'a').
:return: An array identical to 'a' except that its main diagonal
Returns
-------
array
An array identical to 'a' except that its main diagonal
is filled with scalar 'val'. (For an array 'a' with a.ndim >=
is filled with scalar 'val'. (For an array 'a' with a.ndim >=
2, the main diagonal is the list of locations a[i, i, ..., i]
2, the main diagonal is the list of locations a[i, i, ..., i]
(i.e. with indices all identical).)
(i.e. with indices all identical).)
...
@@ -859,7 +917,8 @@ def fill_diagonal(a, val):
...
@@ -859,7 +917,8 @@ def fill_diagonal(a, val):
Support rectangular matrix and tensor with more than 2 dimensions
Support rectangular matrix and tensor with more than 2 dimensions
if the later have all dimensions are equals.
if the later have all dimensions are equals.
.. versionadded:: 0.6
"""
"""
return
fill_diagonal_
(
a
,
val
)
return
fill_diagonal_
(
a
,
val
)
...
@@ -902,13 +961,16 @@ class FillDiagonalOffset(gof.Op):
...
@@ -902,13 +961,16 @@ class FillDiagonalOffset(gof.Op):
height
,
width
=
a
.
shape
height
,
width
=
a
.
shape
"""
"""
Note: The fill_diagonal only support rectangular matrix. The output
Notes
-----
The fill_diagonal only support rectangular matrix. The output
of tall matrix is "wrapped", which is an option in numpy 1.9.0
of tall matrix is "wrapped", which is an option in numpy 1.9.0
but was regarded as a bug in numpy 1.6.2. Here I implement the
but was regarded as a bug in numpy 1.6.2. Here I implement the
fill_diagonal_offset with unwrapped output, so fill_diagonal_offset
fill_diagonal_offset with unwrapped output, so fill_diagonal_offset
supports tall matrix.(This make a little difference between the output
supports tall matrix.(This make a little difference between the output
of fill_diagonal and fill_diagonal_offset only in the case of tall
of fill_diagonal and fill_diagonal_offset only in the case of tall
matrix)
matrix)
"""
"""
if
offset
>=
0
:
if
offset
>=
0
:
start
=
offset
start
=
offset
...
@@ -925,8 +987,9 @@ class FillDiagonalOffset(gof.Op):
...
@@ -925,8 +987,9 @@ class FillDiagonalOffset(gof.Op):
def
grad
(
self
,
inp
,
cost_grad
):
def
grad
(
self
,
inp
,
cost_grad
):
"""
"""
Note: The gradient is currently implemented for matrices
Notes
only.
-----
The gradient is currently implemented for matrices only.
"""
"""
a
,
val
,
offset
=
inp
a
,
val
,
offset
=
inp
grad
=
cost_grad
[
0
]
grad
=
cost_grad
[
0
]
...
@@ -972,31 +1035,49 @@ def fill_diagonal_offset(a, val, offset):
...
@@ -972,31 +1035,49 @@ def fill_diagonal_offset(a, val, offset):
Returns a copy of an array with all
Returns a copy of an array with all
elements of the main diagonal set to a specified scalar value.
elements of the main diagonal set to a specified scalar value.
:param a: Rectangular array of two dimensions.
Parameters
:param val: Scalar value to fill the diagonal whose type must be
----------
compatible with that of array 'a' (i.e. 'val' cannot be viewed
a
as an upcast of 'a').
Rectangular array of two dimensions.
:param offset: Scalar value Offset of the diagonal from the main
val
diagonal. Can be positive or negative integer.
Scalar value to fill the diagonal whose type must be
:return: An array identical to 'a' except that its offset diagonal
compatible with that of array 'a' (i.e. 'val' cannot be viewed
is filled with scalar 'val'. The output is unwrapped.
as an upcast of 'a').
offset
Scalar value Offset of the diagonal from the main
diagonal. Can be positive or negative integer.
Returns
-------
array
An array identical to 'a' except that its offset diagonal
is filled with scalar 'val'. The output is unwrapped.
"""
"""
return
fill_diagonal_offset_
(
a
,
val
,
offset
)
return
fill_diagonal_offset_
(
a
,
val
,
offset
)
def
to_one_hot
(
y
,
nb_class
,
dtype
=
None
):
def
to_one_hot
(
y
,
nb_class
,
dtype
=
None
):
"""Return a matrix where each row correspond to the one hot
"""
Return a matrix where each row correspond to the one hot
encoding of each element in y.
encoding of each element in y.
:param y: A vector of integer value between 0 and nb_class - 1.
Parameters
:param nb_class: The number of class in y.
----------
:param dtype: The dtype of the returned matrix. Default floatX.
y
A vector of integer value between 0 and nb_class - 1.
nb_class : int
The number of class in y.
dtype : data-type
The dtype of the returned matrix. Default floatX.
Returns
-------
object
A matrix of shape (y.shape[0], nb_class), where each row ``i`` is
the one hot encoding of the corresponding ``y[i]`` value.
:return: A matrix of shape (y.shape[0], nb_class), where each
"""
row ``i`` is the one hot encoding of the corresponding ``y[i]``
value.
"""
ret
=
theano
.
tensor
.
zeros
((
y
.
shape
[
0
],
nb_class
),
ret
=
theano
.
tensor
.
zeros
((
y
.
shape
[
0
],
nb_class
),
dtype
=
dtype
)
dtype
=
dtype
)
ret
=
theano
.
tensor
.
set_subtensor
(
ret
[
theano
.
tensor
.
arange
(
y
.
shape
[
0
]),
y
],
ret
=
theano
.
tensor
.
set_subtensor
(
ret
[
theano
.
tensor
.
arange
(
y
.
shape
[
0
]),
y
],
...
@@ -1006,11 +1087,10 @@ def to_one_hot(y, nb_class, dtype=None):
...
@@ -1006,11 +1087,10 @@ def to_one_hot(y, nb_class, dtype=None):
class
Unique
(
theano
.
Op
):
class
Unique
(
theano
.
Op
):
"""
"""
Wraps numpy.unique.
Wraps numpy.unique. This op is not implemented on the GPU.
This op is not implemented on the GPU.
Examples
Examples
========
--------
>>> import numpy as np
>>> import numpy as np
>>> x = theano.tensor.vector()
>>> x = theano.tensor.vector()
...
@@ -1022,7 +1102,9 @@ class Unique(theano.Op):
...
@@ -1022,7 +1102,9 @@ class Unique(theano.Op):
>>> g = theano.function([y], Unique(True, True, False)(y))
>>> g = theano.function([y], Unique(True, True, False)(y))
>>> g([[1, 1, 1.0], (2, 3, 3.0)])
>>> g([[1, 1, 1.0], (2, 3, 3.0)])
[array([ 1., 2., 3.]), array([0, 3, 4]), array([0, 0, 0, 1, 2, 2])]
[array([ 1., 2., 3.]), array([0, 3, 4]), array([0, 0, 0, 1, 2, 2])]
"""
"""
__props__
=
(
"return_index"
,
"return_inverse"
,
"return_counts"
)
__props__
=
(
"return_index"
,
"return_inverse"
,
"return_counts"
)
def
__init__
(
self
,
return_index
=
False
,
return_inverse
=
False
,
def
__init__
(
self
,
return_index
=
False
,
return_inverse
=
False
,
...
...
theano/tensor/io.py
浏览文件 @
6304a061
...
@@ -11,13 +11,18 @@ import theano
...
@@ -11,13 +11,18 @@ import theano
class
LoadFromDisk
(
Op
):
class
LoadFromDisk
(
Op
):
"""
"""
An operation to load an array from disk
An operation to load an array from disk
.
See Also
See Also
load
--------
load
Notes
-----
Non-differentiable.
@note: Non-differentiable.
"""
"""
__props__
=
(
"dtype"
,
"broadcastable"
,
"mmap_mode"
)
__props__
=
(
"dtype"
,
"broadcastable"
,
"mmap_mode"
)
def
__init__
(
self
,
dtype
,
broadcastable
,
mmap_mode
=
None
):
def
__init__
(
self
,
dtype
,
broadcastable
,
mmap_mode
=
None
):
...
@@ -53,18 +58,26 @@ def load(path, dtype, broadcastable, mmap_mode=None):
...
@@ -53,18 +58,26 @@ def load(path, dtype, broadcastable, mmap_mode=None):
"""
"""
Load an array from an .npy file.
Load an array from an .npy file.
:param path: A Generic symbolic variable, that will contain a string
Parameters
:param dtype: The data type of the array to be read.
----------
:param broadcastable: The broadcastable pattern of the loaded array,
path
for instance, (False,) for a vector, (False, True) for a column,
A Generic symbolic variable, that will contain a string
(False, False) for a matrix.
dtype : data-type
:param mmap_mode: How the file will be loaded. None means that the
The data type of the array to be read.
data will be copied into an array in memory, 'c' means that the file
broadcastable
will be mapped into virtual memory, so only the parts that are
The broadcastable pattern of the loaded array, for instance,
needed will be actually read from disk and put into memory.
(False,) for a vector, (False, True) for a column,
Other modes supported by numpy.load ('r', 'r+', 'w+') cannot
(False, False) for a matrix.
be supported by Theano.
mmap_mode
How the file will be loaded. None means that the
data will be copied into an array in memory, 'c' means that the file
will be mapped into virtual memory, so only the parts that are
needed will be actually read from disk and put into memory.
Other modes supported by numpy.load ('r', 'r+', 'w+') cannot
be supported by Theano.
Examples
--------
>>> from theano import *
>>> from theano import *
>>> path = Variable(Generic())
>>> path = Variable(Generic())
>>> x = tensor.load(path, 'int64', (False,))
>>> x = tensor.load(path, 'int64', (False,))
...
@@ -72,6 +85,7 @@ def load(path, dtype, broadcastable, mmap_mode=None):
...
@@ -72,6 +85,7 @@ def load(path, dtype, broadcastable, mmap_mode=None):
>>> fn = function([path], y)
>>> fn = function([path], y)
>>> fn("stored-array.npy")
>>> fn("stored-array.npy")
array([0, 2, 4, 6, 8], dtype=int64)
array([0, 2, 4, 6, 8], dtype=int64)
"""
"""
return
LoadFromDisk
(
dtype
,
broadcastable
,
mmap_mode
)(
path
)
return
LoadFromDisk
(
dtype
,
broadcastable
,
mmap_mode
)(
path
)
...
@@ -91,14 +105,19 @@ else:
...
@@ -91,14 +105,19 @@ else:
class
MPIRecv
(
Op
):
class
MPIRecv
(
Op
):
"""
"""
An operation to asynchronously receive an array to a remote host using MPI
An operation to asynchronously receive an array to a remote host using MPI
.
See Also
See Also
MPIRecv
--------
MPIWait
MPIRecv
MPIWait
Notes
-----
Non-differentiable.
@note: Non-differentiable.
"""
"""
__props__
=
(
"source"
,
"tag"
,
"shape"
,
"dtype"
)
__props__
=
(
"source"
,
"tag"
,
"shape"
,
"dtype"
)
def
__init__
(
self
,
source
,
tag
,
shape
,
dtype
):
def
__init__
(
self
,
source
,
tag
,
shape
,
dtype
):
...
@@ -134,13 +153,18 @@ class MPIRecv(Op):
...
@@ -134,13 +153,18 @@ class MPIRecv(Op):
class
MPIRecvWait
(
Op
):
class
MPIRecvWait
(
Op
):
"""
"""
An operation to wait on a previously received array using MPI
An operation to wait on a previously received array using MPI
.
See Also
See Also
MPIRecv
--------
MPIRecv
Notes
-----
Non-differentiable.
@note: Non-differentiable.
"""
"""
__props__
=
(
"tag"
,)
__props__
=
(
"tag"
,)
def
__init__
(
self
,
tag
):
def
__init__
(
self
,
tag
):
...
@@ -168,14 +192,19 @@ class MPIRecvWait(Op):
...
@@ -168,14 +192,19 @@ class MPIRecvWait(Op):
class
MPISend
(
Op
):
class
MPISend
(
Op
):
"""
"""
An operation to asynchronously Send an array to a remote host using MPI
An operation to asynchronously Send an array to a remote host using MPI
.
See Also
See Also
MPIRecv
--------
MPISendWait
MPIRecv
MPISendWait
Notes
-----
Non-differentiable.
@note: Non-differentiable.
"""
"""
__props__
=
(
"dest"
,
"tag"
)
__props__
=
(
"dest"
,
"tag"
)
def
__init__
(
self
,
dest
,
tag
):
def
__init__
(
self
,
dest
,
tag
):
...
@@ -202,12 +231,16 @@ class MPISend(Op):
...
@@ -202,12 +231,16 @@ class MPISend(Op):
class
MPISendWait
(
Op
):
class
MPISendWait
(
Op
):
"""
"""
An operation to wait on a previously sent array using MPI
An operation to wait on a previously sent array using MPI.
See Also
--------
MPISend
See Also:
Notes
MPISend
-----
Non-differentiable.
@note: Non-differentiable.
"""
"""
__props__
=
(
"tag"
,)
__props__
=
(
"tag"
,)
...
@@ -227,35 +260,35 @@ class MPISendWait(Op):
...
@@ -227,35 +260,35 @@ class MPISendWait(Op):
def
isend
(
var
,
dest
,
tag
):
def
isend
(
var
,
dest
,
tag
):
"""
"""
Non blocking send
Non blocking send
.
"""
"""
return
MPISend
(
dest
,
tag
)(
var
)
return
MPISend
(
dest
,
tag
)(
var
)
def
send
(
var
,
dest
,
tag
):
def
send
(
var
,
dest
,
tag
):
"""
"""
blocking send
Blocking send.
"""
"""
return
MPISendWait
(
tag
)(
*
isend
(
var
,
dest
,
tag
))
return
MPISendWait
(
tag
)(
*
isend
(
var
,
dest
,
tag
))
def
irecv
(
shape
,
dtype
,
source
,
tag
):
def
irecv
(
shape
,
dtype
,
source
,
tag
):
"""
"""
non-blocking receive
Non-blocking receive.
"""
"""
return
MPIRecv
(
source
,
tag
,
shape
,
dtype
)()
return
MPIRecv
(
source
,
tag
,
shape
,
dtype
)()
def
recv
(
shape
,
dtype
,
source
,
tag
):
def
recv
(
shape
,
dtype
,
source
,
tag
):
"""
"""
blocking receive
Blocking receive.
"""
"""
return
MPIRecvWait
(
tag
)(
*
irecv
(
shape
,
dtype
,
source
,
tag
))
return
MPIRecvWait
(
tag
)(
*
irecv
(
shape
,
dtype
,
source
,
tag
))
# Ordering keys for scheduling
# Ordering keys for scheduling
def
mpi_send_wait_key
(
a
):
def
mpi_send_wait_key
(
a
):
"""
Wait as long as possible on Waits, Start Send/Recvs early
"""
"""
Wait as long as possible on Waits, Start Send/Recvs early.
"""
if
isinstance
(
a
.
op
,
(
MPIRecvWait
,
MPISendWait
)):
if
isinstance
(
a
.
op
,
(
MPIRecvWait
,
MPISendWait
)):
return
1
return
1
if
isinstance
(
a
.
op
,
(
MPIRecv
,
MPISend
)):
if
isinstance
(
a
.
op
,
(
MPIRecv
,
MPISend
)):
...
@@ -264,7 +297,7 @@ def mpi_send_wait_key(a):
...
@@ -264,7 +297,7 @@ def mpi_send_wait_key(a):
def
mpi_tag_key
(
a
):
def
mpi_tag_key
(
a
):
"""
Break MPI ties by using the variable tag - prefer lower tags first
"""
"""
Break MPI ties by using the variable tag - prefer lower tags first.
"""
if
isinstance
(
a
.
op
,
(
MPISend
,
MPIRecv
,
MPIRecvWait
,
MPISendWait
)):
if
isinstance
(
a
.
op
,
(
MPISend
,
MPIRecv
,
MPIRecvWait
,
MPISendWait
)):
return
a
.
op
.
tag
return
a
.
op
.
tag
else
:
else
:
...
...
theano/tensor/nlinalg.py
浏览文件 @
6304a061
...
@@ -17,17 +17,18 @@ logger = logging.getLogger(__name__)
...
@@ -17,17 +17,18 @@ logger = logging.getLogger(__name__)
class
MatrixPinv
(
Op
):
class
MatrixPinv
(
Op
):
"""Computes the pseudo-inverse of a matrix :math:`A`.
"""Computes the pseudo-inverse of a matrix :math:`A`.
The pseudo-inverse of a matrix
A
, denoted :math:`A^+`, is
The pseudo-inverse of a matrix
:math:`A`
, denoted :math:`A^+`, is
defined as: "the matrix that 'solves' [the least-squares problem]
defined as: "the matrix that 'solves' [the least-squares problem]
:math:`Ax = b`," i.e., if :math:`
\\
bar{x}` is said solution, then
:math:`Ax = b`," i.e., if :math:`
\\
bar{x}` is said solution, then
:math:`A^+` is that matrix such that :math:`
\\
bar{x} = A^+b`.
:math:`A^+` is that matrix such that :math:`
\\
bar{x} = A^+b`.
Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity matrix.
Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity matrix.
This method is not faster th
e
n `matrix_inverse`. Its strength comes from
This method is not faster th
a
n `matrix_inverse`. Its strength comes from
that it works for non-square matrices.
that it works for non-square matrices.
If you have a square matrix though, `matrix_inverse` can be both more
If you have a square matrix though, `matrix_inverse` can be both more
exact and faster to compute. Also this op does not get optimized into a
exact and faster to compute. Also this op does not get optimized into a
solve op.
solve op.
"""
"""
__props__
=
()
__props__
=
()
...
@@ -55,8 +56,11 @@ class MatrixInverse(Op):
...
@@ -55,8 +56,11 @@ class MatrixInverse(Op):
matrix :math:`A_{inv}` such that the dot product :math:`A
\
cdot A_{inv}`
matrix :math:`A_{inv}` such that the dot product :math:`A
\
cdot A_{inv}`
and :math:`A_{inv}
\
cdot A` equals the identity matrix :math:`I`.
and :math:`A_{inv}
\
cdot A` equals the identity matrix :math:`I`.
:note: When possible, the call to this op will be optimized to the call
Notes
of ``solve``.
-----
When possible, the call to this op will be optimized to the call
of ``solve``.
"""
"""
__props__
=
()
__props__
=
()
...
@@ -82,7 +86,7 @@ class MatrixInverse(Op):
...
@@ -82,7 +86,7 @@ class MatrixInverse(Op):
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``. Using the `matrix cookbook
``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
on
c
e can deduce that the relation corresponds to
one can deduce that the relation corresponds to
.. math:: (X^{-1} \cdot V^{T} \cdot X^{-1})^T.
.. math:: (X^{-1} \cdot V^{T} \cdot X^{-1})^T.
...
@@ -99,9 +103,9 @@ class MatrixInverse(Op):
...
@@ -99,9 +103,9 @@ class MatrixInverse(Op):
.. math:: \frac{\partial X^{-1}}{\partial X}V,
.. math:: \frac{\partial X^{-1}}{\partial X}V,
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``.
Using the `matrix cookbook
``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
on
c
e can deduce that the relation corresponds to
one can deduce that the relation corresponds to
.. math:: X^{-1} \cdot V \cdot X^{-1}.
.. math:: X^{-1} \cdot V \cdot X^{-1}.
...
@@ -120,11 +124,12 @@ matrix_inverse = MatrixInverse()
...
@@ -120,11 +124,12 @@ matrix_inverse = MatrixInverse()
def
matrix_dot
(
*
args
):
def
matrix_dot
(
*
args
):
""" Shorthand for product between several dots
""" Shorthand for product between several dots
.
Given :math:`N` matrices :math:`A_0, A_1, .., A_N`, ``matrix_dot`` will
Given :math:`N` matrices :math:`A_0, A_1, .., A_N`, ``matrix_dot`` will
generate the matrix product between all in the given order, namely
generate the matrix product between all in the given order, namely
:math:`A_0
\
cdot A_1
\
cdot A_2
\
cdot ..
\
cdot A_N`.
:math:`A_0
\
cdot A_1
\
cdot A_2
\
cdot ..
\
cdot A_N`.
"""
"""
rval
=
args
[
0
]
rval
=
args
[
0
]
for
a
in
args
[
1
:]:
for
a
in
args
[
1
:]:
...
@@ -163,10 +168,14 @@ alloc_diag = AllocDiag()
...
@@ -163,10 +168,14 @@ alloc_diag = AllocDiag()
class
ExtractDiag
(
Op
):
class
ExtractDiag
(
Op
):
""" Return the diagonal of a matrix.
"""Return the diagonal of a matrix.
Notes
-----
Works on the GPU.
:note: work on the GPU.
"""
"""
__props__
=
(
"view"
,)
__props__
=
(
"view"
,)
def
__init__
(
self
,
view
=
False
):
def
__init__
(
self
,
view
=
False
):
...
@@ -246,14 +255,18 @@ def trace(X):
...
@@ -246,14 +255,18 @@ def trace(X):
"""
"""
Returns the sum of diagonal elements of matrix X.
Returns the sum of diagonal elements of matrix X.
:note: work on GPU since 0.6rc4.
Notes
-----
Works on GPU since 0.6rc4.
"""
"""
return
extract_diag
(
X
)
.
sum
()
return
extract_diag
(
X
)
.
sum
()
class
Det
(
Op
):
class
Det
(
Op
):
"""Matrix determinant
"""
Input should be a square matrix
Matrix determinant. Input should be a square matrix.
"""
"""
__props__
=
()
__props__
=
()
...
@@ -287,9 +300,11 @@ det = Det()
...
@@ -287,9 +300,11 @@ det = Det()
class
Eig
(
Op
):
class
Eig
(
Op
):
"""Compute the eigenvalues and right eigenvectors of a square array.
"""
Compute the eigenvalues and right eigenvectors of a square array.
"""
"""
_numop
=
staticmethod
(
numpy
.
linalg
.
eig
)
_numop
=
staticmethod
(
numpy
.
linalg
.
eig
)
__props__
=
()
__props__
=
()
...
@@ -317,6 +332,7 @@ class Eigh(Eig):
...
@@ -317,6 +332,7 @@ class Eigh(Eig):
Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
"""
"""
_numop
=
staticmethod
(
numpy
.
linalg
.
eigh
)
_numop
=
staticmethod
(
numpy
.
linalg
.
eigh
)
__props__
=
(
'UPLO'
,)
__props__
=
(
'UPLO'
,)
...
@@ -363,6 +379,7 @@ class Eigh(Eig):
...
@@ -363,6 +379,7 @@ class Eigh(Eig):
.. math:: \frac{\partial\,v_{kn}}
.. math:: \frac{\partial\,v_{kn}}
{\partial a_{ij}} =
{\partial a_{ij}} =
\sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
\sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
"""
"""
x
,
=
inputs
x
,
=
inputs
w
,
v
=
self
(
x
)
w
,
v
=
self
(
x
)
...
@@ -383,9 +400,11 @@ def _zero_disconnected(outputs, grads):
...
@@ -383,9 +400,11 @@ def _zero_disconnected(outputs, grads):
class
EighGrad
(
Op
):
class
EighGrad
(
Op
):
"""Gradient of an eigensystem of a Hermitian matrix.
"""
Gradient of an eigensystem of a Hermitian matrix.
"""
"""
__props__
=
(
'UPLO'
,)
__props__
=
(
'UPLO'
,)
def
__init__
(
self
,
UPLO
=
'L'
):
def
__init__
(
self
,
UPLO
=
'L'
):
...
@@ -414,6 +433,7 @@ class EighGrad(Op):
...
@@ -414,6 +433,7 @@ class EighGrad(Op):
"""
"""
Implements the "reverse-mode" gradient for the eigensystem of
Implements the "reverse-mode" gradient for the eigensystem of
a square matrix.
a square matrix.
"""
"""
x
,
w
,
v
,
W
,
V
=
inputs
x
,
w
,
v
,
W
,
V
=
inputs
N
=
x
.
shape
[
0
]
N
=
x
.
shape
[
0
]
...
@@ -453,10 +473,13 @@ def eigh(a, UPLO='L'):
...
@@ -453,10 +473,13 @@ def eigh(a, UPLO='L'):
class
QRFull
(
Op
):
class
QRFull
(
Op
):
"""
"""
Full QR Decomposition.
Full QR Decomposition.
Computes the QR decomposition of a matrix.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr, where q is orthonormal
Factor the matrix a as qr, where q is orthonormal
and r is upper-triangular.
and r is upper-triangular.
"""
"""
_numop
=
staticmethod
(
numpy
.
linalg
.
qr
)
_numop
=
staticmethod
(
numpy
.
linalg
.
qr
)
__props__
=
(
'mode'
,)
__props__
=
(
'mode'
,)
...
@@ -484,9 +507,12 @@ class QRFull(Op):
...
@@ -484,9 +507,12 @@ class QRFull(Op):
class
QRIncomplete
(
Op
):
class
QRIncomplete
(
Op
):
"""
"""
Incomplete QR Decomposition.
Incomplete QR Decomposition.
Computes the QR decomposition of a matrix.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr and return a single matrix.
Factor the matrix a as qr and return a single matrix.
"""
"""
_numop
=
staticmethod
(
numpy
.
linalg
.
qr
)
_numop
=
staticmethod
(
numpy
.
linalg
.
qr
)
__props__
=
(
'mode'
,)
__props__
=
(
'mode'
,)
...
@@ -513,15 +539,12 @@ def qr(a, mode="full"):
...
@@ -513,15 +539,12 @@ def qr(a, mode="full"):
Factor the matrix a as qr, where q
Factor the matrix a as qr, where q
is orthonormal and r is upper-triangular.
is orthonormal and r is upper-triangular.
:type a:
Parameters
array_like, shape (M, N)
----------
:param a:
a : array_like, shape (M, N)
Matrix to be factored.
Matrix to be factored.
:type mode:
mode : {'reduced', 'complete', 'r', 'raw', 'full', 'economic'}, optional
one of 'reduced', 'complete', 'r', 'raw', 'full' and
'economic', optional
:keyword mode:
If K = min(M, N), then
If K = min(M, N), then
'reduced'
'reduced'
...
@@ -558,19 +581,18 @@ def qr(a, mode="full"):
...
@@ -558,19 +581,18 @@ def qr(a, mode="full"):
both doing the same thing in the new numpy version but only
both doing the same thing in the new numpy version but only
full works on the old previous numpy version.
full works on the old previous numpy version.
:rtype q:
Returns
matrix of float or complex, optional
-------
:return q:
q : matrix of float or complex, optional
A matrix with orthonormal columns. When mode = 'complete' the
A matrix with orthonormal columns. When mode = 'complete' the
result is an orthogonal/unitary matrix depending on whether or
result is an orthogonal/unitary matrix depending on whether or
not a is real/complex. The determinant may be either +/- 1 in
not a is real/complex. The determinant may be either +/- 1 in
that case.
that case.
r : matrix of float or complex, optional
:rtype r:
The upper-triangular matrix.
matrix of float or complex, optional
:return r:
The upper-triangular matrix.
"""
"""
x
=
[[
2
,
1
],
[
3
,
4
]]
x
=
[[
2
,
1
],
[
3
,
4
]]
if
isinstance
(
numpy
.
linalg
.
qr
(
x
,
mode
),
tuple
):
if
isinstance
(
numpy
.
linalg
.
qr
(
x
,
mode
),
tuple
):
return
QRFull
(
mode
)(
a
)
return
QRFull
(
mode
)(
a
)
...
@@ -579,22 +601,26 @@ def qr(a, mode="full"):
...
@@ -579,22 +601,26 @@ def qr(a, mode="full"):
class
SVD
(
Op
):
class
SVD
(
Op
):
"""
Parameters
----------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
"""
# See doc in the docstring of the function just after this class.
# See doc in the docstring of the function just after this class.
_numop
=
staticmethod
(
numpy
.
linalg
.
svd
)
_numop
=
staticmethod
(
numpy
.
linalg
.
svd
)
__props__
=
(
'full_matrices'
,
'compute_uv'
)
__props__
=
(
'full_matrices'
,
'compute_uv'
)
def
__init__
(
self
,
full_matrices
=
True
,
compute_uv
=
True
):
def
__init__
(
self
,
full_matrices
=
True
,
compute_uv
=
True
):
"""
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
"""
self
.
full_matrices
=
full_matrices
self
.
full_matrices
=
full_matrices
self
.
compute_uv
=
compute_uv
self
.
compute_uv
=
compute_uv
...
@@ -619,18 +645,21 @@ def svd(a, full_matrices=1, compute_uv=1):
...
@@ -619,18 +645,21 @@ def svd(a, full_matrices=1, compute_uv=1):
"""
"""
This function performs the SVD on CPU.
This function performs the SVD on CPU.
:type full_matrices: bool, optional
Parameters
:param full_matrices:
----------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
where K = min(M, N).
:type compute_uv: bool, optional
compute_uv : bool, optional
:param compute_uv:
Whether or not to compute u and v in addition to s.
Whether or not to compute u and v in addition to s.
True by default.
True by default.
:returns: U, V and D matrices.
Returns
-------
U, V, D : matrices
"""
"""
return
SVD
(
full_matrices
,
compute_uv
)(
a
)
return
SVD
(
full_matrices
,
compute_uv
)(
a
)
...
...
theano/tensor/nnet/Conv3D.py
浏览文件 @
6304a061
...
@@ -44,8 +44,13 @@ from theano.gradient import grad_undefined
...
@@ -44,8 +44,13 @@ from theano.gradient import grad_undefined
# the output function is only defined when dr, dc, dt are natural numbers.
# the output function is only defined when dr, dc, dt are natural numbers.
class
Conv3D
(
theano
.
Op
):
class
Conv3D
(
theano
.
Op
):
""" 3D `convolution` of multiple filters on a minibatch
"""
:note: does not flip the kernel, moves kernel with a user specified stride
3D `convolution` of multiple filters on a minibatch.
Notes
-----
Does not flip the kernel, moves kernel with a user specified stride.
"""
"""
__props__
=
()
__props__
=
()
...
@@ -54,10 +59,17 @@ class Conv3D(theano.Op):
...
@@ -54,10 +59,17 @@ class Conv3D(theano.Op):
def
make_node
(
self
,
V
,
W
,
b
,
d
):
def
make_node
(
self
,
V
,
W
,
b
,
d
):
"""
"""
:param V: Visible unit, input(batch,row,column,time,in channel)
Parameters
:param W: Weights, filter(out channel,row,column,time,in channel)
----------
:param b: bias, shape == (W.shape[0],)
V
:param d: strides when moving the filter over the input(dx,dy,dt)
Visible unit, input(batch,row,column,time,in channel)
W
Weights, filter(out channel,row,column,time,in channel)
b
bias, shape == (W.shape[0],)
d
strides when moving the filter over the input(dx,dy,dt)
"""
"""
V_
=
T
.
as_tensor_variable
(
V
)
V_
=
T
.
as_tensor_variable
(
V
)
...
@@ -539,28 +551,39 @@ _conv3D = Conv3D()
...
@@ -539,28 +551,39 @@ _conv3D = Conv3D()
def
conv3D
(
V
,
W
,
b
,
d
):
def
conv3D
(
V
,
W
,
b
,
d
):
"""
"""
3D "convolution" of multiple filters on a minibatch
3D "convolution" of multiple filters on a minibatch.
(does not flip the kernel, moves kernel with a user specified stride)
(does not flip the kernel, moves kernel with a user specified stride)
:param V: Visible unit, input.
Parameters
dimensions: (batch, row, column, time, in channel)
----------
:param W: Weights, filter.
V
dimensions: (out channel, row, column, time ,in channel)
Visible unit, input.
:param b: bias, shape == (W.shape[0],)
Dimensions: (batch, row, column, time, in channel).
:param d: strides when moving the filter over the input(dx, dy, dt)
W
Weights, filter.
:note: The order of dimensions does not correspond to the one in `conv2d`.
Dimensions: (out channel, row, column, time, in channel).
This is for optimization.
b
Bias, shape == (W.shape[0],).
:note: The GPU implementation is very slow. You should use
d
:func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` or
Strides when moving the filter over the input(dx, dy, dt).
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` for a
GPU graph instead.
Notes
-----
:see: Someone made a script that shows how to swap the axes
The order of dimensions does not correspond to the one in `conv2d`.
between both 3d convolution implementations in Theano. See
This is for optimization.
the last `attachment
<https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_.
The GPU implementation is very slow. You should use
:func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` for a
GPU graph instead.
See Also
--------
Someone made a script that shows how to swap the axes
between both 3d convolution implementations in Theano. See
the last `attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_
"""
"""
return
_conv3D
(
V
,
W
,
b
,
d
)
return
_conv3D
(
V
,
W
,
b
,
d
)
...
...
theano/tensor/nnet/ConvGrad3D.py
浏览文件 @
6304a061
...
@@ -13,7 +13,11 @@ from theano.gradient import DisconnectedType
...
@@ -13,7 +13,11 @@ from theano.gradient import DisconnectedType
# than visiting each weight gradient element once and passing through whole video
# than visiting each weight gradient element once and passing through whole video
class
ConvGrad3D
(
theano
.
Op
):
class
ConvGrad3D
(
theano
.
Op
):
""" Gradient of Conv3D with respect to W """
"""
Gradient of Conv3D with respect to W.
"""
__props__
=
()
__props__
=
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
...
...
theano/tensor/nnet/ConvTransp3D.py
浏览文件 @
6304a061
...
@@ -11,7 +11,12 @@ from theano.gradient import DisconnectedType
...
@@ -11,7 +11,12 @@ from theano.gradient import DisconnectedType
class
ConvTransp3D
(
theano
.
Op
):
class
ConvTransp3D
(
theano
.
Op
):
""" "Transpose" of Conv3D (Conv3D implements multiplication by an implicitly defined matrix W. This implements multiplication by its transpose) """
"""
"Transpose" of Conv3D (Conv3D implements multiplication by an implicitly
defined matrix W. This implements multiplication by its transpose).
"""
__props__
=
()
__props__
=
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
...
@@ -19,10 +24,17 @@ class ConvTransp3D(theano.Op):
...
@@ -19,10 +24,17 @@ class ConvTransp3D(theano.Op):
def
make_node
(
self
,
W
,
b
,
d
,
H
,
RShape
=
None
):
def
make_node
(
self
,
W
,
b
,
d
,
H
,
RShape
=
None
):
"""
"""
:param W: Weights, filter
Parameters
:param b: bias, shape == (W.shape[0],)
----------
:param d: strides when moving the filter over the input
W
:param H: The output of Conv3D
Weights, filter
b
Bias, shape == (W.shape[0],).
d
Strides when moving the filter over the input.
H
The output of Conv3D.
"""
"""
W_
=
T
.
as_tensor_variable
(
W
)
W_
=
T
.
as_tensor_variable
(
W
)
b_
=
T
.
as_tensor_variable
(
b
)
b_
=
T
.
as_tensor_variable
(
b
)
...
...
theano/tensor/nnet/conv.py
浏览文件 @
6304a061
...
@@ -36,61 +36,58 @@ _logger = logging.getLogger("theano.tensor.nnet.conv")
...
@@ -36,61 +36,58 @@ _logger = logging.getLogger("theano.tensor.nnet.conv")
def
conv2d
(
input
,
filters
,
image_shape
=
None
,
filter_shape
=
None
,
def
conv2d
(
input
,
filters
,
image_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
**
kargs
):
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
**
kargs
):
"""This function will build the symbolic graph for convolving a stack of
"""
This function will build the symbolic graph for convolving a stack of
input images with a set of filters. The implementation is modelled after
input images with a set of filters. The implementation is modelled after
Convolutional Neural Networks (CNN). It is simply a wrapper to the ConvOp
Convolutional Neural Networks (CNN). It is simply a wrapper to the ConvOp
but provides a much cleaner interface.
but provides a much cleaner interface.
:type input: symbolic 4D tensor
Parameters
:param input: mini-batch of feature map stacks, of shape
----------
(batch size, stack size, nb row, nb col)
input : symbolic 4D tensor
see the optional parameter image_shape
Mini-batch of feature map stacks, of shape
(batch size, stack size, nb row, nb col)
:type filters: symbolic 4D tensor
see the optional parameter image_shape
:param filters: set of filters used in CNN layer of shape
filters : symbolic 4D tensor
(nb filters, stack size, nb row, nb col)
Set of filters used in CNN layer of shape
see the optional parameter filter_shape
(nb filters, stack size, nb row, nb col)
see the optional parameter filter_shape
:param border_mode:
border_mode : {'valid', 'full'}
'valid'-- only apply filter to complete patches of the image. Generates
'valid' only apply filter to complete patches of the image. Generates
output of shape: image_shape - filter_shape + 1
output of shape: image_shape - filter_shape + 1.
'full' -- zero-pads image to multiple of filter shape to generate output
'full' zero-pads image to multiple of filter shape to generate output
of shape: image_shape + filter_shape - 1
of shape: image_shape + filter_shape - 1.
subsample : tuple of len 2
:type subsample: tuple of len 2
Factor by which to subsample the output. Also called strides elsewhere.
:param subsample: factor by which to subsample the output.
image_shape : None, tuple/list of len 4 of int, None or Constant variable
Also called strides elsewhere.
The shape of the input parameter.
Optional, used for optimization like loop unrolling
:type image_shape: None, tuple/list of len 4 of int, None or
You can put None for any element of the list to tell that this element
Constant variable
is not constant.
:param image_shape: The shape of the input parameter.
filter_shape : None, tuple/list of len 4 of int, None or Constant variable
Optional, used for optimization like loop unrolling
Optional, used for optimization like loop unrolling
You can put None for any element of the list
You can put None for any element of the list
to tell that this element is not constant.
to tell that this element is not constant.
:type filter_shape: None, tuple/list of len 4 of int, None or
kwargs
Constant variable
Kwargs are passed onto ConvOp. Can be used to set the following:
:param filter_shape: Optional, used for optimization like loop unrolling
unroll_batch, unroll_kern, unroll_patch, openmp (see ConvOp doc).
You can put None for any element of the list
to tell that this element is not constant.
openmp: By default have the same value as
:param kwargs: kwargs are passed onto ConvOp.
config.openmp. For small image, filter,
Can be used to set the following:
batch size, nkern and stack size, it can be
unroll_batch, unroll_kern, unroll_patch,
faster to disable manually openmp. A fast and
openmp (see ConvOp doc)
incomplete test show that with image size
6x6, filter size 4x4, batch size==1,
openmp: By default have the same value as
n kern==1 and stack size==1, it is faster
config.openmp. For small image, filter,
to disable it in valid mode. But if we
batch size, nkern and stack size, it can be
grow the batch size to 10, it is faster
faster to disable manually openmp. A fast and
with openmp on a core 2 duo.
incomplete test show that with image size
6x6, filter size 4x4, batch size==1,
Returns
n kern==1 and stack size==1, it is faster
-------
to disable it in valid mode. But if we
symbolic 4D tensor
grow the batch size to 10, it is faster
Set of feature maps generated by convolutional layer. Tensor is
with openmp on a core 2 duo.
of shape (batch size, nb filters, output row, output col).
:rtype: symbolic 4D tensor
:return: set of feature maps generated by convolutional layer. Tensor is
of shape (batch size, nb filters, output row, output col)
"""
"""
...
@@ -171,6 +168,97 @@ class ConvOp(OpenMPOp):
...
@@ -171,6 +168,97 @@ class ConvOp(OpenMPOp):
output[b,k,:,:] =
\
sum_i input[b,i,:,:] * filter[k,i,:,:]
\f
orall b,k
output[b,k,:,:] =
\
sum_i input[b,i,:,:] * filter[k,i,:,:]
\f
orall b,k
where b is the mini-batch index, k the filter index and * is the
where b is the mini-batch index, k the filter index and * is the
convolution operator.
convolution operator.
The constructor initializes a ConvOp with given output_mode (full/valid).
All other parameters are optional and are only used to generate more
optimized c code, or to enable graph optimizers to optimally replace the
ConvOp.
NOTES ON OPTIMIZATION:
There are two types of optimization. The first is the selection of the
fastest algo when bsize and nkern are provided with imshp and kshp.
By default we try to select the fastest version. You can specify it
with the unroll_batch, unroll_kern, and unroll_patch parameter.
The second type of optimization is hardcoding some dimensions into the
code when all shapes are known.
This makes a significant difference for the 'full' output_mode.
Sometimes, the fastest implementation on x86-64 uses
{unroll_batch=4, unroll_kern=4, unroll_patch=False}
with all other shape parameters being provided.
For optimizing other architectures, see:
Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance
Matrix Multiplication, (mr x nr). ACM Transactions on Mathematical
Software, May 2008.
Figure 12: (mr x nr). For x86 use 2x4, itanium 8x8, etc.
Parameters
----------
output_mode : {'valid', 'full'}
'valid' gives an output smaller than the image.
'full' gives an output bigger than the image.
See 'border_mode' in conv2d's doc.
Optional parameters: (will generate more optimal c code)
imshp : tuple of len 2 or 3: 2 for 2d image, 3 for a stack of 2d images.
Stacksize, nb image row, nb image col.
kshp : tuple of len 2
Nb kernel row, nb kernel col.
nkern : int
The number of kernel.
bsize : int
The size of the minibatch.
dx : int
Patch stride rows.
dy : int
Patch stride cols
Params which select the version of code used:
unroll_patch : bool
Use a version of c_code that unrolls the patch loop and doesn't
require all shape information to work; if all shape information
is present, it is used to hardcode the values in the code for
faster code.
unroll_batch : int
Use a version of c_code that unrolls the batch (by unroll_batch)
and the nkern (by unroll_kern) loop. The size must be a multiple
of bsize or nkern respectively.
unroll_kern : int
Use a version of c_code that unrolls the batch
(by unroll_batch) and the nkern (by unroll_kern) loop. The size
must be a multiple of bsize or nkern respectively.
verbose : int
Passed to GpuConv.
version : int or str
Passed to GpuConv. If version='no_fft', the fft
optimization will be deactivated at the op level.
direction_hint : {'forward', 'bprop weights', 'bprop inputs'}
Passed to GpuConv, used by graph optimizers to aid algorithm choice.
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
imshp_logical
Default None. None value is equivalent to imshp value.
When imshp_logical != imshp, it tells us we need to insert 0 in
the image before we do the convolution. For example, when dx==dy==2
and the image is [[1, 2], [3, 4]], we should act as if the image
was [[1, 0, 2, 0], [0, 0, 0, 0], [3, 0, 4, 0], [0, 0, 0, 0]].
Our python code inserts the zeros, but the c code optimizes this away.
imshp_logical != imshp when taking the grad with respect to the weights or
the image when the output_mode is full and `dx != 1` or `dy != 1`.
kshp_logical
Same as imshp_logical, but for kshp. Used for the grad with respect to the
weights when the output_mode is valid and `dx != 1` or `dy != 1`.
kshp_logical_top_aligned
Used in the same case. Default to True.
Set to False in the grad with respect to the weights when the
output_mode is full.
"""
"""
__attrnames
=
[
'imshp'
,
'kshp'
,
'nkern'
,
'bsize'
,
'dx'
,
'dy'
,
'out_mode'
,
__attrnames
=
[
'imshp'
,
'kshp'
,
'nkern'
,
'bsize'
,
'dx'
,
'dy'
,
'out_mode'
,
...
@@ -257,10 +345,20 @@ class ConvOp(OpenMPOp):
...
@@ -257,10 +345,20 @@ class ConvOp(OpenMPOp):
with kernels of shape "kshp". Accepts symbolic or integer shapes.
with kernels of shape "kshp". Accepts symbolic or integer shapes.
Propagates `None`s (for unknown shapes).
Propagates `None`s (for unknown shapes).
:param inshp: (rows,cols) of input image
Parameters
:param kshp: (rows,cols) of filters
----------
:param mode: 'valid' or 'full' (see 'border_mode' in conv2d's doc)
inshp
:return: (rows,cols) of output image
(rows,cols) of input image.
kshp
(rows,cols) of filters.
mode : {'valid', 'full'}
See 'border_mode' in conv2d's doc.
Returns
-------
object
(rows,cols) of output image.
"""
"""
# The formula would be ceil((i + s * k - s * 1) / float(d)),
# The formula would be ceil((i + s * k - s * 1) / float(d)),
# with s=1 for mode=='full' and s=-1 for mode=='valid'.
# with s=1 for mode=='full' and s=-1 for mode=='valid'.
...
@@ -284,92 +382,6 @@ class ConvOp(OpenMPOp):
...
@@ -284,92 +382,6 @@ class ConvOp(OpenMPOp):
version
=-
1
,
version
=-
1
,
direction_hint
=
'forward'
,
direction_hint
=
'forward'
,
openmp
=
None
):
openmp
=
None
):
"""
Initializes a ConvOp with given output_mode (full/valid). All other
parameters are optional and are only used to generate more optimized c
code, or to enable graph optimizers to optimally replace the ConvOp.
NOTES ON OPTIMIZATION:
Their is two type of optimization. The first is the selection of the
fastest algo when bsize and nkern are probided with imshp and kshp.
By default we try to select the fastest version. You can specify it
with the unroll_batch, unroll_kern, and unroll_patch parameter.
The second type of optimization is hardcoding some dimensions into the
code when all shape are know.
This make a significant difference for the 'full' output_mode.
Some times, the fastest implementation on x86-64 uses
{unroll_batch=4, unroll_kern=4, unroll_patch=False}
with all other shape parameters being provided.
For optimizing other architectures, see:
Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance
Matrix Multiplication, (mr x nr). ACM Transactions on Mathematical
Software, May 2008.
Figure 12: (mr x nr). For x86 use 2x4, itanium 8x8, etc.
:type output_mode: string
:param output_mode: 'valid' -- gives an output smaller then the image
'full' -- gives an output bigger then the image
Optional parameters: (will generate more optimal c code)
:type imshp: tuple of len 2 or 3: 2 for 2d image,
3 for a stack of 2d images.
:param imshp: (stacksize, nb image row, nb image col)
:type kshp: tuple of len 2
:param kshp: (nb kernel row, nb kernel col)
:type nkern: int
:param nkern: the number of kernel
:type bsize: int
:param bsize: the size of the minibatch
:type dx: int
:param dx: patch stride rows
:type dy: int
:param dy: patch stride cols
Params which select the version of code used:
:type unroll_patch: bool
:param unroll_patch: use a version of c_code that unroll the patch loop
that don't request all shape information to work, but if all shape
information are present, will
use it to hardcode the value in the code for faster code.
:type unroll_batch:int
:param unroll_batch: use a version of c_code that unroll the batch
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
must by a multiple of bsize or nkern respectively.
:type unroll_kern:int
:param unroll_kern: use a version of c_code that unroll the batch
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
must by a multiple of bsize or nkern
respectively.
:type verbose: int
:param verbose: passed to GpuConv
:type version: int or str
:param version: passed to GpuConv, if version='no_fft', fft
optimization will be desactivated at the op level.
:param direction_hint: 'forward', 'bprop weights' or 'bprop inputs'.
Passed to GpuConv, used by graph optimizers to aid algorithm choice
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
:param imshp_logical: Default None. None value is equivalent to imshp
value. When imshp_logical != imshp, it tell we need to insert 0 in
the image before we do the convolution. For example, when dx==dy==2
and the image is [[1, 2], [3, 4]], we should make as if the image
was [[1, 0, 2, 0], [0, 0, 0, 0], [3, 0, 4, 0], [0, 0, 0, 0]].
Our python code insert the zero, but the c code optimize it.
imshp_logical != imshp when taking the grad again the weights or
the image when the output_mode is full and `dx != 1` or `dy != 1`.
:param kshp_logical: idem but for kshp and used for the grad again the
weights when the output_mode is valid and `dx != 1` or `dy != 1`.
:param kshp_logical_top_aligned: Used in the same case.Default to True.
Set to False in the grad again the weight when the
output_mode is full.
"""
# Deactivate fft_optimization at the op level if specified
# Deactivate fft_optimization at the op level if specified
if
version
==
"no_fft"
:
if
version
==
"no_fft"
:
self
.
fft_opt
=
False
self
.
fft_opt
=
False
...
@@ -587,7 +599,10 @@ class ConvOp(OpenMPOp):
...
@@ -587,7 +599,10 @@ class ConvOp(OpenMPOp):
for
a
in
self
.
__attrnames
)
+
"}"
for
a
in
self
.
__attrnames
)
+
"}"
def
flops
(
self
,
inputs
,
outputs
):
def
flops
(
self
,
inputs
,
outputs
):
""" Useful with the hack in profilemode to print the MFlops"""
"""
Useful with the hack in profilemode to print the MFlops.
"""
images
,
kerns
=
inputs
images
,
kerns
=
inputs
out
,
=
outputs
out
,
=
outputs
assert
images
[
1
]
==
kerns
[
1
]
assert
images
[
1
]
==
kerns
[
1
]
...
@@ -608,8 +623,13 @@ class ConvOp(OpenMPOp):
...
@@ -608,8 +623,13 @@ class ConvOp(OpenMPOp):
def
make_node
(
self
,
inputs
,
kerns
):
def
make_node
(
self
,
inputs
,
kerns
):
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
"""
"""
inputs - 4 dim: batches x stacksize x rows x cols
Parameters
kerns - 4 dim: nkern x stackidx x rows x cols
----------
inputs
4 dim: batches x stacksize x rows x cols.
kerns
4 dim: nkern x stackidx x rows x cols.
"""
"""
_inputs
=
as_tensor_variable
(
inputs
)
_inputs
=
as_tensor_variable
(
inputs
)
_kerns
=
as_tensor_variable
(
kerns
)
_kerns
=
as_tensor_variable
(
kerns
)
...
@@ -655,7 +675,8 @@ class ConvOp(OpenMPOp):
...
@@ -655,7 +675,8 @@ class ConvOp(OpenMPOp):
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
"""
"""
By default if len(img2d.shape)==3, we
By default if len(img2d.shape)==3, we TODO
"""
"""
img2d
,
filtersflipped
=
inp
img2d
,
filtersflipped
=
inp
z
,
=
out
z
,
=
out
...
@@ -1818,7 +1839,9 @@ Py_XDECREF(img2d);
...
@@ -1818,7 +1839,9 @@ Py_XDECREF(img2d);
def
gen_conv_code_unroll_batch_kern
(
d
,
unroll_bsize
=
1
,
unroll_ksize
=
1
):
def
gen_conv_code_unroll_batch_kern
(
d
,
unroll_bsize
=
1
,
unroll_ksize
=
1
):
""" c_code for ConvOp that unroll the batch size loop
"""
c_code for ConvOp that unroll the batch size loop.
"""
"""
assert
unroll_bsize
>
0
and
unroll_ksize
>
0
assert
unroll_bsize
>
0
and
unroll_ksize
>
0
if
"unroll_bsize"
in
d
or
"unroll_ksize"
in
d
or
"unroll_iter"
in
d
or
"unroll_biter"
in
d
or
"unroll_kiter"
in
d
:
if
"unroll_bsize"
in
d
or
"unroll_ksize"
in
d
or
"unroll_iter"
in
d
or
"unroll_biter"
in
d
or
"unroll_kiter"
in
d
:
...
...
theano/tensor/nnet/conv3d2d.py
浏览文件 @
6304a061
...
@@ -6,10 +6,13 @@ import theano.sandbox.cuda as cuda
...
@@ -6,10 +6,13 @@ import theano.sandbox.cuda as cuda
def
get_diagonal_subtensor_view
(
x
,
i0
,
i1
):
def
get_diagonal_subtensor_view
(
x
,
i0
,
i1
):
"""Helper function for DiagonalSubtensor and
"""
IncDiagonalSubtensor
Helper function for DiagonalSubtensor and IncDiagonalSubtensor.
Notes
-----
It returns a partial view of x, not a partial copy.
:note: it return a partial view of x, not a partial copy.
"""
"""
# We have to cast i0 and i0 to int because python 2.4 (and maybe later)
# We have to cast i0 and i0 to int because python 2.4 (and maybe later)
# do not support indexing with 0-dim, 'int*' ndarrays.
# do not support indexing with 0-dim, 'int*' ndarrays.
...
@@ -27,13 +30,24 @@ def get_diagonal_subtensor_view(x, i0, i1):
...
@@ -27,13 +30,24 @@ def get_diagonal_subtensor_view(x, i0, i1):
class
DiagonalSubtensor
(
Op
):
class
DiagonalSubtensor
(
Op
):
"""Return a form a nd diagonal subtensor.
"""
Return a form a nd diagonal subtensor.
:param x: n-d tensor
:param i0: axis index in x
Parameters
:param i1: axis index in x
----------
:note: Work on the GPU.
x
n-d tensor
i0
Axis index in x
i1
Axis index in x
Notes
-----
Works on the GPU.
Extended summary
----------------
``x`` is some n-dimensional tensor, but this Op only deals with a
``x`` is some n-dimensional tensor, but this Op only deals with a
matrix-shaped slice, using axes i0 and i1. Without loss of
matrix-shaped slice, using axes i0 and i1. Without loss of
generality, suppose that ``i0`` picks out our ``row`` dimension,
generality, suppose that ``i0`` picks out our ``row`` dimension,
...
@@ -73,6 +87,7 @@ class DiagonalSubtensor(Op):
...
@@ -73,6 +87,7 @@ class DiagonalSubtensor(Op):
see what's necessary at that point.
see what's necessary at that point.
"""
"""
__props__
=
(
"inplace"
,)
__props__
=
(
"inplace"
,)
def
__str__
(
self
):
def
__str__
(
self
):
...
@@ -111,8 +126,10 @@ diagonal_subtensor = DiagonalSubtensor(False)
...
@@ -111,8 +126,10 @@ diagonal_subtensor = DiagonalSubtensor(False)
class
IncDiagonalSubtensor
(
Op
):
class
IncDiagonalSubtensor
(
Op
):
"""
"""
The gradient of DiagonalSubtensor
The gradient of DiagonalSubtensor.
"""
"""
__props__
=
(
"inplace"
,)
__props__
=
(
"inplace"
,)
def
__str__
(
self
):
def
__str__
(
self
):
...
@@ -153,26 +170,39 @@ inc_diagonal_subtensor = IncDiagonalSubtensor(False)
...
@@ -153,26 +170,39 @@ inc_diagonal_subtensor = IncDiagonalSubtensor(False)
def
conv3d
(
signals
,
filters
,
def
conv3d
(
signals
,
filters
,
signals_shape
=
None
,
filters_shape
=
None
,
signals_shape
=
None
,
filters_shape
=
None
,
border_mode
=
'valid'
):
border_mode
=
'valid'
):
"""Convolve spatio-temporal filters with a movie.
"""
Convolve spatio-temporal filters with a movie.
It flips the filters.
It flips the filters.
:param signals: timeseries of images whose pixels have color channels.
Parameters
shape: [Ns, Ts, C, Hs, Ws]
----------
:param filters: spatio-temporal filters
signals
shape: [Nf, Tf, C, Hf, Wf]
Timeseries of images whose pixels have color channels.
:param signals_shape: None or a tuple/list with the shape of signals
Shape: [Ns, Ts, C, Hs, Ws].
:param filters_shape: None or a tuple/list with the shape of filters
filters
:param border_mode: The only one tested is 'valid'.
Spatio-temporal filters.
Shape: [Nf, Tf, C, Hf, Wf].
:note: Another way to define signals: (batch, time, in channel, row, column)
signals_shape
Another way to define filters: (out channel,time,in channel, row, column)
None or a tuple/list with the shape of signals.
:note: For the GPU, you can use this implementation or
filters_shape
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`.
None or a tuple/list with the shape of filters.
border_mode
:see: Someone made a script that shows how to swap the axes between
The only one tested is 'valid'.
both 3d convolution implementations in Theano. See the last
`attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_.
Notes
-----
Another way to define signals: (batch, time, in channel, row, column)
Another way to define filters: (out channel,time,in channel, row, column)
For the GPU, you can use this implementation or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`.
See Also
--------
Someone made a script that shows how to swap the axes between
both 3d convolution implementations in Theano. See the last
`attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_
"""
"""
...
@@ -264,7 +294,8 @@ def conv3d(signals, filters,
...
@@ -264,7 +294,8 @@ def conv3d(signals, filters,
def
make_gpu_optimizer
(
op
,
to_gpu
):
def
make_gpu_optimizer
(
op
,
to_gpu
):
"""This function create optimizer that move some inputs to the GPU
"""
This function creates an optimizer that moves some inputs to the GPU
for op that work on both CPU and GPU.
for op that work on both CPU and GPU.
The op object is created by calling op(), so good default value
The op object is created by calling op(), so good default value
...
@@ -272,8 +303,12 @@ def make_gpu_optimizer(op, to_gpu):
...
@@ -272,8 +303,12 @@ def make_gpu_optimizer(op, to_gpu):
We suppose the same op work with CPU and GPU inputs.
We suppose the same op work with CPU and GPU inputs.
:param op: the op that support GPU inputs
Parameters
:param to_gpu: a list of op inputs that are moved to the GPU.
----------
op
The op that supports GPU inputs.
to_gpu
A list of op inputs that are moved to the GPU.
"""
"""
@theano.gof.local_optimizer
([
op
,
cuda
.
gpu_from_host
])
@theano.gof.local_optimizer
([
op
,
cuda
.
gpu_from_host
])
...
@@ -281,6 +316,7 @@ def make_gpu_optimizer(op, to_gpu):
...
@@ -281,6 +316,7 @@ def make_gpu_optimizer(op, to_gpu):
"""
"""
op(host_from_gpu()) -> host_from_gpu(op)
op(host_from_gpu()) -> host_from_gpu(op)
gpu_from_host(op) -> op(gpu_from_host)
gpu_from_host(op) -> op(gpu_from_host)
"""
"""
if
isinstance
(
node
.
op
,
op
):
if
isinstance
(
node
.
op
,
op
):
# op(host_from_gpu()) -> host_from_gpu(op)
# op(host_from_gpu()) -> host_from_gpu(op)
...
@@ -314,7 +350,7 @@ if cuda.cuda_available:
...
@@ -314,7 +350,7 @@ if cuda.cuda_available:
@theano.gof.local_optimizer
([
DiagonalSubtensor
,
IncDiagonalSubtensor
])
@theano.gof.local_optimizer
([
DiagonalSubtensor
,
IncDiagonalSubtensor
])
def
local_inplace_DiagonalSubtensor
(
node
):
def
local_inplace_DiagonalSubtensor
(
node
):
"""
also work for IncDiagonalSubtensor
"""
"""
Also works for IncDiagonalSubtensor.
"""
if
(
isinstance
(
node
.
op
,
(
DiagonalSubtensor
,
IncDiagonalSubtensor
))
and
if
(
isinstance
(
node
.
op
,
(
DiagonalSubtensor
,
IncDiagonalSubtensor
))
and
not
node
.
op
.
inplace
):
not
node
.
op
.
inplace
):
new_op
=
node
.
op
.
__class__
(
inplace
=
True
)
new_op
=
node
.
op
.
__class__
(
inplace
=
True
)
...
...
theano/tensor/nnet/neighbours.py
浏览文件 @
6304a061
...
@@ -13,23 +13,29 @@ from theano.gradient import grad_undefined
...
@@ -13,23 +13,29 @@ from theano.gradient import grad_undefined
class
Images2Neibs
(
Op
):
class
Images2Neibs
(
Op
):
"""
Parameters
----------
mode : {'valid', 'ignore_borders', 'wrap_centered'}
'valid': Requires an input that is a multiple of the
pooling factor (in each direction).
'ignore_borders': Same as valid, but will ignore the borders
if the shape(s) of the input is not a multiple of the pooling
factor(s).
'wrap_centered' : ?? TODO comment
Returns
-------
object
Reshapes the input as a 2D tensor where each row is a
pooling example.
"""
__props__
=
(
"mode"
,)
__props__
=
(
"mode"
,)
def
__init__
(
self
,
mode
=
'valid'
):
def
__init__
(
self
,
mode
=
'valid'
):
"""
:type mode: str
:param mode: Possible values:
'valid': Requires an input that is a multiple of the
pooling factor (in each direction)
'ignore_borders': Same as valid, but will ignore the borders
if the shape(s) of the input
is not a multiple of the pooling factor(s)
'wrap_centered' : ?? TODO comment
:return:
Reshapes the input as a 2D tensor where each row is an
pooling example
"""
if
mode
not
in
[
'valid'
,
'wrap_centered'
,
'ignore_borders'
]:
if
mode
not
in
[
'valid'
,
'wrap_centered'
,
'ignore_borders'
]:
raise
NotImplementedError
(
"Only the mode valid, ignore_borders"
raise
NotImplementedError
(
"Only the mode valid, ignore_borders"
" and wrap_centered have been"
" and wrap_centered have been"
...
@@ -46,20 +52,22 @@ class Images2Neibs(Op):
...
@@ -46,20 +52,22 @@ class Images2Neibs(Op):
def
make_node
(
self
,
ten4
,
neib_shape
,
neib_step
=
None
):
def
make_node
(
self
,
ten4
,
neib_shape
,
neib_step
=
None
):
"""
"""
:param ten4: a list of lists of images
Parameters
ten4 is of shape (list 1 dim, list 2 dim,
----------
row, col)
ten4 : a list of lists of images
:param neib_shape: (r,c) where r is the height of the neighborhood
ten4 is of shape (list 1 dim, list 2 dim, row, col).
in rows and c is the width of the neighborhood
neib_shape
in columns
(r,c) where r is the height of the neighborhood in rows and c is
:param neib_step: (dr,dc) where dr is the number of rows to
the width of the neighborhood in columns.
skip between patch and dc is the number of
neib_step
columns. When None, this is the same as
(dr,dc) where dr is the number of rows to skip between patch and dc
neib_shape(patch are disjoint)
is the number of columns. When None, this is the same as neib_shape
(patch are disjoint).
output:
a 2D matrix, written using the following pattern
Returns
-------
matrix
A 2D matrix, written using the following pattern
idx = 0
idx = 0
for i in xrange(list 1 dim)
for i in xrange(list 1 dim)
for j in xrange(list 2 dim)
for j in xrange(list 2 dim)
...
@@ -68,9 +76,10 @@ class Images2Neibs(Op):
...
@@ -68,9 +76,10 @@ class Images2Neibs(Op):
output[idx,:]
output[idx,:]
= flattened version of ten4[i,j,l:l+r,k:k+c]
= flattened version of ten4[i,j,l:l+r,k:k+c]
idx += 1
idx += 1
(note: the op isn't necessarily implemented internally with these
.. note:: The op isn't necessarily implemented internally with these
for loops, they're just the easiest way to describe the output
for loops, they're just the easiest way to describe the output
pattern)
pattern.
"""
"""
ten4
=
T
.
as_tensor_variable
(
ten4
)
ten4
=
T
.
as_tensor_variable
(
ten4
)
neib_shape
=
T
.
as_tensor_variable
(
neib_shape
)
neib_shape
=
T
.
as_tensor_variable
(
neib_shape
)
...
@@ -420,61 +429,46 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
...
@@ -420,61 +429,46 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
"""
"""
Function :func:`images2neibs <theano.sandbox.neighbours.images2neibs>`
Function :func:`images2neibs <theano.sandbox.neighbours.images2neibs>`
allows to apply a sliding window operation to a tensor containing
allows to apply a sliding window operation to a tensor containing
images
images or other two-dimensional objects.
or other two-dimensional objects.
The sliding window operation loops over points in input data and stores
The sliding window operation loops
a rectangular neighbourhood of each point.
over points in input data and stores a rectangular neighbourhood of
It is possible to assign a step of selecting patches (parameter `neib_step`).
each point.
It is possible to assign a step of selecting patches (parameter
Parameters
`neib_step`).
----------
ten4 : A 4d tensor-like
:param ten4: A 4-dimensional tensor which represents
A 4-dimensional tensor which represents a list of lists of images.
a list of lists of images.a list of lists of images.
It should have shape (list 1 dim, list 2 dim, row, col). The first
It should have shape (list 1 dim, list 2 dim,
two dimensions can be useful to store different channels and batches.
row, col). The first two dimensions can be
neib_shape : A 1d tensor-like of 2 values
useful to store different channels and batches.
A tuple containing two values: height and width of the neighbourhood.
:type ten4: A 4d tensor-like.
It should have shape (r,c) where r is the height of the neighborhood
:param neib_shape: A tuple containing two
in rows and c is the width of the neighborhood in columns.
values: height and width of the neighbourhood.
neib_step : A 1d tensor-like of 2 values
It should have shape (r,c) where r is the height of the
(dr,dc) where dr is the number of rows to skip between patch and dc is
neighborhood in rows and c is the width of the neighborhood
the number of columns. The parameter should be a tuple of two elements:
in columns
number of rows and number of columns to skip each iteration.
:type neib_shape: A 1d tensor-like of 2 values.
Basically, when the step is 1, the neighbourhood of every first element
:param neib_step: (dr,dc) where dr is the number of rows to
is taken and every possible rectangular subset is returned.
skip between patch and dc is the number of
By default it is equal to `neib_shape` in other words, the patches are
columns. The parameter should be a tuple of two elements:
disjoint. When the step is greater than `neib_shape`, some elements are
number
omitted. When None, this is the same as neib_shape (patch are disjoint).
of rows and number of columns to skip each iteration.
.. note:: Currently the step size should be chosen in the way that the
Basically, when the step is 1, the neighbourhood of every
corresponding dimension :math:`i` (width or height) is equal to
first element is taken and every possible rectangular
:math:`n * step\_size_i + neib\_shape_i` for some :math:`n`
subset is returned. By default it is equal to
mode : {'valid', 'ignore_borders', 'wrap_centered'}
`neib_shape` in other words, the
patches are disjoint. When the step is greater than
`neib_shape`, some elements are omitted. When None, this
is the same as
neib_shape(patch are disjoint)
.. note:: Currently the step size should be chosen in the way that the
corresponding dimension :math:`i` (width or height) is equal to
:math:`n * step\_size_i + neib\_shape_i` for some :math:`n`
:type neib_step: A 1d tensor-like of 2 values.
:param mode:
Possible values:
``valid``
``valid``
Requires an input that is a multiple of the
Requires an input that is a multiple of the
pooling factor (in each direction)
pooling factor (in each direction).
``ignore_borders``
``ignore_borders``
Same as valid, but will ignore the borders
Same as valid, but will ignore the borders if the shape(s) of
if the shape(s) of the input
the input is not a multiple of the pooling factor(s).
is not a multiple of the pooling factor(s)
``wrap_centered``
``wrap_centered``
?? TODO comment
?? TODO comment
:type mode: str
Returns
:return:
-------
object
Reshapes the input as a 2D tensor where each row is an
Reshapes the input as a 2D tensor where each row is an
pooling example. Pseudo-code of the output:
pooling example. Pseudo-code of the output:
...
@@ -493,7 +487,8 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
...
@@ -493,7 +487,8 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
these for loops, they're just the easiest way to describe the
these for loops, they're just the easiest way to describe the
output pattern.
output pattern.
Example:
Examples
--------
.. code-block:: python
.. code-block:: python
...
@@ -512,6 +507,7 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
...
@@ -512,6 +507,7 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
.. note:: The underlying code will construct a 2D tensor of disjoint
.. note:: The underlying code will construct a 2D tensor of disjoint
patches 5x5. The output has shape 4x25.
patches 5x5. The output has shape 4x25.
"""
"""
return
Images2Neibs
(
mode
)(
ten4
,
neib_shape
,
neib_step
)
return
Images2Neibs
(
mode
)(
ten4
,
neib_shape
,
neib_step
)
...
@@ -524,25 +520,37 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
...
@@ -524,25 +520,37 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
the output of :func:`images2neibs <theano.sandbox.neigbours.neibs2images>`
the output of :func:`images2neibs <theano.sandbox.neigbours.neibs2images>`
and reconstructs its input.
and reconstructs its input.
:param neibs: matrix like the one obtained by
Parameters
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`
----------
:param neib_shape: `neib_shape` that was used in
neibs: matrix
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`
Like the one obtained by
:param original_shape: original shape of the 4d tensor given to
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`.
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`
neib_shape
`neib_shape` that was used in
:return: Reconstructs the input of
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`.
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`,
original_shape
a 4d tensor of shape `original_shape`.
Original shape of the 4d tensor given to
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`
.. note:: Currently, the function doesn't support tensors created with
`neib_step` different from default value. This means that it may be
Returns
impossible to compute the gradient of a variable gained by
-------
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` w.r.t.
object
its inputs in this case, because it uses
Reconstructs the input of
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` for
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`,
gradient computation.
a 4d tensor of shape `original_shape`.
Notes
-----
Currently, the function doesn't support tensors created with
`neib_step` different from default value. This means that it may be
impossible to compute the gradient of a variable gained by
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` w.r.t.
its inputs in this case, because it uses
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` for
gradient computation.
Examples
--------
Example, which uses a tensor gained in example for
Example, which uses a tensor gained in example for
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`:
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`:
...
@@ -555,6 +563,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
...
@@ -555,6 +563,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
im_new_val = inv_window(neibs_val)
im_new_val = inv_window(neibs_val)
.. note:: The code will output the initial image array.
.. note:: The code will output the initial image array.
"""
"""
neibs
=
T
.
as_tensor_variable
(
neibs
)
neibs
=
T
.
as_tensor_variable
(
neibs
)
neib_shape
=
T
.
as_tensor_variable
(
neib_shape
)
neib_shape
=
T
.
as_tensor_variable
(
neib_shape
)
...
...
theano/tensor/nnet/nnet.py
浏览文件 @
6304a061
"""Provides neural-network specific Ops.
"""
Provides neural-network specific Ops.
:note: TODO: factor this out into a neural-network toolbox.
Notes
-----
TODO: factor this out into a neural-network toolbox.
:note:
We register all optimization with the gpu tag as we don't
We register all optimization with the gpu tag as we don't
implement all the intermediate case on the GPU (in particular
implement all the intermediate case on the GPU (in particular
AdvancedSubtensor). So to make sure it run well on the gpu with
AdvancedSubtensor). So to make sure it run well on the gpu with
fast_compile, we register them as needed for the GPU. This can be
fast_compile, we register them as needed for the GPU. This can be
revisited later when all the intermediate part are on the GPU.
revisited later when all the intermediate part are on the GPU.
"""
"""
import
logging
import
logging
...
@@ -38,13 +41,16 @@ class SoftmaxWithBias(gof.Op):
...
@@ -38,13 +41,16 @@ class SoftmaxWithBias(gof.Op):
"""
"""
An L{Op} for the output of neural-net multiclass classifiers.
An L{Op} for the output of neural-net multiclass classifiers.
@type x: is a matrix of floats (32 or 64)
Attributes
@type b: is a [row] vector of floats (32 or 64),
----------
length is number of cols in x
x : a matrix of floats (32 or 64)
b : a [row] vector of floats (32 or 64), length is number of cols in x
This L{Op}'s output is softmax(x+b).
This L{Op}'s output is softmax(x+b).
softmax(x[i]) is the i'th distribution over len(x[i]) options.
softmax(x[i]) is the i'th distribution over len(x[i]) options.
"""
"""
nin
=
2
nin
=
2
nout
=
1
nout
=
1
__props__
=
()
__props__
=
()
...
@@ -270,7 +276,11 @@ softmax_with_bias = SoftmaxWithBias()
...
@@ -270,7 +276,11 @@ softmax_with_bias = SoftmaxWithBias()
class
SoftmaxGrad
(
gof
.
Op
):
class
SoftmaxGrad
(
gof
.
Op
):
"""Gradient wrt x of the Softmax Op"""
"""
Gradient wrt x of the Softmax Op.
"""
nin
=
2
nin
=
2
nout
=
1
nout
=
1
__props__
=
()
__props__
=
()
...
@@ -391,6 +401,7 @@ class Softmax(gof.Op):
...
@@ -391,6 +401,7 @@ class Softmax(gof.Op):
\\frac{e^{\mathbf{x}_j}}{\sum_{k=1}^K e^{\mathbf{x}_k}}`
\\frac{e^{\mathbf{x}_j}}{\sum_{k=1}^K e^{\mathbf{x}_k}}`
where :math:`K` is the total number of neurons in the layer. This
where :math:`K` is the total number of neurons in the layer. This
activation function gets applied row-wise.
activation function gets applied row-wise.
"""
"""
nin
=
1
nin
=
1
...
@@ -584,7 +595,9 @@ def softmax(c):
...
@@ -584,7 +595,9 @@ def softmax(c):
@opt.register_specialize
(
'fast_compile_gpu'
)
@opt.register_specialize
(
'fast_compile_gpu'
)
@gof.local_optimizer
([
softmax_op
])
@gof.local_optimizer
([
softmax_op
])
def
local_softmax_with_bias
(
node
):
def
local_softmax_with_bias
(
node
):
"""Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias)
"""
Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias).
"""
"""
if
node
.
op
==
softmax_op
:
if
node
.
op
==
softmax_op
:
x
,
=
node
.
inputs
x
,
=
node
.
inputs
...
@@ -789,15 +802,19 @@ if 0:
...
@@ -789,15 +802,19 @@ if 0:
class
CrossentropySoftmaxArgmax1HotWithBias
(
gof
.
Op
):
class
CrossentropySoftmaxArgmax1HotWithBias
(
gof
.
Op
):
"""A special compound L{Op} for the output of neural-net classifiers.
"""
A special compound L{Op} for the output of neural-net classifiers.
:type x: is a matrix of floats (32 or 64)
Parameters
:type b: is a [row] vector of floats (32 or 64),
----------
length is number of cols in x
x : a matrix of floats (32 or 64)
:type y_idx: a [column] vector of int (32 or 64),
b : a [row] vector of floats (32 or 64), length is number of cols in x
length is number of rows in x
y_idx : a [column] vector of int (32 or 64),
length is number of rows in x
:returns: row-wise NLL, softmax(x+b), row-wise argmax of (x+b)
Returns
-------
object
row-wise NLL, softmax(x+b), row-wise argmax of (x+b).
@precondition: every entry in y_idx is a valid (non-negative)
@precondition: every entry in y_idx is a valid (non-negative)
column index into x
column index into x
...
@@ -816,6 +833,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
...
@@ -816,6 +833,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
i'th example.
i'th example.
"""
"""
nin
=
3
nin
=
3
nout
=
3
nout
=
3
__props__
=
()
__props__
=
()
...
@@ -846,7 +864,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
...
@@ -846,7 +864,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
return
Apply
(
self
,
[
x
,
b
,
y_idx
],
[
nll
,
sm
,
am
])
return
Apply
(
self
,
[
x
,
b
,
y_idx
],
[
nll
,
sm
,
am
])
def
perform
(
self
,
node
,
input_storage
,
output_storage
):
def
perform
(
self
,
node
,
input_storage
,
output_storage
):
"""The math, where x is an input vector, and t is a target index:
"""
The math, where x is an input vector, and t is a target index:
softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j]))
softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j]))
nll(x,t) = -log(softmax(x)[t])
nll(x,t) = -log(softmax(x)[t])
...
@@ -1037,12 +1056,15 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
...
@@ -1037,12 +1056,15 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
class
CrossentropySoftmax1HotWithBiasDx
(
gof
.
Op
):
class
CrossentropySoftmax1HotWithBiasDx
(
gof
.
Op
):
"""
Gradient wrt x of the CrossentropySoftmaxArgmax1HotWithBias Op.
"""
nin
=
3
nin
=
3
nout
=
1
nout
=
1
__props__
=
()
__props__
=
()
"""Gradient wrt x of the CrossentropySoftmaxArgmax1HotWithBias Op"""
def
make_node
(
self
,
dy
,
sm
,
y_idx
,
**
kwargs
):
def
make_node
(
self
,
dy
,
sm
,
y_idx
,
**
kwargs
):
dy
=
tensor
.
as_tensor_variable
(
dy
)
dy
=
tensor
.
as_tensor_variable
(
dy
)
sm
=
tensor
.
as_tensor_variable
(
sm
)
sm
=
tensor
.
as_tensor_variable
(
sm
)
...
@@ -1217,15 +1239,19 @@ def crossentropy_softmax_1hot(x, y_idx, **kwargs):
...
@@ -1217,15 +1239,19 @@ def crossentropy_softmax_1hot(x, y_idx, **kwargs):
def
crossentropy_softmax_max_and_argmax_1hot_with_bias
(
x
,
b
,
y_idx
,
**
kwargs
):
def
crossentropy_softmax_max_and_argmax_1hot_with_bias
(
x
,
b
,
y_idx
,
**
kwargs
):
"""
"""
@return: The cross-entropy, the softmax output, the max probability,
Returns
and the argmax index
-------
object
The cross-entropy, the softmax output, the max probability,
and the argmax index.
@todo: Since we are recomputing the argmax,
TODO: Since we are recomputing the argmax,
we might as well assert that it is correct.
we might as well assert that it is correct.
@todo: Make this entire function
TODO: Make this entire function
unnecessary? e.g. CrossentropySoftmaxArgmax1HotWithBias should return
unnecessary? e.g. CrossentropySoftmaxArgmax1HotWithBias should return
the appropriate information (i.e. the max probability)?
the appropriate information (i.e. the max probability)?
"""
"""
(
xent
,
softmax
)
=
crossentropy_softmax_1hot_with_bias
(
x
,
b
,
y_idx
,
**
kwargs
)
(
xent
,
softmax
)
=
crossentropy_softmax_1hot_with_bias
(
x
,
b
,
y_idx
,
**
kwargs
)
(
max_pr
,
argmax
)
=
tensor
.
max_and_argmax
(
softmax
,
axis
=-
1
)
(
max_pr
,
argmax
)
=
tensor
.
max_and_argmax
(
softmax
,
axis
=-
1
)
...
@@ -1262,29 +1288,34 @@ crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
...
@@ -1262,29 +1288,34 @@ crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
class
CrossentropyCategorical1Hot
(
gof
.
Op
):
class
CrossentropyCategorical1Hot
(
gof
.
Op
):
"""
"""
Compute the cross entropy between a coding distribution and
Compute the cross entropy between a coding distribution and
a true distribution of the form [0, 0, ... 0, 1, 0, ..., 0]
a true distribution of the form [0, 0, ... 0, 1, 0, ..., 0].
.. math::
.. math::
y[i] = - \log(coding_dist[i, one_of_n[i]])
y[i] = - \log(coding_dist[i, one_of_n[i]])
Notes
:note: In the case that the coding distribution is the output of a
-----
softmax, an application of this Op will probably be optimized
In the case that the coding distribution is the output of a
away in favour of one with a C implementation.
softmax, an application of this Op will probably be optimized
away in favour of one with a C implementation.
"""
"""
__props__
=
()
__props__
=
()
def
make_node
(
self
,
coding_dist
,
true_one_of_n
):
def
make_node
(
self
,
coding_dist
,
true_one_of_n
):
"""
"""
:type coding_dist: dense matrix
Parameters
----------
coding_dist : dense matrix
true_one_of_n : lvector
:type true_one_of_n: lvector
Returns
-------
dvector
:rtype: dvector
"""
"""
_coding_dist
=
tensor
.
as_tensor_variable
(
coding_dist
)
_coding_dist
=
tensor
.
as_tensor_variable
(
coding_dist
)
_true_one_of_n
=
tensor
.
as_tensor_variable
(
true_one_of_n
)
_true_one_of_n
=
tensor
.
as_tensor_variable
(
true_one_of_n
)
...
@@ -1332,10 +1363,13 @@ crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
...
@@ -1332,10 +1363,13 @@ crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
@opt.register_specialize
(
'fast_compile_gpu'
)
@opt.register_specialize
(
'fast_compile_gpu'
)
@gof.optimizer
@gof.optimizer
def
crossentropy_to_crossentropy_with_softmax_with_bias
(
fgraph
):
def
crossentropy_to_crossentropy_with_softmax_with_bias
(
fgraph
):
"""This is a stabilization optimization
"""
This is a stabilization optimization.
:note: not a local optimization because we are replacing outputs
Notes
from several nodes at once
-----
Not a local optimization because we are replacing outputs
from several nodes at once.
"""
"""
...
@@ -1362,16 +1396,19 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
...
@@ -1362,16 +1396,19 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
@gof.optimizer
@gof.optimizer
def
crossentropy_to_crossentropy_with_softmax
(
fgraph
):
def
crossentropy_to_crossentropy_with_softmax
(
fgraph
):
"""This is a stabilization optimization that is more general then
"""
crossentropy_to_crossentropy_with_softmax_with_bias
This is a stabilization optimization that is more general than
crossentropy_to_crossentropy_with_softmax_with_bias.
It must be executed after local_softmax_with_bias optimization in
It must be executed after local_softmax_with_bias optimization in
specialize
specialize.
:todo: This is a stabilization optimization! How to make this more cleanly?
TODO: This is a stabilization optimization! How to make this more cleanly?
:note: not a local optimization because we are replacing outputs
Notes
from several nodes at once
-----
Not a local optimization because we are replacing outputs from several
nodes at once.
"""
"""
...
@@ -1460,11 +1497,13 @@ def local_argmax_pushdown(node):
...
@@ -1460,11 +1497,13 @@ def local_argmax_pushdown(node):
def
_check_rows_is_arange_len_labels
(
rows
,
labels
):
def
_check_rows_is_arange_len_labels
(
rows
,
labels
):
'''Check that 'rows' is the same node as T.arange(labels.shape[0])
"""
Check that 'rows' is the same node as T.arange(labels.shape[0]).
Also considers the case where labels.shape[0] is constant and equal
Also considers the case where labels.shape[0] is constant and equal
to 1, and T.arange(labels.shape[0]) has been constant-folded into 0.
to 1, and T.arange(labels.shape[0]) has been constant-folded into 0.
'''
"""
if
labels
.
owner
and
hasattr
(
labels
.
owner
.
fgraph
,
'shape_feature'
):
if
labels
.
owner
and
hasattr
(
labels
.
owner
.
fgraph
,
'shape_feature'
):
shape_of
=
labels
.
owner
.
fgraph
.
shape_feature
.
shape_of
shape_of
=
labels
.
owner
.
fgraph
.
shape_feature
.
shape_of
...
@@ -1795,10 +1834,11 @@ def graph_merge_softmax_with_crossentropy_softmax(node):
...
@@ -1795,10 +1834,11 @@ def graph_merge_softmax_with_crossentropy_softmax(node):
@gof.local_optimizer
([
CrossentropySoftmax1HotWithBiasDx
])
@gof.local_optimizer
([
CrossentropySoftmax1HotWithBiasDx
])
def
local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc
(
node
):
def
local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc
(
node
):
"""
"""
Replaces a CrossentropySoftmax1HotWithBiasDx op, whose incoming gradient is
Replace a CrossentropySoftmax1HotWithBiasDx op, whose incoming gradient is
an `alloc` of a scalar variable or one that has either broadcastable or
an `alloc` of a scalar variable or one that has either broadcastable or
matching dimensions with the output variable, by one that skips the
matching dimensions with the output variable, by one that skips the
intermediate `alloc`.
intermediate `alloc`.
"""
"""
if
isinstance
(
node
.
op
,
CrossentropySoftmax1HotWithBiasDx
):
if
isinstance
(
node
.
op
,
CrossentropySoftmax1HotWithBiasDx
):
dy
,
sm
,
y_idx
=
node
.
inputs
dy
,
sm
,
y_idx
=
node
.
inputs
...
@@ -1850,30 +1890,38 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
...
@@ -1850,30 +1890,38 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
def
binary_crossentropy
(
output
,
target
):
def
binary_crossentropy
(
output
,
target
):
"""
"""
Compute the crossentropy of binary random variables
Compute the crossentropy of binary random variables.
output and target are each expectations of binary random
Output and target are each expectations of binary random
variables; target may be exactly 0 or 1 but output must
variables; target may be exactly 0 or 1 but output must
lie strictly between 0 and 1.
lie strictly between 0 and 1.
@note: we could use the x log y op to support output=0
@ and output=1. The gradient would still be undefined though.
Notes
@note: We do not sum, crossentropy is computed by component.
-----
@todo: Rewrite as a scalar, and then broadcast to tensor.
We could use the x log y op to support output=0 and output=1.
The gradient would still be undefined though.
We do not sum, crossentropy is computed by component.
TODO : Rewrite as a scalar, and then broadcast to tensor.
"""
"""
return
-
(
target
*
tensor
.
log
(
output
)
+
(
1.0
-
target
)
*
tensor
.
log
(
1.0
-
output
))
return
-
(
target
*
tensor
.
log
(
output
)
+
(
1.0
-
target
)
*
tensor
.
log
(
1.0
-
output
))
def
categorical_crossentropy
(
coding_dist
,
true_dist
):
def
categorical_crossentropy
(
coding_dist
,
true_dist
):
"""
"""
WARNING: THIS FUNCTION IS UNNECESSARILY POLYMORPHIC.
Return the cross-entropy between an approximating distribution and a true
We ultimately don't want the polymorphism, and will move this function to pylearn.algorithms.cost.
distribution.
The 1hot version will be removed.
The length of the documentation here is a form of code smell.
Return the cross-entropy between an approximating distribution and a true distribution
.. warning:: THIS FUNCTION IS UNNECESSARILY POLYMORPHIC.
We ultimately don't want the polymorphism, and will move this function
to pylearn.algorithms.cost. The 1hot version will be removed.
The length of the documentation here is a form of code smell.
The cross entropy between two probability distributions measures the average number of bits
The cross entropy between two probability distributions measures the average
needed to identify an event from a set of possibilities, if a coding scheme is used based
number of bits needed to identify an event from a set of possibilities, if a
on a given probability distribution q, rather than the "true" distribution p.
coding scheme is used based on a given probability distribution q, rather
than the "true" distribution p.
Mathematically it is defined as follows:
Mathematically it is defined as follows:
...
@@ -1881,20 +1929,25 @@ def categorical_crossentropy(coding_dist, true_dist):
...
@@ -1881,20 +1929,25 @@ def categorical_crossentropy(coding_dist, true_dist):
H(p,q) = -
\
sum_x p(x)
\
log(q(x))
H(p,q) = -
\
sum_x p(x)
\
log(q(x))
:type coding_dist: a dense matrix.
Parameters
:param coding_dist: Each slice along axis represents one distribution.
----------
coding_dist : a dense matrix
:type true_dist: a dense matrix or sparse matrix or integer vector.
Each slice along axis represents one distribution.
:param coding_dist: In the case of a matrix argument, each slice along axis represents one
true_dist : a dense matrix or sparse matrix or integer vector
distribution. In the case of an integer vector argument, each element represents the
In the case of a matrix argument, each slice along axis represents one
position of the '1' in a 1-of-N encoding.
distribution. In the case of an integer vector argument, each element
represents the position of the '1' in a 1-of-N encoding.
:type axis: int
:param axis: the dimension over which each distribution runs. (1 for row distributions, 0
Returns
for column distributions)
-------
tensor of rank one-less-than `coding_dist`
:rtype: tensor of rank one-less-than `coding_dist`
The cross entropy between each coding and true distribution.
:returns: the cross entropy between each coding and true distribution.
Notes
-----
axis : int
The dimension over which each distribution runs
(1 for row distributions, 0 for column distributions).
"""
"""
if
true_dist
.
ndim
==
coding_dist
.
ndim
:
if
true_dist
.
ndim
==
coding_dist
.
ndim
:
...
@@ -2036,23 +2089,27 @@ def relu(x, alpha=0):
...
@@ -2036,23 +2089,27 @@ def relu(x, alpha=0):
"""
"""
Compute the element-wise rectified linear activation function.
Compute the element-wise rectified linear activation function.
:type x: symbolic tensor
Parameters
:param x: Tensor to compute the activation function for.
----------
x : symbolic tensor
:type alpha: scalar or tensor, optional
Tensor to compute the activation function for.
:param alpha: Slope for negative input, usually between 0 and 1. The
alpha : scalar or tensor, optional
default value of 0 will lead to the standard rectifier, 1 will lead to
Slope for negative input, usually between 0 and 1. The default value
of 0 will lead to the standard rectifier, 1 will lead to
a linear activation function, and any value in between will give a
a linear activation function, and any value in between will give a
leaky rectifier. A shared variable (broadcastable against `x`) will
leaky rectifier. A shared variable (broadcastable against `x`) will
result in a parameterized rectifier with learnable slope(s).
result in a parameterized rectifier with learnable slope(s).
:rtype: symbolic tensor
Returns
:return: element-wise rectifier applied to `x`
-------
symbolic tensor
Element-wise rectifier applied to `x`.
.. note:: This is numerically equivalent to
Notes
``T.switch(x > 0, x, alpha * x)``
-----
(or ``T.maximum(x, alpha * x)`` for ``alpha < 1``), but uses a faster
This is numerically equivalent to ``T.switch(x > 0, x, alpha * x)``
formulation or an optimized Op, so we encourage to use this function.
(or ``T.maximum(x, alpha * x)`` for ``alpha < 1``), but uses a faster
formulation or an optimized Op, so we encourage to use this function.
"""
"""
# This is probably the fastest implementation for GPUs. Both the forward
# This is probably the fastest implementation for GPUs. Both the forward
...
...
theano/tensor/nnet/sigm.py
浏览文件 @
6304a061
"""Ops and optimizations: sigmoid, softplus
"""
Ops and optimizations: sigmoid, softplus.
These functions implement special cases of exp and log to improve numerical
stability.
These functions implement special cases of exp and log to improve numerical stability.
"""
"""
from
__future__
import
print_function
from
__future__
import
print_function
...
@@ -25,6 +28,7 @@ from theano.tensor import elemwise, opt, NotScalarConstantError
...
@@ -25,6 +28,7 @@ from theano.tensor import elemwise, opt, NotScalarConstantError
class
ScalarSigmoid
(
scalar
.
UnaryScalarOp
):
class
ScalarSigmoid
(
scalar
.
UnaryScalarOp
):
"""
"""
This is just speed opt. Not for stability.
This is just speed opt. Not for stability.
"""
"""
@staticmethod
@staticmethod
def
st_impl
(
x
):
def
st_impl
(
x
):
...
@@ -126,7 +130,8 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
...
@@ -126,7 +130,8 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
@staticmethod
@staticmethod
def
gen_graph
():
def
gen_graph
():
"""
"""
This method was used to generate the graph: sigmoid_prec.png in the doc
This method was used to generate the graph: sigmoid_prec.png in the doc.
"""
"""
data
=
numpy
.
arange
(
-
15
,
15
,
.
1
)
data
=
numpy
.
arange
(
-
15
,
15
,
.
1
)
val
=
1
/
(
1
+
numpy
.
exp
(
-
data
))
val
=
1
/
(
1
+
numpy
.
exp
(
-
data
))
...
@@ -173,6 +178,7 @@ pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
...
@@ -173,6 +178,7 @@ pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
class
UltraFastScalarSigmoid
(
scalar
.
UnaryScalarOp
):
class
UltraFastScalarSigmoid
(
scalar
.
UnaryScalarOp
):
"""
"""
This is just speed opt. Not for stability.
This is just speed opt. Not for stability.
"""
"""
@staticmethod
@staticmethod
def
st_impl
(
x
):
def
st_impl
(
x
):
...
@@ -245,7 +251,7 @@ def local_ultra_fast_sigmoid(node):
...
@@ -245,7 +251,7 @@ def local_ultra_fast_sigmoid(node):
When enabled, change all sigmoid to ultra_fast_sigmoid.
When enabled, change all sigmoid to ultra_fast_sigmoid.
For example do mode.including('local_ultra_fast_sigmoid')
For example do mode.including('local_ultra_fast_sigmoid')
or use the Theano flag optimizer_including=local_ultra_fast_sigmoid
or use the Theano flag optimizer_including=local_ultra_fast_sigmoid
.
This speeds up the sigmoid op by using an approximation.
This speeds up the sigmoid op by using an approximation.
...
@@ -269,11 +275,12 @@ theano.compile.optdb['uncanonicalize'].register("local_ultra_fast_sigmoid",
...
@@ -269,11 +275,12 @@ theano.compile.optdb['uncanonicalize'].register("local_ultra_fast_sigmoid",
def
hard_sigmoid
(
x
):
def
hard_sigmoid
(
x
):
"""An approximation of sigmoid.
"""
An approximation of sigmoid.
More approximate and faster than ultra_fast_sigmoid.
More approximate and faster than ultra_fast_sigmoid.
Approx in 3 parts: 0, scaled linear, 1
Approx in 3 parts: 0, scaled linear, 1
.
Removing the slope and shift does not make it faster.
Removing the slope and shift does not make it faster.
...
@@ -375,7 +382,13 @@ logsigm_to_softplus = gof.PatternSub(
...
@@ -375,7 +382,13 @@ logsigm_to_softplus = gof.PatternSub(
def
_is_1
(
expr
):
def
_is_1
(
expr
):
"""rtype bool. True iff expr is a constant close to 1
"""
Returns
-------
bool
True iff expr is a constant close to 1.
"""
"""
try
:
try
:
v
=
opt
.
get_scalar_constant_value
(
expr
)
v
=
opt
.
get_scalar_constant_value
(
expr
)
...
@@ -405,8 +418,13 @@ opt.register_stabilize(log1pexp_to_softplus, name='log1pexp_to_softplus')
...
@@ -405,8 +418,13 @@ opt.register_stabilize(log1pexp_to_softplus, name='log1pexp_to_softplus')
def
is_1pexp
(
t
):
def
is_1pexp
(
t
):
"""
"""
If 't' is of the form (1+exp(x)), return (False, x).
Else return None.
Returns
-------
object
If 't' is of the form (1+exp(x)), return (False, x).
Else return None.
"""
"""
if
t
.
owner
and
t
.
owner
.
op
==
tensor
.
add
:
if
t
.
owner
and
t
.
owner
.
op
==
tensor
.
add
:
scalars
,
scalar_inputs
,
nonconsts
=
\
scalars
,
scalar_inputs
,
nonconsts
=
\
...
@@ -449,11 +467,18 @@ def is_exp(var):
...
@@ -449,11 +467,18 @@ def is_exp(var):
"""
"""
Match a variable with either of the `exp(x)` or `-exp(x)` patterns.
Match a variable with either of the `exp(x)` or `-exp(x)` patterns.
:param var: The Variable to analyze.
Parameters
----------
var
The Variable to analyze.
Returns
-------
tuple
A pair (b, x) with `b` a boolean set to True if `var` is of the
form `-exp(x)` and False if `var` is of the form `exp(x)`. If `var`
cannot be cast into either form, then return `None`.
:return: A pair (b, x) with `b` a boolean set to True if `var` is of the
form `-exp(x)` and False if `var` is of the form `exp(x)`. If `var` cannot
be cast into either form, then return `None`.
"""
"""
neg
=
False
neg
=
False
neg_info
=
is_neg
(
var
)
neg_info
=
is_neg
(
var
)
...
@@ -468,10 +493,17 @@ def is_mul(var):
...
@@ -468,10 +493,17 @@ def is_mul(var):
"""
"""
Match a variable with `x * y * z * ...`.
Match a variable with `x * y * z * ...`.
:param var: The Variable to analyze.
Parameters
----------
var
The Variable to analyze.
Returns
-------
object
A list [x, y, z, ...] if `var` is of the form `x * y * z * ...`,
or None if `var` cannot be cast into this form.
:return: A list [x, y, z, ...] if `var` is of the form `x * y * z * ...`,
or None if `var` cannot be cast into this form.
"""
"""
if
var
.
owner
and
var
.
owner
.
op
==
tensor
.
mul
:
if
var
.
owner
and
var
.
owner
.
op
==
tensor
.
mul
:
return
var
.
owner
.
inputs
return
var
.
owner
.
inputs
...
@@ -504,9 +536,16 @@ def is_neg(var):
...
@@ -504,9 +536,16 @@ def is_neg(var):
"""
"""
Match a variable with the `-x` pattern.
Match a variable with the `-x` pattern.
:param var: The Variable to analyze.
Parameters
----------
var
The Variable to analyze.
Returns
-------
object
`x` if `var` is of the form `-x`, or None otherwise.
:return: `x` if `var` is of the form `-x`, or None otherwise.
"""
"""
apply
=
var
.
owner
apply
=
var
.
owner
if
not
apply
:
if
not
apply
:
...
@@ -538,8 +577,10 @@ def is_neg(var):
...
@@ -538,8 +577,10 @@ def is_neg(var):
@opt.register_stabilize
@opt.register_stabilize
@gof.local_optimizer
([
tensor
.
true_div
])
@gof.local_optimizer
([
tensor
.
true_div
])
def
local_exp_over_1_plus_exp
(
node
):
def
local_exp_over_1_plus_exp
(
node
):
"""exp(x)/(1+exp(x)) -> sigm(x)
"""
exp(x)/(1+exp(x)) -> sigm(x)
c/(1+exp(x)) -> c*sigm(-x)
c/(1+exp(x)) -> c*sigm(-x)
"""
"""
# this optimization should be done for numerical stability
# this optimization should be done for numerical stability
# so we don't care to check client counts
# so we don't care to check client counts
...
@@ -585,20 +626,28 @@ def parse_mul_tree(root):
...
@@ -585,20 +626,28 @@ def parse_mul_tree(root):
"""
"""
Parse a tree of multiplications starting at the given root.
Parse a tree of multiplications starting at the given root.
:param root: The variable at the root of the tree.
Parameters
----------
:return: A tree where each non-leaf node corresponds to a multiplication
root
in the computation of `root`, represented by the list of its inputs. Each
The variable at the root of the tree.
input is a pair [n, x] with `n` a boolean value indicating whether
sub-tree `x` should be negated.
Returns
-------
Examples:
object
A tree where each non-leaf node corresponds to a multiplication
in the computation of `root`, represented by the list of its inputs.
Each input is a pair [n, x] with `n` a boolean value indicating whether
sub-tree `x` should be negated.
Examples
--------
x * y -> [False, [[False, x], [False, y]]]
x * y -> [False, [[False, x], [False, y]]]
-(x * y) -> [True, [[False, x], [False, y]]]
-(x * y) -> [True, [[False, x], [False, y]]]
-x * y -> [False, [[True, x], [False, y]]]
-x * y -> [False, [[True, x], [False, y]]]
-x -> [True, x]
-x -> [True, x]
(x * y) * -z -> [False, [[False, [[False, x], [False, y]]],
(x * y) * -z -> [False, [[False, [[False, x], [False, y]]],
[True, z]]]
[True, z]]]
"""
"""
# Is it a multiplication?
# Is it a multiplication?
mul_info
=
is_mul
(
root
)
mul_info
=
is_mul
(
root
)
...
@@ -619,29 +668,36 @@ def parse_mul_tree(root):
...
@@ -619,29 +668,36 @@ def parse_mul_tree(root):
def
replace_leaf
(
arg
,
leaves
,
new_leaves
,
op
,
neg
):
def
replace_leaf
(
arg
,
leaves
,
new_leaves
,
op
,
neg
):
"""
"""
Attempt
s
to replace a leaf of a multiplication tree.
Attempt to replace a leaf of a multiplication tree.
We search for a leaf in `leaves` whose argument is `arg`, and if we find
We search for a leaf in `leaves` whose argument is `arg`, and if we find
one, we remove it from `leaves` and add to `new_leaves` a leaf with
one, we remove it from `leaves` and add to `new_leaves` a leaf with
argument `arg` and variable `op(arg)`.
argument `arg` and variable `op(arg)`.
:param arg: The argument of the leaf we are looking for.
Parameters
----------
:param leaves: List of leaves to look into. Each leaf should be a pair
arg
(x, l) with `x` the argument of the Op found in the leaf, and `l` the
The argument of the leaf we are looking for.
actual leaf as found in a multiplication tree output by `parse_mul_tree`
leaves
(i.e. a pair [boolean, variable]).
List of leaves to look into. Each leaf should be a pair
(x, l) with `x` the argument of the Op found in the leaf, and `l` the
:param new_leaves: If a replacement occurred, then the leaf is removed from
actual leaf as found in a multiplication tree output by `parse_mul_tree`
`leaves` and added to the list `new_leaves` (after being modified by `op`).
(i.e. a pair [boolean, variable]).
new_leaves
:param op: A function that, when applied to `arg`, returns the Variable
If a replacement occurred, then the leaf is removed from `leaves`
we want to replace the original leaf variable with.
and added to the list `new_leaves` (after being modified by `op`).
op
:param neg: If True, then the boolean value associated to the leaf should
A function that, when applied to `arg`, returns the Variable
be swapped. If False, then this value should remain unchanged.
we want to replace the original leaf variable with.
neg : bool
If True, then the boolean value associated to the leaf should
be swapped. If False, then this value should remain unchanged.
Returns
-------
bool
True if a replacement occurred, or False otherwise.
:return: True if a replacement occurred, or False otherwise.
"""
"""
for
idx
,
x
in
enumerate
(
leaves
):
for
idx
,
x
in
enumerate
(
leaves
):
if
x
[
0
]
==
arg
:
if
x
[
0
]
==
arg
:
...
@@ -657,12 +713,19 @@ def simplify_mul(tree):
...
@@ -657,12 +713,19 @@ def simplify_mul(tree):
"""
"""
Simplify a multiplication tree.
Simplify a multiplication tree.
:param tree: A multiplication tree (as output by `parse_mul_tree`).
Parameters
----------
tree
A multiplication tree (as output by `parse_mul_tree`).
Returns
-------
object
A multiplication tree computing the same output as `tree` but without
useless multiplications by 1 nor -1 (identified by leaves of the form
[False, None] or [True, None] respectively). Useless multiplications
(with less than two inputs) are also removed from the tree.
:return: A multiplication tree computing the same output as `tree` but
without useless multiplications by 1 nor -1 (identified by leaves of the
form [False, None] or [True, None] respectively). Useless multiplications
(with less than two inputs) are also removed from the tree.
"""
"""
neg
,
inputs
=
tree
neg
,
inputs
=
tree
if
isinstance
(
inputs
,
list
):
if
isinstance
(
inputs
,
list
):
...
@@ -694,12 +757,18 @@ def compute_mul(tree):
...
@@ -694,12 +757,18 @@ def compute_mul(tree):
Compute the Variable that is the output of a multiplication tree.
Compute the Variable that is the output of a multiplication tree.
This is the inverse of the operation performed by `parse_mul_tree`, i.e.
This is the inverse of the operation performed by `parse_mul_tree`, i.e.
compute_mul(parse_mul_tree(tree)) == tree
compute_mul(parse_mul_tree(tree)) == tree.
Parameters
----------
tree
A multiplication tree (as output by `parse_mul_tree`).
:param tree: A multiplication tree (as output by `parse_mul_tree`).
Returns
-------
object
A Variable that computes the multiplication represented by the tree.
:return: A Variable that computes the multiplication represented by the
tree.
"""
"""
neg
,
inputs
=
tree
neg
,
inputs
=
tree
if
inputs
is
None
:
if
inputs
is
None
:
...
@@ -727,32 +796,38 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None,
...
@@ -727,32 +796,38 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None,
by replacing matching pairs (exp, sigmoid) with the desired optimized
by replacing matching pairs (exp, sigmoid) with the desired optimized
version.
version.
:param tree: The sub-tree to operate on.
Parameters
----------
:exp_x: List of arguments x so that `exp(x)` exists somewhere in the whole
tree
multiplication tree. Each argument is a pair (x, leaf) with `x` the
The sub-tree to operate on.
argument of the exponential, and `leaf` the corresponding leaf in the
exp_x
multiplication tree (of the form [n, exp(x)] -- see `parse_mul_tree`).
List of arguments x so that `exp(x)` exists somewhere in the whole
If None, this argument is initialized to an empty list.
multiplication tree. Each argument is a pair (x, leaf) with `x` the
argument of the exponential, and `leaf` the corresponding leaf in the
:param exp_minus_x: Similar to `exp_x`, but for `exp(-x)`.
multiplication tree (of the form [n, exp(x)] -- see `parse_mul_tree`).
If None, this argument is initialized to an empty list.
exp_minus_x
Similar to `exp_x`, but for `exp(-x)`.
sigm_x
Similar to `exp_x`, but for `sigmoid(x)`.
sigm_minus_x
Similar to `exp_x`, but for `sigmoid(-x)`.
parent
Parent of `tree` (None if `tree` is the global root).
child_idx
Index of `tree` in its parent's inputs (None if `tree` is the global
root).
full_tree
The global multiplication tree (should not be set except by recursive
calls to this function). Used for debugging only.
Returns
-------
bool
True if a modification was performed somewhere in the whole multiplication
tree, or False otherwise.
:param sigm_x: Similar to `exp_x`, but for `sigmoid(x)`.
:param sigm_minus_x: Similar to `exp_x`, but for `sigmoid(-x)`.
:param parent: Parent of `tree` (None if `tree` is the global root).
:param child_idx: Index of `tree` in its parent's inputs (None if `tree` is
the global root).
:param full_tree: The global multiplication tree (should not be set except
by recursive calls to this function). Used for debugging only.
:return: True if a modification was performed somewhere in the whole
multiplication tree, or False otherwise.
"""
"""
if
exp_x
is
None
:
if
exp_x
is
None
:
exp_x
=
[]
exp_x
=
[]
if
exp_minus_x
is
None
:
if
exp_minus_x
is
None
:
...
@@ -836,6 +911,7 @@ def local_sigm_times_exp(node):
...
@@ -836,6 +911,7 @@ def local_sigm_times_exp(node):
"""
"""
exp(x) * sigm(-x) -> sigm(x)
exp(x) * sigm(-x) -> sigm(x)
exp(-x) * sigm(x) -> sigm(-x)
exp(-x) * sigm(x) -> sigm(-x)
"""
"""
# Bail early if it is not a multiplication.
# Bail early if it is not a multiplication.
if
node
.
op
!=
tensor
.
mul
:
if
node
.
op
!=
tensor
.
mul
:
...
@@ -859,6 +935,7 @@ def local_sigm_times_exp(node):
...
@@ -859,6 +935,7 @@ def local_sigm_times_exp(node):
def
local_inv_1_plus_exp
(
node
):
def
local_inv_1_plus_exp
(
node
):
"""
"""
1/(1+exp(x)) -> sigm(-x)
1/(1+exp(x)) -> sigm(-x)
"""
"""
# this optimization should be done for numerical stability
# this optimization should be done for numerical stability
# so we don't care to check client counts
# so we don't care to check client counts
...
@@ -883,6 +960,7 @@ def local_inv_1_plus_exp(node):
...
@@ -883,6 +960,7 @@ def local_inv_1_plus_exp(node):
def
local_1msigmoid
(
node
):
def
local_1msigmoid
(
node
):
"""
"""
1-sigm(x) -> sigm(-x)
1-sigm(x) -> sigm(-x)
"""
"""
if
node
.
op
==
tensor
.
sub
:
if
node
.
op
==
tensor
.
sub
:
sub_l
,
sub_r
=
node
.
inputs
sub_l
,
sub_r
=
node
.
inputs
...
...
theano/tensor/opt.py
浏览文件 @
6304a061
"""
"""
Tensor optimizations addressing the ops in basic.py
Tensor optimizations addressing the ops in basic.py
.
"""
"""
from
__future__
import
print_function
from
__future__
import
print_function
# TODO: intelligent merge for mul/add
# TODO: intelligent merge for mul/add
...
@@ -68,15 +68,20 @@ def copy_stack_trace(from_var, to_var):
...
@@ -68,15 +68,20 @@ def copy_stack_trace(from_var, to_var):
Copies the stack trace from one or more tensor variables to
Copies the stack trace from one or more tensor variables to
one or more tensor variables.
one or more tensor variables.
:param from_var: tensor variable or list of tensor variables to
Parameters
copy stack traces from.
----------
:param to_var: tensor variable or list of tensor variables to
from_var
copy stack traces to.
Tensor variable or list of tensor variables to copy stack traces from.
to_var
Tensor variable or list of tensor variables to copy stack traces to.
.. note:: The stacktrace is assumed to be of the form of a list of lists
Notes
-----
The stacktrace is assumed to be of the form of a list of lists
of tuples. Each tuple contains the filename, line number, function name
of tuples. Each tuple contains the filename, line number, function name
and so on. Each list of tuples contains the truples belonging to a
and so on. Each list of tuples contains the truples belonging to a
particular variable.
particular variable.
"""
"""
# Store stack traces from from_var
# Store stack traces from from_var
...
@@ -151,11 +156,20 @@ def _fill_chain(new_out, orig_inputs):
...
@@ -151,11 +156,20 @@ def _fill_chain(new_out, orig_inputs):
def
encompasses_broadcastable
(
b1
,
b2
):
def
encompasses_broadcastable
(
b1
,
b2
):
"""
"""
Returns True if the broadcastable patterns b1 and b2 are such that b2 is
broadcasted to b1's shape and not the opposite.
:param b1: the broadcastable attribute of a tensor type
Parameters
:param b2: the broadcastable attribute of a tensor type
----------
b1
The broadcastable attribute of a tensor type.
b2
The broadcastable attribute of a tensor type.
Returns
-------
bool
True if the broadcastable patterns b1 and b2 are such that b2 is
broadcasted to b1's shape and not the opposite.
"""
"""
if
len
(
b1
)
<
len
(
b2
):
if
len
(
b1
)
<
len
(
b2
):
return
False
return
False
...
@@ -184,7 +198,8 @@ def scalarconsts_rest(inputs):
...
@@ -184,7 +198,8 @@ def scalarconsts_rest(inputs):
def
broadcast_like
(
value
,
template
,
fgraph
,
dtype
=
None
):
def
broadcast_like
(
value
,
template
,
fgraph
,
dtype
=
None
):
"""Return a Variable with the same shape and dtype as the template,
"""
Return a Variable with the same shape and dtype as the template,
filled by broadcasting value through it. `value` will be cast as
filled by broadcasting value through it. `value` will be cast as
necessary.
necessary.
...
@@ -240,9 +255,11 @@ def inplace_elemwise_optimizer_op(OP):
...
@@ -240,9 +255,11 @@ def inplace_elemwise_optimizer_op(OP):
see if it can operate inplace on that input. If so, makes the
see if it can operate inplace on that input. If so, makes the
change and go to the next output or Broadcast Op.
change and go to the next output or Broadcast Op.
Examples:
Examples
x + y + z -> x += y += z
--------
(x + y) * (x * y) -> (x += y) *= (x * y) or (x + y) *= (x *= y)
x + y + z -> x += y += z
(x + y) * (x * y) -> (x += y) *= (x * y) or (x + y) *= (x *= y)
"""
"""
# We should not validate too often as this takes too much time to
# We should not validate too often as this takes too much time to
# execute!
# execute!
...
@@ -507,6 +524,7 @@ def local_dimshuffle_lift(node):
...
@@ -507,6 +524,7 @@ def local_dimshuffle_lift(node):
After this transform, clusters of Elemwise operations are
After this transform, clusters of Elemwise operations are
void of DimShuffle operations.
void of DimShuffle operations.
"""
"""
op
=
node
.
op
op
=
node
.
op
if
not
isinstance
(
op
,
DimShuffle
):
if
not
isinstance
(
op
,
DimShuffle
):
...
@@ -556,6 +574,7 @@ def local_lift_transpose_through_dot(node):
...
@@ -556,6 +574,7 @@ def local_lift_transpose_through_dot(node):
The transformation should be apply whether or not the transpose is
The transformation should be apply whether or not the transpose is
inplace. The newly-introduced transpositions are not inplace, this will
inplace. The newly-introduced transpositions are not inplace, this will
be taken care of in a later optimization phase.
be taken care of in a later optimization phase.
"""
"""
if
not
(
isinstance
(
node
.
op
,
T
.
DimShuffle
)
and
node
.
op
.
new_order
==
(
1
,
0
)):
if
not
(
isinstance
(
node
.
op
,
T
.
DimShuffle
)
and
node
.
op
.
new_order
==
(
1
,
0
)):
return
False
return
False
...
@@ -639,11 +658,12 @@ def local_scalar_tensor_scalar(node):
...
@@ -639,11 +658,12 @@ def local_scalar_tensor_scalar(node):
class
MakeVector
(
T
.
Op
):
class
MakeVector
(
T
.
Op
):
"""Concatenate a number of scalars together into a vector
"""Concatenate a number of scalars together into a vector
.
This is a simple version of stack() that introduces far less cruft
This is a simple version of stack() that introduces far less cruft
into the graph. Should work with 0 inputs. The constant_folding
into the graph. Should work with 0 inputs. The constant_folding
optimization will remove it.
optimization will remove it.
"""
"""
__props__
=
(
"dtype"
,)
__props__
=
(
"dtype"
,)
...
@@ -755,7 +775,7 @@ T.pprint.assign(lambda pstate, r: r.owner and
...
@@ -755,7 +775,7 @@ T.pprint.assign(lambda pstate, r: r.owner and
class
ShapeFeature
(
object
):
class
ShapeFeature
(
object
):
"""Graph optimizer for removing all calls to shape()
"""Graph optimizer for removing all calls to shape()
.
This optimizer replaces all Shapes and Subtensors of Shapes with
This optimizer replaces all Shapes and Subtensors of Shapes with
Shape_i and MakeVector Ops.
Shape_i and MakeVector Ops.
...
@@ -791,7 +811,6 @@ class ShapeFeature(object):
...
@@ -791,7 +811,6 @@ class ShapeFeature(object):
For example the infer_shape for a matrix-matrix product would accept
For example the infer_shape for a matrix-matrix product would accept
input_shapes=((x0,x1), (y0,y1)) and return ((x0, y1),).
input_shapes=((x0,x1), (y0,y1)) and return ((x0, y1),).
Inferring the shape of internal nodes in the graph is important
Inferring the shape of internal nodes in the graph is important
for doing size-driven optimizations. If we know how big various
for doing size-driven optimizations. If we know how big various
intermediate results will be, we can estimate the cost of many Ops
intermediate results will be, we can estimate the cost of many Ops
...
@@ -800,18 +819,18 @@ class ShapeFeature(object):
...
@@ -800,18 +819,18 @@ class ShapeFeature(object):
In cases where you cannot figure out the shape, raise a ShapeError.
In cases where you cannot figure out the shape, raise a ShapeError.
.. note::
Notes
-----
Right now there is only the ConvOp that could really take
Right now there is only the ConvOp that could really take
advantage of this shape inference, but it is worth it even
advantage of this shape inference, but it is worth it even
just for the ConvOp. All that's necessary to do shape
just for the ConvOp. All that's necessary to do shape
inference is 1) to mark shared inputs as having a particular
inference is 1) to mark shared inputs as having a particular
shape, either via a .tag or some similar hacking; and 2) to
shape, either via a .tag or some similar hacking; and 2) to
add an optional Param() argument to promise that inputs will
add an optional Param() argument to promise that inputs will
have a certain shape (or even to have certain shapes in
have a certain shape (or even to have certain shapes in
certain dimensions). We can't automatically infer the shape of
certain dimensions). We can't automatically infer the shape of
shared variables as they can change of shape during the
shared variables as they can change of shape during the
execution by default. (NOT IMPLEMENTED YET, BUT IS IN TRAC)
execution by default. (NOT IMPLEMENTED YET, BUT IS IN TRAC)
Using Shape information in Optimizations
Using Shape information in Optimizations
...
@@ -842,7 +861,7 @@ class ShapeFeature(object):
...
@@ -842,7 +861,7 @@ class ShapeFeature(object):
"""
"""
def
shape_ir
(
self
,
i
,
r
):
def
shape_ir
(
self
,
i
,
r
):
"""Return symbolic r.shape[i] for tensor variable r, int i"""
"""Return symbolic r.shape[i] for tensor variable r, int i
.
"""
if
hasattr
(
r
.
type
,
"broadcastable"
)
and
r
.
type
.
broadcastable
[
i
]:
if
hasattr
(
r
.
type
,
"broadcastable"
)
and
r
.
type
.
broadcastable
[
i
]:
return
self
.
lscalar_one
return
self
.
lscalar_one
else
:
else
:
...
@@ -855,7 +874,7 @@ class ShapeFeature(object):
...
@@ -855,7 +874,7 @@ class ShapeFeature(object):
return
s
return
s
def
shape_tuple
(
self
,
r
):
def
shape_tuple
(
self
,
r
):
"""Return a tuple of symbolic shape vars for tensor variable r"""
"""Return a tuple of symbolic shape vars for tensor variable r
.
"""
if
not
hasattr
(
r
,
'ndim'
):
if
not
hasattr
(
r
,
'ndim'
):
# This happen for NoneConst.
# This happen for NoneConst.
return
None
return
None
...
@@ -867,6 +886,7 @@ class ShapeFeature(object):
...
@@ -867,6 +886,7 @@ class ShapeFeature(object):
This function is used for Ops that don't implement infer_shape.
This function is used for Ops that don't implement infer_shape.
Ops that do implement infer_shape should use the i_shapes parameter,
Ops that do implement infer_shape should use the i_shapes parameter,
but this default implementation ignores it.
but this default implementation ignores it.
"""
"""
rval
=
[]
rval
=
[]
for
r
in
node
.
outputs
:
for
r
in
node
.
outputs
:
...
@@ -880,6 +900,7 @@ class ShapeFeature(object):
...
@@ -880,6 +900,7 @@ class ShapeFeature(object):
"""Return a symbolic integer scalar for the shape element s_i.
"""Return a symbolic integer scalar for the shape element s_i.
The s_i argument was produced by the infer_shape() of an Op subclass.
The s_i argument was produced by the infer_shape() of an Op subclass.
"""
"""
# unpack the s_i that the Op returned
# unpack the s_i that the Op returned
assert
s_i
is
not
None
assert
s_i
is
not
None
...
@@ -933,8 +954,11 @@ class ShapeFeature(object):
...
@@ -933,8 +954,11 @@ class ShapeFeature(object):
def
set_shape
(
self
,
r
,
s
):
def
set_shape
(
self
,
r
,
s
):
"""Assign the shape `s` to previously un-shaped variable `r`.
"""Assign the shape `s` to previously un-shaped variable `r`.
:type r: a variable
Parameters
:type s: None or a tuple of symbolic integers
----------
r : a variable
s : None or a tuple of symbolic integers
"""
"""
assert
r
not
in
self
.
shape_of
,
'r already in shape_of'
assert
r
not
in
self
.
shape_of
,
'r already in shape_of'
if
s
is
None
:
if
s
is
None
:
...
@@ -972,11 +996,12 @@ class ShapeFeature(object):
...
@@ -972,11 +996,12 @@ class ShapeFeature(object):
self
.
shape_of_reverse_index
.
setdefault
(
sv
,
set
())
.
add
(
r
)
self
.
shape_of_reverse_index
.
setdefault
(
sv
,
set
())
.
add
(
r
)
def
update_shape
(
self
,
r
,
other_r
):
def
update_shape
(
self
,
r
,
other_r
):
'''
Replace shape of r by shape of other_r.
"""
Replace shape of r by shape of other_r.
If, on some dimensions, the shape of other_r is not informative,
If, on some dimensions, the shape of other_r is not informative,
keep the shape of r on those dimensions.
keep the shape of r on those dimensions.
'''
"""
# other_r should already have a shape
# other_r should already have a shape
assert
other_r
in
self
.
shape_of
,
(
'other_r not in shape_of'
,
other_r
)
assert
other_r
in
self
.
shape_of
,
(
'other_r not in shape_of'
,
other_r
)
other_shape
=
self
.
shape_of
[
other_r
]
other_shape
=
self
.
shape_of
[
other_r
]
...
@@ -1303,8 +1328,7 @@ class ShapeFeature(object):
...
@@ -1303,8 +1328,7 @@ class ShapeFeature(object):
class
ShapeOptimizer
(
Optimizer
):
class
ShapeOptimizer
(
Optimizer
):
"""Optimizer that serves to add ShapeFeature as an fgraph feature.
"""Optimizer that serves to add ShapeFeature as an fgraph feature."""
"""
def
__init__
(
self
):
def
__init__
(
self
):
Optimizer
.
__init__
(
self
)
Optimizer
.
__init__
(
self
)
...
@@ -1392,6 +1416,7 @@ def local_useless_alloc(node):
...
@@ -1392,6 +1416,7 @@ def local_useless_alloc(node):
If the input type is the same as the output type (dtype and broadcast)
If the input type is the same as the output type (dtype and broadcast)
there is no change in the shape of the input. So this is just a simple copy
there is no change in the shape of the input. So this is just a simple copy
of the input. This is not needed.
of the input. This is not needed.
"""
"""
if
node
.
op
==
T
.
alloc
:
if
node
.
op
==
T
.
alloc
:
if
node
.
inputs
[
0
]
.
type
==
node
.
outputs
[
0
]
.
type
:
if
node
.
inputs
[
0
]
.
type
==
node
.
outputs
[
0
]
.
type
:
...
@@ -1438,14 +1463,15 @@ def local_track_shape_i(node):
...
@@ -1438,14 +1463,15 @@ def local_track_shape_i(node):
@gof.local_optimizer
([
Subtensor
,
AdvancedSubtensor1
])
@gof.local_optimizer
([
Subtensor
,
AdvancedSubtensor1
])
def
local_subtensor_make_vector
(
node
):
def
local_subtensor_make_vector
(
node
):
"""
"""
r
eplace all subtensor(make_vector) like:
R
eplace all subtensor(make_vector) like:
[a,b,c][0] -> a
[a,b,c][0] -> a
[a,b,c][0:2] -> [a,b]
[a,b,c][0:2] -> [a,b]
r
eplace all AdvancedSubtensor1(make_vector) like:
R
eplace all AdvancedSubtensor1(make_vector) like:
[a,b,c][[0,2]] -> [a,c]
[a,b,c][[0,2]] -> [a,c]
we can do this for constant indexes
We can do this for constant indexes.
"""
"""
x
=
node
.
inputs
[
0
]
x
=
node
.
inputs
[
0
]
if
not
x
.
owner
or
x
.
owner
.
op
!=
make_vector
:
if
not
x
.
owner
or
x
.
owner
.
op
!=
make_vector
:
...
@@ -1514,7 +1540,6 @@ def local_subtensor_make_vector(node):
...
@@ -1514,7 +1540,6 @@ def local_subtensor_make_vector(node):
@gof.local_optimizer
([
T
.
Elemwise
])
@gof.local_optimizer
([
T
.
Elemwise
])
def
local_useless_elemwise
(
node
):
def
local_useless_elemwise
(
node
):
"""
"""
eq(x,x) -> 1
eq(x,x) -> 1
neq(x,x) -> 0
neq(x,x) -> 0
mul(x) -> x
mul(x) -> x
...
@@ -1559,8 +1584,7 @@ def local_useless_elemwise(node):
...
@@ -1559,8 +1584,7 @@ def local_useless_elemwise(node):
@register_specialize
@register_specialize
@gof.local_optimizer
([
T
.
Elemwise
])
@gof.local_optimizer
([
T
.
Elemwise
])
def
local_alloc_unary
(
node
):
def
local_alloc_unary
(
node
):
"""unary(alloc(x, shp)) -> alloc(unary(x), shp)
"""unary(alloc(x, shp)) -> alloc(unary(x), shp)"""
"""
if
isinstance
(
node
.
op
,
T
.
Elemwise
)
and
len
(
node
.
inputs
)
==
1
:
if
isinstance
(
node
.
op
,
T
.
Elemwise
)
and
len
(
node
.
inputs
)
==
1
:
a
=
node
.
inputs
[
0
]
a
=
node
.
inputs
[
0
]
if
a
.
owner
and
isinstance
(
a
.
owner
.
op
,
T
.
Alloc
):
if
a
.
owner
and
isinstance
(
a
.
owner
.
op
,
T
.
Alloc
):
...
@@ -1587,6 +1611,7 @@ def local_cast_cast(node):
...
@@ -1587,6 +1611,7 @@ def local_cast_cast(node):
dtype1 == dtype2
dtype1 == dtype2
TODO: the base dtype is the same (int, uint, float, complex)
TODO: the base dtype is the same (int, uint, float, complex)
and the first cast cause an upcast.
and the first cast cause an upcast.
"""
"""
if
(
not
isinstance
(
node
.
op
,
T
.
Elemwise
)
or
if
(
not
isinstance
(
node
.
op
,
T
.
Elemwise
)
or
not
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
Cast
)):
not
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
Cast
)):
...
@@ -1607,9 +1632,9 @@ def local_cast_cast(node):
...
@@ -1607,9 +1632,9 @@ def local_cast_cast(node):
def
local_func_inv
(
node
):
def
local_func_inv
(
node
):
"""
"""
Check for two consecutive operations that are functional inverses
Check for two consecutive operations that are functional inverses
and remove them from the function graph
and remove them from the function graph.
"""
"""
inv_pairs
=
(
inv_pairs
=
(
(
basic
.
Deg2Rad
,
basic
.
Rad2Deg
),
(
basic
.
Deg2Rad
,
basic
.
Rad2Deg
),
(
basic
.
Cosh
,
basic
.
ArcCosh
),
(
basic
.
Cosh
,
basic
.
ArcCosh
),
...
@@ -1641,9 +1666,9 @@ def local_func_inv(node):
...
@@ -1641,9 +1666,9 @@ def local_func_inv(node):
def
is_inverse_pair
(
node_op
,
prev_op
,
inv_pair
):
def
is_inverse_pair
(
node_op
,
prev_op
,
inv_pair
):
"""
"""
Given two consecutive operations, check if they are the
Given two consecutive operations, check if they are the
provided pair of inverse functions
provided pair of inverse functions.
"""
"""
node_is_op0
=
isinstance
(
node_op
,
inv_pair
[
0
])
node_is_op0
=
isinstance
(
node_op
,
inv_pair
[
0
])
node_is_op1
=
isinstance
(
node_op
,
inv_pair
[
1
])
node_is_op1
=
isinstance
(
node_op
,
inv_pair
[
1
])
prev_is_op0
=
isinstance
(
prev_op
,
inv_pair
[
0
])
prev_is_op0
=
isinstance
(
prev_op
,
inv_pair
[
0
])
...
@@ -1659,20 +1684,24 @@ class Assert(T.Op):
...
@@ -1659,20 +1684,24 @@ class Assert(T.Op):
Returns the first parameter if the condition is true, otherwise, triggers
Returns the first parameter if the condition is true, otherwise, triggers
AssertionError.
AssertionError.
Example:
Notes
T = theano.tensor
-----
x = T.vector('x')
assert_op = T.opt.Assert()
func = theano.function([x], assert_op(x, x.size<2))
Notes:
This Op is a debugging feature. It can be removed from the graph
This Op is a debugging feature. It can be removed from the graph
because of optimizations, and can hide some possible optimizations to
because of optimizations, and can hide some possible optimizations to
the optimizer. Specifically, removing happens if it can be determined
the optimizer. Specifically, removing happens if it can be determined
that condition will always be true. Also, the output of the Op must be
that condition will always be true. Also, the output of the Op must be
used in the function computing the graph, but it doesn't have to be
used in the function computing the graph, but it doesn't have to be
returned.
returned.
Examples
--------
T = theano.tensor
x = T.vector('x')
assert_op = T.opt.Assert()
func = theano.function([x], assert_op(x, x.size<2))
"""
"""
__props__
=
(
'msg'
,)
__props__
=
(
'msg'
,)
view_map
=
{
0
:
[
0
]}
view_map
=
{
0
:
[
0
]}
...
@@ -1770,7 +1799,9 @@ def local_remove_all_assert(node):
...
@@ -1770,7 +1799,9 @@ def local_remove_all_assert(node):
"""An optimization disabled by default that removes all asserts from
"""An optimization disabled by default that removes all asserts from
the graph.
the graph.
:note: See the :ref:`unsafe` section to know how to enable it.
Notes
-----
See the :ref:`unsafe` section to know how to enable it.
"""
"""
if
not
isinstance
(
node
.
op
,
Assert
):
if
not
isinstance
(
node
.
op
,
Assert
):
...
@@ -1804,11 +1835,12 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
...
@@ -1804,11 +1835,12 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
BROADCAST CONDITION: the condition is that the one input that are
BROADCAST CONDITION: the condition is that the one input that are
not to be optimized to have the same broadcast pattern as the
not to be optimized to have the same broadcast pattern as the
output
output.
We can change the alloc by a dimshuffle as the elemwise
already have the shape info. The dimshuffle will be faster
to exec.
We can change the alloc by a dimshuffle as the elemwise
already have the shape info. The dimshuffle will be faster
to exec
"""
"""
if
not
isinstance
(
node
.
op
,
ElemwiseOP
):
if
not
isinstance
(
node
.
op
,
ElemwiseOP
):
return
False
return
False
...
@@ -1969,6 +2001,7 @@ def local_upcast_elemwise_constant_inputs(node):
...
@@ -1969,6 +2001,7 @@ def local_upcast_elemwise_constant_inputs(node):
those Ops do implicit upcasting anyway.
those Ops do implicit upcasting anyway.
Rationale: it helps merge things like (1-x) and (1.0 - x).
Rationale: it helps merge things like (1-x) and (1.0 - x).
"""
"""
if
len
(
node
.
outputs
)
>
1
:
if
len
(
node
.
outputs
)
>
1
:
return
return
...
@@ -2033,7 +2066,8 @@ def local_upcast_elemwise_constant_inputs(node):
...
@@ -2033,7 +2066,8 @@ def local_upcast_elemwise_constant_inputs(node):
@register_specialize
@register_specialize
@gof.local_optimizer
([
IncSubtensor
])
@gof.local_optimizer
([
IncSubtensor
])
def
local_useless_inc_subtensor
(
node
):
def
local_useless_inc_subtensor
(
node
):
"""Remove IncSubtensor, when we overwrite the full inputs with the
"""
Remove IncSubtensor, when we overwrite the full inputs with the
new value.
new value.
"""
"""
...
@@ -2082,6 +2116,7 @@ def local_set_to_inc_subtensor(node):
...
@@ -2082,6 +2116,7 @@ def local_set_to_inc_subtensor(node):
"""
"""
AdvancedIncSubtensor1(x, x[ilist]+other, ilist, set_instead_of_inc=True) ->
AdvancedIncSubtensor1(x, x[ilist]+other, ilist, set_instead_of_inc=True) ->
AdvancedIncSubtensor1(x, other, ilist, set_instead_of_inc=False)
AdvancedIncSubtensor1(x, other, ilist, set_instead_of_inc=False)
"""
"""
if
(
isinstance
(
node
.
op
,
AdvancedIncSubtensor1
)
and
if
(
isinstance
(
node
.
op
,
AdvancedIncSubtensor1
)
and
node
.
op
.
set_instead_of_inc
and
node
.
op
.
set_instead_of_inc
and
...
@@ -2144,6 +2179,7 @@ def local_useless_subtensor(node):
...
@@ -2144,6 +2179,7 @@ def local_useless_subtensor(node):
AdvancedSubtensor1 case, the full input is taken when the indices are
AdvancedSubtensor1 case, the full input is taken when the indices are
equivalent to `arange(0, input.shape[0], 1)` using either an explicit
equivalent to `arange(0, input.shape[0], 1)` using either an explicit
list/vector or the ARange op.
list/vector or the ARange op.
"""
"""
# This optimization needs ShapeOpt and fgraph.shape_feature
# This optimization needs ShapeOpt and fgraph.shape_feature
if
not
hasattr
(
node
.
fgraph
,
'shape_feature'
):
if
not
hasattr
(
node
.
fgraph
,
'shape_feature'
):
...
@@ -2261,6 +2297,7 @@ def local_subtensor_lift(node):
...
@@ -2261,6 +2297,7 @@ def local_subtensor_lift(node):
elemwise(x,...)[idx] -> elemwise(x[idx],...)
elemwise(x,...)[idx] -> elemwise(x[idx],...)
when x,... are broadcasted scalar or not broadcasted at all
when x,... are broadcasted scalar or not broadcasted at all
rebroadcast(x)[idx] => rebroadcast(x[idx])
rebroadcast(x)[idx] => rebroadcast(x[idx])
"""
"""
if
isinstance
(
node
.
op
,
Subtensor
):
if
isinstance
(
node
.
op
,
Subtensor
):
u
=
node
.
inputs
[
0
]
u
=
node
.
inputs
[
0
]
...
@@ -2327,7 +2364,7 @@ def local_subtensor_lift(node):
...
@@ -2327,7 +2364,7 @@ def local_subtensor_lift(node):
def
merge_two_slices
(
slice1
,
len1
,
slice2
,
len2
):
def
merge_two_slices
(
slice1
,
len1
,
slice2
,
len2
):
'''
"""
This function merges two slices into a single slice. The code works on
This function merges two slices into a single slice. The code works on
the assumption that:
the assumption that:
a) slice1 is actually a slice and not an index, while slice2
a) slice1 is actually a slice and not an index, while slice2
...
@@ -2340,7 +2377,7 @@ def merge_two_slices(slice1, len1, slice2, len2):
...
@@ -2340,7 +2377,7 @@ def merge_two_slices(slice1, len1, slice2, len2):
the two consecutive slices.
the two consecutive slices.
``len1`` is the length of the tensor **before** applying the first slice,
``len1`` is the length of the tensor **before** applying the first slice,
while ``len2`` is the length **after** applying the first slice.
while ``len2`` is the length **after** applying the first slice.
'''
"""
list_opt
=
[
local_abs_merge
,
local_mul_switch_sink
,
list_opt
=
[
local_abs_merge
,
local_mul_switch_sink
,
local_upcast_elemwise_constant_inputs
,
local_upcast_elemwise_constant_inputs
,
local_remove_switch_const_cond
,
constant_folding
]
local_remove_switch_const_cond
,
constant_folding
]
...
@@ -2466,6 +2503,7 @@ def local_subtensor_merge(node):
...
@@ -2466,6 +2503,7 @@ def local_subtensor_merge(node):
Refactored optimization to deal with all cases of tensor merging.
Refactored optimization to deal with all cases of tensor merging.
Given a subgraph of the form Subtensor(Subtensor(u)), the optimization
Given a subgraph of the form Subtensor(Subtensor(u)), the optimization
expresses all slices in a canonical form, and then merges them together.
expresses all slices in a canonical form, and then merges them together.
"""
"""
if
isinstance
(
node
.
op
,
Subtensor
):
if
isinstance
(
node
.
op
,
Subtensor
):
...
@@ -2601,7 +2639,8 @@ def local_subtensor_of_dot(node):
...
@@ -2601,7 +2639,8 @@ def local_subtensor_of_dot(node):
idxs_a is the first A.ndim-1 entries of idxs,
idxs_a is the first A.ndim-1 entries of idxs,
and idxs_b is the remaining entries of idxs (if any),
and idxs_b is the remaining entries of idxs (if any),
modified to skip the second-to-last dimension of B
modified to skip the second-to-last dimension of B
(because dot sums over this dimension)
(because dot sums over this dimension).
"""
"""
if
not
isinstance
(
node
.
op
,
Subtensor
):
if
not
isinstance
(
node
.
op
,
Subtensor
):
return
return
...
@@ -2715,7 +2754,8 @@ compile.optdb.register('pre_local_IncSubtensor_serialize',
...
@@ -2715,7 +2754,8 @@ compile.optdb.register('pre_local_IncSubtensor_serialize',
@gof.local_optimizer
([
IncSubtensor
],
inplace
=
True
)
@gof.local_optimizer
([
IncSubtensor
],
inplace
=
True
)
def
local_inplace_setsubtensor
(
node
):
def
local_inplace_setsubtensor
(
node
):
"""
"""
Also work for GpuIncSubtensor
Also work for GpuIncSubtensor.
"""
"""
if
isinstance
(
node
.
op
,
IncSubtensor
)
and
not
node
.
op
.
inplace
:
if
isinstance
(
node
.
op
,
IncSubtensor
)
and
not
node
.
op
.
inplace
:
new_op
=
node
.
op
.
__class__
(
new_op
=
node
.
op
.
__class__
(
...
@@ -2734,7 +2774,10 @@ compile.optdb.register('local_inplace_setsubtensor',
...
@@ -2734,7 +2774,10 @@ compile.optdb.register('local_inplace_setsubtensor',
@gof.local_optimizer
([
AdvancedIncSubtensor1
],
inplace
=
True
)
@gof.local_optimizer
([
AdvancedIncSubtensor1
],
inplace
=
True
)
def
local_inplace_incsubtensor1
(
node
):
def
local_inplace_incsubtensor1
(
node
):
""" also work for GpuAdvancedIncSubtensor1 """
"""
Also work for GpuAdvancedIncSubtensor1.
"""
if
isinstance
(
node
.
op
,
AdvancedIncSubtensor1
)
and
not
node
.
op
.
inplace
:
if
isinstance
(
node
.
op
,
AdvancedIncSubtensor1
)
and
not
node
.
op
.
inplace
:
new_op
=
node
.
op
.
clone_inplace
()
new_op
=
node
.
op
.
clone_inplace
()
new_node
=
new_op
(
*
node
.
inputs
)
new_node
=
new_op
(
*
node
.
inputs
)
...
@@ -2756,6 +2799,7 @@ compile.optdb.register('local_inplace_incsubtensor1',
...
@@ -2756,6 +2799,7 @@ compile.optdb.register('local_inplace_incsubtensor1',
def
local_incsubtensor_of_zeros
(
node
):
def
local_incsubtensor_of_zeros
(
node
):
"""
"""
IncSubtensor(x, zeros, idx) -> x
IncSubtensor(x, zeros, idx) -> x
"""
"""
if
(
isinstance
(
node
.
op
,
(
IncSubtensor
,
if
(
isinstance
(
node
.
op
,
(
IncSubtensor
,
AdvancedIncSubtensor
,
AdvancedIncSubtensor
,
...
@@ -2784,6 +2828,7 @@ def local_setsubtensor_of_constants(node):
...
@@ -2784,6 +2828,7 @@ def local_setsubtensor_of_constants(node):
SetSubtensor(x, x[idx], idx) -> x
SetSubtensor(x, x[idx], idx) -> x
when x is constant or alloc.
when x is constant or alloc.
"""
"""
if
isinstance
(
node
.
op
,
IncSubtensor
)
and
node
.
op
.
set_instead_of_inc
:
if
isinstance
(
node
.
op
,
IncSubtensor
)
and
node
.
op
.
set_instead_of_inc
:
x
=
node
.
inputs
[
0
]
x
=
node
.
inputs
[
0
]
...
@@ -2813,14 +2858,16 @@ def local_setsubtensor_of_constants(node):
...
@@ -2813,14 +2858,16 @@ def local_setsubtensor_of_constants(node):
@register_stabilize
@register_stabilize
@gof.local_optimizer
([
AdvancedSubtensor1
])
@gof.local_optimizer
([
AdvancedSubtensor1
])
def
local_adv_sub1_adv_inc_sub1
(
node
):
def
local_adv_sub1_adv_inc_sub1
(
node
):
"""Optimize the possible AdvSub1(AdvIncSub1(...), ...)
"""Optimize the possible AdvSub1(AdvIncSub1(...), ...)
.
AdvancedSubtensor1(AdvancedIncSubtensor1(0s, y, idx), idx) -> y
AdvancedSubtensor1(AdvancedIncSubtensor1(0s, y, idx), idx) -> y
AdvancedSubtensor1(AdvancedSetSubtensor1(x, y, idx), idx) -> y
AdvancedSubtensor1(AdvancedSetSubtensor1(x, y, idx), idx) -> y
:note: This opt add AssertOp. Otherwise, it would remove shape and
Notes
index error. If you want to get rid of them, see the
-----
:ref:`unsafe_optimization` section.
This opt add AssertOp. Otherwise, it would remove shape and
index error. If you want to get rid of them, see the
:ref:`unsafe_optimization` section.
"""
"""
if
not
isinstance
(
node
.
op
,
AdvancedSubtensor1
):
if
not
isinstance
(
node
.
op
,
AdvancedSubtensor1
):
...
@@ -2862,6 +2909,7 @@ def local_useless_inc_subtensor_alloc(node):
...
@@ -2862,6 +2909,7 @@ def local_useless_inc_subtensor_alloc(node):
Replaces an [Advanced]IncSubtensor[1], whose increment is an `alloc` of
Replaces an [Advanced]IncSubtensor[1], whose increment is an `alloc` of
a fully or partially broadcastable variable, by one that skips the
a fully or partially broadcastable variable, by one that skips the
intermediate `alloc` where possible.
intermediate `alloc` where possible.
"""
"""
if
isinstance
(
node
.
op
,
(
IncSubtensor
,
if
isinstance
(
node
.
op
,
(
IncSubtensor
,
AdvancedIncSubtensor
,
AdvancedIncSubtensor
,
...
@@ -2962,7 +3010,8 @@ def local_useless_inc_subtensor_alloc(node):
...
@@ -2962,7 +3010,8 @@ def local_useless_inc_subtensor_alloc(node):
@gof.local_optimizer
([
T
.
Rebroadcast
])
@gof.local_optimizer
([
T
.
Rebroadcast
])
def
local_useless_rebroadcast
(
node
):
def
local_useless_rebroadcast
(
node
):
"""
"""
Remove Rebroadcast if id does not actually change the broadcasting pattern
Remove Rebroadcast if id does not actually change the broadcasting pattern.
"""
"""
if
isinstance
(
node
.
op
,
T
.
Rebroadcast
):
if
isinstance
(
node
.
op
,
T
.
Rebroadcast
):
x
=
node
.
inputs
[
0
]
x
=
node
.
inputs
[
0
]
...
@@ -2992,6 +3041,7 @@ def local_rebroadcast_lift(node):
...
@@ -2992,6 +3041,7 @@ def local_rebroadcast_lift(node):
Rebroadcast(Elemwise(x)) => Elemwise(Rebroadcast(x))
Rebroadcast(Elemwise(x)) => Elemwise(Rebroadcast(x))
Rebroadcast(Rebroadcast(x)) => Rebroadcast(x)
Rebroadcast(Rebroadcast(x)) => Rebroadcast(x)
"""
"""
op
=
node
.
op
op
=
node
.
op
if
not
isinstance
(
op
,
T
.
Rebroadcast
):
if
not
isinstance
(
op
,
T
.
Rebroadcast
):
...
@@ -3023,8 +3073,14 @@ def apply_rebroadcast_opt(rval):
...
@@ -3023,8 +3073,14 @@ def apply_rebroadcast_opt(rval):
Apply as many times as required the optimization local_useless_rebroadcast
Apply as many times as required the optimization local_useless_rebroadcast
and local_rebroadcast_lift.
and local_rebroadcast_lift.
:param rval: a Variable
Parameters
:return: a Variable (the same if no optimization can be applied)
----------
rval: a Variable
Returns
-------
A Variable (the same if no optimization can be applied)
"""
"""
changed
=
True
changed
=
True
...
@@ -3056,6 +3112,7 @@ def local_join_1(node):
...
@@ -3056,6 +3112,7 @@ def local_join_1(node):
"""Join(i, x) => x
"""Join(i, x) => x
Remove Join() when only one element is joined.
Remove Join() when only one element is joined.
"""
"""
if
not
isinstance
(
node
.
op
,
T
.
Join
):
if
not
isinstance
(
node
.
op
,
T
.
Join
):
return
return
...
@@ -3070,7 +3127,8 @@ def local_join_1(node):
...
@@ -3070,7 +3127,8 @@ def local_join_1(node):
def
local_join_empty
(
node
):
def
local_join_empty
(
node
):
"""Join(i, x, y, empty) => Join(i, x, y)
"""Join(i, x, y, empty) => Join(i, x, y)
remove empty inputs to joins. The empty inputs can be anywhere.
Remove empty inputs to joins. The empty inputs can be anywhere.
"""
"""
if
not
isinstance
(
node
.
op
,
T
.
Join
):
if
not
isinstance
(
node
.
op
,
T
.
Join
):
return
return
...
@@ -3147,6 +3205,7 @@ def local_remove_switch_const_cond(node):
...
@@ -3147,6 +3205,7 @@ def local_remove_switch_const_cond(node):
T.switch(cond,left,right) -->
T.switch(cond,left,right) -->
if cond is constant and cond == 0: right
if cond is constant and cond == 0: right
if cond is constant and cond != 0: left
if cond is constant and cond != 0: left
"""
"""
if
(
isinstance
(
node
.
op
,
T
.
Elemwise
)
and
if
(
isinstance
(
node
.
op
,
T
.
Elemwise
)
and
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
basic
.
Switch
)):
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
basic
.
Switch
)):
...
@@ -3183,7 +3242,9 @@ def local_mul_switch_sink(node):
...
@@ -3183,7 +3242,9 @@ def local_mul_switch_sink(node):
This is useful because A and B may not be numerically stable and give
This is useful because A and B may not be numerically stable and give
NaN or inf values for cases where the switch returns 0.
NaN or inf values for cases where the switch returns 0.
With this optimization T.grad(T.switch(...)) has the right behavior.
With this optimization T.grad(T.switch(...)) has the right behavior.
Exemple:
Examples
--------
x -> f(x)
x -> f(x)
x -> g(x)
x -> g(x)
y = T.switch(cond,f(x),g(x))
y = T.switch(cond,f(x),g(x))
...
@@ -3193,6 +3254,7 @@ def local_mul_switch_sink(node):
...
@@ -3193,6 +3254,7 @@ def local_mul_switch_sink(node):
T.grad(y,x) -> switch(cond,grad(f(x),x), 0) + switch(cond,0,grad(g(x),x))
T.grad(y,x) -> switch(cond,grad(f(x),x), 0) + switch(cond,0,grad(g(x),x))
This will be particularly useful for the lazyif because we skip
This will be particularly useful for the lazyif because we skip
an entire part of the graph.
an entire part of the graph.
"""
"""
if
node
.
op
!=
T
.
mul
:
if
node
.
op
!=
T
.
mul
:
return
False
return
False
...
@@ -3234,6 +3296,7 @@ def local_div_switch_sink(node):
...
@@ -3234,6 +3296,7 @@ def local_div_switch_sink(node):
This is useful because A may not be numerically stable and give
This is useful because A may not be numerically stable and give
NaN or inf values for cases where the switch returns 0.
NaN or inf values for cases where the switch returns 0.
See local_mul_switch_sink for more details.
See local_mul_switch_sink for more details.
"""
"""
if
(
node
.
op
!=
T
.
true_div
and
node
.
op
!=
T
.
int_div
):
if
(
node
.
op
!=
T
.
true_div
and
node
.
op
!=
T
.
int_div
):
return
False
return
False
...
@@ -3308,6 +3371,7 @@ def local_useless_split(node):
...
@@ -3308,6 +3371,7 @@ def local_useless_split(node):
""" Split{n_splits=1}(x, y) -> x
""" Split{n_splits=1}(x, y) -> x
Remove Split with only 1 split.
Remove Split with only 1 split.
"""
"""
if
isinstance
(
node
.
op
,
T
.
Split
):
if
isinstance
(
node
.
op
,
T
.
Split
):
if
node
.
op
.
len_splits
==
1
:
if
node
.
op
.
len_splits
==
1
:
...
@@ -3329,6 +3393,7 @@ def local_flatten_lift(node):
...
@@ -3329,6 +3393,7 @@ def local_flatten_lift(node):
This optimization is needed by optimization
This optimization is needed by optimization
nnet/sigm.py:log1msigm_to_softplus to get applied when there is a flatten.
nnet/sigm.py:log1msigm_to_softplus to get applied when there is a flatten.
"""
"""
if
(
isinstance
(
node
.
op
,
T
.
Flatten
)
and
if
(
isinstance
(
node
.
op
,
T
.
Flatten
)
and
node
.
inputs
[
0
]
.
owner
and
node
.
inputs
[
0
]
.
owner
and
...
@@ -3347,6 +3412,7 @@ def local_flatten_lift(node):
...
@@ -3347,6 +3412,7 @@ def local_flatten_lift(node):
def
local_reshape_chain
(
node
):
def
local_reshape_chain
(
node
):
"""
"""
Reshape(Reshape(shape1),shape2) -> Reshape(shape2)
Reshape(Reshape(shape1),shape2) -> Reshape(shape2)
"""
"""
if
not
opt
.
check_chain
(
node
,
T
.
Reshape
,
T
.
Reshape
):
if
not
opt
.
check_chain
(
node
,
T
.
Reshape
,
T
.
Reshape
):
return
False
return
False
...
@@ -3378,6 +3444,7 @@ def local_reshape_lift(node):
...
@@ -3378,6 +3444,7 @@ def local_reshape_lift(node):
This optimization is needed by optimization
This optimization is needed by optimization
nnet/sigm.py:log1msigm_to_softplus to get applied when there is a reshape.
nnet/sigm.py:log1msigm_to_softplus to get applied when there is a reshape.
"""
"""
if
(
isinstance
(
node
.
op
,
T
.
Reshape
)
and
if
(
isinstance
(
node
.
op
,
T
.
Reshape
)
and
node
.
inputs
[
0
]
.
owner
and
node
.
inputs
[
0
]
.
owner
and
...
@@ -3526,26 +3593,32 @@ class Canonizer(gof.LocalOptimizer):
...
@@ -3526,26 +3593,32 @@ class Canonizer(gof.LocalOptimizer):
Usage: Canonizer(main, inverse, reciprocal, calculate)
Usage: Canonizer(main, inverse, reciprocal, calculate)
* main: a suitable Op class that is commutative, associative and
Parameters
takes one to an arbitrary number of inputs, e.g. add or
----------
mul
main
* inverse: an Op class such that inverse(main(x, y), y) == x
A suitable Op class that is commutative, associative and
e.g. sub or true_div
takes one to an arbitrary number of inputs, e.g. add or
* reciprocal: a function such that main(x, reciprocal(y)) ==
mul
inverse(x, y) e.g. neg or inv
inverse
An Op class such that inverse(main(x, y), y) == x
* calculate: function that takes a list of numpy.ndarray instances
e.g. sub or true_div
for the numerator, another list for the denumerator,
reciprocal
and calculates inverse(main(*num), main(*denum)). It
A function such that main(x, reciprocal(y)) == inverse(x, y)
takes a keyword argument, aslist. If True, the value
e.g. neg or inv
should be returned as a list of one element, unless
calculate
the value is such that value = main(). In that case,
Function that takes a list of numpy.ndarray instances
the return value should be an empty list.
for the numerator, another list for the denumerator,
and calculates inverse(main(*num), main(*denum)). It
takes a keyword argument, aslist. If True, the value
should be returned as a list of one element, unless
the value is such that value = main(). In that case,
the return value should be an empty list.
The variable is a local_optimizer. It is best used with a TopoOptimizer in
The variable is a local_optimizer. It is best used with a TopoOptimizer in
in_to_out order.
in_to_out order.
Examples:
Examples
--------
T = theano.tensor
T = theano.tensor
add_canonizer = Canonizer(T.add, T.sub, T.neg,
add_canonizer = Canonizer(T.add, T.sub, T.neg,
lambda n, d: sum(n) - sum(d))
lambda n, d: sum(n) - sum(d))
...
@@ -3563,6 +3636,7 @@ class Canonizer(gof.LocalOptimizer):
...
@@ -3563,6 +3636,7 @@ class Canonizer(gof.LocalOptimizer):
2 * x / 2 -> x
2 * x / 2 -> x
x * y * z -> Elemwise(T.mul){x,y,z} #only one pass over the memory.
x * y * z -> Elemwise(T.mul){x,y,z} #only one pass over the memory.
!-> Elemwise(T.mul){x,Elemwise(T.mul){y,z}}
!-> Elemwise(T.mul){x,Elemwise(T.mul){y,z}}
"""
"""
def
__init__
(
self
,
main
,
inverse
,
reciprocal
,
calculate
,
def
__init__
(
self
,
main
,
inverse
,
reciprocal
,
calculate
,
...
@@ -3747,8 +3821,13 @@ class Canonizer(gof.LocalOptimizer):
...
@@ -3747,8 +3821,13 @@ class Canonizer(gof.LocalOptimizer):
@staticmethod
@staticmethod
def
get_constant
(
v
):
def
get_constant
(
v
):
"""
"""
Returns a numeric constant if v is a Constant or, well, a
numeric constant. If v is a plain Variable, returns None.
Returns
-------
object
A numeric constant if v is a Constant or, well, a
numeric constant. If v is a plain Variable, returns None.
"""
"""
if
isinstance
(
v
,
Variable
):
if
isinstance
(
v
,
Variable
):
try
:
try
:
...
@@ -3762,6 +3841,7 @@ class Canonizer(gof.LocalOptimizer):
...
@@ -3762,6 +3841,7 @@ class Canonizer(gof.LocalOptimizer):
"""
"""
Shorthand for:
Shorthand for:
self.simplify_constants(*self.simplify_factors(num, denum))
self.simplify_constants(*self.simplify_factors(num, denum))
"""
"""
rval
=
self
.
simplify_constants
(
*
self
.
simplify_factors
(
num
,
denum
),
rval
=
self
.
simplify_constants
(
*
self
.
simplify_factors
(
num
,
denum
),
out_type
=
out_type
)
out_type
=
out_type
)
...
@@ -3781,6 +3861,7 @@ class Canonizer(gof.LocalOptimizer):
...
@@ -3781,6 +3861,7 @@ class Canonizer(gof.LocalOptimizer):
[x], [x] -> [], []
[x], [x] -> [], []
[x, y], [x] -> [y], []
[x, y], [x] -> [y], []
[a, b], [c, d] -> [a, b], [c, d]
[a, b], [c, d] -> [a, b], [c, d]
"""
"""
for
v
in
list
(
num
):
for
v
in
list
(
num
):
if
v
in
denum
:
if
v
in
denum
:
...
@@ -3790,18 +3871,22 @@ class Canonizer(gof.LocalOptimizer):
...
@@ -3790,18 +3871,22 @@ class Canonizer(gof.LocalOptimizer):
def
simplify_constants
(
self
,
orig_num
,
orig_denum
,
out_type
=
None
):
def
simplify_constants
(
self
,
orig_num
,
orig_denum
,
out_type
=
None
):
"""
"""
Find all constants and put them together into a single constant.
Finds all constants in orig_num and orig_denum (using
Finds all constants in orig_num and orig_denum (using
get_constant) and puts them together into a single
get_constant) and puts them together into a single
constant. The constant is inserted as the first element of the
constant. The constant is inserted as the first element of the
numerator. If the constant is the neutral element, it is
numerator. If the constant is the neutral element, it is
removed from the numerator.
Examples:
removed from the numerator.
Examples
--------
Let main be multiplication:
Let main be multiplication:
[2, 3, x], [] -> [6, x], []
[2, 3, x], [] -> [6, x], []
[x, y, 2], [4, z] -> [0.5, x, y], [z]
[x, y, 2], [4, z] -> [0.5, x, y], [z]
[x, 2, y], [z, 2] -> [x, y], [z]
[x, 2, y], [z, 2] -> [x, y], [z]
"""
"""
# Lists representing the numerator and denumerator
# Lists representing the numerator and denumerator
...
@@ -3969,13 +4054,15 @@ register_canonicalize(local_neg_to_mul)
...
@@ -3969,13 +4054,15 @@ register_canonicalize(local_neg_to_mul)
@register_specialize
@register_specialize
@gof.local_optimizer
([
T
.
Sum
,
T
.
elemwise
.
Prod
])
@gof.local_optimizer
([
T
.
Sum
,
T
.
elemwise
.
Prod
])
def
local_sum_prod_mul_by_scalar
(
node
):
def
local_sum_prod_mul_by_scalar
(
node
):
"""sum(scalar * smth) -> scalar * sum(smth)
"""
sum(-smth) -> -sum(smth)
sum(scalar * smth) -> scalar * sum(smth)
sum(-smth) -> -sum(smth)
or
or
prod(scalar * smth) -> scalar ** size(smth) * prod(smth)
prod(-smth) -> -1 ** size(smth) * prod(smth)
prod(scalar * smth) -> scalar ** size(smth) * prod(smth)
prod(-smth) -> -1 ** size(smth) * prod(smth)
"""
"""
# TODO: if the the thing inside the Sum is a division,
# TODO: if the the thing inside the Sum is a division,
# we should get at the numerator....
# we should get at the numerator....
...
@@ -4040,8 +4127,11 @@ def local_elemwise_sub_zeros(node):
...
@@ -4040,8 +4127,11 @@ def local_elemwise_sub_zeros(node):
@register_specialize
@register_specialize
@gof.local_optimizer
([
T
.
Sum
])
@gof.local_optimizer
([
T
.
Sum
])
def
local_sum_div_dimshuffle
(
node
):
def
local_sum_div_dimshuffle
(
node
):
'''sum(a / dimshuffle{...}(b), axis=l) -> sum(a, axis={...}) / b,
"""
if dimension l of the DimShuffle is 'x'.'''
sum(a / dimshuffle{...}(b), axis=l) -> sum(a, axis={...}) / b,
if dimension l of the DimShuffle is 'x'.
"""
# TODO: extend it to product, and quotient of products
# TODO: extend it to product, and quotient of products
# It does not make much sense now to extend it to the case where the
# It does not make much sense now to extend it to the case where the
...
@@ -4128,8 +4218,10 @@ def local_sum_div_dimshuffle(node):
...
@@ -4128,8 +4218,10 @@ def local_sum_div_dimshuffle(node):
@register_canonicalize
@register_canonicalize
@gof.local_optimizer
([
T
.
Sum
,
T
.
elemwise
.
Prod
])
@gof.local_optimizer
([
T
.
Sum
,
T
.
elemwise
.
Prod
])
def
local_sum_prod_all_to_none
(
node
):
def
local_sum_prod_all_to_none
(
node
):
"""Sum{0,1,...N} -> Sum{} or
"""
Prod{0,1,...N} -> Prod{}
Sum{0,1,...N} -> Sum{} or
Prod{0,1,...N} -> Prod{}
"""
"""
if
isinstance
(
node
.
op
,
T
.
Sum
)
or
isinstance
(
node
.
op
,
T
.
elemwise
.
Prod
):
if
isinstance
(
node
.
op
,
T
.
Sum
)
or
isinstance
(
node
.
op
,
T
.
elemwise
.
Prod
):
opt_type
=
T
.
Sum
if
isinstance
(
node
.
op
,
T
.
Sum
)
else
T
.
elemwise
.
Prod
opt_type
=
T
.
Sum
if
isinstance
(
node
.
op
,
T
.
Sum
)
else
T
.
elemwise
.
Prod
...
@@ -4148,6 +4240,7 @@ def local_op_of_op(node):
...
@@ -4148,6 +4240,7 @@ def local_op_of_op(node):
Prod(Prod()) -> single Prod()
Prod(Prod()) -> single Prod()
or
or
Sum(Sum()) -> single Sum()
Sum(Sum()) -> single Sum()
"""
"""
if
isinstance
(
node
.
op
,
T
.
elemwise
.
Prod
)
or
isinstance
(
node
.
op
,
T
.
Sum
):
if
isinstance
(
node
.
op
,
T
.
elemwise
.
Prod
)
or
isinstance
(
node
.
op
,
T
.
Sum
):
opt_type
=
T
.
Sum
if
isinstance
(
node
.
op
,
T
.
Sum
)
else
T
.
elemwise
.
Prod
opt_type
=
T
.
Sum
if
isinstance
(
node
.
op
,
T
.
Sum
)
else
T
.
elemwise
.
Prod
...
@@ -4219,14 +4312,16 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any,
...
@@ -4219,14 +4312,16 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any,
@register_uncanonicalize
# Needed for MaxAndArgmax -> CAReduce
@register_uncanonicalize
# Needed for MaxAndArgmax -> CAReduce
@gof.local_optimizer
(
ALL_REDUCE
)
@gof.local_optimizer
(
ALL_REDUCE
)
def
local_reduce_join
(
node
):
def
local_reduce_join
(
node
):
"""Reduce{scalar.op}(Join(axis=0, a, b), axis=0) -> Elemwise{scalar.op}(a, b)
"""
Reduce{scalar.op}(Join(axis=0, a, b), axis=0) -> Elemwise{scalar.op}(a, b)
:note: supported scalar.op are Maximum, Mimimum in some cases and
Notes
Add and Mul in all cases.
-----
Supported scalar.op are Maximum, Minimum in some cases and Add and Mul in
all cases.
:note: Currently we must reduce on axis 0. It is probably
Currently we must reduce on axis 0. It is probably extensible to the case
extensible to the case where we join and reduce on the same
where we join and reduce on the same set of axes.
set of axis.
"""
"""
if
(
isinstance
(
node
.
op
,
T
.
CAReduce
)
and
if
(
isinstance
(
node
.
op
,
T
.
CAReduce
)
and
...
@@ -4312,7 +4407,7 @@ def local_cut_useless_reduce(node):
...
@@ -4312,7 +4407,7 @@ def local_cut_useless_reduce(node):
@register_specialize
@register_specialize
@gof.local_optimizer
(
ALL_REDUCE
)
@gof.local_optimizer
(
ALL_REDUCE
)
def
local_reduce_broadcastable
(
node
):
def
local_reduce_broadcastable
(
node
):
"""Remove reduction over broadcastable dimensions"""
"""Remove reduction over broadcastable dimensions
.
"""
if
isinstance
(
node
.
op
,
T
.
CAReduce
):
if
isinstance
(
node
.
op
,
T
.
CAReduce
):
reduced
,
=
node
.
inputs
reduced
,
=
node
.
inputs
odtype
=
node
.
outputs
[
0
]
.
dtype
odtype
=
node
.
outputs
[
0
]
.
dtype
...
@@ -4351,9 +4446,11 @@ def local_reduce_broadcastable(node):
...
@@ -4351,9 +4446,11 @@ def local_reduce_broadcastable(node):
@register_specialize
@register_specialize
@gof.local_optimizer
([
T
.
Sum
,
T
.
elemwise
.
Prod
])
@gof.local_optimizer
([
T
.
Sum
,
T
.
elemwise
.
Prod
])
def
local_opt_alloc
(
node
):
def
local_opt_alloc
(
node
):
""" sum(alloc(constant,shapes...)) => constant*prod(shapes)
"""
or
sum(alloc(constant,shapes...)) => constant*prod(shapes)
prod(alloc(constant,shapes...)) => constant**prod(shapes)
or
prod(alloc(constant,shapes...)) => constant**prod(shapes)
"""
"""
if
isinstance
(
node
.
op
,
T
.
Sum
)
or
isinstance
(
node
.
op
,
T
.
elemwise
.
Prod
):
if
isinstance
(
node
.
op
,
T
.
Sum
)
or
isinstance
(
node
.
op
,
T
.
elemwise
.
Prod
):
node_inps
,
=
node
.
inputs
node_inps
,
=
node
.
inputs
...
@@ -4406,9 +4503,11 @@ def local_neg_neg(node):
...
@@ -4406,9 +4503,11 @@ def local_neg_neg(node):
@register_specialize
@register_specialize
@gof.local_optimizer
([
T
.
neg
])
@gof.local_optimizer
([
T
.
neg
])
def
local_neg_div_neg
(
node
):
def
local_neg_div_neg
(
node
):
"""- (-a / b) -> a / b
"""
- (-a / b) -> a / b
Also performs - (c / b) -> ((-c) / b) when c is a scalar constant.
Also performs - (c / b) -> ((-c) / b) when c is a scalar constant.
"""
"""
if
node
.
op
==
T
.
neg
:
if
node
.
op
==
T
.
neg
:
if
node
.
inputs
[
0
]
.
owner
and
node
.
inputs
[
0
]
.
owner
.
op
==
T
.
true_div
:
if
node
.
inputs
[
0
]
.
owner
and
node
.
inputs
[
0
]
.
owner
.
op
==
T
.
true_div
:
...
@@ -4427,8 +4526,10 @@ def local_neg_div_neg(node):
...
@@ -4427,8 +4526,10 @@ def local_neg_div_neg(node):
@gof.local_optimizer
([
T
.
mul
])
@gof.local_optimizer
([
T
.
mul
])
def
local_mul_zero
(
node
):
def
local_mul_zero
(
node
):
"""As part of canonicalization, we replace multiplication by zero
"""
As part of canonicalization, we replace multiplication by zero
with zero.
with zero.
"""
"""
if
node
.
op
==
T
.
mul
:
if
node
.
op
==
T
.
mul
:
otype
=
node
.
outputs
[
0
]
.
type
otype
=
node
.
outputs
[
0
]
.
type
...
@@ -4489,10 +4590,12 @@ register_canonicalize(local_pow_canonicalize)
...
@@ -4489,10 +4590,12 @@ register_canonicalize(local_pow_canonicalize)
@register_specialize
@register_specialize
@gof.local_optimizer
([
T
.
mul
])
@gof.local_optimizer
([
T
.
mul
])
def
local_mul_to_sqr
(
node
):
def
local_mul_to_sqr
(
node
):
"""x*x -> sqr(x)
"""
x*x -> sqr(x)
This is faster on the GPU when memory fetching is a big part of
This is faster on the GPU when memory fetching is a big part of
the computation time.
the computation time.
"""
"""
if
node
.
op
==
T
.
mul
:
if
node
.
op
==
T
.
mul
:
if
len
(
node
.
inputs
)
==
2
:
if
len
(
node
.
inputs
)
==
2
:
...
@@ -4620,7 +4723,8 @@ def local_pow_specialize_device(node):
...
@@ -4620,7 +4723,8 @@ def local_pow_specialize_device(node):
@gof.local_optimizer
([
T
.
mul
])
@gof.local_optimizer
([
T
.
mul
])
def
local_mul_specialize
(
node
):
def
local_mul_specialize
(
node
):
"""Remove special-case constants from mul arguments and useless neg in inputs.
"""
Remove special-case constants from mul arguments and useless neg in inputs.
mul(-1, x) -> neg(x)
mul(-1, x) -> neg(x)
mul(1, x, y) -> mul(x, y)
mul(1, x, y) -> mul(x, y)
...
@@ -4629,6 +4733,7 @@ def local_mul_specialize(node):
...
@@ -4629,6 +4733,7 @@ def local_mul_specialize(node):
This is not done if we would add more nodes in the graph, like with:
This is not done if we would add more nodes in the graph, like with:
mul(-1, x, y) -/-> neg(mul(x, y))
mul(-1, x, y) -/-> neg(mul(x, y))
"""
"""
# here, we are past the point of canonicalization, so we don't
# here, we are past the point of canonicalization, so we don't
# want to put in un-necessary fills.
# want to put in un-necessary fills.
...
@@ -4766,8 +4871,9 @@ local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'X_over_absX')
...
@@ -4766,8 +4871,9 @@ local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'X_over_absX')
@gof.local_optimizer
([
T
.
abs_
])
@gof.local_optimizer
([
T
.
abs_
])
def
local_abs_lift
(
node
):
def
local_abs_lift
(
node
):
"""
"""
move the abs toward the input. This is needed for
Move the abs toward the input.
check_for_x_over_absX to apply in more case.
This is needed for check_for_x_over_absX to apply in more cases.
"""
"""
if
node
.
op
==
T
.
abs_
and
node
.
inputs
[
0
]
.
owner
:
if
node
.
op
==
T
.
abs_
and
node
.
inputs
[
0
]
.
owner
:
...
@@ -4783,7 +4889,7 @@ def local_abs_lift(node):
...
@@ -4783,7 +4889,7 @@ def local_abs_lift(node):
@gof.local_optimizer
([
T
.
mul
,
T
.
true_div
])
@gof.local_optimizer
([
T
.
mul
,
T
.
true_div
])
def
local_abs_merge
(
node
):
def
local_abs_merge
(
node
):
"""
"""
m
erge abs generated by local_abs_lift when the canonizer don't
M
erge abs generated by local_abs_lift when the canonizer doesn't
need it anymore
need it anymore
"""
"""
...
@@ -4968,6 +5074,8 @@ def attempt_distribution(factor, num, denum, out_type):
...
@@ -4968,6 +5074,8 @@ def attempt_distribution(factor, num, denum, out_type):
@gof.local_optimizer
([
T
.
mul
,
T
.
true_div
,
T
.
inv
])
@gof.local_optimizer
([
T
.
mul
,
T
.
true_div
,
T
.
inv
])
def
local_greedy_distributor
(
node
):
def
local_greedy_distributor
(
node
):
"""
"""
Optimize by reducing the number of multiplications and/or divisions.
This optimization tries to apply distributivity of multiplication
This optimization tries to apply distributivity of multiplication
to addition in order to reduce the number of multiplications
to addition in order to reduce the number of multiplications
and/or divisions that must be done. The algorithm weighs division
and/or divisions that must be done. The algorithm weighs division
...
@@ -4985,6 +5093,7 @@ def local_greedy_distributor(node):
...
@@ -4985,6 +5093,7 @@ def local_greedy_distributor(node):
This optimization aims to reduce computational cost. It may also
This optimization aims to reduce computational cost. It may also
increase numerical stability, e.g. when x and/or y tend to 0 in
increase numerical stability, e.g. when x and/or y tend to 0 in
example 1.
example 1.
"""
"""
out
=
node
.
outputs
[
0
]
out
=
node
.
outputs
[
0
]
...
@@ -5083,7 +5192,13 @@ def constant_folding(node):
...
@@ -5083,7 +5192,13 @@ def constant_folding(node):
def
_is_1
(
expr
):
def
_is_1
(
expr
):
"""rtype bool. True iff expr is a constant close to 1
"""
Returns
-------
bool
True iff expr is a constant close to 1.
"""
"""
try
:
try
:
v
=
get_scalar_constant_value
(
expr
)
v
=
get_scalar_constant_value
(
expr
)
...
@@ -5093,7 +5208,13 @@ def _is_1(expr):
...
@@ -5093,7 +5208,13 @@ def _is_1(expr):
def
_is_minus1
(
expr
):
def
_is_minus1
(
expr
):
"""rtype bool. True iff expr is a constant close to -1
"""
Returns
-------
bool
True iff expr is a constant close to -1.
"""
"""
try
:
try
:
v
=
get_scalar_constant_value
(
expr
)
v
=
get_scalar_constant_value
(
expr
)
...
@@ -5103,13 +5224,19 @@ def _is_minus1(expr):
...
@@ -5103,13 +5224,19 @@ def _is_minus1(expr):
def
get_clients
(
node
):
def
get_clients
(
node
):
"Used by erf/erfc opt to track less frequent op"
"""
Used by erf/erfc opt to track less frequent op.
"""
return
[
c
for
c
,
i
in
node
.
outputs
[
0
]
.
clients
return
[
c
for
c
,
i
in
node
.
outputs
[
0
]
.
clients
if
c
!=
"output"
]
if
c
!=
"output"
]
def
get_clients2
(
node
):
def
get_clients2
(
node
):
"Used by erf/erfc opt to track less frequent op"
"""
Used by erf/erfc opt to track less frequent op.
"""
l
=
[]
l
=
[]
for
c
,
i
in
node
.
outputs
[
0
]
.
clients
:
for
c
,
i
in
node
.
outputs
[
0
]
.
clients
:
if
c
!=
"output"
:
if
c
!=
"output"
:
...
@@ -5622,18 +5749,22 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
...
@@ -5622,18 +5749,22 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
"""
"""
We parametrize it to make it work for Elemwise and GpuElemwise op.
We parametrize it to make it work for Elemwise and GpuElemwise op.
:param OP: GpuElemwise or Elemwise class (the one that we want to fuse)
Parameters
----------
:param max_input_fct: a function that returns the maximum number of inputs
OP
that this elemwise can take (useful for GpuElemwise).
GpuElemwise or Elemwise class (the one that we want to fuse)
GPU kernel currently has a limit of 256 bytes for
max_input_fct
the size of all parameters passed to it. As currently
A function that returns the maximum number of inputs
we pass many information only by parameter, we must
that this elemwise can take (useful for GpuElemwise).
limit how many ops we fuse together to avoid busting
GPU kernel currently has a limit of 256 bytes for
that 256 limit.
the size of all parameters passed to it. As currently
we pass much information only by parameter, we must
limit how many ops we fuse together to avoid busting
that 256 limit.
On the CPU we limit to 32 input variables
since that is the maximum numpy support.
On the CPU we limit to 32 input variables
since that is the maximum numpy support.
"""
"""
if
maker
is
None
:
if
maker
is
None
:
def
maker
(
node
,
scalar_op
):
def
maker
(
node
,
scalar_op
):
...
@@ -5647,6 +5778,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
...
@@ -5647,6 +5778,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
For mixed dtype, we let the Composite op do the cast. It lets the C
For mixed dtype, we let the Composite op do the cast. It lets the C
compiler do the cast.
compiler do the cast.
The number of dimensions is validated at call time by theano itself.
The number of dimensions is validated at call time by theano itself.
"""
"""
# META TODO: PUT THESE THINGS IN TRAC, NOT TODO NOTES!!
# META TODO: PUT THESE THINGS IN TRAC, NOT TODO NOTES!!
# TODO: use broadcast flag?
# TODO: use broadcast flag?
...
@@ -5862,7 +5994,7 @@ local_elemwise_fusion = local_elemwise_fusion_op(T.Elemwise,
...
@@ -5862,7 +5994,7 @@ local_elemwise_fusion = local_elemwise_fusion_op(T.Elemwise,
class
FusionOptimizer
(
Optimizer
):
class
FusionOptimizer
(
Optimizer
):
"""Graph optimizer for Fusion of elemwise operations"""
"""Graph optimizer for Fusion of elemwise operations
.
"""
def
__init__
(
self
,
local_optimizer
):
def
__init__
(
self
,
local_optimizer
):
Optimizer
.
__init__
(
self
)
Optimizer
.
__init__
(
self
)
self
.
optimizer
=
local_optimizer
self
.
optimizer
=
local_optimizer
...
...
theano/tensor/opt_uncanonicalize.py
浏览文件 @
6304a061
...
@@ -28,8 +28,8 @@ problem.
...
@@ -28,8 +28,8 @@ problem.
Also, we should make the fgraph refuse optimization that break the
Also, we should make the fgraph refuse optimization that break the
canonization of the graph in the optimizations phases where the graph is
canonization of the graph in the optimizations phases where the graph is
supposed to be canonical.
supposed to be canonical.
"""
"""
# TODO: intelligent merge for mul/add
# TODO: intelligent merge for mul/add
# TODO: 0*x -> 0
# TODO: 0*x -> 0
import
logging
import
logging
...
@@ -72,12 +72,15 @@ def local_max_and_argmax(node):
...
@@ -72,12 +72,15 @@ def local_max_and_argmax(node):
@gof.local_optimizer
([
T
.
neg
])
@gof.local_optimizer
([
T
.
neg
])
def
local_max_to_min
(
node
):
def
local_max_to_min
(
node
):
"""
"""
change -(max(-x)) to min
Change -(max(-x)) to min.
This is tested in tensor/tests/test_basic.py:test_min_max.
This is tested in tensor/tests/test_basic.py:test_min_max
Notes
-----
We don't need an opt that will do the reverse as by default
the interface puts only MaxAndArgmax into the graph.
:note: we don't need an opt that will do the reverse as by default
the interface put only MaxAndArgmax into the graph.
"""
"""
if
node
.
op
==
T
.
neg
and
node
.
inputs
[
0
]
.
owner
:
if
node
.
op
==
T
.
neg
and
node
.
inputs
[
0
]
.
owner
:
max
=
node
.
inputs
[
0
]
max
=
node
.
inputs
[
0
]
...
...
theano/tensor/raw_random.py
浏览文件 @
6304a061
...
@@ -19,7 +19,8 @@ __docformat__ = "restructuredtext en"
...
@@ -19,7 +19,8 @@ __docformat__ = "restructuredtext en"
class
RandomStateType
(
gof
.
Type
):
class
RandomStateType
(
gof
.
Type
):
"""A Type wrapper for numpy.random.RandomState
"""
A Type wrapper for numpy.random.RandomState.
The reason this exists (and `Generic` doesn't suffice) is that
The reason this exists (and `Generic` doesn't suffice) is that
RandomState objects that would appear to be equal do not compare
RandomState objects that would appear to be equal do not compare
...
@@ -99,35 +100,36 @@ random_state_type = RandomStateType()
...
@@ -99,35 +100,36 @@ random_state_type = RandomStateType()
class
RandomFunction
(
gof
.
Op
):
class
RandomFunction
(
gof
.
Op
):
"""Op that draws random numbers from a numpy.random.RandomState object
"""
"""
__props__
=
(
"fn"
,
"outtype"
,
"inplace"
,
"ndim_added"
)
Op that draws random numbers from a numpy.random.RandomState object.
def
__init__
(
self
,
fn
,
outtype
,
inplace
=
False
,
ndim_added
=
0
):
Parameters
"""
----------
:param fn: a member function of numpy.random.RandomState
fn : string or function reference
A member function of numpy.random.RandomState. A string will
be interpreted as the name of a member function of
numpy.random.RandomState.
Technically, any function with a signature like the ones in
Technically, any function with a signature like the ones in
numpy.random.RandomState will do.
This function must accept
numpy.random.RandomState will do. This function must accept
the shape (sometimes called size) of the output as the last
the shape (sometimes called size) of the output as the last
positional argument.
positional argument.
outtype
The theano Type of the output.
args
A list of default arguments for the function
kwargs
If the 'inplace' key is there, its value will be used to
determine if the op operates inplace or not.
If the 'ndim_added' key is there, its value indicates how
many more dimensions this op will add to the output, in
addition to the shape's dimensions (used in multinomial and
permutation).
:type fn: string or function reference. A string will
"""
be interpreted as the name of a member function of
numpy.random.RandomState.
:param outtype: the theano Type of the output
:param args: a list of default arguments for the function
__props__
=
(
"fn"
,
"outtype"
,
"inplace"
,
"ndim_added"
)
:param kwargs:
def
__init__
(
self
,
fn
,
outtype
,
inplace
=
False
,
ndim_added
=
0
):
If the 'inplace' key is there, its value will be used to
determine if the op operates inplace or not.
If the 'ndim_added' key is there, its value indicates how
many more dimensions this op will add to the output, in
addition to the shape's dimensions (used in multinomial and
permutation).
"""
self
.
__setstate__
([
fn
,
outtype
,
inplace
,
ndim_added
])
self
.
__setstate__
([
fn
,
outtype
,
inplace
,
ndim_added
])
def
__getstate__
(
self
):
def
__getstate__
(
self
):
...
@@ -151,30 +153,33 @@ class RandomFunction(gof.Op):
...
@@ -151,30 +153,33 @@ class RandomFunction(gof.Op):
def
make_node
(
self
,
r
,
shape
,
*
args
):
def
make_node
(
self
,
r
,
shape
,
*
args
):
"""
"""
:param r: a numpy.random.RandomState instance, or a Variable of Type
Parameters
RandomStateType that will contain a RandomState instance.
----------
r
:param shape: an lvector with a shape defining how many samples
A numpy.random.RandomState instance, or a Variable of Type
to draw. In the case of scalar distributions, it is the shape
RandomStateType that will contain a RandomState instance.
of the tensor output by this Op. In that case, at runtime, the
shape
value associated with this lvector must have a length equal to
An lvector with a shape defining how many samples
the number of dimensions promised by `self.outtype`.
to draw. In the case of scalar distributions, it is the shape
In a more general case, the number of output dimensions,
of the tensor output by this Op. In that case, at runtime, the
len(self.outtype), is equal to len(shape)+self.ndim_added.
value associated with this lvector must have a length equal to
The special case where len(shape) == 0 means that the smallest
the number of dimensions promised by `self.outtype`.
shape compatible with the argument's shape will be used.
In a more general case, the number of output dimensions,
len(self.outtype), is equal to len(shape)+self.ndim_added.
:param args: the values associated with these variables will
The special case where len(shape) == 0 means that the smallest
be passed to the RandomState function during perform as extra
shape compatible with the argument's shape will be used.
"*args"-style arguments. These should be castable to variables
args
of Type TensorType.
The values associated with these variables will be passed to the
RandomState function during perform as extra "*args"-style
:rtype: Apply
arguments. These should be castable to variables of Type TensorType.
:return: Apply with two outputs. The first output is a
Returns
gof.generic Variable from which to draw further random numbers.
-------
The second output is the outtype() instance holding the random
Apply
draw.
Apply with two outputs. The first output is a gof.generic Variable
from which to draw further random numbers.
The second output is the outtype() instance holding the random
draw.
"""
"""
shape_
=
tensor
.
as_tensor_variable
(
shape
,
ndim
=
1
)
shape_
=
tensor
.
as_tensor_variable
(
shape
,
ndim
=
1
)
...
@@ -289,12 +294,15 @@ def _infer_ndim_bcast(ndim, shape, *args):
...
@@ -289,12 +294,15 @@ def _infer_ndim_bcast(ndim, shape, *args):
"""
"""
Infer the number of dimensions from the shape or the other arguments.
Infer the number of dimensions from the shape or the other arguments.
:rtype: (int, variable, tuple) triple, where the variable is an integer
Returns
vector, and the tuple contains Booleans.
-------
:returns: the first element returned is the inferred number of dimensions.
(int, variable, tuple) triple, where the variable is an integer vector,
The second element is the shape inferred (combining symbolic and constant
and the tuple contains Booleans
informations from shape and args).
The first element returned is the inferred number of dimensions.
The third element is a broadcasting pattern corresponding to that shape.
The second element is the shape inferred (combining symbolic and
constant information from shape and args).
The third element is a broadcasting pattern corresponding to that shape.
"""
"""
# Find the minimum value of ndim required by the *args
# Find the minimum value of ndim required by the *args
...
@@ -390,7 +398,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
...
@@ -390,7 +398,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
def
_generate_broadcasting_indices
(
out_shape
,
*
shapes
):
def
_generate_broadcasting_indices
(
out_shape
,
*
shapes
):
'''
"""
Return indices over each shape that broadcast them to match out_shape.
Return indices over each shape that broadcast them to match out_shape.
The first returned list is equivalent to numpy.ndindex(out_shape),
The first returned list is equivalent to numpy.ndindex(out_shape),
...
@@ -400,7 +408,8 @@ def _generate_broadcasting_indices(out_shape, *shapes):
...
@@ -400,7 +408,8 @@ def _generate_broadcasting_indices(out_shape, *shapes):
The shapes should have the same length as out_shape. If they are longer,
The shapes should have the same length as out_shape. If they are longer,
the right-most dimensions are ignored.
the right-most dimensions are ignored.
'''
"""
all_shapes
=
(
out_shape
,)
+
shapes
all_shapes
=
(
out_shape
,)
+
shapes
# Will contain the return value: a list of indices for each argument
# Will contain the return value: a list of indices for each argument
ret_indices
=
[[()]
for
shape
in
all_shapes
]
ret_indices
=
[[()]
for
shape
in
all_shapes
]
...
@@ -447,6 +456,7 @@ def uniform(random_state, size=None, low=0.0, high=1.0, ndim=None, dtype=None):
...
@@ -447,6 +456,7 @@ def uniform(random_state, size=None, low=0.0, high=1.0, ndim=None, dtype=None):
If dtype is not specified, it will be inferred from the dtype of
If dtype is not specified, it will be inferred from the dtype of
low and high, but will be at least as precise as floatX.
low and high, but will be at least as precise as floatX.
"""
"""
low
=
tensor
.
as_tensor_variable
(
low
)
low
=
tensor
.
as_tensor_variable
(
low
)
high
=
tensor
.
as_tensor_variable
(
high
)
high
=
tensor
.
as_tensor_variable
(
high
)
...
@@ -471,6 +481,7 @@ def normal(random_state, size=None, avg=0.0, std=1.0, ndim=None, dtype=None):
...
@@ -471,6 +481,7 @@ def normal(random_state, size=None, avg=0.0, std=1.0, ndim=None, dtype=None):
If dtype is not specified, it will be inferred from the dtype of
If dtype is not specified, it will be inferred from the dtype of
avg and std, but will be at least as precise as floatX.
avg and std, but will be at least as precise as floatX.
"""
"""
avg
=
tensor
.
as_tensor_variable
(
avg
)
avg
=
tensor
.
as_tensor_variable
(
avg
)
std
=
tensor
.
as_tensor_variable
(
std
)
std
=
tensor
.
as_tensor_variable
(
std
)
...
@@ -493,6 +504,7 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
...
@@ -493,6 +504,7 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
If size is None, the output shape will be determined by the shapes
If size is None, the output shape will be determined by the shapes
of n and prob.
of n and prob.
"""
"""
if
prob
is
not
None
:
if
prob
is
not
None
:
p
=
prob
p
=
prob
...
@@ -514,12 +526,13 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
...
@@ -514,12 +526,13 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
def
random_integers_helper
(
random_state
,
low
,
high
,
size
):
def
random_integers_helper
(
random_state
,
low
,
high
,
size
):
'''
"""
Helper function to draw random integers.
Helper function to draw random integers.
This is a generalization of numpy.random.random_integers to the case where
This is a generalization of numpy.random.random_integers to the case where
low and high are tensors.
low and high are tensors.
'''
"""
# Figure out the output shape
# Figure out the output shape
if
size
is
not
None
:
if
size
is
not
None
:
out_ndim
=
len
(
size
)
out_ndim
=
len
(
size
)
...
@@ -570,6 +583,7 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None,
...
@@ -570,6 +583,7 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None,
If size is None, the output shape will be determined by the shapes
If size is None, the output shape will be determined by the shapes
of low and high.
of low and high.
"""
"""
low
=
tensor
.
as_tensor_variable
(
low
)
low
=
tensor
.
as_tensor_variable
(
low
)
high
=
tensor
.
as_tensor_variable
(
high
)
high
=
tensor
.
as_tensor_variable
(
high
)
...
@@ -580,11 +594,13 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None,
...
@@ -580,11 +594,13 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None,
def
choice_helper
(
random_state
,
a
,
replace
,
p
,
size
):
def
choice_helper
(
random_state
,
a
,
replace
,
p
,
size
):
"""Helper function to draw random numbers using numpy's choice function.
"""
Helper function to draw random numbers using numpy's choice function.
This is a generalization of numpy.random.choice that coerces
This is a generalization of numpy.random.choice that coerces
`replace` to a bool and replaces `p` with None when p is a vector
`replace` to a bool and replaces `p` with None when p is a vector
of 0 elements.
of 0 elements.
"""
"""
if
a
.
ndim
>
1
:
if
a
.
ndim
>
1
:
raise
ValueError
(
'a.ndim (
%
i) must be 0 or 1'
%
a
.
ndim
)
raise
ValueError
(
'a.ndim (
%
i) must be 0 or 1'
%
a
.
ndim
)
...
@@ -608,6 +624,7 @@ def choice(random_state, size=None, a=2, replace=True, p=None, ndim=None,
...
@@ -608,6 +624,7 @@ def choice(random_state, size=None, a=2, replace=True, p=None, ndim=None,
may be a plain integer to supplement the missing information.
may be a plain integer to supplement the missing information.
If size is None, a scalar will be returned.
If size is None, a scalar will be returned.
"""
"""
# numpy.random.choice is only available for numpy versions >= 1.7
# numpy.random.choice is only available for numpy versions >= 1.7
major
,
minor
,
_
=
numpy
.
version
.
short_version
.
split
(
'.'
)
major
,
minor
,
_
=
numpy
.
version
.
short_version
.
split
(
'.'
)
...
@@ -631,17 +648,21 @@ def poisson(random_state, size=None, lam=1.0, ndim=None, dtype='int64'):
...
@@ -631,17 +648,21 @@ def poisson(random_state, size=None, lam=1.0, ndim=None, dtype='int64'):
"""
"""
Draw samples from a Poisson distribution.
Draw samples from a Poisson distribution.
The Poisson distribution is the limit of the Binomial distribution for large N.
The Poisson distribution is the limit of the Binomial distribution for
large N.
:param lam: float or ndarray-like of the same shape as size parameter
Parameters
----------
lam : float or ndarray-like of the same shape as size parameter
Expectation of interval, should be >= 0.
Expectation of interval, should be >= 0.
size: int or tuple of ints, optional
Output shape. If the given shape is, e.g., (m, n, k), then m * n * k
samples are drawn.
dtype
The dtype of the return value (which will represent counts).
:param size: int or tuple of ints, optional
size or ndim must be given.
Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn.
:param dtype: the dtype of the return value (which will represent counts)
size or ndim must be given
"""
"""
lam
=
tensor
.
as_tensor_variable
(
lam
)
lam
=
tensor
.
as_tensor_variable
(
lam
)
...
@@ -653,7 +674,8 @@ def poisson(random_state, size=None, lam=1.0, ndim=None, dtype='int64'):
...
@@ -653,7 +674,8 @@ def poisson(random_state, size=None, lam=1.0, ndim=None, dtype='int64'):
def
permutation_helper
(
random_state
,
n
,
shape
):
def
permutation_helper
(
random_state
,
n
,
shape
):
"""Helper function to generate permutations from integers.
"""
Helper function to generate permutations from integers.
permutation_helper(random_state, n, (1,)) will generate a permutation of
permutation_helper(random_state, n, (1,)) will generate a permutation of
integers 0..n-1.
integers 0..n-1.
...
@@ -666,6 +688,7 @@ def permutation_helper(random_state, n, shape):
...
@@ -666,6 +688,7 @@ def permutation_helper(random_state, n, shape):
This is a generalization of numpy.random.permutation to tensors.
This is a generalization of numpy.random.permutation to tensors.
Otherwise it behaves the same.
Otherwise it behaves the same.
"""
"""
# n should be a 0-dimension array
# n should be a 0-dimension array
assert
n
.
shape
==
()
assert
n
.
shape
==
()
...
@@ -688,17 +711,20 @@ def permutation_helper(random_state, n, shape):
...
@@ -688,17 +711,20 @@ def permutation_helper(random_state, n, shape):
def
permutation
(
random_state
,
size
=
None
,
n
=
1
,
ndim
=
None
,
dtype
=
'int64'
):
def
permutation
(
random_state
,
size
=
None
,
n
=
1
,
ndim
=
None
,
dtype
=
'int64'
):
"""
"""
Returns permutations of the integers between 0 and n-1, as many times
Return permutations of the integers between 0 and n-1.
as required by size. For instance, if size=(p,q), p*q permutations
will be generated, and the output shape will be (p,q,n), because each
Returns them as many times as required by size. For instance, if size=(p,q),
permutation is of size n.
p*q permutations will be generated, and the output shape will be (p,q,n),
because each permutation is of size n.
Theano tries to infer the number of dimensions from the length of
Theano tries to infer the number of dimensions from the length of
the size argument and the shape of n, but you may always specify it
the size argument and the shape of n, but you may always specify it
with the `ndim` parameter.
with the `ndim` parameter.
:note:
Notes
Note that the output will then be of dimension ndim+1.
-----
Note that the output will then be of dimension ndim+1.
"""
"""
if
size
is
None
or
size
==
():
if
size
is
None
or
size
==
():
if
not
(
ndim
is
None
or
ndim
==
1
):
if
not
(
ndim
is
None
or
ndim
==
1
):
...
@@ -718,12 +744,13 @@ def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
...
@@ -718,12 +744,13 @@ def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
def
multinomial_helper
(
random_state
,
n
,
pvals
,
size
):
def
multinomial_helper
(
random_state
,
n
,
pvals
,
size
):
'''
"""
Helper function drawing from multinomial distributions.
Helper function drawing from multinomial distributions.
This is a generalization of numpy.random.multinomial to the case where
This is a generalization of numpy.random.multinomial to the case where
n and pvals are tensors.
n and pvals are tensors.
'''
"""
# Figure out the shape if it's None
# Figure out the shape if it's None
# Note: the output ndim will be ndim+1, because the multinomial
# Note: the output ndim will be ndim+1, because the multinomial
# adds a dimension. The length of that dimension is pvals.shape[-1].
# adds a dimension. The length of that dimension is pvals.shape[-1].
...
@@ -791,31 +818,40 @@ def multinomial_helper(random_state, n, pvals, size):
...
@@ -791,31 +818,40 @@ def multinomial_helper(random_state, n, pvals, size):
def
multinomial
(
random_state
,
size
=
None
,
n
=
1
,
pvals
=
[
0.5
,
0.5
],
def
multinomial
(
random_state
,
size
=
None
,
n
=
1
,
pvals
=
[
0.5
,
0.5
],
ndim
=
None
,
dtype
=
'int64'
):
ndim
=
None
,
dtype
=
'int64'
):
"""Sample from one or more multinomial distributions defined by
"""
Sample from one or more multinomial distributions defined by
one-dimensional slices in pvals.
one-dimensional slices in pvals.
:param pvals: a tensor of shape "nmulti+(L,)" describing each multinomial
Parameters
----------
pvals
A tensor of shape "nmulti+(L,)" describing each multinomial
distribution. This tensor must have the property that
distribution. This tensor must have the property that
numpy.allclose(pvals.sum(axis=-1), 1) is true.
numpy.allclose(pvals.sum(axis=-1), 1) is true.
size
:param size: a
vector of shape information for the output; this can also
A
vector of shape information for the output; this can also
specify the "nmulti" part of pvals' shape. A -1 in the k'th position
specify the "nmulti" part of pvals' shape. A -1 in the k'th position
from the right means to borrow the k'th position from the
from the right means to borrow the k'th position from the
right in nmulti. (See examples below.)
right in nmulti. (See examples below.)
Default ``None`` means size=nmulti.
Default ``None`` means size=nmulti.
n
:param n: t
he number of experiments to simulate for each
T
he number of experiments to simulate for each
multinomial. This can be a scalar, or tensor, it will be
multinomial. This can be a scalar, or tensor, it will be
broadcasted to have shape "nmulti".
broadcasted to have shape "nmulti".
dtype
:param dtype: the dtype of the return value (which will represent counts)
The dtype of the return value (which will represent counts)
:returns: tensor of len(size)+1 dimensions, and shape[-1]==L, with
Returns
the specified ``dtype``, with the experiment counts. See
-------
tensor
Tensor of len(size)+1 dimensions, and shape[-1]==L, with
the specified ``dtype``, with the experiment counts. See
examples to understand the shape of the return value, which is
examples to understand the shape of the return value, which is
derived from both size and pvals.shape.
In return value rval,
derived from both size and pvals.shape. In return value rval,
"numpy.allclose(rval.sum(axis=-1), n)" will be true.
"numpy.allclose(rval.sum(axis=-1), n)" will be true.
Extended Summary
----------------
For example, to simulate n experiments from each multinomial in a batch of
For example, to simulate n experiments from each multinomial in a batch of
size B:
size B:
...
@@ -881,8 +917,8 @@ class RandomStreamsBase(object):
...
@@ -881,8 +917,8 @@ class RandomStreamsBase(object):
return the number of successes.
return the number of successes.
If the size argument is ambiguous on the number of dimensions,
If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing
ndim may be a plain integer to supplement the missing
information.
information.
"""
"""
if
prob
is
not
None
:
if
prob
is
not
None
:
p
=
prob
p
=
prob
...
@@ -895,8 +931,8 @@ class RandomStreamsBase(object):
...
@@ -895,8 +931,8 @@ class RandomStreamsBase(object):
distribution between low and high.
distribution between low and high.
If the size argument is ambiguous on the number of dimensions,
If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing
ndim may be a plain integer to supplement the missing
information.
information.
"""
"""
return
self
.
gen
(
uniform
,
size
,
low
,
high
,
ndim
=
ndim
,
dtype
=
dtype
)
return
self
.
gen
(
uniform
,
size
,
low
,
high
,
ndim
=
ndim
,
dtype
=
dtype
)
...
@@ -906,8 +942,8 @@ class RandomStreamsBase(object):
...
@@ -906,8 +942,8 @@ class RandomStreamsBase(object):
the specified standard deviation (std).
the specified standard deviation (std).
If the size argument is ambiguous on the number of dimensions,
If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing
ndim may be a plain integer to supplement the missing
information.
information.
"""
"""
return
self
.
gen
(
normal
,
size
,
avg
,
std
,
ndim
=
ndim
,
dtype
=
dtype
)
return
self
.
gen
(
normal
,
size
,
avg
,
std
,
ndim
=
ndim
,
dtype
=
dtype
)
...
@@ -917,8 +953,8 @@ class RandomStreamsBase(object):
...
@@ -917,8 +953,8 @@ class RandomStreamsBase(object):
Sample a random integer between low and high, both inclusive.
Sample a random integer between low and high, both inclusive.
If the size argument is ambiguous on the number of dimensions,
If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing
ndim may be a plain integer to supplement the missing
information.
information.
"""
"""
return
self
.
gen
(
random_integers
,
size
,
low
,
high
,
ndim
=
ndim
,
return
self
.
gen
(
random_integers
,
size
,
low
,
high
,
ndim
=
ndim
,
dtype
=
dtype
)
dtype
=
dtype
)
...
@@ -926,13 +962,14 @@ class RandomStreamsBase(object):
...
@@ -926,13 +962,14 @@ class RandomStreamsBase(object):
def
choice
(
self
,
size
=
None
,
a
=
2
,
replace
=
True
,
p
=
None
,
ndim
=
None
,
def
choice
(
self
,
size
=
None
,
a
=
2
,
replace
=
True
,
p
=
None
,
ndim
=
None
,
dtype
=
'int64'
):
dtype
=
'int64'
):
"""
"""
Choose values from `a` with or without replacement. `a` can be a 1-D
Choose values from `a` with or without replacement.
array or a positive scalar. If `a` is a scalar, the samples are drawn
from the range 0,...,a-1.
`a` can be a 1-D array or a positive scalar.
If `a` is a scalar, the samples are drawn from the range 0,...,a-1.
If the size argument is ambiguous on the number of dimensions,
If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing
ndim may be a plain integer to supplement the missing
information.
information.
"""
"""
return
self
.
gen
(
choice
,
size
,
a
,
replace
,
p
,
ndim
=
ndim
,
dtype
=
dtype
)
return
self
.
gen
(
choice
,
size
,
a
,
replace
,
p
,
ndim
=
ndim
,
dtype
=
dtype
)
...
@@ -940,27 +977,32 @@ class RandomStreamsBase(object):
...
@@ -940,27 +977,32 @@ class RandomStreamsBase(object):
"""
"""
Draw samples from a Poisson distribution.
Draw samples from a Poisson distribution.
The Poisson distribution is the limit of the Binomial distribution for large N.
The Poisson distribution is the limit of the Binomial distribution for
large N.
If the size argument is ambiguous on the number of dimensions,
If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing
ndim may be a plain integer to supplement the missing
information.
information.
"""
"""
return
self
.
gen
(
poisson
,
size
,
lam
,
ndim
=
ndim
,
dtype
=
dtype
)
return
self
.
gen
(
poisson
,
size
,
lam
,
ndim
=
ndim
,
dtype
=
dtype
)
def
permutation
(
self
,
size
=
None
,
n
=
1
,
ndim
=
None
,
dtype
=
'int64'
):
def
permutation
(
self
,
size
=
None
,
n
=
1
,
ndim
=
None
,
dtype
=
'int64'
):
"""
"""
Returns permutations of the integers between 0 and n-1, as many times
Return permutations of the integers between 0 and n-1.
as required by size. For instance, if size=(p,q), p*q permutations
will be generated, and the output shape will be (p,q,n), because each
Returns them as many times as required by size. For instance,
if size=(p,q), p*q permutations will be generated,
and the output shape will be (p,q,n), because each
permutation is of size n.
permutation is of size n.
Theano tries to infer the number of dimensions from the length
Theano tries to infer the number of dimensions from the length
of the size argument and the shape of n, but you may always
of the size argument and the shape of n, but you may always
specify it with the `ndim` parameter.
specify it with the `ndim` parameter.
.. note::
Notes
Note that the output will then be of dimension ndim+1.
-----
Note that the output will then be of dimension ndim+1.
"""
"""
return
self
.
gen
(
permutation
,
size
,
n
,
ndim
=
ndim
,
dtype
=
dtype
)
return
self
.
gen
(
permutation
,
size
,
n
,
ndim
=
ndim
,
dtype
=
dtype
)
...
@@ -976,16 +1018,20 @@ class RandomStreamsBase(object):
...
@@ -976,16 +1018,20 @@ class RandomStreamsBase(object):
of the size argument and the shapes of n and pvals, but you may
of the size argument and the shapes of n and pvals, but you may
always specify it with the `ndim` parameter.
always specify it with the `ndim` parameter.
.. note::
Notes
Note that the output will then be of dimension ndim+1.
-----
Note that the output will then be of dimension ndim+1.
"""
"""
return
self
.
gen
(
multinomial
,
size
,
n
,
pvals
,
ndim
=
ndim
,
dtype
=
dtype
)
return
self
.
gen
(
multinomial
,
size
,
n
,
pvals
,
ndim
=
ndim
,
dtype
=
dtype
)
def
shuffle_row_elements
(
self
,
input
):
def
shuffle_row_elements
(
self
,
input
):
"""Return a variable with every row (rightmost index) shuffled.
"""
Return a variable with every row (rightmost index) shuffled.
This uses permutation random variable internally, available via
This uses permutation random variable internally, available via
the ``.permutation`` attribute of the return value.
the ``.permutation`` attribute of the return value.
"""
"""
perm
=
self
.
permutation
(
size
=
input
.
shape
[:
-
1
],
n
=
input
.
shape
[
-
1
],
perm
=
self
.
permutation
(
size
=
input
.
shape
[:
-
1
],
n
=
input
.
shape
[
-
1
],
ndim
=
input
.
ndim
-
1
)
ndim
=
input
.
ndim
-
1
)
...
...
theano/tensor/shared_randomstreams.py
浏览文件 @
6304a061
"""Define RandomStreams, providing random number variables for Theano
graphs.
"""
"""
Define RandomStreams, providing random number variables for Theano
graphs.
"""
import
copy
import
copy
import
numpy
import
numpy
...
@@ -20,7 +21,10 @@ class RandomStateSharedVariable(SharedVariable):
...
@@ -20,7 +21,10 @@ class RandomStateSharedVariable(SharedVariable):
@shared_constructor
@shared_constructor
def
randomstate_constructor
(
value
,
name
=
None
,
strict
=
False
,
def
randomstate_constructor
(
value
,
name
=
None
,
strict
=
False
,
allow_downcast
=
None
,
borrow
=
False
):
allow_downcast
=
None
,
borrow
=
False
):
"""SharedVariable Constructor for RandomState"""
"""
SharedVariable Constructor for RandomState.
"""
if
not
isinstance
(
value
,
numpy
.
random
.
RandomState
):
if
not
isinstance
(
value
,
numpy
.
random
.
RandomState
):
raise
TypeError
raise
TypeError
if
not
borrow
:
if
not
borrow
:
...
@@ -37,20 +41,20 @@ class RandomStreams(raw_random.RandomStreamsBase):
...
@@ -37,20 +41,20 @@ class RandomStreams(raw_random.RandomStreamsBase):
"""
"""
Module component with similar interface to numpy.random
Module component with similar interface to numpy.random
(numpy.random.RandomState)
(numpy.random.RandomState)
Parameters
----------
seed: None or int
A default seed to initialize the RandomState
instances after build. See `RandomStreamsInstance.__init__`
for more details.
"""
"""
def
updates
(
self
):
def
updates
(
self
):
return
list
(
self
.
state_updates
)
return
list
(
self
.
state_updates
)
def
__init__
(
self
,
seed
=
None
):
def
__init__
(
self
,
seed
=
None
):
"""
:type seed: None or int
:param seed: a default seed to initialize the RandomState
instances after build. See `RandomStreamsInstance.__init__`
for more details.
"""
super
(
RandomStreams
,
self
)
.
__init__
()
super
(
RandomStreams
,
self
)
.
__init__
()
# A list of pairs of the form (input_r, output_r). This will be
# A list of pairs of the form (input_r, output_r). This will be
# over-ridden by the module instance to contain stream generators.
# over-ridden by the module instance to contain stream generators.
...
@@ -62,14 +66,18 @@ class RandomStreams(raw_random.RandomStreamsBase):
...
@@ -62,14 +66,18 @@ class RandomStreams(raw_random.RandomStreamsBase):
self
.
gen_seedgen
=
numpy
.
random
.
RandomState
(
seed
)
self
.
gen_seedgen
=
numpy
.
random
.
RandomState
(
seed
)
def
seed
(
self
,
seed
=
None
):
def
seed
(
self
,
seed
=
None
):
"""Re-initialize each random stream
"""
Re-initialize each random stream.
:param seed: each random stream will be assigned a unique
state that depends deterministically on this value.
:type seed: None or integer in range 0 to 2**30
Parameters
----------
seed : None or integer in range 0 to 2**30
Each random stream will be assigned a unique state that depends
deterministically on this value.
:rtype: None
Returns
-------
None
"""
"""
if
seed
is
None
:
if
seed
is
None
:
...
@@ -82,54 +90,72 @@ class RandomStreams(raw_random.RandomStreamsBase):
...
@@ -82,54 +90,72 @@ class RandomStreams(raw_random.RandomStreamsBase):
borrow
=
True
)
borrow
=
True
)
def
__getitem__
(
self
,
item
):
def
__getitem__
(
self
,
item
):
"""Retrieve the numpy RandomState instance associated with a
"""
particular stream
Retrieve the numpy RandomState instance associated with a particular
stream.
:param item: a variable of type RandomStateType, associated
Parameters
with this RandomStream
----------
item
A variable of type RandomStateType, associated
with this RandomStream.
:rtype: numpy RandomState (or None, before initialize)
Returns
-------
numpy RandomState (or None, before initialize)
:note: This is kept for compatibility with
Notes
`tensor.randomstreams.RandomStreams`. The simpler syntax
-----
``item.rng.get_value()`` is also valid.
This is kept for compatibility with `tensor.randomstreams.RandomStreams`.
The simpler syntax ``item.rng.get_value()`` is also valid.
"""
"""
return
item
.
get_value
(
borrow
=
True
)
return
item
.
get_value
(
borrow
=
True
)
def
__setitem__
(
self
,
item
,
val
):
def
__setitem__
(
self
,
item
,
val
):
"""
Set the numpy RandomState instance associated with a
"""
particular stream
Set the numpy RandomState instance associated with a particular stream.
:param item: a variable of type RandomStateType, associated
Parameters
with this RandomStream
----------
item
A variable of type RandomStateType, associated with this
RandomStream.
:param val: the new valu
e
val : numpy RandomStat
e
:type val: numpy RandomState
The new value.
:rtype: None
Returns
-------
None
:note: This is kept for compatibility with
Notes
`tensor.randomstreams.RandomStreams`. The simpler syntax
-----
``item.rng.set_value(val)`` is also valid.
This is kept for compatibility with `tensor.randomstreams.RandomStreams`.
The simpler syntax ``item.rng.set_value(val)`` is also valid.
"""
"""
item
.
set_value
(
val
,
borrow
=
True
)
item
.
set_value
(
val
,
borrow
=
True
)
def
gen
(
self
,
op
,
*
args
,
**
kwargs
):
def
gen
(
self
,
op
,
*
args
,
**
kwargs
):
"""Create a new random stream in this container.
"""
Create a new random stream in this container.
:param op: a RandomFunction instance to
Parameters
:param args: interpreted by `op`
----------
op
:param kwargs: interpreted by `op`
A RandomFunction instance to
args
:returns: The symbolic random draw part of op()'s return
Interpreted by `op`.
value. This function stores the updated RandomStateType
kwargs
Variable for use at `build` time.
Interpreted by `op`.
:rtype: TensorVariable
Returns
-------
Tensor Variable
The symbolic random draw part of op()'s return value.
This function stores the updated RandomStateType Variable
for use at `build` time.
"""
"""
seed
=
int
(
self
.
gen_seedgen
.
randint
(
2
**
30
))
seed
=
int
(
self
.
gen_seedgen
.
randint
(
2
**
30
))
...
...
theano/tensor/sharedvar.py
浏览文件 @
6304a061
...
@@ -8,9 +8,12 @@ from theano.compile import shared_constructor, SharedVariable
...
@@ -8,9 +8,12 @@ from theano.compile import shared_constructor, SharedVariable
def
load_shared_variable
(
val
):
def
load_shared_variable
(
val
):
"""This function is only here to keep some pickles loading
"""
This function is only here to keep some pickles loading
after a failed fix done in August 2011.
after a failed fix done in August 2011.
It can be removed after sufficient time has passed."""
It can be removed after sufficient time has passed.
"""
return
tensor_constructor
(
val
)
return
tensor_constructor
(
val
)
...
@@ -22,13 +25,15 @@ class TensorSharedVariable(_tensor_py_operators, SharedVariable):
...
@@ -22,13 +25,15 @@ class TensorSharedVariable(_tensor_py_operators, SharedVariable):
@shared_constructor
@shared_constructor
def
tensor_constructor
(
value
,
name
=
None
,
strict
=
False
,
allow_downcast
=
None
,
def
tensor_constructor
(
value
,
name
=
None
,
strict
=
False
,
allow_downcast
=
None
,
borrow
=
False
,
broadcastable
=
None
):
borrow
=
False
,
broadcastable
=
None
):
"""SharedVariable Constructor for TensorType
"""
SharedVariable Constructor for TensorType.
:note: Regarding the inference of the broadcastable pattern...
Notes
-----
Regarding the inference of the broadcastable pattern...
The default is to assume that the value might be resized in any
The default is to assume that the value might be resized in any
dimension, so the default broadcastable is
dimension, so the default broadcastable is ``(False,)*len(value.shape)``.
``(False,)*len(value.shape)``. The optional `broadcastable`
The optional `broadcastable` argument will override this default.
argument will override this default.
"""
"""
if
not
isinstance
(
value
,
numpy
.
ndarray
):
if
not
isinstance
(
value
,
numpy
.
ndarray
):
...
@@ -61,13 +66,16 @@ class ScalarSharedVariable(_tensor_py_operators, SharedVariable):
...
@@ -61,13 +66,16 @@ class ScalarSharedVariable(_tensor_py_operators, SharedVariable):
@shared_constructor
@shared_constructor
def
scalar_constructor
(
value
,
name
=
None
,
strict
=
False
,
allow_downcast
=
None
,
def
scalar_constructor
(
value
,
name
=
None
,
strict
=
False
,
allow_downcast
=
None
,
borrow
=
False
):
borrow
=
False
):
"""SharedVariable constructor for scalar values. Default: int64 or float64.
"""
SharedVariable constructor for scalar values. Default: int64 or float64.
:note: We implement this using 0-d tensors for now.
Notes
-----
We implement this using 0-d tensors for now.
:note:
We ignore the borrow parameter as we convert ``value`` to an
We ignore the borrow parameter as we convert ``value`` to an
ndarray (this is a new object). This respects the semantic of
ndarray (this is a new object). This respects the semantic of
borrow, as it is a hint to Theano that we can reuse it.
borrow, as it is a hint to Theano that we can reuse it.
"""
"""
if
not
isinstance
(
value
,
(
numpy
.
number
,
float
,
int
,
complex
)):
if
not
isinstance
(
value
,
(
numpy
.
number
,
float
,
int
,
complex
)):
...
...
theano/tensor/signal/conv.py
浏览文件 @
6304a061
"""
"""
Contains a wrapper function for tensor.nnet.ConvOp, which can be used to perform
Contains a wrapper function for tensor.nnet.ConvOp, which can be used to perform
generic 2D convolution.
generic 2D convolution.
"""
"""
__docformat__
=
"restructuredtext en"
__docformat__
=
"restructuredtext en"
import
warnings
import
warnings
...
@@ -25,20 +25,29 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
...
@@ -25,20 +25,29 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
Shape parameters are optional and will result in faster execution.
Shape parameters are optional and will result in faster execution.
:type input: dmatrix of dtensor3
Parameters
:param input: symbolic variable for images to be filtered
----------
:type filters: dmatrix of dtensor3
input : dmatrix or dtensor3
:param filters: symbolic variable containing filter values
Symbolic variable for images to be filtered.
:param border_mode: 'valid' or 'full'. see scipy.signal.convolve2d
filters : dmatrix or dtensor3
:param subsample: factor by which to subsample output
Symbolic variable containing filter values.
:type image_shape: tuple of length 2 or 3
border_mode: {'valid', 'full'}
:param image_shape: ([number images,] image height, image width)
See scipy.signal.convolve2d.
:type filter_shape: tuple of length 2 or 3
subsample
:param filter_shape: ([number filters,] filter height, filter width)
Factor by which to subsample output.
:param kwargs: see theano.tensor.nnet.conv.conv2d
image_shape : tuple of length 2 or 3
:rtype: symbolic 2D,3D or 4D tensor
([number images,] image height, image width).
:return: tensor of filtered images, with shape
filter_shape : tuple of length 2 or 3
([number images,] [number filters,] image height, image width)
([number filters,] filter height, filter width).
kwargs
See theano.tensor.nnet.conv.conv2d.
Returns
-------
symbolic 2D,3D or 4D tensor
Tensor of filtered images, with shape
([number images,] [number filters,] image height, image width).
"""
"""
assert
input
.
ndim
in
(
2
,
3
)
assert
input
.
ndim
in
(
2
,
3
)
assert
filters
.
ndim
in
(
2
,
3
)
assert
filters
.
ndim
in
(
2
,
3
)
...
...
theano/tensor/signal/downsample.py
浏览文件 @
6304a061
""" Ops for downsampling images.
"""
Ops for downsampling images.
Planned:
Planned:
DownsampleFactorMax, DownsampleAvg, DownsampleSoftmax.
DownsampleFactorMax, DownsampleAvg, DownsampleSoftmax.
...
@@ -29,12 +30,14 @@ def max_pool_2d_same_size(input, patch_size):
...
@@ -29,12 +30,14 @@ def max_pool_2d_same_size(input, patch_size):
keeping only the maximum values. The output has the same dimensions as
keeping only the maximum values. The output has the same dimensions as
the input.
the input.
:type input: 4-D theano tensor of input images.
Parameters
:param input: input images. Max pooling will be done over the 2 last
----------
dimensions.
input : 4-D theano tensor of input images
:type patch_size: tuple of length 2
Input images. Max pooling will be done over the 2 last dimensions.
:param patch_size: size of the patch (patch height, patch width).
patch_size : tuple of length 2
Size of the patch (patch height, patch width).
(2,2) will retain only one non-zero value per patch of 4 values.
(2,2) will retain only one non-zero value per patch of 4 values.
"""
"""
output
=
DownsampleFactorMax
(
patch_size
,
True
)(
input
)
output
=
DownsampleFactorMax
(
patch_size
,
True
)(
input
)
outs
=
MaxPoolGrad
(
patch_size
,
True
)(
input
,
output
,
output
)
outs
=
MaxPoolGrad
(
patch_size
,
True
)(
input
,
output
,
output
)
...
@@ -48,29 +51,29 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
...
@@ -48,29 +51,29 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
the specified factor, by keeping only the maximum value of non-overlapping
the specified factor, by keeping only the maximum value of non-overlapping
patches of size (ds[0],ds[1])
patches of size (ds[0],ds[1])
:type input: N-D theano tensor of input images.
Parameters
:param input: input images. Max pooling will be done over the 2 last
----------
dimensions.
input : N-D theano tensor of input images
:type ds: tuple of length 2
Input images. Max pooling will be done over the 2 last dimensions.
:param ds: factor by which to downscale (vertical ds, horizontal ds).
ds : tuple of length 2
Factor by which to downscale (vertical ds, horizontal ds).
(2,2) will halve the image in each dimension.
(2,2) will halve the image in each dimension.
:type ignore_border
: bool
ignore_border
: bool
:param ignore_border: When True, (5,5) input with ds=(2,2)
When True, (5,5) input with ds=(2,2) will generate a (2,2) output.
will generate a (2,2) output.
(3,3) otherwise.
(3,3) otherwise.
:type st
: tuple of lenght 2
st
: tuple of length 2
:param st: stride size, which is the number of shifts
Stride size, which is the number of shifts over rows/cols to get the
over rows/cols to get the the next pool region.
next pool region. If st is None, it is considered equal to ds
if st is None, it is considered equal to ds
(no overlap on pooling regions).
(no overlap on pooling regions)
padding : tuple of two ints
:param padding:
(pad_h, pad_w), pad zeros to extend beyond four borders
(pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
:param mode: 'max', 'sum', 'average_inc_pad' or 'average_exc_pad'.
Operation executed on each window. `max` and `sum` always exclude
Operation executed on each window. `max` and `sum` always exclude
the padding in the computation. `average` gives you the choice to
the padding in the computation. `average` gives you the choice to
include or exclude it.
include or exclude it.
:type mode: string
"""
"""
if
input
.
ndim
<
2
:
if
input
.
ndim
<
2
:
raise
NotImplementedError
(
'max_pool_2d requires a dimension >= 2'
)
raise
NotImplementedError
(
'max_pool_2d requires a dimension >= 2'
)
...
@@ -104,44 +107,69 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
...
@@ -104,44 +107,69 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
class
DownsampleFactorMax
(
Op
):
class
DownsampleFactorMax
(
Op
):
"""For N-dimensional tensors, consider that the last two
"""
dimensions span images. This Op downsamples these images by
For N-dimensional tensors, consider that the last two dimensions span
taking the max, sum or average over different patch.
images. This Op downsamples these images by taking the max, sum or average
over different patches.
The constructor takes the max, sum or average or different input patches.
Parameters
----------
ds : list or tuple of two ints
Downsample factor over rows and column.
ds indicates the pool region size.
ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col of partial
downsampling (False) or ignore it (True).
st : list or tuple of two ints or None
Stride size, which is the number of shifts over rows/cols to get the
next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding: tuple of two ints
(pad_h, pad_w), pad zeros to extend beyond four borders of the images,
pad_h is the size of the top and bottom margins, and pad_w is the size
of the left and right margins.
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
('average_inc_pad' includes the padding in the count,
'average_exc_pad' excludes it)
"""
"""
__props__
=
(
'ds'
,
'ignore_border'
,
'st'
,
'padding'
,
'mode'
)
__props__
=
(
'ds'
,
'ignore_border'
,
'st'
,
'padding'
,
'mode'
)
@staticmethod
@staticmethod
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
)):
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
)):
"""Return the shape of the output from this op, for input of given
"""
Return the shape of the output from this op, for input of given
shape and flags.
shape and flags.
:param imgshape: the shape of a tensor of images. The last two elements
Parameters
are interpreted as the number of rows, and the number of cols.
----------
:type imgshape: tuple, list, or similar of integer or
imgshape : tuple, list, or similar of integer or scalar Theano variable
scalar Theano variable.
The shape of a tensor of images. The last two elements are
interpreted as the number of rows, and the number of cols.
:param ds: downsample factor over rows and columns
ds : list or tuple of two ints
this parameter indicates the size of the pooling region
Downsample factor over rows and columns; this parameter indicates
:type ds: list or tuple of two ints
the size of the pooling region.
st : list or tuple of two ints
:param st: the stride size. This is the distance between the pooling
The stride size. This is the distance between the pooling regions.
regions. If it's set to None, in which case it equlas ds.
If it's set to None, it equals ds.
:type st: list or tuple of two ints
ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col of
:param ignore_border: if ds doesn't divide imgshape, do we include an
partial downsampling (False) or ignore it (True).
extra row/col of partial downsampling (False) or ignore it (True).
padding : tuple of two ints
:type ignore_border: bool
(pad_h, pad_w), pad zeros to extend beyond four borders
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
:rtype: list
Returns
:returns: the shape of the output from this op, for input of given
-------
shape. This will have the same length as imgshape, but with last
list
two elements reduced as per the downsampling & ignore_border flags.
The shape of the output from this op, for input of given shape.
This will have the same length as imgshape, but with last two
elements reduced as per the downsampling & ignore_border flags.
"""
"""
if
len
(
imgshape
)
<
2
:
if
len
(
imgshape
)
<
2
:
raise
TypeError
(
'imgshape must have at least two elements '
raise
TypeError
(
'imgshape must have at least two elements '
...
@@ -190,33 +218,6 @@ class DownsampleFactorMax(Op):
...
@@ -190,33 +218,6 @@ class DownsampleFactorMax(Op):
def
__init__
(
self
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
),
def
__init__
(
self
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
),
mode
=
'max'
):
mode
=
'max'
):
""" Take the max, sum or average or different input patches.
:param ds: downsample factor over rows and column.
ds indicates the pool region size.
:type ds: list or tuple of two ints
:param ignore_border: if ds doesn't divide imgshape, do we include
an extra row/col of partial downsampling (False) or
ignore it (True).
:type ignore_border: bool
: param st: stride size, which is the number of shifts
over rows/cols to get the the next pool region.
if st is None, it is considered equal to ds
(no overlap on pooling regions)
: type st: list or tuple of two ints or None
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
:param mode: 'max', 'sum', 'average_inc_pad', 'average_exc_pad'.
('average_inc_pad' excludes the padding from the count,
'average_exc_pad' include it)
"""
self
.
ds
=
tuple
(
ds
)
self
.
ds
=
tuple
(
ds
)
if
not
all
([
isinstance
(
d
,
int
)
for
d
in
ds
]):
if
not
all
([
isinstance
(
d
,
int
)
for
d
in
ds
]):
raise
ValueError
(
raise
ValueError
(
...
@@ -876,35 +877,36 @@ class DownsampleFactorMaxGradGrad(Op):
...
@@ -876,35 +877,36 @@ class DownsampleFactorMaxGradGrad(Op):
@staticmethod
@staticmethod
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
)):
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
)):
"""Return the shape of the output from this op, for input of given
"""
Return the shape of the output from this op, for input of given
shape and flags.
shape and flags.
:param imgshape: the shape of a tensor of images. The last two elements
Parameters
----------
imgshape : tuple, list, or similar of integer or scalar Theano variable
The shape of a tensor of images. The last two elements
are interpreted as the number of rows, and the number of cols.
are interpreted as the number of rows, and the number of cols.
:type imgshape: tuple, list, or similar of integer or
ds : list or tuple of two ints
scalar Theano variable.
Downsample factor over rows and columns; this parameter indicates the
size of the pooling region.
:param ds: downsample factor over rows and columns
st: list or tuple of two ints
this parameter indicates the size of the pooling region
The stride size. This is the distance between the pooling regions.
:type ds: list or tuple of two ints
If it's set to None, it equals ds.
ignore_border: bool
:param st: the stride size. This is the distance between the pooling
If ds doesn't divide imgshape, do we include an
regions. If it's set to None, in which case it equlas ds.
:type st: list or tuple of two ints
:param ignore_border: if ds doesn't divide imgshape, do we include an
extra row/col of partial downsampling (False) or ignore it (True).
extra row/col of partial downsampling (False) or ignore it (True).
:type ignore_border: bool
padding : tuple of two ints
(pad_h, pad_w), pad zeros to extend beyond four borders
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
:rtype: list
Returns
:returns: the shape of the output from this op, for input of given
-------
shape. This will have the same length as imgshape, but with last
list
two elements reduced as per the downsampling & ignore_border flags.
The shape of the output from this op, for input of given shape.
This will have the same length as imgshape, but with last two
elements reduced as per the downsampling & ignore_border flags.
"""
"""
if
len
(
imgshape
)
<
2
:
if
len
(
imgshape
)
<
2
:
raise
TypeError
(
'imgshape must have at least two elements '
raise
TypeError
(
'imgshape must have at least two elements '
...
...
theano/tensor/slinalg.py
浏览文件 @
6304a061
...
@@ -31,9 +31,10 @@ MATRIX_STRUCTURES = (
...
@@ -31,9 +31,10 @@ MATRIX_STRUCTURES = (
class
Cholesky
(
Op
):
class
Cholesky
(
Op
):
"""
"""
Return a triangular matrix square root of positive semi-definite `x`
Return a triangular matrix square root of positive semi-definite `x`.
L = cholesky(X, lower=True) implies dot(L, L.T) == X.
L = cholesky(X, lower=True) implies dot(L, L.T) == X
"""
"""
# TODO: inplace
# TODO: inplace
# TODO: for specific dtypes
# TODO: for specific dtypes
...
@@ -90,13 +91,16 @@ class CholeskyGrad(Op):
...
@@ -90,13 +91,16 @@ class CholeskyGrad(Op):
return
Apply
(
self
,
[
x
,
l
,
dz
],
[
x
.
type
()])
return
Apply
(
self
,
[
x
,
l
,
dz
],
[
x
.
type
()])
def
perform
(
self
,
node
,
inputs
,
outputs
):
def
perform
(
self
,
node
,
inputs
,
outputs
):
"""Implements the "reverse-mode" gradient [1]_ for the
"""
Implements the "reverse-mode" gradient [1]_ for the
Cholesky factorization of a positive-definite matrix.
Cholesky factorization of a positive-definite matrix.
References
----------
.. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
.. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
Journal of Computational and Graphical Statistics,
Journal of Computational and Graphical Statistics,
Vol. 4, No. 2 (Jun.,1995), pp. 134-147
Vol. 4, No. 2 (Jun.,1995), pp. 134-147
http://www.jstor.org/stable/1390762
http://www.jstor.org/stable/1390762
"""
"""
x
=
inputs
[
0
]
x
=
inputs
[
0
]
...
@@ -133,7 +137,10 @@ class CholeskyGrad(Op):
...
@@ -133,7 +137,10 @@ class CholeskyGrad(Op):
class
Solve
(
Op
):
class
Solve
(
Op
):
"""Solve a system of linear equations"""
"""
Solve a system of linear equations.
"""
__props__
=
(
'A_structure'
,
'lower'
,
'overwrite_A'
,
'overwrite_b'
)
__props__
=
(
'A_structure'
,
'lower'
,
'overwrite_A'
,
'overwrite_b'
)
...
@@ -195,7 +202,9 @@ solve = Solve() # general solve
...
@@ -195,7 +202,9 @@ solve = Solve() # general solve
class
Eigvalsh
(
Op
):
class
Eigvalsh
(
Op
):
"""Generalized eigenvalues of a Hermetian positive definite eigensystem
"""
Generalized eigenvalues of a Hermitian positive definite eigensystem.
"""
"""
__props__
=
(
'lower'
,)
__props__
=
(
'lower'
,)
...
@@ -243,8 +252,10 @@ class Eigvalsh(Op):
...
@@ -243,8 +252,10 @@ class Eigvalsh(Op):
class
EigvalshGrad
(
Op
):
class
EigvalshGrad
(
Op
):
"""Gradient of generalized eigenvalues of a Hermetian positive definite
"""
eigensystem
Gradient of generalized eigenvalues of a Hermitian positive definite
eigensystem.
"""
"""
# Note: This Op (EigvalshGrad), should be removed and replaced with a graph
# Note: This Op (EigvalshGrad), should be removed and replaced with a graph
...
@@ -303,17 +314,24 @@ def eigvalsh(a, b, lower=True):
...
@@ -303,17 +314,24 @@ def eigvalsh(a, b, lower=True):
def
kron
(
a
,
b
):
def
kron
(
a
,
b
):
""" Kronecker product
""" Kronecker product
.
Same as scipy.linalg.kron(a, b).
Same as scipy.linalg.kron(a, b).
:note: numpy.kron(a, b) != scipy.linalg.kron(a, b)!
Parameters
They don't have the same shape and order when
----------
a.ndim != b.ndim != 2.
a: array_like
b: array_like
:param a: array_like
Returns
:param b: array_like
-------
:return: array_like with a.ndim + b.ndim - 2 dimensions.
array_like with a.ndim + b.ndim - 2 dimensions
Notes
-----
numpy.kron(a, b) != scipy.linalg.kron(a, b)!
They don't have the same shape and order when
a.ndim != b.ndim != 2.
"""
"""
a
=
tensor
.
as_tensor_variable
(
a
)
a
=
tensor
.
as_tensor_variable
(
a
)
...
@@ -336,7 +354,9 @@ def kron(a, b):
...
@@ -336,7 +354,9 @@ def kron(a, b):
class
Expm
(
Op
):
class
Expm
(
Op
):
"""Compute the matrix exponential of a square array
"""
Compute the matrix exponential of a square array.
"""
"""
__props__
=
()
__props__
=
()
...
@@ -365,7 +385,9 @@ class Expm(Op):
...
@@ -365,7 +385,9 @@ class Expm(Op):
class
ExpmGrad
(
Op
):
class
ExpmGrad
(
Op
):
"""Gradient of the matrix exponential of a square array.
"""
Gradient of the matrix exponential of a square array.
"""
"""
__props__
=
()
__props__
=
()
...
...
theano/tensor/sort.py
浏览文件 @
6304a061
...
@@ -5,7 +5,8 @@ from theano.tensor.basic import mul, arange
...
@@ -5,7 +5,8 @@ from theano.tensor.basic import mul, arange
class
SortOp
(
theano
.
Op
):
class
SortOp
(
theano
.
Op
):
"""
"""
This class is a wrapper for numpy sort function
This class is a wrapper for numpy sort function.
"""
"""
__props__
=
(
"kind"
,
"order"
)
__props__
=
(
"kind"
,
"order"
)
...
@@ -62,12 +63,15 @@ class SortOp(theano.Op):
...
@@ -62,12 +63,15 @@ class SortOp(theano.Op):
return
index_val
return
index_val
def
__get_argsort_indices
(
self
,
a
,
axis
):
def
__get_argsort_indices
(
self
,
a
,
axis
):
"""Calculates indices which can be used to reverse
"""
sorting operation of "a" tensor along "axis"
Calculates indices which can be used to reverse sorting operation of
"a" tensor along "axis".
Returns
-------
1d array if axis is None
list of length len(a.shape) otherwise
returns:
1d array if axis is None
list of length len(a.shape) otherwise
"""
"""
# The goal is to get gradient wrt input from gradient
# The goal is to get gradient wrt input from gradient
...
@@ -99,24 +103,26 @@ class SortOp(theano.Op):
...
@@ -99,24 +103,26 @@ class SortOp(theano.Op):
def
sort
(
a
,
axis
=-
1
,
kind
=
'quicksort'
,
order
=
None
):
def
sort
(
a
,
axis
=-
1
,
kind
=
'quicksort'
,
order
=
None
):
"""
"""
Return a sorted copy of an array.
a : Tensor
Tensor to be sorted
Parameters
----------
a : Tensor
Tensor to be sorted
axis : Tensor
axis : Tensor
Axis along which to sort. If None, the array is
Axis along which to sort. If None, the array is flattened before
flattened before sorting.
sorting.
kind : {'quicksort', 'mergesort', 'heapsort'}, optional
kind : {'quicksort', 'mergesort', 'heapsort'}, optional
Sorting algorithm. Default is 'quicksort'.
Sorting algorithm. Default is 'quicksort'.
order : list, optional
order : list, optional
When `a` is a structured array, this argument specifies which
When `a` is a structured array, this argument specifies which
fields to compare first, second, and so on. This list does not
fields to compare first, second, and so on. This list does not
need to include all of the fields.
need to include all of the fields.
Returns
-------
array
A sorted copy of an array.
"""
"""
if
axis
is
None
:
if
axis
is
None
:
a
=
a
.
flatten
()
a
=
a
.
flatten
()
...
@@ -126,7 +132,8 @@ def sort(a, axis=-1, kind='quicksort', order=None):
...
@@ -126,7 +132,8 @@ def sort(a, axis=-1, kind='quicksort', order=None):
class
ArgSortOp
(
theano
.
Op
):
class
ArgSortOp
(
theano
.
Op
):
"""
"""
This class is a wrapper for numpy argsort function
This class is a wrapper for numpy argsort function.
"""
"""
__props__
=
(
"kind"
,
"order"
)
__props__
=
(
"kind"
,
"order"
)
...
@@ -196,6 +203,7 @@ def argsort(a, axis=-1, kind='quicksort', order=None):
...
@@ -196,6 +203,7 @@ def argsort(a, axis=-1, kind='quicksort', order=None):
specified by the kind keyword. It returns an array of indices of
specified by the kind keyword. It returns an array of indices of
the same shape as a that index data along the given axis in sorted
the same shape as a that index data along the given axis in sorted
order.
order.
"""
"""
if
axis
is
None
:
if
axis
is
None
:
a
=
a
.
flatten
()
a
=
a
.
flatten
()
...
...
theano/tensor/subtensor.py
浏览文件 @
6304a061
...
@@ -39,6 +39,7 @@ sparse_module_ref = None
...
@@ -39,6 +39,7 @@ sparse_module_ref = None
class
AdvancedIndexingError
(
TypeError
):
class
AdvancedIndexingError
(
TypeError
):
"""
"""
Raised when Subtensor is asked to perform advanced indexing.
Raised when Subtensor is asked to perform advanced indexing.
"""
"""
def
__init__
(
self
,
*
args
):
def
__init__
(
self
,
*
args
):
...
@@ -52,6 +53,7 @@ class AdvancedIndexingError(TypeError):
...
@@ -52,6 +53,7 @@ class AdvancedIndexingError(TypeError):
def
make_constant
(
args
):
def
make_constant
(
args
):
"""
"""
Convert python literals to theano constants in subtensor arguments.
Convert python literals to theano constants in subtensor arguments.
"""
"""
def
conv
(
a
):
def
conv
(
a
):
if
a
is
None
:
if
a
is
None
:
...
@@ -68,13 +70,14 @@ def make_constant(args):
...
@@ -68,13 +70,14 @@ def make_constant(args):
def
get_idx_list
(
inputs
,
idx_list
,
get_count
=
False
):
def
get_idx_list
(
inputs
,
idx_list
,
get_count
=
False
):
'''
"""
Given a list of inputs to the subtensor and its idx_list reorders
Given a list of inputs to the subtensor and its idx_list reorders
the inputs according to the idx list to get the right values.
the inputs according to the idx list to get the right values.
If get_counts=True, instead returns the number of inputs consumed
If get_counts=True, instead returns the number of inputs consumed
during this process.
during this process.
'''
"""
# The number of indices
# The number of indices
n
=
len
(
inputs
)
-
1
n
=
len
(
inputs
)
-
1
...
@@ -102,14 +105,15 @@ def get_idx_list(inputs, idx_list, get_count=False):
...
@@ -102,14 +105,15 @@ def get_idx_list(inputs, idx_list, get_count=False):
def
get_canonical_form_slice
(
theslice
,
length
):
def
get_canonical_form_slice
(
theslice
,
length
):
'''
"""
Given a slice [start:stop:step] transform it into a canonical form
Given a slice [start:stop:step] transform it into a canonical form
that respects the conventions imposed by python and numpy.
that respects the conventions imposed by python and numpy.
In a canonical form a slice is represented by a canonical form slice,
In a canonical form a slice is represented by a canonical form slice,
in which 0 <= start <= stop <= length and step > 0, and a flag which says
in which 0 <= start <= stop <= length and step > 0, and a flag which says
if the resulting set of numbers needs to be reversed or not.
if the resulting set of numbers needs to be reversed or not.
'''
"""
from
theano.tensor
import
switch
,
lt
,
ge
,
sgn
from
theano.tensor
import
switch
,
lt
,
ge
,
sgn
if
isinstance
(
theslice
,
slice
):
if
isinstance
(
theslice
,
slice
):
...
@@ -252,7 +256,8 @@ def get_canonical_form_slice(theslice, length):
...
@@ -252,7 +256,8 @@ def get_canonical_form_slice(theslice, length):
class
Subtensor
(
Op
):
class
Subtensor
(
Op
):
"""Return a subtensor view
"""
Return a subtensor view.
The inputs array is the tensor x, followed by scalar integer types.
The inputs array is the tensor x, followed by scalar integer types.
TODO: WRITEME: how are the scalar integer variables formatted?
TODO: WRITEME: how are the scalar integer variables formatted?
...
@@ -297,12 +302,16 @@ class Subtensor(Op):
...
@@ -297,12 +302,16 @@ class Subtensor(Op):
@staticmethod
@staticmethod
def
collapse
(
idxs
,
cond
):
def
collapse
(
idxs
,
cond
):
"""
"""
Parameters
----------
idxs : a list of indices or slices.
cond : a callable that returns a bool
idxs: a list of indices or slices.
Returns
cond: a callable that returns a bool
-------
list
returns:
idxs, with the slices flattened out into a list.
idxs, with the slices flattened out into a list.
i
f cond is true for an entry, does not flatten it.
I
f cond is true for an entry, does not flatten it.
"""
"""
ret
=
[]
ret
=
[]
...
@@ -323,12 +332,14 @@ class Subtensor(Op):
...
@@ -323,12 +332,14 @@ class Subtensor(Op):
@staticmethod
@staticmethod
def
convert
(
entry
,
slice_ok
=
True
):
def
convert
(
entry
,
slice_ok
=
True
):
"""
"""
Change references to Variables into references to Types.
The "idx_list" field is unique to each Subtensor instance.
The "idx_list" field is unique to each Subtensor instance.
It is not unique to each Apply node, so it should not refer to
It is not unique to each Apply node, so it should not refer to
specific Variables. This method changes references to Variables
specific Variables.
into references to Types.
TODO: WRITEME: This method also accepts "entry" already being a Type;
TODO: WRITEME: This method also accepts "entry" already being a Type;
when would that happen?
when would that happen?
"""
"""
invalid_scal_types
=
[
scal
.
float64
,
scal
.
float32
,
scal
.
float16
]
invalid_scal_types
=
[
scal
.
float64
,
scal
.
float32
,
scal
.
float16
]
scal_types
=
[
scal
.
int64
,
scal
.
int32
,
scal
.
int16
,
scal
.
int8
]
scal_types
=
[
scal
.
int64
,
scal
.
int32
,
scal
.
int16
,
scal
.
int8
]
...
@@ -389,30 +400,33 @@ class Subtensor(Op):
...
@@ -389,30 +400,33 @@ class Subtensor(Op):
only_process_constants
=
False
):
only_process_constants
=
False
):
"""
"""
Return the idx_list with constant inputs replaced by their
Return the idx_list with constant inputs replaced by their
python scalar equivalent.
May raise
python scalar equivalent.
`theano.tensor.NotScalarConstantError` if the idx contains
May raise
`theano.tensor.NotScalarConstantError` if the idx contains
non-constant entries.
non-constant entries.
If allow_partial is True, then entries that are not constant
If allow_partial is True, then entries that are not constant will
will stay as their input variable rather than raising an
stay as their input variable rather than raising an exception.
exception.
None entries are always left as-is.
None entries are always left as-is.
Example usage (where v, a are appropriately typed theano variables):
Parameters
----------
>>> b = a[v, 1:3]
only_process_constants
>>> b.owner.op.idx_list
If True, we only attempt to obtain the value of an index/slice if
(Scalar(int64), slice(Scalar(int64), Scalar(int64), None))
it's directly constant and don't try to dig through dimshuffles,
>>> b.owner.op.get_constant_idx(b.owner.inputs, allow_partial=True)
fills, allocs, and other to figure out its value.
[v, slice(1, 3, None)]
>>> b.owner.op.get_constant_idx(b.owner.inputs)
Examples
NotScalarConstantError: v
--------
Example usage where v, a are appropriately typed theano variables :
>>> b = a[v, 1:3]
>>> b.owner.op.idx_list
(Scalar(int64), slice(Scalar(int64), Scalar(int64), None))
>>> b.owner.op.get_constant_idx(b.owner.inputs, allow_partial=True)
[v, slice(1, 3, None)]
>>> b.owner.op.get_constant_idx(b.owner.inputs)
NotScalarConstantError: v
:param only_process_constants: If True, we only attempt to obtain
the value of an index/slice if it's directly constant and don't
try to dig through dimshuffles, fills, allocs, and other to figure
out its value.
"""
"""
real_idx
=
get_idx_list
(
inputs
,
self
.
idx_list
)
real_idx
=
get_idx_list
(
inputs
,
self
.
idx_list
)
...
@@ -451,8 +465,13 @@ class Subtensor(Op):
...
@@ -451,8 +465,13 @@ class Subtensor(Op):
def
make_node
(
self
,
x
,
*
inputs
):
def
make_node
(
self
,
x
,
*
inputs
):
"""
"""
x: the tensor to take a subtensor of
Parameters
inputs: a list of theano Scalars
----------
x
The tensor to take a subtensor of.
inputs
A list of theano Scalars.
"""
"""
x
=
theano
.
tensor
.
as_tensor_variable
(
x
)
x
=
theano
.
tensor
.
as_tensor_variable
(
x
)
inputs
=
tuple
(
self
.
my_as_scalar
(
a
)
for
a
in
inputs
)
inputs
=
tuple
(
self
.
my_as_scalar
(
a
)
for
a
in
inputs
)
...
@@ -607,8 +626,8 @@ class Subtensor(Op):
...
@@ -607,8 +626,8 @@ class Subtensor(Op):
@staticmethod
@staticmethod
def
default_helper_c_code_args
():
def
default_helper_c_code_args
():
"""
"""
Returns a dictionary of default arguments to
Returns a dictionary of default arguments to
helper_c_code.
helper_c_code
"""
"""
return
{
"c_prefix"
:
"PyArray"
,
return
{
"c_prefix"
:
"PyArray"
,
...
@@ -622,7 +641,8 @@ class Subtensor(Op):
...
@@ -622,7 +641,8 @@ class Subtensor(Op):
The parameters c_prefix are there to allow reusing this
The parameters c_prefix are there to allow reusing this
function on PyArray and CudaNdarray object.
function on PyArray and CudaNdarray object.
This fct take as input the x,
This fct take as input the x.
"""
"""
default_args
=
Subtensor
.
default_helper_c_code_args
()
default_args
=
Subtensor
.
default_helper_c_code_args
()
...
@@ -986,16 +1006,25 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor),
...
@@ -986,16 +1006,25 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor),
def
set_subtensor
(
x
,
y
,
inplace
=
False
,
def
set_subtensor
(
x
,
y
,
inplace
=
False
,
tolerate_inplace_aliasing
=
False
):
tolerate_inplace_aliasing
=
False
):
"""Return x with the given subtensor overwritten by y.
"""
Return x with the given subtensor overwritten by y.
Example: To replicate the numpy expression "r[10:] = 5", type
Parameters
----------
x
Symbolic variable for the lvalue of = operation.
y
Symbolic variable for the rvalue of = operation.
tolerate_inplace_aliasing
See inc_subtensor for documentation.
Examples
--------
To replicate the numpy expression "r[10:] = 5", type
>>> r = ivector()
>>> r = ivector()
>>> new_r = set_subtensor(r[10:], 5)
>>> new_r = set_subtensor(r[10:], 5)
:param x: symbolic variable for the lvalue of = operation
:param y: symbolic variable for the rvalue of = operation
:param tolerate_inplace_aliasing: see inc_subtensor for documentation.
"""
"""
return
inc_subtensor
(
x
,
y
,
inplace
,
set_instead_of_inc
=
True
,
return
inc_subtensor
(
x
,
y
,
inplace
,
set_instead_of_inc
=
True
,
tolerate_inplace_aliasing
=
tolerate_inplace_aliasing
)
tolerate_inplace_aliasing
=
tolerate_inplace_aliasing
)
...
@@ -1003,22 +1032,32 @@ def set_subtensor(x, y, inplace=False,
...
@@ -1003,22 +1032,32 @@ def set_subtensor(x, y, inplace=False,
def
inc_subtensor
(
x
,
y
,
inplace
=
False
,
set_instead_of_inc
=
False
,
def
inc_subtensor
(
x
,
y
,
inplace
=
False
,
set_instead_of_inc
=
False
,
tolerate_inplace_aliasing
=
False
):
tolerate_inplace_aliasing
=
False
):
"""Return x with the given subtensor incremented by y.
"""
Return x with the given subtensor incremented by y.
:param x: the symbolic result of a Subtensor operation.
:param y: the amount by which to increment ths subtensor in question
Parameters
:param inplace: Don't use. Theano will do it when possible.
----------
:param set_instead_of_inc: If True, do a set_subtensor instead.
x
:param tolerate_inplace_aliasing: allow x and y to be views of a single
The symbolic result of a Subtensor operation.
underlying array even while working inplace. For correct results,
y
x and y must not be overlapping views; if they overlap, the result
The amount by which to increment the subtensor in question.
of this Op will generally be incorrect. This value has no effect if
inplace
inplace=False.
Don't use. Theano will do it when possible.
set_instead_of_inc
Example: To replicate the numpy expression "r[10:] += 5", type
If True, do a set_subtensor instead.
tolerate_inplace_aliasing:
Allow x and y to be views of a single underlying array even while
working inplace. For correct results, x and y must not be overlapping
views; if they overlap, the result of this Op will generally be
incorrect. This value has no effect if inplace=False.
Examples
--------
To replicate the numpy expression "r[10:] += 5", type
>>> r = ivector()
>>> r = ivector()
>>> new_r = inc_subtensor(r[10:], 5)
>>> new_r = inc_subtensor(r[10:], 5)
"""
"""
# First of all, y cannot have a higher dimension than x,
# First of all, y cannot have a higher dimension than x,
# nor have non-broadcastable dimensions where x is broadcastable.
# nor have non-broadcastable dimensions where x is broadcastable.
...
@@ -1159,7 +1198,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
...
@@ -1159,7 +1198,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
class
IncSubtensor
(
Op
):
class
IncSubtensor
(
Op
):
"""Increment a subtensor.
"""
Increment a subtensor.
This is like numpy's
This is like numpy's
...
@@ -1167,8 +1207,12 @@ class IncSubtensor(Op):
...
@@ -1167,8 +1207,12 @@ class IncSubtensor(Op):
It is used internally to implement the gradient on SubTensor.
It is used internally to implement the gradient on SubTensor.
:param set_instead_of_inc: if True set the subtensor to the value instead
Parameters
of incrementing it by that value.
----------
set_instead_of_inc
If True set the subtensor to the value instead of incrementing it by
that value.
"""
"""
check_input
=
False
check_input
=
False
...
@@ -1225,9 +1269,14 @@ class IncSubtensor(Op):
...
@@ -1225,9 +1269,14 @@ class IncSubtensor(Op):
def
make_node
(
self
,
x
,
y
,
*
inputs
):
def
make_node
(
self
,
x
,
y
,
*
inputs
):
"""
"""
x: the tensor to increment
Parameters
y: the value to increment by
----------
inputs: TODO WRITEME
x
The tensor to increment.
y
The value to increment by.
inputs: TODO WRITEME
"""
"""
x
,
y
=
map
(
theano
.
tensor
.
as_tensor_variable
,
[
x
,
y
])
x
,
y
=
map
(
theano
.
tensor
.
as_tensor_variable
,
[
x
,
y
])
if
y
.
ndim
>
x
.
ndim
:
if
y
.
ndim
>
x
.
ndim
:
...
@@ -1411,8 +1460,10 @@ class IncSubtensor(Op):
...
@@ -1411,8 +1460,10 @@ class IncSubtensor(Op):
)
)
def
do_type_checking
(
self
,
node
):
def
do_type_checking
(
self
,
node
):
""" Should raise NotImplementedError if c_code does not support
"""
Should raise NotImplementedError if c_code does not support
the types involved in this node.
the types involved in this node.
"""
"""
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
theano
.
tensor
.
TensorType
):
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
theano
.
tensor
.
TensorType
):
...
@@ -1427,13 +1478,19 @@ class IncSubtensor(Op):
...
@@ -1427,13 +1478,19 @@ class IncSubtensor(Op):
def
copy_of_x
(
self
,
x
):
def
copy_of_x
(
self
,
x
):
"""
"""
:param x: a string giving the name of a C variable
Parameters
pointing to an array
----------
x
A string giving the name of a C variable pointing to an array.
Returns
-------
object
C code expression to make a copy of x.
:return: C code expression to make a copy of x
Base class uses PyArrayObject *, subclasses may override for
different types of arrays.
Base class uses PyArrayObject *, subclasses may override for
different types of arrays.
"""
"""
# Parameters of PyArray_FromAny are:
# Parameters of PyArray_FromAny are:
# array
# array
...
@@ -1448,12 +1505,16 @@ class IncSubtensor(Op):
...
@@ -1448,12 +1505,16 @@ class IncSubtensor(Op):
def
make_view_array
(
self
,
x
,
view_ndim
):
def
make_view_array
(
self
,
x
,
view_ndim
):
"""
"""
:param x: a string identifying an array to be viewed
Parameters
:param view_ndim: a string specifying the number of dimensions
----------
to have in the view
x
A string identifying an array to be viewed.
view_ndim
A string specifying the number of dimensions to have in the view.
This doesn't need to actually set up the view with the right indexing;
we'll do that manually later.
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
"""
"""
return
"""Py_INCREF(PyArray_DESCR(
%(x)
s));
return
"""Py_INCREF(PyArray_DESCR(
%(x)
s));
...
@@ -1471,22 +1532,35 @@ class IncSubtensor(Op):
...
@@ -1471,22 +1532,35 @@ class IncSubtensor(Op):
"""
%
locals
()
"""
%
locals
()
def
get_helper_c_code_args
(
self
):
def
get_helper_c_code_args
(
self
):
""" Return a dictionary of arguments to pass to helper_c_code."""
"""
Return a dictionary of arguments to pass to helper_c_code.
"""
return
Subtensor
.
default_helper_c_code_args
()
return
Subtensor
.
default_helper_c_code_args
()
def
copy_into
(
self
,
view
,
source
):
def
copy_into
(
self
,
view
,
source
):
"""
"""
view: string, C code expression for an array
Parameters
source: string, C code expression for an array
----------
view : string
C code expression for an array.
source : string
C code expression for an array.
Returns
-------
object
C code expression to copy source into view, and 0 on success.
returns a C code expression to copy source into view, and
return 0 on success
"""
"""
return
"""PyArray_CopyInto(
%(view)
s,
%(source)
s)"""
%
locals
()
return
"""PyArray_CopyInto(
%(view)
s,
%(source)
s)"""
%
locals
()
def
add_to_zview
(
self
,
name
,
x
,
fail
):
def
add_to_zview
(
self
,
name
,
x
,
fail
):
""" Return C code to add x to zview. Should DECREF zview if the
"""
add fails."""
Return C code to add x to zview. Should DECREF zview if the
add fails.
"""
return
"""
return
"""
PyArrayObject * add_rval = (PyArrayObject*)PyNumber_InPlaceAdd(
PyArrayObject * add_rval = (PyArrayObject*)PyNumber_InPlaceAdd(
...
@@ -1551,11 +1625,13 @@ class IncSubtensor(Op):
...
@@ -1551,11 +1625,13 @@ class IncSubtensor(Op):
def
_sum_grad_over_bcasted_dims
(
x
,
gx
):
def
_sum_grad_over_bcasted_dims
(
x
,
gx
):
"""Sum of gx over dimensions to reproduce x.broadcastable.
"""
Sum of gx over dimensions to reproduce x.broadcastable.
This is useful to sum gradients over certain dimensions when
This is useful to sum gradients over certain dimensions when
x has been broadcasted, and we need to sum the gradient contributions
x has been broadcasted, and we need to sum the gradient contributions
over all duplications.
over all duplications.
"""
"""
if
gx
.
broadcastable
!=
x
.
broadcastable
:
if
gx
.
broadcastable
!=
x
.
broadcastable
:
x_dim_added
=
gx
.
ndim
-
x
.
ndim
x_dim_added
=
gx
.
ndim
-
x
.
ndim
...
@@ -1592,7 +1668,10 @@ def _sum_grad_over_bcasted_dims(x, gx):
...
@@ -1592,7 +1668,10 @@ def _sum_grad_over_bcasted_dims(x, gx):
class
AdvancedSubtensor1
(
Op
):
class
AdvancedSubtensor1
(
Op
):
"""Implement x[ilist] where ilist is a vector of integers."""
"""
Implement x[ilist] where ilist is a vector of integers.
"""
# sparse_grad doesn't go in here since it only affects the output
# sparse_grad doesn't go in here since it only affects the output
# of the grad() method.
# of the grad() method.
__props__
=
()
__props__
=
()
...
@@ -1777,7 +1856,11 @@ advanced_subtensor1 = AdvancedSubtensor1()
...
@@ -1777,7 +1856,11 @@ advanced_subtensor1 = AdvancedSubtensor1()
class
AdvancedIncSubtensor1
(
Op
):
class
AdvancedIncSubtensor1
(
Op
):
"""Increments a subtensor using advanced slicing (list of index)"""
"""
Increments a subtensor using advanced slicing (list of index).
"""
__props__
=
(
'inplace'
,
'set_instead_of_inc'
)
__props__
=
(
'inplace'
,
'set_instead_of_inc'
)
def
__init__
(
self
,
inplace
=
False
,
set_instead_of_inc
=
False
):
def
__init__
(
self
,
inplace
=
False
,
set_instead_of_inc
=
False
):
...
@@ -1828,13 +1911,19 @@ class AdvancedIncSubtensor1(Op):
...
@@ -1828,13 +1911,19 @@ class AdvancedIncSubtensor1(Op):
def
copy_of_x
(
self
,
x
):
def
copy_of_x
(
self
,
x
):
"""
"""
:param x: a string giving the name of a C variable
Parameters
pointing to an array
----------
x : string
Gives the name of a C variable pointing to an array.
Returns
-------
object
C code expression to make a copy of x.
:return: C code expression to make a copy of x
Base class uses PyArrayObject *, subclasses may override for
different types of arrays.
Base class uses PyArrayObject *, subclasses may override for
different types of arrays.
"""
"""
# Parameters of PyArray_FromAny are:
# Parameters of PyArray_FromAny are:
# array
# array
...
@@ -1994,6 +2083,7 @@ def adv_index_broadcastable_pattern(a, idx):
...
@@ -1994,6 +2083,7 @@ def adv_index_broadcastable_pattern(a, idx):
For this, we make a fake ndarray and a fake idx and ask numpy for
For this, we make a fake ndarray and a fake idx and ask numpy for
the output. From this, we find the output broadcast pattern.
the output. From this, we find the output broadcast pattern.
"""
"""
def
replace_slice
(
v
):
def
replace_slice
(
v
):
...
@@ -2021,8 +2111,11 @@ def adv_index_broadcastable_pattern(a, idx):
...
@@ -2021,8 +2111,11 @@ def adv_index_broadcastable_pattern(a, idx):
class
AdvancedSubtensor
(
Op
):
class
AdvancedSubtensor
(
Op
):
"""Return a subtensor copy, using advanced indexing.
"""
"""
Return a subtensor copy, using advanced indexing.
"""
# Should be used by __getitem__ and __getslice__, as follow:
# Should be used by __getitem__ and __getslice__, as follow:
# AdvancedSubtensor()(self, *args),
# AdvancedSubtensor()(self, *args),
# if args contains an advanced indexing pattern
# if args contains an advanced indexing pattern
...
@@ -2094,13 +2187,16 @@ advanced_subtensor = AdvancedSubtensor()
...
@@ -2094,13 +2187,16 @@ advanced_subtensor = AdvancedSubtensor()
class
AdvancedIncSubtensor
(
Op
):
class
AdvancedIncSubtensor
(
Op
):
"""Increments a subtensor using advanced indexing.
"""
Increments a subtensor using advanced indexing.
:note: We need the numpy.inplace_increment() function currently
Notes
numpy's PR 326 to be able to make an inplace version of this
-----
op.
We need the numpy.inplace_increment() function currently
numpy's PR 326 to be able to make an inplace version of this op.
"""
"""
__props__
=
(
"inplace"
,
"set_instead_of_inc"
)
__props__
=
(
"inplace"
,
"set_instead_of_inc"
)
def
__init__
(
self
,
inplace
=
False
,
set_instead_of_inc
=
False
):
def
__init__
(
self
,
inplace
=
False
,
set_instead_of_inc
=
False
):
...
...
theano/tensor/type.py
浏览文件 @
6304a061
...
@@ -12,7 +12,27 @@ _logger = logging.getLogger("theano.tensor.type")
...
@@ -12,7 +12,27 @@ _logger = logging.getLogger("theano.tensor.type")
class
TensorType
(
Type
):
class
TensorType
(
Type
):
"""Symbolic `Type` representing a numpy.ndarray value."""
"""
Symbolic `Type` representing a numpy.ndarray value.
Initialize self.dtype and self.broadcastable.
Parameters
----------
dtype: str
Corresponding to numpy dtype (e.g., 'int64')
The value (ndarray) associated to a `Variable` of this `Type` will
have this dtype.
broadcastable: tuple, list, or array of boolean values
This argument serves two purposes. First, the True elements of this
list indicate the dimensions where the shape of an associated value
must be 1. Secondly, the length of this list is the number of
dimensions that an associated value must have. See
:doc:`broadcasting` for an explanation of how this list is used.
name : str
Optional name for this type.
"""
filter_checks_isfinite
=
False
filter_checks_isfinite
=
False
"""
"""
...
@@ -21,21 +41,6 @@ class TensorType(Type):
...
@@ -21,21 +41,6 @@ class TensorType(Type):
"""
"""
def
__init__
(
self
,
dtype
,
broadcastable
,
name
=
None
,
sparse_grad
=
False
):
def
__init__
(
self
,
dtype
,
broadcastable
,
name
=
None
,
sparse_grad
=
False
):
"""Initialize self.dtype and self.broadcastable.
:Parameters:
- `dtype`: str corresponding to numpy dtype (e.g., 'int64')
The value (ndarray) associated to a `Variable` of this `Type` will
have this dtype.
- `broadcastable`: tuple, list, or array of boolean values
This argument serves two purposes. First, the True elements of this
list indicate the dimensions where the shape of an associated value
must be 1. Secondly, the length of this list is the number of
dimensions that an associated value must have. See
:doc:`broadcasting` for an explanation of how this list is used.
- `name`: str
Optional name for this type.
"""
self
.
dtype
=
str
(
dtype
)
self
.
dtype
=
str
(
dtype
)
if
self
.
dtype
==
'floatX'
:
if
self
.
dtype
==
'floatX'
:
self
.
dtype
=
config
.
floatX
self
.
dtype
=
config
.
floatX
...
@@ -56,6 +61,7 @@ class TensorType(Type):
...
@@ -56,6 +61,7 @@ class TensorType(Type):
"""
"""
Return a copy of the type optionally with a new dtype or
Return a copy of the type optionally with a new dtype or
broadcastable pattern.
broadcastable pattern.
"""
"""
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
self
.
dtype
dtype
=
self
.
dtype
...
@@ -65,11 +71,13 @@ class TensorType(Type):
...
@@ -65,11 +71,13 @@ class TensorType(Type):
sparse_grad
=
self
.
sparse_grad
)
sparse_grad
=
self
.
sparse_grad
)
def
filter
(
self
,
data
,
strict
=
False
,
allow_downcast
=
None
):
def
filter
(
self
,
data
,
strict
=
False
,
allow_downcast
=
None
):
"""Convert `data` to something which can be associated to a
"""
Convert `data` to something which can be associated to a
`TensorVariable`.
`TensorVariable`.
This function is not meant to be called in user code.
It is for
This function is not meant to be called in user code. It is for
`Linker` instances to use when running a compiled graph.
`Linker` instances to use when running a compiled graph.
"""
"""
# Explicit error message when one accidentally uses a Variable as
# Explicit error message when one accidentally uses a Variable as
# input (typical mistake, especially with shared variables).
# input (typical mistake, especially with shared variables).
...
@@ -191,11 +199,13 @@ class TensorType(Type):
...
@@ -191,11 +199,13 @@ class TensorType(Type):
return
data
return
data
def
filter_variable
(
self
,
other
,
allow_convert
=
True
):
def
filter_variable
(
self
,
other
,
allow_convert
=
True
):
"""Convert a symbolic Variable into a TensorType, if compatible.
"""
Convert a symbolic Variable into a TensorType, if compatible.
For the moment, only a TensorType or CudaNdarrayType will be
For the moment, only a TensorType or CudaNdarrayType will be
converted, provided they have the same number of dimensions,
converted, provided they have the same number of dimensions,
broadcastable pattern, and dtype.
broadcastable pattern, and dtype.
"""
"""
if
hasattr
(
other
,
'_as_TensorVariable'
):
if
hasattr
(
other
,
'_as_TensorVariable'
):
other
=
other
.
_as_TensorVariable
()
other
=
other
.
_as_TensorVariable
()
...
@@ -230,10 +240,12 @@ class TensorType(Type):
...
@@ -230,10 +240,12 @@ class TensorType(Type):
return
"value is valid"
return
"value is valid"
def
dtype_specs
(
self
):
def
dtype_specs
(
self
):
"""Return a tuple (python type, c type, numpy typenum) that corresponds
"""
Return a tuple (python type, c type, numpy typenum) that corresponds
to self.dtype.
to self.dtype.
This function is used internally as part of C code generation.
This function is used internally as part of C code generation.
"""
"""
# TODO: add more type correspondances for e.g. int32, int64, float32,
# TODO: add more type correspondances for e.g. int32, int64, float32,
# complex64, etc.
# complex64, etc.
...
@@ -261,7 +273,10 @@ class TensorType(Type):
...
@@ -261,7 +273,10 @@ class TensorType(Type):
return
scal
.
get_scalar_type
(
dtype
=
self
.
dtype
)
return
scal
.
get_scalar_type
(
dtype
=
self
.
dtype
)
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
"""Compare True iff other is the same kind of TensorType"""
"""
Compare True iff other is the same kind of TensorType.
"""
return
type
(
self
)
==
type
(
other
)
and
other
.
dtype
==
self
.
dtype
\
return
type
(
self
)
==
type
(
other
)
and
other
.
dtype
==
self
.
dtype
\
and
other
.
broadcastable
==
self
.
broadcastable
and
other
.
broadcastable
==
self
.
broadcastable
...
@@ -305,14 +320,19 @@ class TensorType(Type):
...
@@ -305,14 +320,19 @@ class TensorType(Type):
def
values_eq_approx
(
a
,
b
,
allow_remove_inf
=
False
,
allow_remove_nan
=
False
,
def
values_eq_approx
(
a
,
b
,
allow_remove_inf
=
False
,
allow_remove_nan
=
False
,
rtol
=
None
,
atol
=
None
):
rtol
=
None
,
atol
=
None
):
"""
"""
:param allow_remove_inf: If True, when there is an inf in a,
Parameters
we allow any value in b in that position.
----------
Event -inf
allow_remove_inf
:param allow_remove_nan: If True, when there is a nan in a,
If True, when there is an inf in a, we allow any value in b in
we allow any value in b in that position.
that position. Event -inf
Event +-inf
allow_remove_nan
:param rtol: relative tolerance, passed to _allclose
If True, when there is a nan in a, we allow any value in b in
:param atol: absolute tolerance, passed to _allclose
that position. Event +-inf
rtol
Relative tolerance, passed to _allclose.
atol
Absolute tolerance, passed to _allclose.
"""
"""
if
isinstance
(
a
,
numpy
.
ndarray
)
and
isinstance
(
b
,
numpy
.
ndarray
):
if
isinstance
(
a
,
numpy
.
ndarray
)
and
isinstance
(
b
,
numpy
.
ndarray
):
if
a
.
shape
!=
b
.
shape
:
if
a
.
shape
!=
b
.
shape
:
...
@@ -389,7 +409,8 @@ class TensorType(Type):
...
@@ -389,7 +409,8 @@ class TensorType(Type):
ndim
=
property
(
lambda
self
:
len
(
self
.
broadcastable
),
ndim
=
property
(
lambda
self
:
len
(
self
.
broadcastable
),
doc
=
"number of dimensions"
)
doc
=
"number of dimensions"
)
"""Number of dimensions
"""
Number of dimensions.
This read-only property is the preferred way to get the number of
This read-only property is the preferred way to get the number of
dimensions of a `TensorType`.
dimensions of a `TensorType`.
...
@@ -397,12 +418,15 @@ class TensorType(Type):
...
@@ -397,12 +418,15 @@ class TensorType(Type):
"""
"""
def
make_variable
(
self
,
name
=
None
):
def
make_variable
(
self
,
name
=
None
):
"""Return a `TensorVariable` of this type
"""
Return a `TensorVariable` of this type.
Parameters
----------
name : str
A pretty name to identify this `Variable` when printing and
debugging
:Parameters:
- `name`: str
A pretty name to identify this `Variable` when printing and
debugging
"""
"""
return
self
.
Variable
(
self
,
name
=
name
)
return
self
.
Variable
(
self
,
name
=
name
)
...
@@ -430,7 +454,10 @@ class TensorType(Type):
...
@@ -430,7 +454,10 @@ class TensorType(Type):
# "TensorType{%s, %s}" % (str(self.dtype), str(self.broadcastable))
# "TensorType{%s, %s}" % (str(self.dtype), str(self.broadcastable))
def
c_declare
(
self
,
name
,
sub
,
check_input
=
True
):
def
c_declare
(
self
,
name
,
sub
,
check_input
=
True
):
"""Override `CLinkerType.c_declare` """
"""
Override `CLinkerType.c_declare`.
"""
if
(
check_input
):
if
(
check_input
):
check
=
"""
check
=
"""
typedef
%(dtype)
s dtype_
%(name)
s;
typedef
%(dtype)
s dtype_
%(name)
s;
...
@@ -444,13 +471,19 @@ class TensorType(Type):
...
@@ -444,13 +471,19 @@ class TensorType(Type):
return
declaration
+
check
return
declaration
+
check
def
c_init
(
self
,
name
,
sub
):
def
c_init
(
self
,
name
,
sub
):
"""Override `CLinkerType.c_init` """
"""
Override `CLinkerType.c_init`.
"""
return
"""
return
"""
%(name)
s = NULL;
%(name)
s = NULL;
"""
%
dict
(
sub
,
name
=
name
,
type_num
=
self
.
dtype_specs
()[
2
])
"""
%
dict
(
sub
,
name
=
name
,
type_num
=
self
.
dtype_specs
()[
2
])
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
"""Override `CLinkerType.c_extract` """
"""
Override `CLinkerType.c_extract`.
"""
if
(
check_input
):
if
(
check_input
):
check
=
"""
check
=
"""
%(name)
s = NULL;
%(name)
s = NULL;
...
@@ -509,7 +542,10 @@ class TensorType(Type):
...
@@ -509,7 +542,10 @@ class TensorType(Type):
"""
%
dict
(
sub
,
name
=
name
,
type_num
=
self
.
dtype_specs
()[
2
])
"""
%
dict
(
sub
,
name
=
name
,
type_num
=
self
.
dtype_specs
()[
2
])
def
c_cleanup
(
self
,
name
,
sub
):
def
c_cleanup
(
self
,
name
,
sub
):
"""Override `CLinkerType.c_cleanup` """
"""
Override `CLinkerType.c_cleanup`.
"""
return
"""
return
"""
if (
%(name)
s) {
if (
%(name)
s) {
Py_XDECREF(
%(name)
s);
Py_XDECREF(
%(name)
s);
...
@@ -517,7 +553,10 @@ class TensorType(Type):
...
@@ -517,7 +553,10 @@ class TensorType(Type):
"""
%
locals
()
"""
%
locals
()
def
c_sync
(
self
,
name
,
sub
):
def
c_sync
(
self
,
name
,
sub
):
"""Override `CLinkerType.c_sync` """
"""
Override `CLinkerType.c_sync`.
"""
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
type_num
=
self
.
dtype_specs
()[
2
]
type_num
=
self
.
dtype_specs
()[
2
]
return
"""
return
"""
...
@@ -558,7 +597,10 @@ class TensorType(Type):
...
@@ -558,7 +597,10 @@ class TensorType(Type):
"""
%
locals
()
"""
%
locals
()
def
c_headers
(
self
):
def
c_headers
(
self
):
"""Override `CLinkerObject.c_headers` """
"""
Override `CLinkerObject.c_headers`.
"""
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_headers
()
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_headers
()
def
c_libraries
(
self
):
def
c_libraries
(
self
):
...
@@ -568,7 +610,10 @@ class TensorType(Type):
...
@@ -568,7 +610,10 @@ class TensorType(Type):
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_compile_args
()
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_compile_args
()
def
c_support_code
(
self
):
def
c_support_code
(
self
):
"""Override `CLinkerObject.c_support_code` """
"""
Override `CLinkerObject.c_support_code`.
"""
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_support_code
()
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_support_code
()
def
c_init_code
(
self
):
def
c_init_code
(
self
):
...
@@ -584,6 +629,7 @@ class TensorType(Type):
...
@@ -584,6 +629,7 @@ class TensorType(Type):
def
value_zeros
(
self
,
shape
):
def
value_zeros
(
self
,
shape
):
"""
"""
Create an numpy ndarray full of 0 values.
Create an numpy ndarray full of 0 values.
"""
"""
return
numpy
.
zeros
(
shape
,
dtype
=
self
.
dtype
)
return
numpy
.
zeros
(
shape
,
dtype
=
self
.
dtype
)
...
@@ -604,17 +650,33 @@ class TensorType(Type):
...
@@ -604,17 +650,33 @@ class TensorType(Type):
``get_size()`` will be called on the output of this function
``get_size()`` will be called on the output of this function
when printing the memory profile.
when printing the memory profile.
:param obj: The object that this Type represents during execution
Parameters
:return: Python object that ``self.get_size()`` understands
----------
obj
The object that this Type represents during execution.
Returns
-------
object
Python object that ``self.get_size()`` understands.
"""
"""
return
obj
.
shape
return
obj
.
shape
def
get_size
(
self
,
shape_info
):
def
get_size
(
self
,
shape_info
):
""" Number of bytes taken by the object represented by shape_info.
"""
Number of bytes taken by the object represented by shape_info.
Parameters
----------
shape_info
The output of the call to get_shape_info().
Returns
-------
int
The number of bytes taken by the object described by ``shape_info``.
:param shape_info: the output of the call to get_shape_info()
:return: the number of bytes taken by the object described by
``shape_info``.
"""
"""
if
shape_info
:
if
shape_info
:
return
numpy
.
prod
(
shape_info
)
*
numpy
.
dtype
(
self
.
dtype
)
.
itemsize
return
numpy
.
prod
(
shape_info
)
*
numpy
.
dtype
(
self
.
dtype
)
.
itemsize
...
...
theano/tensor/type_other.py
浏览文件 @
6304a061
...
@@ -105,6 +105,7 @@ SliceType.Constant = SliceConstant
...
@@ -105,6 +105,7 @@ SliceType.Constant = SliceConstant
class
NoneTypeT
(
Generic
):
class
NoneTypeT
(
Generic
):
"""
"""
Inherit from Generic to have c code working.
Inherit from Generic to have c code working.
"""
"""
def
filter
(
self
,
x
,
strict
=
False
,
allow_downcast
=
None
):
def
filter
(
self
,
x
,
strict
=
False
,
allow_downcast
=
None
):
...
...
theano/tensor/utils.py
浏览文件 @
6304a061
...
@@ -6,7 +6,8 @@ from theano.gof.utils import hash_from_code
...
@@ -6,7 +6,8 @@ from theano.gof.utils import hash_from_code
def
hash_from_ndarray
(
data
):
def
hash_from_ndarray
(
data
):
"""Return a hash from an ndarray
"""
Return a hash from an ndarray.
It takes care of the data, shapes, strides and dtype.
It takes care of the data, shapes, strides and dtype.
...
@@ -32,23 +33,31 @@ def hash_from_ndarray(data):
...
@@ -32,23 +33,31 @@ def hash_from_ndarray(data):
def
shape_of_variables
(
fgraph
,
input_shapes
):
def
shape_of_variables
(
fgraph
,
input_shapes
):
"""
"""
Compute the numeric shape of all intermediate variables given input shapes
Compute the numeric shape of all intermediate variables given input shapes
.
Inputs:
Parameters
fgraph - the theano.FunctionGraph in question
----------
input_shapes - a dict mapping input to shape
fgraph
The theano.FunctionGraph in question.
input_shapes : dict
A dict mapping input to shape.
Outputs:
Returns
shapes - a dict mapping variable to shape
-------
shapes : dict
A dict mapping variable to shape
WARNING
: This modifies the fgraph. Not pure.
.. warning:
: This modifies the fgraph. Not pure.
Examples
--------
>>> import theano
>>> import theano
>>> x = theano.tensor.matrix('x')
>>> x = theano.tensor.matrix('x')
>>> y = x[512:]; y.name = 'y'
>>> y = x[512:]; y.name = 'y'
>>> fgraph = theano.FunctionGraph([x], [y], clone=False)
>>> fgraph = theano.FunctionGraph([x], [y], clone=False)
>>> shape_of_variables(fgraph, {x: (1024, 1024)})
>>> shape_of_variables(fgraph, {x: (1024, 1024)})
{y: (512, 1024), x: (1024, 1024)}
{y: (512, 1024), x: (1024, 1024)}
"""
"""
if
not
hasattr
(
fgraph
,
'shape_feature'
):
if
not
hasattr
(
fgraph
,
'shape_feature'
):
...
...
theano/tensor/var.py
浏览文件 @
6304a061
...
@@ -22,8 +22,9 @@ def equal_slices(s1, s2):
...
@@ -22,8 +22,9 @@ def equal_slices(s1, s2):
class
AsTensorError
(
TypeError
):
class
AsTensorError
(
TypeError
):
"""Raised when as_tensor_variable isn't able to create a
"""
TensorVariable.
Raised when as_tensor_variable isn't able to create a TensorVariable.
"""
"""
pass
pass
...
@@ -254,8 +255,11 @@ class _tensor_py_operators:
...
@@ -254,8 +255,11 @@ class _tensor_py_operators:
def
transpose
(
self
,
*
axes
):
def
transpose
(
self
,
*
axes
):
"""
"""
Return `tensor.transpose(self, axes)`
or `tensor.transpose(self, axes[0])`
Returns
-------
object
`tensor.transpose(self, axes)` or `tensor.transpose(self, axes[0])`.
If only one `axes` argument is provided and it is iterable, then it is
If only one `axes` argument is provided and it is iterable, then it is
assumed to be the entire axes tuple, and passed intact to
assumed to be the entire axes tuple, and passed intact to
...
@@ -298,16 +302,18 @@ class _tensor_py_operators:
...
@@ -298,16 +302,18 @@ class _tensor_py_operators:
def
reshape
(
self
,
shape
,
ndim
=
None
):
def
reshape
(
self
,
shape
,
ndim
=
None
):
"""Return a reshaped view/copy of this variable.
"""Return a reshaped view/copy of this variable.
:param shape: something that can be converted to a symbolic vector of
Parameters
integers
----------
shape
Something that can be converted to a symbolic vector of integers.
ndim
The length of the shape. Passing None here means for
Theano to try and guess the length of `shape`.
:param ndim: the length of the shape. Passing None here means for
.. warning:: This has a different signature than numpy's
theano to try and guess the length of `shape`.
ndarray.reshape!
In numpy you do not need to wrap the shape arguments
* warning-- this has a different signature than numpy's
in a tuple, in theano you do need to.
ndarray.reshape!
in numpy you do not need to wrap the shape arguments
in a tuple, in theano you do need to
"""
"""
...
@@ -323,21 +329,29 @@ class _tensor_py_operators:
...
@@ -323,21 +329,29 @@ class _tensor_py_operators:
Reorder the dimensions of this variable, optionally inserting
Reorder the dimensions of this variable, optionally inserting
broadcasted dimensions.
broadcasted dimensions.
:param pattern: list/tuple of int mixed with 'x' for broadcastable
Parameters
dimensions
----------
pattern
List/tuple of int mixed with 'x' for broadcastable dimensions.
Examples
--------
For example, to create a 3D view of a [2D] matrix, call
For example, to create a 3D view of a [2D] matrix, call
``dimshuffle([0,'x',1])``. This will create a 3D view such that the
``dimshuffle([0,'x',1])``. This will create a 3D view such that the
middle dimension is an implicit broadcasted dimension. To do the same
middle dimension is an implicit broadcasted dimension. To do the same
thing on the transpose of that matrix, call
thing on the transpose of that matrix, call ``dimshuffle([1, 'x', 0])``.
``dimshuffle([1, 'x', 0])``.
Notes
-----
This function supports the pattern passed as a tuple, or as a
This function supports the pattern passed as a tuple, or as a
variable-length argument (e.g. ``a.dimshuffle(pattern)`` is equivalent
variable-length argument (e.g. ``a.dimshuffle(pattern)`` is equivalent
to ``a.dimshuffle(*pattern)`` where ``pattern`` is a list/tuple of ints
to ``a.dimshuffle(*pattern)`` where ``pattern`` is a list/tuple of ints
mixed with 'x' characters).
mixed with 'x' characters).
For more information, see `DimShuffle`.
See Also
--------
DimShuffle
"""
"""
if
(
len
(
pattern
)
==
1
)
and
(
isinstance
(
pattern
[
0
],
(
list
,
tuple
))):
if
(
len
(
pattern
)
==
1
)
and
(
isinstance
(
pattern
[
0
],
(
list
,
tuple
))):
pattern
=
pattern
[
0
]
pattern
=
pattern
[
0
]
...
@@ -524,13 +538,17 @@ class _tensor_py_operators:
...
@@ -524,13 +538,17 @@ class _tensor_py_operators:
"""The rank of this tensor."""
"""The rank of this tensor."""
broadcastable
=
property
(
lambda
self
:
self
.
type
.
broadcastable
)
broadcastable
=
property
(
lambda
self
:
self
.
type
.
broadcastable
)
"""The broadcastable signature of this tensor.
"""
The broadcastable signature of this tensor.
See Also
--------
broadcasting
See :doc:`broadcasting` for details.
"""
"""
dtype
=
property
(
lambda
self
:
self
.
type
.
dtype
)
dtype
=
property
(
lambda
self
:
self
.
type
.
dtype
)
"""
The dtype of this tensor.
"""
"""
The dtype of this tensor.
"""
# extra pseudo-operator symbols
# extra pseudo-operator symbols
def
__dot__
(
left
,
right
):
def
__dot__
(
left
,
right
):
...
@@ -542,13 +560,13 @@ class _tensor_py_operators:
...
@@ -542,13 +560,13 @@ class _tensor_py_operators:
dot
=
__dot__
dot
=
__dot__
def
sum
(
self
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
):
def
sum
(
self
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
):
"""See `theano.tensor.sum`"""
"""See `theano.tensor.sum`
.
"""
return
theano
.
tensor
.
basic
.
sum
(
self
,
axis
=
axis
,
return
theano
.
tensor
.
basic
.
sum
(
self
,
axis
=
axis
,
dtype
=
dtype
,
keepdims
=
keepdims
,
dtype
=
dtype
,
keepdims
=
keepdims
,
acc_dtype
=
acc_dtype
)
acc_dtype
=
acc_dtype
)
def
prod
(
self
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
):
def
prod
(
self
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
):
"""See `theano.tensor.prod`"""
"""See `theano.tensor.prod`
.
"""
return
theano
.
tensor
.
basic
.
prod
(
self
,
axis
=
axis
,
return
theano
.
tensor
.
basic
.
prod
(
self
,
axis
=
axis
,
dtype
=
dtype
,
keepdims
=
keepdims
,
dtype
=
dtype
,
keepdims
=
keepdims
,
acc_dtype
=
acc_dtype
)
acc_dtype
=
acc_dtype
)
...
@@ -564,49 +582,49 @@ class _tensor_py_operators:
...
@@ -564,49 +582,49 @@ class _tensor_py_operators:
theano
.
tensor
.
basic
.
abs_
(
self
),
L
)
.
sum
(
axis
=
axis
),
1.0
/
L
)
theano
.
tensor
.
basic
.
abs_
(
self
),
L
)
.
sum
(
axis
=
axis
),
1.0
/
L
)
def
mean
(
self
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
):
def
mean
(
self
,
axis
=
None
,
dtype
=
None
,
keepdims
=
False
,
acc_dtype
=
None
):
"""See `theano.tensor.mean`"""
"""See `theano.tensor.mean`
.
"""
return
theano
.
tensor
.
basic
.
mean
(
self
,
axis
=
axis
,
return
theano
.
tensor
.
basic
.
mean
(
self
,
axis
=
axis
,
dtype
=
dtype
,
keepdims
=
keepdims
,
dtype
=
dtype
,
keepdims
=
keepdims
,
acc_dtype
=
acc_dtype
)
acc_dtype
=
acc_dtype
)
def
var
(
self
,
axis
=
None
,
keepdims
=
False
):
def
var
(
self
,
axis
=
None
,
keepdims
=
False
):
"""See `theano.tensor.var`"""
"""See `theano.tensor.var`
.
"""
return
theano
.
tensor
.
basic
.
var
(
self
,
axis
,
keepdims
=
keepdims
)
return
theano
.
tensor
.
basic
.
var
(
self
,
axis
,
keepdims
=
keepdims
)
def
std
(
self
,
axis
=
None
,
keepdims
=
False
):
def
std
(
self
,
axis
=
None
,
keepdims
=
False
):
"""See `theano.tensor.std`"""
"""See `theano.tensor.std`
.
"""
return
theano
.
tensor
.
basic
.
std
(
self
,
axis
,
keepdims
=
keepdims
)
return
theano
.
tensor
.
basic
.
std
(
self
,
axis
,
keepdims
=
keepdims
)
def
min
(
self
,
axis
=
None
,
keepdims
=
False
):
def
min
(
self
,
axis
=
None
,
keepdims
=
False
):
"""See `theano.tensor.min`"""
"""See `theano.tensor.min`
.
"""
return
theano
.
tensor
.
basic
.
min
(
self
,
axis
,
keepdims
=
keepdims
)
return
theano
.
tensor
.
basic
.
min
(
self
,
axis
,
keepdims
=
keepdims
)
def
max
(
self
,
axis
=
None
,
keepdims
=
False
):
def
max
(
self
,
axis
=
None
,
keepdims
=
False
):
"""See `theano.tensor.max`"""
"""See `theano.tensor.max`
.
"""
return
theano
.
tensor
.
basic
.
max
(
self
,
axis
,
keepdims
=
keepdims
)
return
theano
.
tensor
.
basic
.
max
(
self
,
axis
,
keepdims
=
keepdims
)
def
argmin
(
self
,
axis
=
None
,
keepdims
=
False
):
def
argmin
(
self
,
axis
=
None
,
keepdims
=
False
):
"""See `theano.tensor.argmin`"""
"""See `theano.tensor.argmin`
.
"""
return
theano
.
tensor
.
basic
.
argmin
(
self
,
axis
,
keepdims
=
keepdims
)
return
theano
.
tensor
.
basic
.
argmin
(
self
,
axis
,
keepdims
=
keepdims
)
def
argmax
(
self
,
axis
=
None
,
keepdims
=
False
):
def
argmax
(
self
,
axis
=
None
,
keepdims
=
False
):
"""See `theano.tensor.argmax`"""
"""See `theano.tensor.argmax`
.
"""
return
theano
.
tensor
.
basic
.
argmax
(
self
,
axis
,
keepdims
=
keepdims
)
return
theano
.
tensor
.
basic
.
argmax
(
self
,
axis
,
keepdims
=
keepdims
)
def
nonzero
(
self
,
return_matrix
=
False
):
def
nonzero
(
self
,
return_matrix
=
False
):
"""See `theano.tensor.nonzero`"""
"""See `theano.tensor.nonzero`
.
"""
return
theano
.
tensor
.
basic
.
nonzero
(
self
,
return_matrix
=
return_matrix
)
return
theano
.
tensor
.
basic
.
nonzero
(
self
,
return_matrix
=
return_matrix
)
def
nonzero_values
(
self
):
def
nonzero_values
(
self
):
"""See `theano.tensor.nonzero_values`"""
"""See `theano.tensor.nonzero_values`
.
"""
return
theano
.
tensor
.
basic
.
nonzero_values
(
self
)
return
theano
.
tensor
.
basic
.
nonzero_values
(
self
)
def
sort
(
self
,
axis
=-
1
,
kind
=
'quicksort'
,
order
=
None
):
def
sort
(
self
,
axis
=-
1
,
kind
=
'quicksort'
,
order
=
None
):
"""See `theano.tensor.sort`"""
"""See `theano.tensor.sort`
.
"""
return
theano
.
tensor
.
sort
(
self
,
axis
,
kind
,
order
)
return
theano
.
tensor
.
sort
(
self
,
axis
,
kind
,
order
)
def
argsort
(
self
,
axis
=-
1
,
kind
=
'quicksort'
,
order
=
None
):
def
argsort
(
self
,
axis
=-
1
,
kind
=
'quicksort'
,
order
=
None
):
"""See `theano.tensor.argsort`"""
"""See `theano.tensor.argsort`
.
"""
return
theano
.
tensor
.
argsort
(
self
,
axis
,
kind
,
order
)
return
theano
.
tensor
.
argsort
(
self
,
axis
,
kind
,
order
)
def
clip
(
self
,
a_min
,
a_max
):
def
clip
(
self
,
a_min
,
a_max
):
...
@@ -614,17 +632,17 @@ class _tensor_py_operators:
...
@@ -614,17 +632,17 @@ class _tensor_py_operators:
return
theano
.
tensor
.
basic
.
clip
(
self
,
a_min
,
a_max
)
return
theano
.
tensor
.
basic
.
clip
(
self
,
a_min
,
a_max
)
def
conj
(
self
):
def
conj
(
self
):
"""See `theano.tensor.conj`"""
"""See `theano.tensor.conj`
.
"""
return
theano
.
tensor
.
basic
.
conj
(
self
)
return
theano
.
tensor
.
basic
.
conj
(
self
)
conjugate
=
conj
conjugate
=
conj
def
repeat
(
self
,
repeats
,
axis
=
None
):
def
repeat
(
self
,
repeats
,
axis
=
None
):
"""See `theano.tensor.repeat`"""
"""See `theano.tensor.repeat`
.
"""
return
theano
.
tensor
.
extra_ops
.
repeat
(
self
,
repeats
,
axis
)
return
theano
.
tensor
.
extra_ops
.
repeat
(
self
,
repeats
,
axis
)
def
round
(
self
,
mode
=
"half_away_from_zero"
):
def
round
(
self
,
mode
=
"half_away_from_zero"
):
"""See `theano.tensor.round`"""
"""See `theano.tensor.round`
.
"""
return
theano
.
tensor
.
basic
.
round
(
self
,
mode
)
return
theano
.
tensor
.
basic
.
round
(
self
,
mode
)
def
trace
(
self
):
def
trace
(
self
):
...
@@ -646,12 +664,13 @@ class _tensor_py_operators:
...
@@ -646,12 +664,13 @@ class _tensor_py_operators:
return
theano
.
tensor
.
extra_ops
.
cumprod
(
self
,
axis
)
return
theano
.
tensor
.
extra_ops
.
cumprod
(
self
,
axis
)
def
ptp
(
self
,
axis
=
None
):
def
ptp
(
self
,
axis
=
None
):
"""
see 'theano.tensor.ptp'
"""
"""
See 'theano.tensor.ptp'.
"""
return
theano
.
tensor
.
ptp
(
self
,
axis
)
return
theano
.
tensor
.
ptp
(
self
,
axis
)
def
swapaxes
(
self
,
axis1
,
axis2
):
def
swapaxes
(
self
,
axis1
,
axis2
):
"""Return 'tensor.swapaxes(self, axis1, axis2)
"""
Return 'tensor.swapaxes(self, axis1, axis2).
If a matrix is provided with the right axes, its transpose
If a matrix is provided with the right axes, its transpose
will be returned.
will be returned.
...
@@ -660,32 +679,38 @@ class _tensor_py_operators:
...
@@ -660,32 +679,38 @@ class _tensor_py_operators:
return
theano
.
tensor
.
basic
.
swapaxes
(
self
,
axis1
,
axis2
)
return
theano
.
tensor
.
basic
.
swapaxes
(
self
,
axis1
,
axis2
)
def
fill
(
self
,
value
):
def
fill
(
self
,
value
):
"""Fill inputted tensor with the assigned value"""
"""Fill inputted tensor with the assigned value
.
"""
return
theano
.
tensor
.
basic
.
fill
(
self
,
value
)
return
theano
.
tensor
.
basic
.
fill
(
self
,
value
)
def
choose
(
self
,
a
,
choices
,
out
=
None
,
mode
=
'raise'
):
def
choose
(
self
,
a
,
choices
,
out
=
None
,
mode
=
'raise'
):
"""Construct an array from an index array and a set of arrays to choose from."""
"""
Construct an array from an index array and a set of arrays to choose
from.
"""
return
theano
.
tensor
.
basic
.
choose
(
self
,
a
,
choices
,
out
=
None
,
return
theano
.
tensor
.
basic
.
choose
(
self
,
a
,
choices
,
out
=
None
,
mode
=
'raise'
)
mode
=
'raise'
)
def
squeeze
(
self
):
def
squeeze
(
self
):
"""Remove broadcastable dimensions from
"""
the shape of an array.
Remove broadcastable dimensions from the shape of an array.
It returns the input array, but with the broadcastable dimensions
removed. This is always `x` itself or a view into `x`.
It returns the input array, but with the
broadcastable dimensions removed. This is
always `x` itself or a view into `x`.
"""
"""
return
theano
.
tensor
.
extra_ops
.
squeeze
(
self
)
return
theano
.
tensor
.
extra_ops
.
squeeze
(
self
)
def
compress
(
self
,
a
,
axis
=
None
):
def
compress
(
self
,
a
,
axis
=
None
):
"""Return selected slices only
"""Return selected slices only."""
"""
return
theano
.
tensor
.
extra_ops
.
compress
(
self
,
a
,
axis
=
axis
)
return
theano
.
tensor
.
extra_ops
.
compress
(
self
,
a
,
axis
=
axis
)
class
TensorVariable
(
_tensor_py_operators
,
Variable
):
class
TensorVariable
(
_tensor_py_operators
,
Variable
):
"""Subclass to add the tensor operators to the basic `Variable` class."""
"""
Subclass to add the tensor operators to the basic `Variable` class.
"""
def
__init__
(
self
,
type
,
owner
=
None
,
index
=
None
,
name
=
None
):
def
__init__
(
self
,
type
,
owner
=
None
,
index
=
None
,
name
=
None
):
super
(
TensorVariable
,
self
)
.
__init__
(
type
,
owner
=
owner
,
super
(
TensorVariable
,
self
)
.
__init__
(
type
,
owner
=
owner
,
...
@@ -721,9 +746,11 @@ TensorType.Variable = TensorVariable
...
@@ -721,9 +746,11 @@ TensorType.Variable = TensorVariable
class
TensorConstantSignature
(
tuple
):
class
TensorConstantSignature
(
tuple
):
"""A Signature object for comparing TensorConstant instances
"""
A Signature object for comparing TensorConstant instances.
An instance is a pair: (Type instance, ndarray).
An instance is a pair: (Type instance, ndarray).
"""
"""
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
if
type
(
self
)
!=
type
(
other
):
if
type
(
self
)
!=
type
(
other
):
...
@@ -814,6 +841,7 @@ class TensorConstant(_tensor_py_operators, Constant):
...
@@ -814,6 +841,7 @@ class TensorConstant(_tensor_py_operators, Constant):
"""Subclass to add the tensor operators to the basic `Constant` class.
"""Subclass to add the tensor operators to the basic `Constant` class.
To create a TensorConstant, use the `constant` function in this module.
To create a TensorConstant, use the `constant` function in this module.
"""
"""
def
__init__
(
self
,
type
,
data
,
name
=
None
):
def
__init__
(
self
,
type
,
data
,
name
=
None
):
Constant
.
__init__
(
self
,
type
,
data
,
name
)
Constant
.
__init__
(
self
,
type
,
data
,
name
)
...
...
theano/tensor/xlogx.py
浏览文件 @
6304a061
...
@@ -7,6 +7,7 @@ from theano import scalar
...
@@ -7,6 +7,7 @@ from theano import scalar
class
XlogX
(
scalar
.
UnaryScalarOp
):
class
XlogX
(
scalar
.
UnaryScalarOp
):
"""
"""
Compute X * log(X), with special case 0 log(0) = 0.
Compute X * log(X), with special case 0 log(0) = 0.
"""
"""
@staticmethod
@staticmethod
def
st_impl
(
x
):
def
st_impl
(
x
):
...
@@ -39,6 +40,7 @@ xlogx = Elemwise(scalar_xlogx, name='xlogx')
...
@@ -39,6 +40,7 @@ xlogx = Elemwise(scalar_xlogx, name='xlogx')
class
XlogY0
(
scalar
.
BinaryScalarOp
):
class
XlogY0
(
scalar
.
BinaryScalarOp
):
"""
"""
Compute X * log(Y), with special case 0 log(0) = 0.
Compute X * log(Y), with special case 0 log(0) = 0.
"""
"""
@staticmethod
@staticmethod
def
st_impl
(
x
,
y
):
def
st_impl
(
x
,
y
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论