Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
bd11e130
提交
bd11e130
authored
7月 02, 2015
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3074 from harlouci/flake8_v2
flake8
上级
cb08bc11
fc6d2310
隐藏空白字符变更
内嵌
并排
正在显示
15 个修改的文件
包含
557 行增加
和
573 行删除
+557
-573
basic.py
theano/tensor/basic.py
+44
-45
blas.py
theano/tensor/blas.py
+3
-3
elemwise.py
theano/tensor/elemwise.py
+108
-102
inplace.py
theano/tensor/inplace.py
+0
-1
opt.py
theano/tensor/opt.py
+279
-288
raw_random.py
theano/tensor/raw_random.py
+36
-35
shared_randomstreams.py
theano/tensor/shared_randomstreams.py
+4
-2
sharedvar.py
theano/tensor/sharedvar.py
+8
-6
slinalg.py
theano/tensor/slinalg.py
+9
-16
sort.py
theano/tensor/sort.py
+20
-20
subtensor.py
theano/tensor/subtensor.py
+23
-25
utils.py
theano/tensor/utils.py
+5
-5
var.py
theano/tensor/var.py
+8
-10
xlogx.py
theano/tensor/xlogx.py
+10
-2
test_flake8.py
theano/tests/test_flake8.py
+0
-13
没有找到文件。
theano/tensor/basic.py
浏览文件 @
bd11e130
"""A `Type` and `Op` classes to work with numpy.ndarrays symbolically."""
__docformat__
=
"restructuredtext en"
import
sys
import
warnings
...
...
@@ -29,7 +27,6 @@ from theano.printing import pprint, min_informative_str
# For history
from
theano.compile
import
Rebroadcast
,
Shape
,
shape
# We use these exceptions as well.
import
theano.scalar.sharedvar
from
theano.gradient
import
grad_undefined
...
...
@@ -42,6 +39,8 @@ from theano.tensor.elemwise import Elemwise, DimShuffle, CAReduce, Sum
import
logging
_logger
=
logging
.
getLogger
(
"theano.tensor.basic"
)
__docformat__
=
"restructuredtext en"
# This is needed as we will hide it later
python_complex
=
complex
python_any
=
any
...
...
@@ -620,8 +619,8 @@ def get_scalar_constant_value(orig_v, elemwise=True,
ret
=
[[
None
]]
v
.
owner
.
op
.
perform
(
v
.
owner
,
const
,
ret
)
return
ret
[
0
][
0
]
elif
(
isinstance
(
v
.
owner
.
op
,
theano
.
tensor
.
subtensor
.
Subtensor
)
and
v
.
ndim
==
0
):
elif
(
isinstance
(
v
.
owner
.
op
,
theano
.
tensor
.
subtensor
.
Subtensor
)
and
v
.
ndim
==
0
):
if
isinstance
(
v
.
owner
.
inputs
[
0
],
TensorConstant
):
cdata
=
tuple
(
v
.
owner
.
op
.
get_constant_idx
(
v
.
owner
.
inputs
))
try
:
...
...
@@ -1090,7 +1089,7 @@ scalar_from_tensor = ScalarFromTensor()
# to be removed as we get the epydoc routine-documenting thing going
#-JB 20080924
#
-JB 20080924
def
_conversion
(
real_value
,
name
):
__oplist_tag
(
real_value
,
'casting'
)
real_value
.
__module__
=
'tensor.basic'
...
...
@@ -1235,8 +1234,8 @@ class MaxAndArgmax(Op):
raise
TypeError
(
"MaxAndArgmax needs a constant axis. Got
%
s"
%
axis
)
else
:
assert
(
axis
.
dtype
.
startswith
(
"int"
)
or
axis
.
dtype
.
startswith
(
"uint"
))
assert
(
axis
.
dtype
.
startswith
(
"int"
)
or
axis
.
dtype
.
startswith
(
"uint"
))
axis
=
int
(
axis
.
data
)
# we make the axis all positive to make the infer_shape work
# with negative axis
...
...
@@ -1373,13 +1372,13 @@ class MaxAndArgmax(Op):
# Lebesgue measure, the result may be interpreted as weak gradient.
# @note: This function should work correctly for L{vector}s.
#
(x, y), (gz, gw)
#
gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy
#
gMax * dMax/dx + gArgMax * dArgMax/dx,
#
gMax * dMax/daxis + gArgMax * dArgMax/daxis
#
g_max has one less dimension than x, so you need to complete
#
g_max to x's shape when axis=0 the broadcasting mechanism
#
does it automatically
#
(x, y), (gz, gw)
#
gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy
#
gMax * dMax/dx + gArgMax * dArgMax/dx,
#
gMax * dMax/daxis + gArgMax * dArgMax/daxis
#
g_max has one less dimension than x, so you need to complete
#
g_max to x's shape when axis=0 the broadcasting mechanism
#
does it automatically
x
,
axis
=
inp
g_max
,
g_max_idx
=
grads
...
...
@@ -2078,7 +2077,7 @@ def chi2sf(x, k):
# numpy.real(float32) return a view on the inputs.
#@_scal_elemwise_with_nfunc('real', 1, 1)
#
@_scal_elemwise_with_nfunc('real', 1, 1)
@_scal_elemwise
def
real
(
z
):
"""Return real component of complex-valued tensor `z`"""
...
...
@@ -2116,7 +2115,7 @@ def complex_from_polar(abs, angle):
# fill, _fill_inplace = _elemwise(scal.second, 'fill',
#
"""fill WRITEME (elemwise)""")
#
"""fill WRITEME (elemwise)""")
@_scal_elemwise
def
second
(
a
,
b
):
"""Create a matrix by filling the shape of a with b"""
...
...
@@ -3540,8 +3539,8 @@ class Join(Op):
dtypes
=
[
x
.
type
.
dtype
for
x
in
as_tensor_variable_args
]
out_dtype
=
scal
.
upcast
(
*
dtypes
)
output_maker
=
lambda
bcastable
:
tensor
(
dtype
=
out_dtype
,
broadcastable
=
bcastable
)
def
output_maker
(
bcastable
):
return
tensor
(
dtype
=
out_dtype
,
broadcastable
=
bcastable
)
return
self
.
_make_node_internal
(
axis
,
tensors
,
as_tensor_variable_args
,
output_maker
)
...
...
@@ -4361,8 +4360,7 @@ class Tile(Op):
def
make_node
(
self
,
x
,
reps
):
warnings
.
warn
((
"Tile op is deprecated, use tile function instead."
),
stacklevel
=
3
)
"Tile op is deprecated, use tile function instead."
),
stacklevel
=
3
)
x
=
as_tensor_variable
(
x
)
reps
=
as_tensor_variable
(
reps
)
return
gof
.
Apply
(
self
,
[
x
,
reps
],
[
tensor
(
x
.
type
.
dtype
,
[
False
]
*
...
...
@@ -4427,8 +4425,9 @@ def tile(x, reps, ndim=None):
except
TypeError
:
raise
ValueError
(
"reps must be iterable"
)
if
not
numpy
.
all
([
isinstance
(
r
,
integer_types
)
or
(
isinstance
(
r
,
TensorVariable
)
and
r
.
dtype
in
[
"int8"
,
"int16"
,
"int32"
,
"int64"
])
for
r
in
reps
]):
(
isinstance
(
r
,
TensorVariable
)
and
r
.
dtype
in
[
"int8"
,
"int16"
,
"int32"
,
"int64"
])
for
r
in
reps
]):
raise
ValueError
(
"elements of reps must be scalars of integer dtype"
)
elif
len
(
reps
)
!=
x
.
ndim
:
raise
ValueError
(
"len(reps) != x.ndim not currently supported"
)
...
...
@@ -4442,10 +4441,10 @@ def tile(x, reps, ndim=None):
shape
=
[
x
.
shape
[
i
]
for
i
in
xrange
(
ndim
)]
alloc_shape
=
reps
+
shape
y
=
alloc
(
x
,
*
alloc_shape
)
shuffle_ind
=
numpy
.
arange
(
ndim
*
2
)
.
reshape
(
2
,
ndim
)
shuffle_ind
=
numpy
.
arange
(
ndim
*
2
)
.
reshape
(
2
,
ndim
)
shuffle_ind
=
shuffle_ind
.
transpose
()
.
flatten
()
y
=
y
.
dimshuffle
(
*
shuffle_ind
)
new_shapes
=
[
sh
*
reps
[
i
]
for
i
,
sh
in
enumerate
(
shape
)]
new_shapes
=
[
sh
*
reps
[
i
]
for
i
,
sh
in
enumerate
(
shape
)]
y
=
y
.
reshape
(
new_shapes
)
return
y
...
...
@@ -4493,12 +4492,12 @@ class ARange(Op):
def
upcast
(
var
):
if
(
'int'
in
var
.
dtype
and
# We do not want to cast uint64 to int64 as this can
# loose information. If we upcast uint64 with int64,
# this give float64. This is safer then checking for
# uint64 in case we support [u]int128 or other in the
# future.
scal
.
upcast
(
var
.
dtype
,
'int64'
)
==
'int64'
):
# We do not want to cast uint64 to int64 as this can
# loose information. If we upcast uint64 with int64,
# this give float64. This is safer then checking for
# uint64 in case we support [u]int128 or other in the
# future.
scal
.
upcast
(
var
.
dtype
,
'int64'
)
==
'int64'
):
return
cast
(
var
,
'int64'
)
return
var
...
...
@@ -4512,8 +4511,8 @@ class ARange(Op):
else
:
stop
=
upcast
(
stop
)
start
=
upcast
(
start
)
return
[(
maximum
(
cast
(
ceil
(
cast
((
stop
-
start
),
'float64'
)
/
step
),
'int64'
),
0
),)]
return
[(
maximum
(
cast
(
ceil
(
cast
((
stop
-
start
),
'float64'
)
/
step
),
'int64'
),
0
),)]
def
perform
(
self
,
node
,
inp
,
out_
):
start
,
stop
,
step
=
inp
...
...
@@ -4742,8 +4741,8 @@ class PermuteRowElements(Op):
# the gradient over these axes, but keep the dimension (as
# broadcastable)
broadcasted_dims
=
[
dim
for
dim
in
xrange
(
gz
.
type
.
ndim
)
if
x
.
type
.
broadcastable
[
dim
]
and
not
gz
.
type
.
broadcastable
[
dim
]]
if
x
.
type
.
broadcastable
[
dim
]
and
not
gz
.
type
.
broadcastable
[
dim
]]
gx
=
Sum
(
axis
=
broadcasted_dims
)(
gx
)
# Sum(...) removed the dimensions in broadcasted_dims,
...
...
@@ -4876,17 +4875,17 @@ class Dot(Op):
xgrad
=
gz
*
y
ygrad
=
gz
*
x
#x is vector, y is matrix, grad is vector
#
x is vector, y is matrix, grad is vector
elif
xdim
==
1
and
ydim
==
2
:
xgrad
=
dot
(
gz
,
y
.
T
)
ygrad
=
outer
(
x
.
T
,
gz
)
#x is matrix, y is vector, grad is vector
#
x is matrix, y is vector, grad is vector
elif
xdim
==
2
and
ydim
==
1
:
xgrad
=
outer
(
gz
,
y
.
T
)
ygrad
=
dot
(
x
.
T
,
gz
)
#x is matrix, y is matrix, grad is matrix
#
x is matrix, y is matrix, grad is matrix
elif
xdim
==
ydim
==
2
:
xgrad
=
dot
(
gz
,
y
.
T
)
ygrad
=
dot
(
x
.
T
,
gz
)
...
...
@@ -4958,8 +4957,8 @@ class Dot(Op):
if
eval_point_values
[
i
]
is
not
None
and
\
input_values
[
i
]
.
shape
!=
eval_point_values
[
i
]
.
shape
:
raise
ValueError
(
'input '
+
str
(
i
)
+
' and eval_point '
+
str
(
i
)
+
' to Dot.R_op should have the same shape, but '
'input '
+
str
(
i
)
+
' and eval_point '
+
str
(
i
)
+
' to Dot.R_op should have the same shape, but '
'their shapes are
%
s and
%
s, respectively'
%
(
str
(
input_values
[
i
]
.
shape
),
str
(
eval_point_values
[
i
]
.
shape
)))
...
...
@@ -5230,8 +5229,8 @@ def tensordot(a, b, axes=2):
'equal to b.ndim (b.ndim=
%
i, max(axes[1])=
%
i).'
%
(
b
.
ndim
,
numpy
.
max
(
numpy
.
array
(
b_axes
))))
a_order
=
(
tuple
(
x
for
x
in
tuple
(
xrange
(
a
.
ndim
))
if
x
not
in
a_axes
)
+
a_axes
)
a_order
=
(
tuple
(
x
for
x
in
tuple
(
xrange
(
a
.
ndim
))
if
x
not
in
a_axes
)
+
a_axes
)
b_order
=
(
b_axes
+
tuple
(
x
for
x
in
tuple
(
xrange
(
b
.
ndim
))
if
x
not
in
b_axes
))
...
...
@@ -5528,8 +5527,8 @@ class Choose(Op):
# dimensions for the output
l
=
[]
for
sh1
,
sh2
,
b1
in
zip
(
shapes
[
0
],
shapes
[
1
][
1
:],
node
.
inputs
[
0
]
.
broadcastable
):
shapes
[
1
][
1
:],
node
.
inputs
[
0
]
.
broadcastable
):
if
b1
:
l
.
append
(
sh2
)
else
:
...
...
@@ -5635,7 +5634,7 @@ class AllocEmpty(gof.Op):
out
[
0
]
=
numpy
.
empty
(
sh
,
dtype
=
self
.
dtype
)
def
c_code
(
self
,
node
,
name
,
inputs
,
out_
,
sub
):
dtype
=
"NPY_"
+
self
.
dtype
.
upper
()
dtype
=
"NPY_"
+
self
.
dtype
.
upper
()
out
,
=
out_
fail
=
sub
[
'fail'
]
shps
=
inputs
...
...
theano/tensor/blas.py
浏览文件 @
bd11e130
...
...
@@ -266,7 +266,7 @@ SOMEPATH/Canopy_64bit/User/lib/python2.7/site-packages/numpy/distutils/system_in
# Using "conda install mkl" will install both, as well as
# optimized versions of numpy and scipy.
try
:
import
mkl
import
mkl
#noqa
except
ImportError
as
e
:
_logger
.
info
(
'Conda mkl is not available:
%
s'
,
e
)
else
:
...
...
@@ -1599,11 +1599,11 @@ class GemmOptimizer(Optimizer):
)
did_something
=
True
nb_replacement
+=
1
except
InconsistencyError
as
e
:
except
InconsistencyError
:
# TODO: retry other applications of gemm (see comment
# in _gemm_from_node)
nb_inconsistency_replace
+=
1
except
ReplacementDidntRemovedError
as
e
:
except
ReplacementDidntRemovedError
:
nb_replacement_didn_t_remove
+=
1
self
.
warned
=
True
fgraph
.
remove_feature
(
u
)
...
...
theano/tensor/elemwise.py
浏览文件 @
bd11e130
...
...
@@ -11,7 +11,7 @@ from six import iteritems
from
six.moves
import
xrange
from
theano.gof
import
Apply
,
Op
,
OpenMPOp
from
theano
import
scalar
from
theano.scalar
import
Scalar
,
get_scalar_type
from
theano.scalar
import
get_scalar_type
from
theano.printing
import
pprint
from
theano.tensor.utils
import
hash_from_dict
from
theano.gradient
import
DisconnectedType
...
...
@@ -50,7 +50,7 @@ def TensorConstant(*inputs, **kwargs):
##################
#
## DimShuffle ##
#
#
DimShuffle
#
##################
class
DimShuffle
(
Op
):
...
...
@@ -139,8 +139,8 @@ class DimShuffle(Op):
raise
TypeError
(
"DimShuffle indices must be python ints."
)
if
j
>=
len
(
input_broadcastable
):
raise
ValueError
((
"new_order[
%
d] is
%
d, but the input "
"only has
%
d axes."
)
%
(
i
,
j
,
len
(
input_broadcastable
)))
"only has
%
d axes."
)
%
(
i
,
j
,
len
(
input_broadcastable
)))
if
j
in
new_order
[(
i
+
1
):]:
raise
ValueError
(
"The same input dimension may not appear "
"twice in the list of output dimensions"
,
...
...
@@ -207,7 +207,7 @@ class DimShuffle(Op):
ob
.
append
(
ib
[
value
])
output
=
TensorType
(
dtype
=
input
.
type
.
dtype
,
broadcastable
=
ob
)
.
make_variable
()
broadcastable
=
ob
)
.
make_variable
()
return
Apply
(
self
,
[
input
],
[
output
])
...
...
@@ -219,12 +219,11 @@ class DimShuffle(Op):
and
self
.
input_broadcastable
==
other
.
input_broadcastable
def
_rehash
(
self
):
self
.
_hashval
=
(
hash
(
type
(
self
)
.
__name__
)
^
hash
(
type
(
self
)
.
__module__
)
^
hash
(
self
.
inplace
)
^
hash
(
self
.
new_order
)
^
hash
(
self
.
input_broadcastable
))
self
.
_hashval
=
(
hash
(
type
(
self
)
.
__name__
)
^
hash
(
type
(
self
)
.
__module__
)
^
hash
(
self
.
inplace
)
^
hash
(
self
.
new_order
)
^
hash
(
self
.
input_broadcastable
))
def
__hash__
(
self
):
return
self
.
_hashval
...
...
@@ -232,7 +231,7 @@ class DimShuffle(Op):
def
__str__
(
self
):
if
self
.
inplace
:
return
"InplaceDimShuffle{
%
s}"
%
","
.
join
(
str
(
x
)
for
x
in
self
.
new_order
)
for
x
in
self
.
new_order
)
else
:
return
"DimShuffle{
%
s}"
%
","
.
join
(
str
(
x
)
for
x
in
self
.
new_order
)
...
...
@@ -286,7 +285,8 @@ class DimShuffle(Op):
nd_out
=
len
(
self
.
new_order
)
check_input_nd
=
[(
'if (PyArray_NDIM(
%(input)
s) != '
+
str
(
nd_in
)
+
')'
'{PyErr_SetString(PyExc_NotImplementedError, "input nd");
%(fail)
s;}'
)]
'{PyErr_SetString(PyExc_NotImplementedError, '
'"input nd");
%(fail)
s;}'
)]
clear_output
=
[
'if (
%(res)
s) {Py_XDECREF(
%(res)
s);}'
]
...
...
@@ -296,8 +296,10 @@ class DimShuffle(Op):
get_base
=
[
'{ PyArrayObject *
%(basename)
s =
%(input)
s'
,
'Py_INCREF((PyObject*)
%(basename)
s)'
]
else
:
get_base
=
[(
'{ PyArrayObject *
%(basename)
s = (PyArrayObject*)PyArray_FromAny((PyObject*)
%(input)
s, NULL,'
'0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY, NULL)'
)]
get_base
=
[(
'{ PyArrayObject *
%(basename)
s = '
'(PyArrayObject*)PyArray_FromAny((PyObject*)
%(input)
s,'
' NULL, 0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY,'
' NULL)'
)]
shape_statements
=
[
'npy_intp dimensions[
%
i]'
%
nd_out
]
for
i
,
o
in
enumerate
(
self
.
new_order
):
...
...
@@ -312,9 +314,12 @@ class DimShuffle(Op):
# set the strides of the non-broadcasted dimensions
for
i
,
o
in
enumerate
(
self
.
new_order
):
if
o
!=
'x'
:
strides_statements
+=
[(
'strides['
+
str
(
i
)
+
'] = PyArray_DIMS(
%(basename)
s)['
+
str
(
o
)
+
'] == 1? 0 : PyArray_STRIDES(
%(basename)
s)['
+
str
(
o
)
+
']'
)]
strides_statements
+=
[(
'strides['
+
str
(
i
)
+
'] = PyArray_DIMS(
%(basename)
s)['
+
str
(
o
)
+
'] == 1? 0 : '
'PyArray_STRIDES(
%(basename)
s)['
+
str
(
o
)
+
']'
)]
else
:
strides_statements
+=
[(
'strides['
+
str
(
i
)
+
'] = 0'
)]
...
...
@@ -360,12 +365,12 @@ PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
"""
'}'
]
full_code
=
statements
(
check_input_nd
+
clear_output
+
get_base
+
shape_statements
+
strides_statements
+
close_bracket
)
full_code
=
statements
(
check_input_nd
+
clear_output
+
get_base
+
shape_statements
+
strides_statements
+
close_bracket
)
if
0
:
print
(
'C_CODE'
)
...
...
@@ -408,7 +413,7 @@ PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
class
DimShufflePrinter
:
def
__p
(
self
,
new_order
,
pstate
,
r
):
if
new_order
!=
()
and
new_order
[
0
]
==
'x'
:
if
new_order
!=
()
and
new_order
[
0
]
==
'x'
:
return
"
%
s"
%
self
.
__p
(
new_order
[
1
:],
pstate
,
r
)
# return "[%s]" % self.__p(new_order[1:], pstate, r)
if
list
(
new_order
)
==
list
(
range
(
r
.
type
.
ndim
)):
...
...
@@ -416,7 +421,7 @@ class DimShufflePrinter:
if
list
(
new_order
)
==
list
(
reversed
(
range
(
r
.
type
.
ndim
))):
return
"
%
s.T"
%
pstate
.
pprinter
.
process
(
r
)
return
"DimShuffle{
%
s}(
%
s)"
%
(
", "
.
join
(
map
(
str
,
new_order
)),
pstate
.
pprinter
.
process
(
r
))
pstate
.
pprinter
.
process
(
r
))
def
process
(
self
,
r
,
pstate
):
if
r
.
owner
is
None
:
...
...
@@ -428,11 +433,11 @@ class DimShufflePrinter:
raise
TypeError
(
"Can only print DimShuffle."
)
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
DimShuffle
),
DimShufflePrinter
())
DimShufflePrinter
())
################
#
## Elemwise ##
#
#
Elemwise
#
################
class
Elemwise
(
OpenMPOp
):
...
...
@@ -496,7 +501,7 @@ class Elemwise(OpenMPOp):
self
.
nfunc
=
getattr
(
numpy
,
nfunc_spec
[
0
])
elif
scalar_op
.
nin
>
0
:
self
.
ufunc
=
numpy
.
frompyfunc
(
scalar_op
.
impl
,
scalar_op
.
nin
,
scalar_op
.
nout
)
scalar_op
.
nout
)
# precompute the hash of this node
self
.
_rehash
()
...
...
@@ -518,7 +523,8 @@ class Elemwise(OpenMPOp):
self
.
nfunc
=
getattr
(
numpy
,
self
.
nfunc_spec
[
0
])
elif
self
.
scalar_op
.
nin
>
0
:
self
.
ufunc
=
numpy
.
frompyfunc
(
self
.
scalar_op
.
impl
,
self
.
scalar_op
.
nin
,
self
.
scalar_op
.
nout
)
self
.
scalar_op
.
nin
,
self
.
scalar_op
.
nout
)
self
.
_rehash
()
def
make_node
(
self
,
*
inputs
):
...
...
@@ -557,15 +563,16 @@ class Elemwise(OpenMPOp):
# it is multiplied by nout because Elemwise supports multiple outputs
# (nout of them)
out_broadcastables
=
[[
all
(
bcast
)
for
bcast
in
izip
(
*
[
input
.
type
.
broadcastable
for
input
in
inputs
])]]
*
shadow
.
nout
for
bcast
in
izip
(
*
[
input
.
type
.
broadcastable
for
input
in
inputs
])]]
*
shadow
.
nout
# inplace_pattern maps output idx -> input idx
inplace_pattern
=
self
.
inplace_pattern
if
inplace_pattern
:
for
overwriter
,
overwritten
in
iteritems
(
inplace_pattern
):
for
ob
,
ib
in
izip
(
out_broadcastables
[
overwriter
],
inputs
[
overwritten
]
.
type
.
broadcastable
):
inputs
[
overwritten
]
.
type
.
broadcastable
):
if
ib
and
not
ob
:
raise
ValueError
(
"Operation cannot be done inplace on an input "
...
...
@@ -579,8 +586,8 @@ class Elemwise(OpenMPOp):
([
i
.
type
.
dtype
for
i
in
inputs
],
out_dtypes
,
inplace_pattern
)))
outputs
=
[
TensorType
(
dtype
=
dtype
,
broadcastable
=
broadcastable
)()
for
dtype
,
broadcastable
in
izip
(
out_dtypes
,
out_broadcastables
)
]
for
dtype
,
broadcastable
in
izip
(
out_dtypes
,
out_broadcastables
)
]
return
Apply
(
self
,
inputs
,
outputs
)
def
__eq__
(
self
,
other
):
...
...
@@ -589,8 +596,8 @@ class Elemwise(OpenMPOp):
other_items
=
list
(
other
.
inplace_pattern
.
items
())
items
.
sort
()
other_items
.
sort
()
rval
=
((
self
.
scalar_op
==
other
.
scalar_op
)
and
(
items
==
other_items
))
rval
=
((
self
.
scalar_op
==
other
.
scalar_op
)
and
(
items
==
other_items
))
return
rval
return
False
...
...
@@ -628,7 +635,7 @@ class Elemwise(OpenMPOp):
rop_out
=
None
for
jdx
,
(
inp
,
eval_point
)
in
enumerate
(
izip
(
inputs
,
eval_points
)):
eval_points
)):
# if None, then we can just ignore this branch ..
# what we do is to assume that for any non-differentiable
# branch, the gradient is actually 0, which I think is not
...
...
@@ -668,7 +675,7 @@ class Elemwise(OpenMPOp):
# to the gradient.grad method when the outputs have
# some integer and some floating point outputs
if
False
in
[
str
(
out
.
type
.
dtype
)
.
find
(
'int'
)
==
-
1
for
out
in
outs
]:
for
out
in
outs
]:
# For integer output, return value may
# only be zero or undefined
# We don't bother with trying to check
...
...
@@ -699,7 +706,7 @@ class Elemwise(OpenMPOp):
# we can sum over them
# todo: only count dimensions that were effectively broadcasted
to_sum
=
[
j
for
j
,
bcast
in
enumerate
(
ipt
.
type
.
broadcastable
)
if
bcast
]
if
bcast
]
if
to_sum
:
shuffle
=
[]
...
...
@@ -714,7 +721,7 @@ class Elemwise(OpenMPOp):
# close for
sr
=
Sum
(
axis
=
to_sum
)(
rval
[
i
])
sr
=
sr
.
dimshuffle
(
shuffle
)
#sr = DimShuffle(sr.type.broadcastable, shuffle)(sr)
#
sr = DimShuffle(sr.type.broadcastable, shuffle)(sr)
rval
[
i
]
=
sr
# close if
# close for
...
...
@@ -747,7 +754,7 @@ class Elemwise(OpenMPOp):
if
not
isinstance
(
scalar_igrads
,
(
list
,
tuple
)):
raise
TypeError
(
'
%
s.grad returned
%
s instead of list or tuple'
%
(
str
(
self
.
scalar_op
),
str
(
type
(
scalar_igrads
))))
(
str
(
self
.
scalar_op
),
str
(
type
(
scalar_igrads
))))
nd
=
len
(
inputs
[
0
]
.
type
.
broadcastable
)
# this is the same for everyone
...
...
@@ -787,9 +794,8 @@ class Elemwise(OpenMPOp):
# should be disabled.
super
(
Elemwise
,
self
)
.
perform
(
node
,
inputs
,
output_storage
)
maxsize
=
max
(
len
(
input
.
shape
)
for
input
in
inputs
)
for
dims
in
izip
(
*
[
list
(
zip
(
input
.
shape
,
sinput
.
type
.
broadcastable
))
for
input
,
sinput
in
zip
(
inputs
,
node
.
inputs
)]):
for
input
,
sinput
in
zip
(
inputs
,
node
.
inputs
)]):
if
max
(
d
for
d
,
b
in
dims
)
!=
1
and
(
1
,
False
)
in
dims
:
# yes there may be more compact ways to write this code,
# but please maintain python 2.4 compatibility
...
...
@@ -1115,7 +1121,7 @@ class Elemwise(OpenMPOp):
# use it! The scalar_op need to check the broadcast flag himself.
if
(
all
([
o
.
ndim
>=
1
for
o
in
node
.
outputs
])
and
# Don't use the contig code for broadcasted scalar.
not
all
(
node
.
outputs
[
0
]
.
broadcastable
)):
not
all
(
node
.
outputs
[
0
]
.
broadcastable
)):
contig
=
None
try
:
contig
=
self
.
scalar_op
.
c_code_contiguous
(
...
...
@@ -1192,19 +1198,20 @@ class Elemwise(OpenMPOp):
return
self
.
scalar_op
.
c_support_code
()
def
c_support_code_apply
(
self
,
node
,
nodename
):
support_code
=
self
.
scalar_op
.
c_support_code_apply
(
node
,
nodename
+
'_scalar_'
)
support_code
=
self
.
scalar_op
.
c_support_code_apply
(
node
,
nodename
+
'_scalar_'
)
return
support_code
def
c_code_cache_version_apply
(
self
,
node
):
version
=
[
12
]
# the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node
=
Apply
(
self
.
scalar_op
,
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)
.
make_variable
()
for
input
in
node
.
inputs
],
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)
.
make_variable
()
for
output
in
node
.
outputs
])
scalar_node
=
Apply
(
self
.
scalar_op
,
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)
.
make_variable
()
for
input
in
node
.
inputs
],
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)
.
make_variable
()
for
output
in
node
.
outputs
])
version
.
append
(
self
.
scalar_op
.
c_code_cache_version_apply
(
scalar_node
))
for
i
in
node
.
inputs
+
node
.
outputs
:
version
.
append
(
get_scalar_type
(
dtype
=
i
.
type
.
dtype
)
.
c_code_cache_version
())
...
...
@@ -1233,7 +1240,7 @@ class Elemwise(OpenMPOp):
################
#
## CAReduce ##
#
#
CAReduce
#
################
class
CAReduce
(
Op
):
...
...
@@ -1325,8 +1332,8 @@ class CAReduce(Op):
if
self
.
axis
is
not
None
:
for
axis
in
self
.
axis
:
if
(
axis
>=
input
.
type
.
ndim
or
(
axis
<
0
and
abs
(
axis
)
>
input
.
type
.
ndim
)):
if
(
axis
>=
input
.
type
.
ndim
or
(
axis
<
0
and
abs
(
axis
)
>
input
.
type
.
ndim
)):
raise
ValueError
((
'Not enough dimensions on
%
s to reduce on axis
%
s'
%
(
input
,
axis
)))
...
...
@@ -1366,9 +1373,9 @@ class CAReduce(Op):
self
.
set_ufunc
(
self
.
scalar_op
)
def
__eq__
(
self
,
other
):
return
(
type
(
self
)
==
type
(
other
)
and
self
.
scalar_op
==
other
.
scalar_op
and
self
.
axis
==
other
.
axis
)
return
(
type
(
self
)
==
type
(
other
)
and
self
.
scalar_op
==
other
.
scalar_op
and
self
.
axis
==
other
.
axis
)
def
__hash__
(
self
):
if
self
.
axis
is
None
:
...
...
@@ -1420,13 +1427,13 @@ class CAReduce(Op):
# was built with "frompyfunc". We need to find out if we
# are in one of these cases (only "object" is supported in
# the output).
if
((
self
.
ufunc
.
ntypes
==
1
)
and
(
self
.
ufunc
.
types
[
0
][
-
1
]
==
'O'
)):
if
((
self
.
ufunc
.
ntypes
==
1
)
and
(
self
.
ufunc
.
types
[
0
][
-
1
]
==
'O'
)):
variable
=
self
.
ufunc
.
reduce
(
variable
,
dimension
,
dtype
=
'object'
)
dtype
=
'object'
)
else
:
variable
=
self
.
ufunc
.
reduce
(
variable
,
dimension
,
dtype
=
acc_dtype
)
dtype
=
acc_dtype
)
variable
=
numpy
.
asarray
(
variable
)
if
numpy
.
may_share_memory
(
variable
,
input
):
...
...
@@ -1434,7 +1441,7 @@ class CAReduce(Op):
# We don't want this.
variable
=
variable
.
copy
()
output
[
0
]
=
theano
.
_asarray
(
variable
,
dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
)
dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
)
else
:
# Force a copy
output
[
0
]
=
numpy
.
array
(
variable
,
copy
=
True
,
...
...
@@ -1568,27 +1575,25 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
"""
%
locals
()
else
:
raise
TypeError
(
"The CAReduce.scalar_op must have an identity field."
)
"The CAReduce.scalar_op must have an identity field."
)
task0_decl
=
(
"
%(dtype)
s&
%(name)
s_i = *
%(name)
s_iter;
\n
"
"
%(name)
s_i =
%(identity)
s;"
%
dict
(
dtype
=
adtype
,
name
=
aname
,
identity
=
identity
))
task0_decl
=
(
"
%(dtype)
s&
%(name)
s_i = *
%(name)
s_iter;
\n
"
"
%(name)
s_i =
%(identity)
s;"
%
dict
(
dtype
=
adtype
,
name
=
aname
,
identity
=
identity
))
task1_decl
=
(
"
%(dtype)
s&
%(name)
s_i = *
%(name)
s_iter;
\n
"
%
dict
(
dtype
=
idtype
,
name
=
inames
[
0
]))
%
dict
(
dtype
=
idtype
,
name
=
inames
[
0
]))
task1_code
=
self
.
scalar_op
.
c_code
(
Apply
(
self
.
scalar_op
,
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)
.
make_variable
()
for
input
in
(
node
.
inputs
*
2
)],
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)
.
make_variable
()
for
input
in
node
.
outputs
]),
None
,
[
"
%
s_i"
%
aname
,
"
%
s_i"
%
inames
[
0
]],
[
"
%
s_i"
%
aname
],
sub
)
Apply
(
self
.
scalar_op
,
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)
.
make_variable
()
for
input
in
(
node
.
inputs
*
2
)],
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)
.
make_variable
()
for
input
in
node
.
outputs
]),
None
,
[
"
%
s_i"
%
aname
,
"
%
s_i"
%
inames
[
0
]],
[
"
%
s_i"
%
aname
],
sub
)
code1
=
"""
{
%(task1_decl)
s
...
...
@@ -1600,11 +1605,10 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
if
len
(
axis
)
==
1
:
all_code
=
[(
""
,
""
)]
*
nnested
+
[(
task0_decl
,
code1
),
""
]
else
:
all_code
=
(
[(
""
,
""
)]
*
nnested
+
[(
task0_decl
,
""
)]
+
[(
""
,
""
)]
*
(
len
(
axis
)
-
2
)
+
[(
""
,
code1
),
""
])
all_code
=
([(
""
,
""
)]
*
nnested
+
[(
task0_decl
,
""
)]
+
[(
""
,
""
)]
*
(
len
(
axis
)
-
2
)
+
[(
""
,
code1
),
""
])
else
:
all_code
=
[
task0_decl
+
code1
]
loop
=
cgen
.
make_loop_careduce
(
...
...
@@ -1632,11 +1636,12 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
version
=
[
5
]
# the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node
=
Apply
(
self
.
scalar_op
,
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)
.
make_variable
()
for
input
in
node
.
inputs
],
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)
.
make_variable
()
for
output
in
node
.
outputs
])
scalar_node
=
Apply
(
self
.
scalar_op
,
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)
.
make_variable
()
for
input
in
node
.
inputs
],
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)
.
make_variable
()
for
output
in
node
.
outputs
])
version
.
append
(
self
.
scalar_op
.
c_code_cache_version_apply
(
scalar_node
))
for
i
in
node
.
inputs
+
node
.
outputs
:
version
.
append
(
get_scalar_type
(
dtype
=
i
.
type
.
dtype
)
.
c_code_cache_version
())
...
...
@@ -1760,9 +1765,9 @@ class CAReduceDtype(CAReduce):
self
.
acc_dtype
=
acc_dtype
def
__eq__
(
self
,
other
):
return
(
CAReduce
.
__eq__
(
self
,
other
)
and
self
.
dtype
==
other
.
dtype
and
self
.
acc_dtype
==
other
.
acc_dtype
)
return
(
CAReduce
.
__eq__
(
self
,
other
)
and
self
.
dtype
==
other
.
dtype
and
self
.
acc_dtype
==
other
.
acc_dtype
)
def
__hash__
(
self
):
return
CAReduce
.
__hash__
(
self
)
^
hash
((
self
.
dtype
,
self
.
acc_dtype
))
...
...
@@ -1968,8 +1973,8 @@ class Prod(CAReduceDtype):
self
.
no_zeros_in_input
=
False
def
__eq__
(
self
,
other
):
return
(
CAReduceDtype
.
__eq__
(
self
,
other
)
and
self
.
no_zeros_in_input
==
other
.
no_zeros_in_input
)
return
(
CAReduceDtype
.
__eq__
(
self
,
other
)
and
self
.
no_zeros_in_input
==
other
.
no_zeros_in_input
)
def
__hash__
(
self
):
return
(
CAReduceDtype
.
__hash__
(
self
)
^
...
...
@@ -2124,25 +2129,26 @@ class MulWithoutZeros(scalar.BinaryScalarOp):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
x
,
y
=
inp
z
,
=
out
return
((
"
%(z)
s = ((
%(x)
s == 0) ? (
%(y)
s) : "
+
"((
%(y)
s == 0) ? (
%(x)
s) : ((
%(y)
s)*(
%(x)
s))) );"
)
return
((
"
%(z)
s = ((
%(x)
s == 0) ? (
%(y)
s) : "
+
"((
%(y)
s == 0) ? (
%(x)
s) : ((
%(y)
s)*(
%(x)
s))) );"
)
%
locals
())
def
c_code_cache_version
(
self
):
return
(
1
,)
mul_without_zeros
=
MulWithoutZeros
(
scalar
.
upcast_out
,
name
=
'mul_without_zeros'
)
mul_without_zeros
=
MulWithoutZeros
(
scalar
.
upcast_out
,
name
=
'mul_without_zeros'
)
class
ProdWithoutZeros
(
CAReduceDtype
):
def
__init__
(
self
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
):
CAReduceDtype
.
__init__
(
self
,
mul_without_zeros
,
axis
=
axis
,
dtype
=
dtype
,
acc_dtype
=
acc_dtype
)
def
grad
(
self
,
inp
,
grads
):
a
,
=
inp
a_grad
=
theano
.
gradient
.
grad_not_implemented
(
self
,
0
,
a
,
"2nd derivatives of `product(a)` is not currently supported."
"If `a` is guarenteed to contains no zeros, use `product(a, no_zeros_in_input=True)`."
)
a_grad
=
theano
.
gradient
.
grad_not_implemented
(
self
,
0
,
a
,
"2nd derivatives of `product(a)` is not currently supported."
"If `a` is guarenteed to contains no zeros, use "
"`product(a, no_zeros_in_input=True)`."
)
return
[
a_grad
]
theano/tensor/inplace.py
浏览文件 @
bd11e130
...
...
@@ -28,7 +28,6 @@ def _scal_inplace(symbol):
def
chk
(
pstate
,
r
):
if
not
r
.
owner
:
return
False
op
=
r
.
owner
.
op
return
r
.
owner
.
op
==
rval
pprint
.
assign
(
chk
,
printing
.
FunctionPrinter
(
symbolname
.
replace
(
'_inplace'
,
'='
)))
...
...
theano/tensor/opt.py
浏览文件 @
bd11e130
...
...
@@ -6,8 +6,6 @@ from __future__ import print_function
# TODO: 0*x -> 0
import
logging
_logger
=
logging
.
getLogger
(
'theano.tensor.opt'
)
import
itertools
import
operator
import
sys
...
...
@@ -34,12 +32,10 @@ from theano.tensor.subtensor import (get_idx_list, get_canonical_form_slice,
Subtensor
,
IncSubtensor
,
make_constant
,
AdvancedIncSubtensor1
,
AdvancedIncSubtensor
,
AdvancedSubtensor
,
AdvancedSubtensor1
,
advanced_subtensor
,
advanced_subtensor1
,
advanced_inc_subtensor1
,
inc_subtensor
)
advanced_inc_subtensor1
)
from
theano
import
scalar
from
theano.scalar
import
basic
from
theano.tensor
import
basic
as
T
...
...
@@ -56,6 +52,8 @@ from theano.gof import toolbox
from
theano.tensor.basic
import
get_scalar_constant_value
,
ShapeError
,
NotScalarConstantError
from
six
import
StringIO
_logger
=
logging
.
getLogger
(
'theano.tensor.opt'
)
theano
.
configparser
.
AddConfigVar
(
'on_shape_error'
,
"warn: print a warning and use the default"
" value. raise: raise an error"
,
...
...
@@ -165,23 +163,24 @@ def broadcast_like(value, template, fgraph, dtype=None):
# the template may have 1s in its shape without being broadcastable
if
rval
.
broadcastable
!=
template
.
broadcastable
:
rval
=
T
.
unbroadcast
(
rval
,
*
[
i
for
i
in
xrange
(
rval
.
ndim
)
if
rval
.
broadcastable
[
i
]
and
not
template
.
broadcastable
[
i
]])
if
rval
.
broadcastable
[
i
]
and
not
template
.
broadcastable
[
i
]])
assert
rval
.
type
.
dtype
==
dtype
if
rval
.
type
.
broadcastable
!=
template
.
broadcastable
:
raise
AssertionError
(
"rval.type.broadcastable is "
+
str
(
rval
.
type
.
broadcastable
)
+
" but template.broadcastable is"
+
str
(
template
.
broadcastable
))
str
(
rval
.
type
.
broadcastable
)
+
" but template.broadcastable is"
+
str
(
template
.
broadcastable
))
return
rval
theano
.
configparser
.
AddConfigVar
(
'tensor.insert_inplace_optimizer_validate_nb'
,
"-1: auto, if graph have less then 500 nodes 1, else 10"
,
theano
.
configparser
.
IntParam
(
-
1
),
in_c_key
=
False
)
theano
.
configparser
.
AddConfigVar
(
'tensor.insert_inplace_optimizer_validate_nb'
,
"-1: auto, if graph have less then 500 nodes 1, else 10"
,
theano
.
configparser
.
IntParam
(
-
1
),
in_c_key
=
False
)
def
inplace_elemwise_optimizer_op
(
OP
):
...
...
@@ -251,11 +250,10 @@ def inplace_elemwise_optimizer_op(OP):
# target.
# Remove here as faster.
candidate_inputs
=
[
i
for
i
in
xrange
(
len
(
node
.
inputs
))
if
i
not
in
baseline
.
values
()
\
and
not
isinstance
(
node
.
inputs
[
i
],
Constant
)
\
and
not
fgraph
.
destroyers
(
node
.
inputs
[
i
])
\
and
node
.
inputs
[
i
]
not
in
protected_inputs
]
if
i
not
in
baseline
.
values
()
and
not
isinstance
(
node
.
inputs
[
i
],
Constant
)
and
not
fgraph
.
destroyers
(
node
.
inputs
[
i
])
and
node
.
inputs
[
i
]
not
in
protected_inputs
]
verbose
=
False
...
...
@@ -265,7 +263,7 @@ def inplace_elemwise_optimizer_op(OP):
for
candidate_input
in
candidate_inputs
:
# remove inputs that don't have the same dtype as the output
if
node
.
inputs
[
candidate_input
]
.
type
!=
node
.
outputs
[
candidate_output
]
.
type
:
candidate_output
]
.
type
:
continue
inplace_pattern
=
dict
(
baseline
)
...
...
@@ -274,20 +272,20 @@ def inplace_elemwise_optimizer_op(OP):
if
hasattr
(
op
.
scalar_op
,
"make_new_inplace"
):
new_scal
=
op
.
scalar_op
.
make_new_inplace
(
scalar
.
transfer_type
(
*
[
inplace_pattern
.
get
(
i
,
None
)
\
for
i
in
xrange
(
len
(
node
.
outputs
))]))
*
[
inplace_pattern
.
get
(
i
,
None
)
for
i
in
xrange
(
len
(
node
.
outputs
))]))
else
:
new_scal
=
op
.
scalar_op
.
__class__
(
scalar
.
transfer_type
(
*
[
inplace_pattern
.
get
(
i
,
None
)
\
for
i
in
xrange
(
len
(
node
.
outputs
))]))
*
[
inplace_pattern
.
get
(
i
,
None
)
for
i
in
xrange
(
len
(
node
.
outputs
))]))
new_outputs
=
OP
(
new_scal
,
inplace_pattern
)(
*
node
.
inputs
,
**
dict
(
return_list
=
True
))
*
node
.
inputs
,
**
dict
(
return_list
=
True
))
new_node
=
new_outputs
[
0
]
.
owner
for
r
,
new_r
in
zip
(
node
.
outputs
,
new_outputs
):
fgraph
.
replace
(
r
,
new_r
,
reason
=
"inplace_elemwise_optimizer"
)
reason
=
"inplace_elemwise_optimizer"
)
nb_change_no_validate
+=
1
if
nb_change_no_validate
>=
check_each_change
:
fgraph
.
validate
()
...
...
@@ -295,9 +293,9 @@ def inplace_elemwise_optimizer_op(OP):
nb_change_no_validate
=
0
except
(
ValueError
,
TypeError
,
InconsistencyError
)
as
e
:
if
check_each_change
!=
1
and
not
raised_warning
:
print
((
"Some inplace optimization was not "
"performed due to unexpected error:"
),
file
=
sys
.
stderr
)
print
((
"Some inplace optimization was not "
"performed due to unexpected error:"
),
file
=
sys
.
stderr
)
print
(
e
,
file
=
sys
.
stderr
)
raised_warning
=
True
fgraph
.
revert
(
chk
)
...
...
@@ -313,7 +311,8 @@ def inplace_elemwise_optimizer_op(OP):
except
Exception
:
if
not
raised_warning
:
print
((
"Some inplace optimization was not "
"performed due to unexpected error"
),
file
=
sys
.
stderr
)
"performed due to unexpected error"
),
file
=
sys
.
stderr
)
fgraph
.
revert
(
chk
)
return
inplace_elemwise_optimizer
...
...
@@ -381,8 +380,8 @@ def register_specialize_device(lopt, *tags, **kwargs):
# Register merge_optimizer as a global opt during canonicalize
compile
.
optdb
[
'canonicalize'
]
.
register
(
'canon_merge'
,
merge_optimizer
,
'fast_run'
,
final_opt
=
True
)
compile
.
optdb
[
'canonicalize'
]
.
register
(
'canon_merge'
,
merge_optimizer
,
'fast_run'
,
final_opt
=
True
)
#####################
...
...
@@ -512,11 +511,10 @@ def local_lift_transpose_through_dot(node):
inplace. The newly-introduced transpositions are not inplace, this will
be taken care of in a later optimization phase.
"""
if
not
(
isinstance
(
node
.
op
,
T
.
DimShuffle
)
and
node
.
op
.
new_order
==
(
1
,
0
)):
if
not
(
isinstance
(
node
.
op
,
T
.
DimShuffle
)
and
node
.
op
.
new_order
==
(
1
,
0
)):
return
False
if
not
(
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Dot
)):
if
not
(
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Dot
)):
return
False
x
,
y
=
node
.
inputs
[
0
]
.
owner
.
inputs
...
...
@@ -601,22 +599,19 @@ class MakeVector(T.Op):
def
make_node
(
self
,
*
inputs
):
inputs
=
list
(
map
(
T
.
as_tensor_variable
,
inputs
))
if
not
all
(
a
.
type
==
inputs
[
0
]
.
type
for
a
in
inputs
)
or
(
len
(
inputs
)
>
0
and
inputs
[
0
]
.
dtype
!=
self
.
dtype
):
dtype
=
theano
.
scalar
.
upcast
(
self
.
dtype
,
*
[
i
.
dtype
for
i
in
inputs
])
if
(
not
all
(
a
.
type
==
inputs
[
0
]
.
type
for
a
in
inputs
)
or
(
len
(
inputs
)
>
0
and
inputs
[
0
]
.
dtype
!=
self
.
dtype
)):
dtype
=
theano
.
scalar
.
upcast
(
self
.
dtype
,
*
[
i
.
dtype
for
i
in
inputs
])
# upcast the input to the determined dtype,
# but don't downcast anything
assert
dtype
==
self
.
dtype
,
(
"The upcast of the inputs to MakeVector should match the "
"dtype given in __init__."
)
"The upcast of the inputs to MakeVector should match the "
"dtype given in __init__."
)
if
not
all
(
self
.
dtype
==
T
.
cast
(
i
,
dtype
=
dtype
)
.
dtype
for
i
in
inputs
):
raise
TypeError
(
"MakeVector.make_node expected inputs"
" upcastable to
%
s. got
%
s"
%
(
self
.
dtype
,
str
([
i
.
dtype
for
i
in
inputs
])
))
" upcastable to
%
s. got
%
s"
%
(
self
.
dtype
,
str
([
i
.
dtype
for
i
in
inputs
])))
inputs
=
[
T
.
cast
(
i
,
dtype
=
dtype
)
for
i
in
inputs
]
assert
all
(
self
.
dtype
==
a
.
dtype
for
a
in
inputs
)
assert
all
(
a
.
ndim
==
0
for
a
in
inputs
)
...
...
@@ -625,11 +620,9 @@ class MakeVector(T.Op):
dtype
=
inputs
[
0
]
.
type
.
dtype
else
:
dtype
=
self
.
dtype
#bcastable = (len(inputs) == 1)
#
bcastable = (len(inputs) == 1)
bcastable
=
False
otype
=
T
.
TensorType
(
broadcastable
=
(
bcastable
,),
dtype
=
dtype
)
otype
=
T
.
TensorType
(
broadcastable
=
(
bcastable
,),
dtype
=
dtype
)
return
T
.
Apply
(
self
,
inputs
,
[
otype
()])
def
__str__
(
self
):
...
...
@@ -700,13 +693,14 @@ class MakeVectorPrinter:
if
r
.
owner
is
None
:
raise
TypeError
(
"Can only print make_vector."
)
elif
isinstance
(
r
.
owner
.
op
,
MakeVector
):
return
"[
%
s]"
%
", "
.
join
(
pstate
.
pprinter
.
process
(
input
,
pstate
.
clone
(
precedence
=
1000
))
for
input
in
r
.
owner
.
inputs
)
return
"[
%
s]"
%
", "
.
join
(
pstate
.
pprinter
.
process
(
input
,
pstate
.
clone
(
precedence
=
1000
))
for
input
in
r
.
owner
.
inputs
)
else
:
raise
TypeError
(
"Can only print make_vector."
)
T
.
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
MakeVector
),
MakeVectorPrinter
())
T
.
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
MakeVector
),
MakeVectorPrinter
())
class
ShapeFeature
(
object
):
...
...
@@ -843,8 +837,8 @@ class ShapeFeature(object):
# by always returning the same object to represent 1
return
self
.
lscalar_one
if
(
type
(
s_i
)
in
integer_types
or
isinstance
(
s_i
,
numpy
.
integer
)
or
(
isinstance
(
s_i
,
numpy
.
ndarray
)
and
s_i
.
ndim
==
0
)):
isinstance
(
s_i
,
numpy
.
integer
)
or
(
isinstance
(
s_i
,
numpy
.
ndarray
)
and
s_i
.
ndim
==
0
)):
# this shape is a constant
assert
s_i
>=
0
return
T
.
constant
(
s_i
,
dtype
=
'int64'
)
...
...
@@ -859,9 +853,9 @@ class ShapeFeature(object):
# s_i is x.shape[i], we change it to Shape_i.
if
(
s_i
.
owner
and
isinstance
(
s_i
.
owner
.
op
,
Subtensor
)
and
s_i
.
owner
.
inputs
[
0
]
.
owner
and
isinstance
(
s_i
.
owner
.
inputs
[
0
]
.
owner
.
op
,
T
.
Shape
)):
isinstance
(
s_i
.
owner
.
op
,
Subtensor
)
and
s_i
.
owner
.
inputs
[
0
]
.
owner
and
isinstance
(
s_i
.
owner
.
inputs
[
0
]
.
owner
.
op
,
T
.
Shape
)):
assert
s_i
.
ndim
==
0
assert
len
(
s_i
.
owner
.
op
.
idx_list
)
==
1
...
...
@@ -883,7 +877,7 @@ class ShapeFeature(object):
return
s_i
else
:
raise
TypeError
(
'Unsupported shape element'
,
s_i
,
type
(
s_i
),
getattr
(
s_i
,
'type'
,
None
))
s_i
,
type
(
s_i
),
getattr
(
s_i
,
'type'
,
None
))
def
set_shape
(
self
,
r
,
s
):
"""Assign the shape `s` to previously un-shaped variable `r`.
...
...
@@ -910,7 +904,7 @@ class ShapeFeature(object):
shape_vars
=
[]
for
i
in
xrange
(
r
.
ndim
):
if
(
hasattr
(
r
.
type
,
'broadcastable'
)
and
r
.
type
.
broadcastable
[
i
]):
r
.
type
.
broadcastable
[
i
]):
shape_vars
.
append
(
self
.
lscalar_one
)
else
:
shape_vars
.
append
(
self
.
unpack
(
s
[
i
]))
...
...
@@ -947,8 +941,8 @@ class ShapeFeature(object):
self
.
set_shape
(
r
,
other_shape
)
return
if
(
other_r
.
owner
and
r
.
owner
and
other_r
.
owner
.
inputs
==
r
.
owner
.
inputs
and
other_r
.
owner
.
op
==
r
.
owner
.
op
):
other_r
.
owner
.
inputs
==
r
.
owner
.
inputs
and
other_r
.
owner
.
op
==
r
.
owner
.
op
):
# We are doing a merge. So the 2 shapes graph will be the
# same. This is only a speed optimization to call
# ancestors() less frequently.
...
...
@@ -957,10 +951,10 @@ class ShapeFeature(object):
# Merge other_shape with r_shape, giving the priority to other_shape
merged_shape
=
[]
for
i
,
ps
in
enumerate
(
other_shape
):
if
(
ps
.
owner
and
isinstance
(
getattr
(
ps
.
owner
,
'op'
,
None
),
Shape_i
)
and
ps
.
owner
.
op
.
i
==
i
and
ps
.
owner
.
inputs
[
0
]
in
(
r
,
other_r
)):
if
(
ps
.
owner
and
isinstance
(
getattr
(
ps
.
owner
,
'op'
,
None
),
Shape_i
)
and
ps
.
owner
.
op
.
i
==
i
and
ps
.
owner
.
inputs
[
0
]
in
(
r
,
other_r
)):
# If other_shape[i] is uninformative, use r_shape[i].
# For now, we consider 2 cases of uninformative other_shape[i]:
# - Shape_i(i)(other_r);
...
...
@@ -1084,11 +1078,11 @@ class ShapeFeature(object):
r
in
node
.
inputs
])
except
NotImplementedError
as
e
:
raise
NotImplementedError
(
'Code called by infer_shape failed raising a '
'NotImplementedError. Raising NotImplementedError to '
'indicate that a shape cannot be computed is no longer '
'supported, and one should now use tensor.ShapeError '
'instead. The original exception message is:
%
s'
%
e
)
'Code called by infer_shape failed raising a '
'NotImplementedError. Raising NotImplementedError to '
'indicate that a shape cannot be computed is no longer '
'supported, and one should now use tensor.ShapeError '
'instead. The original exception message is:
%
s'
%
e
)
except
Exception
as
e
:
msg
=
(
'Failed to infer_shape from Op
%
s.
\n
Input shapes: '
'
%
s
\n
Exception encountered during infer_shape: '
...
...
@@ -1108,10 +1102,10 @@ class ShapeFeature(object):
if
len
(
o_shapes
)
!=
len
(
node
.
outputs
):
raise
Exception
(
(
'The infer_shape method for the Op "
%
s" returned a list '
+
'with the wrong number of element: len(o_shapes) =
%
d '
+
' != len(node.outputs) =
%
d'
)
%
(
str
(
node
.
op
),
len
(
o_shapes
),
len
(
node
.
outputs
)))
'with the wrong number of element: len(o_shapes) =
%
d '
+
' != len(node.outputs) =
%
d'
)
%
(
str
(
node
.
op
),
len
(
o_shapes
),
len
(
node
.
outputs
)))
# Ensure shapes are in 'int64'. This is to make sure the assert
# found in the `local_useless_subtensor` optimization does not fail.
...
...
@@ -1173,9 +1167,9 @@ class ShapeFeature(object):
# with the InputToGpuOptimizer optimizer.
continue
if
(
repl
.
owner
and
repl
.
owner
.
inputs
[
0
]
is
shpnode
.
inputs
[
0
]
and
isinstance
(
repl
.
owner
.
op
,
Shape_i
)
and
repl
.
owner
.
op
.
i
==
shpnode
.
op
.
i
):
repl
.
owner
.
inputs
[
0
]
is
shpnode
.
inputs
[
0
]
and
isinstance
(
repl
.
owner
.
op
,
Shape_i
)
and
repl
.
owner
.
op
.
i
==
shpnode
.
op
.
i
):
# The replacement is a shape_i of the same
# input. So no need to do this equivalent
# replacement.
...
...
@@ -1239,7 +1233,7 @@ class ShapeFeature(object):
if
not
dx
.
owner
or
not
dy
.
owner
:
return
False
if
(
not
isinstance
(
dx
.
owner
.
op
,
Shape_i
)
or
not
isinstance
(
dy
.
owner
.
op
,
Shape_i
)):
not
isinstance
(
dy
.
owner
.
op
,
Shape_i
)):
return
False
opx
=
dx
.
owner
.
op
opy
=
dy
.
owner
.
op
...
...
@@ -1310,10 +1304,9 @@ def local_fill_to_alloc(node):
return
# TODO: cut out un-necessary dimshuffles of v
assert
rval
[
0
]
.
type
==
node
.
outputs
[
0
]
.
type
,
(
'rval'
,
rval
[
0
]
.
type
,
'orig'
,
node
.
outputs
[
0
]
.
type
,
'node'
,
node
,
)
# theano.printing.debugprint(node.outputs[0], file='str'))
assert
rval
[
0
]
.
type
==
node
.
outputs
[
0
]
.
type
,
(
'rval'
,
rval
[
0
]
.
type
,
'orig'
,
node
.
outputs
[
0
]
.
type
,
'node'
,
node
,)
# theano.printing.debugprint(node.outputs[0], file='str'))
return
rval
...
...
@@ -1404,7 +1397,7 @@ def local_subtensor_make_vector(node):
try
:
idx
,
=
node
.
op
.
idx_list
except
Exception
:
#'how can you have multiple indexes into a shape?'
#
'how can you have multiple indexes into a shape?'
raise
if
isinstance
(
idx
,
(
scalar
.
Scalar
,
T
.
TensorType
)):
...
...
@@ -1467,13 +1460,13 @@ def local_useless_elemwise(node):
if
isinstance
(
node
.
op
,
T
.
Elemwise
):
if
node
.
op
.
scalar_op
==
theano
.
scalar
.
eq
and
len
(
node
.
inputs
)
==
2
:
if
node
.
inputs
[
0
]
==
node
.
inputs
[
1
]:
# it is the same var in the graph. That will always be true
# it is the same var in the graph. That will always be true
return
[
T
.
fill
(
node
.
inputs
[
0
],
T
.
constant
(
1.0
,
dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
))]
elif
node
.
op
.
scalar_op
==
theano
.
scalar
.
neq
and
len
(
node
.
inputs
)
==
2
:
if
node
.
inputs
[
0
]
==
node
.
inputs
[
1
]:
# it is the same var in the graph. That will always be false
# it is the same var in the graph. That will always be false
return
[
T
.
fill
(
node
.
inputs
[
0
],
T
.
constant
(
0.0
,
dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
))]
...
...
@@ -1482,8 +1475,8 @@ def local_useless_elemwise(node):
elif
node
.
op
.
scalar_op
==
theano
.
scalar
.
add
and
len
(
node
.
inputs
)
==
1
:
return
[
node
.
inputs
[
0
]]
elif
(
node
.
op
.
scalar_op
==
theano
.
scalar
.
identity
and
len
(
node
.
inputs
)
==
1
):
elif
(
node
.
op
.
scalar_op
==
theano
.
scalar
.
identity
and
len
(
node
.
inputs
)
==
1
):
return
[
node
.
inputs
[
0
]]
...
...
@@ -1513,12 +1506,12 @@ def local_cast_cast(node):
and the first cast cause an upcast.
"""
if
(
not
isinstance
(
node
.
op
,
T
.
Elemwise
)
or
not
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
Cast
)):
not
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
Cast
)):
return
x
=
node
.
inputs
[
0
]
if
(
not
x
.
owner
or
not
isinstance
(
x
.
owner
.
op
,
T
.
Elemwise
)
or
not
isinstance
(
x
.
owner
.
op
.
scalar_op
,
scalar
.
Cast
)):
not
isinstance
(
x
.
owner
.
op
,
T
.
Elemwise
)
or
not
isinstance
(
x
.
owner
.
op
.
scalar_op
,
scalar
.
Cast
)):
return
if
node
.
op
.
scalar_op
.
o_type
==
x
.
owner
.
op
.
scalar_op
.
o_type
:
return
[
x
]
...
...
@@ -1738,7 +1731,7 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
# The broadcast pattern of the ouptut must match the broadcast
# pattern of at least one of the inputs.
if
not
any
([
i
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
for
i
in
node
.
inputs
]):
node
.
outputs
[
0
]
.
type
.
broadcastable
for
i
in
node
.
inputs
]):
return
False
def
dimshuffled_alloc
(
i
):
...
...
@@ -1749,10 +1742,8 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
# At least one input must have an owner that is either a AllocOP or a
# DimShuffleOP with an owner that is a AllocOP -- otherwise there is
# nothing to optimize.
if
not
any
([
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
))
for
i
in
node
.
inputs
]):
if
not
any
([
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
))
for
i
in
node
.
inputs
]):
return
False
# Search for input that we can use as a baseline for the dimensions.
...
...
@@ -1761,9 +1752,8 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
if
i
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
:
# Prefer an input that is not a AllocOP nor a DimShuffleOP of a
# AllocOP so that all allocs can be optimized.
if
not
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
))):
if
not
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
))):
assert_op_idx
=
idx
break
...
...
@@ -1773,8 +1763,8 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
# there is more than one then do all but one. number of
# inputs with alloc or dimshuffle alloc
l2
=
[
i
for
i
in
node
.
inputs
if
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
)))]
if
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
)))]
# If only 1 alloc or dimshuffle alloc, it is the one we
# will use for the shape. So no alloc would be removed.
if
len
(
l2
)
>
1
:
...
...
@@ -1794,14 +1784,13 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
same_shape
=
node
.
fgraph
.
shape_feature
.
same_shape
for
i
in
node
.
inputs
:
# Remove alloc
if
(
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
AllocOP
)
and
i
.
owner
.
inputs
[
0
]
.
type
!=
i
.
owner
.
outputs
[
0
]
.
type
):
if
(
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
AllocOP
)
and
i
.
owner
.
inputs
[
0
]
.
type
!=
i
.
owner
.
outputs
[
0
]
.
type
):
# when i.owner.inputs[0].type == i.owner.outputs[0].type we
# will remove that alloc later
assert
i
.
type
.
ndim
==
cmp_op
.
ndim
if
(
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
and
not
same_shape
(
i
,
cmp_op
)):
if
(
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
and
not
same_shape
(
i
,
cmp_op
)):
assert_op
=
assert_
(
assert_op
,
*
[
T
.
eq
(
i
.
shape
[
idx
],
cmp_op
.
shape
[
idx
])
for
idx
in
xrange
(
i
.
type
.
ndim
)
...
...
@@ -1891,7 +1880,7 @@ def local_upcast_elemwise_constant_inputs(node):
scalar_op
=
node
.
op
.
scalar_op
# print "aa", scalar_op.output_types_preference
if
(
getattr
(
scalar_op
,
'output_types_preference'
,
None
)
in
(
T
.
scal
.
upgrade_to_float
,
T
.
scal
.
upcast_out
)):
in
(
T
.
scal
.
upgrade_to_float
,
T
.
scal
.
upcast_out
)):
# this is the kind of op that we can screw with the input
# dtypes by upcasting explicitly
output_dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
...
...
@@ -1910,11 +1899,12 @@ def local_upcast_elemwise_constant_inputs(node):
else
:
if
shape_i
is
None
:
return
new_inputs
.
append
(
T
.
alloc
(
T
.
cast
(
cval_i
,
output_dtype
),
*
[
shape_i
(
d
)(
i
)
for
d
in
xrange
(
i
.
ndim
)]))
#print >> sys.stderr, "AAA",
#*[Shape_i(d)(i) for d in xrange(i.ndim)]
new_inputs
.
append
(
T
.
alloc
(
T
.
cast
(
cval_i
,
output_dtype
),
*
[
shape_i
(
d
)(
i
)
for
d
in
xrange
(
i
.
ndim
)]))
# print >> sys.stderr, "AAA",
# *[Shape_i(d)(i) for d in xrange(i.ndim)]
except
NotScalarConstantError
:
# for the case of a non-scalar
if
isinstance
(
i
,
T
.
TensorConstant
):
...
...
@@ -1958,7 +1948,7 @@ def local_useless_inc_subtensor(node):
except
NotScalarConstantError
:
return
if
(
node
.
inputs
[
0
]
.
ndim
!=
node
.
inputs
[
1
]
.
ndim
or
node
.
inputs
[
0
]
.
broadcastable
!=
node
.
inputs
[
1
]
.
broadcastable
):
node
.
inputs
[
0
]
.
broadcastable
!=
node
.
inputs
[
1
]
.
broadcastable
):
# FB: I didn't check if this case can happen, but this opt
# don't support it.
return
...
...
@@ -1994,16 +1984,16 @@ def local_set_to_inc_subtensor(node):
AdvancedIncSubtensor1(x, other, ilist, set_instead_of_inc=False)
"""
if
(
isinstance
(
node
.
op
,
AdvancedIncSubtensor1
)
and
node
.
op
.
set_instead_of_inc
==
True
and
node
.
inputs
[
1
]
.
owner
and
isinstance
(
node
.
inputs
[
1
]
.
owner
.
op
,
Elemwise
)
and
isinstance
(
node
.
inputs
[
1
]
.
owner
.
op
.
scalar_op
,
scalar
.
Add
)):
node
.
op
.
set_instead_of_inc
and
node
.
inputs
[
1
]
.
owner
and
isinstance
(
node
.
inputs
[
1
]
.
owner
.
op
,
Elemwise
)
and
isinstance
(
node
.
inputs
[
1
]
.
owner
.
op
.
scalar_op
,
scalar
.
Add
)):
addn
=
node
.
inputs
[
1
]
.
owner
subn
=
None
other
=
None
if
(
addn
.
inputs
[
0
]
.
owner
and
isinstance
(
addn
.
inputs
[
0
]
.
owner
.
op
,
AdvancedSubtensor1
)):
isinstance
(
addn
.
inputs
[
0
]
.
owner
.
op
,
AdvancedSubtensor1
)):
subn
=
addn
.
inputs
[
0
]
.
owner
other
=
addn
.
inputs
[
1
]
elif
(
addn
.
inputs
[
1
]
.
owner
and
...
...
@@ -2013,7 +2003,7 @@ def local_set_to_inc_subtensor(node):
else
:
return
if
(
subn
.
inputs
[
1
]
!=
node
.
inputs
[
2
]
or
subn
.
inputs
[
0
]
!=
node
.
inputs
[
0
]):
subn
.
inputs
[
0
]
!=
node
.
inputs
[
0
]):
return
return
[
advanced_inc_subtensor1
(
node
.
inputs
[
0
],
other
,
node
.
inputs
[
2
])]
...
...
@@ -2030,9 +2020,9 @@ def local_useless_slice(node):
last_slice
=
len
(
slices
)
for
s
in
slices
[::
-
1
]:
# check if slice and then check slice indices
if
(
isinstance
(
s
,
slice
)
and
s
.
start
is
None
and
s
.
stop
is
None
and
(
s
.
step
is
None
or
T
.
extract_constant
(
s
.
step
)
==
1
)):
last_slice
-=
1
if
(
isinstance
(
s
,
slice
)
and
s
.
start
is
None
and
s
.
stop
is
None
and
(
s
.
step
is
None
or
T
.
extract_constant
(
s
.
step
)
==
1
)):
last_slice
-=
1
else
:
break
# check if we removed something
...
...
@@ -2098,11 +2088,10 @@ def local_useless_subtensor(node):
# the same underlying variable.
if
(
length_pos_shape_i
.
owner
and
isinstance
(
length_pos_shape_i
.
owner
.
op
,
T
.
ScalarFromTensor
)):
T
.
ScalarFromTensor
)):
length_pos_shape_i
=
length_pos_shape_i
.
owner
.
inputs
[
0
]
elif
(
length_pos
.
owner
and
isinstance
(
length_pos
.
owner
.
op
,
T
.
TensorFromScalar
)):
isinstance
(
length_pos
.
owner
.
op
,
T
.
TensorFromScalar
)):
length_pos
=
length_pos
.
owner
.
inputs
[
0
]
else
:
# We did not find underlying variables of the same type
...
...
@@ -2322,8 +2311,8 @@ def merge_two_slices(slice1, len1, slice2, len2):
pn_stop
=
sl1
.
start
+
(
sl2
.
start
-
1
)
*
sl1
.
step
pn_stop
=
T
.
switch
(
T
.
and_
(
T
.
lt
(
pn_stop
,
0
),
T
.
gt
(
flen
,
0
)),
-
len1
-
1
,
T
.
minimum
(
pn_stop
,
sl1
.
stop
))
-
len1
-
1
,
T
.
minimum
(
pn_stop
,
sl1
.
stop
))
pn_start
=
sl1
.
start
+
(
sl2
.
stop
-
1
)
*
sl1
.
step
pn_start
=
T
.
minimum
(
pn_start
,
sl1
.
stop
)
pn_start
=
T
.
maximum
(
pn_start
,
0
)
...
...
@@ -2345,9 +2334,8 @@ def merge_two_slices(slice1, len1, slice2, len2):
pp_start
))
stop
=
T
.
switch
(
T
.
lt
(
reverse2
*
reverse1
,
0
),
T
.
switch
(
T
.
lt
(
reverse1
,
0
),
np_stop
,
pn_stop
),
T
.
switch
(
T
.
lt
(
reverse1
,
0
),
nn_stop
,
pp_stop
))
T
.
switch
(
T
.
lt
(
reverse1
,
0
),
np_stop
,
pn_stop
),
T
.
switch
(
T
.
lt
(
reverse1
,
0
),
nn_stop
,
pp_stop
))
step
=
T
.
switch
(
T
.
lt
(
reverse2
*
reverse1
,
0
),
n_step
,
p_step
)
start
=
T
.
switch
(
T
.
le
(
flen
,
0
),
0
,
start
)
...
...
@@ -2463,7 +2451,7 @@ def local_subtensor_of_alloc(node):
# We check that the corresponding val dimensions was
# not a broadcasted dimensions.
if
(
val
.
type
.
ndim
>
(
i
-
n_added_dims
)
and
val
.
type
.
broadcastable
[
i
-
n_added_dims
]):
val
.
type
.
broadcastable
[
i
-
n_added_dims
]):
val_slices
.
append
(
slice
(
None
))
else
:
val_slices
.
append
(
sl
)
...
...
@@ -2496,8 +2484,8 @@ def local_subtensor_of_alloc(node):
rval
[
0
]
=
theano
.
tensor
.
unbroadcast
(
rval
[
0
],
*
[
i
for
i
,
(
b1
,
b2
)
in
enumerate
(
zip
(
rval
[
0
]
.
broadcastable
,
node
.
outputs
[
0
]
.
broadcastable
))
if
b1
and
not
b2
])
node
.
outputs
[
0
]
.
broadcastable
))
if
b1
and
not
b2
])
return
rval
...
...
@@ -2518,7 +2506,7 @@ def local_subtensor_of_dot(node):
if
not
isinstance
(
node
.
op
,
Subtensor
):
return
if
(
not
node
.
inputs
[
0
]
.
owner
or
not
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Dot
)):
not
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Dot
)):
return
# If there is other node that use the outputs of the dot
# We don't want to compute twice the sub part.
...
...
@@ -2540,7 +2528,8 @@ def local_subtensor_of_dot(node):
# We skip this if b.ndim = 1, since then we just want b_sub = b, not b_sub = b[:]
# (dot also handles b.ndim < 2 as a special case)
if
b
.
ndim
>
1
and
len
(
b_indices
)
>=
b
.
ndim
-
1
:
b_indices
=
b_indices
[:
b
.
ndim
-
2
]
+
(
slice
(
None
,
None
,
None
),)
+
b_indices
[
b
.
ndim
-
2
:]
b_indices
=
(
b_indices
[:
b
.
ndim
-
2
]
+
(
slice
(
None
,
None
,
None
),)
+
b_indices
[
b
.
ndim
-
2
:])
a_sub
=
a
.
__getitem__
(
tuple
(
a_indices
))
b_sub
=
b
.
__getitem__
(
tuple
(
b_indices
))
if
b_indices
else
b
...
...
@@ -2583,14 +2572,13 @@ def local_IncSubtensor_serialize(node):
"""
def
movable
(
i
):
# Return True iff this is a incsubtensor that we can move
return
i
.
owner
\
and
isinstance
(
i
.
owner
.
op
,
(
IncSubtensor
,
AdvancedIncSubtensor1
,
AdvancedIncSubtensor
,
))
\
and
i
.
type
==
o_type
\
and
len
(
i
.
clients
)
==
1
\
and
not
i
.
owner
.
op
.
set_instead_of_inc
return
(
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
(
IncSubtensor
,
AdvancedIncSubtensor1
,
AdvancedIncSubtensor
,))
and
i
.
type
==
o_type
and
len
(
i
.
clients
)
==
1
and
not
i
.
owner
.
op
.
set_instead_of_inc
)
if
node
.
op
==
T
.
add
:
o_type
=
node
.
outputs
[
0
]
.
type
...
...
@@ -2598,8 +2586,8 @@ def local_IncSubtensor_serialize(node):
movable_inputs
=
[
i
for
i
in
node
.
inputs
if
movable
(
i
)]
if
movable_inputs
:
new_inputs
=
[
i
for
i
in
node
.
inputs
if
not
movable
(
i
)]
\
+
[
mi
.
owner
.
inputs
[
0
]
for
mi
in
movable_inputs
]
new_inputs
=
([
i
for
i
in
node
.
inputs
if
not
movable
(
i
)]
+
[
mi
.
owner
.
inputs
[
0
]
for
mi
in
movable_inputs
])
new_add
=
T
.
add
(
*
new_inputs
)
# stack up the new incsubtensors
...
...
@@ -2638,9 +2626,10 @@ def local_inplace_setsubtensor(node):
return
[
new_node
]
return
False
compile
.
optdb
.
register
(
'local_inplace_setsubtensor'
,
TopoOptimizer
(
local_inplace_setsubtensor
,
failure_callback
=
TopoOptimizer
.
warn_inplace
),
60
,
'fast_run'
,
'inplace'
)
# DEBUG
TopoOptimizer
(
local_inplace_setsubtensor
,
failure_callback
=
TopoOptimizer
.
warn_inplace
),
60
,
'fast_run'
,
'inplace'
)
# DEBUG
@gof.local_optimizer
([
AdvancedIncSubtensor1
],
inplace
=
True
)
...
...
@@ -2653,8 +2642,8 @@ def local_inplace_incsubtensor1(node):
return
False
compile
.
optdb
.
register
(
'local_inplace_incsubtensor1'
,
TopoOptimizer
(
local_inplace_incsubtensor1
,
failure_callback
=
TopoOptimizer
.
warn_inplace
),
local_inplace_incsubtensor1
,
failure_callback
=
TopoOptimizer
.
warn_inplace
),
60
,
'fast_run'
,
'inplace'
)
# DEBUG
...
...
@@ -2671,7 +2660,7 @@ def local_incsubtensor_of_zeros(node):
if
(
isinstance
(
node
.
op
,
(
IncSubtensor
,
AdvancedIncSubtensor
,
AdvancedIncSubtensor1
))
and
not
node
.
op
.
set_instead_of_inc
):
not
node
.
op
.
set_instead_of_inc
):
x
=
node
.
inputs
[
0
]
y
=
node
.
inputs
[
1
]
replace
=
False
...
...
@@ -2713,8 +2702,8 @@ def local_setsubtensor_of_constants(node):
pass
if
(
replace_x
is
not
None
and
replace_y
is
not
None
and
replace_x
==
replace_y
):
replace_y
is
not
None
and
replace_x
==
replace_y
):
return
[
x
]
else
:
return
False
...
...
@@ -2738,7 +2727,7 @@ def local_adv_sub1_adv_inc_sub1(node):
return
inp
=
node
.
inputs
[
0
]
if
(
not
inp
.
owner
or
not
isinstance
(
inp
.
owner
.
op
,
AdvancedIncSubtensor1
)):
not
isinstance
(
inp
.
owner
.
op
,
AdvancedIncSubtensor1
)):
return
idx
=
node
.
inputs
[
1
]
idx2
=
inp
.
owner
.
inputs
[
2
]
...
...
@@ -2747,13 +2736,13 @@ def local_adv_sub1_adv_inc_sub1(node):
if
idx
is
not
idx2
:
return
if
(
not
inp
.
owner
.
op
.
set_instead_of_inc
and
T
.
extract_constant
(
x
)
!=
0
):
T
.
extract_constant
(
x
)
!=
0
):
return
cond
=
[
T
.
all
(
T
.
and_
(
T
.
lt
(
idx
,
x
.
shape
[
0
]),
T
.
ge
(
idx
,
-
x
.
shape
[
0
])))]
cond
=
[
T
.
all
(
T
.
and_
(
T
.
lt
(
idx
,
x
.
shape
[
0
]),
T
.
ge
(
idx
,
-
x
.
shape
[
0
])))]
if
not
node
.
fgraph
.
shape_feature
.
same_shape
(
idx
,
y
,
0
,
0
):
cond
.
append
(
T
.
eq
(
idx
.
shape
[
0
],
y
.
shape
[
0
]))
y
=
Assert
(
"Bad indexing or shapes in a AdvancedIncSubtensor1 that was optimized away"
)(
y
,
*
cond
)
y
=
Assert
(
"Bad indexing or shapes in a AdvancedIncSubtensor1 "
"that was optimized away"
)(
y
,
*
cond
)
if
y
.
dtype
==
node
.
outputs
[
0
]
.
dtype
:
return
[
y
]
...
...
@@ -2828,33 +2817,34 @@ def local_useless_inc_subtensor_alloc(node):
# Build `z_broad` explicitly to include extra implicit dimensions.
z_broad
=
((
True
,)
*
(
xi
.
ndim
-
z
.
ndim
)
+
z
.
broadcastable
)
cond
=
[
# The shapes of `y` and `xi` must either agree or `y` may
# also have shape equal to 1 which may be treated as a
# broadcastable dimension by the subtensor op.
T
.
or_
(
T
.
eq
(
y
.
shape
[
k
],
1
),
T
.
eq
(
y
.
shape
[
k
],
xi
.
shape
[
k
]))
# Loop over all dimensions.
for
k
in
xrange
(
xi
.
ndim
)
# We need to check the above shapes, if
# * the pre-alloc increment `z` is broadcastable in
# dimension `k` (if it isn't, then the shapes of `z` and
# `y` are the same by the definition of the `Alloc` op in
# this dimension and replacing `y` by `z` will not hide a
# shape error), and
# * `xi` and `y` do not have the same shape in dimension
# `k` or we cannot infer the shape statically (if the
# shapes of `xi` and `y` are not the same, then replacing
# `y` by `z` will hide the shape error of `y`), and
# * the shape of `y` is not equal to 1 or we cannot infer
# the shape statically (if the shape of `y` is equal to
# 1, then `y` is broadcasted by the inc_subtensor op
# internally, so the shapes of `xi` and `y` do not need
# to match in dimension `k`; else we need to check at
# runtime that the shape of `y` is either 1 or the same
# as `xi` or otherwise replacing `y` by `z` will hide a
# shape error).
if
(
z_broad
[
k
]
and
not
same_shape
(
xi
,
y
,
dim_x
=
k
,
dim_y
=
k
)
and
shape_of
[
y
][
k
]
!=
1
)]
cond
=
[
# The shapes of `y` and `xi` must either agree or `y` may
# also have shape equal to 1 which may be treated as a
# broadcastable dimension by the subtensor op.
T
.
or_
(
T
.
eq
(
y
.
shape
[
k
],
1
),
T
.
eq
(
y
.
shape
[
k
],
xi
.
shape
[
k
]))
# Loop over all dimensions.
for
k
in
xrange
(
xi
.
ndim
)
# We need to check the above shapes, if
# * the pre-alloc increment `z` is broadcastable in
# dimension `k` (if it isn't, then the shapes of `z` and
# `y` are the same by the definition of the `Alloc` op in
# this dimension and replacing `y` by `z` will not hide a
# shape error), and
# * `xi` and `y` do not have the same shape in dimension
# `k` or we cannot infer the shape statically (if the
# shapes of `xi` and `y` are not the same, then replacing
# `y` by `z` will hide the shape error of `y`), and
# * the shape of `y` is not equal to 1 or we cannot infer
# the shape statically (if the shape of `y` is equal to
# 1, then `y` is broadcasted by the inc_subtensor op
# internally, so the shapes of `xi` and `y` do not need
# to match in dimension `k`; else we need to check at
# runtime that the shape of `y` is either 1 or the same
# as `xi` or otherwise replacing `y` by `z` will hide a
# shape error).
if
(
z_broad
[
k
]
and
not
same_shape
(
xi
,
y
,
dim_x
=
k
,
dim_y
=
k
)
and
shape_of
[
y
][
k
]
!=
1
)]
if
len
(
cond
)
>
0
:
msg
=
'`x[i]` and `y` do not have the same shape.'
...
...
@@ -2916,7 +2906,7 @@ def local_rebroadcast_lift(node):
# compilation phase.
if
hasattr
(
input
,
'clients'
)
and
len
(
input
.
clients
)
==
1
:
rval
=
inode
.
op
.
make_node
(
T
.
Rebroadcast
(
*
list
(
op
.
axis
.
items
()))(
inode
.
inputs
[
0
]))
.
outputs
inode
.
inputs
[
0
]))
.
outputs
return
rval
if
inode
and
isinstance
(
inode
.
op
,
T
.
Rebroadcast
):
# the "axis" specification in the outer Rebroadcast overrides
...
...
@@ -3031,11 +3021,11 @@ def local_join_make_vector(node):
for
idx
in
xrange
(
2
,
len
(
node
.
inputs
)):
inp
=
node
.
inputs
[
idx
]
if
(
inp
.
owner
and
isinstance
(
inp
.
owner
.
op
,
MakeVector
)
and
new_inputs
[
-
1
]
.
owner
and
isinstance
(
new_inputs
[
-
1
]
.
owner
.
op
,
MakeVector
)
and
# MakeVector have a dtype parameter
inp
.
owner
.
op
==
new_inputs
[
-
1
]
.
owner
.
op
):
isinstance
(
inp
.
owner
.
op
,
MakeVector
)
and
new_inputs
[
-
1
]
.
owner
and
isinstance
(
new_inputs
[
-
1
]
.
owner
.
op
,
MakeVector
)
and
# MakeVector have a dtype parameter
inp
.
owner
.
op
==
new_inputs
[
-
1
]
.
owner
.
op
):
inps
=
new_inputs
[
-
1
]
.
owner
.
inputs
+
inp
.
owner
.
inputs
new_inputs
[
-
1
]
=
inp
.
owner
.
op
(
*
inps
)
else
:
...
...
@@ -3059,7 +3049,7 @@ def local_remove_switch_const_cond(node):
if cond is constant and cond != 0: left
"""
if
(
isinstance
(
node
.
op
,
T
.
Elemwise
)
and
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
basic
.
Switch
)):
isinstance
(
node
.
op
.
scalar_op
,
scalar
.
basic
.
Switch
)):
cond
=
T
.
extract_constant
(
node
.
inputs
[
0
],
elemwise
=
False
)
if
type
(
cond
)
is
numpy
.
ndarray
and
cond
.
ndim
==
0
:
if
cond
==
0
:
...
...
@@ -3241,9 +3231,9 @@ def local_flatten_lift(node):
nnet/sigm.py:log1msigm_to_softplus to get applied when there is a flatten.
"""
if
(
isinstance
(
node
.
op
,
T
.
Flatten
)
and
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Elemwise
)
and
len
(
node
.
inputs
[
0
]
.
owner
.
inputs
)
==
1
):
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Elemwise
)
and
len
(
node
.
inputs
[
0
]
.
owner
.
inputs
)
==
1
):
f
=
node
.
op
(
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
])
e
=
node
.
inputs
[
0
]
.
owner
.
op
(
f
)
return
[
e
]
...
...
@@ -3290,9 +3280,9 @@ def local_reshape_lift(node):
nnet/sigm.py:log1msigm_to_softplus to get applied when there is a reshape.
"""
if
(
isinstance
(
node
.
op
,
T
.
Reshape
)
and
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Elemwise
)
and
len
(
node
.
inputs
[
0
]
.
owner
.
inputs
)
==
1
):
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Elemwise
)
and
len
(
node
.
inputs
[
0
]
.
owner
.
inputs
)
==
1
):
r
=
node
.
op
(
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
],
node
.
inputs
[
1
])
e
=
node
.
inputs
[
0
]
.
owner
.
op
(
r
)
# In rare case the original broadcast was (False, True), but
...
...
@@ -3539,7 +3529,7 @@ class Canonizer(gof.LocalOptimizer):
return
[
input
],
[]
if
input
.
owner
is
None
or
input
.
owner
.
op
not
in
[
self
.
main
,
self
.
inverse
,
self
.
reciprocal
]:
self
.
main
,
self
.
inverse
,
self
.
reciprocal
]:
if
input
.
owner
and
isinstance
(
input
.
owner
.
op
,
T
.
DimShuffle
):
# If input is a DimShuffle of some input which does
# something like this:
...
...
@@ -3552,9 +3542,9 @@ class Canonizer(gof.LocalOptimizer):
# the num/denum of its input
dsn
=
input
.
owner
# dimshuffle node
dsop
=
dsn
.
op
# dimshuffle op
dsi0
=
dsn
.
inputs
[
0
]
# the first input of the
# dimshuffle i.e. the ndarray to
# redim
# the first input of the dimshuffle i.e. the ndarray to redim
dsi0
=
dsn
.
inputs
[
0
]
# The compatible order is a DimShuffle "new_order" of the form:
# ('x', ..., 'x', 0, 1, 2, ..., dimshuffle_input.type.ndim)
...
...
@@ -3566,9 +3556,9 @@ class Canonizer(gof.LocalOptimizer):
# different numbers of dimensions (hence why we can
# discard its information - we know we can retrieve it
# later on).
compatible_order
=
(
'x'
,)
*
(
input
.
type
.
ndim
-
dsi0
.
type
.
ndim
)
+
tuple
(
range
(
dsi0
.
type
.
ndim
))
compatible_order
=
(
(
'x'
,)
*
(
input
.
type
.
ndim
-
dsi0
.
type
.
ndim
)
+
tuple
(
range
(
dsi0
.
type
.
ndim
)
))
if
dsop
.
new_order
==
compatible_order
:
# If the "new_order" is the one we recognize,
# we return the num_denum of the dimshuffled input.
...
...
@@ -3815,9 +3805,9 @@ class Canonizer(gof.LocalOptimizer):
new
=
self
.
merge_num_denum
(
num
,
denum
)
if
new
.
type
.
dtype
!=
out
.
type
.
dtype
:
#new = T.fill(out, new)
#
new = T.fill(out, new)
elem_op
=
T
.
Elemwise
(
scalar
.
Identity
(
scalar
.
specific_out
(
getattr
(
scalar
,
out
.
type
.
dtype
))))
getattr
(
scalar
,
out
.
type
.
dtype
))))
new
=
elem_op
(
new
)
assert
(
new
.
type
==
out
.
type
)
==
(
not
(
new
.
type
!=
out
.
type
))
...
...
@@ -3833,12 +3823,12 @@ class Canonizer(gof.LocalOptimizer):
else
:
_logger
.
warning
(
' '
.
join
((
'CANONIZE FAILED: new, out = '
,
new
,
','
,
out
,
'types'
,
new
.
type
,
','
,
out
.
type
)))
new
.
type
,
','
,
out
.
type
)))
return
False
def
__str__
(
self
):
return
getattr
(
self
,
'name'
,
'Canonizer(
%
s,
%
s,
%
s)'
%
(
self
.
main
,
self
.
inverse
,
self
.
reciprocal
))
self
.
main
,
self
.
inverse
,
self
.
reciprocal
))
def
mul_calculate
(
num
,
denum
,
aslist
=
False
,
out_type
=
None
):
...
...
@@ -3872,7 +3862,7 @@ register_canonicalize(local_mul_canonizer, name='local_mul_canonizer')
def
local_neg_to_mul
(
node
):
if
node
.
op
==
T
.
neg
:
return
[
T
.
mul
(
numpy
.
array
(
-
1
,
dtype
=
node
.
inputs
[
0
]
.
dtype
),
node
.
inputs
[
0
])]
node
.
inputs
[
0
])]
register_canonicalize
(
local_neg_to_mul
)
...
...
@@ -3924,10 +3914,10 @@ def local_elemwise_sub_zeros(node):
"""
Elemwise{sub}(X,X) -> zeros_like(X)
"""
if
(
isinstance
(
node
.
op
,
T
.
Elemwise
)
and
node
.
op
.
scalar_op
.
nin
==
2
and
node
.
op
.
scalar_op
==
scalar
.
sub
and
node
.
inputs
[
0
]
==
node
.
inputs
[
1
]):
if
(
isinstance
(
node
.
op
,
T
.
Elemwise
)
and
node
.
op
.
scalar_op
.
nin
==
2
and
node
.
op
.
scalar_op
==
scalar
.
sub
and
node
.
inputs
[
0
]
==
node
.
inputs
[
1
]):
return
[
T
.
zeros_like
(
node
.
inputs
[
0
])]
...
...
@@ -4013,9 +4003,8 @@ def local_sum_div_dimshuffle(node):
' to False.'
)
new_denom
=
T
.
DimShuffle
(
thing_dimshuffled
.
type
.
broadcastable
,
new_new_order
)(
thing_dimshuffled
)
thing_dimshuffled
.
type
.
broadcastable
,
new_new_order
)(
thing_dimshuffled
)
return
[
T
.
true_div
(
node
.
op
(
numerator
),
new_denom
)]
# else:
# print 'incompatible dims:', axis, new_order
...
...
@@ -4052,8 +4041,9 @@ def local_op_of_op(node):
# We manipulate the graph so this is done to make sure the opt
# doesn't affect other computations.
if
len
(
node_inps
.
clients
)
==
1
:
if
(
node_inps
.
owner
and
(
isinstance
(
node_inps
.
owner
.
op
,
T
.
elemwise
.
Prod
)
or
isinstance
(
node_inps
.
owner
.
op
,
T
.
elemwise
.
Sum
))):
if
(
node_inps
.
owner
and
(
isinstance
(
node_inps
.
owner
.
op
,
T
.
elemwise
.
Prod
)
or
isinstance
(
node_inps
.
owner
.
op
,
T
.
elemwise
.
Sum
))):
# check to see either the inner or outer prod is doing a
# product over all axis, in which case we can remove it
...
...
@@ -4074,7 +4064,6 @@ def local_op_of_op(node):
assert
len
(
newaxis
)
==
len
(
list
(
node_inps
.
owner
.
op
.
axis
)
+
list
(
node
.
op
.
axis
))
# The old bugged logic. We keep it there to generate a warning
# when we generated bad code.
alldims
=
list
(
range
(
node_inps
.
owner
.
inputs
[
0
]
.
type
.
ndim
))
...
...
@@ -4087,20 +4076,20 @@ def local_op_of_op(node):
if
i
not
in
alldims
]
if
(
theano
.
config
.
warn
.
sum_sum_bug
and
newaxis
!=
newaxis_old
and
len
(
newaxis
)
==
len
(
newaxis_old
)):
newaxis
!=
newaxis_old
and
len
(
newaxis
)
==
len
(
newaxis_old
)):
_logger
.
warn
(
"WARNING (YOUR CURRENT CODE IS FINE): Theano "
"versions between version 9923a40c7b7a and August "
"2nd, 2010 generated bugged code in this case. "
"This happens when there are two consecutive sums "
"in the graph and the intermediate sum is not "
"used elsewhere in the code. Some safeguard "
"removed some bad code, but not in all cases. You "
"are in one such case. To disable this warning "
"(that you can safely ignore since this bug has "
"been fixed) set the theano flag "
"`warn.sum_sum_bug` to False."
)
"WARNING (YOUR CURRENT CODE IS FINE): Theano "
"versions between version 9923a40c7b7a and August "
"2nd, 2010 generated bugged code in this case. "
"This happens when there are two consecutive sums "
"in the graph and the intermediate sum is not "
"used elsewhere in the code. Some safeguard "
"removed some bad code, but not in all cases. You "
"are in one such case. To disable this warning "
"(that you can safely ignore since this bug has "
"been fixed) set the theano flag "
"`warn.sum_sum_bug` to False."
)
combined
=
opt_type
(
newaxis
,
dtype
=
out_dtype
)
return
[
combined
(
node_inps
.
owner
.
inputs
[
0
])]
...
...
@@ -4126,9 +4115,8 @@ def local_reduce_join(node):
"""
if
(
isinstance
(
node
.
op
,
T
.
CAReduce
)
and
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Join
)):
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
T
.
Join
)):
join
=
node
.
inputs
[
0
]
.
owner
if
T
.
extract_constant
(
join
.
inputs
[
0
])
!=
0
:
return
...
...
@@ -4149,7 +4137,8 @@ def local_reduce_join(node):
if
not
inp
:
return
if
(
not
isinstance
(
inp
.
op
,
DimShuffle
)
or
inp
.
op
.
new_order
!=
(
'x'
,)
+
tuple
(
range
(
inp
.
inputs
[
0
]
.
ndim
))):
inp
.
op
.
new_order
!=
(
'x'
,)
+
tuple
(
range
(
inp
.
inputs
[
0
]
.
ndim
))):
return
new_inp
.
append
(
inp
.
inputs
[
0
])
ret
=
Elemwise
(
node
.
op
.
scalar_op
)(
*
new_inp
)
...
...
@@ -4174,8 +4163,7 @@ def local_reduce_join(node):
'optimization, that modified the pattern '
'"Reduce{scalar.op}(Join(axis=0, a, b), axis=0)", '
'did not check the reduction axis. So if the '
'reduction axis was not 0, you got a wrong answer.'
))
'reduction axis was not 0, you got a wrong answer.'
))
return
# We add the new check late to don't add extra warning.
...
...
@@ -4204,7 +4192,7 @@ def local_cut_useless_reduce(node):
# theano/tensor/tests/test_opt.py:T_local_reduce.test_local_reduce_broadcast_some_0
# see gh-790 issue.
#
#@register_canonicalize
#
@register_canonicalize
@register_uncanonicalize
@register_specialize
@gof.local_optimizer
(
ALL_REDUCE
)
...
...
@@ -4258,7 +4246,7 @@ def local_opt_alloc(node):
input
=
node_inps
.
owner
.
inputs
[
0
]
shapes
=
node_inps
.
owner
.
inputs
[
1
:]
if
(
node
.
op
.
axis
is
None
or
node
.
op
.
axis
==
tuple
(
range
(
input
.
ndim
))):
node
.
op
.
axis
==
tuple
(
range
(
input
.
ndim
))):
try
:
val
=
get_scalar_constant_value
(
input
)
assert
val
.
size
==
1
...
...
@@ -4346,7 +4334,7 @@ register_canonicalize(local_mul_zero)
@gof.local_optimizer
([
T
.
true_div
])
def
local_div_to_inv
(
node
):
if
node
.
op
==
T
.
true_div
and
N
.
all
(
local_mul_canonizer
.
get_constant
(
node
.
inputs
[
0
])
==
1.0
):
local_mul_canonizer
.
get_constant
(
node
.
inputs
[
0
])
==
1.0
):
out
=
node
.
outputs
[
0
]
new_out
=
T
.
inv
(
local_mul_canonizer
.
merge_num_denum
(
node
.
inputs
[
1
:],
[]))
...
...
@@ -4501,7 +4489,8 @@ def local_pow_specialize_device(node):
if
abs
(
y
)
>
2
:
# We fuse all the pow together here to make
# compilation faster
rval1
=
Elemwise
(
theano
.
scalar
.
Composite
(
rval1
=
Elemwise
(
theano
.
scalar
.
Composite
(
[
pow2_scal
[
0
]],
[
rval1_scal
]))
.
make_node
(
xsym
)
if
y
<
0
:
rval
=
[
T
.
inv
(
rval1
)]
...
...
@@ -4566,8 +4555,8 @@ def local_mul_specialize(node):
else
:
# The next case would cause a replace by an equivalent case.
if
(
neg
and
nb_neg_node
==
0
and
nb_cst
==
1
):
nb_neg_node
==
0
and
nb_cst
==
1
):
return
elif
neg
:
# Don't add an extra neg node as we can't
...
...
@@ -4640,8 +4629,8 @@ def check_for_x_over_absX(numerators, denominators):
# TODO: this function should dig/search through dimshuffles
# This won't catch a dimshuffled absolute value
for
den
in
list
(
denominators
):
if
(
den
.
owner
and
den
.
owner
.
op
==
T
.
abs_
and
den
.
owner
.
inputs
[
0
]
in
numerators
):
if
(
den
.
owner
and
den
.
owner
.
op
==
T
.
abs_
and
den
.
owner
.
inputs
[
0
]
in
numerators
):
if
den
.
owner
.
inputs
[
0
]
.
type
.
dtype
.
startswith
(
'complex'
):
# TODO: Make an Op that projects a complex number to
# have unit length but projects 0 to 0. That
...
...
@@ -4715,8 +4704,8 @@ def local_log1p(node):
if
node
.
op
==
T
.
log
:
log_arg
,
=
node
.
inputs
if
log_arg
.
owner
and
log_arg
.
owner
.
op
==
T
.
add
:
scalars
,
scalar_inputs
,
nonconsts
=
\
scalarconsts_rest
(
log_arg
.
owner
.
inputs
)
scalars
,
scalar_inputs
,
nonconsts
=
scalarconsts_rest
(
log_arg
.
owner
.
inputs
)
# scalar_inputs are potentially dimshuffled and fill'd scalars
if
scalars
and
numpy
.
allclose
(
numpy
.
sum
(
scalars
),
1
):
if
not
nonconsts
:
...
...
@@ -4748,7 +4737,7 @@ def local_log_add(node):
if
len
(
zi
)
!=
2
:
# -- upgrading Maximum to handle multiple inputs wasn't trivial
# TODO
#raise NotImplementedError()
#
raise NotImplementedError()
return
pre_exp
=
[
x
.
owner
.
inputs
[
0
]
for
x
in
zi
if
x
.
owner
and
x
.
owner
.
op
==
T
.
exp
]
...
...
@@ -4945,8 +4934,7 @@ def constant_folding(node):
storage_map
[
o
]
=
[
None
]
compute_map
[
o
]
=
[
False
]
if
(
hasattr
(
node
.
op
,
'python_constant_folding'
)
and
node
.
op
.
python_constant_folding
(
node
)):
node
.
op
.
python_constant_folding
(
node
)):
old_value
=
getattr
(
node
.
op
,
'_op_use_c_code'
,
False
)
try
:
node
.
op
.
_op_use_c_code
=
False
...
...
@@ -5037,9 +5025,9 @@ register_specialize(local_one_minus_erf)
local_one_minus_erf2
=
gof
.
PatternSub
((
T
.
add
,
1
,
(
T
.
mul
,
-
1
,
(
T
.
erf
,
'x'
))),
(
T
.
erfc
,
'x'
),
allow_multiple_clients
=
True
,
name
=
'local_one_minus_erf2'
)
(
T
.
erfc
,
'x'
),
allow_multiple_clients
=
True
,
name
=
'local_one_minus_erf2'
)
register_canonicalize
(
local_one_minus_erf2
)
register_stabilize
(
local_one_minus_erf2
)
register_specialize
(
local_one_minus_erf2
)
...
...
@@ -5058,7 +5046,7 @@ register_canonicalize(local_one_plus_neg_erf)
register_stabilize
(
local_one_plus_neg_erf
)
register_specialize
(
local_one_plus_neg_erf
)
#(-1)+erf(x) => -erfc(x) don't need erf(x)+(-1) as the canonicalize
#
(-1)+erf(x) => -erfc(x) don't need erf(x)+(-1) as the canonicalize
# will put the -1 as the first argument.
local_erf_minus_one
=
gof
.
PatternSub
((
T
.
add
,
dict
(
pattern
=
'y'
,
constraint
=
_is_minus1
),
...
...
@@ -5124,7 +5112,7 @@ register_canonicalize(local_one_add_neg_erfc)
register_stabilize
(
local_one_add_neg_erfc
)
register_specialize
(
local_one_add_neg_erfc
)
#(-1)+erfc(-x)=>erf(x)
#
(-1)+erfc(-x)=>erf(x)
local_erf_neg_minus_one
=
gof
.
PatternSub
((
T
.
add
,
dict
(
pattern
=
'y'
,
constraint
=
_is_minus1
),
(
T
.
erfc
,
(
T
.
neg
,
'x'
))),
...
...
@@ -5137,7 +5125,7 @@ register_canonicalize(local_erf_neg_minus_one)
register_stabilize
(
local_erf_neg_minus_one
)
register_specialize
(
local_erf_neg_minus_one
)
#(-1)+erfc(-1*x)=>erf(x)
#
(-1)+erfc(-1*x)=>erf(x)
local_erf_neg_minus_one2
=
gof
.
PatternSub
((
T
.
add
,
dict
(
pattern
=
'y'
,
constraint
=
_is_minus1
),
(
T
.
erfc
,
(
T
.
mul
,
-
1
,
'x'
))),
...
...
@@ -5176,8 +5164,8 @@ def local_log_erfc(node):
x
=
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
]
stab_value
=
(
-
x
**
2
-
T
.
log
(
x
)
-
.
5
*
T
.
log
(
numpy
.
pi
)
+
T
.
log
(
1
-
1
/
(
2
*
x
**
2
)
+
3
/
(
4
*
x
**
4
)
-
15
/
(
8
*
x
**
6
)))
T
.
log
(
1
-
1
/
(
2
*
x
**
2
)
+
3
/
(
4
*
x
**
4
)
-
15
/
(
8
*
x
**
6
)))
if
(
node
.
outputs
[
0
]
.
dtype
==
'float32'
or
node
.
outputs
[
0
]
.
dtype
==
'float16'
):
...
...
@@ -5191,8 +5179,8 @@ def local_log_erfc(node):
# Stability optimization of the grad of log(erfc(x))
#([y*]exp(-(x**2)))/erfc(x) # The y* is optional
#([y*]exp(x**2))/erfc(-x) => [y*](when x>threashold,
#
([y*]exp(-(x**2)))/erfc(x) # The y* is optional
#
([y*]exp(x**2))/erfc(-x) => [y*](when x>threashold,
# sqrt(pi)*-x/(1-1/(2*x**2)+3/(4*x**4)-15/(8*x**6)))
# for float64: threshold=26.63 see at the end of the fct for the explaination
# for float32: threshold=9.3 see at the end of the fct for the explaination
...
...
@@ -5226,8 +5214,8 @@ def local_grad_log_erfc_neg(node):
if
mul
.
owner
.
inputs
[
0
]
.
owner
or
len
(
mul
.
owner
.
inputs
)
!=
2
:
return
False
y
=
mul
.
owner
.
inputs
[
0
]
if
(
not
mul
.
owner
.
inputs
[
1
]
.
owner
or
mul
.
owner
.
inputs
[
1
]
.
owner
.
op
!=
T
.
exp
):
if
(
not
mul
.
owner
.
inputs
[
1
]
.
owner
or
mul
.
owner
.
inputs
[
1
]
.
owner
.
op
!=
T
.
exp
):
return
False
exp
=
mul
.
owner
.
inputs
[
1
]
...
...
@@ -5236,8 +5224,8 @@ def local_grad_log_erfc_neg(node):
if
exp
.
owner
.
inputs
[
0
]
.
owner
.
op
==
T
.
neg
:
neg
=
exp
.
owner
.
inputs
[
0
]
if
(
not
neg
.
owner
.
inputs
[
0
]
.
owner
or
neg
.
owner
.
inputs
[
0
]
.
owner
.
op
!=
T
.
sqr
):
if
(
not
neg
.
owner
.
inputs
[
0
]
.
owner
or
neg
.
owner
.
inputs
[
0
]
.
owner
.
op
!=
T
.
sqr
):
return
False
sqr
=
neg
.
owner
.
inputs
[
0
]
x
=
sqr
.
owner
.
inputs
[
0
]
...
...
@@ -5279,8 +5267,8 @@ def local_grad_log_erfc_neg(node):
return
False
if
len
(
mul_neg
.
owner
.
inputs
)
==
2
:
if
(
not
mul_neg
.
owner
.
inputs
[
1
]
.
owner
or
mul_neg
.
owner
.
inputs
[
1
]
.
owner
.
op
!=
T
.
sqr
):
if
(
not
mul_neg
.
owner
.
inputs
[
1
]
.
owner
or
mul_neg
.
owner
.
inputs
[
1
]
.
owner
.
op
!=
T
.
sqr
):
return
False
sqr
=
mul_neg
.
owner
.
inputs
[
1
]
x
=
sqr
.
owner
.
inputs
[
0
]
...
...
@@ -5292,8 +5280,8 @@ def local_grad_log_erfc_neg(node):
return
False
if
cst2
!=
-
1
:
if
(
not
erfc_x
.
owner
or
erfc_x
.
owner
.
op
!=
T
.
mul
or
len
(
erfc_x
.
owner
.
inputs
)
!=
2
):
if
(
not
erfc_x
.
owner
or
erfc_x
.
owner
.
op
!=
T
.
mul
or
len
(
erfc_x
.
owner
.
inputs
)
!=
2
):
# todo implement that case
return
False
if
erfc_x
.
owner
.
inputs
[
1
]
is
not
mul_neg
.
owner
.
inputs
[
1
]:
...
...
@@ -5324,12 +5312,12 @@ def local_grad_log_erfc_neg(node):
# aaron value
stab_value
=
(
x
*
T
.
pow
(
1
-
1
/
(
2
*
(
x
**
2
))
+
3
/
(
4
*
(
x
**
4
))
-
15
/
(
8
*
(
x
**
6
)),
-
1
)
*
T
.
cast
(
T
.
sqrt
(
numpy
.
pi
),
dtype
=
x
.
dtype
))
3
/
(
4
*
(
x
**
4
))
-
15
/
(
8
*
(
x
**
6
)),
-
1
)
*
T
.
cast
(
T
.
sqrt
(
numpy
.
pi
),
dtype
=
x
.
dtype
))
if
x
.
dtype
==
'float32'
or
x
.
dtype
==
'float16'
:
threshold
=
9.3
#threshold = 10.1
#
threshold = 10.1
elif
x
.
dtype
==
'float64'
:
threshold
=
26.641747557
ret
=
T
.
switch
(
x
<
threshold
,
true_div_no_mul
,
stab_value
)
*
y
...
...
@@ -5531,6 +5519,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
if
maker
is
None
:
def
maker
(
node
,
scalar_op
):
return
OP
(
scalar_op
)
def
local_fuse
(
node
):
"""
As part of specialization, we fuse two consecutive elemwise Ops of the
...
...
@@ -5598,13 +5587,13 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
# If a variable is used as multiple into to the same node,
# we still want to fusion. So we take the set.
if
(
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
OP
)
and
len
(
set
([
n
for
n
,
idx
in
i
.
clients
]))
==
1
and
# Do not merge elemwise that don't have the same
# broadcastable pattern to don't redo duplicate
# computation due to broadcast.
i
.
owner
.
outputs
[
0
]
.
broadcastable
==
node
.
outputs
[
0
]
.
broadcastable
):
isinstance
(
i
.
owner
.
op
,
OP
)
and
len
(
set
([
n
for
n
,
idx
in
i
.
clients
]))
==
1
and
# Do not merge elemwise that don't have the same
# broadcastable pattern to don't redo duplicate
# computation due to broadcast.
i
.
owner
.
outputs
[
0
]
.
broadcastable
==
node
.
outputs
[
0
]
.
broadcastable
):
do_fusion
=
True
try
:
tmp_s_input
=
[]
...
...
@@ -5840,14 +5829,14 @@ def local_add_mul_fusion(node):
"""
if
(
not
isinstance
(
node
.
op
,
Elemwise
)
or
not
isinstance
(
node
.
op
.
scalar_op
,
(
scalar
.
Add
,
scalar
.
Mul
))):
not
isinstance
(
node
.
op
.
scalar_op
,
(
scalar
.
Add
,
scalar
.
Mul
))):
return
False
s_op
=
node
.
op
.
scalar_op
.
__class__
for
inp
in
node
.
inputs
:
if
(
inp
.
owner
and
isinstance
(
inp
.
owner
.
op
,
Elemwise
)
and
isinstance
(
inp
.
owner
.
op
.
scalar_op
,
s_op
)):
isinstance
(
inp
.
owner
.
op
,
Elemwise
)
and
isinstance
(
inp
.
owner
.
op
.
scalar_op
,
s_op
)):
l
=
list
(
node
.
inputs
)
l
.
remove
(
inp
)
return
[
node
.
op
(
*
(
l
+
inp
.
owner
.
inputs
))]
...
...
@@ -5882,13 +5871,15 @@ else:
# just returns the input, it should be removed from the graph to
# make sure all possible optimizations can be applied.
register_canonicalize
(
gof
.
OpRemove
(
theano
.
gradient
.
consider_constant_
),
'fast_compile'
,
'fast_run'
,
name
=
'remove_consider_constant'
)
'fast_compile'
,
'fast_run'
,
name
=
'remove_consider_constant'
)
register_canonicalize
(
gof
.
OpRemove
(
theano
.
gradient
.
zero_grad_
),
'fast_compile'
,
'fast_run'
,
name
=
'remove_zero_grad'
)
'fast_compile'
,
'fast_run'
,
name
=
'remove_zero_grad'
)
register_canonicalize
(
gof
.
OpRemove
(
theano
.
gradient
.
disconnected_grad_
),
'fast_compile'
,
'fast_run'
,
name
=
'remove_disconnected_grad'
)
'fast_compile'
,
'fast_run'
,
name
=
'remove_disconnected_grad'
)
@register_canonicalize
...
...
theano/tensor/raw_random.py
浏览文件 @
bd11e130
"""Define random number Type (`RandomStateType`) and Op (`RandomFunction`)."""
from
__future__
import
print_function
__docformat__
=
"restructuredtext en"
import
sys
from
copy
import
copy
...
...
@@ -15,6 +15,8 @@ from theano import gof
from
six
import
string_types
from
theano.compile
import
optdb
__docformat__
=
"restructuredtext en"
class
RandomStateType
(
gof
.
Type
):
"""A Type wrapper for numpy.random.RandomState
...
...
@@ -85,13 +87,13 @@ class RandomStateType(gof.Type):
# Register RandomStateType's C code for ViewOp.
theano
.
compile
.
register_view_op_c_code
(
RandomStateType
,
"""
Py_XDECREF(
%(oname)
s);
%(oname)
s =
%(iname)
s;
Py_XINCREF(
%(oname)
s);
"""
,
1
)
RandomStateType
,
"""
Py_XDECREF(
%(oname)
s);
%(oname)
s =
%(iname)
s;
Py_XINCREF(
%(oname)
s);
"""
,
1
)
random_state_type
=
RandomStateType
()
...
...
@@ -135,9 +137,8 @@ class RandomFunction(gof.Op):
and
self
.
ndim_added
==
other
.
ndim_added
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
fn
)
\
^
hash
(
self
.
outtype
)
\
^
hash
(
self
.
inplace
)
^
hash
(
self
.
ndim_added
)
return
(
hash
(
type
(
self
))
^
hash
(
self
.
fn
)
^
hash
(
self
.
outtype
)
^
hash
(
self
.
inplace
)
^
hash
(
self
.
ndim_added
))
def
__getstate__
(
self
):
return
self
.
state
...
...
@@ -233,7 +234,6 @@ class RandomFunction(gof.Op):
# copy of r if self.inplace is False
r
,
shape
,
args
=
inputs
[
0
],
inputs
[
1
],
inputs
[
2
:]
assert
type
(
r
)
==
numpy
.
random
.
RandomState
,
(
type
(
r
),
r
)
r_orig
=
r
# If shape == [], that means no shape is enforced, and numpy is
# trusted to draw the appropriate number of samples, numpy uses
...
...
@@ -245,16 +245,16 @@ class RandomFunction(gof.Op):
shape
=
tuple
(
shape
)
if
(
shape
is
not
None
and
self
.
outtype
.
ndim
!=
len
(
shape
)
+
self
.
ndim_added
):
self
.
outtype
.
ndim
!=
len
(
shape
)
+
self
.
ndim_added
):
raise
ValueError
(
'Shape mismatch: self.outtype.ndim (
%
i) !='
' len(shape) (
%
i) + self.ndim_added (
%
i)'
%
(
self
.
outtype
.
ndim
,
len
(
shape
),
self
.
ndim_added
))
%
(
self
.
outtype
.
ndim
,
len
(
shape
),
self
.
ndim_added
))
if
not
self
.
inplace
:
r
=
copy
(
r
)
rout
[
0
]
=
r
rval
=
self
.
fn
(
r
,
*
(
args
+
[
shape
]))
if
not
isinstance
(
rval
,
numpy
.
ndarray
)
\
or
str
(
rval
.
dtype
)
!=
node
.
outputs
[
1
]
.
type
.
dtype
:
if
(
not
isinstance
(
rval
,
numpy
.
ndarray
)
or
str
(
rval
.
dtype
)
!=
node
.
outputs
[
1
]
.
type
.
dtype
)
:
rval
=
theano
.
_asarray
(
rval
,
dtype
=
node
.
outputs
[
1
]
.
type
.
dtype
)
# When shape is None, numpy has a tendency to unexpectedly
...
...
@@ -288,7 +288,7 @@ class RandomFunction(gof.Op):
def
grad
(
self
,
inputs
,
outputs
):
return
[
theano
.
gradient
.
grad_undefined
(
self
,
k
,
inp
,
'No gradient defined through raw random numbers op'
)
'No gradient defined through raw random numbers op'
)
for
k
,
inp
in
enumerate
(
inputs
)]
def
R_op
(
self
,
inputs
,
eval_points
):
...
...
@@ -325,8 +325,8 @@ def _infer_ndim_bcast(ndim, shape, *args):
else
:
if
shape_ndim
!=
ndim
:
raise
ValueError
(
'ndim should be equal to len(shape), but
\n
'
,
'ndim =
%
s, len(shape) =
%
s, shape =
%
s'
%
(
ndim
,
shape_ndim
,
shape
))
'ndim =
%
s, len(shape) =
%
s, shape =
%
s'
%
(
ndim
,
shape_ndim
,
shape
))
bcast
=
[]
pre_v_shape
=
[]
...
...
@@ -353,7 +353,8 @@ def _infer_ndim_bcast(ndim, shape, *args):
break
else
:
if
n_a_i
==
0
:
raise
ValueError
((
'Auto-shape of -1 must overlap'
raise
ValueError
((
'Auto-shape of -1 must overlap'
'with the shape of one of the broadcastable'
'inputs'
))
else
:
...
...
@@ -373,7 +374,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
# but we need to know ndim
if
not
args
:
raise
TypeError
((
'_infer_ndim_bcast cannot infer shape without'
' either shape or args'
))
' either shape or args'
))
template
=
reduce
(
lambda
a
,
b
:
a
+
b
,
args
)
v_shape
=
template
.
shape
bcast
=
template
.
broadcastable
...
...
@@ -463,7 +464,7 @@ def uniform(random_state, size=None, low=0.0, high=1.0, ndim=None, dtype=None):
dtype
=
tensor
.
scal
.
upcast
(
theano
.
config
.
floatX
,
low
.
dtype
,
high
.
dtype
)
ndim
,
size
,
bcast
=
_infer_ndim_bcast
(
ndim
,
size
,
low
,
high
)
op
=
RandomFunction
(
'uniform'
,
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
))
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
))
return
op
(
random_state
,
size
,
low
,
high
)
...
...
@@ -487,7 +488,7 @@ def normal(random_state, size=None, avg=0.0, std=1.0, ndim=None, dtype=None):
dtype
=
tensor
.
scal
.
upcast
(
theano
.
config
.
floatX
,
avg
.
dtype
,
std
.
dtype
)
ndim
,
size
,
bcast
=
_infer_ndim_bcast
(
ndim
,
size
,
avg
,
std
)
op
=
RandomFunction
(
'normal'
,
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
))
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
))
return
op
(
random_state
,
size
,
avg
,
std
)
...
...
@@ -517,7 +518,8 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
# p=numpy.asarray([.1, .2, .3], dtype='float64'))
n
=
tensor
.
cast
(
n
,
'int32'
)
op
=
RandomFunction
(
'binomial'
,
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
(
False
,)
*
ndim
))
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
(
False
,)
*
ndim
))
return
op
(
random_state
,
size
,
n
,
p
)
...
...
@@ -583,7 +585,7 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None,
high
=
tensor
.
as_tensor_variable
(
high
)
ndim
,
size
,
bcast
=
_infer_ndim_bcast
(
ndim
,
size
,
low
,
high
)
op
=
RandomFunction
(
random_integers_helper
,
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
))
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
))
return
op
(
random_state
,
size
,
low
,
high
)
...
...
@@ -719,8 +721,9 @@ def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
ndim
,
size
,
bcast
=
_infer_ndim_bcast
(
ndim
,
size
)
# print "NDIM", ndim, size
op
=
RandomFunction
(
permutation_helper
,
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
+
(
False
,)),
ndim_added
=
1
)
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
+
(
False
,)),
ndim_added
=
1
)
return
op
(
random_state
,
size
,
n
)
...
...
@@ -738,14 +741,11 @@ def multinomial_helper(random_state, n, pvals, size):
ndim
=
len
(
size
)
else
:
ndim
=
max
(
n
.
ndim
,
pvals
.
ndim
-
1
)
out_ndim
=
ndim
+
1
# broadcast n to ndim dimensions and pvals to ndim+1
if
n
.
ndim
>
ndim
:
raise
ValueError
(
'n.ndim (
%
i) should not be larger than len(size) (
%
i)'
%
(
n
.
ndim
,
ndim
),
n
,
size
)
raise
ValueError
(
'n.ndim (
%
i) should not be larger than len(size) (
%
i)'
%
(
n
.
ndim
,
ndim
),
n
,
size
)
if
n
.
ndim
<
ndim
:
n
=
n
.
reshape
((
1
,)
*
(
ndim
-
n
.
ndim
)
+
n
.
shape
)
...
...
@@ -788,7 +788,7 @@ def multinomial_helper(random_state, n, pvals, size):
# because mtrand.pyx has a ValueError that will trigger if
# sum(pvals[:-1]) > 1.0
pvi
=
pvi
*
(
1.0
-
5e-5
)
#pvi = pvi * .9
#
pvi = pvi * .9
pisum
=
numpy
.
sum
(
pvi
)
elif
pvi
[
-
1
]
<
5e-5
:
# will this even work?
pvi
=
pvi
*
(
1.0
-
5e-5
)
...
...
@@ -859,8 +859,9 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
ndim
,
size
,
bcast
=
_infer_ndim_bcast
(
ndim
,
size
,
n
,
tmp
)
bcast
=
bcast
+
(
pvals
.
type
.
broadcastable
[
-
1
],)
op
=
RandomFunction
(
multinomial_helper
,
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
),
ndim_added
=
1
)
tensor
.
TensorType
(
dtype
=
dtype
,
broadcastable
=
bcast
),
ndim_added
=
1
)
return
op
(
random_state
,
size
,
n
,
pvals
)
...
...
theano/tensor/shared_randomstreams.py
浏览文件 @
bd11e130
"""Define RandomStreams, providing random number variables for Theano
graphs.
"""
__docformat__
=
"restructuredtext en"
import
copy
import
numpy
from
theano.compile.sharedvalue
import
(
SharedVariable
,
shared_constructor
,
shared
)
from
theano.tensor
import
raw_random
__docformat__
=
"restructuredtext en"
class
RandomStateSharedVariable
(
SharedVariable
):
pass
...
...
@@ -77,7 +79,7 @@ class RandomStreams(raw_random.RandomStreamsBase):
for
old_r
,
new_r
in
self
.
state_updates
:
old_r_seed
=
seedgen
.
randint
(
2
**
30
)
old_r
.
set_value
(
numpy
.
random
.
RandomState
(
int
(
old_r_seed
)),
borrow
=
True
)
borrow
=
True
)
def
__getitem__
(
self
,
item
):
"""Retrieve the numpy RandomState instance associated with a
...
...
theano/tensor/sharedvar.py
浏览文件 @
bd11e130
...
...
@@ -41,10 +41,10 @@ def tensor_constructor(value, name=None, strict=False, allow_downcast=None,
broadcastable
=
(
False
,)
*
len
(
value
.
shape
)
type
=
TensorType
(
value
.
dtype
,
broadcastable
=
broadcastable
)
return
TensorSharedVariable
(
type
=
type
,
value
=
numpy
.
array
(
value
,
copy
=
(
not
borrow
)),
name
=
name
,
strict
=
strict
,
allow_downcast
=
allow_downcast
)
value
=
numpy
.
array
(
value
,
copy
=
(
not
borrow
)),
name
=
name
,
strict
=
strict
,
allow_downcast
=
allow_downcast
)
# TensorSharedVariable brings in the tensor operators, is not ideal, but works
...
...
@@ -85,8 +85,10 @@ def scalar_constructor(value, name=None, strict=False, allow_downcast=None,
# Do not pass the dtype to asarray because we want this to fail if
# strict is True and the types do not match.
rval
=
ScalarSharedVariable
(
type
=
tensor_type
,
value
=
numpy
.
array
(
value
,
copy
=
True
),
name
=
name
,
strict
=
strict
,
allow_downcast
=
allow_downcast
)
value
=
numpy
.
array
(
value
,
copy
=
True
),
name
=
name
,
strict
=
strict
,
allow_downcast
=
allow_downcast
)
return
rval
except
Exception
:
traceback
.
print_exc
()
...
...
theano/tensor/slinalg.py
浏览文件 @
bd11e130
import
logging
logger
=
logging
.
getLogger
(
__name__
)
import
numpy
import
warnings
from
six.moves
import
xrange
from
theano.gof
import
Op
,
Apply
from
theano.tensor
import
as_tensor_variable
,
dot
,
DimShuffle
,
Dot
from
theano.tensor.blas
import
Dot22
from
theano
import
tensor
import
theano.tensor
from
theano.tensor.opt
import
(
register_stabilize
,
register_specialize
,
register_canonicalize
)
from
theano.gof
import
local_optimizer
from
theano.gof.opt
import
Optimizer
from
theano.gradient
import
DisconnectedType
import
numpy
try
:
import
scipy.linalg
...
...
@@ -24,6 +11,13 @@ except ImportError:
# some ops (e.g. Cholesky, Solve, A_Xinv_b) won't work
imported_scipy
=
False
from
theano
import
tensor
import
theano.tensor
from
theano.tensor
import
as_tensor_variable
from
theano.gof
import
Op
,
Apply
logger
=
logging
.
getLogger
(
__name__
)
MATRIX_STRUCTURES
=
(
'general'
,
'symmetric'
,
...
...
@@ -123,7 +117,6 @@ class CholeskyGrad(Op):
F
[
k
,
k
]
/=
(
2
*
L
[
k
,
k
])
else
:
F
=
numpy
.
triu
(
dz
)
M
=
N
-
1
for
k
in
xrange
(
N
-
1
,
-
1
,
-
1
):
for
j
in
xrange
(
k
+
1
,
N
):
for
i
in
xrange
(
j
,
N
):
...
...
@@ -182,7 +175,7 @@ class Solve(Op):
else
:
rval
=
scipy
.
linalg
.
solve
(
A
,
b
)
output_storage
[
0
][
0
]
=
rval
# computes shape of x where x = inv(A) * b
def
infer_shape
(
self
,
node
,
shapes
):
Ashape
,
Bshape
=
shapes
...
...
theano/tensor/sort.py
浏览文件 @
bd11e130
...
...
@@ -28,7 +28,7 @@ class SortOp(theano.Op):
def
make_node
(
self
,
input
,
axis
=-
1
):
input
=
theano
.
tensor
.
as_tensor_variable
(
input
)
if
(
axis
is
None
or
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
axis
=
theano
.
Constant
(
theano
.
gof
.
generic
,
None
)
# axis=None flattens the array before sorting
out_type
=
tensor
(
dtype
=
input
.
dtype
,
broadcastable
=
[
False
])
...
...
@@ -45,7 +45,7 @@ class SortOp(theano.Op):
def
infer_shape
(
self
,
node
,
inputs_shapes
):
if
(
isinstance
(
node
.
inputs
[
1
],
theano
.
Constant
)
and
node
.
inputs
[
1
]
.
data
is
None
):
node
.
inputs
[
1
]
.
data
is
None
):
# That means axis = None,
# So the array is flattened before being sorted
return
[(
mul
(
*
inputs_shapes
[
0
]),)]
...
...
@@ -64,16 +64,17 @@ class SortOp(theano.Op):
" matrix (and axis is None or 0) and tensor3"
)
if
a
.
ndim
==
1
:
idx
=
argsort
(
*
inputs
,
kind
=
self
.
kind
,
order
=
self
.
order
)
#
rev_idx = numpy.where(idx[None, :]==numpy.arange(5)[:,None])[1]
#
rev_idx = numpy.where(idx[None, :]==numpy.arange(5)[:,None])[1]
rev_idx
=
theano
.
tensor
.
eq
(
idx
[
None
,
:],
arange
(
a
.
shape
[
0
])[:,
None
])
.
nonzero
()[
1
]
inp_grad
=
output_grads
[
0
][
rev_idx
]
elif
a
.
ndim
==
2
:
if
(
axis
is
None
or
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
idx
=
argsort
(
*
inputs
,
kind
=
self
.
kind
,
order
=
self
.
order
)
rev_idx
=
theano
.
tensor
.
eq
(
idx
[
None
,
:],
arange
(
a
.
shape
[
0
]
*
a
.
shape
[
1
])[:,
None
])
.
nonzero
()[
1
]
rev_idx
=
theano
.
tensor
.
eq
(
idx
[
None
,
:],
arange
(
a
.
shape
[
0
]
*
a
.
shape
[
1
])[:,
None
])
.
nonzero
()[
1
]
inp_grad
=
output_grads
[
0
][
rev_idx
]
.
reshape
(
a
.
shape
)
elif
(
axis
==
0
or
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
==
0
)):
...
...
@@ -85,7 +86,7 @@ class SortOp(theano.Op):
indices
=
self
.
__get_argsort_indices
(
a
,
axis
)
inp_grad
=
output_grads
[
0
][
indices
[
0
],
indices
[
1
],
indices
[
2
]]
elif
(
axis
is
None
or
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
rev_idx
=
self
.
__get_argsort_indices
(
a
,
axis
)
inp_grad
=
output_grads
[
0
][
rev_idx
]
.
reshape
(
a
.
shape
)
axis_grad
=
theano
.
gradient
.
grad_undefined
(
...
...
@@ -103,13 +104,13 @@ class SortOp(theano.Op):
list of lenght len(a.shape) otherwise
"""
# The goal is to get gradient wrt input from gradient
# The goal is to get gradient wrt input from gradient
# wrt sort(input, axis)
idx
=
argsort
(
a
,
axis
,
kind
=
self
.
kind
,
order
=
self
.
order
)
# rev_idx is the reverse of previous argsort operation
rev_idx
=
argsort
(
idx
,
axis
,
kind
=
self
.
kind
,
order
=
self
.
order
)
# rev_idx is the reverse of previous argsort operation
rev_idx
=
argsort
(
idx
,
axis
,
kind
=
self
.
kind
,
order
=
self
.
order
)
if
(
axis
is
None
or
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
return
rev_idx
indices
=
[]
if
axis
.
data
>=
0
:
...
...
@@ -120,7 +121,7 @@ class SortOp(theano.Op):
if
i
==
axis_data
:
indices
.
append
(
rev_idx
)
else
:
index_shape
=
[
1
]
*
a
.
ndim
index_shape
=
[
1
]
*
a
.
ndim
index_shape
[
i
]
=
a
.
shape
[
i
]
# it's a way to emulate numpy.ogrid[0: a.shape[0], 0: a.shape[1], 0: a.shape[2]]
indices
.
append
(
theano
.
tensor
.
arange
(
a
.
shape
[
i
])
.
reshape
(
index_shape
))
...
...
@@ -178,28 +179,27 @@ class ArgSortOp(theano.Op):
return
hash
(
type
(
self
))
^
hash
(
self
.
order
)
^
hash
(
self
.
kind
)
def
__str__
(
self
):
return
(
self
.
__class__
.
__name__
+
"{
%
s,
%
s}"
%
(
self
.
kind
,
str
(
self
.
order
)))
return
(
self
.
__class__
.
__name__
+
"{
%
s,
%
s}"
%
(
self
.
kind
,
str
(
self
.
order
)))
def
make_node
(
self
,
input
,
axis
=-
1
):
input
=
theano
.
tensor
.
as_tensor_variable
(
input
)
if
(
axis
is
None
or
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
(
isinstance
(
axis
,
theano
.
Constant
)
and
axis
.
data
is
None
)):
axis
=
theano
.
Constant
(
theano
.
gof
.
generic
,
None
)
bcast
=
[
False
]
else
:
axis
=
theano
.
tensor
.
as_tensor_variable
(
axis
)
bcast
=
input
.
type
.
broadcastable
return
theano
.
Apply
(
self
,
[
input
,
axis
],
[
theano
.
tensor
.
TensorType
(
dtype
=
"int64"
,
broadcastable
=
bcast
)()])
return
theano
.
Apply
(
self
,
[
input
,
axis
],
[
theano
.
tensor
.
TensorType
(
dtype
=
"int64"
,
broadcastable
=
bcast
)()])
def
perform
(
self
,
node
,
inputs
,
output_storage
):
a
=
inputs
[
0
]
axis
=
inputs
[
1
]
z
=
output_storage
[
0
]
z
[
0
]
=
theano
.
_asarray
(
np
.
argsort
(
a
,
axis
,
self
.
kind
,
self
.
order
),
dtype
=
node
.
outputs
[
0
]
.
dtype
)
z
[
0
]
=
theano
.
_asarray
(
np
.
argsort
(
a
,
axis
,
self
.
kind
,
self
.
order
),
dtype
=
node
.
outputs
[
0
]
.
dtype
)
def
infer_shape
(
self
,
node
,
inputs_shapes
):
if
(
isinstance
(
node
.
inputs
[
1
],
theano
.
Constant
)
and
...
...
theano/tensor/subtensor.py
浏览文件 @
bd11e130
from
copy
import
copy
import
os
import
sys
from
textwrap
import
dedent
import
warnings
import
logging
_logger
=
logging
.
getLogger
(
"theano.tensor.subtensor"
)
import
numpy
from
six.moves
import
xrange
...
...
@@ -32,6 +30,7 @@ if config.cxx:
except
ImportError
:
pass
_logger
=
logging
.
getLogger
(
"theano.tensor.subtensor"
)
# Do a lazy import of the sparse module
sparse_module_ref
=
None
...
...
@@ -336,9 +335,9 @@ class Subtensor(Op):
theano
.
tensor
.
wscalar
,
theano
.
tensor
.
bscalar
]
invalid_tensor_types
=
[
theano
.
tensor
.
fscalar
,
theano
.
tensor
.
dscalar
,
theano
.
tensor
.
cscalar
,
theano
.
tensor
.
zscalar
]
if
(
isinstance
(
entry
,
gof
.
Variable
)
and
(
entry
.
type
in
invalid_scal_types
or
entry
.
type
in
invalid_tensor_types
)):
if
(
isinstance
(
entry
,
gof
.
Variable
)
and
(
entry
.
type
in
invalid_scal_types
or
entry
.
type
in
invalid_tensor_types
)):
raise
TypeError
(
"Expected an integer"
)
if
isinstance
(
entry
,
gof
.
Variable
)
and
entry
.
type
in
scal_types
:
...
...
@@ -346,13 +345,13 @@ class Subtensor(Op):
elif
isinstance
(
entry
,
gof
.
Type
)
and
entry
in
scal_types
:
return
entry
if
(
isinstance
(
entry
,
gof
.
Variable
)
and
entry
.
type
in
tensor_types
and
numpy
.
all
(
entry
.
type
.
broadcastable
)):
if
(
isinstance
(
entry
,
gof
.
Variable
)
and
entry
.
type
in
tensor_types
and
numpy
.
all
(
entry
.
type
.
broadcastable
)):
return
scal
.
get_scalar_type
(
entry
.
type
.
dtype
)
elif
(
isinstance
(
entry
,
gof
.
Type
)
and
entry
in
tensor_types
and
numpy
.
all
(
entry
.
broadcastable
)):
elif
(
isinstance
(
entry
,
gof
.
Type
)
and
entry
in
tensor_types
and
numpy
.
all
(
entry
.
broadcastable
)):
return
scal
.
get_scalar_type
(
entry
.
dtype
)
elif
slice_ok
and
isinstance
(
entry
,
slice
):
a
=
entry
.
start
...
...
@@ -425,8 +424,9 @@ class Subtensor(Op):
conv
(
val
.
step
))
else
:
try
:
return
get_scalar_constant_value
(
val
,
only_process_constants
=
only_process_constants
)
return
get_scalar_constant_value
(
val
,
only_process_constants
=
only_process_constants
)
except
theano
.
tensor
.
NotScalarConstantError
:
if
allow_partial
:
return
val
...
...
@@ -477,8 +477,8 @@ class Subtensor(Op):
%
(
input
.
type
,
expected_type
))
# infer the broadcasting pattern
padded
=
(
self
.
get_constant_idx
((
None
,)
+
inputs
,
allow_partial
=
True
)
+
[
slice
(
None
,
None
,
None
)]
*
(
x
.
type
.
ndim
-
len
(
idx_list
)))
padded
=
(
self
.
get_constant_idx
((
None
,)
+
inputs
,
allow_partial
=
True
)
+
[
slice
(
None
,
None
,
None
)]
*
(
x
.
type
.
ndim
-
len
(
idx_list
)))
broadcastable
=
[]
for
i
,
(
p
,
bc
)
in
enumerate
(
izip
(
padded
,
x
.
type
.
broadcastable
)):
if
isinstance
(
p
,
slice
):
...
...
@@ -528,9 +528,9 @@ class Subtensor(Op):
if
isinstance
(
idx
,
slice
):
# If it is the default (None, None, None) slice, or a variant,
# the shape will be xl
if
((
idx
.
start
in
[
None
,
0
])
and
(
idx
.
stop
in
[
None
,
sys
.
maxsize
])
and
(
idx
.
step
is
None
or
idx
.
step
==
1
)):
if
((
idx
.
start
in
[
None
,
0
])
and
(
idx
.
stop
in
[
None
,
sys
.
maxsize
])
and
(
idx
.
step
is
None
or
idx
.
step
==
1
)):
outshp
.
append
(
xl
)
else
:
cnf
=
get_canonical_form_slice
(
idx
,
xl
)[
0
]
...
...
@@ -556,8 +556,7 @@ class Subtensor(Op):
first
=
x
.
zeros_like
()
.
astype
(
theano
.
config
.
floatX
)
else
:
first
=
IncSubtensor
(
self
.
idx_list
)(
x
.
zeros_like
(),
gz
,
*
rest
)
return
([
first
]
+
[
DisconnectedType
()()]
*
len
(
rest
))
return
([
first
]
+
[
DisconnectedType
()()]
*
len
(
rest
))
def
connection_pattern
(
self
,
node
):
...
...
@@ -1034,8 +1033,7 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
dim_offset
=
x
.
ndim
-
y
.
ndim
for
dim
in
xrange
(
y
.
ndim
):
if
(
x
.
broadcastable
[
dim
+
dim_offset
]
and
not
y
.
broadcastable
[
dim
]):
if
(
x
.
broadcastable
[
dim
+
dim_offset
]
and
not
y
.
broadcastable
[
dim
]):
# It is acceptable to try to increment a subtensor with a
# broadcastable dim with a tensor that is not broadcastable
# on that dimension. However, its length must then be 1.
...
...
@@ -2133,9 +2131,9 @@ class AdvancedIncSubtensor(Op):
return
hash
((
type
(
self
),
self
.
inplace
,
self
.
set_instead_of_inc
))
def
__eq__
(
self
,
other
):
return
(
type
(
self
)
==
type
(
other
)
and
self
.
inplace
==
other
.
inplace
and
self
.
set_instead_of_inc
==
other
.
set_instead_of_inc
)
return
(
type
(
self
)
==
type
(
other
)
and
self
.
inplace
==
other
.
inplace
and
self
.
set_instead_of_inc
==
other
.
set_instead_of_inc
)
def
__str__
(
self
):
return
"
%
s{
%
s,
%
s}"
%
(
self
.
__class__
.
__name__
,
...
...
theano/tensor/utils.py
浏览文件 @
bd11e130
...
...
@@ -79,11 +79,11 @@ def shape_of_variables(fgraph, input_shapes):
if
not
hasattr
(
fgraph
,
'shape_feature'
):
fgraph
.
attach_feature
(
theano
.
tensor
.
opt
.
ShapeFeature
())
input_dims
=
[
dimension
for
inp
in
fgraph
.
inputs
for
dimension
in
fgraph
.
shape_feature
.
shape_of
[
inp
]]
input_dims
=
[
dimension
for
inp
in
fgraph
.
inputs
for
dimension
in
fgraph
.
shape_feature
.
shape_of
[
inp
]]
output_dims
=
[
dimension
for
shape
in
fgraph
.
shape_feature
.
shape_of
.
values
()
for
dimension
in
shape
]
for
dimension
in
shape
]
compute_shapes
=
theano
.
function
(
input_dims
,
output_dims
)
...
...
@@ -93,8 +93,8 @@ def shape_of_variables(fgraph, input_shapes):
" interface changed. Now by default, it clones the graph it receives."
" To have the old behavior, give it this new parameter `clone=False`."
)
numeric_input_dims
=
[
dim
for
inp
in
fgraph
.
inputs
for
dim
in
input_shapes
[
inp
]]
numeric_input_dims
=
[
dim
for
inp
in
fgraph
.
inputs
for
dim
in
input_shapes
[
inp
]]
numeric_output_dims
=
compute_shapes
(
*
numeric_input_dims
)
sym_to_num_dict
=
dict
(
izip
(
output_dims
,
numeric_output_dims
))
...
...
theano/tensor/var.py
浏览文件 @
bd11e130
import
copy
import
pdb
import
sys
import
traceback
as
tb
import
warnings
...
...
@@ -41,9 +39,9 @@ class _tensor_py_operators:
# CASTS
# REMOVED THESE BECAUSE PYTHON appears to require __int__ to return
# an int. -JB 20081112
#def __int__(self): return convert_to_int32(self)
#def __float__(self): return convert_to_float64(self)
#def __complex__(self): return convert_to_complex128(self)
#
def __int__(self): return convert_to_int32(self)
#
def __float__(self): return convert_to_float64(self)
#
def __complex__(self): return convert_to_complex128(self)
# COMPARISONS
_is_nonzero
=
True
...
...
@@ -68,7 +66,6 @@ class _tensor_py_operators:
rval
.
_is_nonzero
=
False
return
rval
def
__nonzero__
(
self
):
# Python 2.x
return
self
.
__bool__
()
...
...
@@ -215,7 +212,7 @@ class _tensor_py_operators:
# DO NOT USE THESE BECAUSE INPLACE OPS SHOULD BE INSERTED
# BY OPTIMIZATIONS ONLY
#
#
ARITHMETIC - INPLACE
# ARITHMETIC - INPLACE
# def __iadd__(self, other):
# return _add_inplace(self, other)
# def __isub__(self, other):
...
...
@@ -642,7 +639,8 @@ class TensorVariable(_tensor_py_operators, Variable):
elif
config
.
warn_float64
==
"raise"
:
raise
Exception
(
msg
)
elif
config
.
warn_float64
==
'pdb'
:
import
pdb
;
pdb
.
set_trace
()
import
pdb
pdb
.
set_trace
()
TensorType
.
Variable
=
TensorVariable
...
...
@@ -744,8 +742,8 @@ class TensorConstant(_tensor_py_operators, Constant):
def
__init__
(
self
,
type
,
data
,
name
=
None
):
Constant
.
__init__
(
self
,
type
,
data
,
name
)
if
(
isinstance
(
data
,
numpy
.
ndarray
)
and
data
.
ndim
>
0
and
len
(
numpy
.
unique
(
data
))
==
1
):
data
.
ndim
>
0
and
len
(
numpy
.
unique
(
data
))
==
1
):
self
.
tag
.
unique_value
=
numpy
.
unique
(
data
)[
0
]
else
:
self
.
tag
.
unique_value
=
None
...
...
theano/tensor/xlogx.py
浏览文件 @
bd11e130
...
...
@@ -13,12 +13,15 @@ class XlogX(scalar.UnaryScalarOp):
if
x
==
0.0
:
return
0.0
return
x
*
numpy
.
log
(
x
)
def
impl
(
self
,
x
):
return
XlogX
.
st_impl
(
x
)
def
grad
(
self
,
inputs
,
grads
):
x
,
=
inputs
gz
,
=
grads
return
[
gz
*
(
1
+
scalar
.
log
(
x
))]
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
x
,
=
inputs
z
,
=
outputs
...
...
@@ -28,7 +31,8 @@ class XlogX(scalar.UnaryScalarOp):
? 0.0
:
%(x)
s * log(
%(x)
s);"""
%
locals
()
raise
NotImplementedError
(
'only floatingpoint is implemented'
)
scalar_xlogx
=
XlogX
(
scalar
.
upgrade_to_float
,
name
=
'scalar_xlogx'
)
scalar_xlogx
=
XlogX
(
scalar
.
upgrade_to_float
,
name
=
'scalar_xlogx'
)
xlogx
=
Elemwise
(
scalar_xlogx
,
name
=
'xlogx'
)
...
...
@@ -41,12 +45,15 @@ class XlogY0(scalar.BinaryScalarOp):
if
x
==
0.0
:
return
0.0
return
x
*
numpy
.
log
(
y
)
def
impl
(
self
,
x
,
y
):
return
XlogY0
.
st_impl
(
x
,
y
)
def
grad
(
self
,
inputs
,
grads
):
x
,
y
=
inputs
gz
,
=
grads
return
[
gz
*
scalar
.
log
(
y
),
gz
*
x
/
y
]
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
x
,
y
=
inputs
z
,
=
outputs
...
...
@@ -56,5 +63,6 @@ class XlogY0(scalar.BinaryScalarOp):
? 0.0
:
%(x)
s * log(
%(y)
s);"""
%
locals
()
raise
NotImplementedError
(
'only floatingpoint is implemented'
)
scalar_xlogy0
=
XlogY0
(
scalar
.
upgrade_to_float
,
name
=
'scalar_xlogy0'
)
scalar_xlogy0
=
XlogY0
(
scalar
.
upgrade_to_float
,
name
=
'scalar_xlogy0'
)
xlogy0
=
Elemwise
(
scalar_xlogy0
,
name
=
'xlogy0'
)
theano/tests/test_flake8.py
浏览文件 @
bd11e130
...
...
@@ -57,30 +57,17 @@ whitelist_flake8 = [
"typed_list/tests/test_type.py"
,
"typed_list/tests/test_opt.py"
,
"typed_list/tests/test_basic.py"
,
"tensor/var.py"
,
"tensor/sharedvar.py"
,
"tensor/inplace.py"
,
"tensor/slinalg.py"
,
"tensor/shared_randomstreams.py"
,
"tensor/subtensor.py"
,
"tensor/elemwise.py"
,
"tensor/xlogx.py"
,
"tensor/blas_headers.py"
,
"tensor/utils.py"
,
"tensor/type.py"
,
"tensor/fourier.py"
,
"tensor/sort.py"
,
"tensor/__init__.py"
,
"tensor/opt_uncanonicalize.py"
,
"tensor/opt.py"
,
"tensor/blas.py"
,
"tensor/extra_ops.py"
,
"tensor/nlinalg.py"
,
"tensor/blas_c.py"
,
"tensor/elemwise_cgen.py"
,
"tensor/raw_random.py"
,
"tensor/blas_scipy.py"
,
"tensor/basic.py"
,
"tensor/tests/test_subtensor.py"
,
"tensor/tests/test_utils.py"
,
"tensor/tests/test_nlinalg.py"
,
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论