Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
cfc588cc
提交
cfc588cc
authored
11月 09, 2010
作者:
Razvan Pascanu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Removed trailing spaces
上级
ccb73dab
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
164 行增加
和
166 行删除
+164
-166
basic.py
theano/tensor/basic.py
+90
-90
opt.py
theano/tensor/opt.py
+74
-76
没有找到文件。
theano/tensor/basic.py
浏览文件 @
cfc588cc
...
...
@@ -57,7 +57,7 @@ __oplist_constructor_list = []
"""List of functions to be listed as op constructors in the oplist (`gen_oplist`, doc/oplist.txt)."""
def
constructor
(
f
):
"""Add `f` to :doc:`oplist`.
Make `f` appear as a constructor in the oplist (`gen_oplist`, doc/oplist.txt).
"""
__oplist_constructor_list
.
append
(
f
)
...
...
@@ -80,7 +80,7 @@ if 0:
if
hasattr
(
x
,
'_as_CudaNdarrayVariable'
):
return
x
.
_as_CudaNdarrayVariable
()
#TODO: pass name and ndim arguments
return
as_tensor_variable
(
x
,
name
,
ndim
)
def
as_tensor_variable
(
x
,
name
=
None
,
ndim
=
None
):
"""Return `x`, transformed into a `TensorType`
...
...
@@ -158,7 +158,7 @@ class NumpyAutocaster(object):
When config.floatX is float32 (at the time of calling), then this function downcasts float
and numpy.float arguments to numpy.float32, if float32 is in the self.dtypes list.
Python ints are always 64bit and floats are always double precision.
This class uses the algorithm in __call__ to use a narrower dtype when no precision would
be lost, and to even lose precision when this is demanded by the list of dtypes (e.g. to
...
...
@@ -182,7 +182,7 @@ class NumpyAutocaster(object):
# recall: float is numpy.float
if
isinstance
(
x
,
float
)
and
config
.
floatX
in
self
.
dtypes
and
config
.
floatX
==
'float32'
:
return
theano
.
_asarray
(
x
,
dtype
=
'float32'
)
for
dtype
in
self
.
dtypes
:
x_
=
theano
.
_asarray
(
x
,
dtype
=
dtype
)
if
numpy
.
all
(
x
==
x_
):
...
...
@@ -200,7 +200,7 @@ autocast_float = NumpyAutocaster(('float32', 'float64'))
# this autocasting, and in future, our ops might be smarter about factoring out upcasts. The
# advantage of this mechanism is to combine it with floatX so that 1.0 + xmatrix() will always
# have the same type as the xmatrix().
#
#
class
autocast_float_as
(
object
):
"""This class makes it possible to temporarily and locally adjust autocasting behaviour.
...
...
@@ -222,7 +222,7 @@ class autocast_float_as(object):
def
constant_or_value
(
x
,
rtype
,
name
=
None
,
ndim
=
None
,
dtype
=
None
):
"""Return a symbolic `Constant` with value `x`
:Exceptions:
- `TypeError`: `x` could not be converted to a numpy.ndarray
- `ValueError`: `x` could not be expanded to have ndim dimensions
...
...
@@ -295,19 +295,19 @@ if int(config.tensor.cmp_sloppy)>1:
# useful to test the GPU as they don't use extended precision and
# this cause some difference bigger then the normal sloppy.
float32_atol
=
5e-4
float32_rtol
=
1e-3
float32_rtol
=
1e-3
float64_rtol
=
1e-4
float64_atol
=
1e-3
elif
int
(
config
.
tensor
.
cmp_sloppy
):
float32_atol
=
1e-4
float32_rtol
=
1e-3
float32_rtol
=
1e-3
float64_rtol
=
1e-4
float64_atol
=
1e-3
else
:
#If you change those value in test don't forget to put them back when the test end.
#Don't forget the case when the test fail.
float32_atol
=
1e-5
float32_rtol
=
1e-3
float32_rtol
=
1e-3
# defaults in numpy.allclose
float64_rtol
=
1.0000000000000001e-05
...
...
@@ -395,7 +395,7 @@ class TensorType(Type):
if
self
.
dtype
==
'floatX'
:
self
.
dtype
=
config
.
floatX
### broadcastable is immutable, and all elements are either True or False
self
.
broadcastable
=
tuple
(
bool
(
b
)
for
b
in
broadcastable
)
self
.
broadcastable
=
tuple
(
bool
(
b
)
for
b
in
broadcastable
)
self
.
dtype_specs
()
# error checking is done there
self
.
name
=
name
self
.
numpy_dtype
=
numpy
.
dtype
(
self
.
dtype
)
...
...
@@ -438,12 +438,12 @@ class TensorType(Type):
except
Exception
,
e
:
return
str
(
e
)
return
"value is valid"
def
dtype_specs
(
self
):
"""Return a tuple (python type, c type, numpy typenum) that corresponds to
self.dtype.
This function is used internally as part of C code generation.
"""
#TODO: add more type correspondances for e.g. int32, int64, float32,
...
...
@@ -483,7 +483,7 @@ class TensorType(Type):
a_eq_b
=
(
a
==
b
)
r
=
numpy
.
all
(
a_eq_b
)
if
r
:
return
True
# maybe the trouble is that there are NaNs
# maybe the trouble is that there are NaNs
a_missing
=
numpy
.
isnan
(
a
)
if
a_missing
.
any
():
b_missing
=
numpy
.
isnan
(
b
)
...
...
@@ -546,7 +546,7 @@ class TensorType(Type):
#set it to False
cmp_elemwise
=
numpy
.
where
(
both_inf
&
cmp_elemwise
,
a
==
b
,
cmp_elemwise
)
#check the sign of the inf
both_inf
=
numpy
.
where
(
both_inf
,
a
==
b
,
both_inf
)
...
...
@@ -554,7 +554,7 @@ class TensorType(Type):
both_inf
+=
a_inf
if
allow_remove_nan
:
both_missing
+=
a_missing
# Combine all information.
return
(
cmp_elemwise
+
both_missing
+
both_inf
)
.
all
()
...
...
@@ -885,7 +885,7 @@ class _tensor_py_operators:
def
__abs__
(
self
):
return
abs_
(
self
)
def
__neg__
(
self
):
return
neg
(
self
)
#CASTS
#CASTS
#### REMOVED THESE BECAUSE PYTHON appears to require __int__ to return an int. -JB 20081112
#def __int__(self): return convert_to_int32(self)
#def __float__(self): return convert_to_float64(self)
...
...
@@ -898,7 +898,7 @@ class _tensor_py_operators:
def
__ge__
(
self
,
other
):
return
ge
(
self
,
other
)
#BITWISE
def
__invert__
(
self
):
return
invert
(
self
)
def
__invert__
(
self
):
return
invert
(
self
)
def
__and__
(
self
,
other
):
return
and_
(
self
,
other
)
def
__or__
(
self
,
other
):
return
or_
(
self
,
other
)
def
__xor__
(
self
,
other
):
return
xor
(
self
,
other
)
...
...
@@ -910,27 +910,27 @@ class _tensor_py_operators:
# def __ixor__(self, other): return _xor_inplace(self, other)
#ARITHMETIC - NORMAL
def
__add__
(
self
,
other
):
def
__add__
(
self
,
other
):
try
:
return
add
(
self
,
other
)
except
Exception
,
e
:
return
NotImplemented
def
__sub__
(
self
,
other
):
def
__sub__
(
self
,
other
):
try
:
return
sub
(
self
,
other
)
except
Exception
,
e
:
return
NotImplemented
def
__mul__
(
self
,
other
):
try
:
def
__mul__
(
self
,
other
):
try
:
return
mul
(
self
,
other
)
except
Exception
,
e
:
return
NotImplemented
def
__div__
(
self
,
other
):
try
:
def
__div__
(
self
,
other
):
try
:
return
div_proxy
(
self
,
other
)
except
Exception
,
e
:
return
NotImplemented
def
__pow__
(
self
,
other
):
def
__pow__
(
self
,
other
):
try
:
return
pow
(
self
,
other
)
except
Exception
,
e
:
...
...
@@ -1031,12 +1031,12 @@ class _tensor_py_operators:
def
__getslice__
(
self
,
*
args
):
args
=
slice
(
*
args
),
return
self
.
__getitem__
(
args
)
#COPYING
def
copy
(
self
):
return
tensor_copy
(
self
)
def
__iter__
(
self
):
def
__iter__
(
self
):
try
:
for
i
in
xrange
(
get_vector_length
(
self
)):
yield
self
[
i
]
...
...
@@ -1044,7 +1044,7 @@ class _tensor_py_operators:
# This prevents accidental iteration via builtin.sum(self)
raise
TypeError
(
'TensorType does not support iteration. '
'Maybe you are using builtin.sum instead of theano.tensor.sum? (Maybe .max?)'
)
# CONVENIENT ACCESS TO TYPE PROPERTIES
ndim
=
property
(
lambda
self
:
self
.
type
.
ndim
)
...
...
@@ -1053,7 +1053,7 @@ class _tensor_py_operators:
"""The broadcastable signature of this tensor.
See :doc:`broadcasting` for details.
"""
dtype
=
property
(
lambda
self
:
self
.
type
.
dtype
)
""" The dtype of this tensor. """
...
...
@@ -1095,7 +1095,7 @@ class _tensor_py_operators:
def
get_constant_value
(
self
):
return
get_constant_value
(
self
)
class
TensorVariable
(
Variable
,
_tensor_py_operators
):
"""Subclass to add the tensor operators to the basic `Variable` class."""
TensorType
.
Variable
=
TensorVariable
...
...
@@ -1115,7 +1115,7 @@ class TensorConstantSignature(tuple):
#N.B. compare shape to ensure no broadcasting in ==
#N.B. compare elementwise last because it is the most expensive check
return
(
t0
==
t1
)
and
(
d0
.
shape
==
d1
.
shape
)
\
and
(
self
.
sum
==
other
.
sum
)
and
(
numpy
.
all
(
d0
==
d1
))
and
(
self
.
sum
==
other
.
sum
)
and
(
numpy
.
all
(
d0
==
d1
))
def
__hash__
(
self
):
t
,
d
=
self
return
hashtype
(
self
)
^
hash
(
t
)
^
hash
(
d
.
shape
)
^
hash
(
self
.
sum
)
...
...
@@ -1130,7 +1130,7 @@ class TensorConstantSignature(tuple):
class
TensorConstant
(
Constant
,
_tensor_py_operators
):
"""Subclass to add the tensor operators to the basic `Constant` class.
To create a TensorConstant, use the `constant` function in this module.
"""
def
signature
(
self
):
...
...
@@ -1139,7 +1139,7 @@ TensorType.Constant = TensorConstant
class
TensorValue
(
Value
,
_tensor_py_operators
):
"""Subclass to add the tensor operators to the basic `Value` class.
To create a TensorValue, use the `value` function in this module.
:note: Value is deprecated by SharedVariable
...
...
@@ -1167,8 +1167,8 @@ def _elemwise(scalar_op, name, doc_prefix=''):
inplace
=
elemwise
.
Elemwise
(
inplace_scalar_op
,
{
0
:
0
},
name
=
name
+
"_inplace"
)
# don't add the inplace versions, they aren't supposed to be part of the user interface
_constructor_list
.
append
(
straight
)
_constructor_list
.
append
(
straight
)
# This is here so that gen_oplist can detect which module declared these variables.
straight
.
__module__
=
'tensor'
...
...
@@ -1181,7 +1181,7 @@ def _elemwise(scalar_op, name, doc_prefix=''):
def
_redefine
(
real_symbol_value
,
module
=
'tensor'
):
"""Replace the value associated with a function symbol.
This is useful to trick epydoc into doing what we want. It's a hack.
"""
real_symbol_value
.
__module__
=
'tensor.basic'
...
...
@@ -1275,7 +1275,7 @@ def _conversion(real_value, name):
_convert_to_int8
=
_conversion
(
elemwise
.
Elemwise
(
scal
.
convert_to_int8
),
'int8'
)
"""Cast to 8-bit integer"""
_convert_to_int16
=
_conversion
(
elemwise
.
Elemwise
(
scal
.
convert_to_int16
),
'int16'
)
"""Cast to 16-bit integer"""
...
...
@@ -1287,7 +1287,7 @@ _convert_to_int64 = _conversion(elemwise.Elemwise(scal.convert_to_int64), 'int64
_convert_to_uint8
=
_conversion
(
elemwise
.
Elemwise
(
scal
.
convert_to_uint8
),
'uint8'
)
"""Cast to unsigned 8-bit integer"""
_convert_to_uint16
=
_conversion
(
elemwise
.
Elemwise
(
scal
.
convert_to_uint16
),
'uint16'
)
"""Cast to unsigned 16-bit integer"""
...
...
@@ -1324,9 +1324,9 @@ _cast_mapping = {
'complex128'
:
_convert_to_complex128
}
@constructor
def
cast
(
x
,
dtype
):
"""Symbolically cast `x` to a Tensor of type `dtype`."""
"""Symbolically cast `x` to a Tensor of type `dtype`."""
if
dtype
==
'floatX'
:
dtype
=
config
.
floatX
_x
=
as_tensor_variable
(
x
)
if
_x
.
type
.
dtype
==
dtype
:
return
_x
...
...
@@ -1382,7 +1382,7 @@ pprint.assign(_shape, printing.MemberPrinter('shape'))
class
MaxAndArgmax
(
Op
):
"""Calculate the max and argmax over a given axis.
.. note::
If axis is None it means to calculate the max over the last dimension which is
...
...
@@ -1393,7 +1393,7 @@ class MaxAndArgmax(Op):
nin
=
2
# tensor, axis
nout
=
2
# max val, max idx
E_axis
=
'invalid axis'
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
...
...
@@ -1422,7 +1422,7 @@ class MaxAndArgmax(Op):
inputs
=
[
x
,
axis
]
#TODO: figure things out if axis is a constant
broadcastable
=
[
False
]
*
(
x
.
type
.
ndim
-
1
)
outputs
=
[
tensor
(
x
.
type
.
dtype
,
broadcastable
,
name
=
'max'
),
outputs
=
[
tensor
(
x
.
type
.
dtype
,
broadcastable
,
name
=
'max'
),
tensor
(
'int32'
,
broadcastable
,
name
=
'argmax'
)]
return
Apply
(
self
,
inputs
,
outputs
)
def
perform
(
self
,
node
,
(
x
,
axis
),
(
max
,
max_idx
)):
...
...
@@ -1445,7 +1445,7 @@ class MaxAndArgmax(Op):
# gMax * dMax/dx + gArgMax * dArgMax/dx, gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete g_max to x's shape
# when axis=0 the broadcasting mechanism does it automatically
if
not
(
axis
.
data
==
0
or
axis
.
data
==
x
.
ndim
-
1
):
raise
NotImplementedError
(
'MaxAndArgmax gradient with axis corresponding to internal dimension'
)
if
axis
.
data
==
0
:
...
...
@@ -1874,7 +1874,7 @@ if 0:
class
Alloc
(
gof
.
Op
):
"""Create a Tensor from an initial value and a desired shape
alloc(value, shape0, shape1, ..., shapeN)
alloc(value, shape0, shape1, ..., shapeN)
Returns an N-dimensional tensor initialized by `value` using something equivalent to
>>> z = numpy.zeros(shape, value.dtype)
...
...
@@ -1883,7 +1883,7 @@ class Alloc(gof.Op):
The result has N dimensions, has the dtype of `value` and is obtained by broadcasting value
over the output ndarray.
This Op is used to replace fill() during optimizations because after shapes are lifted,
This Op is used to replace fill() during optimizations because after shapes are lifted,
the first argument to fill can often be pruned from the graph.
"""
def
__init__
(
self
):
...
...
@@ -1943,7 +1943,7 @@ class Alloc(gof.Op):
pass
return
ret
alloc
=
Alloc
()
pprint
.
assign
(
alloc
,
printing
.
FunctionPrinter
(
'alloc'
))
...
...
@@ -2006,8 +2006,8 @@ def mean(input, axis = None, op = False):
:param axis: compute the mean along this axis of the tensor.
None means all axes (like numpy).
:type axis: None or int or (list of int) (see `Sum`)
:note: for gpu, if you manually cast the input to float32 before calling
:note: for gpu, if you manually cast the input to float32 before calling
mean, everything will be done on the gpu.
"""
if
op
:
...
...
@@ -2117,7 +2117,7 @@ class Default(gof.Op):
if
x
is
None
:
# why copy? Theano can't yet understand out[0] being a view of either x or y,
# so we can be a view of x, but only a copy of y.
out
[
0
]
=
default
.
copy
()
out
[
0
]
=
default
.
copy
()
else
:
out
[
0
]
=
x
default
=
Default
()
...
...
@@ -2221,7 +2221,7 @@ class Subtensor(Op):
integers are indexes into the inputs array, and the start/stop/step members
of each slice are also integer indexes into the inputs array (or None). The
inputs array is the tensor x, followed by scalar integer variables.
@todo: add support for advanced tensor indexing (in Subtensor_dx too).
The idx_list is a tuple similar in structure to the sort of key you might expect in numpy's
...
...
@@ -2246,7 +2246,7 @@ class Subtensor(Op):
elif
isinstance
(
entry
,
slice
):
helper
(
entry
.
start
)
helper
(
entry
.
stop
)
helper
(
entry
.
step
)
helper
(
entry
.
step
)
for
idx
in
idxs
:
helper
(
idx
)
return
ret
...
...
@@ -2312,7 +2312,7 @@ class Subtensor(Op):
def
make_node
(
self
,
x
,
*
inputs
):
x
=
as_tensor_variable
(
x
)
inputs
=
tuple
(
self
.
my_as_scalar
(
a
)
for
a
in
inputs
)
idx_list
=
list
(
self
.
idx_list
)
if
len
(
idx_list
)
>
x
.
type
.
ndim
:
raise
ValueError
(
Subtensor
.
e_invalid
,
...
...
@@ -2412,7 +2412,7 @@ class Subtensor(Op):
msg
+=
[(
entry
.
start
,
entry
.
stop
,
entry
.
step
)]
else
:
msg
+=
[
entry
]
idx_list
=
tuple
(
msg
)
#backport
#idx_list = tuple((entry.start, entry.stop, entry.step)
...
...
@@ -2472,7 +2472,7 @@ class SubtensorPrinter:
msg3
=
""
else
:
msg3
=
":
%
s"
%
entry
.
step
sidxs
.
append
(
"
%
s:
%
s
%
s"
%
(
msg1
,
msg2
,
msg3
))
#backport
#sidxs.append("%s:%s%s" % ("" if entry.start is None or entry.start == 0 else entry.start,
...
...
@@ -2531,10 +2531,10 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False):
class
IncSubtensor
(
Op
):
"""Increment a subtensor.
This is like numpy's
This is like numpy's
x[i,j,k] += y
It is used internally to implement the gradient on SubTensor.
:param set_instead_of_inc: if True set the subtensor to the value instead
...
...
@@ -2592,7 +2592,7 @@ class IncSubtensor(Op):
def
make_node
(
self
,
x
,
y
,
*
inputs
):
x
,
y
=
map
(
as_tensor_variable
,
[
x
,
y
])
inputs
=
tuple
(
map
(
Subtensor
.
my_as_scalar
,
inputs
))
idx_list
=
list
(
self
.
idx_list
)
if
len
(
idx_list
)
>
x
.
type
.
ndim
:
raise
ValueError
(
Subtensor
.
e_invalid
,
...
...
@@ -2671,11 +2671,11 @@ class Split(Op):
"""Partition a `TensorVariable` along some axis.
.. python::
x = vector()
splits = lvector()
# you have to declare right away how many split_points there will be.
ra, rb, rc = split(x, splits, n_splits = 3, axis = 0)
ra, rb, rc = split(x, splits, n_splits = 3, axis = 0)
f = function([x, splits], [ra, rb, rc])
...
...
@@ -2709,16 +2709,16 @@ class Split(Op):
node
=
self
.
make_node
(
*
inputs
,
**
kwargs
)
node
.
tag
.
trace
=
traceback
.
extract_stack
()[:
-
1
]
return
node
.
outputs
def
make_node
(
self
,
x
,
axis
,
splits
):
"""WRITEME"""
x
=
as_tensor_variable
(
x
)
axis
=
as_tensor_variable
(
axis
)
splits
=
as_tensor_variable
(
splits
)
if
splits
.
type
not
in
int_vector_types
:
if
splits
.
type
not
in
int_vector_types
:
raise
TypeError
(
'splits must have type tensor.lvector'
,
splits
.
type
)
if
axis
.
type
not
in
int_types
:
if
axis
.
type
not
in
int_types
:
raise
TypeError
(
'axis must have type lscalar'
,
axis
.
type
)
# # The following lines are necessary if we allow splits of zero
...
...
@@ -2738,21 +2738,21 @@ class Split(Op):
#in python 2.4, x.shape[numpy.asarray(1)] don't work.
if
sys
.
version_info
[
0
:
2
]
==
(
2
,
4
)
and
axis
.
size
==
1
:
axis
=
int
(
axis
)
try
:
len_along_axis
=
x
.
shape
[
axis
]
except
:
raise
ValueError
(
'Split.perform() with axis=(
%
s) is invalid for x.shape==(
%
s)'
%
(
axis
,
x
.
shape
))
if
len
(
splits
)
!=
self
.
len_splits
:
raise
ValueError
(
'In Split.perform(), len(splits) != len_splits.'
,
raise
ValueError
(
'In Split.perform(), len(splits) != len_splits.'
,
(
len
(
splits
),
self
.
len_splits
))
if
numpy
.
sum
(
splits
)
!=
len_along_axis
:
raise
ValueError
(
'The splits sum to
%
s, expected
%
s'
%
(
numpy
.
sum
(
splits
),
len_along_axis
))
if
not
all
(
splits
):
raise
ValueError
(
'Cannot have a split of zero.'
)
# Checking is done, let's roll the splitting algorithm!
# Basically we step along the given axis of x, extracting subtensors of size splits[i]
# as we go along.
...
...
@@ -2826,7 +2826,7 @@ def addbroadcast(x, *axes):
def
unbroadcast
(
x
,
*
axes
):
"""
Make the input impossible to broadcast in the specified axes.
We apply the opt here to don't pollute the graph especially during the gpu optimization
"""
rval
=
Rebroadcast
(
*
[(
axis
,
False
)
for
axis
in
axes
])(
x
)
...
...
@@ -2835,7 +2835,7 @@ def unbroadcast(x, *axes):
def
patternbroadcast
(
x
,
broadcastable
):
"""
Make the input impossible to broadcast in the specified axes.
We apply the opt here to don't pollute the graph especially during the gpu optimization
"""
rval
=
Rebroadcast
(
*
[(
i
,
broadcastable
[
i
])
for
i
in
range
(
len
(
broadcastable
))])(
x
)
...
...
@@ -2853,7 +2853,7 @@ class Join(Op):
For joins involving scalar values, see @stack.
.. python::
x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix()
u = tensor.vector()
...
...
@@ -2952,7 +2952,7 @@ class Join(Op):
return
[
None
]
+
split_gz
else
:
# assume that this isn't differentiable
return
[
None
]
*
(
1
+
len
(
tensors
))
return
[
None
]
*
(
1
+
len
(
tensors
))
def
_native_grad
(
self
,
axis_and_tensors
,
(
gz
,)):
"""WRITEME"""
...
...
@@ -3006,7 +3006,7 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join),
@constructor
def
shape_padleft
(
t
,
n_ones
=
1
):
"""Reshape `t` by left-padding the shape with `n_ones` 1s
See also: `shape_padright` and `Dimshuffle`
"""
_t
=
as_tensor_variable
(
t
)
...
...
@@ -3017,7 +3017,7 @@ def shape_padleft(t, n_ones=1):
@constructor
def
shape_padright
(
t
,
n_ones
=
1
):
"""Reshape `t` by right-padding the shape with `n_ones` 1s
See also: `shape_padleft` and `Dimshuffle`
"""
_t
=
as_tensor_variable
(
t
)
...
...
@@ -3045,10 +3045,10 @@ def stack(*tensors):
@constructor
def
concatenate
(
tensor_list
,
axis
=
0
):
"""Alias for `join`(axis, *tensor_list).
This function is similar to `join`, but uses the signature of numpy's concatenate function.
This function
This function
:Exceptions:
- `TypeError` : the tensor_list must be a tuple or list
...
...
@@ -3072,7 +3072,7 @@ def get_vector_length(v):
:Exceptions:
- `TypeError` : `v` hasn't the proper type.
- `ValueError` : No special case applies, the length is not known.
In general this is not possible, but for a number of special cases the length can be
determined at compile / graph-construction time. This function implements these special
cases.
...
...
@@ -3165,7 +3165,7 @@ else:
class
Reshape
(
Op
):
"""Perform a reshape operation of the input x to the new shape shp.
The number of dimensions to which to reshape to (ndim) must be known at graph
The number of dimensions to which to reshape to (ndim) must be known at graph
build time."""
view_map
=
{
0
:
[
0
]}
#output 0 is potentially aliased to inputs [0]
def
__init__
(
self
,
ndim
,
name
=
None
):
...
...
@@ -3248,7 +3248,7 @@ class Flatten(Op):
def
grad
(
self
,
(
x
,),
(
g_out
,)):
return
[
reshape
(
g_out
,
shape
(
x
),
x
.
ndim
)]
def
flatten
(
x
,
outdim
=
1
):
def
flatten
(
x
,
outdim
=
1
):
return
Flatten
(
outdim
)(
x
)
class
TileGrad
(
Op
):
...
...
@@ -3634,7 +3634,7 @@ class AdvancedSubtensor(Op):
# TODO: in general, we need to re-pack the inputs into a valid index, just like
# subtensor
out
[
0
]
=
inputs
[
0
]
.
__getitem__
(
inputs
[
1
:])
#return
#return
#raise NotImplementedError()
def
grad
(
self
,
inputs
,
(
gz
,)):
...
...
@@ -3703,7 +3703,7 @@ class Dot(Op):
return
hash
(
type
(
self
))
# the rationale for Dot22 is related to getting GEMM Ops into the graph. See Dot22 in tensor.blas for details.
def
make_node
(
self
,
*
inputs
):
inputs
=
map
(
as_tensor_variable
,
inputs
)
...
...
@@ -3764,7 +3764,7 @@ class Dot(Op):
elif
x
.
type
.
ndim
==
1
and
y
.
type
.
ndim
>
1
:
rval
=
dot
(
gz
,
y
.
T
),
outer
(
x
.
T
,
gz
)
elif
x
.
type
.
ndim
>
1
and
y
.
type
.
ndim
==
1
:
rval
=
outer
(
gz
,
y
.
T
),
dot
(
x
.
T
,
gz
)
rval
=
outer
(
gz
,
y
.
T
),
dot
(
x
.
T
,
gz
)
else
:
rval
=
dot
(
gz
,
y
.
T
),
dot
(
x
.
T
,
gz
)
return
cast
(
rval
[
0
],
x
.
dtype
),
cast
(
rval
[
1
],
y
.
dtype
)
...
...
@@ -3865,7 +3865,7 @@ class TensorDot(Op):
if
len
(
axes
[
0
])
!=
len
(
axes
[
1
]):
raise
ValueError
(
"We need that the axes 2 sub list of axes are of the same size"
)
assert
len
(
axes
[
0
])
==
len
(
axes
[
1
])
self
.
axes
=
axes
def
__eq__
(
self
,
other
):
...
...
@@ -3887,7 +3887,7 @@ class TensorDot(Op):
if
axesdim
>
x
.
type
.
ndim
or
axesdim
>
y
.
type
.
ndim
:
raise
TypeError
(
'Cannot sum over more dimensions than input.
%
i >
%
i,
%
i'
%
axesdim
,
x
.
type
.
ndim
,
y
.
type
.
ndim
)
outdim
=
x
.
type
.
ndim
+
y
.
type
.
ndim
-
2
*
axesdim
output
=
tensor
(
dtype
=
scal
.
upcast
(
x
.
dtype
,
y
.
dtype
),
broadcastable
=
[
False
]
*
outdim
);
...
...
@@ -3904,7 +3904,7 @@ class TensorDot(Op):
def
grad
(
self
,
(
x
,
y
),
(
gz
,)):
gx
,
gy
=
tensordot_grad
(
self
.
axes
)(
x
,
y
,
gz
)
return
[
gx
,
gy
]
def
__str__
(
self
):
return
"tensordot"
tensordot
=
TensorDot
...
...
@@ -3923,7 +3923,7 @@ class Outer(Op):
if
nx
!=
1
:
raise
TypeError
(
'non-vector arg0 to outer()'
,
x
)
if
ny
!=
1
:
raise
TypeError
(
'not-vector arg1 to outer()'
,
y
)
bz
=
[
x
.
type
.
broadcastable
[
0
],
y
.
type
.
broadcastable
[
0
]]
i_dtypes
=
[
input
.
type
.
dtype
for
input
in
inputs
]
...
...
@@ -3997,8 +3997,8 @@ class numeric_grad:
#
# There is a relationship between the step size and the function value and the measurement
# error that is incurred due to rounding. The finite difference we measure is
# delta = f(x0) - f(x0+eps)
#
# delta = f(x0) - f(x0+eps)
#
# For maximum precision, f should be close to zero.
# For every power of 2 that f departs from zero, we lose a bit of precision in delta.
#
...
...
@@ -4009,7 +4009,7 @@ class numeric_grad:
# bias into our measurement in general for non-linear functions.
#
# It would be interesting to have a version of numeric grad that used an adaptive stepsize.
#
#
# For now, we use a heuristic that catches very bad gradients, but is not perfectly
# accurate.
type_eps
=
{
'float64'
:
1e-7
,
...
...
@@ -4161,7 +4161,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
mode
=
None
,
cast_to_output_type
=
False
):
""" Test a gradient by Finite Difference Method. Raise error on failure.
Example:
Example:
>>> verify_grad(theano.tensor.tanh,
(numpy.asarray([[2,3,4], [-1, 3.3, 9.9]]),),
rng=numpy.random)
...
...
@@ -4187,8 +4187,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
debug mode, which can be very slow if it has to verify a lot
of intermediate computations.
:note: This op does not support multiple outputs. In tests/test_scan.py there is
an experimental verify_grad that covers that case as well by using random
:note: This op does not support multiple outputs. In tests/test_scan.py there is
an experimental verify_grad that covers that case as well by using random
projections.
"""
assert
isinstance
(
pt
,
(
list
,
tuple
))
...
...
@@ -4244,7 +4244,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
t_r
=
shared
(
random_projection
())
#random projection of o onto t_r
cost
=
sum
(
t_r
*
o_output
)
#This sum() is defined above, it's not the builtin sum.
cost
=
sum
(
t_r
*
o_output
)
#This sum() is defined above, it's not the builtin sum.
cost_fn
=
function
(
tensor_pt
,
cost
)
#todo-- determine if this is actually needed
...
...
theano/tensor/opt.py
浏览文件 @
cfc588cc
...
...
@@ -119,11 +119,11 @@ def insert_inplace_optimizer_op(OP):
"""
#we should not validate too often as this take too much time to execute!
#It is the _dfs_toposort() fct in theano/gof/destroyhandler.py
#that take so much time.
#Should we try to use another lib that do toposort?
#that take so much time.
#Should we try to use another lib that do toposort?
# igraph: http://igraph.sourceforge.net/
# networkx: https://networkx.lanl.gov/
#Should we try to use cython?
#Should we try to use cython?
# compiling only that fct is not enought, should we try to add the deque class too?
# and init the deque and other list to an upper bound number of element?
#Should Theano do online toposort as in http://code.google.com/p/acyclic/?
...
...
@@ -213,7 +213,7 @@ def insert_inplace_optimizer_op(OP):
insert_inplace_optimizer
=
insert_inplace_optimizer_op
(
T
.
Elemwise
)
compile
.
optdb
.
register
(
'inplace_opt'
,
insert_inplace_optimizer
,
75
,
'fast_run'
,
'inplace'
)
compile
.
optdb
.
register
(
'inplace_opt'
,
insert_inplace_optimizer
,
75
,
'fast_run'
,
'inplace'
)
def
register_canonicalize
(
lopt
,
*
tags
,
**
kwargs
):
name
=
(
kwargs
and
kwargs
.
pop
(
'name'
))
or
lopt
.
__name__
...
...
@@ -304,7 +304,7 @@ class MakeVector(T.Op):
"""Concatenate a number of scalars together into a vector
This is a simple version of stack() that introduces far less cruft into the graph.
Should work with 0 inputs. The constant_folding optimization will remove it.
"""
def
__init__
(
self
,
dtype
=
'int64'
):
...
...
@@ -398,7 +398,7 @@ class Shape_i(T.Op):
%(out)
s=(PyArrayObject*)PyArray_ZEROS(0, NULL, PyArray_INT64, 0);
((npy_int64*)PyArray_DATA(
%(out)
s))[0]=
%(x)
s->dimensions[
%(i)
s];
"""
%
locals
()
elif
node
.
inputs
[
0
]
.
type
.
__class__
.
__name__
==
"CudaNdarrayType"
:
#Don't want to import cuda stuff here.
return
"""
...
...
@@ -413,12 +413,12 @@ class Shape_i(T.Op):
class
ShapeFeature
(
object
):
"""Graph optimizer for removing all calls to shape()
This optimizer replaces all Shapes and Subtensors of Shapes with Shape_i and MakeVector
Ops.
This optimizer has several goals:
1. to 'lift' Shapes to as close to the inputs as possible.
1. to 'lift' Shapes to as close to the inputs as possible.
2. to infer the shape of every node in the graph in terms of the input shapes.
3. remove all fills (T.second, T.fill) from the graph
...
...
@@ -430,7 +430,7 @@ class ShapeFeature(object):
Many optimizations refuse to work on nodes with multiple clients.
Lifting is done by using an `<Op>.infer_shape` function if one is present, or else using a
conservative default. An Op that supports shape-lifting should define a
conservative default. An Op that supports shape-lifting should define a
infer_shape(self, node, input_shapes) function. The argument input_shapes is a tuple
of tuples... there is an interior tuple for each input to the node. The tuple has as many
elements as dimensions. The element in position i of tuple j represents the i'th shape
...
...
@@ -439,9 +439,9 @@ class ShapeFeature(object):
the output[j].shape[i] of the function. If an output is not a TensorType, then None should
be returned instead of a tuple for that output.
For example the infer_shape for a matrix-matrix product would accept
For example the infer_shape for a matrix-matrix product would accept
input_shapes=((x0,x1), (y0,y1)) and return ((x0, y1),).
Inferring the shape of internal nodes in the graph is important for doing size-driven
optimizations. If we know how big various intermediate results will be, we can estimate
...
...
@@ -495,7 +495,7 @@ class ShapeFeature(object):
return
T
.
constant
(
s_i
,
dtype
=
'int64'
)
if
type
(
s_i
)
in
(
tuple
,
list
):
# this dimension is the same as many of the inputs
# which tells us that if one of the inputs is known,
# which tells us that if one of the inputs is known,
# the others all become known.
# TODO: should be implemented in Elemwise, and Dot
#
...
...
@@ -506,7 +506,7 @@ class ShapeFeature(object):
raise
TypeError
(
'Shape element must be scalar'
,
s_i
)
return
s_i
else
:
raise
TypeError
(
'Unsupported shape element'
,
raise
TypeError
(
'Unsupported shape element'
,
s_i
,
type
(
s_i
),
getattr
(
s_i
,
'type'
,
None
))
def
set_shape
(
self
,
r
,
s
):
...
...
@@ -534,7 +534,7 @@ class ShapeFeature(object):
assert
not
hasattr
(
env
,
'shape_feature'
)
env
.
shape_feature
=
self
self
.
shape_of
=
{}
# Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self
.
scheduled
=
{}
# Variable ->
self
.
scheduled
=
{}
# Variable ->
self
.
lscalar_one
=
T
.
constant
(
1
,
dtype
=
'int64'
)
assert
self
.
lscalar_one
.
type
==
T
.
lscalar
for
node
in
env
.
toposort
():
...
...
@@ -622,7 +622,7 @@ def local_fill_to_alloc(node):
This is an important optimization because with the shape_to_shape_i optimization, the
dependency on 's' is often removed.
"""
if
node
.
op
==
T
.
fill
:
r
,
v
=
node
.
inputs
...
...
@@ -637,7 +637,7 @@ def local_fill_to_alloc(node):
shape_of
=
node
.
env
.
shape_feature
.
shape_of
# TODO: cut out un-necessary dimshuffles of v
rval
=
[
T
.
alloc
(
T
.
cast
(
v
,
node
.
outputs
[
0
]
.
dtype
),
*
shape_of
[
node
.
outputs
[
0
]])]
#if rval[0].type != node.outputs[0].type:
#print >> sys.stderr, theano.printing.debugprint(node.outputs[0], file='str')
...
...
@@ -700,7 +700,7 @@ def local_subtensor_make_vector(node):
raise
if
isinstance
(
idx
,
(
scalar
.
Scalar
,
T
.
TensorType
)):
# The idx is a Scalar, ie a Type. This means the actual index
# The idx is a Scalar, ie a Type. This means the actual index
# is contained in node.inputs[1]
old_idx
,
idx
=
idx
,
node
.
inputs
[
1
]
assert
idx
.
type
==
old_idx
...
...
@@ -773,7 +773,7 @@ class Assert(T.Op):
cond
=
[
T
.
as_tensor_variable
(
c
)
for
c
in
conds
]
assert
numpy
.
all
([
c
.
type
.
ndim
==
0
for
c
in
cond
])
return
gof
.
Apply
(
self
,
[
value
]
+
cond
,
[
value
.
type
()])
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
perform
(
self
,
node
,
inputs
,
(
out
,)):
...
...
@@ -807,7 +807,7 @@ class Assert(T.Op):
def
infer_shape
(
self
,
node
,
input_shapes
):
return
[
input_shapes
[
0
]]
assert_
=
Assert
()
@register_specialize
...
...
@@ -818,13 +818,13 @@ def local_remove_useless_assert(node):
for
c
in
node
.
inputs
[
1
:]:
try
:
const
=
get_constant_value
(
c
)
if
0
!=
const
.
ndim
or
const
==
0
:
#Should we raise an error here? How to be sure it is not catched?
cond
.
append
(
c
)
except
TypeError
:
cond
.
append
(
c
)
if
len
(
cond
)
==
0
:
return
[
node
.
inputs
[
0
]]
if
len
(
cond
)
!=
len
(
node
.
inputs
)
-
1
:
...
...
@@ -873,12 +873,12 @@ def local_alloc_elemwise(node):
isinstance
(
i
.
owner
.
inputs
[
0
]
.
owner
.
op
,
T
.
Alloc
)):
no_broad_idx
=
idx
break
assert
no_broad_idx
>=
0
assert_op
=
node
.
inputs
[
no_broad_idx
]
cmp_op
=
assert_op
new
=
[]
for
i
in
node
.
inputs
:
if
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
T
.
Alloc
)
and
i
.
owner
.
inputs
[
0
]
.
type
!=
i
.
owner
.
outputs
[
0
]
.
type
:
#when i.owner.inputs[0].type == i.owner.outputs[0].type we will remove that alloc later
...
...
@@ -1017,8 +1017,8 @@ def local_IncSubtensor_serialize(node):
IncSubtensor(Elemwise{second}(a, 0), g(f(a[2])), [2])
This is much worse because this time we have to produce 3 matrices the size of 'a', just so
we can add them together.
we can add them together.
This Op rearranges IncSubtensor's that all work on the same initial argument (here,
Elemwise{second}(a,0)) into a chain. The advantage of the chain structure is that each one
can be optimized later in the pipeline to operate inplace.
...
...
@@ -1028,7 +1028,7 @@ def local_IncSubtensor_serialize(node):
#
# add(x, incsubtensor(b, c), incsubtensor(b, d))
# -> incsubtensor(incsubtensor(add(x,b,b), c), d)
"""
def
movable
(
i
):
# Return True iff this is a incsubtensor that we can move
...
...
@@ -1138,7 +1138,7 @@ def local_rebroadcast_lift(node):
def
apply_rebroadcast_opt
(
rval
):
"""
Apply as many times as required the optimization local_useless_rebroadcast
Apply as many times as required the optimization local_useless_rebroadcast
and local_rebroadcast_lift.
:param rval: a Variable
...
...
@@ -1149,7 +1149,7 @@ def apply_rebroadcast_opt(rval):
while
changed
and
rval
.
owner
:
changed
=
False
rval2
=
theano
.
tensor
.
opt
.
local_useless_rebroadcast
.
transform
(
rval
.
owner
)
if
rval2
:
if
rval2
:
assert
len
(
rval2
)
==
1
rval
=
rval2
[
0
]
changed
=
True
...
...
@@ -1216,7 +1216,7 @@ def local_mul_switch_sink(node):
fct
[
0
]
.
values_eq_approx
=
fct
[
0
]
.
type
.
values_eq_approx_remove_nan
return
fct
except
TypeError
:
pass
pass
try
:
if
get_constant_value
(
switch
.
inputs
[
2
])
==
0.
:
listmul
=
node
.
inputs
[:
idx
]
+
node
.
inputs
[
idx
+
1
:]
...
...
@@ -1274,7 +1274,7 @@ def local_reshape_chain(node):
"""
if
not
opt
.
check_chain
(
node
,
T
.
Reshape
,
T
.
Reshape
):
return
False
# TODO: this can permit a failing program to run by eliminating the the lower
# reshape
return
[
node
.
op
(
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
],
node
.
inputs
[
1
])]
...
...
@@ -1304,7 +1304,7 @@ if 0:
y_shape
=
node
.
env
.
shape_feature
.
shape_of
[
y
]
def
tmp
(
thing
):
try
:
try
:
return
T
.
get_constant_value
(
thing
)
except
(
TypeError
,
ValueError
),
e
:
print
e
,
thing
.
owner
.
inputs
[
0
]
...
...
@@ -1322,15 +1322,15 @@ def local_fill_cut(node):
If c.type == a.type.
"""
# this optimization is essentially for getting broadcasting to replace fill.
# This is always possible when using a Compound Elemwise operation,
# this optimization is essentially for getting broadcasting to replace fill.
# This is always possible when using a Compound Elemwise operation,
# but it is not always possible without one (consider filling a large matrix with a scalar,
# and then adding another scalar. The only numbers that count are the two scalars, but we
# can't ignore the large matrix because it gives the shape of the result.
if
not
opt
.
check_chain
(
node
,
T
.
Elemwise
):
return
False
output
=
node
.
outputs
[
0
]
try
:
#reference is some input with the same type as the input but that is not produced by a fill
...
...
@@ -1397,7 +1397,7 @@ class Canonizer(gof.LocalOptimizer):
Simplification tool.
Usage: Canonizer(main, inverse, reciprocal, calculate)
* main: a suitable Op class that is commutative, associative and
takes one to an arbitrary number of inputs, e.g. add or
mul
...
...
@@ -1421,7 +1421,7 @@ class Canonizer(gof.LocalOptimizer):
T = theano.tensor
add_canonizer = Canonizer(T.add, T.sub, T.neg, lambda n, d: sum(n) - sum(d))
mul_canonizer = Canonizer(T.mul, T.true_div, T.inv, lambda n, d: prod(n) / prod(d))
Examples of optimizations mul_canonizer can perform:
x / x -> 1
(x * y) / x -> y
...
...
@@ -1659,7 +1659,7 @@ class Canonizer(gof.LocalOptimizer):
# Lists representing the *constant* elements of num and denum
numct
,
denumct
=
[],
[]
for
v
in
orig_num
:
ct
=
self
.
get_constant
(
v
)
if
ct
is
not
None
:
...
...
@@ -1788,7 +1788,7 @@ register_canonicalize(local_mul_canonizer, name = 'local_mul_canonizer')
@gof.local_optimizer
([
T
.
neg
])
def
local_neg_to_mul
(
node
):
if
node
.
op
==
T
.
neg
:
return
[
T
.
mul
(
numpy
.
array
(
-
1
,
dtype
=
node
.
inputs
[
0
]
.
dtype
),
return
[
T
.
mul
(
numpy
.
array
(
-
1
,
dtype
=
node
.
inputs
[
0
]
.
dtype
),
node
.
inputs
[
0
])]
register_canonicalize
(
local_neg_to_mul
)
...
...
@@ -1797,7 +1797,7 @@ register_canonicalize(local_neg_to_mul)
def
local_sum_mul_by_scalar
(
node
):
"""sum(scalar * smth) -> scalar * sum(smth)
"""
# TODO: if the the thing inside the Sum is a division,
# TODO: if the the thing inside the Sum is a division,
# we should get at the numerator....
if
isinstance
(
node
.
op
,
T
.
Sum
):
thing_summed
,
=
node
.
inputs
...
...
@@ -1935,7 +1935,7 @@ def local_sum_sum(node):
# special case of local_cut_useless_reduce
return
[
T
.
Sum
(
None
)(
summed
.
owner
.
inputs
[
0
])]
if
node
.
op
.
axis
is
None
:
# we're summing up everything anyway so lets
# we're summing up everything anyway so lets
# do it all at once
return
[
T
.
Sum
(
None
)(
summed
.
owner
.
inputs
[
0
])]
...
...
@@ -1983,7 +1983,6 @@ def local_sum_alloc(node):
if
summed
.
owner
and
isinstance
(
summed
.
owner
.
op
,
T
.
Alloc
):
input
=
summed
.
owner
.
inputs
[
0
]
shapes
=
summed
.
owner
.
inputs
[
1
:]
#import pdb;pdb.set_trace()
if
node
.
op
.
axis
is
None
or
node
.
op
.
axis
==
tuple
(
range
(
input
.
ndim
)):
try
:
val
=
get_constant_value
(
input
)
...
...
@@ -2019,7 +2018,7 @@ register_specialize(local_mul_to_neg)
@register_specialize
@gof.local_optimizer
([
T
.
neg
])
def
local_neg_neg
(
node
):
# other specializations shouldn't put this in,
# other specializations shouldn't put this in,
# but sometimes they do
if
node
.
op
==
T
.
neg
:
if
node
.
inputs
[
0
]
.
owner
and
node
.
inputs
[
0
]
.
owner
.
op
==
T
.
neg
:
...
...
@@ -2177,11 +2176,11 @@ def local_pow_specialize_device(node):
rval1
=
None
rval1_scal
=
None
while
y_to_do
>
0
:
log_to_do
=
int
(
numpy
.
log2
(
y_to_do
))
log_to_do
=
int
(
numpy
.
log2
(
y_to_do
))
if
rval1
:
rval1
*=
pow2
[
log_to_do
]
rval1_scal
*=
pow2_scal
[
log_to_do
]
else
:
else
:
rval1
=
pow2
[
log_to_do
]
rval1_scal
=
pow2_scal
[
log_to_do
]
y_to_do
-=
2
**
log_to_do
...
...
@@ -2197,7 +2196,7 @@ def local_pow_specialize_device(node):
rval
[
0
]
=
T
.
cast
(
rval
[
0
],
odtype
)
assert
rval
[
0
]
.
type
==
node
.
outputs
[
0
]
.
type
,
(
rval
,
node
.
outputs
)
return
rval
@gof.local_optimizer
([
T
.
mul
])
def
local_mul_specialize
(
node
):
"""Remove special-case constants from mul arguments
...
...
@@ -2210,7 +2209,7 @@ def local_mul_specialize(node):
neg
=
False
new_inputs
=
[]
for
input
in
node
.
inputs
:
# remove any neg arguments
# remove any neg arguments
while
input
.
owner
and
input
.
owner
.
op
==
T
.
neg
:
neg
^=
True
input
=
input
.
owner
.
inputs
[
0
]
...
...
@@ -2303,8 +2302,8 @@ def check_for_x_over_absX(numerators, denominators):
if
den
.
owner
and
den
.
owner
.
op
==
T
.
abs_
and
den
.
owner
.
inputs
[
0
]
in
numerators
:
if
den
.
owner
.
inputs
[
0
]
.
type
.
dtype
.
startswith
(
'complex'
):
#TODO: Make an Op that projects a complex number to have unit length
# but projects 0 to 0. That would be a weird Op, but consistent with the
# special case below. I heard there's some convention in Matlab that is
# but projects 0 to 0. That would be a weird Op, but consistent with the
# special case below. I heard there's some convention in Matlab that is
# similar to this... but not sure.
pass
else
:
...
...
@@ -2319,7 +2318,7 @@ local_mul_canonizer.add_simplifier(check_for_x_over_absX, 'X_over_absX')
def
local_abs_lift
(
node
):
"""
move the abs toward the input. This is needed for check_for_x_over_absX to apply in more case.
"""
if
node
.
op
==
T
.
abs_
and
node
.
inputs
[
0
]
.
owner
:
assert
node
.
nin
==
1
...
...
@@ -2328,13 +2327,13 @@ def local_abs_lift(node):
if
node
.
inputs
[
0
]
.
owner
.
op
==
T
.
true_div
:
i
=
node
.
inputs
[
0
]
.
owner
.
inputs
return
[
T
.
true_div
(
T
.
abs_
(
i
[
0
]),
T
.
abs_
(
i
[
1
]))]
@register_specialize
@gof.local_optimizer
([])
def
local_abs_merge
(
node
):
"""
merge abs generated by local_abs_lift when the canonizer don't need it anymore
"""
if
node
.
op
==
T
.
mul
and
sum
([
i
.
owner
.
op
==
T
.
abs_
for
i
in
node
.
inputs
if
i
.
owner
])
>
1
:
inputs
=
[]
...
...
@@ -2570,7 +2569,7 @@ def constant_folding(node):
return
msg
register_canonicalize
(
constant_folding
,
'fast_compile'
)
register_stabilize
(
constant_folding
)
# because
register_stabilize
(
constant_folding
)
# because
register_specialize
(
constant_folding
)
...
...
@@ -2598,7 +2597,7 @@ def _is_minus1(expr):
return
False
#1+erf(x)=>erfc(-x)
local_one_plus_erf
=
gof
.
PatternSub
((
T
.
add
,
local_one_plus_erf
=
gof
.
PatternSub
((
T
.
add
,
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
(
T
.
erf
,
'x'
)),
(
T
.
erfc
,
(
T
.
neg
,
'x'
)),
...
...
@@ -2608,7 +2607,7 @@ register_stabilize(local_one_plus_erf, name='local_one_plus_erf')
register_specialize
(
local_one_plus_erf
,
name
=
'local_one_plus_erf'
)
#1-erf(x)=>erfc(x)
local_one_minus_erf
=
gof
.
PatternSub
((
T
.
sub
,
local_one_minus_erf
=
gof
.
PatternSub
((
T
.
sub
,
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
(
T
.
erf
,
'x'
)),
(
T
.
erfc
,
'x'
),
...
...
@@ -2629,7 +2628,7 @@ register_specialize(local_one_minus_erf2)
#1+(-erf(x))=>erfc(x)
#This is a different graph then the previous as the canonicalize don't work completly
local_one_plus_neg_erf
=
gof
.
PatternSub
((
T
.
add
,
local_one_plus_neg_erf
=
gof
.
PatternSub
((
T
.
add
,
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
(
T
.
neg
,(
T
.
erf
,
'x'
))),
(
T
.
erfc
,
'x'
),
...
...
@@ -2640,7 +2639,7 @@ register_specialize(local_one_plus_neg_erf, name='local_one_plus_neg_erf')
#(-1)+erf(x) => -erfc(x)
#don't need erf(x)+(-1) as the canonicalize will put the -1 as the first argument.
local_erf_minus_one
=
gof
.
PatternSub
((
T
.
add
,
local_erf_minus_one
=
gof
.
PatternSub
((
T
.
add
,
dict
(
pattern
=
'y'
,
constraint
=
_is_minus1
),
(
T
.
erf
,
'x'
)),
(
T
.
neg
,(
T
.
erfc
,
'x'
)),
...
...
@@ -2650,7 +2649,7 @@ register_stabilize(local_erf_minus_one, name='local_erf_minus_one')
register_specialize
(
local_erf_minus_one
,
name
=
'local_erf_minus_one'
)
#1-erfc(x) => erf(x)
local_one_minus_erfc
=
gof
.
PatternSub
((
T
.
sub
,
local_one_minus_erfc
=
gof
.
PatternSub
((
T
.
sub
,
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
(
T
.
erfc
,
'x'
)),
(
T
.
erf
,
'x'
),
...
...
@@ -2665,7 +2664,7 @@ local_one_minus_erfc2 = gof.PatternSub((T.add,
(
T
.
erf
,
'x'
),
allow_multiple_clients
=
True
,
name
=
'local_one_minus_erfc2'
)
register_canonicalize
(
local_one_minus_erfc2
)
register_canonicalize
(
local_one_minus_erfc2
)
register_stabilize
(
local_one_minus_erfc2
)
register_specialize
(
local_one_minus_erfc2
)
...
...
@@ -2675,13 +2674,13 @@ local_one_minus_erfc3 = gof.PatternSub((T.add,
(
T
.
erf
,
'x'
),
allow_multiple_clients
=
True
,
name
=
'local_one_minus_erfc3'
)
register_canonicalize
(
local_one_minus_erfc3
)
register_canonicalize
(
local_one_minus_erfc3
)
register_stabilize
(
local_one_minus_erfc3
)
register_specialize
(
local_one_minus_erfc3
)
#1+(-erfc(x)) => erf(x)
#This is a different graph then the previous as the canonicalize don't work completly
local_one_add_neg_erfc
=
gof
.
PatternSub
((
T
.
add
,
local_one_add_neg_erfc
=
gof
.
PatternSub
((
T
.
add
,
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
(
T
.
neg
,(
T
.
erfc
,
'x'
))),
(
T
.
erf
,
'x'
),
...
...
@@ -2691,7 +2690,7 @@ register_stabilize(local_one_add_neg_erfc, name='local_one_add_neg_erfc')
register_specialize
(
local_one_add_neg_erfc
,
name
=
'local_one_add_neg_erfc'
)
#(-1)+erfc(-x)=>erf(x)
local_erf_neg_minus_one
=
gof
.
PatternSub
((
T
.
add
,
local_erf_neg_minus_one
=
gof
.
PatternSub
((
T
.
add
,
dict
(
pattern
=
'y'
,
constraint
=
_is_minus1
),
(
T
.
erfc
,
(
T
.
neg
,
'x'
))),
(
T
.
erf
,
'x'
),
...
...
@@ -2701,7 +2700,7 @@ register_stabilize(local_erf_neg_minus_one, name='local_erf_neg_minus_one')
register_specialize
(
local_erf_neg_minus_one
,
name
=
'local_erf_neg_minus_one'
)
#(-1)+erfc(-1*x)=>erf(x)
local_erf_neg_minus_one2
=
gof
.
PatternSub
((
T
.
add
,
local_erf_neg_minus_one2
=
gof
.
PatternSub
((
T
.
add
,
dict
(
pattern
=
'y'
,
constraint
=
_is_minus1
),
(
T
.
erfc
,
(
T
.
mul
,
-
1
,
'x'
))),
(
T
.
erf
,
'x'
),
...
...
@@ -2732,7 +2731,7 @@ def local_log_erfc(node):
x
=
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
]
stab_value
=
-
x
**
2
-
T
.
log
(
x
)
-.
5
*
T
.
log
(
numpy
.
pi
)
+
T
.
log
(
1
-
1
/
(
2
*
x
**
2
)
+
3
/
(
4
*
x
**
4
)
-
15
/
(
8
*
x
**
6
))
if
node
.
outputs
[
0
]
.
dtype
==
'float32'
:
threshold
=
10.0541949
elif
node
.
outputs
[
0
]
.
dtype
==
'float64'
:
...
...
@@ -2749,7 +2748,7 @@ def local_log_erfc(node):
#for float64: threshold=26.63 see at the end of the fct for the explaination
#for float32: threshold=9.3 see at the end of the fct for the explaination
#TODO: remove the contraint that their is only 2 inputs to mul and the exp(x**2) is the second.
#TODO: at the test point 10 in float32, their is instability in the original value.
#TODO: at the test point 10 in float32, their is instability in the original value.
# the original give -30.0, the stab -20.1 and in float64 -18.1.
# Make the test don't generate error in that case!
@register_stabilize
...
...
@@ -2809,7 +2808,7 @@ def local_grad_log_erfc_neg(node):
new_inputs
.
append
(
i
)
return
new_inputs
mul_inputs
=
check_input
(
mul_neg
.
owner
.
inputs
)
#put the constant first
for
i
in
range
(
len
(
mul_inputs
)):
if
isinstance
(
i
,
Constant
):
...
...
@@ -2821,7 +2820,7 @@ def local_grad_log_erfc_neg(node):
mul_inputs
[
i
]
=
tmp
break
mul_neg
=
T
.
mul
(
*
mul_inputs
)
try
:
cst2
=
get_constant_value
(
mul_neg
.
owner
.
inputs
[
0
])
except
TypeError
:
...
...
@@ -2840,25 +2839,25 @@ def local_grad_log_erfc_neg(node):
return
False
if
cst2
!=-
1
:
if
(
not
erfc_x
.
owner
or
erfc_x
.
owner
.
op
!=
T
.
mul
if
(
not
erfc_x
.
owner
or
erfc_x
.
owner
.
op
!=
T
.
mul
or
len
(
erfc_x
.
owner
.
inputs
)
!=
2
):
#todo implement that case
return
False
if
erfc_x
.
owner
.
inputs
[
1
]
is
not
mul_neg
.
owner
.
inputs
[
1
]:
return
False
x
=
erfc_x
try
:
try
:
cst
=
get_constant_value
(
erfc_x
.
owner
.
inputs
[
0
])
except
TypeError
:
return
False
if
cst2
!=
-
cst
*
2
:
return
False
#The constant is valid. Must check that the
elif
erfc_x
is
not
x
:
elif
erfc_x
is
not
x
:
return
False
else
:
return
False
...
...
@@ -3014,7 +3013,7 @@ def local_elemwise_fusion_op(OP):
try
:
s_new_out
.
owner
.
op
.
c_code
(
s_new_out
.
owner
,
"test_presence_of_c_code"
,
[
"x"
for
x
in
s_g
],
"z"
,{})
"z"
,{})
except
MethodNotDefined
:
_logger
.
info
(
"
%
s does not implement the c_code function. As well as being potentially slow, this disables loop fusion of this op."
%
str
(
s_new_out
.
owner
.
op
))
return
False
...
...
@@ -3046,19 +3045,18 @@ def local_elemwise_fusion_op(OP):
return
False
# print "local_elemwise_fusion: FUSED",nb_elemwise+1,"elemwise!"
#we fuse as many that we can at the same time to make debug mode faster
#debug mode will be faster as it won't test all intermediate step.
while
True
:
ret
=
local_fuse
(
n
)
if
ret
is
not
False
and
ret
is
not
None
:
#print n,ret
#import pdb;pdb.set_trace()
assert
len
(
ret
)
==
len
(
n
.
outputs
)
assert
len
(
ret
)
==
1
n
=
ret
[
0
]
.
owner
else
:
break
return
n
.
outputs
return
local_fuse
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论