Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
6253b797
提交
6253b797
authored
1月 31, 2014
作者:
abergeron
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1717 from nouiz/faster_opt
Faster opt
上级
9df815dc
a4b14d66
隐藏空白字符变更
内嵌
并排
正在显示
14 个修改的文件
包含
153 行增加
和
145 行删除
+153
-145
install_ubuntu.txt
doc/install_ubuntu.txt
+4
-3
profiling.py
theano/compile/profiling.py
+2
-2
opt.py
theano/gof/opt.py
+7
-4
elemwise.py
theano/sandbox/gpuarray/elemwise.py
+4
-4
basic.py
theano/scalar/basic.py
+33
-20
scan_utils.py
theano/scan_module/scan_utils.py
+1
-1
test_basic.py
theano/sparse/tests/test_basic.py
+5
-2
basic.py
theano/tensor/basic.py
+1
-1
elemwise.py
theano/tensor/elemwise.py
+14
-14
test_sigm.py
theano/tensor/nnet/tests/test_sigm.py
+2
-2
opt.py
theano/tensor/opt.py
+64
-61
subtensor.py
theano/tensor/subtensor.py
+2
-2
test_opt.py
theano/tensor/tests/test_opt.py
+7
-22
type.py
theano/tensor/type.py
+7
-7
没有找到文件。
doc/install_ubuntu.txt
浏览文件 @
6253b797
...
...
@@ -166,9 +166,10 @@ yourself. Here is some code that will help you.
cd OpenBLAS
make FC=gfortran
sudo make PREFIX=/usr/local/ install
cd /usr/local/lib
ln -s libopenblas.so /usr/lib/libblas.so
ln -s libopenblas.so.0 /usr/lib/libblas.so.3gf
# Tell Theano to use OpenBLAS.
# This work only for the current user.
# Each Theano user on that computer should run that line.
echo -e "\n[blas]\nldflags = -lopenblas\n" >> ~/.theanorc
Contributed GPU instruction
...
...
theano/compile/profiling.py
浏览文件 @
6253b797
...
...
@@ -787,8 +787,8 @@ class ProfileStats(object):
if
self
.
variable_shape
or
self
.
variable_strides
:
self
.
summary_memory
(
file
,
n_apply_to_print
)
if
self
.
optimizer_profile
:
print
"Optimizer Profile"
print
"-----------------"
print
>>
file
,
"Optimizer Profile"
print
>>
file
,
"-----------------"
self
.
optimizer_profile
[
0
]
.
print_profile
(
file
,
self
.
optimizer_profile
[
1
])
...
...
theano/gof/opt.py
浏览文件 @
6253b797
...
...
@@ -1252,7 +1252,7 @@ class NavigatorOptimizer(Optimizer):
pruner
(
node
)
if
chin
is
not
None
:
def
on_change_input
(
self
,
fgraph
,
node
,
i
,
r
,
new_r
,
reason
):
chin
(
node
,
i
,
r
,
new_r
)
chin
(
node
,
i
,
r
,
new_r
,
reason
)
u
=
Updater
()
fgraph
.
attach_feature
(
u
)
...
...
@@ -1701,7 +1701,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
lopt
))
count_opt
=
[]
not_used
=
0
not_used
=
[]
not_used_time
=
0
process_count
=
{}
for
o
in
opt
.
global_optimizers
+
list
(
opt
.
get_local_optimizers
()):
...
...
@@ -1713,7 +1713,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
if
count
>
0
:
count_opt
.
append
((
time_opts
[
opt
],
count
,
opt
))
else
:
not_used
+=
1
not_used
.
append
((
time_opts
[
opt
],
opt
))
not_used_time
+=
time_opts
[
opt
]
if
count_opt
:
...
...
@@ -1724,7 +1724,10 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print
>>
stream
,
blanc
,
'
%.3
fs -
%
d -
%
s'
%
(
t
,
count
,
opt
)
print
>>
stream
,
blanc
,
'
%.3
fs - in
%
d optimization that where not used'
%
(
not_used_time
,
not_used
)
not_used_time
,
len
(
not_used
))
not_used
.
sort
()
for
(
t
,
opt
)
in
not_used
[::
-
1
]:
print
>>
stream
,
blanc
+
" "
,
'
%.3
fs -
%
s'
%
(
t
,
opt
)
print
>>
stream
@staticmethod
...
...
theano/sandbox/gpuarray/elemwise.py
浏览文件 @
6253b797
...
...
@@ -76,11 +76,11 @@ class GpuElemwise(HideC, Elemwise):
try
:
inps
=
[
make_argument
(
i
,
'i
%
d'
%
(
n
,))
for
n
,
i
in
enumerate
(
node
.
inputs
)]
scal_ins
=
[
scalar
.
Scalar
(
i
.
dtype
)
for
i
in
node
.
inputs
]
scal_ins
=
[
scalar
.
get_scalar_type
(
i
.
dtype
)
for
i
in
node
.
inputs
]
outs
=
[
make_argument
(
o
,
'o
%
d'
%
(
n
,))
for
n
,
o
in
enumerate
(
node
.
outputs
)
if
not
n
in
self
.
inplace_pattern
]
scal_out
=
[
scalar
.
Scalar
(
o
.
dtype
)
for
o
in
node
.
outputs
]
scal_out
=
[
scalar
.
get_scalar_type
(
o
.
dtype
)
for
o
in
node
.
outputs
]
fake_node
=
Apply
(
self
.
scalar_op
,
[
i
()
for
i
in
scal_ins
],
[
o
()
for
o
in
scal_out
])
...
...
@@ -103,11 +103,11 @@ class GpuElemwise(HideC, Elemwise):
def
generate_kernel
(
self
,
node
,
nodename
):
inps
=
[
make_argument
(
i
,
'i
%
d'
%
(
n
,))
for
n
,
i
in
enumerate
(
node
.
inputs
)]
scal_ins
=
[
scalar
.
Scalar
(
i
.
dtype
)
for
i
in
node
.
inputs
]
scal_ins
=
[
scalar
.
get_scalar_type
(
i
.
dtype
)
for
i
in
node
.
inputs
]
outs
=
[
make_argument
(
o
,
'o
%
d'
%
(
n
,))
for
n
,
o
in
enumerate
(
node
.
outputs
)
if
not
n
in
self
.
inplace_pattern
]
scal_out
=
[
scalar
.
Scalar
(
o
.
dtype
)
for
o
in
node
.
outputs
]
scal_out
=
[
scalar
.
get_scalar_type
(
o
.
dtype
)
for
o
in
node
.
outputs
]
fake_node
=
Apply
(
self
.
scalar_op
,
[
i
()
for
i
in
scal_ins
],
[
o
()
for
o
in
scal_out
])
...
...
theano/scalar/basic.py
浏览文件 @
6253b797
...
...
@@ -69,6 +69,18 @@ def upcast(dtype, *dtypes):
return
rval
def
get_scalar_type
(
dtype
):
"""
Return an Scalar(dtype) object.
This cache objects to save allocation and run time.
"""
if
dtype
not
in
get_scalar_type
.
cache
:
get_scalar_type
.
cache
[
dtype
]
=
Scalar
(
dtype
=
dtype
)
return
get_scalar_type
.
cache
[
dtype
]
get_scalar_type
.
cache
=
{}
def
as_scalar
(
x
,
name
=
None
):
if
isinstance
(
x
,
gof
.
Apply
):
if
len
(
x
.
outputs
)
!=
1
:
...
...
@@ -91,7 +103,7 @@ def constant(x):
# purpose typically.
if
hasattr
(
x
,
'dtype'
):
assert
x
.
ndim
==
0
return
ScalarConstant
(
Scalar
(
str
(
x
.
dtype
)),
x
)
return
ScalarConstant
(
get_scalar_type
(
str
(
x
.
dtype
)),
x
)
if
isinstance
(
x
,
builtin_float
):
for
dtype
in
[
'float32'
,
'float64'
]:
x_
=
theano
.
_asarray
(
x
,
dtype
=
dtype
)
...
...
@@ -99,7 +111,7 @@ def constant(x):
break
x_
=
None
assert
x_
is
not
None
return
ScalarConstant
(
Scalar
(
str
(
x_
.
dtype
)),
x
)
return
ScalarConstant
(
get_scalar_type
(
str
(
x_
.
dtype
)),
x
)
if
isinstance
(
x
,
builtin_int
):
for
dtype
in
[
'int8'
,
'int16'
,
'int32'
,
'int64'
]:
x_
=
theano
.
_asarray
(
x
,
dtype
=
dtype
)
...
...
@@ -107,7 +119,7 @@ def constant(x):
break
x_
=
None
assert
x_
is
not
None
return
ScalarConstant
(
Scalar
(
str
(
x_
.
dtype
)),
x
)
return
ScalarConstant
(
get_scalar_type
(
str
(
x_
.
dtype
)),
x
)
if
isinstance
(
x
,
builtin_complex
):
#TODO: We have added the complex type, so this should be tested
raise
NotImplementedError
()
...
...
@@ -457,18 +469,18 @@ theano.compile.register_view_op_c_code(
1
)
int8
=
Scalar
(
'int8'
)
int16
=
Scalar
(
'int16'
)
int32
=
Scalar
(
'int32'
)
int64
=
Scalar
(
'int64'
)
uint8
=
Scalar
(
'uint8'
)
uint16
=
Scalar
(
'uint16'
)
uint32
=
Scalar
(
'uint32'
)
uint64
=
Scalar
(
'uint64'
)
float32
=
Scalar
(
'float32'
)
float64
=
Scalar
(
'float64'
)
complex64
=
Scalar
(
'complex64'
)
complex128
=
Scalar
(
'complex128'
)
int8
=
get_scalar_type
(
'int8'
)
int16
=
get_scalar_type
(
'int16'
)
int32
=
get_scalar_type
(
'int32'
)
int64
=
get_scalar_type
(
'int64'
)
uint8
=
get_scalar_type
(
'uint8'
)
uint16
=
get_scalar_type
(
'uint16'
)
uint32
=
get_scalar_type
(
'uint32'
)
uint64
=
get_scalar_type
(
'uint64'
)
float32
=
get_scalar_type
(
'float32'
)
float64
=
get_scalar_type
(
'float64'
)
complex64
=
get_scalar_type
(
'complex64'
)
complex128
=
get_scalar_type
(
'complex128'
)
int_types
=
int8
,
int16
,
int32
,
int64
uint_types
=
uint8
,
uint16
,
uint32
,
uint64
...
...
@@ -584,7 +596,7 @@ class _scalar_py_operators:
# The second is needed for Elemwise ops to work right
if
dtype
is
None
:
dtype
=
str
(
self
.
type
.
dtype
)
return
second
(
self
,
ScalarConstant
(
Scalar
(
dtype
),
0
))
return
second
(
self
,
ScalarConstant
(
get_scalar_type
(
dtype
),
0
))
def
astype
(
self
,
dtype
):
return
cast
(
self
,
dtype
)
...
...
@@ -628,7 +640,8 @@ complexs128 = _multi(complex128)
# necessary to use this same mechanism in other places as well in the future.
class
upcast_out
(
object
):
def
__new__
(
self
,
*
types
):
return
Scalar
(
dtype
=
Scalar
.
upcast
(
*
types
)),
dtype
=
Scalar
.
upcast
(
*
types
)
return
get_scalar_type
(
dtype
),
class
upgrade_to_float
(
object
):
...
...
@@ -644,7 +657,7 @@ class upgrade_to_float(object):
uint16
:
float32
,
uint32
:
float64
,
uint64
:
float64
}
return
Scalar
(
Scalar
.
upcast
(
*
[
conv
.
get
(
type
,
type
)
return
get_scalar_type
(
Scalar
.
upcast
(
*
[
conv
.
get
(
type
,
type
)
for
type
in
types
])),
...
...
@@ -656,7 +669,7 @@ class same_out(object):
def
upcast_out_no_complex
(
*
types
):
if
any
([
type
in
complex_types
for
type
in
types
]):
raise
TypeError
(
'complex type are not supported'
)
return
Scalar
(
dtype
=
Scalar
.
upcast
(
*
types
)),
return
get_scalar_type
(
dtype
=
Scalar
.
upcast
(
*
types
)),
def
same_out_float_only
(
type
):
...
...
@@ -1455,7 +1468,7 @@ def div_proxy(x, y):
class
TrueDiv
(
BinaryScalarOp
):
def
output_types
(
self
,
types
):
if
all
(
t
in
discrete_types
for
t
in
types
):
return
[
Scalar
(
config
.
floatX
)]
return
[
get_scalar_type
(
config
.
floatX
)]
else
:
return
super
(
TrueDiv
,
self
)
.
output_types
(
types
)
...
...
theano/scan_module/scan_utils.py
浏览文件 @
6253b797
...
...
@@ -59,7 +59,7 @@ def safe_new(x, tag='', dtype=None):
# making the pushout optimization fail
elif
isinstance
(
x
,
scalar
.
ScalarVariable
):
if
dtype
:
nw_x
=
scalar
.
Scalar
(
dtype
=
dtype
)()
nw_x
=
scalar
.
get_scalar_type
(
dtype
=
dtype
)()
else
:
nw_x
=
x
.
type
()
nw_x
.
name
=
nw_name
...
...
theano/sparse/tests/test_basic.py
浏览文件 @
6253b797
...
...
@@ -1113,8 +1113,11 @@ class test_structureddot(unittest.TestCase):
utt
.
assert_allclose
(
scipy_result
,
theano_result
)
if
(
not
theano
.
config
.
mode
in
[
"DebugMode"
,
"DEBUG_MODE"
]
and
theano
.
config
.
cxx
):
self
.
assertFalse
(
theano_time
>
overhead_rtol
*
scipy_time
+
overhead_tol
)
self
.
assertFalse
(
theano_time
>
overhead_rtol
*
scipy_time
+
overhead_tol
,
(
theano_time
,
overhead_rtol
*
scipy_time
+
overhead_tol
,
scipy_time
,
overhead_rtol
,
overhead_tol
))
class
DotTests
(
utt
.
InferShapeTester
):
...
...
theano/tensor/basic.py
浏览文件 @
6253b797
...
...
@@ -993,7 +993,7 @@ class ScalarFromTensor(Op):
assert
t
.
type
.
broadcastable
==
()
return
Apply
(
self
,
[
t
],
[
scal
.
Scalar
(
dtype
=
t
.
type
.
dtype
)
.
make_variable
()])
[
scal
.
get_scalar_type
(
dtype
=
t
.
type
.
dtype
)
.
make_variable
()])
def
perform
(
self
,
node
,
inp
,
out_
):
s
,
=
inp
...
...
theano/tensor/elemwise.py
浏览文件 @
6253b797
...
...
@@ -8,7 +8,7 @@ import theano
from
theano
import
gof
from
theano.gof
import
Apply
,
Op
from
theano
import
scalar
from
theano.scalar
import
Scalar
from
theano.scalar
import
Scalar
,
get_scalar_type
from
theano.printing
import
pprint
from
theano.gof.python25
import
all
,
any
from
theano.tensor.utils
import
hash_from_dict
...
...
@@ -515,7 +515,7 @@ class Elemwise(Op):
"""
inputs
=
map
(
as_tensor_variable
,
inputs
)
shadow
=
self
.
scalar_op
.
make_node
(
*
[
Scalar
(
dtype
=
i
.
type
.
dtype
)()
for
i
in
inputs
])
*
[
get_scalar_type
(
dtype
=
i
.
type
.
dtype
)()
for
i
in
inputs
])
target_length
=
max
([
input
.
type
.
ndim
for
input
in
inputs
])
...
...
@@ -718,7 +718,7 @@ class Elemwise(Op):
def
as_scalar
(
t
):
if
isinstance
(
t
.
type
,
(
NullType
,
DisconnectedType
)):
return
t
return
Scalar
(
t
.
type
.
dtype
)()
return
get_scalar_type
(
t
.
type
.
dtype
)()
scalar_inputs
=
map
(
as_scalar
,
inputs
)
scalar_ograds
=
map
(
as_scalar
,
ograds
)
...
...
@@ -1039,9 +1039,9 @@ class Elemwise(Op):
# We generate the C code of the inner loop using the scalar op
task_code
=
self
.
scalar_op
.
c_code
(
Apply
(
self
.
scalar_op
,
[
Scalar
(
dtype
=
input
.
type
.
dtype
)()
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)()
for
input
in
node
.
inputs
],
[
Scalar
(
dtype
=
output
.
type
.
dtype
)()
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)()
for
output
in
node
.
outputs
]),
nodename
+
'_scalar_'
,
[
"
%
s_i"
%
s
for
s
in
_inames
],
...
...
@@ -1161,11 +1161,11 @@ class Elemwise(Op):
# now we insert versions for the ops on which we depend...
scalar_node
=
Apply
(
self
.
scalar_op
,
[
Scalar
(
dtype
=
input
.
type
.
dtype
)()
for
input
in
node
.
inputs
],
[
Scalar
(
dtype
=
output
.
type
.
dtype
)()
for
output
in
node
.
outputs
])
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)()
for
input
in
node
.
inputs
],
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)()
for
output
in
node
.
outputs
])
version
.
append
(
self
.
scalar_op
.
c_code_cache_version_apply
(
scalar_node
))
for
i
in
node
.
inputs
+
node
.
outputs
:
version
.
append
(
Scalar
(
dtype
=
i
.
type
.
dtype
)
.
c_code_cache_version
())
version
.
append
(
get_scalar_type
(
dtype
=
i
.
type
.
dtype
)
.
c_code_cache_version
())
if
all
(
version
):
return
tuple
(
version
)
else
:
...
...
@@ -1531,9 +1531,9 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
task1_code
=
self
.
scalar_op
.
c_code
(
Apply
(
self
.
scalar_op
,
[
Scalar
(
dtype
=
input
.
type
.
dtype
)()
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)()
for
input
in
(
node
.
inputs
*
2
)],
[
Scalar
(
dtype
=
output
.
type
.
dtype
)()
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)()
for
input
in
node
.
outputs
]),
None
,
[
"
%
s_i"
%
aname
,
"
%
s_i"
%
inames
[
0
]],
...
...
@@ -1583,11 +1583,11 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
# now we insert versions for the ops on which we depend...
scalar_node
=
Apply
(
self
.
scalar_op
,
[
Scalar
(
dtype
=
input
.
type
.
dtype
)()
for
input
in
node
.
inputs
],
[
Scalar
(
dtype
=
output
.
type
.
dtype
)()
for
output
in
node
.
outputs
])
[
get_scalar_type
(
dtype
=
input
.
type
.
dtype
)()
for
input
in
node
.
inputs
],
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)()
for
output
in
node
.
outputs
])
version
.
append
(
self
.
scalar_op
.
c_code_cache_version_apply
(
scalar_node
))
for
i
in
node
.
inputs
+
node
.
outputs
:
version
.
append
(
Scalar
(
dtype
=
i
.
type
.
dtype
)
.
c_code_cache_version
())
version
.
append
(
get_scalar_type
(
dtype
=
i
.
type
.
dtype
)
.
c_code_cache_version
())
if
all
(
version
):
return
tuple
(
version
)
else
:
...
...
@@ -1665,7 +1665,7 @@ class CAReduceDtype(CAReduce):
def
__init__
(
self
,
scalar_op
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
):
"""
Usage: CAReduceDtype(scalar_op, axis=None, dtype=None)
Usage: CAReduceDtype(scalar_op, axis=None, dtype=None
, acc_dtype=None
)
:param scalar_op: a binary scalar op with only one output.
It must be commutative and associative.
...
...
theano/tensor/nnet/tests/test_sigm.py
浏览文件 @
6253b797
...
...
@@ -162,7 +162,7 @@ class T_sigmoid_opts(unittest.TestCase):
f
=
theano
.
function
([
x
],
(
T
.
fill
(
x
,
-
1.0
)
*
T
.
exp
(
x
))
/
((
1
+
T
.
exp
(
x
))
*
(
1
+
T
.
exp
(
-
x
))),
mode
=
m
)
assert
[
node
.
op
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
==
[
sigmoid
,
T
.
mul
,
theano
.
tensor
.
inplace
.
neg_inplace
]
T
.
mul
]
f
(
data
)
f
=
theano
.
function
([
x
],
(
T
.
fill
(
x
,
-
1.1
)
*
T
.
exp
(
x
))
/
((
1
+
T
.
exp
(
x
))
*
(
1
+
T
.
exp
(
-
x
))),
mode
=
m
)
...
...
@@ -238,7 +238,7 @@ class T_sigmoid_opts(unittest.TestCase):
tensor
.
exp
(
x
*
y
)
*
tensor
.
exp
(
y
)),
mode
=
m
)
match
(
f
,
[
sigmoid
,
tensor
.
mul
,
tensor
.
neg
,
tensor
.
exp
,
sigmoid
,
tensor
.
mul
,
tensor
.
neg
])
tensor
.
mul
])
def
test_perform_sigm_times_exp
(
self
):
"""
...
...
theano/tensor/opt.py
浏览文件 @
6253b797
...
...
@@ -2559,12 +2559,12 @@ def local_fill_cut(node):
# scalars, but we can't ignore the large matrix because it gives
# the shape of the result.
if
no
t
opt
.
check_chain
(
node
,
T
.
Elemwise
)
:
if
no
de
.
op
!=
T
.
Elemwise
:
return
False
output
=
node
.
outputs
[
0
]
try
:
#reference is some input with the same type as the
in
put but
#reference is some input with the same type as the
out
put but
#that is not produced by a fill
reference
=
[
input
for
input
in
node
.
inputs
...
...
@@ -2574,16 +2574,18 @@ def local_fill_cut(node):
return
False
new_inputs
=
[]
new
=
False
for
input
in
node
.
inputs
:
if
opt
.
check_chain
(
input
,
T
.
fill
)
:
if
input
.
owner
and
input
.
owner
.
op
==
T
.
fill
:
model
,
filling
=
input
.
owner
.
inputs
if
encompasses_broadcastable
(
reference
.
type
.
broadcastable
,
filling
.
type
.
broadcastable
):
new_inputs
.
append
(
filling
)
new
=
True
continue
new_inputs
.
append
(
input
)
if
n
ew_inputs
==
node
.
inputs
:
if
n
ot
new
:
return
False
rval
=
node
.
op
(
*
new_inputs
)
...
...
@@ -2787,9 +2789,9 @@ class Canonizer(gof.LocalOptimizer):
pairs
=
[
self
.
get_num_denum
(
input2
)
for
input2
in
parent
.
inputs
]
if
parent
.
op
==
self
.
main
:
# If we have main(x, y
), numx, denumx, numy and denumy
# then num is concat(numx, numy) and denum is
# concat(denumx, denumy) note that main() can have any
# If we have main(x, y
, ...), numx, denumx, numy, denumy, ...
# then num is concat(numx, numy
, num...
) and denum is
# concat(denumx, denumy
, denum...
) note that main() can have any
# number of arguments >= 0 concat is list concatenation
num
=
reduce
(
list
.
__iadd__
,
map
(
operator
.
itemgetter
(
0
),
pairs
))
denum
=
reduce
(
list
.
__iadd__
,
map
(
operator
.
itemgetter
(
1
),
pairs
))
...
...
@@ -2865,12 +2867,13 @@ class Canonizer(gof.LocalOptimizer):
else
:
return
v
def
simplify
(
self
,
num
,
denum
):
def
simplify
(
self
,
num
,
denum
,
out_type
):
"""
Shorthand for:
self.simplify_constants(*self.simplify_factors(num, denum))
"""
rval
=
self
.
simplify_constants
(
*
self
.
simplify_factors
(
num
,
denum
))
rval
=
self
.
simplify_constants
(
*
self
.
simplify_factors
(
num
,
denum
),
out_type
=
out_type
)
for
reason
,
simplifier
in
self
.
external_simplifiers
:
# TODO: document that 'reason' is associated with this
# simplification to help auditing when things go
...
...
@@ -2894,7 +2897,7 @@ class Canonizer(gof.LocalOptimizer):
denum
.
remove
(
v
)
return
num
,
denum
def
simplify_constants
(
self
,
orig_num
,
orig_denum
):
def
simplify_constants
(
self
,
orig_num
,
orig_denum
,
out_type
=
None
):
"""
Finds all constants in orig_num and orig_denum (using
...
...
@@ -2912,7 +2915,6 @@ class Canonizer(gof.LocalOptimizer):
# Lists representing the numerator and denumerator
num
,
denum
=
list
(
orig_num
),
list
(
orig_denum
)
out_type
=
self
.
merge_num_denum
(
orig_num
,
orig_denum
)
.
type
# Lists representing the *constant* elements of num and denum
numct
,
denumct
=
[],
[]
...
...
@@ -2981,29 +2983,26 @@ class Canonizer(gof.LocalOptimizer):
if
op
not
in
[
self
.
main
,
self
.
inverse
,
self
.
reciprocal
]:
return
False
out
=
node
.
outputs
[
0
]
assert
len
(
node
.
outputs
)
==
1
out
=
node
.
outputs
[
0
]
# check if any of the clients of this node would be part of
# this canonized graph... if so, we do nothing and wait for
# them to be transformed.
def
_bypass_dimshuffle
(
n
):
if
(
isinstance
(
getattr
(
n
,
'op'
,
None
),
DimShuffle
)
and
len
(
n
.
outputs
[
0
]
.
clients
)
<=
1
):
return
_bypass_dimshuffle
(
n
.
outputs
[
0
]
.
clients
[
0
][
0
])
else
:
return
n
for
c
,
c_idx
in
out
.
clients
:
if
c
==
'output'
:
continue
if
getattr
(
_bypass_dimshuffle
(
c
),
'op'
,
''
)
in
[
self
.
main
,
self
.
inverse
,
self
.
reciprocal
]:
while
(
isinstance
(
getattr
(
c
,
'op'
,
None
),
DimShuffle
)
and
len
(
c
.
outputs
[
0
]
.
clients
)
<=
1
):
c
=
c
.
outputs
[
0
]
.
clients
[
0
][
0
]
if
getattr
(
c
,
'op'
,
''
)
in
[
self
.
main
,
self
.
inverse
,
self
.
reciprocal
]:
return
False
# Here we make the canonical version of the graph around this node
# See the documentation of get_num_denum and simplify
orig_num
,
orig_denum
=
self
.
get_num_denum
(
node
.
outputs
[
0
])
num
,
denum
=
self
.
simplify
(
list
(
orig_num
),
list
(
orig_denum
))
num
,
denum
=
self
.
simplify
(
list
(
orig_num
),
list
(
orig_denum
)
,
out
.
type
)
def
same
(
x
,
y
):
return
len
(
x
)
==
len
(
y
)
and
all
(
N
.
all
(
xe
==
ye
)
for
xe
,
ye
in
...
...
@@ -3387,20 +3386,6 @@ def local_sum_alloc(node):
pass
@gof.local_optimizer
([
T
.
mul
])
def
local_mul_to_neg
(
node
):
if
node
.
op
==
T
.
mul
and
N
.
all
(
local_mul_canonizer
.
get_constant
(
node
.
inputs
[
0
])
==
-
1.0
):
other_prod
=
local_mul_canonizer
.
merge_num_denum
(
node
.
inputs
[
1
:],
[])
if
other_prod
.
type
==
node
.
outputs
[
0
]
.
type
:
return
[
-
other_prod
]
# else the multiplication is also acting as a cast, so we
# might as well leave it alone. I don't think it's better to
# turn this into a negation in the wrong type, followed by an
# explicit cast.
register_specialize
(
local_mul_to_neg
)
@register_specialize
@gof.local_optimizer
([
T
.
neg
])
def
local_neg_neg
(
node
):
...
...
@@ -3447,7 +3432,7 @@ def local_mul_zero(node):
except
NotScalarConstantError
:
continue
#print 'MUL by value', value, node.inputs
if
N
.
all
(
value
==
0
)
:
if
value
==
0
:
#print '... returning zeros'
return
_fill_chain
(
theano
.
_asarray
(
0
,
dtype
=
otype
.
dtype
),
node
.
inputs
)
...
...
@@ -3485,9 +3470,9 @@ register_canonicalize(local_inv_canon)
@gof.local_optimizer
([
T
.
pow
])
def
local_pow_canonicalize
(
node
):
if
node
.
op
==
T
.
pow
:
if
N
.
all
(
local_mul_canonizer
.
get_constant
(
node
.
inputs
[
1
])
==
0
)
:
if
local_mul_canonizer
.
get_constant
(
node
.
inputs
[
1
])
==
0
:
return
[
broadcast_like
(
1
,
node
.
outputs
[
0
],
node
.
fgraph
)]
if
N
.
all
(
local_mul_canonizer
.
get_constant
(
node
.
inputs
[
1
])
==
1
)
:
if
local_mul_canonizer
.
get_constant
(
node
.
inputs
[
1
])
==
1
:
return
[
broadcast_like
(
node
.
inputs
[
0
],
node
.
outputs
[
0
],
node
.
fgraph
)]
else
:
return
False
...
...
@@ -3581,7 +3566,7 @@ def local_pow_specialize_device(node):
# 512 is too small for the cpu and too big for some gpu!
if
abs
(
y
)
==
int
(
abs
(
y
))
and
abs
(
y
)
<=
512
:
pow2
=
[
xsym
]
pow2_scal
=
[
theano
.
scalar
.
Scalar
(
xsym
.
dtype
)()]
pow2_scal
=
[
theano
.
scalar
.
get_scalar_type
(
xsym
.
dtype
)()]
y_to_do
=
abs
(
y
)
for
i
in
xrange
(
int
(
numpy
.
log2
(
y_to_do
))):
pow2
.
append
(
T
.
sqr
(
pow2
[
i
]))
...
...
@@ -3616,7 +3601,15 @@ def local_pow_specialize_device(node):
@gof.local_optimizer
([
T
.
mul
])
def
local_mul_specialize
(
node
):
"""Remove special-case constants from mul arguments
"""Remove special-case constants from mul arguments and useless neg in inputs.
mul(-1, x) -> neg(x)
mul(1, x, y) -> mul(x, y)
mul(0, ...) -> alloc(0, shapes...)
This is not done if we would add more nodes in the graph, like with:
mul(-1, x, y) -/-> neg(mul(x, y))
"""
# here, we are past the point of canonicalization, so we don't
# want to put in un-necessary fills.
...
...
@@ -3626,19 +3619,23 @@ def local_mul_specialize(node):
#the idea here is that we have pow(x, y)
neg
=
False
new_inputs
=
[]
nb_neg_node
=
0
nb_cst
=
0
for
input
in
node
.
inputs
:
# remove any neg arguments
while
input
.
owner
and
input
.
owner
.
op
==
T
.
neg
:
neg
^=
True
input
=
input
.
owner
.
inputs
[
0
]
nb_neg_node
+=
1
# remove special case arguments of 1, -1 or 0
y
=
local_mul_canonizer
.
get_constant
(
input
)
if
N
.
all
(
y
==
1.0
):
continue
elif
N
.
all
(
y
==
-
1.0
):
if
y
==
1.0
:
nb_cst
+=
1
elif
y
==
-
1.0
:
nb_cst
+=
1
neg
^=
True
# toggles
elif
N
.
all
(
y
==
0.0
)
:
elif
y
==
0.0
:
# if we find any zero, we just return right away
return
[
broadcast_like
(
0
,
node
.
outputs
[
0
],
node
.
fgraph
)]
else
:
...
...
@@ -3652,10 +3649,17 @@ def local_mul_specialize(node):
else
:
rval
=
new_inputs
[
0
]
else
:
if
neg
:
rval
=
-
T
.
mul
(
*
new_inputs
)
else
:
rval
=
T
.
mul
(
*
new_inputs
)
# The next case would cause a replace by an equivalent case.
if
(
neg
and
nb_neg_node
==
0
and
nb_cst
==
1
):
return
elif
neg
:
# Don't add an extra neg node as we can't
# fully replace this mul by a neg.
m1
=
numpy
.
asarray
(
-
1
,
dtype
=
node
.
outputs
[
0
]
.
dtype
)
new_inputs
=
[
m1
]
+
new_inputs
rval
=
T
.
mul
(
*
new_inputs
)
return
[
broadcast_like
(
rval
,
node
.
outputs
[
0
],
node
.
fgraph
)]
else
:
...
...
@@ -3712,9 +3716,6 @@ def local_add_specialize(node):
return
False
register_specialize
(
local_add_specialize
)
# neg_to_mul = out2in(gof.LocalOptGroup(local_neg_to_mul))
# mul_to_neg = out2in(gof.LocalOptGroup(local_mul_to_neg))
mul_canonizer
=
in2out
(
gof
.
LocalOptGroup
(
local_mul_canonizer
,
local_fill_cut
,
local_fill_sink
),
name
=
'mul_canonizer_groups'
)
...
...
@@ -3871,7 +3872,8 @@ register_canonicalize(local_add_canonizer, name='local_add_canonizer')
##################
def
distribute_greedy
(
pos_pairs
,
neg_pairs
,
num
,
denum
,
minscore
=
0
):
def
distribute_greedy
(
pos_pairs
,
neg_pairs
,
num
,
denum
,
out_type
,
minscore
=
0
):
# each pair in pos_pairs and neg_pairs is a num/denum pair. this
# function attempts to add num and denum to the corresponding parts
# of each pair, and counts how many multiplications/divisions can
...
...
@@ -3887,10 +3889,10 @@ def distribute_greedy(pos_pairs, neg_pairs, num, denum, minscore=0):
# score is number of operations saved, higher is better
score
=
len
(
num
)
+
div_cost
*
len
(
denum
)
new_pos_pairs
=
list
(
itertools
.
starmap
(
local_mul_canonizer
.
simplify
,
[(
n
+
num
,
d
+
denum
)
for
(
n
,
d
)
[(
n
+
num
,
d
+
denum
,
out_type
)
for
(
n
,
d
)
in
pos_pairs
]))
new_neg_pairs
=
list
(
itertools
.
starmap
(
local_mul_canonizer
.
simplify
,
[(
n
+
num
,
d
+
denum
)
for
(
n
,
d
)
[(
n
+
num
,
d
+
denum
,
out_type
)
for
(
n
,
d
)
in
neg_pairs
]))
for
(
n
,
d
),
(
nn
,
dd
)
in
zip
(
pos_pairs
+
neg_pairs
,
new_pos_pairs
+
new_neg_pairs
):
...
...
@@ -3903,7 +3905,7 @@ def distribute_greedy(pos_pairs, neg_pairs, num, denum, minscore=0):
return
True
,
new_pos_pairs
,
new_neg_pairs
def
attempt_distribution
(
factor
,
num
,
denum
):
def
attempt_distribution
(
factor
,
num
,
denum
,
out_type
):
# we try to insert each num and each denum in the factor
# returns: changes?, new_factor, new_num, new_denum
# if there are changes, new_num and new_denum contain all the numerators
...
...
@@ -3916,13 +3918,13 @@ def attempt_distribution(factor, num, denum):
change
=
False
for
n
in
list
(
num
):
success
,
pos_pairs
,
neg_pairs
=
distribute_greedy
(
pos_pairs
,
neg_pairs
,
[
n
],
[])
neg_pairs
,
[
n
],
[]
,
out_type
)
if
success
:
change
=
True
num
.
remove
(
n
)
for
d
in
list
(
denum
):
success
,
pos_pairs
,
neg_pairs
=
distribute_greedy
(
pos_pairs
,
neg_pairs
,
[],
[
d
])
neg_pairs
,
[],
[
d
]
,
out_type
)
if
success
:
change
=
True
denum
.
remove
(
d
)
...
...
@@ -3967,12 +3969,13 @@ def local_greedy_distributor(node):
change
=
False
out_type
=
out
.
type
for
candidate
in
list
(
num
):
if
candidate
not
in
num
:
continue
num
.
remove
(
candidate
)
_change
,
candidate
,
num
,
denum
=
attempt_distribution
(
candidate
,
num
,
denum
)
num
,
denum
,
out_type
)
change
|=
_change
new_num
.
append
(
candidate
)
...
...
@@ -3981,7 +3984,7 @@ def local_greedy_distributor(node):
continue
denum
.
remove
(
candidate
)
_change
,
candidate
,
denum
,
num
=
attempt_distribution
(
candidate
,
denum
,
num
)
denum
,
num
,
out_type
)
change
|=
_change
new_denum
.
append
(
candidate
)
...
...
@@ -4636,7 +4639,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
elif
ii
in
tmp_input
:
tmp_s_input
.
append
(
tmp_scalar
[
tmp_input
.
index
(
ii
)])
else
:
tmp
=
scalar
.
Scalar
(
ii
.
dtype
)
.
make_variable
()
tmp
=
scalar
.
get_scalar_type
(
ii
.
dtype
)
.
make_variable
()
try
:
tmp
.
tag
.
test_value
=
gof
.
op
.
get_test_value
(
ii
)
.
flatten
()[
0
]
except
AttributeError
:
...
...
@@ -4690,7 +4693,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
if
inputs
.
count
(
i
)
==
node
.
inputs
.
count
(
i
):
s
=
s_inputs
[
inputs
.
index
(
i
)]
else
:
s
=
scalar
.
Scalar
(
i
.
dtype
)
.
make_variable
()
s
=
scalar
.
get_scalar_type
(
i
.
dtype
)
.
make_variable
()
try
:
if
theano
.
config
.
compute_test_value
!=
'off'
:
v
=
gof
.
op
.
get_test_value
(
i
)
...
...
theano/tensor/subtensor.py
浏览文件 @
6253b797
...
...
@@ -318,11 +318,11 @@ class Subtensor(Op):
if
(
isinstance
(
entry
,
gof
.
Variable
)
and
entry
.
type
in
tensor_types
and
numpy
.
all
(
entry
.
type
.
broadcastable
)):
return
scal
.
Scalar
(
entry
.
type
.
dtype
)
return
scal
.
get_scalar_type
(
entry
.
type
.
dtype
)
elif
(
isinstance
(
entry
,
gof
.
Type
)
and
entry
in
tensor_types
and
numpy
.
all
(
entry
.
broadcastable
)):
return
scal
.
Scalar
(
entry
.
dtype
)
return
scal
.
get_scalar_type
(
entry
.
dtype
)
elif
slice_ok
and
isinstance
(
entry
,
slice
):
a
=
entry
.
start
b
=
entry
.
stop
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
6253b797
...
...
@@ -2838,7 +2838,7 @@ def test_local_mul_specialize():
nodes
=
[
node
.
op
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
print
nodes
theano
.
printing
.
debugprint
(
f
)
assert
nodes
==
[
T
.
mul
,
inplace
.
neg_inplace
]
assert
nodes
==
[
T
.
mul
]
f
=
function
([
v
,
m
],
v
*
0
*
(
-
m
),
mode
=
mode
)
nodes
=
[
node
.
op
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
...
...
@@ -2852,6 +2852,12 @@ def test_local_mul_specialize():
theano
.
printing
.
debugprint
(
f
)
assert
nodes
==
[
T
.
mul
]
f
=
function
([
v
,
m
],
v
*
(
-
1
)
*
m
,
mode
=
mode
)
nodes
=
[
node
.
op
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
print
nodes
theano
.
printing
.
debugprint
(
f
)
assert
nodes
==
[
T
.
mul
]
def
speed_local_pow_specialize_range
():
val
=
numpy
.
random
.
rand
(
1e7
)
...
...
@@ -4000,27 +4006,6 @@ def test_local_join_1():
assert
f
.
maker
.
fgraph
.
outputs
[
0
]
.
dtype
==
config
.
floatX
def
test_local_mul_to_neg
():
"""
Test that a multiplication by -1 or -1.0 yields the appropriate data type
"""
a
=
T
.
imatrix
()
f1
=
theano
.
function
([
a
],
-
1
*
a
)
f2
=
theano
.
function
([
a
],
-
1.0
*
a
)
aval
=
numpy
.
random
.
randint
(
0
,
10
,
(
2
,
2
))
.
astype
(
'int32'
)
if
config
.
cast_policy
==
'custom'
:
assert
f1
(
aval
)
.
dtype
==
a
.
dtype
assert
f2
(
aval
)
.
dtype
==
'float64'
elif
config
.
cast_policy
==
'numpy'
:
assert
f1
(
aval
)
.
dtype
==
str
(
numpy
.
array
(
0
)
.
dtype
)
assert
f2
(
aval
)
.
dtype
==
'float64'
elif
config
.
cast_policy
==
'numpy+floatX'
:
assert
f1
(
aval
)
.
dtype
==
str
(
numpy
.
array
(
0
)
.
dtype
)
assert
f2
(
aval
)
.
dtype
==
config
.
floatX
else
:
raise
NotImplementedError
(
config
.
cast_policy
)
def
test_local_add_specialize
():
# test of non-zero dimension
a
=
tensor
.
vector
()
...
...
theano/tensor/type.py
浏览文件 @
6253b797
...
...
@@ -240,7 +240,7 @@ class TensorType(Type):
%
(
self
.
__class__
.
__name__
,
self
.
dtype
))
def
to_scalar_type
(
self
):
return
scal
.
Scalar
(
dtype
=
self
.
dtype
)
return
scal
.
get_scalar_type
(
dtype
=
self
.
dtype
)
def
__eq__
(
self
,
other
):
"""Compare True iff other is the same kind of TensorType"""
...
...
@@ -538,23 +538,23 @@ class TensorType(Type):
def
c_headers
(
self
):
"""Override `CLinkerObject.c_headers` """
return
scal
.
Scalar
(
self
.
dtype
)
.
c_headers
()
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_headers
()
def
c_libraries
(
self
):
return
scal
.
Scalar
(
self
.
dtype
)
.
c_libraries
()
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_libraries
()
def
c_compile_args
(
self
):
return
scal
.
Scalar
(
self
.
dtype
)
.
c_compile_args
()
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_compile_args
()
def
c_support_code
(
self
):
"""Override `CLinkerObject.c_support_code` """
return
scal
.
Scalar
(
self
.
dtype
)
.
c_support_code
()
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_support_code
()
def
c_init_code
(
self
):
return
scal
.
Scalar
(
self
.
dtype
)
.
c_init_code
()
return
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_init_code
()
def
c_code_cache_version
(
self
):
scalar_version
=
scal
.
Scalar
(
self
.
dtype
)
.
c_code_cache_version
()
scalar_version
=
scal
.
get_scalar_type
(
self
.
dtype
)
.
c_code_cache_version
()
if
scalar_version
:
return
(
11
,)
+
scalar_version
else
:
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论