Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
cd644635
提交
cd644635
authored
10月 21, 2009
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
差异文件
merge
上级
754f30fa
4a5bf511
隐藏空白字符变更
内嵌
并排
正在显示
16 个修改的文件
包含
632 行增加
和
358 行删除
+632
-358
debugging_with_stepmode.txt
doc/advanced/debugging_with_stepmode.txt
+71
-0
index.txt
doc/advanced/index.txt
+1
-0
function_module.py
theano/compile/function_module.py
+70
-46
mode.py
theano/compile/mode.py
+49
-2
profilemode.py
theano/compile/profilemode.py
+3
-2
test_module.py
theano/compile/tests/test_module.py
+10
-9
cc.py
theano/gof/cc.py
+86
-47
cmodule.py
theano/gof/cmodule.py
+5
-1
link.py
theano/gof/link.py
+9
-9
basic.py
theano/scalar/basic.py
+4
-6
basic.py
theano/tensor/basic.py
+75
-9
blas.py
theano/tensor/blas.py
+11
-5
nnet.py
theano/tensor/nnet.py
+5
-18
opt.py
theano/tensor/opt.py
+5
-5
test_nnet.py
theano/tensor/tests/test_nnet.py
+14
-8
test_opt.py
theano/tensor/tests/test_opt.py
+214
-191
没有找到文件。
doc/advanced/debugging_with_stepmode.txt
0 → 100644
浏览文件 @
cd644635
Debugging with a customized so-called StepMode
==============================================
One convenient trick I've found for debugging my programs that are running with theano is to
use what I call a 'StepMode'. There is no such StepMode in the standard library because the
purpose of it is to hack it to investigate what your own particular program is doing.
.. code-block:: python
from theano.gof.link import WrapLinkerMany
from theano.compile.mode import (Mode, register_mode, predefined_modes, predefined_linkers,
predefined_optimizers, default_linker, default_optimizer)
class StepMode(Mode):
def __init__(self, linker=default_linker, optimizer=default_optimizer):
def blah(i, node, th):
# This function will be run for each node in your compiled program.
# here you can inspect all the values as they are computed,
# ... you can even change them !
# 'i' is the execution position in the serialized graph
# node is the symbolic Apply instance
# th is a callable thing that will compute the node.
print i, node, len(th.inputs)
# the symbolic inputs of the node are in node.inputs
# the j'th non-symbolic input of the node is in th.inputs[j][0]
th() # call the function to actually 'run' the graph
# the symbolic outputs of the node are in node.outputs
# the j'th non-symbolic output of the node is in th.outputs[j][0]
print type(th.outputs[0][0])
if i == 39:
print 'this node is weird...', th.outputs[0][0]
self.provided_linker = linker
self.provided_optimizer = optimizer
if isinstance(linker, str) or linker is None:
linker = predefined_linkers[linker]
self.linker = WrapLinkerMany([linker], [blah])
if isinstance(optimizer, str) or optimizer is None:
optimizer = predefined_optimizers[optimizer]
self._optimizer = optimizer
The way to use it is like this:
.. code-block:: python
fn = function(inputs, outputs, mode=StepMode())
When you call fn, your function in the stepmode will be called for each node in the compiled
program. You can print out some or all of the values, you can change them in mid-execution.
You can see where bizarre values are first occurring in your computations. It's a very
powerful way to understand your program's execution.
Remember, if you give names to your variables, then printing nodes will give you a better idea of
where in the calculations you are.
doc/advanced/index.txt
浏览文件 @
cd644635
...
...
@@ -15,4 +15,5 @@ Advanced Topics (under construction)
ccodegen
function
module
debugging_with_stepmode
theano/compile/function_module.py
浏览文件 @
cd644635
...
...
@@ -22,7 +22,59 @@ from io import *
import
logging
_logger
=
logging
.
getLogger
(
'theano.compile.function_module'
)
def
view_map_root
(
v
):
"""Return the variable that v is ultimately a view of"""
if
v
.
owner
is
None
:
return
v
vmap
=
getattr
(
v
.
owner
.
op
,
'view_map'
,
{})
dmap
=
getattr
(
v
.
owner
.
op
,
'destroy_map'
,
{})
outpos
=
v
.
owner
.
outputs
.
index
(
v
)
v_views
=
vmap
.
get
(
outpos
,
[])
+
dmap
.
get
(
outpos
,
[])
if
len
(
v_views
)
>
1
:
raise
NotImplementedError
()
elif
v_views
:
return
view_map_root
(
v
.
owner
.
inputs
[
v_views
[
0
]])
else
:
return
v
def
view_tree_set
(
v
,
treeset
):
"""Add to `treeset` all variables that are views of v, given that v is not a view"""
treeset
.
add
(
v
)
for
cl
,
v_input_pos_to_cl
in
v
.
clients
:
if
cl
==
'output'
:
continue
vmap
=
getattr
(
cl
.
op
,
'view_map'
,
{})
dmap
=
getattr
(
cl
.
op
,
'destroy_map'
,
{})
for
opos
,
iposlist
in
vmap
.
items
()
+
dmap
.
items
():
if
v_input_pos_to_cl
in
iposlist
:
if
cl
.
outputs
[
opos
]
not
in
treeset
:
view_tree_set
(
cl
.
outputs
[
opos
],
treeset
)
def
infer_reuse_pattern
(
env
,
outputs_to_disown
):
"""
Given an env and a list of variables, returns the list or set of all variables which may
share the same underlying data storage as any of the specified variables. Used internally
by function, FunctionMaker.
This list (or set) is also sometimes referred to as no_recycling, especially by linker code.
"""
rval
=
set
()
for
o
in
outputs_to_disown
:
view_tree_set
(
view_map_root
(
o
),
rval
)
# remove from rval all of the inputs, constants, values.
rval
=
set
(
r
for
r
in
rval
if
r
.
owner
is
not
None
)
if
1
:
# DEBUG STUFF
# verify that we return a superset of what we've been returning so far...
rval0
=
_old_infer_reuse_pattern
(
env
,
outputs_to_disown
)
rval0_set
=
set
(
rval0
)
for
r
in
rval0_set
:
assert
r
in
rval
return
rval
def
_old_infer_reuse_pattern
(
env
,
outputs_to_disown
):
"""
Given an env and a list of variables, returns the list of all
variables which may share the same underlying data storage as any of
...
...
@@ -39,18 +91,8 @@ def infer_reuse_pattern(env, outputs_to_disown):
do_not_reuse
.
append
(
r
)
node
=
r
.
owner
op
=
node
.
op
if
hasattr
(
op
,
'destroy_map'
):
dmap
=
op
.
destroy_map
else
:
dmap
=
{}
if
hasattr
(
op
,
'view_map'
):
vmap
=
op
.
view_map
else
:
vmap
=
{}
#backport
#dmap = op.destroy_map if hasattr(op, 'destroy_map') else {}
#vmap = op.view_map if hasattr(op, 'view_map') else {}
dmap
=
getattr
(
op
,
'destroy_map'
,
{})
vmap
=
getattr
(
op
,
'view_map'
,
{})
for
l
in
dmap
.
values
()
+
vmap
.
values
():
for
i
in
l
:
walk
(
node
.
inputs
[
i
])
...
...
@@ -515,6 +557,7 @@ class SanityCheckFunction(Function):
super
(
SanityCheckFunction
,
self
)
.
__init__
(
*
args
,
**
kwargs
)
self
.
others
=
others
self
.
check_equal
=
check_equal
# DEPRECATED? Is this just for DualLinker?
def
__setitem__
(
self
,
item
,
value
):
super
(
SanityCheckFunction
,
self
)
.
__setitem__
(
item
,
value
)
...
...
@@ -739,6 +782,7 @@ class FunctionMaker(object):
input_storage_lists
.
append
([
input_storage_i
])
defaults
.
append
((
self
.
required
[
i
],
self
.
refeed
[
i
],
input_storage_i
))
# Get a function instance
_fn
,
_i
,
_o
=
self
.
linker
.
make_thunk
(
input_storage
=
input_storage_lists
)
fn
=
self
.
function_builder
(
_fn
,
_i
,
_o
,
self
.
indices
,
self
.
outputs
,
defaults
,
self
.
unpack_single
,
self
.
return_none
,
self
)
...
...
@@ -791,7 +835,7 @@ def register_checker(checker):
def
function
(
inputs
,
outputs
,
mode
=
None
,
accept_inplace
=
False
):
"""
Return a
function calculating
the outputs from the inputs.
Return a
Function that will calculate
the outputs from the inputs.
:param inputs: list of `SymbolicInput` or `In` instances
...
...
@@ -804,61 +848,41 @@ def function(inputs, outputs, mode=None, accept_inplace = False):
Currently, the library provides the following mode strings:
- SANITY_CHECK TODO: NotImplemented
- FAST_COMPILE (apply only optimization that are fast to apply)
- FAST_RUN (default) (optimize without too much time)
-
EXPENSIVE_OPTIMIZATION TODO: NotImplemented
-
FAST_COMPILE (minimal optimization)
- PROFILE_MODE : allow to print a profile mode with mode.print_summary
- DEBUG_MODE :
make all the check that we taught of(compare python and c,...
)
- DEBUG_MODE :
verify many internal conditions that are normally assumed (SLOW
)
:param accept_inplace: True iff the graph can contain inplace operations prior to the
optimization phase (default is False)
Every element of the input list will be upgraded to an `In` instance if necessary,
using the rules implemented by the `convert_function_input` function.
Similarly, every element of the output list will be upgraded to an
`Out` instance if necessary:
* a `Variable` instance r will be upgraded like `Out`(r)
Random Numbers
--------------
"""
If your computation involves random numbers, then you have to pass the `RandomKit` as an
input argument. That RandomKit must have a name to be able to seed the generator. To seed
the generator, use the `__setitem__` method:
#Every element of the input list will be upgraded to an `In` instance if necessary,
#using the rules implemented by the `convert_function_input` function.
..code-block: python
f[<kitname>] = seed #re-seed the elements of a RandomKit
#Similarly, every element of the output list will be upgraded to an
#`Out` instance if necessary:
"""
t1
=
time
.
time
()
if
mode
is
None
:
mode
=
mode_module
.
default_mode
#backport
#mode = mode if mode is not None else mode_module.default_mode
mode
=
mode_module
.
default_mode
inputs
=
map
(
convert_function_input
,
inputs
)
if
outputs
is
not
None
:
if
isinstance
(
outputs
,
(
list
,
tuple
)):
outputs
=
map
(
FunctionMaker
.
wrap_out
,
outputs
)
else
:
outputs
=
FunctionMaker
.
wrap_out
(
outputs
)
#backport
#outputs = map(FunctionMaker.wrap_out, outputs) if isinstance(outputs, (list, tuple)) else FunctionMaker.wrap_out(outputs)
if
isinstance
(
outputs
,
(
list
,
tuple
)):
outputs
=
map
(
FunctionMaker
.
wrap_out
,
outputs
)
else
:
outputs
=
FunctionMaker
.
wrap_out
(
outputs
)
defaults
=
[
getattr
(
input
,
'value'
,
None
)
for
input
in
inputs
]
mode
=
mode_module
.
predefined_modes
.
get
(
mode
,
mode
)
if
isinstance
(
mode
,
(
list
,
tuple
)):
# "mode comparison" semantics
_logger
.
warning
(
'Passing multiple modes is deprecated (20091019)'
)
if
not
mode
:
raise
ValueError
(
"Please provide at least one mode."
)
elif
len
(
mode
)
==
1
:
...
...
theano/compile/mode.py
浏览文件 @
cd644635
"""WRITEME
"""
import
os
,
logging
import
numpy
import
os
import
scipy.sparse
as
sp
from
theano
import
gof
_logger
=
logging
.
getLogger
(
'theano.compile.mode'
)
def
check_equal
(
x
,
y
):
"""
Returns True iff x[0] and y[0] are equal (checks the dtype and
...
...
@@ -74,9 +79,51 @@ def register_optimizer(name, opt):
raise
ValueError
(
'Optimizer name already taken:
%
s'
%
name
)
predefined_optimizers
[
name
]
=
opt
class
OutputGuard
(
gof
.
Op
):
destroy_map
=
{
0
:[
0
]}
view_map
=
{
0
:[
0
]}
def
make_node
(
self
,
x
):
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
perform
(
self
,
node
,
(
x
,),
(
z
,)):
z
[
0
]
=
x
def
__str__
(
self
):
return
'
%
s'
%
self
.
__class__
.
__name__
def
c_code
(
self
,
node
,
nodename
,
(
x
,),
(
z
,),
sub
):
return
"""
Py_XDECREF(
%(z)
s);
%(z)
s =
%(x)
s;
Py_XINCREF(
%(z)
s);
"""
%
locals
()
def
c_code_cache_version
(
self
):
return
(
1
,)
_output_guard
=
OutputGuard
()
class
AddDestroyHandler
(
gof
.
Optimizer
):
"""This optimizer performs two important functions:
1) it has a 'requirement' of the destroyhandler. This means that the env will include it
as a feature for this optimization, and keep this feature enabled for subsequent
optimizations. All optimizations that work inplace on any of their inputs must run *after*
this optimization to ensure that the DestroyHandler has been included in the env.
2) It tries to replace each output with an Op that purports to destroy it (but it won't I
promise). If this replacement succeeds it means that there is a bug in theano. It should
not be possible to destroy outputs.
"""
def
apply
(
self
,
env
):
pass
for
o
in
env
.
outputs
:
try
:
env
.
replace_validate
(
o
,
_output_guard
(
o
),
reason
=
'output_guard'
)
_logger
.
warning
(
"Output variable
%
s required output_guard,"
" how was this output left unprotected against destructive operations?"
%
o
)
except
gof
.
InconsistencyError
:
#this output is already impossible to destroy. no guard necessary
pass
def
add_requirements
(
self
,
env
):
super
(
AddDestroyHandler
,
self
)
.
add_requirements
(
env
)
env
.
extend
(
gof
.
DestroyHandler
())
...
...
theano/compile/profilemode.py
浏览文件 @
cd644635
...
...
@@ -21,7 +21,7 @@ class ProfileMode(Mode):
op_time
,
op_cimpl
,
op_call
,
compile_time
))
def
__getstate__
(
self
):
print
"__getstate__"
,
self
.
provided_linker
,
self
.
provided_optimizer
#
print "__getstate__",self.provided_linker,self.provided_optimizer
return
(
self
.
provided_linker
,
self
.
provided_optimizer
,
self
.
local_time
,
self
.
apply_time
,
self
.
apply_call
,
self
.
op_time
,
self
.
op_cimpl
,
self
.
op_call
,
self
.
compile_time
)
...
...
@@ -255,7 +255,8 @@ def atexit_print_default_profile_mode():
THEANO_DEFAULT_MODE=PROFILE_MODE
"""
prof_mode
=
predefined_modes
[
"PROFILE_MODE"
]
if
prof_mode
.
local_time
[
0
]
>
0
:
prof_mode
.
print_summary
()
if
prof_mode
.
local_time
[
0
]
>
0
:
prof_mode
.
print_summary
()
#Register atexit_print_default_profile_mode to have the summary of the
#predefined mode PROFILE_MODE (if it is used) printed when the program terminates.
...
...
theano/compile/tests/test_module.py
浏览文件 @
cd644635
...
...
@@ -737,18 +737,19 @@ def test_pickle_aliased_memory():
m
.
x
[
0
,
0
]
=
3.14
assert
m
.
y
[
0
,
0
]
==
3.14
import
StringIO
import
StringIO
,
logging
sio
=
StringIO
.
StringIO
()
handler
=
logging
.
StreamHandler
(
sio
)
logging
.
getLogger
(
'theano.compile.function_module'
)
.
addHandler
(
handler
)
try
:
m
.
f
.
pickle_aliased_memory_strategy
=
'warn'
m
.
g
.
pickle_aliased_memory_strategy
=
'warn'
m_dup
=
cPickle
.
loads
(
cPickle
.
dumps
(
m
))
assert
sio
.
getvalue
()
.
startswith
(
'aliased relat'
)
finally
:
logging
.
getLogger
(
'theano.compile.function_module'
)
.
removeHandler
(
handler
)
old_stderr
=
sys
.
stderr
sys
.
stderr
=
sio
m
.
f
.
pickle_aliased_memory_strategy
=
'warn'
m
.
g
.
pickle_aliased_memory_strategy
=
'warn'
m_dup
=
cPickle
.
loads
(
cPickle
.
dumps
(
m
))
sys
.
stderr
=
old_stderr
assert
sio
.
getvalue
()
.
startswith
(
'WARNING: aliased relat'
)
try
:
m
.
f
.
pickle_aliased_memory_strategy
=
'raise'
m
.
g
.
pickle_aliased_memory_strategy
=
'raise'
...
...
theano/gof/cc.py
浏览文件 @
cd644635
...
...
@@ -585,9 +585,12 @@ class CLinker(link.Linker):
"""
ret
=
[
"-O3"
]
# this is the param the -ffast-math activate. I put the explicitly as FillMissing must disable some of them. Putting -ffast-math would make it disable all other parameter at the same time.
ret
+=
[
"-fno-math-errno"
,
"-funsafe-math-optimizations"
,
"-fno-signaling-nans"
,
"-fcx-limited-range"
,
"-fno-rounding-math"
,
"-ffinite-math-only"
,
ret
+=
[
"-fno-math-errno"
,
#"-funsafe-math-optimizations",
#"-fno-signaling-nans",
#"-fcx-limited-range",
#"-fno-rounding-math",
#"-ffinite-math-only",
"-Wno-unused-label"
,
#the current code generates labels even if they are not used. Could use a gcc attribute for those labels only
"-Wno-unused-variable"
,
#idem as the precedent
"-Wno-write-strings"
,
#generated by our code generator...
...
...
@@ -758,38 +761,61 @@ class CLinker(link.Linker):
return
res
def
cmodule_key
(
self
):
"""Return a complete hashable signature of the module we compiled
"""Return a complete hashable signature of the module we compiled
.
This function must have the property that no two programs that compute different things
yield the same key.
The key returned by this function is of the form (version, signature)
The signature has the following form:
{{{
'CLinker.cmodule_key', compilation args, libraries,
op0, (input0.type, input1.type, input0 pos, input1 pos)
op1, (...)
(op0, input_signature0, output_signature0),
(op1, input_signature1, output_signature1),
...
opK, (...)
}}}
(opK, input_signatureK, output_signatureK),
}}}
The signature is a tuple, some elements of which are sub-tuples.
The signature is a tuple of tuples.
The outer tuple has a brief header, followed by elements for every node in the
topological ordering of `self.env`.
If the Op of any Apply in the Env does not have c_code_cache_ok()==True, then this
function raises a KeyError exception.
The outer tuple has one element for every node in the topological ordering of
`self.env`.
Input Signature
---------------
The inner tuple has one element for the op used at that node, and one element for the
inputs to that node. The inputs are identified by their type and "graph position"
Each input signature is a tuple with an element for each input to the corresponding
Apply node.
Each element identifies the type of the node input, and the nature of that input in the
graph.
The
graph position of a typical variable is encoded by integer pairs ``(a,b
)``:
The
nature of a typical variable is encoded by integer pairs ``((a,b),c
)``:
``a`` is the topological position of the input's owner (-1 for graph inputs),
``b`` is the index of the variable in the owner's output list.
``c`` is a flag indicating whether the variable is in the no_recycling set.
If a variable is also a graph output, then its position in the outputs list is also
bundled with this tuple (after the b).
The
graph position
of a Constant instance is defined as its signature, together with
The
nature
of a Constant instance is defined as its signature, together with
two integers: the topological position of the first Apply using that Constant instance,
and the lowest index into that Apply's inputs that refers to that Constant. (These two
integers are a surrogate for the id() of the Constant. The integers are important
because merge-able constants have the same signature, but require separate containers
in C code.)
in C code.)
The membership in no_recycling is also included in the signature.
If the Op of any Apply in the Env does not have c_code_cache_ok()==True, then this
function raises a KeyError exception.
Output Signature
----------------
The outputs of a node are entirely determined by the node's Op and the nature of the
inputs, but the set of outputs that may be re-used by the computation (the elements of
self.no_recycling) can affect the code that is generated.
The format of each Op's output signature is simply a list of booleans, indicating
whether each output is in the no_recycling set.
"""
return
self
.
cmodule_key_
(
self
.
env
,
self
.
no_recycling
,
...
...
@@ -797,68 +823,81 @@ class CLinker(link.Linker):
libraries
=
self
.
libraries
()
)
@staticmethod
def
cmodule_key_
(
env
,
no_recycling
,
compile_args
=
None
,
libraries
=
None
):
def
cmodule_key_
(
env
,
no_recycling
,
compile_args
=
[],
libraries
=
[]
):
"""
Do the actual computation of cmodule_key in a static method
to allow it to be reused in scalar.Composite.__eq__
"""
order
=
list
(
env
.
toposort
())
env_computed_set
=
set
()
env_inputs_dict
=
dict
((
i
,
[
-
1
,
pos
])
for
pos
,
i
in
enumerate
(
env
.
inputs
))
#set of variables that have been computed by nodes we have
# seen 'so far' in the loop below
env_computed_set
=
set
()
env_inputs_dict
=
dict
((
i
,
(
-
1
,
pos
))
for
pos
,
i
in
enumerate
(
env
.
inputs
))
constant_ids
=
dict
()
op_pos
=
{}
# Apply -> topological position
rval
=
[
'CLinker.cmodule_key'
]
# will be cast to tuple on return
if
compile_args
is
not
None
:
rval
.
append
(
tuple
(
compile_args
))
if
libraries
is
not
None
:
rval
.
append
(
tuple
(
libraries
))
version
=
[]
# assert that every input to every node is one of'
# - an env input
# - an output from a node in the Env
# - a Constant
# first we put the header, compile_args, library names into the signature
sig
=
[
'CLinker.cmodule_key'
]
# will be cast to tuple on return
if
compile_args
is
not
None
:
sig
.
append
(
tuple
(
compile_args
))
if
libraries
is
not
None
:
sig
.
append
(
tuple
(
libraries
))
def
in_sig
(
i
,
topological_pos
,
i_idx
):
# assert that every input to every node is one of'
# - an env input
# - an output from a node in the Env
# - a Constant
# It is important that a variable (i)
# yield a 'position' that reflects its role in code_gen()
def
graphpos
(
i
,
topological_pos
,
i_idx
):
rval
=
[]
# It is important that a variable (i)
# yield a 'position' that reflects its role in code_gen()
if
isinstance
(
i
,
graph
.
Constant
):
#orphans
if
id
(
i
)
not
in
constant_ids
:
constant_ids
[
id
(
i
)]
=
[
i
.
signature
(),
topological_pos
,
i_idx
]
rval
+
=
constant_ids
[
id
(
i
)]
constant_ids
[
id
(
i
)]
=
(
i
.
signature
(),
topological_pos
,
i_idx
)
isig
=
constant_ids
[
id
(
i
)]
#print 'SIGNATURE', i.signature()
#return i.signature()
elif
i
in
env_inputs_dict
:
#inputs
rval
+
=
env_inputs_dict
[
i
]
isig
=
env_inputs_dict
[
i
]
else
:
if
i
.
owner
is
None
:
assert
all
(
all
(
out
is
not
None
for
out
in
o
.
outputs
)
for
o
in
order
)
assert
all
(
input
.
owner
is
None
for
input
in
env
.
inputs
)
raise
Exception
(
'what is this?'
,
(
i
,
type
(
i
),
i
.
clients
,
env
))
if
i
in
env
.
outputs
:
rval
+=
[
op_pos
[
i
.
owner
],
# outputs
isig
=
(
op_pos
[
i
.
owner
],
# outputs
i
.
owner
.
outputs
.
index
(
i
),
env
.
outputs
.
index
(
i
)
]
env
.
outputs
.
index
(
i
)
)
else
:
rval
+=
[
op_pos
[
i
.
owner
],
i
.
owner
.
outputs
.
index
(
i
)]
# temps
assert
rval
rval
.
append
(
i
in
no_recycling
)
return
tuple
(
rval
)
isig
=
(
op_pos
[
i
.
owner
],
i
.
owner
.
outputs
.
index
(
i
))
# temps
return
(
isig
,
i
in
no_recycling
)
version
=
[]
for
node_pos
,
node
in
enumerate
(
order
):
version
.
append
(
node
.
op
.
c_code_cache_version_apply
(
node
))
for
i
in
node
.
inputs
:
version
.
append
(
i
.
type
.
c_code_cache_version
())
for
o
in
node
.
outputs
:
version
.
append
(
o
.
type
.
c_code_cache_version
())
rval
.
append
((
node
.
op
,
tuple
((
i
.
type
,
graphpos
(
i
,
node_pos
,
ipos
))
for
ipos
,
i
in
enumerate
(
node
.
inputs
))))
#add the signature for this node
sig
.
append
((
node
.
op
,
tuple
((
i
.
type
,
in_sig
(
i
,
node_pos
,
ipos
))
for
ipos
,
i
in
enumerate
(
node
.
inputs
)),
tuple
(
o
in
no_recycling
for
o
in
node
.
outputs
)))
op_pos
[
node
]
=
node_pos
env_computed_set
.
update
(
node
.
outputs
)
#crystalize the signature and version
sig
=
tuple
(
sig
)
version
=
tuple
(
version
)
for
v
in
version
:
if
not
v
:
#one of the ops or types here is unversioned
return
((),
tuple
(
rval
))
return
tuple
(
version
),
tuple
(
rval
)
if
not
v
:
# one of the ops or types here is unversioned,
# so this env is entirely unversioned
return
((),
sig
)
return
version
,
sig
def
compile_cmodule
(
self
,
location
=
None
):
"""
...
...
theano/gof/cmodule.py
浏览文件 @
cd644635
...
...
@@ -257,9 +257,13 @@ class ModuleCache(object):
warning
((
"The __eq__ and __hash__ functions are broken for some element"
" in the following two keys. The cache mechanism will say that"
" graphs like this need recompiling, when they could have been"
" retrieved
)
:"
))
" retrieved:"
))
warning
(
"Key 0:"
,
k0
)
warning
(
"Entry 0:"
,
self
.
entry_from_key
[
k0
])
warning
(
"hash 0:"
,
hash
(
k0
))
warning
(
"Key 1:"
,
k1
)
warning
(
"Entry 1:"
,
self
.
entry_from_key
[
k1
])
warning
(
"hash 1:"
,
hash
(
k1
))
def
refresh
(
self
):
"""Update self.entry_from_key by walking the cache directory structure.
...
...
theano/gof/link.py
浏览文件 @
cd644635
...
...
@@ -260,15 +260,15 @@ def streamline(env, thunks, order, post_thunk_old_storage = None, no_recycling =
(
len
(
thunks
),
len
(
post_thunk_old_storage
)))
def
streamline_default_f
():
for
x
in
no_recycling
:
x
[
0
]
=
None
try
:
for
thunk
,
node
,
old_storage
in
zip
(
thunks
,
order
,
post_thunk_old_storage
):
thunk
()
for
old_s
in
old_storage
:
old_s
[
0
]
=
None
except
:
raise_with_op
(
node
)
for
x
in
no_recycling
:
x
[
0
]
=
None
try
:
for
thunk
,
node
,
old_storage
in
zip
(
thunks
,
order
,
post_thunk_old_storage
):
thunk
()
for
old_s
in
old_storage
:
old_s
[
0
]
=
None
except
:
raise_with_op
(
node
)
f
=
streamline_default_f
elif
nice_errors
:
thunk_node_list
=
zip
(
thunks
,
order
)
...
...
theano/scalar/basic.py
浏览文件 @
cd644635
...
...
@@ -787,19 +787,17 @@ class Pow(BinaryScalarOp):
return
"
%(z)
s = pow(
%(x)
s,
%(y)
s);"
%
locals
()
def
grad
(
self
,
(
x
,
y
),
(
gz
,
)):
if
x
.
type
in
grad_types
:
first_part
=
gz
*
y
*
x
**
(
y
-
1
)
first_part
=
gz
*
y
*
x
**
(
y
-
1
)
else
:
first_part
=
None
first_part
=
None
if
y
.
type
in
grad_types
:
second_part
=
gz
*
log
(
x
)
*
x
**
y
second_part
=
gz
*
log
(
x
)
*
x
**
y
else
:
second_part
=
None
second_part
=
None
return
(
first_part
,
second_part
)
#return (gz * y * x**(y - 1) if x.type in grad_types else None,
# gz * log(x) * x**y if y.type in grad_types else None)
pow
=
Pow
(
upcast_out
,
name
=
'pow'
)
class
Clip
(
ScalarOp
):
...
...
theano/tensor/basic.py
浏览文件 @
cd644635
...
...
@@ -180,7 +180,24 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
assert
len
(
bcastable
)
==
ndim
try
:
return
rtype
(
TensorType
(
dtype
=
x_
.
dtype
,
broadcastable
=
bcastable
),
x_
,
name
=
name
)
if
rtype
is
TensorConstant
:
if
0
:
# put the shape into the type
# This is disabled because if a tensor has shape, then the following fails:
# theano.lvector == as_tensor_variable([0,1]).type
# I think the solution is that we should implement something more like
# compatibility instead of equality in our Type comparisons... but we're not
# there yet.
x_shape
=
x_
.
shape
else
:
x_shape
=
None
return
rtype
(
TensorType
(
dtype
=
x_
.
dtype
,
broadcastable
=
bcastable
,
shape
=
x_shape
),
x_
,
name
=
name
)
else
:
# leave the shape out of the type
return
rtype
(
TensorType
(
dtype
=
x_
.
dtype
,
broadcastable
=
bcastable
),
x_
,
name
=
name
)
except
:
raise
TypeError
(
"Could not convert
%
s to TensorType"
%
x
,
type
(
x
))
...
...
@@ -236,7 +253,7 @@ class TensorType(Type):
When this is True, strict filtering rejects data containing NaN or Inf entries. (Used in `DebugMode`)
"""
def
__init__
(
self
,
dtype
,
broadcastable
,
name
=
None
):
def
__init__
(
self
,
dtype
,
broadcastable
,
name
=
None
,
shape
=
None
):
"""Initialize self.dtype and self.broadcastable.
:Parameters:
...
...
@@ -256,6 +273,20 @@ class TensorType(Type):
self
.
broadcastable
=
tuple
(
broadcastable
)
self
.
dtype_specs
()
# error checking is done there
self
.
name
=
name
if
shape
is
None
:
self
.
shape
=
tuple
((
1
if
b
else
None
)
for
b
in
self
.
broadcastable
)
else
:
self
.
shape
=
tuple
(
shape
)
if
len
(
self
.
shape
)
!=
len
(
self
.
broadcastable
):
raise
ValueError
(
'shape and broadcastable must have equal lengths'
,
(
self
.
shape
,
self
.
broadcastable
))
def
__setstate__
(
self
,
dct
):
self
.
__dict__
.
update
(
dct
)
#add shape when unpickling old pickled things
if
'shape'
not
in
dct
:
self
.
shape
=
tuple
(
1
if
b
else
None
for
b
in
self
.
broadcastable
)
def
filter
(
self
,
data
,
strict
=
False
):
"""Convert `data` to something which can be associated to a `TensorVariable`.
...
...
@@ -273,6 +304,11 @@ class TensorType(Type):
raise
TypeError
(
"
%
s expected a ndarray object with
%
s dimensions (got
%
s)."
%
(
self
,
self
.
ndim
,
data
.
ndim
))
if
self
.
filter_checks_isfinite
and
(
not
numpy
.
all
(
numpy
.
isfinite
(
data
))):
raise
TypeError
(
"non-finite elements not allowed"
)
for
si
,
di
in
zip
(
self
.
shape
,
data
.
shape
):
if
not
(
si
is
None
or
si
==
di
):
raise
TypeError
(
'
%
s requires ndarray with shape matching
%
s (got
%
s)'
%
(
self
,
self
.
shape
,
data
.
shape
))
return
data
else
:
data
=
numpy
.
asarray
(
data
,
dtype
=
self
.
dtype
)
...
...
@@ -311,7 +347,9 @@ class TensorType(Type):
def
__eq__
(
self
,
other
):
"""Compare True iff other is the same kind of TensorType"""
return
type
(
self
)
==
type
(
other
)
and
other
.
dtype
==
self
.
dtype
and
other
.
broadcastable
==
self
.
broadcastable
return
type
(
self
)
==
type
(
other
)
and
other
.
dtype
==
self
.
dtype
\
and
other
.
broadcastable
==
self
.
broadcastable
\
and
other
.
shape
==
self
.
shape
@staticmethod
def
values_eq
(
a
,
b
):
...
...
@@ -382,7 +420,7 @@ class TensorType(Type):
def
__hash__
(
self
):
"""Hash equal for same kinds of TensorType"""
return
hashtype
(
self
)
^
hash
(
self
.
dtype
)
^
hash
(
self
.
broadcastable
)
return
hashtype
(
self
)
^
hash
(
self
.
dtype
)
^
hash
(
self
.
broadcastable
)
^
hash
(
self
.
shape
)
ndim
=
property
(
lambda
self
:
len
(
self
.
broadcastable
),
doc
=
"number of dimensions"
)
"""Number of dimensions
...
...
@@ -405,6 +443,8 @@ class TensorType(Type):
def
__str__
(
self
):
if
self
.
name
:
return
self
.
name
elif
not
all
(
None
==
si
for
si
in
self
.
shape
):
return
'TensorType{
%
s,
%
s}'
%
(
self
.
dtype
,
self
.
shape
)
else
:
b
=
self
.
broadcastable
named_broadcastable
=
{():
'scalar'
,
...
...
@@ -782,7 +822,6 @@ class _tensor_py_operators:
dtype
=
property
(
lambda
self
:
self
.
type
.
dtype
)
""" The dtype of this tensor. """
#extra pseudo-operator symbols
def
__dot__
(
left
,
right
):
return
dot
(
left
,
right
)
def
__rdot__
(
right
,
left
):
return
dot
(
left
,
right
)
...
...
@@ -806,6 +845,14 @@ class _tensor_py_operators:
"""See `theano.tensor.var`"""
return
var
(
self
,
axis
)
def
min
(
self
,
axis
=
None
):
"""See `theano.tensor.min`"""
return
min
(
self
,
axis
)
def
max
(
self
,
axis
=
None
):
"""See `theano.tensor.max`"""
return
max
(
self
,
axis
)
#TO TRUMP NUMPY OPERATORS
__array_priority__
=
1000
...
...
@@ -1051,11 +1098,25 @@ class Shape(Op):
out
[
0
]
=
numpy
.
asarray
(
x
.
shape
,
dtype
=
'int64'
)
def
grad
(
self
,
(
x
,),
(
gz
,)):
return
[
None
]
@_redefine_asRoutine
(
Shape
())
_shape
=
Shape
()
@constructor
def
shape
(
a
):
pass
"""Return the shape tuple of a TensorType Variable, it may be either symbolic or nonsymbolic.
pprint
.
assign
(
shape
,
printing
.
MemberPrinter
(
'shape'
))
If the shape of the expression is not known at graph-construction time, then a symbolic
lvector will be returned, corresponding to the actual shape at graph-execution time.
"""
va
=
as_tensor_variable
(
a
)
#print 'HERE', va, va.type
if
None
in
va
.
type
.
shape
:
# Some shape components are unknown at this time
return
_shape
(
va
)
else
:
# all shape components are known at compile time, so we return
# a tuple directly. This tuple is like the numpy.ndarray.shape tuple.
return
va
.
type
.
shape
pprint
.
assign
(
_shape
,
printing
.
MemberPrinter
(
'shape'
))
class
MaxAndArgmax
(
Op
):
...
...
@@ -2352,7 +2413,7 @@ def get_vector_length(v):
return
join
.
vec_length
(
v
)
except
ValueError
:
pass
if
v
.
owner
and
v
.
owner
.
op
==
shape
:
if
v
.
owner
and
v
.
owner
.
op
==
_
shape
:
return
v
.
owner
.
inputs
[
0
]
.
type
.
ndim
raise
ValueError
(
"length not known"
)
...
...
@@ -2806,6 +2867,11 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
if
not
isinstance
(
cost
,
TensorVariable
):
raise
TypeError
(
'In tensor.grad(), cost argument should be a TensorVariable.'
,
cost
)
if
cost
.
type
.
ndim
:
_warn
(
'the passing of a non-scalar cost to theano.tensor.grad() is deprecated.'
' Use the lower-level '
'theano.gradient if you really want to do this'
)
if
g_cost
is
None
:
g_cost
=
ones_like
(
cost
)
inputs
=
gof
.
graph
.
inputs
([
cost
])
...
...
theano/tensor/blas.py
浏览文件 @
cd644635
...
...
@@ -18,6 +18,7 @@ from theano import compile #to register the optimizer built by this file
from
theano.tensor.blas_headers
import
cblas_header_text
,
blas_header_text
_logger
=
logging
.
getLogger
(
'theano.tensor.blas'
)
_logger
.
setLevel
(
logging
.
INFO
)
def
debug
(
*
msg
):
_logger
.
debug
(
' '
.
join
(
str
(
m
)
for
m
in
msg
))
def
info
(
*
msg
):
_logger
.
info
(
' '
.
join
(
str
(
m
)
for
m
in
msg
))
def
warn
(
*
msg
):
_logger
.
warn
(
' '
.
join
(
str
(
m
)
for
m
in
msg
))
...
...
@@ -604,10 +605,15 @@ class Dot22(GemmRelated):
This is a specialization of the more general Dot()
"""
def
make_node
(
self
,
x
,
y
):
assert
_is_real_matrix
(
x
)
assert
y
.
type
==
x
.
type
#makes sure y is a matrix
if
not
_is_real_matrix
(
x
):
raise
TypeError
(
x
)
if
not
_is_real_matrix
(
x
):
raise
TypeError
(
y
)
if
y
.
type
.
dtype
!=
x
.
type
.
dtype
:
raise
TypeError
(
'dtype mismatch to Dot22'
)
out_shape
=
(
x
.
type
.
shape
[
0
],
y
.
type
.
shape
[
1
])
bz
=
[
False
,
False
]
outputs
=
[
T
.
tensor
(
x
.
type
.
dtype
,
bz
)]
outputs
=
[
T
.
tensor
(
x
.
type
.
dtype
,
bz
,
shape
=
out_shape
)]
return
Apply
(
self
,
[
x
,
y
],
outputs
)
def
perform
(
self
,
node
,
(
x
,
y
),
(
z
,
)):
...
...
@@ -660,10 +666,10 @@ _dot22 = Dot22()
def
local_dot_to_dot22
(
node
):
if
node
.
op
==
T
.
dot
:
x
,
y
=
node
.
inputs
if
_is_real_matrix
(
x
)
and
y
.
type
==
x
.
type
:
if
_is_real_matrix
(
x
)
and
_is_real_matrix
(
y
)
and
y
.
type
.
dtype
==
x
.
type
.
d
type
:
return
[
_dot22
(
*
node
.
inputs
)]
else
:
info
(
'Not optimizing dot with inputs'
,
x
,
y
)
info
(
'Not optimizing dot with inputs'
,
x
,
y
,
x
.
type
,
y
.
type
)
else
:
return
False
register_specialize
(
local_dot_to_dot22
)
...
...
theano/tensor/nnet.py
浏览文件 @
cd644635
...
...
@@ -142,9 +142,6 @@ class SoftmaxWithBias(gof.Op):
return
[
'<iostream>'
,
'<cmath>'
]
@staticmethod
def
c_code_cache_version
():
return
(
4
,)
@staticmethod
def
c_code_template
():
# this implementation was lifted from
# /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
...
...
@@ -180,7 +177,7 @@ class SoftmaxWithBias(gof.Op):
}
if ((
%(x)
s->dimensions[1] !=
%(b)
s->dimensions[0]))
{
PyErr_Format(PyExc_ValueError, "number of columns in x (
%%
i) does not match length of b (
%%
i)",
PyErr_Format(PyExc_ValueError, "number of columns in x (
%%
zi) does not match length of b (
%%
z
i)",
%(x)
s->dimensions[1],
%(b)
s->dimensions[0]);
%(fail)
s;
}
...
...
@@ -236,20 +233,6 @@ class SoftmaxWithBias(gof.Op):
sum += sm_ij;
sm_i[j * Ssm] = sm_ij;
}
//std::cout << "
\\
n";
if (std::isinf(sum))
{
//that was our best...
PyErr_SetString(PyExc_ValueError, "softmax is impossible (inf)!");
%(fail)
s;
}
if (0.0 == sum)
{
//that was our best...
PyErr_SetString(PyExc_ValueError, "softmax is impossible (zero)!");
%(fail)
s;
}
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum;
...
...
@@ -271,6 +254,10 @@ class SoftmaxWithBias(gof.Op):
code_template
=
''
.
join
(
self
.
c_code_template
())
return
code_template
%
dict
(
locals
(),
**
sub
)
@staticmethod
def
c_code_cache_version
():
return
(
5
,)
softmax_with_bias
=
SoftmaxWithBias
()
...
...
theano/tensor/opt.py
浏览文件 @
cd644635
...
...
@@ -196,20 +196,20 @@ def local_shape_lift_sum(node):
register_canonicalize
(
local_shape_lift_sum
,
'shape_lift'
)
@gof.local_optimizer
([
T
.
shape
,
T
.
dot
])
@gof.local_optimizer
([
T
.
_
shape
,
T
.
dot
])
def
local_shape_lift_dot
(
node
):
"""
shape(dot(a, b)) -> [shape(a)[0], shape(b)[1]]
"""
if
not
opt
.
check_chain
(
node
,
T
.
shape
,
T
.
dot
):
if
not
opt
.
check_chain
(
node
,
T
.
_
shape
,
T
.
dot
):
return
False
a
,
b
=
node
.
inputs
[
0
]
.
owner
.
inputs
if
a
.
type
.
ndim
==
2
and
b
.
type
.
ndim
==
2
:
return
T
.
make_lvector
.
make_node
(
T
.
shape
(
a
)[
0
],
T
.
shape
(
b
)[
1
])
.
outputs
return
T
.
make_lvector
.
make_node
(
T
.
_shape
(
a
)[
0
],
T
.
_
shape
(
b
)[
1
])
.
outputs
elif
a
.
type
.
ndim
==
1
and
b
.
type
.
ndim
==
2
:
return
T
.
make_lvector
.
make_node
(
T
.
shape
(
b
)[
1
])
.
outputs
return
T
.
make_lvector
.
make_node
(
T
.
_
shape
(
b
)[
1
])
.
outputs
elif
a
.
type
.
ndim
==
2
and
b
.
type
.
ndim
==
1
:
return
T
.
make_lvector
.
make_node
(
T
.
shape
(
a
)[
0
])
.
outputs
return
T
.
make_lvector
.
make_node
(
T
.
_
shape
(
a
)[
0
])
.
outputs
elif
a
.
type
.
ndim
==
1
and
b
.
type
.
ndim
==
1
:
return
T
.
make_lvector
.
make_node
()
.
outputs
else
:
...
...
theano/tensor/tests/test_nnet.py
浏览文件 @
cd644635
...
...
@@ -163,7 +163,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano
.
compile
.
mode
.
optdb
.
query
(
theano
.
compile
.
mode
.
OPT_FAST_RUN
)
.
optimize
(
env
)
assert
env
.
outputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
assert
str
(
env
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
env
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
def
test_softmax_optimizations_w_bias
(
self
):
x
=
tensor
.
matrix
(
'x'
)
...
...
@@ -186,9 +187,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano
.
compile
.
mode
.
optdb
.
query
(
theano
.
compile
.
mode
.
OPT_FAST_RUN
)
.
optimize
(
env
)
assert
len
(
env
.
toposort
())
==
1
assert
len
(
env
.
toposort
())
==
2
assert
env
.
outputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
assert
str
(
env
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
env
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
def
test_softmax_grad_optimizations
(
self
):
...
...
@@ -249,7 +251,7 @@ def test_argmax_pushdown():
#print 'AFTER'
#for node in env.toposort():
#print node.op
assert
len
(
env
.
toposort
())
==
1
assert
len
(
env
.
toposort
())
==
2
# an output_guard is second
assert
env
.
toposort
()[
0
]
.
op
==
tensor
.
_max_and_argmax
def
test_argmax_pushdown_bias
():
...
...
@@ -263,10 +265,14 @@ def test_argmax_pushdown_bias():
theano
.
compile
.
mode
.
optdb
.
query
(
theano
.
compile
.
mode
.
OPT_FAST_RUN
)
.
optimize
(
env
)
#print 'AFTER'
#for node in env.toposort():
#print node.op
assert
len
(
env
.
toposort
())
==
3
print
'AFTER'
for
node
in
env
.
toposort
():
print
node
.
op
assert
len
(
env
.
toposort
())
==
4
assert
isinstance
(
env
.
toposort
()[
0
]
.
op
,
tensor
.
DimShuffle
)
assert
isinstance
(
env
.
toposort
()[
1
]
.
op
,
tensor
.
Elemwise
)
assert
isinstance
(
env
.
toposort
()[
2
]
.
op
,
tensor
.
MaxAndArgmax
)
assert
str
(
env
.
toposort
()[
3
]
.
op
)
==
'OutputGuard'
def
test_asymptotic_32
():
"""
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
cd644635
...
...
@@ -246,16 +246,20 @@ class test_canonize(unittest.TestCase):
#We must be sure that the Canonizer is working, but that we don't have other
# optimisation that could hide bug in the Canonizer as local_elemwise_fusion
mode
=
compile
.
mode
.
predefined_modes
[
compile
.
mode
.
default_mode
]
mode
.
_optimizer
=
gof
.
Query
([
"canonicalize"
])
mode
.
_optimizer
=
mode
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
for
id
,
[
g
,
sym_inputs
,
val_inputs
,
nb_elemwise
,
out_dtype
]
in
enumerate
(
cases
):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
#we need the optimisation enabled, debug do this.
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
(
len
(
f
.
maker
.
env
.
toposort
())
==
nb_elemwise
)
assert
(
out_dtype
==
out
.
dtype
)
old_optimizer
=
mode
.
_optimizer
try
:
mode
.
_optimizer
=
gof
.
Query
([
"canonicalize"
])
mode
.
_optimizer
=
mode
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
for
id
,
[
g
,
sym_inputs
,
val_inputs
,
nb_elemwise
,
out_dtype
]
in
enumerate
(
cases
):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
#we need the optimisation enabled, debug do this.
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
(
len
(
f
.
maker
.
env
.
toposort
())
==
nb_elemwise
)
assert
(
out_dtype
==
out
.
dtype
)
finally
:
mode
.
_optimizer
=
old_optimizer
def
test_elemwise_multiple_inputs_optimisation2
(
self
):
"""
...
...
@@ -367,130 +371,134 @@ class test_canonize(unittest.TestCase):
#We must be sure that the Canonizer is working, but that we don't have other
# optimisation that could hide bug in the Canonizer as local_elemwise_fusion
mode
=
compile
.
mode
.
predefined_modes
[
compile
.
mode
.
default_mode
]
mode
.
_optimizer
=
gof
.
Query
([
"canonicalize"
])
mode
.
_optimizer
=
mode
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
#test x / x -> 1
for
id
,
(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
enumerate
([(
fx
/
fx
,[
fx
],[
fxv
],
'float32'
),
(
dx
/
dx
,[
dx
],[
dxv
],
'float64'
),
(
fv
/
fv
,[
fv
],[
fvv
],
'float32'
),
(
dv
/
dv
,[
dv
],[
dvv
],
'float64'
),
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
(
out
==
numpy
.
ones
(
shp
,
dtype
=
out_dtype
))
.
all
()
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
1
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
Second
)
assert
len
(
topo
[
0
]
.
inputs
)
==
2
assert
(
out_dtype
==
out
.
dtype
)
#test (x * y) / x -> y
for
id
,(
g
,
sym_inputs
,
val_inputs
,
nb_elemwise
,
out_dtype
)
in
enumerate
([
((
dx
*
dy
)
/
dx
,[
dx
,
dy
],[
dxv
,
dyv
],
0
,
'float64'
),
((
fx
*
fy
)
/
fx
,[
fx
,
fy
],[
fxv
,
fyv
],
0
,
'float32'
),
((
dv
*
dy
)
/
dv
,[
dv
,
dy
],[
dvv
,
dyv
],
0
,
'float64'
),
((
fv
*
fy
)
/
fv
,[
fv
,
fy
],[
fvv
,
fyv
],
0
,
'float32'
),
#must broadcast as their is a dimshuffle in the computation
((
dx
*
dv
)
/
dx
,[
dx
,
dv
],[
dxv
,
dvv
],
1
,
'float64'
),
#topo: [Elemwise{second,no_inplace}(x, <TensorType(float64, row)>)]
((
fx
*
fv
)
/
fx
,[
fx
,
fv
],[
fxv
,
fvv
],
1
,
'float32'
)
#topo: [Elemwise{second,no_inplace}(x, <TensorType(float32, row)>)]
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,
val_inputs
[
1
])
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
nb_elemwise
assert
(
out_dtype
==
out
.
dtype
)
#test x / y / x -> 1 / y
for
id
,(
g
,
sym_inputs
,
val_inputs
,
nb_elemwise
,
out_dtype
)
in
enumerate
([
((
dx
/
dy
)
/
dx
,[
dx
,
dy
],[
dxv
,
dyv
],
1
,
'float64'
),
((
fx
/
fy
)
/
fx
,[
fx
,
fy
],[
fxv
,
fyv
],
1
,
'float32'
),
((
dv
/
dy
)
/
dv
,[
dv
,
dy
],[
dvv
,
dyv
],
1
,
'float64'
),
((
fv
/
fy
)
/
fv
,[
fv
,
fy
],[
fvv
,
fyv
],
1
,
'float32'
),
#must broadcast as their is a dimshuffle in the computation
((
dx
/
dv
)
/
dx
,[
dx
,
dv
],[
dxv
,
dvv
],
2
,
'float64'
),
#topo: [Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
((
fx
/
fv
)
/
fx
,[
fx
,
fv
],[
fxv
,
fvv
],
2
,
'float32'
),
#topo:[Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,(
1
/
val_inputs
[
1
]))
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
nb_elemwise
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,(
theano
.
scalar
.
basic
.
Inv
,
theano
.
scalar
.
basic
.
TrueDiv
))
assert
(
out_dtype
==
out
.
dtype
)
#test (a / b) * (b / c) * (c / d) -> a / d
for
id
,(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
enumerate
([
((
dx
/
dy
)
*
(
dy
/
dz
)
*
(
dz
/
dw
),[
dx
,
dy
,
dz
,
dw
],[
dxv
,
dyv
,
dzv
,
dwv
],
'float64'
),
((
fx
/
fy
)
*
(
fy
/
fz
)
*
(
fz
/
fw
),[
fx
,
fy
,
fz
,
fw
],[
fxv
,
fyv
,
fzv
,
fwv
],
'float32'
),
((
dv
/
dy
)
*
(
dy
/
dz
)
*
(
dz
/
dw
),[
dv
,
dy
,
dz
,
dw
],[
dvv
,
dyv
,
dzv
,
dwv
],
'float64'
),
((
fv
/
fy
)
*
(
fy
/
fz
)
*
(
fz
/
fw
),[
fv
,
fy
,
fz
,
fw
],[
fvv
,
fyv
,
fzv
,
fwv
],
'float32'
),
((
dx
/
dv
)
*
(
dv
/
dz
)
*
(
dz
/
dw
),[
dx
,
dv
,
dz
,
dw
],[
dxv
,
dvv
,
dzv
,
dwv
],
'float64'
),
((
fx
/
fv
)
*
(
fv
/
fz
)
*
(
fz
/
fw
),[
fx
,
fv
,
fz
,
fw
],[
fxv
,
fvv
,
fzv
,
fwv
],
'float32'
),
((
dx
/
dy
)
*
(
dy
/
dv
)
*
(
dv
/
dw
),[
dx
,
dy
,
dv
,
dw
],[
dxv
,
dyv
,
dvv
,
dwv
],
'float64'
),
((
fx
/
fy
)
*
(
fy
/
fv
)
*
(
fv
/
fw
),[
fx
,
fy
,
fv
,
fw
],[
fxv
,
fyv
,
fvv
,
fwv
],
'float32'
),
((
dx
/
dy
)
*
(
dy
/
dz
)
*
(
dz
/
dv
),[
dx
,
dy
,
dz
,
dv
],[
dxv
,
dyv
,
dzv
,
dvv
],
'float64'
),
((
fx
/
fy
)
*
(
fy
/
fz
)
*
(
fz
/
fv
),[
fx
,
fy
,
fz
,
fv
],[
fxv
,
fyv
,
fzv
,
fvv
],
'float32'
),
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,(
val_inputs
[
0
]
/
val_inputs
[
3
]))
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
1
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
TrueDiv
)
assert
len
(
topo
[
0
]
.
inputs
)
==
2
assert
(
out_dtype
==
out
.
dtype
)
#test (2.0 * x) / (4.0 * y) -> (0.5 * x) / y
for
id
,(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
enumerate
([
(((
2.0
*
dx
)
/
(
4.0
*
dy
)),[
dx
,
dy
],[
dxv
,
dyv
],
'float64'
),
(((
2.0
*
fx
)
/
(
4.0
*
fy
)),[
fx
,
fy
],[
fxv
,
fyv
],
'float32'
),
(((
2.0
*
dv
)
/
(
4.0
*
dy
)),[
dv
,
dy
],[
dvv
,
dyv
],
'float64'
),
(((
2.0
*
fv
)
/
(
4.0
*
fy
)),[
fv
,
fy
],[
fvv
,
fyv
],
'float32'
),
(((
2.0
*
dx
)
/
(
4.0
*
dv
)),[
dx
,
dv
],[
dxv
,
dvv
],
'float64'
),
(((
2.0
*
fx
)
/
(
4.0
*
fv
)),[
fx
,
fv
],[
fxv
,
fvv
],
'float32'
),
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,(
0.5
*
val_inputs
[
0
]
/
val_inputs
[
1
]))
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
2
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
Mul
)
assert
len
(
topo
[
0
]
.
inputs
)
==
2
assert
isinstance
(
topo
[
1
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
1
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
TrueDiv
)
assert
len
(
topo
[
1
]
.
inputs
)
==
2
assert
(
out_dtype
==
out
.
dtype
)
#test 2 * x / 2 -> x
for
id
,(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
enumerate
([
((
2
*
dx
)
/
2
,[
dx
],[
dxv
],
'float64'
),
((
2
*
fx
)
/
2
,[
fx
],[
fxv
],
'float32'
),
((
2
*
dv
)
/
2
,[
dv
],[
dvv
],
'float64'
),
((
2
*
fv
)
/
2
,[
fv
],[
fvv
],
'float32'
),
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,
val_inputs
[
0
])
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
0
assert
(
out_dtype
==
out
.
dtype
)
old_optimizer
=
mode
.
_optimizer
try
:
mode
.
_optimizer
=
gof
.
Query
([
"canonicalize"
])
mode
.
_optimizer
=
mode
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
#test x / x -> 1
for
id
,
(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
enumerate
([(
fx
/
fx
,[
fx
],[
fxv
],
'float32'
),
(
dx
/
dx
,[
dx
],[
dxv
],
'float64'
),
(
fv
/
fv
,[
fv
],[
fvv
],
'float32'
),
(
dv
/
dv
,[
dv
],[
dvv
],
'float64'
),
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
(
out
==
numpy
.
ones
(
shp
,
dtype
=
out_dtype
))
.
all
()
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
1
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
Second
)
assert
len
(
topo
[
0
]
.
inputs
)
==
2
assert
(
out_dtype
==
out
.
dtype
)
#test (x * y) / x -> y
for
id
,(
g
,
sym_inputs
,
val_inputs
,
nb_elemwise
,
out_dtype
)
in
enumerate
([
((
dx
*
dy
)
/
dx
,[
dx
,
dy
],[
dxv
,
dyv
],
0
,
'float64'
),
((
fx
*
fy
)
/
fx
,[
fx
,
fy
],[
fxv
,
fyv
],
0
,
'float32'
),
((
dv
*
dy
)
/
dv
,[
dv
,
dy
],[
dvv
,
dyv
],
0
,
'float64'
),
((
fv
*
fy
)
/
fv
,[
fv
,
fy
],[
fvv
,
fyv
],
0
,
'float32'
),
#must broadcast as their is a dimshuffle in the computation
((
dx
*
dv
)
/
dx
,[
dx
,
dv
],[
dxv
,
dvv
],
1
,
'float64'
),
#topo: [Elemwise{second,no_inplace}(x, <TensorType(float64, row)>)]
((
fx
*
fv
)
/
fx
,[
fx
,
fv
],[
fxv
,
fvv
],
1
,
'float32'
)
#topo: [Elemwise{second,no_inplace}(x, <TensorType(float32, row)>)]
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,
val_inputs
[
1
])
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
nb_elemwise
assert
(
out_dtype
==
out
.
dtype
)
#test x / y / x -> 1 / y
for
id
,(
g
,
sym_inputs
,
val_inputs
,
nb_elemwise
,
out_dtype
)
in
enumerate
([
((
dx
/
dy
)
/
dx
,[
dx
,
dy
],[
dxv
,
dyv
],
1
,
'float64'
),
((
fx
/
fy
)
/
fx
,[
fx
,
fy
],[
fxv
,
fyv
],
1
,
'float32'
),
((
dv
/
dy
)
/
dv
,[
dv
,
dy
],[
dvv
,
dyv
],
1
,
'float64'
),
((
fv
/
fy
)
/
fv
,[
fv
,
fy
],[
fvv
,
fyv
],
1
,
'float32'
),
#must broadcast as their is a dimshuffle in the computation
((
dx
/
dv
)
/
dx
,[
dx
,
dv
],[
dxv
,
dvv
],
2
,
'float64'
),
#topo: [Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
((
fx
/
fv
)
/
fx
,[
fx
,
fv
],[
fxv
,
fvv
],
2
,
'float32'
),
#topo:[Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,(
1
/
val_inputs
[
1
]))
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
nb_elemwise
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,(
theano
.
scalar
.
basic
.
Inv
,
theano
.
scalar
.
basic
.
TrueDiv
))
assert
(
out_dtype
==
out
.
dtype
)
#test (a / b) * (b / c) * (c / d) -> a / d
for
id
,(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
enumerate
([
((
dx
/
dy
)
*
(
dy
/
dz
)
*
(
dz
/
dw
),[
dx
,
dy
,
dz
,
dw
],[
dxv
,
dyv
,
dzv
,
dwv
],
'float64'
),
((
fx
/
fy
)
*
(
fy
/
fz
)
*
(
fz
/
fw
),[
fx
,
fy
,
fz
,
fw
],[
fxv
,
fyv
,
fzv
,
fwv
],
'float32'
),
((
dv
/
dy
)
*
(
dy
/
dz
)
*
(
dz
/
dw
),[
dv
,
dy
,
dz
,
dw
],[
dvv
,
dyv
,
dzv
,
dwv
],
'float64'
),
((
fv
/
fy
)
*
(
fy
/
fz
)
*
(
fz
/
fw
),[
fv
,
fy
,
fz
,
fw
],[
fvv
,
fyv
,
fzv
,
fwv
],
'float32'
),
((
dx
/
dv
)
*
(
dv
/
dz
)
*
(
dz
/
dw
),[
dx
,
dv
,
dz
,
dw
],[
dxv
,
dvv
,
dzv
,
dwv
],
'float64'
),
((
fx
/
fv
)
*
(
fv
/
fz
)
*
(
fz
/
fw
),[
fx
,
fv
,
fz
,
fw
],[
fxv
,
fvv
,
fzv
,
fwv
],
'float32'
),
((
dx
/
dy
)
*
(
dy
/
dv
)
*
(
dv
/
dw
),[
dx
,
dy
,
dv
,
dw
],[
dxv
,
dyv
,
dvv
,
dwv
],
'float64'
),
((
fx
/
fy
)
*
(
fy
/
fv
)
*
(
fv
/
fw
),[
fx
,
fy
,
fv
,
fw
],[
fxv
,
fyv
,
fvv
,
fwv
],
'float32'
),
((
dx
/
dy
)
*
(
dy
/
dz
)
*
(
dz
/
dv
),[
dx
,
dy
,
dz
,
dv
],[
dxv
,
dyv
,
dzv
,
dvv
],
'float64'
),
((
fx
/
fy
)
*
(
fy
/
fz
)
*
(
fz
/
fv
),[
fx
,
fy
,
fz
,
fv
],[
fxv
,
fyv
,
fzv
,
fvv
],
'float32'
),
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,(
val_inputs
[
0
]
/
val_inputs
[
3
]))
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
1
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
TrueDiv
)
assert
len
(
topo
[
0
]
.
inputs
)
==
2
assert
(
out_dtype
==
out
.
dtype
)
#test (2.0 * x) / (4.0 * y) -> (0.5 * x) / y
for
id
,(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
enumerate
([
(((
2.0
*
dx
)
/
(
4.0
*
dy
)),[
dx
,
dy
],[
dxv
,
dyv
],
'float64'
),
(((
2.0
*
fx
)
/
(
4.0
*
fy
)),[
fx
,
fy
],[
fxv
,
fyv
],
'float32'
),
(((
2.0
*
dv
)
/
(
4.0
*
dy
)),[
dv
,
dy
],[
dvv
,
dyv
],
'float64'
),
(((
2.0
*
fv
)
/
(
4.0
*
fy
)),[
fv
,
fy
],[
fvv
,
fyv
],
'float32'
),
(((
2.0
*
dx
)
/
(
4.0
*
dv
)),[
dx
,
dv
],[
dxv
,
dvv
],
'float64'
),
(((
2.0
*
fx
)
/
(
4.0
*
fv
)),[
fx
,
fv
],[
fxv
,
fvv
],
'float32'
),
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,(
0.5
*
val_inputs
[
0
]
/
val_inputs
[
1
]))
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
2
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
Mul
)
assert
len
(
topo
[
0
]
.
inputs
)
==
2
assert
isinstance
(
topo
[
1
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
1
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
TrueDiv
)
assert
len
(
topo
[
1
]
.
inputs
)
==
2
assert
(
out_dtype
==
out
.
dtype
)
#test 2 * x / 2 -> x
for
id
,(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
enumerate
([
((
2
*
dx
)
/
2
,[
dx
],[
dxv
],
'float64'
),
((
2
*
fx
)
/
2
,[
fx
],[
fxv
],
'float32'
),
((
2
*
dv
)
/
2
,[
dv
],[
dvv
],
'float64'
),
((
2
*
fv
)
/
2
,[
fv
],[
fvv
],
'float32'
),
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,
val_inputs
[
0
])
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
0
assert
(
out_dtype
==
out
.
dtype
)
finally
:
mode
.
_optimizer
=
old_optimizer
def
test_multiple_case_that_fail
(
self
):
...
...
@@ -510,43 +518,48 @@ class test_canonize(unittest.TestCase):
#We must be sure that the Canonizer is working, but that we don't have other
# optimisation that could hide bug in the Canonizer as local_elemwise_fusion
mode
=
compile
.
mode
.
predefined_modes
[
compile
.
mode
.
default_mode
]
mode
.
_optimizer
=
gof
.
Query
([
"canonicalize"
])
mode
.
_optimizer
=
mode
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
#test fail!
#test x / y / z -> x / (y * z)
for
(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
[
((
dx
/
dy
)
/
dz
,[
dx
,
dy
,
dz
],[
dxv
,
dyv
,
dzv
],
'float64'
),
((
fx
/
fy
)
/
fz
,[
fx
,
fy
,
fz
],[
fxv
,
fyv
,
fzv
],
'float32'
)
]:
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,
val_inputs
[
0
]
/
val_inputs
[
1
]
/
val_inputs
[
2
])
topo
=
f
.
maker
.
env
.
toposort
()
print
topo
assert
len
(
topo
)
==
2
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
Inv
)
assert
len
(
topo
[
0
]
.
inputs
)
==
1
assert
(
out_dtype
==
out
.
dtype
)
#test x / (y / z) -> (x * z) / y
for
(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
[
(
dx
/
(
dy
/
dz
),[
dx
,
dy
,
dz
],[
dxv
,
dyv
,
dzv
],
'float64'
),
(
fx
/
(
fy
/
fz
),[
fx
,
fy
,
fz
],[
fxv
,
fyv
,
fzv
],
'float32'
)
]:
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,
val_inputs
[
0
]
/
(
val_inputs
[
1
]
/
val_inputs
[
2
]))
topo
=
f
.
maker
.
env
.
toposort
()
print
topo
assert
len
(
topo
)
==
2
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
Inv
)
assert
len
(
topo
[
0
]
.
inputs
)
==
1
assert
(
out_dtype
==
out
.
dtype
)
old_optimizer
=
mode
.
_optimizer
try
:
mode
.
_optimizer
=
gof
.
Query
([
"canonicalize"
])
mode
.
_optimizer
=
mode
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
#test fail!
#test x / y / z -> x / (y * z)
for
(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
[
((
dx
/
dy
)
/
dz
,[
dx
,
dy
,
dz
],[
dxv
,
dyv
,
dzv
],
'float64'
),
((
fx
/
fy
)
/
fz
,[
fx
,
fy
,
fz
],[
fxv
,
fyv
,
fzv
],
'float32'
)
]:
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,
val_inputs
[
0
]
/
val_inputs
[
1
]
/
val_inputs
[
2
])
topo
=
f
.
maker
.
env
.
toposort
()
print
topo
assert
len
(
topo
)
==
2
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
Inv
)
assert
len
(
topo
[
0
]
.
inputs
)
==
1
assert
(
out_dtype
==
out
.
dtype
)
#test x / (y / z) -> (x * z) / y
for
(
g
,
sym_inputs
,
val_inputs
,
out_dtype
)
in
[
(
dx
/
(
dy
/
dz
),[
dx
,
dy
,
dz
],[
dxv
,
dyv
,
dzv
],
'float64'
),
(
fx
/
(
fy
/
fz
),[
fx
,
fy
,
fz
],[
fxv
,
fyv
,
fzv
],
'float32'
)
]:
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,
val_inputs
[
0
]
/
(
val_inputs
[
1
]
/
val_inputs
[
2
]))
topo
=
f
.
maker
.
env
.
toposort
()
print
topo
assert
len
(
topo
)
==
2
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
Inv
)
assert
len
(
topo
[
0
]
.
inputs
)
==
1
assert
(
out_dtype
==
out
.
dtype
)
finally
:
mode
.
_optimizer
=
old_optimizer
def
test_dont_merge_if_multiple_client
(
self
):
""" test those case take from the comment in Canonizer
...
...
@@ -571,10 +584,16 @@ def test_local_shape_lift_dot():
for
y
in
[
fvector
,
fmatrix
]:
i
=
x
()
j
=
y
()
print
'I SHAPE'
,
i
.
type
.
shape
print
'J SHAPE'
,
j
.
type
.
shape
d
=
shape
(
dot
(
i
,
j
))
g
=
Env
([
i
,
j
],
[
d
])
gof
.
TopoOptimizer
(
gof
.
LocalOptGroup
(
local_shape_lift_dot
),
order
=
'out_to_in'
)
.
optimize
(
g
)
assert
pprint
(
g
.
outputs
[
0
])
==
args_to_result
[(
x
,
y
)]
if
x
is
fvector
and
y
is
fvector
:
assert
d
==
()
else
:
g
=
Env
([
i
,
j
],
[
d
])
gof
.
TopoOptimizer
(
gof
.
LocalOptGroup
(
local_shape_lift_dot
),
order
=
'out_to_in'
)
.
optimize
(
g
)
print
pprint
(
g
.
outputs
[
0
]),
args_to_result
[(
x
,
y
)]
assert
pprint
(
g
.
outputs
[
0
])
==
args_to_result
[(
x
,
y
)]
# def test_plusmin(self):
# x, y, z = inputs()
...
...
@@ -982,23 +1001,27 @@ class test_fusion(unittest.TestCase):
#Follow up. Clinker do the same... second cause?
mode2
=
compile
.
Mode
(
linker
(),
copy
.
copy
(
compile
.
mode
.
OPT_FAST_RUN
))
# mode2=copy.copy(compile.mode.predefined_modes['FAST_RUN'])
mode2
.
_optimizer
=
mode2
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
# mode2=compile.Mode(gof.OpWiseCLinker(allow_gc=True), compile.mode.OPT_FAST_COMPILE)
if
s
is
None
:
s
=
slice
(
0
,
49
)
#s=slice(49,59)
nb_repeat
=
10
print
"test with linker"
,
str
(
linker
)
times1
=
self
.
do
(
mode1
,
shared_fn
,
shp
,
gpu
=
gpu
,
nb_repeat
=
nb_repeat
,
assert_len_topo
=
False
,
slice
=
s
)
times2
=
self
.
do
(
mode2
,
shared_fn
,
shp
,
gpu
=
gpu
,
nb_repeat
=
nb_repeat
,
assert_len_topo
=
False
,
slice
=
s
)
print
"times1 FAST_RUN optimisation"
print
times1
,
times1
.
min
(),
times1
.
max
(),
times1
.
sum
()
print
"times2 FAST_RUN optimisation without local_elemwise_fusion"
print
times2
,
times2
.
min
(),
times2
.
max
(),
times2
.
sum
()
d
=
times2
/
times1
# d.sort()
print
"times2/times1"
,
d
,
d
.
min
(),
d
.
max
(),
d
.
mean
(),
d
.
std
()
old_optimizer
=
mode2
.
_optimizer
try
:
mode2
.
_optimizer
=
mode2
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
# mode2=compile.Mode(gof.OpWiseCLinker(allow_gc=True), compile.mode.OPT_FAST_COMPILE)
if
s
is
None
:
s
=
slice
(
0
,
49
)
#s=slice(49,59)
nb_repeat
=
10
print
"test with linker"
,
str
(
linker
)
times1
=
self
.
do
(
mode1
,
shared_fn
,
shp
,
gpu
=
gpu
,
nb_repeat
=
nb_repeat
,
assert_len_topo
=
False
,
slice
=
s
)
times2
=
self
.
do
(
mode2
,
shared_fn
,
shp
,
gpu
=
gpu
,
nb_repeat
=
nb_repeat
,
assert_len_topo
=
False
,
slice
=
s
)
print
"times1 FAST_RUN optimisation"
print
times1
,
times1
.
min
(),
times1
.
max
(),
times1
.
sum
()
print
"times2 FAST_RUN optimisation without local_elemwise_fusion"
print
times2
,
times2
.
min
(),
times2
.
max
(),
times2
.
sum
()
d
=
times2
/
times1
# d.sort()
print
"times2/times1"
,
d
,
d
.
min
(),
d
.
max
(),
d
.
mean
(),
d
.
std
()
finally
:
mode2
.
_optimizer
=
old_optimizer
def
speed_fusion_gpu
(
self
):
import
theano_cuda_ndarray
as
tcn
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论