Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
af804be5
提交
af804be5
authored
2月 18, 2010
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
差异文件
merge
上级
1364578a
8963a3d1
隐藏空白字符变更
内嵌
并排
正在显示
18 个修改的文件
包含
1376 行增加
和
521 行删除
+1376
-521
debugmode.py
theano/compile/debugmode.py
+11
-3
mode.py
theano/compile/mode.py
+24
-8
opt.py
theano/gof/opt.py
+26
-44
optdb.py
theano/gof/optdb.py
+37
-7
printing.py
theano/printing.py
+46
-3
var.py
theano/sandbox/cuda/var.py
+6
-1
scan.py
theano/scan.py
+5
-3
basic.py
theano/tensor/basic.py
+197
-113
blas.py
theano/tensor/blas.py
+48
-14
elemwise.py
theano/tensor/elemwise.py
+45
-3
__init__.py
theano/tensor/nnet/__init__.py
+1
-0
nnet.py
theano/tensor/nnet/nnet.py
+2
-87
sigm.py
theano/tensor/nnet/sigm.py
+315
-0
test_sigm.py
theano/tensor/nnet/tests/test_sigm.py
+75
-0
opt.py
theano/tensor/opt.py
+419
-172
raw_random.py
theano/tensor/raw_random.py
+17
-4
test_opt.py
theano/tensor/tests/test_opt.py
+91
-52
test_shared_randomstreams.py
theano/tensor/tests/test_shared_randomstreams.py
+11
-7
没有找到文件。
theano/compile/debugmode.py
浏览文件 @
af804be5
...
@@ -197,6 +197,11 @@ class BadOptimization(DebugModeError):
...
@@ -197,6 +197,11 @@ class BadOptimization(DebugModeError):
print
>>
ssio
,
" Mean Abs Diff: "
,
numpy
.
mean
(
numpy
.
absolute
(
nv
-
ov
))
print
>>
ssio
,
" Mean Abs Diff: "
,
numpy
.
mean
(
numpy
.
absolute
(
nv
-
ov
))
print
>>
ssio
,
" Median Abs Diff: "
,
numpy
.
median
(
numpy
.
absolute
(
nv
-
ov
))
print
>>
ssio
,
" Median Abs Diff: "
,
numpy
.
median
(
numpy
.
absolute
(
nv
-
ov
))
print
>>
ssio
,
" Std Abs Diff: "
,
numpy
.
std
(
numpy
.
absolute
(
nv
-
ov
))
print
>>
ssio
,
" Std Abs Diff: "
,
numpy
.
std
(
numpy
.
absolute
(
nv
-
ov
))
reldiff
=
numpy
.
absolute
(
nv
-
ov
)
/
(
numpy
.
absolute
(
nv
)
+
numpy
.
absolute
(
ov
))
print
>>
ssio
,
" Max Rel Diff: "
,
numpy
.
max
(
reldiff
)
print
>>
ssio
,
" Mean Rel Diff: "
,
numpy
.
mean
(
reldiff
)
print
>>
ssio
,
" Median Rel Diff: "
,
numpy
.
median
(
reldiff
)
print
>>
ssio
,
" Std Rel Diff: "
,
numpy
.
std
(
reldiff
)
# only if all succeeds to we add anything to sio
# only if all succeeds to we add anything to sio
print
>>
sio
,
ssio
.
getvalue
()
print
>>
sio
,
ssio
.
getvalue
()
except
:
except
:
...
@@ -349,14 +354,17 @@ def debugprint(r, prefix='', depth=-1, done=None, file=sys.stdout):
...
@@ -349,14 +354,17 @@ def debugprint(r, prefix='', depth=-1, done=None, file=sys.stdout):
# this variable is the output of computation,
# this variable is the output of computation,
# so just print out the apply
# so just print out the apply
a
=
r
.
owner
a
=
r
.
owner
print
>>
file
,
prefix
,
a
.
op
,
id
(
a
)
if
len
(
a
.
outputs
)
==
1
:
print
>>
file
,
'
%
s
%
s [@
%
i]'
%
(
prefix
,
a
.
op
,
id
(
r
))
else
:
print
>>
file
,
'
%
s
%
s.
%
i [@
%
i]'
%
(
prefix
,
a
.
op
,
a
.
outputs
.
index
(
r
),
id
(
r
))
if
id
(
a
)
not
in
done
:
if
id
(
a
)
not
in
done
:
done
.
add
(
id
(
a
))
done
.
add
(
id
(
a
))
for
i
in
a
.
inputs
:
for
i
in
a
.
inputs
:
debugprint
(
i
,
prefix
+
'
'
,
depth
=
depth
-
1
,
done
=
done
,
file
=
file
)
debugprint
(
i
,
prefix
+
'
|
'
,
depth
=
depth
-
1
,
done
=
done
,
file
=
file
)
else
:
else
:
#this is a variable
#this is a variable
print
>>
file
,
prefix
,
r
,
id
(
r
)
print
>>
file
,
'
%
s
%
s [@
%
i]'
%
(
prefix
,
r
,
id
(
r
)
)
return
file
return
file
...
...
theano/compile/mode.py
浏览文件 @
af804be5
...
@@ -116,7 +116,7 @@ class AddDestroyHandler(gof.Optimizer):
...
@@ -116,7 +116,7 @@ class AddDestroyHandler(gof.Optimizer):
for
o
in
env
.
outputs
:
for
o
in
env
.
outputs
:
try
:
try
:
env
.
replace_validate
(
o
,
_output_guard
(
o
),
reason
=
'output_guard'
)
env
.
replace_validate
(
o
,
_output_guard
(
o
),
reason
=
'output_guard'
)
_logger
.
warning
(
"Output variable
%
s required output_guard,"
_logger
.
info
(
"Output variable
%
s required output_guard,"
" how was this output left unprotected against destructive operations?"
" how was this output left unprotected against destructive operations?"
%
o
)
%
o
)
except
gof
.
InconsistencyError
:
except
gof
.
InconsistencyError
:
...
@@ -127,12 +127,22 @@ class AddDestroyHandler(gof.Optimizer):
...
@@ -127,12 +127,22 @@ class AddDestroyHandler(gof.Optimizer):
env
.
extend
(
gof
.
DestroyHandler
())
env
.
extend
(
gof
.
DestroyHandler
())
optdb
=
gof
.
SequenceDB
()
optdb
=
gof
.
SequenceDB
()
optdb
.
register
(
'merge1'
,
gof
.
MergeOptimizer
(),
0
,
'fast_run'
,
'fast_compile'
)
optdb
.
register
(
'merge1'
,
gof
.
MergeOptimizer
(),
optdb
.
register
(
'canonicalize'
,
gof
.
EquilibriumDB
(),
1
,
'fast_run'
)
0
,
'fast_run'
,
'fast_compile'
)
optdb
.
register
(
'specialize'
,
gof
.
EquilibriumDB
(),
2
,
'fast_run'
)
optdb
.
register
(
'canonicalize'
,
gof
.
EquilibriumDB
(),
# rearranges elemwise expressions
optdb
.
register
(
'merge2'
,
gof
.
MergeOptimizer
(),
49
,
'fast_run'
)
1
,
'fast_run'
)
optdb
.
register
(
'add_destroy_handler'
,
AddDestroyHandler
(),
49.5
,
'fast_run'
,
'inplace'
)
optdb
.
register
(
'merge1.2'
,
gof
.
MergeOptimizer
(
skip_const_merge
=
True
),
optdb
.
register
(
'merge3'
,
gof
.
MergeOptimizer
(),
100
,
'fast_run'
)
1.2
,
'fast_run'
,
'fast_compile'
)
optdb
.
register
(
'stabilize'
,
gof
.
EquilibriumDB
(),
# replace unstable subgraphs
1.5
,
'fast_run'
)
optdb
.
register
(
'specialize'
,
gof
.
EquilibriumDB
(),
# misc special cases for speed
2
,
'fast_run'
)
optdb
.
register
(
'merge2'
,
gof
.
MergeOptimizer
(),
# especially constant merge
49
,
'fast_run'
)
optdb
.
register
(
'add_destroy_handler'
,
AddDestroyHandler
(),
49.5
,
'fast_run'
,
'inplace'
)
optdb
.
register
(
'merge3'
,
gof
.
MergeOptimizer
(),
# final pass just to make sure
100
,
'fast_run'
)
class
Mode
(
object
):
class
Mode
(
object
):
...
@@ -153,6 +163,12 @@ class Mode(object):
...
@@ -153,6 +163,12 @@ class Mode(object):
def
__init__
(
self
,
linker
=
config
.
linker
,
optimizer
=
config
.
optimizer
):
def
__init__
(
self
,
linker
=
config
.
linker
,
optimizer
=
config
.
optimizer
):
self
.
__setstate__
((
linker
,
optimizer
))
self
.
__setstate__
((
linker
,
optimizer
))
#self.provided_optimizer - typically the `optimizer` arg. But if the `optimizer` arg is
# keyword corresponding to a predefined Query, then this stores the query
#self._optimizer - typically same as provided_optimizer??
#self.__get_optimizer - returns self._optimizer (possibly querying optdb with self._optimizer)
#self.optimizer - property that returns __get_optimizer()
def
__getstate__
(
self
):
def
__getstate__
(
self
):
return
(
self
.
provided_linker
,
self
.
provided_optimizer
)
return
(
self
.
provided_linker
,
self
.
provided_optimizer
)
...
@@ -218,7 +234,7 @@ predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
...
@@ -218,7 +234,7 @@ predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
def
get_mode
(
string
):
def
get_mode
(
string
):
if
string
is
None
:
string
=
config
.
mode
if
string
is
None
:
string
=
config
.
mode
if
not
isinstance
(
string
,
str
):
return
string
#it is already a mode...
if
not
isinstance
(
string
,
str
):
return
string
#it is
hopefully
already a mode...
if
not
predefined_modes
.
has_key
(
string
):
if
not
predefined_modes
.
has_key
(
string
):
raise
Exception
(
"No predefixed mode exist for string:
%
s"
%
string
)
raise
Exception
(
"No predefixed mode exist for string:
%
s"
%
string
)
return
predefined_modes
[
string
]
return
predefined_modes
[
string
]
...
...
theano/gof/opt.py
浏览文件 @
af804be5
...
@@ -197,12 +197,19 @@ class _metadict:
...
@@ -197,12 +197,19 @@ class _metadict:
class
MergeOptimizer
(
Optimizer
):
class
MergeOptimizer
(
Optimizer
):
"""WRITEME
Merges parts of the graph that are identical, i.e. parts that
take the same inputs and carry out the asme computations so we
can avoid doing them more than once. Also merges variables that
are constant.
"""
"""
Merges parts of the graph that are identical and redundant.
The basic principle is that if two Applies have ops that compare equal, and identical
inputs, then they do not both need to be computed. The clients of one are transfered to
the other and one of them is removed from the graph. This procedure is carried out in
input->output order through the graph.
The first step of merging is constant-merging, so that all clients of an int(1) for example,
are transfered to a particular instance of int(1).
"""
def
__init__
(
self
,
skip_const_merge
=
False
):
self
.
skip_const_merge
=
skip_const_merge
def
add_requirements
(
self
,
env
):
def
add_requirements
(
self
,
env
):
env
.
extend
(
toolbox
.
ReplaceValidate
())
env
.
extend
(
toolbox
.
ReplaceValidate
())
...
@@ -230,41 +237,6 @@ class MergeOptimizer(Optimizer):
...
@@ -230,41 +237,6 @@ class MergeOptimizer(Optimizer):
const_sig
[
c
]
=
sig
const_sig
[
c
]
=
sig
const_sig_inv
[
sig
]
=
c
const_sig_inv
[
sig
]
=
c
def
exptime_apply_node_merge
(
self
,
env
):
# we clear the dicts because the Constants signatures are not necessarily hashable
# and it's more efficient to give them an integer like the other Variables
symbol_idx
=
{}
#variable -> int
symbol_idx_inv
=
{}
#int -> variable (inverse of symbol_idx)
#add all graph sources to the symbol_idx dictionaries (arbitrary order)
for
i
,
r
in
enumerate
(
r
for
r
in
env
.
variables
if
r
.
owner
is
None
):
symbol_idx
[
r
]
=
i
symbol_idx_inv
[
i
]
=
r
for
node
in
_list_of_nodes
(
env
):
node_cid
=
(
node
.
op
,
tuple
([
symbol_idx
[
input
]
for
input
in
node
.
inputs
]))
#print 'NODE', node, node_cid
dup
=
symbol_idx_inv
.
get
(
node_cid
,
None
)
success
=
False
if
dup
is
not
None
:
success
=
True
pairs
=
zip
(
node
.
outputs
,
dup
.
outputs
)
for
output
,
new_output
in
pairs
:
if
output
.
name
and
not
new_output
.
name
:
new_output
.
name
=
output
.
name
try
:
env
.
replace_all_validate
(
pairs
,
reason
=
'Merge (exptime)'
)
except
InconsistencyError
,
e
:
success
=
False
if
not
success
:
symbol_idx
[
node
]
=
node_cid
symbol_idx_inv
[
node_cid
]
=
node
for
i
,
output
in
enumerate
(
node
.
outputs
):
ref
=
(
i
,
node_cid
)
symbol_idx
[
output
]
=
ref
symbol_idx_inv
[
ref
]
=
output
def
apply_node_merge
(
self
,
env
):
def
apply_node_merge
(
self
,
env
):
# we clear the dicts because the Constants signatures are not necessarily hashable
# we clear the dicts because the Constants signatures are not necessarily hashable
# and it's more efficient to give them an integer like the other Variables
# and it's more efficient to give them an integer like the other Variables
...
@@ -316,7 +288,8 @@ class MergeOptimizer(Optimizer):
...
@@ -316,7 +288,8 @@ class MergeOptimizer(Optimizer):
#TODO: Consider splitting this into a separate optimizer (SeqOptimizer)
#TODO: Consider splitting this into a separate optimizer (SeqOptimizer)
def
apply
(
self
,
env
):
def
apply
(
self
,
env
):
self
.
apply_constant_merge
(
env
)
if
not
self
.
skip_const_merge
:
self
.
apply_constant_merge
(
env
)
self
.
apply_node_merge
(
env
)
self
.
apply_node_merge
(
env
)
merge_optimizer
=
MergeOptimizer
()
merge_optimizer
=
MergeOptimizer
()
...
@@ -541,7 +514,7 @@ class PatternSub(LocalOptimizer):
...
@@ -541,7 +514,7 @@ class PatternSub(LocalOptimizer):
PatternSub((subtract, (add, 'x', 'y'), 'y'), 'x')
PatternSub((subtract, (add, 'x', 'y'), 'y'), 'x')
PatternSub((power, 'x', Constant(double, 2.0)), (square, 'x'))
PatternSub((power, 'x', Constant(double, 2.0)), (square, 'x'))
PatternSub((boggle, {'pattern': 'x',
PatternSub((boggle, {'pattern': 'x',
'constraint': lambda e
nv, e
xpr: expr.type == scrabble}),
'constraint': lambda expr: expr.type == scrabble}),
(scrabble, 'x'))
(scrabble, 'x'))
"""
"""
...
@@ -789,7 +762,10 @@ class NavigatorOptimizer(Optimizer):
...
@@ -789,7 +762,10 @@ class NavigatorOptimizer(Optimizer):
raise
raise
if
replacements
is
False
or
replacements
is
None
:
if
replacements
is
False
or
replacements
is
None
:
return
False
return
False
assert
len
(
node
.
outputs
)
==
len
(
replacements
)
if
not
isinstance
(
replacements
,
(
tuple
,
list
)):
raise
TypeError
(
'Optimizer
%
s gave wrong type of replacement'
%
lopt
)
if
len
(
node
.
outputs
)
!=
len
(
replacements
):
raise
ValueError
(
'Optimizer
%
s gave wrong number of replacements'
%
lopt
)
repl_pairs
=
zip
(
node
.
outputs
,
replacements
)
repl_pairs
=
zip
(
node
.
outputs
,
replacements
)
try
:
try
:
env
.
replace_all_validate
(
repl_pairs
,
reason
=
lopt
)
env
.
replace_all_validate
(
repl_pairs
,
reason
=
lopt
)
...
@@ -904,8 +880,13 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -904,8 +880,13 @@ class EquilibriumOptimizer(NavigatorOptimizer):
max_depth
=
None
,
max_depth
=
None
,
max_use_ratio
=
None
):
max_use_ratio
=
None
):
"""
"""
:param local_optimizers: list or set of local optimizations to apply until
equilibrium.
:param max_use_ratio: each optimizer can be applied at most (size of graph * this number)
:param max_use_ratio: each optimizer can be applied at most (size of graph * this number)
:param max_depth: TODO what does this do? (EquilibriumDB sets it to 5)
"""
"""
super
(
EquilibriumOptimizer
,
self
)
.
__init__
(
super
(
EquilibriumOptimizer
,
self
)
.
__init__
(
...
@@ -916,6 +897,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -916,6 +897,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self
.
local_optimizers
=
local_optimizers
self
.
local_optimizers
=
local_optimizers
self
.
max_depth
=
max_depth
self
.
max_depth
=
max_depth
self
.
max_use_ratio
=
max_use_ratio
self
.
max_use_ratio
=
max_use_ratio
assert
self
.
max_use_ratio
is
not
None
,
'max_use_ratio has to be a number'
def
apply
(
self
,
env
,
start_from
=
None
):
def
apply
(
self
,
env
,
start_from
=
None
):
if
start_from
is
None
:
if
start_from
is
None
:
...
@@ -960,7 +942,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -960,7 +942,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
changed
|=
lopt_change
changed
|=
lopt_change
finally
:
finally
:
self
.
detach_updater
(
env
,
u
)
self
.
detach_updater
(
env
,
u
)
self
.
detach_updater
(
env
,
u
)
self
.
detach_updater
(
env
,
u
)
#TODO: erase this line, it's redundant at best
if
max_use_abort
:
if
max_use_abort
:
print
>>
sys
.
stderr
,
"WARNING: EquilibriumOptimizer max'ed out"
print
>>
sys
.
stderr
,
"WARNING: EquilibriumOptimizer max'ed out"
...
...
theano/gof/optdb.py
浏览文件 @
af804be5
...
@@ -26,7 +26,7 @@ class DB(object):
...
@@ -26,7 +26,7 @@ class DB(object):
# It is an instance of a DB.In the tests for example,
# It is an instance of a DB.In the tests for example,
# this is not always the case.
# this is not always the case.
if
not
isinstance
(
obj
,
(
DB
,
opt
.
Optimizer
,
opt
.
LocalOptimizer
)):
if
not
isinstance
(
obj
,
(
DB
,
opt
.
Optimizer
,
opt
.
LocalOptimizer
)):
raise
Exception
(
'Triing to register an optimizer that don
\'
t herite from theano.gof.opt.Optimizer or theano.gof.opt.LocalOptimizer
'
,
obj
)
raise
TypeError
(
'Object cannot be registered in OptDB
'
,
obj
)
if
self
.
name
is
not
None
:
if
self
.
name
is
not
None
:
tags
=
tags
+
(
self
.
name
,)
tags
=
tags
+
(
self
.
name
,)
...
@@ -132,6 +132,18 @@ class Query(object):
...
@@ -132,6 +132,18 @@ class Query(object):
class
EquilibriumDB
(
DB
):
class
EquilibriumDB
(
DB
):
"""A set of potential optimizations which should be applied in an arbitrary order until
equilibrium is reached.
Canonicalize, Stabilize, and Specialize are all equilibrium optimizations.
.. note::
It seems like this might be supposed to contain LocalOptimizer instances rather than
optimizer instances, because whatever is selected by the query is passed to
EquilibriumOptimizer and EquilibriumOptimizer requires LocalOptimizer instances.
"""
def
query
(
self
,
*
tags
,
**
kwtags
):
def
query
(
self
,
*
tags
,
**
kwtags
):
opts
=
super
(
EquilibriumDB
,
self
)
.
query
(
*
tags
,
**
kwtags
)
opts
=
super
(
EquilibriumDB
,
self
)
.
query
(
*
tags
,
**
kwtags
)
...
@@ -142,27 +154,45 @@ class EquilibriumDB(DB):
...
@@ -142,27 +154,45 @@ class EquilibriumDB(DB):
class
SequenceDB
(
DB
):
class
SequenceDB
(
DB
):
"""A sequence of potential optimizations.
Retrieve a sequence of optimizations (a SeqOptimizer) by calling query().
Each potential optimization is registered with a floating-point position.
No matter which optimizations are selected by a query, they are carried out in order of
increasing position.
The optdb itself (`theano.compile.mode.optdb`), from which (among many other tags) fast_run
and fast_compile optimizers are drawn is a SequenceDB.
"""
def
__init__
(
self
,
failure_callback
=
opt
.
SeqOptimizer
.
warn
):
def
__init__
(
self
,
failure_callback
=
opt
.
SeqOptimizer
.
warn
):
super
(
SequenceDB
,
self
)
.
__init__
()
super
(
SequenceDB
,
self
)
.
__init__
()
self
.
__p
riority
__
=
{}
self
.
__p
osition
__
=
{}
self
.
failure_callback
=
failure_callback
self
.
failure_callback
=
failure_callback
def
register
(
self
,
name
,
obj
,
p
riority
,
*
tags
):
def
register
(
self
,
name
,
obj
,
p
osition
,
*
tags
):
super
(
SequenceDB
,
self
)
.
register
(
name
,
obj
,
*
tags
)
super
(
SequenceDB
,
self
)
.
register
(
name
,
obj
,
*
tags
)
self
.
__p
riority__
[
name
]
=
priority
self
.
__p
osition__
[
name
]
=
position
def
query
(
self
,
*
tags
,
**
kwtags
):
def
query
(
self
,
*
tags
,
**
kwtags
):
"""
:type position_cutoff: float or int
:param position_cutoff: only optimizations with position less than the cutoff are returned.
"""
position_cutoff
=
kwtags
.
pop
(
'position_cutoff'
,
float
(
'inf'
))
opts
=
super
(
SequenceDB
,
self
)
.
query
(
*
tags
,
**
kwtags
)
opts
=
super
(
SequenceDB
,
self
)
.
query
(
*
tags
,
**
kwtags
)
opts
=
list
(
opts
)
opts
=
[
o
for
o
in
opts
if
self
.
__position__
[
o
.
name
]
<
position_cutoff
]
opts
.
sort
(
key
=
lambda
obj
:
self
.
__p
riority
__
[
obj
.
name
])
opts
.
sort
(
key
=
lambda
obj
:
self
.
__p
osition
__
[
obj
.
name
])
return
opt
.
SeqOptimizer
(
opts
,
failure_callback
=
self
.
failure_callback
)
return
opt
.
SeqOptimizer
(
opts
,
failure_callback
=
self
.
failure_callback
)
def
print_summary
(
self
,
stream
=
sys
.
stdout
):
def
print_summary
(
self
,
stream
=
sys
.
stdout
):
print
>>
stream
,
"SequenceDB (id
%
i)"
%
id
(
self
)
print
>>
stream
,
"SequenceDB (id
%
i)"
%
id
(
self
)
print
>>
stream
,
" p
riority"
,
self
.
__priority
__
print
>>
stream
,
" p
osition"
,
self
.
__position
__
print
>>
stream
,
" names"
,
self
.
_names
print
>>
stream
,
" names"
,
self
.
_names
print
>>
stream
,
" db"
,
self
.
__db__
print
>>
stream
,
" db"
,
self
.
__db__
def
__str__
(
self
):
def
__str__
(
self
):
sio
=
StringIO
.
StringIO
()
sio
=
StringIO
.
StringIO
()
self
.
print_summary
(
sio
)
self
.
print_summary
(
sio
)
...
...
theano/printing.py
浏览文件 @
af804be5
...
@@ -7,9 +7,52 @@ import sys,os
...
@@ -7,9 +7,52 @@ import sys,os
from
theano
import
config
from
theano
import
config
from
gof
import
Op
,
Apply
from
gof
import
Op
,
Apply
from
theano.gof.python25
import
any
from
theano.gof.python25
import
any
from
theano.compile
import
Function
,
debugmode
#We import the debugprint here to have all printing of graph available from this module
from
theano.compile.debugmode
import
debugprint
def
debugprint
(
obj
,
depth
=-
1
,
file
=
None
):
"""Print a computation graph to file
:type obj: Variable, Apply, or Function instance
:param obj: symbolic thing to print
:type depth: integer
:param depth: print graph to this depth (-1 for unlimited)
:type file: None or file-like object
:param file: print to this file (None means sys.stdout)
:rtype: None or file-like object
:returns: `file` argument
Each line printed represents a Variable in the graph.
The indentation of each line corresponds to its depth in the symbolic graph.
The first part of the text identifies whether it is an input (if a name or type is printed)
or the output of some Apply (in which case the Op is printed).
The second part of the text is the memory location of the Variable.
If a Variable is encountered multiple times in the depth-first search, it is only printed
recursively the first time. Later, just the Variable and its memory location are printed.
If an Apply has multiple outputs, then a '.N' suffix will be appended to the Apply's
identifier, to indicate which output a line corresponds to.
"""
if
file
is
None
:
_file
=
sys
.
stdout
else
:
_file
=
file
done
=
set
()
results_to_print
=
[]
if
isinstance
(
obj
,
gof
.
Variable
):
results_to_print
.
append
(
obj
)
elif
isinstance
(
obj
,
gof
.
Apply
):
results_to_print
.
extend
(
obj
.
outputs
)
elif
isinstance
(
obj
,
Function
):
results_to_print
.
extend
(
obj
.
maker
.
env
.
outputs
)
for
r
in
results_to_print
:
debugmode
.
debugprint
(
r
,
depth
=
depth
,
done
=
done
,
file
=
_file
)
if
file
is
None
:
_file
.
flush
()
return
file
class
Print
(
Op
):
class
Print
(
Op
):
"""This identity-like Op has the side effect of printing a message followed by its inputs
"""This identity-like Op has the side effect of printing a message followed by its inputs
...
@@ -329,7 +372,7 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
...
@@ -329,7 +372,7 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
if
var
.
name
is
not
None
:
if
var
.
name
is
not
None
:
varstr
=
var
.
name
varstr
=
var
.
name
elif
isinstance
(
var
,
gof
.
Constant
):
elif
isinstance
(
var
,
gof
.
Constant
):
varstr
=
str
(
var
.
data
)
varstr
=
'
%
s [
%
s]'
%
(
str
(
var
.
data
)
,
str
(
var
.
type
)
)
elif
var
in
input_update
and
input_update
[
var
]
.
variable
.
name
is
not
None
:
elif
var
in
input_update
and
input_update
[
var
]
.
variable
.
name
is
not
None
:
varstr
=
input_update
[
var
]
.
variable
.
name
varstr
=
input_update
[
var
]
.
variable
.
name
else
:
else
:
...
...
theano/sandbox/cuda/var.py
浏览文件 @
af804be5
...
@@ -3,7 +3,7 @@ import numpy
...
@@ -3,7 +3,7 @@ import numpy
import
theano
import
theano
from
theano
import
Op
,
Type
,
Apply
,
Variable
,
Constant
from
theano
import
Op
,
Type
,
Apply
,
Variable
,
Constant
from
theano
import
tensor
from
theano
import
tensor
from
theano.compile
import
shared
,
SharedVariable
,
shared_constructor
from
theano.compile
import
shared
,
SharedVariable
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda
import
filter
as
type_support_filter
from
theano.sandbox.cuda
import
filter
as
type_support_filter
...
@@ -68,6 +68,11 @@ CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable
...
@@ -68,6 +68,11 @@ CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable
def
cuda_shared_constructor
(
value
,
name
,
strict
=
False
,
broadcastable
=
None
):
def
cuda_shared_constructor
(
value
,
name
,
strict
=
False
,
broadcastable
=
None
):
"""SharedVariable Constructor for TensorType"""
"""SharedVariable Constructor for TensorType"""
# THIS CONSTRUCTOR TRIES TO CAST VALUE TO A FLOAT32, WHICH THEN GOES ONTO THE CARD
# SO INT shared vars, float64 shared vars, etc. all end up on the card.
# THIS IS NOT THE DEFAULT BEHAVIOUR THAT WE WANT.
# SEE float32_shared_constructor
#TODO: what should strict mean in this context, since we always have to make a copy?
#TODO: what should strict mean in this context, since we always have to make a copy?
if
strict
:
if
strict
:
_value
=
value
_value
=
value
...
...
theano/scan.py
浏览文件 @
af804be5
...
@@ -20,8 +20,9 @@ Special cases:
...
@@ -20,8 +20,9 @@ Special cases:
Often a for loop can be expressed as a ``scan()`` operation, and ``scan`` is
Often a for loop can be expressed as a ``scan()`` operation, and ``scan`` is
the closest that theano comes to looping. The advantage of using ``scan``
the closest that theano comes to looping. The advantage of using ``scan``
over for loops is that it allows you to express the loop symbolically. The
over for loops is that it allows the number of iterations to be a part of the symbolic graph.
Scan Op should always be used by applying the ``scan`` function.
The Scan Op should always be used by applying the ``scan`` function.
"""
"""
__docformat__
=
'restructedtext en'
__docformat__
=
'restructedtext en'
...
@@ -60,7 +61,8 @@ def hash_listsDictsTuples(x):
...
@@ -60,7 +61,8 @@ def hash_listsDictsTuples(x):
def
scan
(
fn
,
sequences
,
initial_states
,
non_sequences
,
inplace_map
=
{},
\
def
scan
(
fn
,
sequences
,
initial_states
,
non_sequences
,
inplace_map
=
{},
\
sequences_taps
=
{},
outputs_taps
=
{},
n_steps
=
0
,
\
sequences_taps
=
{},
outputs_taps
=
{},
n_steps
=
0
,
\
truncate_gradient
=
-
1
,
go_backwards
=
False
,
mode
=
'FAST_RUN'
):
truncate_gradient
=
-
1
,
go_backwards
=
False
,
mode
=
None
):
'''Function that constructs and applies a Scan op
'''Function that constructs and applies a Scan op
:param fn: Function that describes the operations involved in one step of scan
:param fn: Function that describes the operations involved in one step of scan
...
...
theano/tensor/basic.py
浏览文件 @
af804be5
...
@@ -4,7 +4,7 @@ __docformat__ = "restructuredtext en"
...
@@ -4,7 +4,7 @@ __docformat__ = "restructuredtext en"
import
__builtin__
import
__builtin__
import
sys
# for sys.maxint
import
sys
# for sys.maxint
from
theano.configparser
import
config
from
theano.configparser
import
config
,
AddConfigVar
,
BoolParam
import
traceback
#for overriding Op.__call__
import
traceback
#for overriding Op.__call__
if
sys
.
version_info
>=
(
2
,
5
):
if
sys
.
version_info
>=
(
2
,
5
):
import
functools
import
functools
...
@@ -570,7 +570,6 @@ class TensorType(Type):
...
@@ -570,7 +570,6 @@ class TensorType(Type):
# input received.
# input received.
return
"""
return
"""
%(name)
s = NULL;
%(name)
s = NULL;
type_num_
%(name)
s = ((PyArrayObject*)py_
%(name)
s)->descr->type_num; //we expect
%(type_num)
s
if (py_
%(name)
s == Py_None) {
if (py_
%(name)
s == Py_None) {
// We can either fail here or set
%(name)
s to NULL and rely on Ops using
// We can either fail here or set
%(name)
s to NULL and rely on Ops using
// tensors to handle the NULL case, but if they fail to do so they'll end up
// tensors to handle the NULL case, but if they fail to do so they'll end up
...
@@ -578,18 +577,17 @@ class TensorType(Type):
...
@@ -578,18 +577,17 @@ class TensorType(Type):
PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
%(fail)
s
%(fail)
s
}
}
else
if (!PyArray_Check(py_
%(name)
s)) {
if (!PyArray_Check(py_
%(name)
s)) {
PyErr_SetString(PyExc_ValueError, "expected an ndarray");
PyErr_SetString(PyExc_ValueError, "expected an ndarray");
%(fail)
s
%(fail)
s
}
}
else if (type_num_
%(name)
s !=
%(type_num)
s) {
type_num_
%(name)
s = ((PyArrayObject*)py_
%(name)
s)->descr->type_num; //we expect
%(type_num)
s
if (type_num_
%(name)
s !=
%(type_num)
s) {
PyErr_SetString(PyExc_ValueError, "expected
%(type_num)
s");
PyErr_SetString(PyExc_ValueError, "expected
%(type_num)
s");
%(fail)
s
%(fail)
s
}
}
else {
%(name)
s = (PyArrayObject*)(py_
%(name)
s);
%(name)
s = (PyArrayObject*)(py_
%(name)
s);
Py_XINCREF(
%(name)
s);
Py_XINCREF(
%(name)
s);
}
"""
%
dict
(
sub
,
name
=
name
,
type_num
=
self
.
dtype_specs
()[
2
])
"""
%
dict
(
sub
,
name
=
name
,
type_num
=
self
.
dtype_specs
()[
2
])
def
c_cleanup
(
self
,
name
,
sub
):
def
c_cleanup
(
self
,
name
,
sub
):
...
@@ -631,7 +629,7 @@ class TensorType(Type):
...
@@ -631,7 +629,7 @@ class TensorType(Type):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
scalar_version
=
scal
.
Scalar
(
self
.
dtype
)
.
c_code_cache_version
()
scalar_version
=
scal
.
Scalar
(
self
.
dtype
)
.
c_code_cache_version
()
if
scalar_version
:
if
scalar_version
:
return
(
1
,)
+
scalar_version
return
(
2
,)
+
scalar_version
else
:
else
:
return
()
return
()
...
@@ -943,7 +941,13 @@ class _tensor_py_operators:
...
@@ -943,7 +941,13 @@ class _tensor_py_operators:
break
break
if
advanced
:
if
advanced
:
return
AdvancedSubtensor
(
args
)(
self
,
*
args
)
if
config
.
experimental
.
advanced_indexing
:
if
len
(
args
)
==
1
:
return
AdvancedSubtensor1
()(
self
,
*
args
)
else
:
return
AdvancedSubtensor
(
args
)(
self
,
*
args
)
else
:
return
AdvancedSubtensor
(
args
)(
self
,
*
args
)
else
:
else
:
return
Subtensor
(
args
)(
self
,
*
Subtensor
.
collapse
(
args
,
lambda
entry
:
isinstance
(
entry
,
Variable
)))
return
Subtensor
(
args
)(
self
,
*
Subtensor
.
collapse
(
args
,
lambda
entry
:
isinstance
(
entry
,
Variable
)))
...
@@ -1029,6 +1033,7 @@ class TensorConstantSignature(tuple):
...
@@ -1029,6 +1033,7 @@ class TensorConstantSignature(tuple):
except
:
except
:
return
False
return
False
#N.B. compare shape to ensure no broadcasting in ==
#N.B. compare shape to ensure no broadcasting in ==
#N.B. compare elementwise last because it is the most expensive check
return
(
t0
==
t1
)
and
(
d0
.
shape
==
d1
.
shape
)
\
return
(
t0
==
t1
)
and
(
d0
.
shape
==
d1
.
shape
)
\
and
(
self
.
sum
==
other
.
sum
)
and
(
numpy
.
all
(
d0
==
d1
))
and
(
self
.
sum
==
other
.
sum
)
and
(
numpy
.
all
(
d0
==
d1
))
def
__hash__
(
self
):
def
__hash__
(
self
):
...
@@ -1294,9 +1299,15 @@ def shape(a):
...
@@ -1294,9 +1299,15 @@ def shape(a):
pprint
.
assign
(
_shape
,
printing
.
MemberPrinter
(
'shape'
))
pprint
.
assign
(
_shape
,
printing
.
MemberPrinter
(
'shape'
))
class
MaxAndArgmax
(
Op
):
class
MaxAndArgmax
(
Op
):
"""Calculate the max and argmax over a given axis"""
"""Calculate the max and argmax over a given axis.
.. note::
If axis is None it means to calculate the max over the last dimension which is
DIFFERENT FROM NUMPY!!
"""
nin
=
2
# tensor, axis
nin
=
2
# tensor, axis
nout
=
2
# max val, max idx
nout
=
2
# max val, max idx
E_axis
=
'invalid axis'
E_axis
=
'invalid axis'
...
@@ -1307,7 +1318,8 @@ class MaxAndArgmax(Op):
...
@@ -1307,7 +1318,8 @@ class MaxAndArgmax(Op):
axis
=
x
.
type
.
ndim
-
1
axis
=
x
.
type
.
ndim
-
1
axis
=
_as_tensor_variable
(
axis
)
axis
=
_as_tensor_variable
(
axis
)
inputs
=
[
x
,
axis
]
inputs
=
[
x
,
axis
]
broadcastable
=
[
False
]
*
(
x
.
type
.
ndim
-
1
)
#TODO: be less conservative
#TODO: figure things out if axis is a constant
broadcastable
=
[
False
]
*
(
x
.
type
.
ndim
-
1
)
outputs
=
[
tensor
(
x
.
type
.
dtype
,
broadcastable
,
name
=
'max'
),
outputs
=
[
tensor
(
x
.
type
.
dtype
,
broadcastable
,
name
=
'max'
),
tensor
(
'int32'
,
broadcastable
,
name
=
'argmax'
)]
tensor
(
'int32'
,
broadcastable
,
name
=
'argmax'
)]
return
Apply
(
self
,
inputs
,
outputs
)
return
Apply
(
self
,
inputs
,
outputs
)
...
@@ -1666,60 +1678,113 @@ def zeros_like(model):
...
@@ -1666,60 +1678,113 @@ def zeros_like(model):
#return Zeros(model.type.ndim)(shape(model))
#return Zeros(model.type.ndim)(shape(model))
return
fill
(
model
,
constant
(
0.0
,
dtype
=
model
.
type
.
dtype
))
return
fill
(
model
,
constant
(
0.0
,
dtype
=
model
.
type
.
dtype
))
class
Filler
(
gof
.
Op
):
if
0
:
## COMMENTED OUT FEB 17 2010
## TODO (DOCUMENT AND WRITE TESTS) OR DELETE
class
Filler
(
gof
.
Op
):
"""WRITEME"""
def
__init__
(
self
,
value
,
ndim
,
dtype
=
'float64'
):
self
.
value
=
value
self
.
ndim
=
ndim
self
.
dtype
=
dtype
self
.
type
=
TensorType
(
dtype
=
dtype
,
broadcastable
=
(
False
,)
*
ndim
)
def
make_node
(
self
,
dims
):
dims
=
as_tensor_variable
(
dims
)
return
gof
.
Apply
(
self
,
[
dims
],
[
self
.
type
()])
def
perform
(
self
,
node
,
(
dims
,),
(
out
,)):
if
out
[
0
]
is
not
None
:
out
[
0
]
.
resize
(
dims
,
refcheck
=
0
)
out
[
0
]
.
fill
(
self
.
value
)
else
:
if
self
.
value
==
0
:
out
[
0
]
=
numpy
.
zeros
(
dims
,
dtype
=
self
.
dtype
)
elif
self
.
value
==
1
:
out
[
0
]
=
numpy
.
ones
(
dims
,
dtype
=
self
.
dtype
)
else
:
out
[
0
]
=
numpy
.
ones
(
dims
,
dtype
=
self
.
dtype
)
*
self
.
value
def
grad
(
self
,
(
dims
,),
(
gout
,)):
return
None
,
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
ndim
==
other
.
ndim
and
self
.
dtype
==
other
.
dtype
def
__hash__
(
self
):
return
hash
(
self
.
ndim
)
^
hash
(
self
.
dtype
)
Zeros
=
partial
(
Filler
,
0
)
"""WRITEME"""
Ones
=
partial
(
Filler
,
1
)
"""WRITEME"""
"""WRITEME"""
def
__init__
(
self
,
value
,
ndim
,
dtype
=
'float64'
):
self
.
value
=
value
self
.
ndim
=
ndim
self
.
dtype
=
dtype
self
.
type
=
TensorType
(
dtype
=
dtype
,
broadcastable
=
(
False
,)
*
ndim
)
def
make_node
(
self
,
dims
):
@constructor
dims
=
as_tensor_variable
(
dims
)
def
zero
():
return
gof
.
Apply
(
self
,
[
dims
],
[
self
.
type
()])
"""
Return a scalar zero, e.g. for initializing sums.
"""
return
Zeros
(
0
)([])
def
perform
(
self
,
node
,
(
dims
,),
(
out
,)):
@constructor
if
out
[
0
]
is
not
None
:
def
one
():
out
[
0
]
.
resize
(
dims
,
refcheck
=
0
)
"""WRITEME"""
out
[
0
]
.
fill
(
self
.
value
)
return
Ones
(
0
)([])
else
:
if
self
.
value
==
0
:
out
[
0
]
=
numpy
.
zeros
(
dims
,
dtype
=
self
.
dtype
)
elif
self
.
value
==
1
:
out
[
0
]
=
numpy
.
ones
(
dims
,
dtype
=
self
.
dtype
)
else
:
out
[
0
]
=
numpy
.
ones
(
dims
,
dtype
=
self
.
dtype
)
*
self
.
value
def
grad
(
self
,
(
dims
,),
(
gout
,)):
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
Filler
)
and
r
.
owner
.
op
.
value
==
0
,
printing
.
FunctionPrinter
(
'zeros'
))
return
None
,
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
Filler
)
and
r
.
owner
.
op
.
value
==
1
,
printing
.
FunctionPrinter
(
'ones'
))
def
__eq__
(
self
,
other
):
class
Alloc
(
gof
.
Op
):
return
type
(
self
)
==
type
(
other
)
and
self
.
ndim
==
other
.
ndim
and
self
.
dtype
==
other
.
dty
pe
"""Create a Tensor from an initial value and a desired sha
pe
def
__hash__
(
self
):
alloc(value, shape0, shape1, ..., shapeN)
return
hash
(
self
.
ndim
)
^
hash
(
self
.
dtype
)
Zeros
=
partial
(
Filler
,
0
)
Returns an N-dimensional tensor initialized by `value` using something equivalent to
"""WRITEME"""
>>> z = numpy.zeros(shape, value.dtype)
>>> z += value
Ones
=
partial
(
Filler
,
1
)
The result has N dimensions, has the dtype of `value` and is obtained by broadcasting value
"""WRITEME"""
over the output ndarray.
@constructor
This Op is used to replace fill() during optimizations because after shapes are lifted,
def
zero
():
the first argument to fill can often be pruned from the graph.
"""
Return a scalar zero, e.g. for initializing sums.
"""
"""
return
Zeros
(
0
)([])
def
__init__
(
self
,
dtype
):
self
.
dtype
=
dtype
@constructor
def
__eq__
(
self
,
other
):
def
one
():
return
type
(
self
)
==
type
(
other
)
and
self
.
dtype
==
other
.
dtype
"""WRITEME"""
return
Ones
(
0
)([])
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
dtype
)
def
__str__
(
self
):
return
'
%
s{
%
s}'
%
(
self
.
__class__
.
__name__
,
self
.
dtype
)
def
make_node
(
self
,
value
,
*
shape
):
v
=
as_tensor_variable
(
value
)
sh
=
[
as_tensor_variable
(
s
)
for
s
in
shape
]
bcast
=
[]
for
s
in
sh
:
if
s
.
type
.
dtype
[:
3
]
not
in
(
'int'
,
'uin'
):
raise
TypeError
(
'Shape arguments must be integers'
,
s
)
# if s is constant 1, then we're broadcastable in that dim
bcast
.
append
(
isinstance
(
s
,
TensorConstant
)
and
(
s
.
data
==
1
))
otype
=
TensorType
(
dtype
=
self
.
dtype
,
broadcastable
=
bcast
)
return
gof
.
Apply
(
self
,
[
v
]
+
sh
,
[
otype
()])
def
perform
(
self
,
node
,
inputs
,
(
out
,)):
v
=
inputs
[
0
]
sh
=
tuple
([
int
(
i
)
for
i
in
inputs
[
1
:]])
if
out
[
0
]
is
None
or
out
[
0
]
.
shape
!=
sh
:
out
[
0
]
=
numpy
.
zeros
(
sh
,
dtype
=
self
.
dtype
)
out
[
0
][
...
]
+=
v
# broadcast v to fill us up
def
grad
(
self
,
inputs
,
(
gout
,)):
return
[
None
for
i
in
inputs
]
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
Filler
)
and
r
.
owner
.
op
.
value
==
0
,
printing
.
FunctionPrinter
(
'zeros'
))
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
Filler
)
and
r
.
owner
.
op
.
value
==
1
,
printing
.
FunctionPrinter
(
'ones'
))
@_redefine
(
elemwise
.
Elemwise
(
scal
.
identity
))
@_redefine
(
elemwise
.
Elemwise
(
scal
.
identity
))
def
tensor_copy
(
a
):
def
tensor_copy
(
a
):
...
@@ -1841,33 +1906,36 @@ def var(input, axis = None):
...
@@ -1841,33 +1906,36 @@ def var(input, axis = None):
#return the mean sqr
#return the mean sqr
return
mean
(
centered_input
**
2
,
axis
)
return
mean
(
centered_input
**
2
,
axis
)
class
Repeat
(
gof
.
Op
):
if
0
:
## COMMENTED OUT FEB 17 2010
def
make_node
(
self
,
input
,
repeats
,
axis
):
## TODO (DOCUMENT AND WRITE TESTS) OR DELETE
assert
isinstance
(
input
.
type
,
TensorType
)
class
Repeat
(
gof
.
Op
):
assert
repeats
.
type
==
iscalar
assert
axis
.
type
==
iscalar
def
make_node
(
self
,
input
,
repeats
,
axis
):
broadcastable
=
[]
assert
isinstance
(
input
.
type
,
TensorType
)
for
i
,
x
in
enumerate
(
input
.
broadcastable
):
assert
repeats
.
type
==
iscalar
if
i
==
axis
:
assert
axis
.
type
==
iscalar
broadcastable
+=
[
False
]
broadcastable
=
[]
else
:
for
i
,
x
in
enumerate
(
input
.
broadcastable
):
broadcastable
+=
[
x
]
if
i
==
axis
:
broadcastable
+=
[
False
]
type
=
TensorType
(
dtype
=
input
.
type
.
dtype
,
broadcastable
=
\
else
:
broadcastable
)
broadcastable
+=
[
x
]
#backport
#type = TensorType(dtype = input.type.dtype,
type
=
TensorType
(
dtype
=
input
.
type
.
dtype
,
broadcastable
=
\
# broadcastable = [False if i==axis else x for i, x in enumerate(input.broadcastable)])
broadcastable
)
return
gof
.
Apply
(
self
,
[
inputs
,
repeats
,
axis
],
[
type
()])
#backport
#type = TensorType(dtype = input.type.dtype,
def
perform
(
self
,
node
,
(
input
,
repeats
,
axis
),
(
out
,
)):
# broadcastable = [False if i==axis else x for i, x in enumerate(input.broadcastable)])
out
[
0
]
=
numpy
.
repeat
(
input
,
repeats
,
axis
)
return
gof
.
Apply
(
self
,
[
inputs
,
repeats
,
axis
],
[
type
()])
def
grad
(
self
,
(
input
,
repeats
,
axis
),
(
gout
,
)):
def
perform
(
self
,
node
,
(
input
,
repeats
,
axis
),
(
out
,
)):
return
add
.
grad
((
input
,
gout
),
(
gout
,))[:
1
]
out
[
0
]
=
numpy
.
repeat
(
input
,
repeats
,
axis
)
repeat
=
Repeat
()
def
grad
(
self
,
(
input
,
repeats
,
axis
),
(
gout
,
)):
return
add
.
grad
((
input
,
gout
),
(
gout
,))[:
1
]
repeat
=
Repeat
()
class
Default
(
gof
.
Op
):
class
Default
(
gof
.
Op
):
"""
"""
...
@@ -2489,6 +2557,10 @@ class Join(Op):
...
@@ -2489,6 +2557,10 @@ class Join(Op):
join(2, x, y, z) # WRONG: the axis has to be an index into the shape
join(2, x, y, z) # WRONG: the axis has to be an index into the shape
join(0, x, u) # WRONG: joined tensors must have the same rank
join(0, x, u) # WRONG: joined tensors must have the same rank
"""
"""
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
make_node
(
self
,
*
axis_and_tensors
):
def
make_node
(
self
,
*
axis_and_tensors
):
"""
"""
...
@@ -2765,37 +2837,6 @@ else:
...
@@ -2765,37 +2837,6 @@ else:
pass
pass
class
MakeVector
(
Op
):
"""WRITEME"""
def
__init__
(
self
,
stype
):
self
.
stype
=
stype
def
make_node
(
self
,
*
inputs
):
inputs
=
map
(
as_tensor_variable
,
inputs
)
assert
all
(
a
.
type
==
self
.
stype
for
a
in
inputs
)
return
Apply
(
self
,
inputs
,
[
TensorType
(
broadcastable
=
(
False
,),
dtype
=
self
.
stype
.
dtype
)()])
def
perform
(
self
,
node
,
inputs
,
(
out
,)):
out
[
0
]
=
numpy
.
asarray
(
inputs
)
def
grad
(
self
,
inputs
,
(
gout
,)):
return
[
None
]
*
len
(
inputs
)
make_lvector
=
MakeVector
(
lscalar
)
"""WRITEME"""
class
MakeVectorPrinter
:
def
process
(
self
,
r
,
pstate
):
if
r
.
owner
is
None
:
raise
TypeError
(
"Can only print make_vector."
)
elif
isinstance
(
r
.
owner
.
op
,
MakeVector
):
return
"[
%
s]"
%
", "
.
join
(
pstate
.
pprinter
.
process
(
input
,
pstate
.
clone
(
precedence
=
1000
))
for
input
in
r
.
owner
.
inputs
)
else
:
raise
TypeError
(
"Can only print make_vector."
)
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
MakeVector
),
MakeVectorPrinter
())
class
Reshape
(
Op
):
class
Reshape
(
Op
):
"""Perform a reshape operation of the input x to the new shape shp.
"""Perform a reshape operation of the input x to the new shape shp.
The number of dimensions to which to reshape to (ndim) must be known at graph
The number of dimensions to which to reshape to (ndim) must be known at graph
...
@@ -3138,6 +3179,37 @@ def inverse_permutation(perm):
...
@@ -3138,6 +3179,37 @@ def inverse_permutation(perm):
# Should reproduce numpy's behaviour:
# Should reproduce numpy's behaviour:
# http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing
# http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing
AddConfigVar
(
'experimental.advanced_indexing'
,
"enable not-well-tested advanced indexing functionality"
,
BoolParam
(
False
))
class
AdvancedSubtensor1
(
Op
):
"""Implement x[ilist] where ilist is a vector of integers."""
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__eq__
(
self
,
other
):
type
(
self
)
==
type
(
other
)
def
make_node
(
self
,
x
,
ilist
):
x_
=
as_tensor_variable
(
x
)
ilist_
=
as_tensor_variable
(
ilist
)
if
ilist_
.
type
.
dtype
[:
3
]
not
in
(
'int'
,
'uin'
):
raise
TypeError
(
'index must be integers'
)
if
ilist_
.
type
.
broadcastable
!=
(
False
,):
raise
TypeError
(
'index must be vector'
)
if
x_
.
type
.
ndim
==
0
:
raise
TypeError
(
'cannot index into a scalar'
)
if
x_
.
type
.
broadcastable
[
0
]:
# the caller should have made a copy of x len(ilist) times
raise
TypeError
(
'cannot index into a broadcastable dimension'
)
return
gof
.
Apply
(
self
,
[
x_
,
ilist_
],
[
x_
.
type
()])
def
perform
(
self
,
node
,
(
x
,
i
),
(
out
,)):
out
[
0
]
=
x
[
i
]
class
AdvancedSubtensor
(
Op
):
class
AdvancedSubtensor
(
Op
):
"""Return a subtensor copy, using advanced indexing.
"""Return a subtensor copy, using advanced indexing.
"""
"""
...
@@ -3308,6 +3380,18 @@ class Dot(Op):
...
@@ -3308,6 +3380,18 @@ class Dot(Op):
rval
=
dot
(
gz
,
y
.
T
),
dot
(
x
.
T
,
gz
)
rval
=
dot
(
gz
,
y
.
T
),
dot
(
x
.
T
,
gz
)
return
cast
(
rval
[
0
],
x
.
dtype
),
cast
(
rval
[
1
],
y
.
dtype
)
return
cast
(
rval
[
0
],
x
.
dtype
),
cast
(
rval
[
1
],
y
.
dtype
)
def
infer_shape
(
self
,
node
,
(
xshp
,
yshp
)):
x
,
y
=
node
.
inputs
if
x
.
ndim
==
2
and
y
.
ndim
==
2
:
return
[(
xshp
[
0
],
yshp
[
1
])]
if
x
.
ndim
==
1
and
y
.
ndim
==
2
:
return
[(
yshp
[
1
],)]
if
x
.
ndim
==
2
and
y
.
ndim
==
1
:
return
[(
xshp
[
0
],)]
if
x
.
ndim
==
1
and
y
.
ndim
==
1
:
return
[()]
raise
NotImplementedError
()
def
__str__
(
self
):
def
__str__
(
self
):
return
"dot"
return
"dot"
dot
=
Dot
()
dot
=
Dot
()
...
...
theano/tensor/blas.py
浏览文件 @
af804be5
...
@@ -6,10 +6,9 @@ import numpy.distutils
...
@@ -6,10 +6,9 @@ import numpy.distutils
from
theano.configparser
import
config
,
AddConfigVar
,
StrParam
from
theano.configparser
import
config
,
AddConfigVar
,
StrParam
from
theano.gof
import
(
utils
,
Op
,
Apply
,
view_roots
,
PatternSub
,
DestroyHandler
,
from
theano.gof
import
(
utils
,
Op
,
Apply
,
view_roots
,
PatternSub
,
DestroyHandler
,
SeqOptimizer
,
local_optimizer
,
Optimizer
,
LocalOptimizer
,
OpKeyOptimizer
,
SeqOptimizer
,
local_optimizer
,
Optimizer
,
LocalOptimizer
,
OpKeyOptimizer
,
InconsistencyError
,
toolbox
)
InconsistencyError
,
toolbox
,
SequenceDB
,
EquilibriumOptimizer
)
from
theano.printing
import
pprint
,
FunctionPrinter
from
theano.printing
import
pprint
,
FunctionPrinter
from
theano.tensor.opt
import
register_specialize
,
out2in
,
insert_inplace_optimizer
from
theano.compile.mode
import
optdb
# opt.py
import
basic
as
T
import
basic
as
T
...
@@ -30,7 +29,6 @@ AddConfigVar('blas.ldflags',
...
@@ -30,7 +29,6 @@ AddConfigVar('blas.ldflags',
"lib[s] to include for [Fortran] level-3 blas implementation"
,
"lib[s] to include for [Fortran] level-3 blas implementation"
,
StrParam
(
default_blas_ldflags
()))
StrParam
(
default_blas_ldflags
()))
_logger
=
logging
.
getLogger
(
'theano.tensor.blas'
)
_logger
=
logging
.
getLogger
(
'theano.tensor.blas'
)
_logger
.
setLevel
(
logging
.
WARN
)
_logger
.
setLevel
(
logging
.
WARN
)
def
debug
(
*
msg
):
_logger
.
debug
(
' '
.
join
(
str
(
m
)
for
m
in
msg
))
def
debug
(
*
msg
):
_logger
.
debug
(
' '
.
join
(
str
(
m
)
for
m
in
msg
))
...
@@ -391,12 +389,22 @@ class Gemm(GemmRelated):
...
@@ -391,12 +389,22 @@ class Gemm(GemmRelated):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
,)
+
self
.
build_gemm_version
()
return
(
1
,)
+
self
.
build_gemm_version
()
gemm
=
Gemm
()
class
PseudoGemm
(
Op
):
# should be replaced by Gemm
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
make_node
(
self
,
*
args
):
inputs
=
[
T
.
as_tensor_variable
(
i
)
for
i
in
args
]
return
Apply
(
self
,
inputs
,
[
inputs
[
0
]
.
type
()])
def
perform
(
self
,
node
,
(
z
,
a
,
x
,
y
,
b
),
(
zout
,
)):
zout
[
0
]
=
a
*
numpy
.
dot
(
x
,
y
)
+
b
*
z
gemm
=
PseudoGemm
()
gemm_inplace
=
Gemm
()
pprint
.
assign
(
gemm
,
FunctionPrinter
(
'gemm'
))
pprint
.
assign
(
gemm
,
FunctionPrinter
(
'gemm'
))
pprint
.
assign
(
gemm_inplace
,
FunctionPrinter
(
'gemm_inplace'
))
def
res_is_a
(
node
,
op
,
maxclients
=
None
):
def
res_is_a
(
node
,
op
,
maxclients
=
None
):
if
maxclients
is
not
None
:
if
maxclients
is
not
None
:
retval
=
(
len
(
node
.
clients
)
<=
maxclients
)
retval
=
(
len
(
node
.
clients
)
<=
maxclients
)
...
@@ -597,6 +605,7 @@ class GemmOptimizer(Optimizer):
...
@@ -597,6 +605,7 @@ class GemmOptimizer(Optimizer):
while
did_something
:
while
did_something
:
nodelist
=
list
(
env
.
toposort
())
nodelist
=
list
(
env
.
toposort
())
did_something
=
False
did_something
=
False
nodelist
.
reverse
()
for
node
in
nodelist
:
for
node
in
nodelist
:
new_outputs
=
_gemm_from_node
(
node
)
new_outputs
=
_gemm_from_node
(
node
)
if
new_outputs
:
if
new_outputs
:
...
@@ -611,10 +620,6 @@ class GemmOptimizer(Optimizer):
...
@@ -611,10 +620,6 @@ class GemmOptimizer(Optimizer):
#TODO: retry other applications of gemm (see comment in _gemm_from_node
#TODO: retry other applications of gemm (see comment in _gemm_from_node
pass
pass
#neede to make the gemm optimisation(step 70) happen before the fusion of elemwise(step 71)
compile
.
optdb
.
register
(
'inplace_gemm'
,
GemmOptimizer
(),
70.00
,
'fast_run'
,
'inplace'
,
'gemm'
)
class
Dot22
(
GemmRelated
):
class
Dot22
(
GemmRelated
):
"""Compute a matrix-matrix product.
"""Compute a matrix-matrix product.
This is a specialization of the more general Dot()
This is a specialization of the more general Dot()
...
@@ -689,5 +694,34 @@ def local_dot_to_dot22(node):
...
@@ -689,5 +694,34 @@ def local_dot_to_dot22(node):
info
(
'Not optimizing dot with inputs'
,
x
,
y
,
x
.
type
,
y
.
type
)
info
(
'Not optimizing dot with inputs'
,
x
,
y
,
x
.
type
,
y
.
type
)
else
:
else
:
return
False
return
False
register_specialize
(
local_dot_to_dot22
)
@local_optimizer
([
gemm
])
def
local_inplace_gemm
(
node
):
if
node
.
op
==
gemm
:
return
[
gemm_inplace
(
*
node
.
inputs
)]
#################################
#
# Set up the BlasOpt optimizer
#
#################################
blas_optdb
=
SequenceDB
()
# run after numerical stability optimizations (1.5)
optdb
.
register
(
'BlasOpt'
,
blas_optdb
,
1.7
,
'fast_run'
)
# run before specialize (2.0) because specialize is basically a free-for-all that makes the
# graph crazy.
blas_optdb
.
register
(
'local_dot_to_dot22'
,
EquilibriumOptimizer
([
local_dot_to_dot22
],
max_use_ratio
=
5
),
0
,
'fast_run'
)
blas_optdb
.
register
(
'local_dot_to_gemm'
,
GemmOptimizer
(),
10
,
'fast_run'
)
# After destroyhandler is in but before we try to make elemwise things inplace
# Try to make gemm inplace
# Also, need to make the gemm optimisation(step 70) happen before the fusion of elemwise(step 71)
optdb
.
register
(
'InplaceBlasOpt'
,
EquilibriumOptimizer
([
local_inplace_gemm
],
max_use_ratio
=
5
),
70.0
,
'fast_run'
,
'inplace'
)
theano/tensor/elemwise.py
浏览文件 @
af804be5
...
@@ -197,6 +197,18 @@ class DimShuffle(Op):
...
@@ -197,6 +197,18 @@ class DimShuffle(Op):
storage
[
0
]
=
numpy
.
asarray
(
res
)
#asarray puts scalars back into array
storage
[
0
]
=
numpy
.
asarray
(
res
)
#asarray puts scalars back into array
def
infer_shape
(
self
,
node
,
(
ishp
,)):
ishp
=
list
(
ishp
)
for
drop
in
reversed
(
self
.
drop
):
del
ishp
[
drop
]
# transpose
rval
=
[
ishp
[
i
]
for
i
in
self
.
shuffle
]
# augment
for
augm
in
self
.
augment
:
rval
.
insert
(
augm
,
1
)
return
[
rval
]
def
c_code
(
self
,
node
,
name
,
(
input
,),
(
res
,),
sub
):
def
c_code
(
self
,
node
,
name
,
(
input
,),
(
res
,),
sub
):
basename
=
input
+
'__view_or_copy'
basename
=
input
+
'__view_or_copy'
...
@@ -613,6 +625,25 @@ class Elemwise(Op):
...
@@ -613,6 +625,25 @@ class Elemwise(Op):
# the following should be used instead of the previous loop, unfortunately it tends to segfault
# the following should be used instead of the previous loop, unfortunately it tends to segfault
# self.ufunc(*(ufunc_args+[s[0] for s in output_storage]))
# self.ufunc(*(ufunc_args+[s[0] for s in output_storage]))
def
infer_shape
(
self
,
node
,
i_shapes
):
rval
=
[]
for
o
in
node
.
outputs
:
oshp
=
[]
for
dim
,
b
in
enumerate
(
o
.
type
.
broadcastable
):
b_dim
=
None
if
b
:
# this is broadcastable
b_dim
=
1
else
:
# there must be some input that is not broadcastable
for
ishp
,
i
in
zip
(
i_shapes
,
node
.
inputs
):
if
not
i
.
type
.
broadcastable
[
dim
]:
b_dim
=
ishp
[
dim
]
assert
b_dim
,
'AA'
break
assert
b_dim
,
'BB'
oshp
.
append
(
b_dim
)
rval
.
append
(
oshp
)
return
rval
def
_c_all
(
self
,
node
,
name
,
inames
,
onames
,
sub
):
def
_c_all
(
self
,
node
,
name
,
inames
,
onames
,
sub
):
_inames
=
inames
_inames
=
inames
_onames
=
onames
_onames
=
onames
...
@@ -764,10 +795,14 @@ class CAReduce(Op):
...
@@ -764,10 +795,14 @@ class CAReduce(Op):
if
scalar_op
.
nin
not
in
[
-
1
,
2
]
or
scalar_op
.
nout
!=
1
:
if
scalar_op
.
nin
not
in
[
-
1
,
2
]
or
scalar_op
.
nout
!=
1
:
raise
NotImplementedError
(
"CAReduce only supports binary functions with a single output."
)
raise
NotImplementedError
(
"CAReduce only supports binary functions with a single output."
)
self
.
scalar_op
=
scalar_op
self
.
scalar_op
=
scalar_op
if
isinstance
(
axis
,
int
):
if
axis
is
None
:
self
.
axis
=
[
axis
]
else
:
self
.
axis
=
axis
self
.
axis
=
axis
elif
isinstance
(
axis
,
int
):
self
.
axis
=
(
axis
,)
else
:
self
.
axis
=
list
(
set
(
axis
))
self
.
axis
.
sort
()
self
.
axis
=
tuple
(
self
.
axis
)
self
.
ufunc
=
numpy
.
frompyfunc
(
scalar_op
.
impl
,
2
,
1
)
self
.
ufunc
=
numpy
.
frompyfunc
(
scalar_op
.
impl
,
2
,
1
)
# CAReduce output views input when reducing scalars
# CAReduce output views input when reducing scalars
...
@@ -834,6 +869,13 @@ class CAReduce(Op):
...
@@ -834,6 +869,13 @@ class CAReduce(Op):
else
:
else
:
output
[
0
]
=
numpy
.
copy
(
variable
)
output
[
0
]
=
numpy
.
copy
(
variable
)
def
infer_shape
(
self
,
node
,
(
ishape
,)):
axis
=
self
.
axis
if
axis
is
None
:
return
(),
return
[
ishape
[
i
]
for
(
i
,
b
)
in
enumerate
(
node
.
inputs
[
0
]
.
type
.
broadcastable
)
if
i
not
in
axis
],
def
_c_all
(
self
,
node
,
name
,
inames
,
onames
,
sub
):
def
_c_all
(
self
,
node
,
name
,
inames
,
onames
,
sub
):
input
=
node
.
inputs
[
0
]
input
=
node
.
inputs
[
0
]
...
...
theano/tensor/nnet/__init__.py
浏览文件 @
af804be5
from
nnet
import
*
from
nnet
import
*
from
sigm
import
softplus
,
sigmoid
,
sigmoid_inplace
,
scalar_sigmoid
theano/tensor/nnet/nnet.py
浏览文件 @
af804be5
...
@@ -4,89 +4,14 @@
...
@@ -4,89 +4,14 @@
"""
"""
from
theano
import
gof
from
theano
import
gof
from
theano
import
scalar
from
theano
import
printing
from
theano
import
printing
from
theano.printing
import
pprint
from
theano.tensor
import
basic
as
tensor
from
theano.tensor
import
basic
as
tensor
from
theano.tensor
import
elemwise
from
theano.tensor
import
elemwise
from
theano.tensor
import
opt
from
theano.tensor
import
opt
from
theano.compile
import
optdb
from
theano.compile
import
optdb
import
numpy
import
numpy
############
from
.sigm
import
sigmoid
,
softplus
#
# SCALAR OPS
#
class
ScalarSigmoid
(
scalar
.
UnaryScalarOp
):
@staticmethod
def
st_impl
(
x
):
if
x
<
-
30.0
:
return
0.0
if
x
>
30.0
:
return
1.0
return
1.0
/
(
1.0
+
numpy
.
exp
(
-
x
))
def
impl
(
self
,
x
):
return
ScalarSigmoid
.
st_impl
(
x
)
def
grad
(
self
,
(
x
,),
(
gz
,)):
y
=
scalar_sigmoid
(
x
)
return
[
gz
*
y
*
(
1.0
-
y
)]
def
c_code
(
self
,
node
,
name
,
(
x
,),
(
z
,),
sub
):
if
node
.
inputs
[
0
]
.
type
==
scalar
.
float32
:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return
"""
%(z)
s =
%(x)
s < -88.0f ? 0.0 :
%(x)
s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-
%(x)
s));"""
%
locals
()
elif
node
.
inputs
[
0
]
.
type
==
scalar
.
float64
:
return
"""
%(z)
s =
%(x)
s < -709.0 ? 0.0 :
%(x)
s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-
%(x)
s));"""
%
locals
()
else
:
raise
NotImplementedError
(
'only floatingpoint is implemented'
)
def
c_code_cache_version
(
self
):
v
=
super
(
ScalarSigmoid
,
self
)
.
c_code_cache_version
()
if
v
:
return
(
2
,)
+
v
else
:
return
v
scalar_sigmoid
=
ScalarSigmoid
(
scalar
.
upgrade_to_float
,
name
=
'scalar_sigmoid'
)
sigmoid
=
elemwise
.
Elemwise
(
scalar_sigmoid
,
name
=
'sigmoid'
)
pprint
.
assign
(
sigmoid
,
printing
.
FunctionPrinter
(
'sigmoid'
))
class
ScalarSoftplus
(
scalar
.
UnaryScalarOp
):
@staticmethod
def
static_impl
(
x
):
if
x
<
-
30.0
:
return
0.0
if
x
>
30.0
:
return
x
return
numpy
.
log1p
(
numpy
.
exp
(
x
))
def
impl
(
self
,
x
):
return
ScalarSoftplus
.
static_impl
(
x
)
def
grad
(
self
,
(
x
,),
(
gz
,)):
return
[
gz
*
scalar_sigmoid
(
x
)]
def
c_code
(
self
,
node
,
name
,
(
x
,),
(
z
,),
sub
):
if
node
.
inputs
[
0
]
.
type
==
scalar
.
float32
:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( numpy.log1p(numpy.exp(theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return
"""
%(z)
s =
%(x)
s < -103.0f ? 0.0 :
%(x)
s > 14.0f ?
%(x)
s : log1p(exp(
%(x)
s));"""
%
locals
()
elif
node
.
inputs
[
0
]
.
type
==
scalar
.
float64
:
return
"""
%(z)
s =
%(x)
s < -745.0 ? 0.0 :
%(x)
s > 16.0 ?
%(x)
s : log1p(exp(
%(x)
s));"""
%
locals
()
else
:
raise
NotImplementedError
(
'only floatingpoint is implemented'
)
def
c_code_cache_version
(
self
):
v
=
super
(
ScalarSoftplus
,
self
)
.
c_code_cache_version
()
if
v
:
return
(
2
,)
+
v
else
:
return
v
scalar_softplus
=
ScalarSoftplus
(
scalar
.
upgrade_to_float
,
name
=
'scalar_softplus'
)
softplus
=
elemwise
.
Elemwise
(
scalar_softplus
,
name
=
'softplus'
)
pprint
.
assign
(
softplus
,
printing
.
FunctionPrinter
(
'softplus'
))
############
############
...
@@ -1351,6 +1276,7 @@ def categorical_crossentropy(coding_dist, true_dist):
...
@@ -1351,6 +1276,7 @@ def categorical_crossentropy(coding_dist, true_dist):
raise
TypeError
(
'rank mismatch between coding and true distributions'
)
raise
TypeError
(
'rank mismatch between coding and true distributions'
)
from
theano
import
scalar
class
Prepend_scalar_constant_to_each_row
(
gof
.
Op
):
class
Prepend_scalar_constant_to_each_row
(
gof
.
Op
):
def
__init__
(
self
,
val
=
0
):
def
__init__
(
self
,
val
=
0
):
...
@@ -1440,14 +1366,3 @@ prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
...
@@ -1440,14 +1366,3 @@ prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
prepend_0_to_each_row
=
Prepend_scalar_constant_to_each_row
(
0.
)
prepend_0_to_each_row
=
Prepend_scalar_constant_to_each_row
(
0.
)
prepend_1_to_each_row
=
Prepend_scalar_constant_to_each_row
(
1.
)
prepend_1_to_each_row
=
Prepend_scalar_constant_to_each_row
(
1.
)
logsigm_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log
,
(
sigmoid
,
'x'
)),
(
tensor
.
neg
,
(
softplus
,
(
tensor
.
neg
,
'x'
))),
allow_multiple_clients
=
True
)
log1msigm_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log
,
(
tensor
.
sub
,
tensor
.
constant
([[
1.0
]]),
(
sigmoid
,
'x'
))),
(
tensor
.
neg
,
(
softplus
,
'x'
)),
allow_multiple_clients
=
True
)
opt
.
register_specialize
(
logsigm_to_softplus
,
name
=
'logsigm_to_softplus'
)
opt
.
register_specialize
(
log1msigm_to_softplus
,
name
=
'log1msigm_to_softplus'
)
theano/tensor/nnet/sigm.py
0 → 100644
浏览文件 @
af804be5
"""Ops and optimizations: sigmoid, softplus
These functions implement special cases of exp and log to improve numerical stability.
"""
import
numpy
from
theano
import
gof
from
theano
import
scalar
from
theano
import
printing
from
theano.tensor
import
basic
as
tensor
from
theano.printing
import
pprint
from
theano.tensor
import
elemwise
from
theano.tensor
import
opt
from
theano.compile
import
optdb
############
#
# SCALAR OPS
#
class
ScalarSigmoid
(
scalar
.
UnaryScalarOp
):
@staticmethod
def
st_impl
(
x
):
if
x
<
-
30.0
:
return
0.0
if
x
>
30.0
:
return
1.0
return
1.0
/
(
1.0
+
numpy
.
exp
(
-
x
))
def
impl
(
self
,
x
):
return
ScalarSigmoid
.
st_impl
(
x
)
def
grad
(
self
,
(
x
,),
(
gz
,)):
y
=
scalar_sigmoid
(
x
)
return
[
gz
*
y
*
(
1.0
-
y
)]
def
c_code
(
self
,
node
,
name
,
(
x
,),
(
z
,),
sub
):
if
node
.
inputs
[
0
]
.
type
==
scalar
.
float32
:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return
"""
%(z)
s =
%(x)
s < -88.0f ? 0.0 :
%(x)
s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-
%(x)
s));"""
%
locals
()
elif
node
.
inputs
[
0
]
.
type
==
scalar
.
float64
:
return
"""
%(z)
s =
%(x)
s < -709.0 ? 0.0 :
%(x)
s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-
%(x)
s));"""
%
locals
()
else
:
raise
NotImplementedError
(
'only floatingpoint is implemented'
)
def
c_code_cache_version
(
self
):
v
=
super
(
ScalarSigmoid
,
self
)
.
c_code_cache_version
()
if
v
:
return
(
2
,)
+
v
else
:
return
v
scalar_sigmoid
=
ScalarSigmoid
(
scalar
.
upgrade_to_float
,
name
=
'scalar_sigmoid'
)
sigmoid
=
elemwise
.
Elemwise
(
scalar_sigmoid
,
name
=
'sigmoid'
)
sigmoid_inplace
=
elemwise
.
Elemwise
(
ScalarSigmoid
(
scalar
.
transfer_type
(
0
)),
inplace_pattern
=
{
0
:
0
},
name
=
'sigmoid_inplace'
,
)
pprint
.
assign
(
sigmoid
,
printing
.
FunctionPrinter
(
'sigmoid'
))
class
ScalarSoftplus
(
scalar
.
UnaryScalarOp
):
@staticmethod
def
static_impl
(
x
):
if
x
<
-
30.0
:
return
0.0
if
x
>
30.0
:
return
x
return
numpy
.
log1p
(
numpy
.
exp
(
x
))
def
impl
(
self
,
x
):
return
ScalarSoftplus
.
static_impl
(
x
)
def
grad
(
self
,
(
x
,),
(
gz
,)):
return
[
gz
*
scalar_sigmoid
(
x
)]
def
c_code
(
self
,
node
,
name
,
(
x
,),
(
z
,),
sub
):
if
node
.
inputs
[
0
]
.
type
==
scalar
.
float32
:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( numpy.log1p(numpy.exp(theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return
"""
%(z)
s =
%(x)
s < -103.0f ? 0.0 :
%(x)
s > 14.0f ?
%(x)
s : log1p(exp(
%(x)
s));"""
%
locals
()
elif
node
.
inputs
[
0
]
.
type
==
scalar
.
float64
:
return
"""
%(z)
s =
%(x)
s < -745.0 ? 0.0 :
%(x)
s > 16.0 ?
%(x)
s : log1p(exp(
%(x)
s));"""
%
locals
()
else
:
raise
NotImplementedError
(
'only floatingpoint is implemented'
)
def
c_code_cache_version
(
self
):
v
=
super
(
ScalarSoftplus
,
self
)
.
c_code_cache_version
()
if
v
:
return
(
2
,)
+
v
else
:
return
v
scalar_softplus
=
ScalarSoftplus
(
scalar
.
upgrade_to_float
,
name
=
'scalar_softplus'
)
softplus
=
elemwise
.
Elemwise
(
scalar_softplus
,
name
=
'softplus'
)
pprint
.
assign
(
softplus
,
printing
.
FunctionPrinter
(
'softplus'
))
logsigm_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log
,
(
sigmoid
,
'x'
)),
(
tensor
.
neg
,
(
softplus
,
(
tensor
.
neg
,
'x'
))),
allow_multiple_clients
=
True
)
def
_is_1
(
expr
):
"""rtype bool. True iff expr is a constant close to 1
"""
try
:
v
=
opt
.
get_constant_value
(
expr
)
return
numpy
.
allclose
(
v
,
1
)
except
TypeError
:
return
False
log1msigm_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log
,
(
tensor
.
sub
,
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
(
sigmoid
,
'x'
))),
(
tensor
.
neg
,
(
softplus
,
'x'
)),
allow_multiple_clients
=
True
)
opt
.
register_stabilize
(
logsigm_to_softplus
,
name
=
'logsigm_to_softplus'
)
opt
.
register_stabilize
(
log1msigm_to_softplus
,
name
=
'log1msigm_to_softplus'
)
def
is_1pexp
(
t
):
# if t is of form (1+exp(x)), return x
# else return None
if
t
.
owner
and
t
.
owner
.
op
==
tensor
.
add
:
scalars
,
scalar_inputs
,
nonconsts
=
\
opt
.
scalarconsts_rest
(
t
.
owner
.
inputs
)
# scalar_inputs are potentially dimshuffled and fill'd scalars
if
len
(
nonconsts
)
==
1
:
maybe_exp
=
nonconsts
[
0
]
if
maybe_exp
.
owner
and
maybe_exp
.
owner
.
op
==
tensor
.
exp
:
return
False
,
maybe_exp
.
owner
.
inputs
[
0
]
return
None
def
is_exp
(
t
):
# if t is of form (exp(x)) then return x
# else return None
neg
=
False
if
t
.
owner
and
t
.
owner
.
op
==
tensor
.
neg
:
t
=
t
.
owner
.
inputs
[
0
]
neg
=
True
if
t
.
owner
and
t
.
owner
.
op
==
tensor
.
exp
:
return
neg
,
t
.
owner
.
inputs
[
0
]
def
partition_num_or_denom
(
r
,
f
):
if
r
.
owner
and
r
.
owner
.
op
==
tensor
.
mul
:
a
=
r
.
owner
.
inputs
else
:
a
=
[
r
]
# ugly 2.4-compatible thing
f_terms
=
[]
neg
=
False
rest
=
[]
for
t
in
a
:
f_t
=
f
(
t
)
if
f_t
is
None
:
rest
.
append
(
t
)
else
:
neg_t
,
f_t
=
f_t
f_terms
.
append
(
f_t
)
neg
^=
neg_t
#bit flip if neg_t is true
return
f_terms
,
rest
,
neg
@opt.register_stabilize
@gof.local_optimizer
([
tensor
.
true_div
])
def
local_exp_over_1_plus_exp
(
node
):
"""exp(x)/(1+exp(x)) -> sigm(x)
c/(1+exp(x)) -> c*sigm(-x)
"""
# this optimization should be done for numerical stability
# so we don't care to check client counts
if
node
.
op
==
tensor
.
true_div
:
#find all the exp() terms in the numerator
num
,
denom
=
node
.
inputs
num_exp_x
,
num_rest
,
num_neg
=
partition_num_or_denom
(
num
,
is_exp
)
denom_1pexp
,
denom_rest
,
denom_neg
=
partition_num_or_denom
(
denom
,
is_1pexp
)
sigmoids
=
[]
for
t
in
denom_1pexp
:
if
t
in
num_exp_x
:
# case: exp(x) /(1+exp(x))
sigmoids
.
append
(
sigmoid
(
t
))
del
num_exp_x
[
num_exp_x
.
index
(
t
)]
else
:
# case: 1/(1+exp(x))
sigmoids
.
append
(
sigmoid
(
-
t
))
if
not
sigmoids
:
# we didn't find any. abort
return
# put the new numerator together
new_num
=
sigmoids
+
[
tensor
.
exp
(
t
)
for
t
in
num_exp_x
]
+
num_rest
if
len
(
new_num
)
==
1
:
new_num
=
new_num
[
0
]
else
:
new_num
=
tensor
.
mul
(
*
new_num
)
if
num_neg
^
denom_neg
:
new_num
=
-
new_num
if
len
(
denom_rest
)
==
0
:
return
[
new_num
]
elif
len
(
denom_rest
)
==
1
:
return
[
new_num
/
denom_rest
[
0
]]
else
:
return
[
new_num
/
tensor
.
mul
(
*
denom_rest
)]
@opt.register_stabilize
@gof.local_optimizer
([
tensor
.
mul
])
def
local_sigm_times_exp
(
node
):
"""
exp(x)*sigm(-x) -> -sigm(x)
"""
# this is a numerical stability thing, so we dont check clients
if
node
.
op
==
tensor
.
mul
:
exp_x
=
[]
exp_minus_x
=
[]
sigm_x
=
[]
sigm_minus_x
=
[]
other
=
[]
neg
=
False
for
i
in
node
.
inputs
:
while
i
.
owner
and
i
.
owner
.
op
==
tensor
.
neg
:
neg
^=
True
i
=
i
.
owner
.
inputs
[
0
]
if
i
.
owner
and
i
.
owner
.
op
==
tensor
.
exp
:
exp_arg
=
i
.
owner
.
inputs
[
0
]
if
exp_arg
.
owner
and
exp_arg
.
owner
.
op
==
tensor
.
neg
:
exp_minus_x
.
append
(
exp_arg
.
owner
.
inputs
[
0
])
else
:
exp_x
.
append
(
exp_arg
)
elif
i
.
owner
and
i
.
owner
.
op
==
sigmoid
:
sigm_arg
=
i
.
owner
.
inputs
[
0
]
if
sigm_arg
.
owner
and
sigm_arg
.
owner
.
op
==
tensor
.
neg
:
sigm_minus_x
.
append
(
sigm_arg
.
owner
.
inputs
[
0
])
else
:
sigm_x
.
append
(
sigm_arg
)
else
:
other
.
append
(
i
)
# remove matched pairs in exp_x and sigm_minus_x
did_something
=
False
for
i
in
exp_x
:
if
i
in
sigm_minus_x
:
del
sigm_minus_x
[
sigm_minus_x
.
index
(
i
)]
other
.
append
(
sigmoid
(
i
))
did_something
=
True
else
:
other
.
append
(
i
)
# remove matched pairs in exp_minus_x and sigm_x
for
i
in
exp_minus_x
:
if
i
in
sigm_x
:
del
sigm_x
[
sigm_x
.
index
(
i
)]
other
.
append
(
sigm
(
-
i
))
did_something
=
True
else
:
other
.
append
(
i
)
if
did_something
:
terms
=
other
+
[
sigmoid
(
x
)
for
x
in
sigm_x
]
\
+
[
sigmoid
(
-
x
)
for
x
in
sigm_minus_x
]
if
len
(
terms
)
>
1
:
rval
=
tensor
.
mul
(
*
terms
)
else
:
rval
=
terms
[
0
]
if
neg
:
return
[
-
rval
]
else
:
return
[
rval
]
@opt.register_stabilize
@gof.local_optimizer([tensor.inv])
def local_inv_1_plus_exp(node):
    """
    1/(1+exp(x)) -> sigm(-x)
    """
    # this optimization should be done for numerical stability
    # so we don't care to check client counts
    if node.op == tensor.inv:
        inv_arg = node.inputs[0]
        if inv_arg.owner and inv_arg.owner.op == tensor.add:
            # partition the add's inputs into scalar constants and the rest
            scalars, scalar_inputs, nonconsts = \
                    opt.scalarconsts_rest(inv_arg.owner.inputs)
            # scalar_inputs are potentially dimshuffled and fill'd scalars
            if len(nonconsts) == 1:
                if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
                    # the constant part must sum to 1 for the identity
                    # 1/(1+exp(x)) == sigm(-x) to hold
                    if scalars and numpy.allclose(numpy.sum(scalars), 1):
                        # re-wrap the result in the original fill() chain so
                        # the broadcast pattern of the graph is preserved
                        return opt._fill_chain(
                                sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])),
                                scalar_inputs)
#@opt.register_canonicalize
# Fixed tracker: this optimizer rewrites `tensor.sub` nodes, but the tracker
# list said `tensor.inv`, so it could never fire when tracking is honored.
@gof.local_optimizer([tensor.sub])
def local_1msigmoid(node):
    """
    1-sigm(x) -> sigm(-x)
    """
    if node.op == tensor.sub:
        sub_l, sub_r = node.inputs
        if len(sub_r.clients) > 1:
            return # graph is using both sigm and 1-sigm
        if sub_r.owner and sub_r.owner.op == sigmoid:
            try:
                val_l = opt.get_constant_value(sub_l)
            except Exception:
                # left operand is not a known constant: nothing to do
                return
            # the left operand must be the constant 1 for the identity to hold
            if numpy.allclose(numpy.sum(val_l), 1):
                return [sigmoid(-sub_r.owner.inputs[0])]
theano/tensor/nnet/tests/test_sigm.py
0 → 100644
Browse file @
af804be5
import
unittest
import
theano
from
theano
import
tensor
as
T
from
theano
import
gof
import
numpy
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.tests
import
test_basic
as
TT
from
theano.tensor.nnet
import
*
class T_sigmoid(unittest.TestCase):
    """Tests for the sigmoid elemwise op."""
    def setUp(self):
        # seed the shared RNG so verify_grad draws reproducible random points
        utt.seed_rng()
    def test_elemwise(self):
        # numeric gradient check of sigmoid on a random 3x4 input
        utt.verify_grad(sigmoid, [numpy.random.rand(3, 4)])
class T_softplus(unittest.TestCase):
    """Tests for the softplus elemwise op."""
    def setUp(self):
        # seed the shared RNG so verify_grad draws reproducible random points
        utt.seed_rng()
    def test_elemwise(self):
        # numeric gradient check of softplus on a random 3x4 input
        utt.verify_grad(softplus, [numpy.random.rand(3, 4)])
class T_sigmoid_opts(unittest.TestCase):
    """Check that the sigmoid graph rewrites produce the expected op lists."""

    def test_exp_over_1_plus_exp(self):
        m = theano.config.mode
        if m == 'FAST_COMPILE':
            # the rewrites under test only run in an optimizing mode
            m = 'FAST_RUN'
        x = T.dvector()

        # tests exp_over_1_plus_exp
        f = theano.function([x], T.exp(x) / (1 + T.exp(x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid]

        # tests inv_1_plus_exp
        f = theano.function([x], T.fill(x, 1.0) / (1 + T.exp(-x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid]

        # tests inv_1_plus_exp with neg
        f = theano.function([x], T.fill(x, -1.0) / (1 + T.exp(-x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid,
                T.inplace.neg_inplace]

        # tests double inv_1_plus_exp with neg
        f = theano.function([x],
                (T.fill(x, -1.0) * T.exp(x)) /
                ((1 + T.exp(x)) * (1 + T.exp(-x))), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid, T.mul]

    def test_1msigmoid(self):
        m = theano.config.mode
        if m == 'FAST_COMPILE':
            m = 'FAST_RUN'
        x = T.fmatrix()

        # tests exp_over_1_plus_exp
        # (debugprint calls commented out for consistency with the test
        # above; they only spammed the test output)
        f = theano.function([x], 1 - T.exp(x) / (1 + T.exp(x)), mode=m)
        #theano.printing.debugprint(f)
        # NOTE(review): `tensor.neg` relies on `tensor` being re-exported by
        # `from theano.tensor.nnet import *` -- confirm, else use T.neg
        assert [node.op for node in f.maker.env.toposort()] == [tensor.neg,
                sigmoid_inplace]

        # tests inv_1_plus_exp
        f = theano.function([x], 1 - T.fill(x, 1.0) / (1 + T.exp(-x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [tensor.neg,
                sigmoid_inplace]
theano/tensor/opt.py
Browse file @
af804be5
...
@@ -44,34 +44,65 @@ def _fill_chain(new_out, orig_inputs):
...
@@ -44,34 +44,65 @@ def _fill_chain(new_out, orig_inputs):
new_out
=
T
.
fill
(
i
,
new_out
)
new_out
=
T
.
fill
(
i
,
new_out
)
return
[
new_out
]
return
[
new_out
]
def
get_constant_value
(
v
,
fill
=
False
):
def
encompasses_broadcastable
(
b1
,
b2
):
"""return the constant value underlying variable `v`
"""
Returns True if the broadcastable patterns b1 and b2 are such that b2 is
broadcasted to b1's shape and not the opposite.
:param b1: the broadcastable attribute of a tensor type
:param b2: the broadcastable attribute of a tensor type
"""
if
len
(
b1
)
<
len
(
b2
):
return
False
b1
=
b1
[
-
len
(
b2
):]
return
not
any
(
v1
and
not
v2
for
v1
,
v2
in
zip
(
b1
,
b2
))
def
merge_broadcastables
(
broadcastables
):
return
[
all
(
bcast
)
for
bcast
in
zip
(
*
broadcastables
)]
def
get_constant_value
(
v
):
"""return the constant scalar(0-D) value underlying variable `v`
If v is the output of dimshuffles, fills, this function digs through them.
If v is the output of dimshuffles, fills, this function digs through them.
If `v` is not some view of constant data, then raise a TypeError.
If `v` is not some view of constant data, then raise a TypeError.
if fill is True, then it returns (v, [...]) where the second term is a list of variables
that were used in the fill expressions
:note: There may be another function similar to this one in the code, but I'm not sure where it
:note: There may be another function similar to this one in the code, but I'm not sure where it
is.
is.
"""
"""
if
isinstance
(
v
,
gof
.
Constant
):
if
isinstance
(
v
,
gof
.
Constant
):
if
fill
:
#TODO: consider checking for arrays of the form e.g. [1,1,1,1] where
return
v
.
data
,
[]
# it is not a constant, but in some cases it *could* be replaced with one.
return
v
.
data
# Note that this would have an effect on the broadcasting of inputs and so on
try
:
complex
(
v
.
data
)
#works for all numeric scalars
return
v
.
data
except
:
raise
TypeError
(
v
)
if
v
.
owner
and
isinstance
(
v
.
owner
.
op
,
T
.
DimShuffle
):
if
v
.
owner
and
isinstance
(
v
.
owner
.
op
,
T
.
DimShuffle
):
return
get_constant_value
(
v
.
owner
.
inputs
[
0
],
fill
=
fill
)
return
get_constant_value
(
v
.
owner
.
inputs
[
0
])
if
fill
:
if
v
.
owner
and
v
.
owner
.
op
==
T
.
fill
:
if
v
.
owner
and
v
.
owner
.
op
==
T
.
fill
:
shape
,
val
=
v
.
owner
.
inputs
shape
,
val
=
v
.
owner
.
inputs
# fill(a,b) fills the shape of 'a' filled with 'b'
# fill(a,b) fills the shape of 'a' filled with 'b'
return
get_constant_value
(
val
)
rval
,
rshapes
=
get_constant_value
(
val
,
fill
=
fill
)
return
rval
,
rshapes
+
[
shape
]
raise
TypeError
(
v
)
raise
TypeError
(
v
)
def scalarconsts_rest(inputs):
    """Partition a list of variables into two kinds:
    scalar constants, and the rest.

    Returns a triple (consts, origconsts, nonconsts):
      consts     -- the underlying numeric value of each constant input
      origconsts -- the corresponding original Variables
      nonconsts  -- every input for which no constant value could be extracted
    """
    consts = []
    origconsts = []
    nonconsts = []
    for i in inputs:
        try:
            v = get_constant_value(i)
            consts.append(v)
            origconsts.append(i)
        except Exception:
            # was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit
            nonconsts.append(i)
    return consts, origconsts, nonconsts
@gof.optimizer
@gof.optimizer
def
insert_inplace_optimizer
(
env
):
def
insert_inplace_optimizer
(
env
):
"""
"""
...
@@ -124,6 +155,11 @@ def register_specialize(lopt, *tags, **kwargs):
...
@@ -124,6 +155,11 @@ def register_specialize(lopt, *tags, **kwargs):
compile
.
optdb
[
'specialize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
)
compile
.
optdb
[
'specialize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
)
return
lopt
return
lopt
def register_stabilize(lopt, *tags, **kwargs):
    """Register `lopt` in the 'stabilize' pass of the optimizer database.

    A custom name may be supplied via the 'name' keyword argument; otherwise
    the optimizer's __name__ is used.  Returns `lopt`, so this is usable as a
    decorator (mirrors register_specialize above).
    """
    name = (kwargs and kwargs.pop('name')) or lopt.__name__
    compile.optdb['stabilize'].register(name, lopt, 'fast_run', *tags)
    return lopt
######################
######################
# DimShuffle lifters #
# DimShuffle lifters #
######################
######################
...
@@ -164,130 +200,321 @@ register_canonicalize(local_dimshuffle_lift)
...
@@ -164,130 +200,321 @@ register_canonicalize(local_dimshuffle_lift)
#################
#################
####################
# Shape
lifters #
# Shape
Feature, Shape optimizations
#################
#################
####################
@gof.local_optimizer
([
T
.
_shape
,
None
])
class
MakeVector
(
T
.
Op
):
def
local_shape_lift_elemwise
(
node
):
"""Concatenate a number of scalars together into a vector
"""
shape(elemwise_op(..., x, ...)) -> shape(x)
Where x contains the maximal shape information.
This is a simple version of stack() that introduces far less cruft into the graph.
"""
"""
if
not
opt
.
check_chain
(
node
,
T
.
_shape
,
T
.
Elemwise
):
def
__init__
(
self
,
dtype
=
'int64'
):
return
False
self
.
dtype
=
dtype
def
__eq__
(
self
,
other
):
output
=
node
.
inputs
[
0
]
return
type
(
self
)
==
type
(
other
)
and
self
.
dtype
==
other
.
dtype
parent
=
output
.
owner
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
dtype
)
for
input
in
parent
.
inputs
:
def
make_node
(
self
,
*
inputs
):
if
input
.
type
.
broadcastable
==
output
.
type
.
broadcastable
:
inputs
=
map
(
T
.
as_tensor_variable
,
inputs
)
return
T
.
_shape
(
input
),
if
not
all
(
a
.
type
==
inputs
[
0
]
.
type
for
a
in
inputs
):
raise
TypeError
(
'This MakeVector instance requires inputs of same type
%
s'
%
return
False
inputs
[
0
]
.
type
)
if
inputs
:
register_canonicalize
(
local_shape_lift_elemwise
,
'shape_lift'
)
dtype
=
inputs
[
0
]
.
type
.
dtype
register_specialize
(
local_shape_lift_elemwise
,
'shape_lift'
)
else
:
dtype
=
self
.
dtype
#bcastable = (len(inputs) == 1)
bcastable
=
False
otype
=
T
.
TensorType
(
broadcastable
=
(
bcastable
,),
dtype
=
dtype
)
return
T
.
Apply
(
self
,
inputs
,
[
otype
()])
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
perform
(
self
,
node
,
inputs
,
(
out
,)):
out
[
0
]
=
T
.
numpy
.
asarray
(
inputs
)
make_vector
=
MakeVector
()
class MakeVectorPrinter:
    """Pretty-printer that renders a make_vector node as "[a, b, c]"."""

    def process(self, r, pstate):
        """Render `r` if it is the output of a MakeVector; else raise TypeError."""
        owner = r.owner
        if owner is not None and isinstance(owner.op, MakeVector):
            # print each element at high precedence so none gets parenthesized
            rendered = [pstate.pprinter.process(elem, pstate.clone(precedence=1000))
                    for elem in owner.inputs]
            return "[%s]" % ", ".join(rendered)
        raise TypeError("Can only print make_vector.")
T
.
pprint
.
assign
(
lambda
pstate
,
r
:
r
.
owner
and
isinstance
(
r
.
owner
.
op
,
MakeVector
),
MakeVectorPrinter
())
@gof.local_optimizer
([
T
.
_shape
,
None
])
class
Shape_i
(
T
.
Op
):
def
local_shape_lift_sum
(
node
):
"""
"""
shape(sum{n}(x)) -> [shape(x)[0], ..., shape(x)[n-1], shape(x)[n+1], ...]
L{Op} to return the shape of a matrix.
"""
if
not
opt
.
check_chain
(
node
,
T
.
_shape
,
T
.
Sum
):
return
False
input
=
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
]
@note: Non-differentiable.
axis
=
node
.
inputs
[
0
]
.
owner
.
op
.
axis
"""
if
axis
is
None
:
# or len(axis) != 1:
def
__init__
(
self
,
i
):
axis
=
range
(
input
.
type
.
ndim
)
self
.
i
=
i
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
self
.
i
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
i
==
other
.
i
def
__str__
(
self
):
return
'
%
s{
%
i}'
%
(
self
.
__class__
.
__name__
,
self
.
i
)
def
make_node
(
self
,
x
):
x
=
T
.
as_tensor_variable
(
x
)
if
x
.
ndim
<=
self
.
i
:
raise
TypeError
(
'x has too few dimensions for Shape_i'
,
(
x
,
self
.
i
))
return
T
.
Apply
(
self
,
[
x
],
[
T
.
lscalar
()])
def
perform
(
self
,
node
,
(
x
,
),
(
out
,
)):
out
[
0
]
=
theano
.
_asarray
(
x
.
shape
[
self
.
i
],
dtype
=
'int64'
)
def
grad
(
self
,
(
x
,),
(
gz
,)):
return
[
None
]
class
ShapeFeature
(
object
):
"""Graph optimizer for removing all calls to shape()
This optimizer replaces all Shapes and Subtensors of Shapes with Shape_i and MakeVector
Ops.
This optimizer has several goals:
1. to 'lift' Shapes to as close to the inputs as possible.
2. to infer the shape of every node in the graph in terms of the input shapes.
3. remove all fills (T.second, T.fill) from the graph
Lifting shapes as close to the inputs as possible is important for canonicalization because
it is very bad form to have to compute something just to know how big it will be. Firstly,
it is a waste of time to compute such outputs. But it is important to get rid of these
outputs as early as possible in the compilation process because the
extra computations make it appear as if many internal graph nodes have multiple clients.
Many optimizations refuse to work on nodes with multiple clients.
Lifting is done by using an `<Op>.infer_shape` function if one is present, or else using a
conservative default. An Op that supports shape-lifting should define a
infer_shape(self, node, input_shapes) function. The argument input_shapes is a tuple
of tuples... there is an interior tuple for each input to the node. The tuple has as many
elements as dimensions. The element in position i of tuple j represents the i'th shape
component of the j'th input. The function should return a tuple of tuples. One output
tuple for each node.output. Again, the i'th element of the j'th output tuple represents
the output[j].shape[i] of the function. If an output is not a TensorType, then None should
be returned instead of a tuple for that output.
For example the infer_shape for a matrix-matrix product would accept
input_shapes=((x0,x1), (y0,y1)) and return ((x0, y1),).
Inferring the shape of internal nodes in the graph is important for doing size-driven
optimizations. If we know how big various intermediate results will be, we can estimate
the cost of many Ops accurately, and generate c-code that is specific [e.g. unrolled] to
particular sizes.
ish
=
T
.
_shape
(
input
)
.. note::
return
T
.
make_lvector
.
make_node
(
*
(
ish
[
i
]
for
i
in
xrange
(
input
.
type
.
ndim
)
if
i
not
in
axis
))
.
outputs
# return T.vertical_stack.make_node(ish[:axis], ish[axis+1:]).outputs
register_canonicalize
(
local_shape_lift_sum
,
'shape_lift'
)
Right now there is only the ConvOp that can really take advantage of this shape
inference, but it is worth it even just for the ConvOp. All that's necessary to do
shape inference is 1) to mark shared inputs as having a particular shape,
either via a .tag or some similar hacking; and 2) to add an optional Param() argument
to promise that inputs will have a certain shape (or even to have certain shapes in
certain dimensions).
@gof.local_optimizer
([
T
.
_shape
,
T
.
dot
])
def
local_shape_lift_dot
(
node
):
"""
"""
shape(dot(a, b)) -> [shape(a)[0], shape(b)[1]]
def
shape_i
(
self
,
i
):
"""
def
op_deco
(
r
):
if
not
opt
.
check_chain
(
node
,
T
.
_shape
,
T
.
dot
):
if
r
.
type
.
broadcastable
[
i
]:
return
False
return
self
.
lscalar_one
a
,
b
=
node
.
inputs
[
0
]
.
owner
.
inputs
else
:
if
a
.
type
.
ndim
==
2
and
b
.
type
.
ndim
==
2
:
return
Shape_i
(
i
)(
r
)
return
T
.
make_lvector
.
make_node
(
T
.
_shape
(
a
)[
0
],
T
.
_shape
(
b
)[
1
])
.
outputs
return
op_deco
elif
a
.
type
.
ndim
==
1
and
b
.
type
.
ndim
==
2
:
return
T
.
make_lvector
.
make_node
(
T
.
_shape
(
b
)[
1
])
.
outputs
elif
a
.
type
.
ndim
==
2
and
b
.
type
.
ndim
==
1
:
return
T
.
make_lvector
.
make_node
(
T
.
_shape
(
a
)[
0
])
.
outputs
elif
a
.
type
.
ndim
==
1
and
b
.
type
.
ndim
==
1
:
return
T
.
make_lvector
.
make_node
()
.
outputs
else
:
return
False
register_canonicalize
(
local_shape_lift_dot
,
'shape_lift'
)
# local_shape_lift = opt.LocalOptGroup(local_shape_lift_elemwise,
# local_shape_lift_sum,
# local_shape_lift_dot)
def
shape_tuple
(
self
,
r
):
return
tuple
([
self
.
shape_i
(
i
)(
r
)
for
i
in
xrange
(
r
.
ndim
)])
################
def
default_infer_shape
(
self
,
node
,
i_shapes
):
# Fill lifters #
rval
=
[]
################
for
r
in
node
.
outputs
:
try
:
rval
.
append
(
self
.
shape_tuple
(
r
))
except
AttributeError
:
rval
.
append
(
None
)
return
rval
def
unpack
(
self
,
s_i
):
# unpack the s_i that the Op returned
assert
s_i
is
not
None
if
s_i
==
1
:
# don't make the optimizer merge a zillion ones together
return
self
.
lscalar_one
if
type
(
s_i
)
is
int
:
# this shape is a constant
assert
s_i
>=
0
return
T
.
constant
(
s_i
,
dtype
=
'int64'
)
if
type
(
s_i
)
in
(
tuple
,
list
):
# this dimension is the same as many of the inputs
# which tells us that if one of the inputs is known,
# the others all become known.
# TODO: should be implemented in Elemwise, and Dot
#
# worst case, we loop over shape_of and replace things
raise
NotImplementedError
(
s_i
)
elif
s_i
.
type
==
T
.
lscalar
:
return
s_i
else
:
raise
TypeError
(
'Unsupported shape element'
,
s_i
)
def
encompasses_broadcastable
(
b1
,
b2
):
def
set_shape
(
self
,
r
,
s
):
"""
assert
r
not
in
self
.
shape_of
Returns True if the broadcastable patterns b1 and b2 are such that b2 is
if
s
is
None
:
broadcasted to b1's shape and not the opposite.
self
.
shape_of
[
r
]
=
s
else
:
self
.
shape_of
[
r
]
=
tuple
([
self
.
unpack
(
s_i
)
for
s_i
in
s
])
:param b1: the broadcastable attribute of a tensor type
def
make_vector_shape
(
self
,
r
):
:param b2: the broadcastable attribute of a tensor type
return
make_vector
(
*
self
.
shape_of
[
r
])
"""
#
if
len
(
b1
)
<
len
(
b2
):
#
return
False
# Feature inteface
b1
=
b1
[
-
len
(
b2
):]
#
return
not
any
(
v1
and
not
v2
for
v1
,
v2
in
zip
(
b1
,
b2
))
#
def
on_attach
(
self
,
env
):
assert
not
hasattr
(
env
,
'shape_feature'
)
env
.
shape_feature
=
self
self
.
shape_of
=
{}
# Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self
.
lscalar_one
=
T
.
constant
(
1
,
dtype
=
'int64'
)
assert
self
.
lscalar_one
.
type
==
T
.
lscalar
for
node
in
env
.
toposort
():
self
.
on_import
(
env
,
node
)
def
on_import
(
self
,
env
,
node
):
if
node
.
outputs
[
0
]
in
self
.
shape_of
:
# this is a revert, not really an import
for
r
in
node
.
outputs
+
node
.
inputs
:
assert
r
in
self
.
shape_of
return
for
i
,
r
in
enumerate
(
node
.
inputs
):
# make sure we have shapes for the inputs
if
r
not
in
self
.
shape_of
:
try
:
self
.
set_shape
(
r
,
self
.
shape_tuple
(
r
))
except
AttributeError
:
self
.
set_shape
(
r
,
None
)
# not a TensorType variable
def
merge_broadcastables
(
broadcastables
):
try
:
return
[
all
(
bcast
)
for
bcast
in
zip
(
*
broadcastables
)]
shape_infer
=
node
.
op
.
infer_shape
except
AttributeError
:
shape_infer
=
self
.
default_infer_shape
@gof.local_optimizer
([
T
.
fill
,
None
])
try
:
def
local_fill_lift
(
node
):
o_shapes
=
shape_infer
(
node
,
[
self
.
shape_of
[
r
]
for
r
in
node
.
inputs
])
except
Exception
,
e
:
_logger
.
error
(
'Failed to infer_shape from Op
%
s (i_shapes=
%
s):
%
s
%
s'
%
(
node
.
op
,
[
self
.
shape_of
[
r
]
for
r
in
node
.
inputs
],
type
(
e
),
str
(
e
)))
o_shapes
=
default_infer_shape
(
node
,
[
self
.
shape_of
[
r
]
for
r
in
node
.
inputs
])
# this is packed information
# an element of o_shapes is either None or a tuple
# elements of the tuple can be either strings, or ints
assert
len
(
o_shapes
)
==
len
(
node
.
outputs
)
for
r
,
s
in
zip
(
node
.
outputs
,
o_shapes
):
self
.
set_shape
(
r
,
s
)
def
on_change_input
(
self
,
env
,
mode
,
i
,
r
,
new_r
):
# TODO:
# This tells us that r and new_r must have the same shape
# if we didn't know that the shapes are related, now we do.
pass
class
ShapeOptimizer
(
Optimizer
):
"""Optimizer that serves to add ShapeFeature as an env feature.
"""
"""
fill(f(a), b) -> fill(a, b)
def
__init__
(
self
):
If a.type == f(a).type.
Optimizer
.
__init__
(
self
)
fill(a, b) -> b
def
add_requirements
(
self
,
env
):
If a.type == b.type.
env
.
extend
(
ShapeFeature
())
"""
if
not
opt
.
check_chain
(
node
,
T
.
fill
):
return
False
model
,
filling
=
node
.
inputs
def
apply
(
self
,
env
):
pass
mb
,
fb
=
model
.
type
.
broadcastable
,
filling
.
type
.
broadcastable
# -1 should make it run right before the first merge
if
model
.
type
.
dtype
==
filling
.
type
.
dtype
and
encompasses_broadcastable
(
fb
,
mb
):
theano
.
compile
.
mode
.
optdb
.
register
(
'ShapeOpt'
,
ShapeOptimizer
(),
-
1
,
'fast_run'
,
'fast_compile'
)
return
False
# [filling]
parent
=
model
.
owner
@register_specialize
if
parent
is
None
or
not
isinstance
(
parent
,
T
.
Elemwise
):
@register_canonicalize
return
False
@gof.local_optimizer
([
T
.
fill
])
for
input
in
parent
.
inputs
:
def
local_fill_to_alloc
(
node
):
if
input
.
type
==
model
.
type
:
"""fill(s,v) -> alloc(v, shape(s))
return
[
T
.
fill
(
input
,
filling
)]
return
False
This is an important optimization because with the shape_to_shape_i optimization, the
dependency on 's' is often removed.
"""
if
node
.
op
==
T
.
fill
:
r
,
v
=
node
.
inputs
if
v
.
type
==
node
.
outputs
[
0
]
.
type
:
# this is a useless fill, erase it.
rval
=
[
v
]
elif
v
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
:
# this is a cast
rval
=
[
T
.
cast
(
v
,
node
.
outputs
[
0
]
.
type
.
dtype
)]
else
:
# we are broadcasting v somehow
shape_of
=
node
.
env
.
shape_feature
.
shape_of
# TODO: cut out un-necessary dimshuffles of v
rval
=
[
T
.
Alloc
(
node
.
outputs
[
0
]
.
dtype
)(
v
,
*
shape_of
[
node
.
outputs
[
0
]])]
assert
rval
[
0
]
.
type
==
node
.
outputs
[
0
]
.
type
return
rval
register_canonicalize
(
local_fill_lift
,
'fill_lift'
)
@register_specialize
@register_canonicalize
@gof.local_optimizer([T._shape])
def local_shape_to_shape_i(node):
    """shape(x) -> make_vector(shape_i{0}(x), ..., shape_i{n-1}(x))

    Replaces a Shape node with a vector assembled from the per-dimension
    symbolic shapes tracked by the env's ShapeFeature.
    """
    if node.op == T._shape:
        # ShapeFeature is installed on the env by ShapeOptimizer and records
        # a scalar expression for each dimension of each variable
        shape_feature = node.env.shape_feature
        return [shape_feature.make_vector_shape(node.inputs[0])]
@register_specialize
@register_canonicalize
@gof.local_optimizer([T.Subtensor])
def local_subtensor_make_vector(node):
    """Simplify a Subtensor of a make_vector:
        [a,b,c][0]   -> a
        [a,b,c][0:2] -> [a,b]
    We can do this for constant indexes.
    """
    if isinstance(node.op, T.Subtensor):
        x = node.inputs[0]
        if x.owner and x.owner.op == make_vector:
            # a make_vector output is 1-d, so there can only be one index
            # (was wrapped in a try/except that just re-raised -- a no-op)
            idx, = node.op.idx_list
            if isinstance(idx, int):
                return [x.owner.inputs[idx]]
            elif isinstance(idx, T.TensorVariable):
                # if it is a constant we can do something with it
                try:
                    v = get_constant_value(idx)
                    return [x.owner.inputs[v]]
                except Exception:
                    pass
            else:
                # it is a slice of ints and/or Variables
                #TODO: check subtensor to see if it can contain constant variables,
                #      and if it can, then try to unpack them.
                try:
                    return [make_vector(*x.owner.inputs.__getitem__(idx))]
                except TypeError:
                    pass
                except Exception:
                    _logger.error('failed to index with "%s"' % str(idx))
                    raise
##################
##################
# Subtensor opts #
# Subtensor opts #
...
@@ -306,38 +533,6 @@ def local_subtensor_unary(node):
...
@@ -306,38 +533,6 @@ def local_subtensor_unary(node):
x_idx
=
node
.
op
(
u
.
owner
.
inputs
[
0
],
*
idx
)
x_idx
=
node
.
op
(
u
.
owner
.
inputs
[
0
],
*
idx
)
return
[
u
.
owner
.
op
(
x_idx
)]
return
[
u
.
owner
.
op
(
x_idx
)]
@gof.local_optimizer
([
None
,
None
])
def
local_subtensor_make_vector
(
node
):
"""
[a,b,c][0] -> a
[a,b,c][0:2] -> [a,b]
If the index or slice is constant.
"""
if
not
opt
.
check_chain
(
node
,
T
.
Subtensor
,
T
.
MakeVector
):
return
False
joined_r
=
node
.
inputs
[
0
]
try
:
#check that join is being used to join scalars
veclen
=
T
.
join
.
vec_length
(
joined_r
)
except
:
return
False
idxlist
=
node
.
op
.
idx_list
if
len
(
idxlist
)
!=
1
:
return
False
idx
=
idxlist
[
0
]
if
isinstance
(
idx
,
int
):
return
[
node
.
inputs
[
0
]
.
owner
.
inputs
[
idx
]]
try
:
return
T
.
make_vector
(
*
(
node
.
owner
.
inputs
[
0
]
.
owner
.
inputs
.
__getslice__
(
idx
)))
except
TypeError
:
return
False
register_canonicalize
(
local_subtensor_make_vector
)
@register_canonicalize
@register_canonicalize
@gof.local_optimizer
([
None
])
@gof.local_optimizer
([
None
])
def
local_IncSubtensor_serialize
(
node
):
def
local_IncSubtensor_serialize
(
node
):
...
@@ -581,13 +776,18 @@ class Canonizer(gof.LocalOptimizer):
...
@@ -581,13 +776,18 @@ class Canonizer(gof.LocalOptimizer):
# the dtype of the 'input' argument. The leaf-Variables of the graph covered by the
# the dtype of the 'input' argument. The leaf-Variables of the graph covered by the
# recursion may be of any Variable type.
# recursion may be of any Variable type.
if
len
(
input
.
clients
)
>
1
:
if
0
:
# this logic is too conservative, but doing it is better than not doing it.
# UPDATE: This logic makes it impossible to recognize some important patterns
#
# (e.g. variants on the x/x)
# we don't want to canonize a subgraph that we will need to compute anyway for the other clients.
# and it is screwing up the RBM free energy gradient.
# This check is too conservative because if the other clients are also in the subgraph we are canonizing,
#TODO: review this
# then we should [probably?] recurse anyway.
if
len
(
input
.
clients
)
>
1
:
return
[
input
],
[]
# this logic is too conservative, but doing it is better than not doing it.
#
# we don't want to canonize a subgraph that we will need to compute anyway for the other clients.
# This check is too conservative because if the other clients are also in the subgraph we are canonizing,
# then we should [probably?] recurse anyway.
return
[
input
],
[]
if
input
.
owner
is
None
or
input
.
owner
.
op
not
in
[
self
.
main
,
self
.
inverse
,
self
.
reciprocal
]:
if
input
.
owner
is
None
or
input
.
owner
.
op
not
in
[
self
.
main
,
self
.
inverse
,
self
.
reciprocal
]:
if
input
.
owner
and
isinstance
(
input
.
owner
.
op
,
T
.
DimShuffle
):
if
input
.
owner
and
isinstance
(
input
.
owner
.
op
,
T
.
DimShuffle
):
...
@@ -835,8 +1035,8 @@ class Canonizer(gof.LocalOptimizer):
...
@@ -835,8 +1035,8 @@ class Canonizer(gof.LocalOptimizer):
# Here we make the canonical version of the graph around this node
# Here we make the canonical version of the graph around this node
# See the documentation of get_num_denum and simplify
# See the documentation of get_num_denum and simplify
orig_num
,
orig_denum
=
self
.
get_num_denum
(
node
.
outputs
[
0
])
orig_num
,
orig_denum
=
self
.
get_num_denum
(
node
.
outputs
[
0
])
num
,
denum
=
list
(
orig_num
),
list
(
orig_denum
)
num
,
denum
=
self
.
simplify
(
list
(
orig_num
),
list
(
orig_denum
)
)
num
,
denum
=
self
.
simplify
(
num
,
denum
)
def
same
(
x
,
y
):
def
same
(
x
,
y
):
return
len
(
x
)
==
len
(
y
)
and
all
(
N
.
all
(
xe
==
ye
)
for
xe
,
ye
in
zip
(
x
,
y
))
return
len
(
x
)
==
len
(
y
)
and
all
(
N
.
all
(
xe
==
ye
)
for
xe
,
ye
in
zip
(
x
,
y
))
...
@@ -935,6 +1135,60 @@ def local_sum_mul_by_scalar(node):
...
@@ -935,6 +1135,60 @@ def local_sum_mul_by_scalar(node):
if
thing_summed
.
owner
and
thing_summed
.
owner
.
op
==
T
.
neg
:
if
thing_summed
.
owner
and
thing_summed
.
owner
.
op
==
T
.
neg
:
return
[
T
.
neg
(
node
.
op
(
thing_summed
.
owner
.
inputs
[
0
]))]
return
[
T
.
neg
(
node
.
op
(
thing_summed
.
owner
.
inputs
[
0
]))]
@register_canonicalize
@gof.local_optimizer([])
def local_sum_all_to_none(node):
    """Sum{0,1,...N} -> Sum{}"""
    if isinstance(node.op, T.Sum):
        # if all the axes are named, then use None as a shorthand
        # this permits more merging
        if node.op.axis is None:
            # already in canonical form
            return
        if set(node.op.axis) == set(range(node.inputs[0].type.ndim)):
            # summing over every dimension: equivalent to axis=None
            return [T.Sum(axis=None)(node.inputs[0])]
@register_canonicalize
@gof.local_optimizer([])
def local_sum_sum(node):
    """Sum(Sum(x, axis_a), axis_b) -> Sum(x, combined_axis)

    Merges two nested Sums into one Sum over the union of their
    (appropriately translated) axes.
    """
    if isinstance(node.op, T.Sum):
        summed, = node.inputs
        if len(summed.clients) == 1:
            if summed.owner and isinstance(summed.owner.op, T.Sum):
                if summed.owner.op.axis is None:
                    # special case of local_cut_useless_reduce
                    return [T.Sum(None)(summed.owner.inputs[0])]
                if node.op.axis is None:
                    # we're summing up everything anyway so lets
                    # do it all at once
                    return [T.Sum(None)(summed.owner.inputs[0])]
                # figure out which dimensions of the original input are preserved
                alldims = range(summed.owner.inputs[0].type.ndim)
                # trim out the dimensions that were removed by the first sum
                # (BUGFIX: the filters previously used `if i in ...`, keeping
                # the *removed* dims and producing a wrong combined axis)
                alldims = [d for i, d in enumerate(alldims)
                        if i not in summed.owner.op.axis]
                # trim out the dimensions removed by the second sum; its axis
                # indexes the intermediate tensor, which is exactly what
                # enumerate() over the surviving dims provides
                alldims = [d for i, d in enumerate(alldims)
                        if i not in node.op.axis]
                # figure out an axis argument that combines the effect of both
                newaxis = [i for i in xrange(summed.owner.inputs[0].type.ndim)
                        if i not in alldims]
                combined_sum = T.Sum(newaxis)
                return [combined_sum(summed.owner.inputs[0])]
@register_canonicalize
@gof.local_optimizer([])
def local_cut_useless_reduce(node):
    """Sum(a, axis=[]) -> a """
    if isinstance(node.op, T.CAReduce):
        summed, = node.inputs
        # if reduce were doing anything, the output ndim would be reduced,
        # so identical input/output types mean the reduction is a no-op
        if summed.type == node.outputs[0].type:
            return [summed]
@gof.local_optimizer
([
T
.
mul
])
@gof.local_optimizer
([
T
.
mul
])
def
local_mul_to_neg
(
node
):
def
local_mul_to_neg
(
node
):
if
node
.
op
==
T
.
mul
and
N
.
all
(
local_mul_canonizer
.
get_constant
(
node
.
inputs
[
0
])
==
-
1.0
):
if
node
.
op
==
T
.
mul
and
N
.
all
(
local_mul_canonizer
.
get_constant
(
node
.
inputs
[
0
])
==
-
1.0
):
...
@@ -1143,30 +1397,23 @@ register_specialize(local_add_specialize)
...
@@ -1143,30 +1397,23 @@ register_specialize(local_add_specialize)
mul_canonizer
=
in2out
(
gof
.
LocalOptGroup
(
local_mul_canonizer
,
local_fill_cut
,
local_fill_sink
))
mul_canonizer
=
in2out
(
gof
.
LocalOptGroup
(
local_mul_canonizer
,
local_fill_cut
,
local_fill_sink
))
@register_s
pecia
lize
@register_s
tabi
lize
@gof.local_optimizer
([
T
.
log
])
@gof.local_optimizer
([
T
.
log
])
def
local_log1p
(
node
):
def
local_log1p
(
node
):
# log(1+exp(x)) -> log1p(x)
# log(1+exp(x)) -> log1p(x)
if
node
.
op
==
T
.
log
:
if
node
.
op
==
T
.
log
:
log_arg
,
=
node
.
inputs
log_arg
,
=
node
.
inputs
if
log_arg
.
owner
and
log_arg
.
owner
.
op
==
T
.
add
:
if
log_arg
.
owner
and
log_arg
.
owner
.
op
==
T
.
add
:
add_inputs
=
log_arg
.
owner
.
inputs
scalars
,
scalar_inputs
,
nonconsts
=
\
consts
=
[
0
]
scalarconsts_rest
(
log_arg
.
owner
.
inputs
)
fills
=
[]
# scalar_inputs are potentially dimshuffled and fill'd scalars
nonconsts
=
[]
if
scalars
and
numpy
.
allclose
(
numpy
.
sum
(
scalars
),
1
):
for
add_in
in
add_inputs
:
if
not
nonconsts
:
try
:
pass
# leave for constant-merge
v
,
f
=
get_constant_value
(
add_in
,
fill
=
True
)
if
len
(
nonconsts
)
==
1
:
consts
.
append
(
v
)
return
_fill_chain
(
T
.
log1p
(
nonconsts
[
0
]),
scalar_inputs
)
fills
.
extend
(
f
)
else
:
except
:
return
_fill_chain
(
T
.
log1p
(
T
.
add
(
*
nonconsts
)),
scalar_inputs
)
nonconsts
.
append
(
add_in
)
if
nonconsts
:
if
numpy
.
allclose
(
numpy
.
sum
(
consts
),
1
):
if
len
(
nonconsts
)
==
1
:
return
_fill_chain
(
T
.
log1p
(
nonconsts
[
0
]),
fills
)
else
:
return
_fill_chain
(
T
.
log1p
(
T
.
add
(
*
nonconsts
)),
fills
)
def
add_calculate
(
num
,
denum
,
aslist
=
False
,
out_type
=
None
):
def
add_calculate
(
num
,
denum
,
aslist
=
False
,
out_type
=
None
):
...
...
theano/tensor/raw_random.py
浏览文件 @
af804be5
...
@@ -136,10 +136,7 @@ class RandomFunction(gof.Op):
...
@@ -136,10 +136,7 @@ class RandomFunction(gof.Op):
draw.
draw.
"""
"""
if
shape
==
()
or
shape
==
[]:
shape
=
tensor
.
as_tensor_variable
(
shape
,
ndim
=
1
)
shape
=
tensor
.
as_tensor_variable
(
shape
,
dtype
=
'int64'
)
else
:
shape
=
tensor
.
as_tensor_variable
(
shape
,
ndim
=
1
)
assert
shape
.
type
.
ndim
==
1
assert
shape
.
type
.
ndim
==
1
assert
(
shape
.
type
.
dtype
==
'int64'
)
or
(
shape
.
type
.
dtype
==
'int32'
)
assert
(
shape
.
type
.
dtype
==
'int64'
)
or
(
shape
.
type
.
dtype
==
'int32'
)
if
not
isinstance
(
r
.
type
,
RandomStateType
):
if
not
isinstance
(
r
.
type
,
RandomStateType
):
...
@@ -158,6 +155,22 @@ class RandomFunction(gof.Op):
...
@@ -158,6 +155,22 @@ class RandomFunction(gof.Op):
[
r
,
shape
]
+
args
,
[
r
,
shape
]
+
args
,
[
r
.
type
(),
self
.
outtype
()])
[
r
.
type
(),
self
.
outtype
()])
    def infer_shape(self, node, i_shapes):
        """Return symbolic shapes for this node's two outputs.

        The first output is the RandomState, which has no tensor shape, so
        None is returned for it.  For the sample output, the `shape` input is
        used directly when it fully determines the sample's shape; otherwise
        we fall back on the symbolic .shape of the output itself.
        """
        r, shp = node.inputs[0:2]
        #if shp is a constant array of len 0, then it means 'automatic shape'
        # (non-constant shp has no .data attribute, so the default [0,1,2]
        # makes unknown_shape False in that case)
        unknown_shape = len(getattr(shp, 'data', [0, 1, 2])) == 0
        # if ndim_added == 0 and shape != () then shape
        if self.ndim_added == 0 and not unknown_shape:
            sample_shp = shp
        else:
            # if shape == () then it will depend on args
            # if ndim_added != 0 and shape != () then it will depend on args
            sample_shp = node.outputs[1].shape
        return [None, [sample_shp[i] for i in xrange(node.outputs[1].ndim)]]
def
perform
(
self
,
node
,
inputs
,
(
rout
,
out
)):
def
perform
(
self
,
node
,
inputs
,
(
rout
,
out
)):
# Use self.fn to draw shape worth of random numbers.
# Use self.fn to draw shape worth of random numbers.
# Numbers are drawn from r if self.inplace is True, and from a copy of r if
# Numbers are drawn from r if self.inplace is True, and from a copy of r if
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
af804be5
...
@@ -89,7 +89,6 @@ class test_greedy_distribute(unittest.TestCase):
...
@@ -89,7 +89,6 @@ class test_greedy_distribute(unittest.TestCase):
g
=
Env
([
a
,
b
,
c
,
d
,
x
,
y
,
z
],
[
e
])
g
=
Env
([
a
,
b
,
c
,
d
,
x
,
y
,
z
],
[
e
])
##print pprint(g.outputs[0])
##print pprint(g.outputs[0])
mul_canonizer
.
optimize
(
g
)
mul_canonizer
.
optimize
(
g
)
gof
.
TopoOptimizer
(
gof
.
LocalOptGroup
(
local_fill_cut
,
local_fill_lift
),
order
=
'out_to_in'
)
.
optimize
(
g
)
gof
.
TopoOptimizer
(
gof
.
LocalOptGroup
(
local_greedy_distributor
),
order
=
'out_to_in'
)
.
optimize
(
g
)
gof
.
TopoOptimizer
(
gof
.
LocalOptGroup
(
local_greedy_distributor
),
order
=
'out_to_in'
)
.
optimize
(
g
)
##print pprint(g.outputs[0])
##print pprint(g.outputs[0])
...
@@ -136,7 +135,6 @@ class test_canonize(unittest.TestCase):
...
@@ -136,7 +135,6 @@ class test_canonize(unittest.TestCase):
g
=
Env
([
x
,
y
,
z
,
a
,
b
,
c
,
d
],
[
e
])
g
=
Env
([
x
,
y
,
z
,
a
,
b
,
c
,
d
],
[
e
])
print
pprint
(
g
.
outputs
[
0
])
print
pprint
(
g
.
outputs
[
0
])
mul_canonizer
.
optimize
(
g
)
mul_canonizer
.
optimize
(
g
)
gof
.
TopoOptimizer
(
gof
.
LocalOptGroup
(
local_fill_cut
,
local_fill_lift
),
order
=
'out_to_in'
)
.
optimize
(
g
)
print
pprint
(
g
.
outputs
[
0
])
print
pprint
(
g
.
outputs
[
0
])
def
test_elemwise_multiple_inputs_optimisation
(
self
):
def
test_elemwise_multiple_inputs_optimisation
(
self
):
...
@@ -296,17 +294,17 @@ class test_canonize(unittest.TestCase):
...
@@ -296,17 +294,17 @@ class test_canonize(unittest.TestCase):
def
test_multiple_case
(
self
):
def
test_multiple_case
(
self
):
""" test those case take from the comment in Canonizer
""" test those case take from the comment in Canonizer
x / x -> 1
x / x -> 1
(x * y) / x -> y
(x * y) / x -> y
x / y / x -> 1 / y
x / y / x -> 1 / y
x / y / z -> x / (y * z)
x / y / z -> x / (y * z)
x / (y / z) -> (x * z) / y
x / (y / z) -> (x * z) / y
(a / b) * (b / c) * (c / d) -> a / d
(a / b) * (b / c) * (c / d) -> a / d
(2.0 * x) / (4.0 * y) -> (0.5 * x) / y
(2.0 * x) / (4.0 * y) -> (0.5 * x) / y
2 * x / 2 -> x
2 * x / 2 -> x
with and without DimShuffle
with and without DimShuffle
TODO: with DimShuffle
TODO: with DimShuffle
"""
"""
import
theano.tensor
,
theano
.
compile
import
theano.tensor
,
theano
.
compile
shp
=
(
3
,
3
)
shp
=
(
3
,
3
)
...
@@ -331,6 +329,7 @@ class test_canonize(unittest.TestCase):
...
@@ -331,6 +329,7 @@ class test_canonize(unittest.TestCase):
old_optimizer
=
mode
.
_optimizer
old_optimizer
=
mode
.
_optimizer
try
:
try
:
mode
.
_optimizer
=
gof
.
Query
([
"canonicalize"
])
mode
.
_optimizer
=
gof
.
Query
([
"canonicalize"
])
mode
.
_optimizer
=
mode
.
_optimizer
.
including
(
'ShapeOpt'
)
mode
.
_optimizer
=
mode
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
mode
.
_optimizer
=
mode
.
_optimizer
.
excluding
(
'local_elemwise_fusion'
)
#test x / x -> 1
#test x / x -> 1
...
@@ -344,10 +343,15 @@ class test_canonize(unittest.TestCase):
...
@@ -344,10 +343,15 @@ class test_canonize(unittest.TestCase):
out
=
f
(
*
val_inputs
)
out
=
f
(
*
val_inputs
)
assert
(
out
==
numpy
.
ones
(
shp
,
dtype
=
out_dtype
))
.
all
()
assert
(
out
==
numpy
.
ones
(
shp
,
dtype
=
out_dtype
))
.
all
()
topo
=
f
.
maker
.
env
.
toposort
()
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
1
if
sym_inputs
[
0
]
.
broadcastable
[
0
]:
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
len
(
topo
)
==
2
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,
theano
.
scalar
.
basic
.
Second
)
assert
isinstance
(
topo
[
0
]
.
op
,
Shape_i
)
assert
len
(
topo
[
0
]
.
inputs
)
==
2
assert
isinstance
(
topo
[
1
]
.
op
,
TT
.
Alloc
)
else
:
assert
len
(
topo
)
==
3
assert
isinstance
(
topo
[
0
]
.
op
,
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
Shape_i
)
assert
isinstance
(
topo
[
2
]
.
op
,
TT
.
Alloc
)
assert
(
out_dtype
==
out
.
dtype
)
assert
(
out_dtype
==
out
.
dtype
)
#test (x * y) / x -> y
#test (x * y) / x -> y
...
@@ -365,10 +369,16 @@ class test_canonize(unittest.TestCase):
...
@@ -365,10 +369,16 @@ class test_canonize(unittest.TestCase):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
mode
=
mode
)
out
=
f
(
*
val_inputs
)
out
=
f
(
*
val_inputs
)
assert
(
out_dtype
==
out
.
dtype
)
assert
numpy
.
allclose
(
out
,
val_inputs
[
1
])
assert
numpy
.
allclose
(
out
,
val_inputs
[
1
])
topo
=
f
.
maker
.
env
.
toposort
()
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
nb_elemwise
print
"ID TOPO"
,
id
,
topo
,
sym_inputs
assert
(
out_dtype
==
out
.
dtype
)
for
r
,
t
in
f
.
maker
.
env
.
shape_feature
.
shape_of
.
items
():
print
' '
,
r
,
t
if
topo
:
for
node
in
topo
[:
-
1
]:
assert
isinstance
(
node
.
op
,
Shape_i
)
assert
isinstance
(
topo
[
-
1
]
.
op
,
TT
.
Alloc
)
#test x / y / x -> 1 / y
#test x / y / x -> 1 / y
for
id
,(
g
,
sym_inputs
,
val_inputs
,
nb_elemwise
,
out_dtype
)
in
enumerate
([
for
id
,(
g
,
sym_inputs
,
val_inputs
,
nb_elemwise
,
out_dtype
)
in
enumerate
([
...
@@ -378,19 +388,21 @@ class test_canonize(unittest.TestCase):
...
@@ -378,19 +388,21 @@ class test_canonize(unittest.TestCase):
((
fv
/
fy
)
/
fv
,[
fv
,
fy
],[
fvv
,
fyv
],
1
,
'float32'
),
((
fv
/
fy
)
/
fv
,[
fv
,
fy
],[
fvv
,
fyv
],
1
,
'float32'
),
#must broadcast as their is a dimshuffle in the computation
#must broadcast as their is a dimshuffle in the computation
((
dx
/
dv
)
/
dx
,[
dx
,
dv
],[
dxv
,
dvv
],
2
,
'float64'
),
((
dx
/
dv
)
/
dx
,[
dx
,
dv
],[
dxv
,
dvv
],
1
,
'float64'
),
#topo: [
Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0
)]
#topo: [
Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Alloc(...
)]
((
fx
/
fv
)
/
fx
,[
fx
,
fv
],[
fxv
,
fvv
],
2
,
'float32'
),
((
fx
/
fv
)
/
fx
,[
fx
,
fv
],[
fxv
,
fvv
],
1
,
'float32'
),
#topo:[
Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0
)]
#topo:[
Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Alloc(...
)]
]):
]):
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
f
=
compile
.
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
mode
=
mode
)
out
=
f
(
*
val_inputs
)
out
=
f
(
*
val_inputs
)
assert
numpy
.
allclose
(
out
,(
1
/
val_inputs
[
1
]))
assert
numpy
.
allclose
(
out
,(
1
/
val_inputs
[
1
]))
topo
=
f
.
maker
.
env
.
toposort
()
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
nb_elemwise
print
topo
assert
isinstance
(
topo
[
0
]
.
op
,(
T
.
Elemwise
,))
elem
=
[
t
for
t
in
topo
if
isinstance
(
t
.
op
,
T
.
Elemwise
)]
assert
isinstance
(
topo
[
0
]
.
op
.
scalar_op
,(
theano
.
scalar
.
basic
.
Inv
,
theano
.
scalar
.
basic
.
TrueDiv
))
assert
len
(
elem
)
==
nb_elemwise
assert
isinstance
(
elem
[
0
]
.
op
,(
T
.
Elemwise
,))
assert
isinstance
(
elem
[
0
]
.
op
.
scalar_op
,(
theano
.
scalar
.
basic
.
Inv
,
theano
.
scalar
.
basic
.
TrueDiv
))
assert
(
out_dtype
==
out
.
dtype
)
assert
(
out_dtype
==
out
.
dtype
)
#test (a / b) * (b / c) * (c / d) -> a / d
#test (a / b) * (b / c) * (c / d) -> a / d
...
@@ -529,29 +541,6 @@ def test_mixeddiv():
...
@@ -529,29 +541,6 @@ def test_mixeddiv():
d
=
dscalar
()
d
=
dscalar
()
assert
0
==
function
([
i
,
d
],
d
*
(
i
/
(
i
+
1
)))(
3
,
1.0
)
assert
0
==
function
([
i
,
d
],
d
*
(
i
/
(
i
+
1
)))(
3
,
1.0
)
def
test_local_shape_lift_dot
():
args_to_result
=
{
(
fvector
,
fvector
):
"[]"
,
(
fvector
,
fmatrix
):
"[<TensorType(float32, matrix)>.shape[1]]"
,
(
fmatrix
,
fvector
):
"[<TensorType(float32, matrix)>.shape[0]]"
,
(
fmatrix
,
fmatrix
):
"[<TensorType(float32, matrix)>.shape[0], <TensorType(float32, matrix)>.shape[1]]"
,
}
for
x
in
[
fvector
,
fmatrix
]:
for
y
in
[
fvector
,
fmatrix
]:
i
=
x
()
j
=
y
()
print
'I SHAPE'
,
i
.
type
.
shape
print
'J SHAPE'
,
j
.
type
.
shape
d
=
shape
(
dot
(
i
,
j
))
if
x
is
fvector
and
y
is
fvector
:
assert
d
==
()
else
:
g
=
Env
([
i
,
j
],
[
d
])
gof
.
TopoOptimizer
(
gof
.
LocalOptGroup
(
local_shape_lift_dot
),
order
=
'out_to_in'
)
.
optimize
(
g
)
print
pprint
(
g
.
outputs
[
0
]),
args_to_result
[(
x
,
y
)]
assert
pprint
(
g
.
outputs
[
0
])
==
args_to_result
[(
x
,
y
)]
def
test_const_type_in_mul_canonizer
():
def
test_const_type_in_mul_canonizer
():
input
=
dmatrix
()
input
=
dmatrix
()
w
=
dmatrix
()
w
=
dmatrix
()
...
@@ -915,11 +904,16 @@ def test_log1p():
...
@@ -915,11 +904,16 @@ def test_log1p():
# check trickier cases (and use different dtype)
# check trickier cases (and use different dtype)
y
=
fmatrix
()
y
=
fmatrix
()
f
=
function
([
x
,
y
],
T
.
log
(
fill
(
y
,
1
)
+
(
x
)),
mode
=
m
)
f
=
function
([
x
,
y
],
T
.
log
(
fill
(
y
,
1
)
+
(
x
)),
mode
=
m
)
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()]
==
[
T
.
DimShuffle
([
False
],
[
'x'
,
0
],
True
),
T
.
log1p
,
T
.
fill
]
print
f
.
maker
.
env
.
toposort
()
# the first three ops are Shape_i, Shape_i, and Dimshuffle
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()][
3
:]
\
==
[
T
.
log1p
,
Alloc
(
'float64'
)]
f
=
function
([
x
,
y
],
T
.
log
(
0
+
(
x
)
+
fill
(
y
,
1.0
)),
mode
=
m
)
f
=
function
([
x
,
y
],
T
.
log
(
0
+
(
x
)
+
fill
(
y
,
1.0
)),
mode
=
m
)
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()]
==
[
T
.
DimShuffle
([
False
],
[
'x'
,
0
],
True
),
T
.
log1p
,
T
.
fill
]
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()][
3
:]
\
==
[
T
.
log1p
,
Alloc
(
'float64'
)]
f
=
function
([
x
,
y
],
T
.
log
(
2
+
(
x
)
-
fill
(
y
,
1.0
)),
mode
=
m
)
f
=
function
([
x
,
y
],
T
.
log
(
2
+
(
x
)
-
fill
(
y
,
1.0
)),
mode
=
m
)
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()]
==
[
T
.
DimShuffle
([
False
],
[
'x'
,
0
],
True
),
T
.
log1p
,
T
.
fill
]
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()][
3
:]
\
==
[
T
.
log1p
,
Alloc
(
'float64'
)]
f
([
1e-7
,
10
],
[[
0
,
0
],
[
0
,
0
]])
#debugmode will verify values
f
([
1e-7
,
10
],
[[
0
,
0
],
[
0
,
0
]])
#debugmode will verify values
...
@@ -969,6 +963,51 @@ class test_local_subtensor_unary(unittest.TestCase):
...
@@ -969,6 +963,51 @@ class test_local_subtensor_unary(unittest.TestCase):
f
([[
0
,
1
],[
2
,
3
]],
[
4
,
5
])
# let debugmode test something
f
([[
0
,
1
],[
2
,
3
]],
[
4
,
5
])
# let debugmode test something
def
test_local_fill_useless
():
m
=
theano
.
config
.
mode
if
m
==
'FAST_COMPILE'
:
m
=
'FAST_RUN'
x
=
dvector
()
y
=
dvector
()
z
=
lvector
()
# basic case
f
=
function
([
x
],
T
.
fill
(
x
,
x
)
*
2
,
mode
=
m
)
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()]
==
[
T
.
mul
]
# basic case
f
=
function
([
x
,
y
],
T
.
second
(
y
,
x
)
*
2
,
mode
=
m
)
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()]
==
[
T
.
mul
]
# now with different type
f
=
function
([
x
,
z
],
T
.
fill
(
z
,
x
)
*
2
,
mode
=
m
)
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()]
==
[
T
.
mul
]
# now cutting out the input ??
f
=
function
([
x
,
y
],
T
.
fill
(
x
,
y
)
*
2
,
mode
=
m
)
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()]
==
[
T
.
mul
]
# now filll is serving as a cast
f
=
function
([
x
,
y
],
T
.
fill
(
x
,
y
)
*
2
,
mode
=
m
)
assert
[
node
.
op
for
node
in
f
.
maker
.
env
.
toposort
()]
==
[
T
.
mul
]
class
test_shapeoptimizer
(
unittest
.
TestCase
):
def
test0
(
self
):
v
=
T
.
vector
()
m
=
T
.
matrix
()
f
=
function
([
v
,
m
],
(
v
+
m
)
.
shape
)
for
node
in
f
.
maker
.
env
.
toposort
():
assert
node
.
op
!=
T
.
add
def
test_constant
(
self
):
v
=
T
.
vector
()
m
=
T
.
matrix
()
f
=
function
([
v
,
m
],
v
.
dimshuffle
(
'x'
,
'x'
,
0
)
.
shape
[
1
])
print
f
.
maker
.
env
.
toposort
()
assert
[]
==
f
.
maker
.
env
.
toposort
()
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
# unittest.main()
# unittest.main()
test_fusion
()
.
tes_memory_leak
()
test_fusion
()
.
tes_memory_leak
()
...
...
theano/tensor/tests/test_shared_randomstreams.py
浏览文件 @
af804be5
...
@@ -352,7 +352,7 @@ class T_SharedRandomStreams(unittest.TestCase):
...
@@ -352,7 +352,7 @@ class T_SharedRandomStreams(unittest.TestCase):
def
test_vector_arguments
(
self
):
def
test_vector_arguments
(
self
):
random
=
RandomStreams
(
utt
.
fetch_seed
())
random
=
RandomStreams
(
utt
.
fetch_seed
())
low
=
tensor
.
vector
()
low
=
tensor
.
d
vector
()
out
=
random
.
uniform
(
low
=
low
,
high
=
1
)
out
=
random
.
uniform
(
low
=
low
,
high
=
1
)
assert
out
.
ndim
==
1
assert
out
.
ndim
==
1
f
=
function
([
low
],
out
)
f
=
function
([
low
],
out
)
...
@@ -402,8 +402,8 @@ class T_SharedRandomStreams(unittest.TestCase):
...
@@ -402,8 +402,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def
test_broadcast_arguments
(
self
):
def
test_broadcast_arguments
(
self
):
random
=
RandomStreams
(
utt
.
fetch_seed
())
random
=
RandomStreams
(
utt
.
fetch_seed
())
low
=
tensor
.
vector
()
low
=
tensor
.
d
vector
()
high
=
tensor
.
col
()
high
=
tensor
.
d
col
()
out
=
random
.
uniform
(
low
=
low
,
high
=
high
)
out
=
random
.
uniform
(
low
=
low
,
high
=
high
)
assert
out
.
ndim
==
2
assert
out
.
ndim
==
2
f
=
function
([
low
,
high
],
out
)
f
=
function
([
low
,
high
],
out
)
...
@@ -424,8 +424,8 @@ class T_SharedRandomStreams(unittest.TestCase):
...
@@ -424,8 +424,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def
test_uniform_vector
(
self
):
def
test_uniform_vector
(
self
):
random
=
RandomStreams
(
utt
.
fetch_seed
())
random
=
RandomStreams
(
utt
.
fetch_seed
())
low
=
tensor
.
vector
()
low
=
tensor
.
d
vector
()
high
=
tensor
.
vector
()
high
=
tensor
.
d
vector
()
out
=
random
.
uniform
(
low
=
low
,
high
=
high
)
out
=
random
.
uniform
(
low
=
low
,
high
=
high
)
assert
out
.
ndim
==
1
assert
out
.
ndim
==
1
f
=
function
([
low
,
high
],
out
)
f
=
function
([
low
,
high
],
out
)
...
@@ -438,11 +438,15 @@ class T_SharedRandomStreams(unittest.TestCase):
...
@@ -438,11 +438,15 @@ class T_SharedRandomStreams(unittest.TestCase):
# Arguments of size (3,)
# Arguments of size (3,)
val0
=
f
(
low_val
,
high_val
)
val0
=
f
(
low_val
,
high_val
)
numpy_val0
=
numpy_rng
.
uniform
(
low
=
low_val
,
high
=
high_val
)
numpy_val0
=
numpy_rng
.
uniform
(
low
=
low_val
,
high
=
high_val
)
print
'THEANO'
,
val0
print
'NUMPY'
,
numpy_val0
assert
numpy
.
all
(
val0
==
numpy_val0
)
assert
numpy
.
all
(
val0
==
numpy_val0
)
# arguments of size (2,)
# arguments of size (2,)
val1
=
f
(
low_val
[:
-
1
],
high_val
[:
-
1
])
val1
=
f
(
low_val
[:
-
1
],
high_val
[:
-
1
])
numpy_val1
=
numpy_rng
.
uniform
(
low
=
low_val
[:
-
1
],
high
=
high_val
[:
-
1
])
numpy_val1
=
numpy_rng
.
uniform
(
low
=
low_val
[:
-
1
],
high
=
high_val
[:
-
1
])
print
'THEANO'
,
val1
print
'NUMPY'
,
numpy_val1
assert
numpy
.
all
(
val1
==
numpy_val1
)
assert
numpy
.
all
(
val1
==
numpy_val1
)
# Specifying the size explicitly
# Specifying the size explicitly
...
@@ -486,8 +490,8 @@ class T_SharedRandomStreams(unittest.TestCase):
...
@@ -486,8 +490,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def
test_normal_vector
(
self
):
def
test_normal_vector
(
self
):
random
=
RandomStreams
(
utt
.
fetch_seed
())
random
=
RandomStreams
(
utt
.
fetch_seed
())
avg
=
tensor
.
vector
()
avg
=
tensor
.
d
vector
()
std
=
tensor
.
vector
()
std
=
tensor
.
d
vector
()
out
=
random
.
normal
(
avg
=
avg
,
std
=
std
)
out
=
random
.
normal
(
avg
=
avg
,
std
=
std
)
assert
out
.
ndim
==
1
assert
out
.
ndim
==
1
f
=
function
([
avg
,
std
],
out
)
f
=
function
([
avg
,
std
],
out
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论