Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
7885e618
提交
7885e618
authored
9月 11, 2012
作者:
Ian Goodfellow
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
a lot of pep8
上级
c7d06ac9
隐藏空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
272 行增加
和
193 行删除
+272
-193
sigm.py
theano/tensor/nnet/sigm.py
+32
-20
randomstreams.py
theano/tensor/randomstreams.py
+2
-2
test_basic.py
theano/tensor/tests/test_basic.py
+0
-0
test_elemwise.py
theano/tensor/tests/test_elemwise.py
+35
-23
test_naacl09.py
theano/tensor/tests/test_naacl09.py
+110
-73
test_opt.py
theano/tensor/tests/test_opt.py
+0
-0
test_gradient.py
theano/tests/test_gradient.py
+86
-67
test_rop.py
theano/tests/test_rop.py
+7
-8
没有找到文件。
theano/tensor/nnet/sigm.py
浏览文件 @
7885e618
...
@@ -42,7 +42,7 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
...
@@ -42,7 +42,7 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
assert
rval
.
type
.
dtype
.
find
(
'float'
)
!=
-
1
assert
rval
.
type
.
dtype
.
find
(
'float'
)
!=
-
1
return
[
rval
]
return
[
rval
]
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
x
,
=
inp
x
,
=
inp
...
@@ -69,7 +69,7 @@ sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
...
@@ -69,7 +69,7 @@ sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
sigmoid_inplace
=
elemwise
.
Elemwise
(
sigmoid_inplace
=
elemwise
.
Elemwise
(
ScalarSigmoid
(
scalar
.
transfer_type
(
0
)),
ScalarSigmoid
(
scalar
.
transfer_type
(
0
)),
inplace_pattern
=
{
0
:
0
},
inplace_pattern
=
{
0
:
0
},
name
=
'sigmoid_inplace'
,
name
=
'sigmoid_inplace'
,
)
)
...
@@ -84,12 +84,15 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
...
@@ -84,12 +84,15 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
if
x
>
30.0
:
if
x
>
30.0
:
return
x
return
x
return
numpy
.
log1p
(
numpy
.
exp
(
x
))
return
numpy
.
log1p
(
numpy
.
exp
(
x
))
def
impl
(
self
,
x
):
def
impl
(
self
,
x
):
return
ScalarSoftplus
.
static_impl
(
x
)
return
ScalarSoftplus
.
static_impl
(
x
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
x
,
=
inp
x
,
=
inp
gz
,
=
grads
gz
,
=
grads
return
[
gz
*
scalar_sigmoid
(
x
)]
return
[
gz
*
scalar_sigmoid
(
x
)]
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
x
,
=
inp
x
,
=
inp
z
,
=
out
z
,
=
out
...
@@ -103,27 +106,29 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
...
@@ -103,27 +106,29 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return
"""
%(z)
s =
%(x)
s < -745.0 ? 0.0 :
%(x)
s > 16.0 ?
%(x)
s : log1p(exp(
%(x)
s));"""
%
locals
()
return
"""
%(z)
s =
%(x)
s < -745.0 ? 0.0 :
%(x)
s > 16.0 ?
%(x)
s : log1p(exp(
%(x)
s));"""
%
locals
()
else
:
else
:
raise
NotImplementedError
(
'only floatingpoint is implemented'
)
raise
NotImplementedError
(
'only floatingpoint is implemented'
)
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
v
=
super
(
ScalarSoftplus
,
self
)
.
c_code_cache_version
()
v
=
super
(
ScalarSoftplus
,
self
)
.
c_code_cache_version
()
if
v
:
if
v
:
return
(
2
,)
+
v
return
(
2
,)
+
v
else
:
else
:
return
v
return
v
scalar_softplus
=
ScalarSoftplus
(
scalar
.
upgrade_to_float
,
name
=
'scalar_softplus'
)
scalar_softplus
=
ScalarSoftplus
(
scalar
.
upgrade_to_float
,
name
=
'scalar_softplus'
)
softplus
=
elemwise
.
Elemwise
(
scalar_softplus
,
name
=
'softplus'
)
softplus
=
elemwise
.
Elemwise
(
scalar_softplus
,
name
=
'softplus'
)
pprint
.
assign
(
softplus
,
printing
.
FunctionPrinter
(
'softplus'
))
pprint
.
assign
(
softplus
,
printing
.
FunctionPrinter
(
'softplus'
))
def
_skip_mul_1
(
r
):
def
_skip_mul_1
(
r
):
if
r
.
owner
and
r
.
owner
.
op
==
tensor
.
mul
:
if
r
.
owner
and
r
.
owner
.
op
==
tensor
.
mul
:
not_is_1
=
[
i
for
i
in
r
.
owner
.
inputs
if
not
_is_1
(
i
)
]
not_is_1
=
[
i
for
i
in
r
.
owner
.
inputs
if
not
_is_1
(
i
)]
if
len
(
not_is_1
)
==
1
:
if
len
(
not_is_1
)
==
1
:
return
not_is_1
[
0
]
return
not_is_1
[
0
]
logsigm_to_softplus
=
gof
.
PatternSub
(
logsigm_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log
,
(
sigmoid
,
'x'
)),
(
tensor
.
log
,
(
sigmoid
,
'x'
)),
(
tensor
.
neg
,
(
softplus
,
(
tensor
.
neg
,
'x'
))),
(
tensor
.
neg
,
(
softplus
,
(
tensor
.
neg
,
'x'
))),
allow_multiple_clients
=
True
,
allow_multiple_clients
=
True
,
skip_identities_fn
=
_skip_mul_1
)
skip_identities_fn
=
_skip_mul_1
)
...
@@ -139,21 +144,22 @@ def _is_1(expr):
...
@@ -139,21 +144,22 @@ def _is_1(expr):
log1msigm_to_softplus
=
gof
.
PatternSub
(
log1msigm_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log
,
(
tensor
.
log
,
(
tensor
.
sub
,
(
tensor
.
sub
,
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
(
sigmoid
,
'x'
))),
(
sigmoid
,
'x'
))),
(
tensor
.
neg
,
(
softplus
,
'x'
)),
(
tensor
.
neg
,
(
softplus
,
'x'
)),
allow_multiple_clients
=
True
,
allow_multiple_clients
=
True
,
skip_identities_fn
=
_skip_mul_1
)
skip_identities_fn
=
_skip_mul_1
)
log1pexp_to_softplus
=
gof
.
PatternSub
(
log1pexp_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log1p
,
(
tensor
.
log1p
,
(
tensor
.
exp
,
'x'
)),
(
tensor
.
exp
,
'x'
)),
(
softplus
,
'x'
),
(
softplus
,
'x'
),
allow_multiple_clients
=
True
)
allow_multiple_clients
=
True
)
opt
.
register_stabilize
(
logsigm_to_softplus
,
name
=
'logsigm_to_softplus'
)
opt
.
register_stabilize
(
log1msigm_to_softplus
,
name
=
'log1msigm_to_softplus'
)
opt
.
register_stabilize
(
log1pexp_to_softplus
,
name
=
'log1pexp_to_softplus'
)
opt
.
register_stabilize
(
logsigm_to_softplus
,
name
=
'logsigm_to_softplus'
)
opt
.
register_stabilize
(
log1msigm_to_softplus
,
name
=
'log1msigm_to_softplus'
)
opt
.
register_stabilize
(
log1pexp_to_softplus
,
name
=
'log1pexp_to_softplus'
)
def
is_1pexp
(
t
):
def
is_1pexp
(
t
):
"""
"""
...
@@ -247,7 +253,7 @@ def partition_num_or_denom(r, f):
...
@@ -247,7 +253,7 @@ def partition_num_or_denom(r, f):
else
:
else
:
neg_t
,
f_t
=
f_t
neg_t
,
f_t
=
f_t
f_terms
.
append
(
f_t
)
f_terms
.
append
(
f_t
)
neg
^=
neg_t
#
bit flip if neg_t is true
neg
^=
neg_t
#
bit flip if neg_t is true
return
f_terms
,
rest
,
neg
return
f_terms
,
rest
,
neg
...
@@ -299,7 +305,8 @@ def local_exp_over_1_plus_exp(node):
...
@@ -299,7 +305,8 @@ def local_exp_over_1_plus_exp(node):
#find all the exp() terms in the numerator
#find all the exp() terms in the numerator
num
,
denom
=
node
.
inputs
num
,
denom
=
node
.
inputs
num_exp_x
,
num_rest
,
num_neg
=
partition_num_or_denom
(
num
,
is_exp
)
num_exp_x
,
num_rest
,
num_neg
=
partition_num_or_denom
(
num
,
is_exp
)
denom_1pexp
,
denom_rest
,
denom_neg
=
partition_num_or_denom
(
denom
,
is_1pexp
)
denom_1pexp
,
denom_rest
,
denom_neg
=
partition_num_or_denom
(
denom
,
is_1pexp
)
sigmoids
=
[]
sigmoids
=
[]
for
t
in
denom_1pexp
:
for
t
in
denom_1pexp
:
...
@@ -311,7 +318,7 @@ def local_exp_over_1_plus_exp(node):
...
@@ -311,7 +318,7 @@ def local_exp_over_1_plus_exp(node):
# case: 1/(1+exp(x))
# case: 1/(1+exp(x))
sigmoids
.
append
(
sigmoid
(
-
t
))
sigmoids
.
append
(
sigmoid
(
-
t
))
if
not
sigmoids
:
# we didn't find any. abort
if
not
sigmoids
:
# we didn't find any. abort
return
return
# put the new numerator together
# put the new numerator together
new_num
=
sigmoids
+
[
tensor
.
exp
(
t
)
for
t
in
num_exp_x
]
+
num_rest
new_num
=
sigmoids
+
[
tensor
.
exp
(
t
)
for
t
in
num_exp_x
]
+
num_rest
...
@@ -330,6 +337,7 @@ def local_exp_over_1_plus_exp(node):
...
@@ -330,6 +337,7 @@ def local_exp_over_1_plus_exp(node):
else
:
else
:
return
[
new_num
/
tensor
.
mul
(
*
denom_rest
)]
return
[
new_num
/
tensor
.
mul
(
*
denom_rest
)]
def
parse_mul_tree
(
root
):
def
parse_mul_tree
(
root
):
"""
"""
Parse a tree of multiplications starting at the given root.
Parse a tree of multiplications starting at the given root.
...
@@ -512,7 +520,7 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None,
...
@@ -512,7 +520,7 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None,
sigm_minus_x
=
[]
sigm_minus_x
=
[]
if
full_tree
is
None
:
if
full_tree
is
None
:
full_tree
=
tree
full_tree
=
tree
if
False
:
# Debug code.
if
False
:
# Debug code.
print
'<perform_sigm_times_exp>'
print
'<perform_sigm_times_exp>'
print
' full_tree =
%
s'
%
full_tree
print
' full_tree =
%
s'
%
full_tree
print
' tree =
%
s'
%
tree
print
' tree =
%
s'
%
tree
...
@@ -621,10 +629,13 @@ def local_inv_1_plus_exp(node):
...
@@ -621,10 +629,13 @@ def local_inv_1_plus_exp(node):
if
nonconsts
[
0
]
.
owner
and
nonconsts
[
0
]
.
owner
.
op
==
tensor
.
exp
:
if
nonconsts
[
0
]
.
owner
and
nonconsts
[
0
]
.
owner
.
op
==
tensor
.
exp
:
if
scalars
and
numpy
.
allclose
(
numpy
.
sum
(
scalars
),
1
):
if
scalars
and
numpy
.
allclose
(
numpy
.
sum
(
scalars
),
1
):
return
opt
.
_fill_chain
(
return
opt
.
_fill_chain
(
sigmoid
(
tensor
.
neg
(
nonconsts
[
0
]
.
owner
.
inputs
[
0
])),
sigmoid
(
tensor
.
neg
(
nonconsts
[
0
]
.
owner
.
inputs
[
0
])),
scalar_inputs
)
scalar_inputs
)
# Registration is below, and conditional.
# Registration is below, and conditional.
@gof.local_optimizer
([
tensor
.
sub
])
@gof.local_optimizer
([
tensor
.
sub
])
def
local_1msigmoid
(
node
):
def
local_1msigmoid
(
node
):
"""
"""
...
@@ -633,7 +644,7 @@ def local_1msigmoid(node):
...
@@ -633,7 +644,7 @@ def local_1msigmoid(node):
if
node
.
op
==
tensor
.
sub
:
if
node
.
op
==
tensor
.
sub
:
sub_l
,
sub_r
=
node
.
inputs
sub_l
,
sub_r
=
node
.
inputs
if
len
(
sub_r
.
clients
)
>
1
:
if
len
(
sub_r
.
clients
)
>
1
:
return
# graph is using both sigm and 1-sigm
return
# graph is using both sigm and 1-sigm
if
sub_r
.
owner
and
sub_r
.
owner
.
op
==
sigmoid
:
if
sub_r
.
owner
and
sub_r
.
owner
.
op
==
sigmoid
:
try
:
try
:
val_l
=
opt
.
get_constant_value
(
sub_l
)
val_l
=
opt
.
get_constant_value
(
sub_l
)
...
@@ -686,13 +697,14 @@ if 0:
...
@@ -686,13 +697,14 @@ if 0:
assert
t0
.
owner
.
op
==
div
assert
t0
.
owner
.
op
==
div
t0top
,
t0bot
=
t0
.
owner
.
inputs
t0top
,
t0bot
=
t0
.
owner
.
inputs
t1top
,
t1bot
=
t1
.
owner
.
inputs
t1top
,
t1bot
=
t1
.
owner
.
inputs
rval
.
append
(
div
(
mul
(
*
(
t0top
+
t1top
)),
mul
(
*
(
t0bot
+
t1bot
))))
rval
.
append
(
div
(
mul
(
*
(
t0top
+
t1top
)),
mul
(
*
(
t0bot
+
t1bot
))))
if
len
(
rval
)
>
100
:
if
len
(
rval
)
>
100
:
# This loop can be exponentially long.
# This loop can be exponentially long.
# aborting
# aborting
return
[]
return
[]
elif
len
(
node
.
outputs
)
>
1
:
elif
len
(
node
.
outputs
)
>
1
:
return
[]
return
[]
else
:
else
:
return
[
node
.
outputs
[
0
]]
return
[
node
.
outputs
[
0
]]
theano/tensor/randomstreams.py
浏览文件 @
7885e618
...
@@ -136,7 +136,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
...
@@ -136,7 +136,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
"""
"""
def
__init__
(
self
,
seed
=
None
,
no_warn
=
False
):
def
__init__
(
self
,
seed
=
None
,
no_warn
=
False
):
""":type seed: None or int
""":type seed: None or int
:param seed: a default seed to initialize the RandomState
:param seed: a default seed to initialize the RandomState
...
@@ -146,7 +146,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
...
@@ -146,7 +146,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
"""
"""
if
not
no_warn
:
if
not
no_warn
:
deprecation_warning
()
deprecation_warning
()
super
(
RandomStreams
,
self
)
.
__init__
(
no_warn
=
True
)
super
(
RandomStreams
,
self
)
.
__init__
(
no_warn
=
True
)
self
.
random_state_variables
=
[]
self
.
random_state_variables
=
[]
self
.
default_instance_seed
=
seed
self
.
default_instance_seed
=
seed
...
...
theano/tensor/tests/test_basic.py
浏览文件 @
7885e618
This source diff could not be displayed because it is too large. You can
view the blob
instead.
theano/tensor/tests/test_elemwise.py
浏览文件 @
7885e618
...
@@ -47,7 +47,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
...
@@ -47,7 +47,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
#test that DimShuffle.infer_shape work correctly
#test that DimShuffle.infer_shape work correctly
x
=
TensorType
(
'float64'
,
ib
)(
'x'
)
x
=
TensorType
(
'float64'
,
ib
)(
'x'
)
e
=
DimShuffle
(
ib
,
shuffle
)(
x
)
e
=
DimShuffle
(
ib
,
shuffle
)(
x
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
assert
all
(
f
(
numpy
.
ones
(
xsh
)))
==
all
(
zsh
)
assert
all
(
f
(
numpy
.
ones
(
xsh
)))
==
all
(
zsh
)
# Test when we drop a axis that is not broadcastable
# Test when we drop a axis that is not broadcastable
...
@@ -125,7 +126,8 @@ class test_Broadcast(unittest.TestCase):
...
@@ -125,7 +126,8 @@ class test_Broadcast(unittest.TestCase):
x
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
x
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
y
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
Elemwise
(
scalar
.
add
)(
x
,
y
)
e
=
Elemwise
(
scalar
.
add
)(
x
,
y
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
assert
tuple
(
f
(
xv
,
yv
))
==
tuple
(
zv
.
shape
)
assert
tuple
(
f
(
xv
,
yv
))
==
tuple
(
zv
.
shape
)
def
with_linker_inplace
(
self
,
linker
):
def
with_linker_inplace
(
self
,
linker
):
...
@@ -154,7 +156,8 @@ class test_Broadcast(unittest.TestCase):
...
@@ -154,7 +156,8 @@ class test_Broadcast(unittest.TestCase):
x
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
x
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
y
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
Elemwise
(
scalar
.
Add
(
scalar
.
transfer_type
(
0
)),
{
0
:
0
})(
x
,
y
)
e
=
Elemwise
(
scalar
.
Add
(
scalar
.
transfer_type
(
0
)),
{
0
:
0
})(
x
,
y
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
xv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
xsh
))
xv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
xsh
))
yv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
ysh
))
yv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
ysh
))
zv
=
xv
+
yv
zv
=
xv
+
yv
...
@@ -349,7 +352,8 @@ class test_CAReduce(unittest_tools.InferShapeTester):
...
@@ -349,7 +352,8 @@ class test_CAReduce(unittest_tools.InferShapeTester):
e
=
tensor_op
(
x
,
axis
=
tosum
)
e
=
tensor_op
(
x
,
axis
=
tosum
)
if
tosum
is
None
:
if
tosum
is
None
:
tosum
=
range
(
len
(
xsh
))
tosum
=
range
(
len
(
xsh
))
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
if
not
(
scalar_op
in
[
scalar
.
maximum
,
scalar
.
minimum
]
and
if
not
(
scalar_op
in
[
scalar
.
maximum
,
scalar
.
minimum
]
and
((
xsh
==
()
or
numpy
.
prod
(
xsh
)
==
0
))):
((
xsh
==
()
or
numpy
.
prod
(
xsh
)
==
0
))):
assert
all
(
f
(
xv
)
==
zv
.
shape
)
assert
all
(
f
(
xv
)
==
zv
.
shape
)
...
@@ -459,7 +463,8 @@ class test_Prod(unittest.TestCase):
...
@@ -459,7 +463,8 @@ class test_Prod(unittest.TestCase):
# including zeros, as the case with zeros is important
# including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row)
# (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val
=
numpy
.
asarray
([[
1
,
2
,
3
],[
4
,
5
,
6
],[
7
,
8
,
9
]],
dtype
=
'float32'
)
x_val
=
numpy
.
asarray
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]],
dtype
=
'float32'
)
x
=
theano
.
tensor
.
dmatrix
()
x
=
theano
.
tensor
.
dmatrix
()
# now with verify_grad
# now with verify_grad
unittest_tools
.
verify_grad
(
Prod
(
axis
=
1
),
[
x_val
],
mode
=
self
.
mode
)
unittest_tools
.
verify_grad
(
Prod
(
axis
=
1
),
[
x_val
],
mode
=
self
.
mode
)
...
@@ -471,26 +476,28 @@ class test_Prod(unittest.TestCase):
...
@@ -471,26 +476,28 @@ class test_Prod(unittest.TestCase):
unittest_tools
.
verify_grad
(
fn
,
[
x_val
],
mode
=
self
.
mode
)
unittest_tools
.
verify_grad
(
fn
,
[
x_val
],
mode
=
self
.
mode
)
def
test_verify_grad_with_zeros
(
self
):
def
test_verify_grad_with_zeros
(
self
):
# including zeros, as the case with zeros is important
# including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row)
# (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val
=
numpy
.
asarray
([[
1.
,
2.
,
3.
],[
0.
,
5.
,
6.
],[
0.
,
0.
,
9.
]],
dtype
=
'float32'
)
x_val
=
numpy
.
asarray
([[
1.
,
2.
,
3.
],
[
0.
,
5.
,
6.
],
[
0.
,
0.
,
9.
]],
dtype
=
'float32'
)
x
=
theano
.
tensor
.
dmatrix
()
x
=
theano
.
tensor
.
dmatrix
()
# sanity check
# sanity check
x2
=
theano
.
tensor
.
dmatrix
()
x2
=
theano
.
tensor
.
dmatrix
()
p
=
Prod
(
axis
=
1
)(
x
)
p
=
Prod
(
axis
=
1
)(
x
)
p2
=
Prod
(
axis
=
1
)(
x2
)
p2
=
Prod
(
axis
=
1
)(
x2
)
fn
=
theano
.
function
([
x
,
x2
],[
p
-
p2
],
mode
=
self
.
mode
)
fn
=
theano
.
function
([
x
,
x2
],
[
p
-
p2
],
mode
=
self
.
mode
)
#print "hand computed diff for each row"
#print "hand computed diff for each row"
x2_val
=
numpy
.
asarray
([[
1.
,
2.
,
3.003
],
[
0.003
,
5.
,
6
],
[
0.
,
0.
,
9.01
]])
x2_val
=
numpy
.
asarray
([[
1.
,
2.
,
3.003
],
[
0.003
,
5.
,
6
],
[
0.
,
0.
,
9.01
]])
#print fn(x_val, x2_val)
#print fn(x_val, x2_val)
fn2
=
theano
.
function
([
x
],[
theano
.
tensor
.
grad
(
p
.
sum
(),
x
)],
mode
=
self
.
mode
)
fn2
=
theano
.
function
([
x
],
[
theano
.
tensor
.
grad
(
p
.
sum
(),
x
)],
mode
=
self
.
mode
)
#print "real grad"
#print "real grad"
#print fn2(x_val)
#print fn2(x_val)
fn3
=
theano
.
function
([
x
],[
p
],
mode
=
self
.
mode
)
fn3
=
theano
.
function
([
x
],
[
p
],
mode
=
self
.
mode
)
assert
numpy
.
allclose
(
fn3
(
x_val
),
[
6.
,
0.
,
0.
])
assert
numpy
.
allclose
(
fn3
(
x_val
),
[
6.
,
0.
,
0.
])
# now with verify_grad
# now with verify_grad
unittest_tools
.
verify_grad
(
Prod
(
axis
=
1
),
[
x_val
],
mode
=
self
.
mode
)
unittest_tools
.
verify_grad
(
Prod
(
axis
=
1
),
[
x_val
],
mode
=
self
.
mode
)
...
@@ -511,10 +518,10 @@ class test_Prod(unittest.TestCase):
...
@@ -511,10 +518,10 @@ class test_Prod(unittest.TestCase):
def
test_prod_without_zeros
(
self
):
def
test_prod_without_zeros
(
self
):
x
=
theano
.
tensor
.
dmatrix
()
x
=
theano
.
tensor
.
dmatrix
()
x_val
=
numpy
.
array
([[
1
,
2
,
3
],[
0
,
5
,
6
],[
0
,
0
,
9
]],
dtype
=
'float32'
)
x_val
=
numpy
.
array
([[
1
,
2
,
3
],
[
0
,
5
,
6
],
[
0
,
0
,
9
]],
dtype
=
'float32'
)
pwz
=
ProdWithoutZeros
(
axis
=
1
)(
x
)
pwz
=
ProdWithoutZeros
(
axis
=
1
)(
x
)
fn
=
theano
.
function
([
x
],
pwz
,
mode
=
self
.
mode
)
fn
=
theano
.
function
([
x
],
pwz
,
mode
=
self
.
mode
)
assert
numpy
.
allclose
(
fn
(
x_val
),
[
6
,
30
,
9
])
assert
numpy
.
allclose
(
fn
(
x_val
),
[
6
,
30
,
9
])
pwz_a0
=
ProdWithoutZeros
(
axis
=
0
)(
x
)
pwz_a0
=
ProdWithoutZeros
(
axis
=
0
)(
x
)
fn_a0
=
theano
.
function
([
x
],
pwz_a0
,
mode
=
self
.
mode
)
fn_a0
=
theano
.
function
([
x
],
pwz_a0
,
mode
=
self
.
mode
)
...
@@ -522,25 +529,30 @@ class test_Prod(unittest.TestCase):
...
@@ -522,25 +529,30 @@ class test_Prod(unittest.TestCase):
def
test_other_grad_tests
(
self
):
def
test_other_grad_tests
(
self
):
x
=
theano
.
tensor
.
dmatrix
()
x
=
theano
.
tensor
.
dmatrix
()
x_val1
=
numpy
.
array
([[
1
,
2
,
3
],[
0
,
5
,
6
],[
0
,
0
,
9
]],
dtype
=
'float32'
)
x_val1
=
numpy
.
array
([[
1
,
2
,
3
],
[
0
,
5
,
6
],
[
0
,
0
,
9
]],
x_val2
=
numpy
.
array
([[
1
,
2
,
0
],[
0
,
5
,
6
],[
7
,
8
,
9
],[
9
,
10
,
0
]],
dtype
=
'float32'
)
dtype
=
'float32'
)
x_val2
=
numpy
.
array
([[
1
,
2
,
0
],
[
0
,
5
,
6
],
[
7
,
8
,
9
],
[
9
,
10
,
0
]],
dtype
=
'float32'
)
rng
=
rng
=
numpy
.
random
.
RandomState
(
43
)
rng
=
rng
=
numpy
.
random
.
RandomState
(
43
)
p
=
Prod
(
axis
=
1
)
p
=
Prod
(
axis
=
1
)
grad_p
=
theano
.
tensor
.
grad
(
p
(
x
)
.
sum
(),
x
)
grad_p
=
theano
.
tensor
.
grad
(
p
(
x
)
.
sum
(),
x
)
grad_fn
=
theano
.
function
([
x
],
grad_p
,
mode
=
self
.
mode
)
grad_fn
=
theano
.
function
([
x
],
grad_p
,
mode
=
self
.
mode
)
assert
numpy
.
allclose
(
grad_fn
(
x_val1
),
[[
6.
,
3.
,
2.
],[
30.
,
0.
,
0.
],[
0.
,
0.
,
0.
]])
assert
numpy
.
allclose
(
grad_fn
(
x_val1
),
[[
6.
,
3.
,
2.
],
[
30.
,
0.
,
assert
numpy
.
allclose
(
grad_fn
(
x_val2
),
[[
0.
,
0.
,
2.
],
[
30.
,
0.
,
0.
],
[
72.
,
63.
,
56.
],
[
0.
,
0.
,
90.
]])
0.
],
[
0.
,
0.
,
0.
]])
assert
numpy
.
allclose
(
grad_fn
(
x_val2
),
[[
0.
,
0.
,
2.
],
[
30.
,
0.
,
0.
],
[
72.
,
63.
,
56.
],
[
0.
,
0.
,
90.
]])
p_axis0
=
Prod
(
axis
=
0
)
p_axis0
=
Prod
(
axis
=
0
)
grad_p_axis0
=
theano
.
tensor
.
grad
(
p_axis0
(
x
)
.
sum
(),
x
)
grad_p_axis0
=
theano
.
tensor
.
grad
(
p_axis0
(
x
)
.
sum
(),
x
)
grad_fn_axis0
=
theano
.
function
([
x
],
grad_p_axis0
,
mode
=
self
.
mode
)
grad_fn_axis0
=
theano
.
function
([
x
],
grad_p_axis0
,
mode
=
self
.
mode
)
assert
numpy
.
allclose
(
grad_fn_axis0
(
x_val2
),
[[
0.
,
400.
,
0.
],[
63.
,
160.
,
0.
],
[
0.
,
100.
,
0.
],
[
0.
,
80.
,
0.
]])
assert
numpy
.
allclose
(
grad_fn_axis0
(
x_val2
),
[[
0.
,
400.
,
0.
],
[
63.
,
160.
,
0.
],
[
0.
,
100.
,
0.
],
[
0.
,
80.
,
0.
]])
tensor
.
verify_grad
(
p
,
[
x_val1
],
rng
=
rng
,
mode
=
self
.
mode
)
tensor
.
verify_grad
(
p
,
[
x_val1
],
rng
=
rng
,
mode
=
self
.
mode
)
def
test_mul_without_zeros_zeros
(
self
):
def
test_mul_without_zeros_zeros
(
self
):
a
=
numpy
.
zeros
((
3
,
3
))
a
=
numpy
.
zeros
((
3
,
3
))
x
=
theano
.
tensor
.
dmatrix
()
x
=
theano
.
tensor
.
dmatrix
()
...
@@ -655,6 +667,7 @@ class T_sum_dtype(unittest.TestCase):
...
@@ -655,6 +667,7 @@ class T_sum_dtype(unittest.TestCase):
idx
+=
1
idx
+=
1
class
T_mean_dtype
(
unittest
.
TestCase
):
class
T_mean_dtype
(
unittest
.
TestCase
):
def
test_mean_default_dtype
(
self
):
def
test_mean_default_dtype
(
self
):
"""
"""
...
@@ -710,6 +723,7 @@ class T_mean_dtype(unittest.TestCase):
...
@@ -710,6 +723,7 @@ class T_mean_dtype(unittest.TestCase):
idx
+=
1
idx
+=
1
class
T_prod_dtype
(
unittest
.
TestCase
):
class
T_prod_dtype
(
unittest
.
TestCase
):
def
test_prod_default_dtype
(
self
):
def
test_prod_default_dtype
(
self
):
"""
"""
...
@@ -761,6 +775,7 @@ class T_prod_dtype(unittest.TestCase):
...
@@ -761,6 +775,7 @@ class T_prod_dtype(unittest.TestCase):
idx
+=
1
idx
+=
1
class
T_prod_without_zeros_dtype
(
unittest
.
TestCase
):
class
T_prod_without_zeros_dtype
(
unittest
.
TestCase
):
def
test_prod_without_zeros_default_dtype
(
self
):
def
test_prod_without_zeros_default_dtype
(
self
):
"""
"""
...
@@ -844,11 +859,8 @@ if __name__ == '__main__':
...
@@ -844,11 +859,8 @@ if __name__ == '__main__':
"""
"""
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
t
=
TestElemwise
(
'setUp'
)
t
=
TestElemwise
(
'setUp'
)
t
.
setUp
()
t
.
setUp
()
t
.
test_infer_shape
()
t
.
test_infer_shape
()
theano/tensor/tests/test_naacl09.py
浏览文件 @
7885e618
...
@@ -12,15 +12,19 @@ import sys
...
@@ -12,15 +12,19 @@ import sys
from
theano.tests
import
unittest_tools
from
theano.tests
import
unittest_tools
from
numpy.testing.noseclasses
import
KnownFailureTest
from
numpy.testing.noseclasses
import
KnownFailureTest
def
cross_entropy
(
target
,
output
,
axis
=
1
):
def
cross_entropy
(
target
,
output
,
axis
=
1
):
"""
"""
@todo: This is essentially duplicated as tensor.nnet.binary_crossentropy
@todo: This is essentially duplicated as tensor.nnet.binary_crossentropy
@warning: OUTPUT and TARGET are reversed in tensor.nnet.binary_crossentropy
@warning: OUTPUT and TARGET are reversed in tensor.nnet.binary_crossentropy
"""
"""
return
-
T
.
mean
(
target
*
T
.
log
(
output
)
+
(
1
-
target
)
*
T
.
log
(
1
-
output
),
axis
=
axis
)
return
-
T
.
mean
(
target
*
T
.
log
(
output
)
+
(
1
-
target
)
*
T
.
log
(
1
-
output
),
axis
=
axis
)
def
quadratic
(
target
,
output
,
axis
=
1
):
def
quadratic
(
target
,
output
,
axis
=
1
):
return
T
.
mean
(
T
.
sqr
(
target
-
output
),
axis
=
axis
)
return
T
.
mean
(
T
.
sqr
(
target
-
output
),
axis
=
axis
)
class
QuadraticDenoisingAA
(
module
.
Module
):
class
QuadraticDenoisingAA
(
module
.
Module
):
"""Quadratic de-noising Auto-encoder
"""Quadratic de-noising Auto-encoder
...
@@ -35,15 +39,15 @@ class QuadraticDenoisingAA(module.Module):
...
@@ -35,15 +39,15 @@ class QuadraticDenoisingAA(module.Module):
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
input
=
None
,
input
=
None
,
# regularize = False,
# regularize = False,
tie_weights
=
False
,
tie_weights
=
False
,
n_quadratic_filters
=
1
,
n_quadratic_filters
=
1
,
_w1
=
None
,
_w1
=
None
,
_w2
=
None
,
_w2
=
None
,
_b1
=
None
,
_b1
=
None
,
_b2
=
None
,
_b2
=
None
,
_qfilters
=
None
,
_qfilters
=
None
,
activation_function
=
NN
.
sigmoid
,
activation_function
=
NN
.
sigmoid
,
reconstruction_cost_function
=
cross_entropy
):
reconstruction_cost_function
=
cross_entropy
):
"""
"""
...
@@ -83,7 +87,8 @@ class QuadraticDenoisingAA(module.Module):
...
@@ -83,7 +87,8 @@ class QuadraticDenoisingAA(module.Module):
# PARAMETERS
# PARAMETERS
if
_qfilters
is
None
:
if
_qfilters
is
None
:
#self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
#self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
self
.
qfilters
=
[(
T
.
dmatrix
(
'q
%
i'
%
i
))
for
i
in
xrange
(
n_quadratic_filters
)]
self
.
qfilters
=
[(
T
.
dmatrix
(
'q
%
i'
%
i
))
for
i
in
xrange
(
n_quadratic_filters
)]
else
:
else
:
#self.qfilters = [theano.Member(q) for q in _qfilters]
#self.qfilters = [theano.Member(q) for q in _qfilters]
self
.
qfilters
=
[(
q
)
for
q
in
_qfilters
]
self
.
qfilters
=
[(
q
)
for
q
in
_qfilters
]
...
@@ -91,7 +96,8 @@ class QuadraticDenoisingAA(module.Module):
...
@@ -91,7 +96,8 @@ class QuadraticDenoisingAA(module.Module):
#self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
#self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
if
_w1
is
None
:
if
_w1
is
None
:
self
.
w1
=
(
T
.
matrix
(
'w1'
))
self
.
w1
=
(
T
.
matrix
(
'w1'
))
else
:
self
.
w1
=
(
_w1
)
else
:
self
.
w1
=
(
_w1
)
if
_w2
is
None
:
if
_w2
is
None
:
if
not
tie_weights
:
if
not
tie_weights
:
#self.w2 = theano.Member(T.matrix())
#self.w2 = theano.Member(T.matrix())
...
@@ -104,30 +110,30 @@ class QuadraticDenoisingAA(module.Module):
...
@@ -104,30 +110,30 @@ class QuadraticDenoisingAA(module.Module):
#self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
#self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
if
_b1
is
None
:
if
_b1
is
None
:
self
.
b1
=
(
T
.
vector
(
'b1'
))
self
.
b1
=
(
T
.
vector
(
'b1'
))
else
:
self
.
b1
=
(
_b1
)
else
:
self
.
b1
=
(
_b1
)
#self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
#self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
if
_b2
is
None
:
if
_b2
is
None
:
self
.
b2
=
(
T
.
vector
(
'b2'
))
self
.
b2
=
(
T
.
vector
(
'b2'
))
else
:
self
.
b2
=
(
_b2
)
else
:
self
.
b2
=
(
_b2
)
# # REGULARIZATION COST
# # REGULARIZATION COST
# self.regularization = self.build_regularization()
# self.regularization = self.build_regularization()
### NOISELESS ###
### NOISELESS ###
# HIDDEN LAYER
# HIDDEN LAYER
def
_act
(
x
):
def
_act
(
x
):
if
len
(
self
.
qfilters
)
>
0
:
if
len
(
self
.
qfilters
)
>
0
:
qsum
=
10e-10
# helps to control the gradient in the square-root below
qsum
=
10e-10
# helps to control the gradient in the square-root below
for
qf
in
self
.
qfilters
:
for
qf
in
self
.
qfilters
:
qsum
=
qsum
+
T
.
dot
(
x
,
qf
)
**
2
qsum
=
qsum
+
T
.
dot
(
x
,
qf
)
**
2
return
T
.
dot
(
x
,
self
.
w1
)
+
self
.
b1
+
T
.
sqrt
(
qsum
)
return
T
.
dot
(
x
,
self
.
w1
)
+
self
.
b1
+
T
.
sqrt
(
qsum
)
else
:
else
:
return
T
.
dot
(
x
,
self
.
w1
)
+
self
.
b1
return
T
.
dot
(
x
,
self
.
w1
)
+
self
.
b1
self
.
hidden_activation
=
_act
(
self
.
input
)
#
noise-free hidden
self
.
hidden_activation
=
_act
(
self
.
input
)
#
noise-free hidden
self
.
hidden
=
self
.
hid_activation_function
(
self
.
hidden_activation
)
self
.
hidden
=
self
.
hid_activation_function
(
self
.
hidden_activation
)
...
@@ -144,7 +150,6 @@ class QuadraticDenoisingAA(module.Module):
...
@@ -144,7 +150,6 @@ class QuadraticDenoisingAA(module.Module):
# if self.regularize:
# if self.regularize:
# self.cost = self.cost + self.regularization
# self.cost = self.cost + self.regularization
### WITH NOISE ###
### WITH NOISE ###
self
.
corrupted_input
=
self
.
build_corrupted_input
()
self
.
corrupted_input
=
self
.
build_corrupted_input
()
...
@@ -165,7 +170,6 @@ class QuadraticDenoisingAA(module.Module):
...
@@ -165,7 +170,6 @@ class QuadraticDenoisingAA(module.Module):
# if self.regularize:
# if self.regularize:
# self.ncost = self.ncost + self.regularization
# self.ncost = self.ncost + self.regularization
# GRADIENTS AND UPDATES
# GRADIENTS AND UPDATES
if
self
.
tie_weights
:
if
self
.
tie_weights
:
self
.
params
=
[
self
.
w1
,
self
.
b1
,
self
.
b2
]
+
self
.
qfilters
self
.
params
=
[
self
.
w1
,
self
.
b1
,
self
.
b2
]
+
self
.
qfilters
...
@@ -173,7 +177,8 @@ class QuadraticDenoisingAA(module.Module):
...
@@ -173,7 +177,8 @@ class QuadraticDenoisingAA(module.Module):
self
.
params
=
[
self
.
w1
,
self
.
w2
,
self
.
b1
,
self
.
b2
]
+
self
.
qfilters
self
.
params
=
[
self
.
w1
,
self
.
w2
,
self
.
b1
,
self
.
b2
]
+
self
.
qfilters
gradients
=
T
.
grad
(
self
.
ncost
,
self
.
params
)
gradients
=
T
.
grad
(
self
.
ncost
,
self
.
params
)
updates
=
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
zip
(
self
.
params
,
gradients
))
updates
=
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
zip
(
self
.
params
,
gradients
))
# INTERFACE METHODS
# INTERFACE METHODS
#self.update = theano.Method(self.input, self.ncost, updates)
#self.update = theano.Method(self.input, self.ncost, updates)
...
@@ -192,16 +197,17 @@ class QuadraticDenoisingAA(module.Module):
...
@@ -192,16 +197,17 @@ class QuadraticDenoisingAA(module.Module):
filter's initial range)
filter's initial range)
"""
"""
if
(
input_size
is
None
)
^
(
hidden_size
is
None
):
if
(
input_size
is
None
)
^
(
hidden_size
is
None
):
raise
ValueError
(
"Must specify input_size and hidden_size or neither."
)
raise
ValueError
(
"Must specify input_size and hidden_size or neither."
)
super
(
QuadraticDenoisingAA
,
self
)
.
_instance_initialize
(
obj
,
{})
super
(
QuadraticDenoisingAA
,
self
)
.
_instance_initialize
(
obj
,
{})
obj
.
random
.
initialize
()
obj
.
random
.
initialize
()
R
=
N
.
random
.
RandomState
(
unittest_tools
.
fetch_seed
(
seed
))
R
=
N
.
random
.
RandomState
(
unittest_tools
.
fetch_seed
(
seed
))
if
input_size
is
not
None
:
if
input_size
is
not
None
:
sz
=
(
input_size
,
hidden_size
)
sz
=
(
input_size
,
hidden_size
)
inf
=
1
/
N
.
sqrt
(
input_size
)
inf
=
1
/
N
.
sqrt
(
input_size
)
hif
=
1
/
N
.
sqrt
(
hidden_size
)
hif
=
1
/
N
.
sqrt
(
hidden_size
)
obj
.
w1
=
N
.
asarray
(
R
.
uniform
(
size
=
sz
,
low
=
-
inf
,
high
=
inf
),
obj
.
w1
=
N
.
asarray
(
R
.
uniform
(
size
=
sz
,
low
=-
inf
,
high
=
inf
),
dtype
=
config
.
floatX
)
dtype
=
config
.
floatX
)
if
not
self
.
tie_weights
:
if
not
self
.
tie_weights
:
obj
.
w2
=
N
.
asarray
(
obj
.
w2
=
N
.
asarray
(
...
@@ -257,14 +263,17 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA):
...
@@ -257,14 +263,17 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA):
def
_instance_initialize
(
self
,
obj
,
input_size
,
hidden_size
,
noise_level
,
seed
,
lr
,
qfilter_relscale
):
def
_instance_initialize
(
self
,
obj
,
input_size
,
hidden_size
,
noise_level
,
seed
,
lr
,
qfilter_relscale
):
# obj.l2_coef = 0.0
# obj.l2_coef = 0.0
obj
.
noise_level
=
N
.
asarray
(
noise_level
,
dtype
=
config
.
floatX
)
obj
.
noise_level
=
N
.
asarray
(
noise_level
,
dtype
=
config
.
floatX
)
super
(
SigmoidXEQuadraticDenoisingAA
,
self
)
.
_instance_initialize
(
obj
,
input_size
,
hidden_size
,
seed
,
lr
,
qfilter_relscale
)
super
(
SigmoidXEQuadraticDenoisingAA
,
self
)
.
_instance_initialize
(
obj
,
input_size
,
hidden_size
,
seed
,
lr
,
qfilter_relscale
)
QDAA
=
SigmoidXEQuadraticDenoisingAA
QDAA
=
SigmoidXEQuadraticDenoisingAA
class
Loss01
(
object
):
class
Loss01
(
object
):
def
loss_01
(
self
,
x
,
targ
):
def
loss_01
(
self
,
x
,
targ
):
return
N
.
mean
(
self
.
classify
(
x
)
!=
targ
)
return
N
.
mean
(
self
.
classify
(
x
)
!=
targ
)
class
Module_Nclass
(
module
.
FancyModule
):
class
Module_Nclass
(
module
.
FancyModule
):
def
_instance_initialize
(
mod_self
,
self
,
n_in
,
n_out
,
lr
,
seed
):
def
_instance_initialize
(
mod_self
,
self
,
n_in
,
n_out
,
lr
,
seed
):
#self.component is the LogisticRegressionTemplate instance that built this guy.
#self.component is the LogisticRegressionTemplate instance that built this guy.
...
@@ -280,29 +289,34 @@ class Module_Nclass(module.FancyModule):
...
@@ -280,29 +289,34 @@ class Module_Nclass(module.FancyModule):
self
.
output_dimension
=
n_out
self
.
output_dimension
=
n_out
def
__init__
(
self
,
x
=
None
,
targ
=
None
,
w
=
None
,
b
=
None
,
lr
=
None
,
regularize
=
False
):
def
__init__
(
self
,
x
=
None
,
targ
=
None
,
w
=
None
,
b
=
None
,
lr
=
None
,
regularize
=
False
):
super
(
Module_Nclass
,
self
)
.
__init__
()
#
boilerplate
super
(
Module_Nclass
,
self
)
.
__init__
()
#
boilerplate
#self.x = module.Member(x) if x is not None else T.matrix('input')
#self.x = module.Member(x) if x is not None else T.matrix('input')
if
x
is
not
None
:
if
x
is
not
None
:
self
.
x
=
(
x
)
self
.
x
=
(
x
)
else
:
self
.
x
=
T
.
matrix
(
'input'
)
else
:
self
.
x
=
T
.
matrix
(
'input'
)
#self.targ = module.Member(targ) if targ is not None else T.lvector()
#self.targ = module.Member(targ) if targ is not None else T.lvector()
if
targ
is
not
None
:
if
targ
is
not
None
:
self
.
targ
=
(
targ
)
self
.
targ
=
(
targ
)
else
:
self
.
targ
=
T
.
lvector
()
else
:
self
.
targ
=
T
.
lvector
()
#self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
#self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
if
w
is
not
None
:
if
w
is
not
None
:
self
.
w
=
(
w
)
self
.
w
=
(
w
)
else
:
self
.
w
=
(
T
.
dmatrix
())
else
:
self
.
w
=
(
T
.
dmatrix
())
#self.b = module.Member(b) if b is not None else module.Member(T.dvector())
#self.b = module.Member(b) if b is not None else module.Member(T.dvector())
if
b
is
not
None
:
if
b
is
not
None
:
self
.
b
=
(
b
)
self
.
b
=
(
b
)
else
:
self
.
b
=
(
T
.
dvector
())
else
:
self
.
b
=
(
T
.
dvector
())
#self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
#self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
if
lr
is
not
None
:
if
lr
is
not
None
:
self
.
lr
=
(
lr
)
self
.
lr
=
(
lr
)
else
:
self
.
lr
=
(
T
.
dscalar
())
else
:
self
.
lr
=
(
T
.
dscalar
())
self
.
params
=
[
p
for
p
in
[
self
.
w
,
self
.
b
]
if
p
.
owner
is
None
]
self
.
params
=
[
p
for
p
in
[
self
.
w
,
self
.
b
]
if
p
.
owner
is
None
]
...
@@ -341,13 +355,14 @@ class Module_Nclass(module.FancyModule):
...
@@ -341,13 +355,14 @@ class Module_Nclass(module.FancyModule):
#self.update = module.Method([self.input, self.targ], sum_xent,
#self.update = module.Method([self.input, self.targ], sum_xent,
#updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
#updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
class
ConvolutionalMLP
(
module
.
FancyModule
):
class
ConvolutionalMLP
(
module
.
FancyModule
):
def
__init__
(
self
,
def
__init__
(
self
,
window_size
,
window_size
,
n_quadratic_filters
,
n_quadratic_filters
,
activation_function
,
activation_function
,
reconstruction_cost_function
,
reconstruction_cost_function
,
tie_weights
=
False
,
tie_weights
=
False
,
# _input,
# _input,
# _targ
# _targ
):
):
...
@@ -362,9 +377,9 @@ class ConvolutionalMLP(module.FancyModule):
...
@@ -362,9 +377,9 @@ class ConvolutionalMLP(module.FancyModule):
self
.
input_representations
=
[]
self
.
input_representations
=
[]
self
.
input_representations
.
append
(
QDAA
(
self
.
input_representations
.
append
(
QDAA
(
input
=
self
.
inputs
[
0
],
input
=
self
.
inputs
[
0
],
tie_weights
=
tie_weights
,
tie_weights
=
tie_weights
,
n_quadratic_filters
=
n_quadratic_filters
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
reconstruction_cost_function
reconstruction_cost_function
=
reconstruction_cost_function
)
)
)
)
...
@@ -373,9 +388,9 @@ class ConvolutionalMLP(module.FancyModule):
...
@@ -373,9 +388,9 @@ class ConvolutionalMLP(module.FancyModule):
self
.
input_representations
.
append
(
self
.
input_representations
.
append
(
QDAA
(
QDAA
(
input
=
i
,
input
=
i
,
tie_weights
=
tie_weights
,
tie_weights
=
tie_weights
,
n_quadratic_filters
=
n_quadratic_filters
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
reconstruction_cost_function
,
reconstruction_cost_function
=
reconstruction_cost_function
,
_w1
=
self
.
input_representations
[
0
]
.
w1
,
_w1
=
self
.
input_representations
[
0
]
.
w1
,
_w2
=
self
.
input_representations
[
0
]
.
w2
,
_w2
=
self
.
input_representations
[
0
]
.
w2
,
...
@@ -384,14 +399,16 @@ class ConvolutionalMLP(module.FancyModule):
...
@@ -384,14 +399,16 @@ class ConvolutionalMLP(module.FancyModule):
_qfilters
=
self
.
input_representations
[
0
]
.
qfilters
_qfilters
=
self
.
input_representations
[
0
]
.
qfilters
)
)
)
)
assert
self
.
input_representations
[
-
1
]
.
w1
is
self
.
input_representations
[
0
]
.
w1
assert
self
.
input_representations
[
-
1
]
.
w1
is
self
.
input_representations
[
0
]
.
w1
self
.
input_representation
=
T
.
concatenate
([
i
.
hidden
for
i
in
self
.
input_representations
],
axis
=
1
)
self
.
input_representation
=
T
.
concatenate
([
i
.
hidden
for
i
in
self
.
input_representations
],
axis
=
1
)
self
.
hidden
=
QDAA
(
self
.
hidden
=
QDAA
(
input
=
self
.
input_representation
,
input
=
self
.
input_representation
,
tie_weights
=
tie_weights
,
tie_weights
=
tie_weights
,
n_quadratic_filters
=
n_quadratic_filters
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
reconstruction_cost_function
reconstruction_cost_function
=
reconstruction_cost_function
)
)
self
.
output
=
Module_Nclass
(
x
=
self
.
hidden
.
hidden
,
targ
=
self
.
targ
)
self
.
output
=
Module_Nclass
(
x
=
self
.
hidden
.
hidden
,
targ
=
self
.
targ
)
...
@@ -408,11 +425,13 @@ class ConvolutionalMLP(module.FancyModule):
...
@@ -408,11 +425,13 @@ class ConvolutionalMLP(module.FancyModule):
self
.
hidden
.
b1
,
self
.
hidden
.
b1
,
self
.
hidden
.
b2
self
.
hidden
.
b2
]
+
self
.
hidden
.
qfilters
]
+
self
.
hidden
.
qfilters
input_pretraining_cost
=
sum
(
i
.
ncost
for
i
in
self
.
input_representations
)
input_pretraining_cost
=
sum
(
i
.
ncost
for
i
in
self
.
input_representations
)
hidden_pretraining_cost
=
self
.
hidden
.
ncost
hidden_pretraining_cost
=
self
.
hidden
.
ncost
input_pretraining_gradients
=
T
.
grad
(
input_pretraining_cost
,
input_pretraining_gradients
=
T
.
grad
(
input_pretraining_cost
,
input_pretraining_params
)
input_pretraining_params
)
hidden_pretraining_gradients
=
T
.
grad
(
hidden_pretraining_cost
,
hidden_pretraining_params
)
hidden_pretraining_gradients
=
T
.
grad
(
hidden_pretraining_cost
,
hidden_pretraining_params
)
pretraining_updates
=
\
pretraining_updates
=
\
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
\
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
\
zip
(
input_pretraining_params
,
input_pretraining_gradients
)
\
zip
(
input_pretraining_params
,
input_pretraining_gradients
)
\
...
@@ -428,8 +447,10 @@ class ConvolutionalMLP(module.FancyModule):
...
@@ -428,8 +447,10 @@ class ConvolutionalMLP(module.FancyModule):
[
self
.
output
.
w
,
self
.
output
.
b
]
[
self
.
output
.
w
,
self
.
output
.
b
]
finetuning_cost
=
self
.
output
.
cost
finetuning_cost
=
self
.
output
.
cost
finetuning_gradients
=
T
.
grad
(
finetuning_cost
,
finetuning_params
)
finetuning_gradients
=
T
.
grad
(
finetuning_cost
,
finetuning_params
)
finetuning_updates
=
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
zip
(
finetuning_params
,
finetuning_gradients
))
finetuning_updates
=
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
self
.
finetuning_update
=
module
.
Method
(
self
.
inputs
+
[
self
.
targ
],
self
.
output
.
cost
,
finetuning_updates
)
g
in
zip
(
finetuning_params
,
finetuning_gradients
))
self
.
finetuning_update
=
module
.
Method
(
self
.
inputs
+
[
self
.
targ
],
self
.
output
.
cost
,
finetuning_updates
)
#self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
#self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
#self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
#self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
...
@@ -447,8 +468,10 @@ class ConvolutionalMLP(module.FancyModule):
...
@@ -447,8 +468,10 @@ class ConvolutionalMLP(module.FancyModule):
# for layer in obj.layers:
# for layer in obj.layers:
# if layer.lr is None:
# if layer.lr is None:
# layer.lr = lr
# layer.lr = lr
assert
self
.
input_representations
[
-
1
]
is
not
self
.
input_representations
[
0
]
assert
self
.
input_representations
[
-
1
]
assert
self
.
input_representations
[
-
1
]
.
w1
is
self
.
input_representations
[
0
]
.
w1
is
not
self
.
input_representations
[
0
]
assert
self
.
input_representations
[
-
1
]
.
w1
is
self
.
input_representations
[
0
]
.
w1
for
i
in
self
.
input_representations
:
for
i
in
self
.
input_representations
:
# i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, seed=R.random_integers(2**30), noise_level=noise_level, qfilter_relscale=qfilter_relscale)
# i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, seed=R.random_integers(2**30), noise_level=noise_level, qfilter_relscale=qfilter_relscale)
...
@@ -465,13 +488,16 @@ class ConvolutionalMLP(module.FancyModule):
...
@@ -465,13 +488,16 @@ class ConvolutionalMLP(module.FancyModule):
assert
(
i
.
w2
==
self
.
input_representations
[
0
]
.
w2
)
.
all
()
assert
(
i
.
w2
==
self
.
input_representations
[
0
]
.
w2
)
.
all
()
assert
(
i
.
b1
==
self
.
input_representations
[
0
]
.
b1
)
.
all
()
assert
(
i
.
b1
==
self
.
input_representations
[
0
]
.
b1
)
.
all
()
assert
(
i
.
b2
==
self
.
input_representations
[
0
]
.
b2
)
.
all
()
assert
(
i
.
b2
==
self
.
input_representations
[
0
]
.
b2
)
.
all
()
assert
N
.
all
((
a
==
b
)
.
all
()
for
a
,
b
in
zip
(
i
.
qfilters
,
self
.
input_representations
[
0
]
.
qfilters
))
assert
N
.
all
((
a
==
b
)
.
all
()
for
a
,
b
in
zip
(
i
.
qfilters
,
self
.
input_representations
[
0
]
.
qfilters
))
self
.
hidden
.
initialize
(
input_size
=
(
len
(
self
.
inputs
)
*
self
.
input_representation_size
),
self
.
hidden
.
initialize
(
input_size
=
(
len
(
self
.
inputs
)
*
self
.
input_representation_size
),
hidden_size
=
self
.
hidden_representation_size
,
noise_level
=
noise_level
,
hidden_size
=
self
.
hidden_representation_size
,
noise_level
=
noise_level
,
seed
=
int
(
R
.
random_integers
(
2
**
30
)),
lr
=
lr
,
qfilter_relscale
=
qfilter_relscale
)
seed
=
int
(
R
.
random_integers
(
2
**
30
)),
lr
=
lr
,
qfilter_relscale
=
qfilter_relscale
)
self
.
output
.
initialize
(
n_in
=
self
.
hidden_representation_size
,
n_out
=
self
.
output_size
,
lr
=
lr
,
seed
=
R
.
random_integers
(
2
**
30
))
self
.
output
.
initialize
(
n_in
=
self
.
hidden_representation_size
,
n_out
=
self
.
output_size
,
lr
=
lr
,
seed
=
R
.
random_integers
(
2
**
30
))
def
create
(
window_size
=
3
,
def
create
(
window_size
=
3
,
input_dimension
=
9
,
input_dimension
=
9
,
...
@@ -488,22 +514,24 @@ def create(window_size=3,
...
@@ -488,22 +514,24 @@ def create(window_size=3,
activation_function
=
T
.
tanh
activation_function
=
T
.
tanh
architecture
=
ConvolutionalMLP
(
\
architecture
=
ConvolutionalMLP
(
\
window_size
=
window_size
,
window_size
=
window_size
,
n_quadratic_filters
=
n_quadratic_filters
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
quadratic
,
reconstruction_cost_function
=
quadratic
,
tie_weights
=
False
tie_weights
=
False
)
)
backup
=
config
.
warn
.
sum_div_dimshuffle_bug
backup
=
config
.
warn
.
sum_div_dimshuffle_bug
config
.
warn
.
sum_div_dimshuffle_bug
=
False
config
.
warn
.
sum_div_dimshuffle_bug
=
False
try
:
try
:
model
=
architecture
.
make
(
input_size
=
input_dimension
,
input_representation_size
=
token_representation_size
,
hidden_representation_size
=
concatenated_representation_size
,
output_size
=
output_vocabsize
,
lr
=
lr
,
seed
=
seed
,
noise_level
=
noise_level
,
qfilter_relscale
=
qfilter_relscale
,
mode
=
compile_mode
)
model
=
architecture
.
make
(
input_size
=
input_dimension
,
input_representation_size
=
token_representation_size
,
hidden_representation_size
=
concatenated_representation_size
,
output_size
=
output_vocabsize
,
lr
=
lr
,
seed
=
seed
,
noise_level
=
noise_level
,
qfilter_relscale
=
qfilter_relscale
,
mode
=
compile_mode
)
finally
:
finally
:
config
.
warn
.
sum_div_dimshuffle_bug
=
backup
config
.
warn
.
sum_div_dimshuffle_bug
=
backup
return
model
return
model
def
create_realistic
(
window_size
=
3
,
#7,
def
create_realistic
(
window_size
=
3
,
# 7,
input_dimension
=
200
,
input_dimension
=
200
,
output_vocabsize
=
23
,
output_vocabsize
=
23
,
n_quadratic_filters
=
2
,
n_quadratic_filters
=
2
,
...
@@ -518,15 +546,17 @@ def create_realistic(window_size=3,#7,
...
@@ -518,15 +546,17 @@ def create_realistic(window_size=3,#7,
activation_function
=
T
.
tanh
activation_function
=
T
.
tanh
architecture
=
ConvolutionalMLP
(
\
architecture
=
ConvolutionalMLP
(
\
window_size
=
window_size
,
window_size
=
window_size
,
n_quadratic_filters
=
n_quadratic_filters
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
quadratic
,
reconstruction_cost_function
=
quadratic
,
tie_weights
=
False
tie_weights
=
False
)
)
model
=
architecture
.
make
(
input_size
=
input_dimension
,
input_representation_size
=
token_representation_size
,
hidden_representation_size
=
concatenated_representation_size
,
output_size
=
output_vocabsize
,
lr
=
lr
,
seed
=
seed
,
noise_level
=
noise_level
,
qfilter_relscale
=
qfilter_relscale
,
mode
=
compile_mode
)
model
=
architecture
.
make
(
input_size
=
input_dimension
,
input_representation_size
=
token_representation_size
,
hidden_representation_size
=
concatenated_representation_size
,
output_size
=
output_vocabsize
,
lr
=
lr
,
seed
=
seed
,
noise_level
=
noise_level
,
qfilter_relscale
=
qfilter_relscale
,
mode
=
compile_mode
)
return
model
return
model
def
test_naacl_model
(
iters_per_unsup
=
3
,
iters_per_sup
=
3
,
def
test_naacl_model
(
iters_per_unsup
=
3
,
iters_per_sup
=
3
,
optimizer
=
None
,
realistic
=
False
):
optimizer
=
None
,
realistic
=
False
):
#print "BUILDING MODEL"
#print "BUILDING MODEL"
...
@@ -535,11 +565,12 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
...
@@ -535,11 +565,12 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
if
optimizer
:
if
optimizer
:
mode
=
theano
.
Mode
(
linker
=
'c|py'
,
optimizer
=
optimizer
)
mode
=
theano
.
Mode
(
linker
=
'c|py'
,
optimizer
=
optimizer
)
else
:
mode
=
get_default_mode
()
else
:
mode
=
get_default_mode
()
if
mode
.
__class__
.
__name__
==
'DebugMode'
:
if
mode
.
__class__
.
__name__
==
'DebugMode'
:
iters_per_unsup
=
1
iters_per_unsup
=
1
iters_per_sup
=
1
iters_per_sup
=
1
if
realistic
:
if
realistic
:
m
=
create_realistic
(
compile_mode
=
mode
)
m
=
create_realistic
(
compile_mode
=
mode
)
...
@@ -552,7 +583,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
...
@@ -552,7 +583,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
for
i
,
node
in
enumerate
(
m
.
pretraining_update
.
maker
.
fgraph
.
toposort
()):
for
i
,
node
in
enumerate
(
m
.
pretraining_update
.
maker
.
fgraph
.
toposort
()):
idx_of_node
[
node
]
=
i
idx_of_node
[
node
]
=
i
if
False
and
i
>
-
1
:
if
False
and
i
>
-
1
:
print
' '
,
i
,
node
,
[(
ii
,
idx_of_node
.
get
(
ii
.
owner
,
'IN'
))
for
ii
in
node
.
inputs
]
print
' '
,
i
,
node
,
[(
ii
,
idx_of_node
.
get
(
ii
.
owner
,
'IN'
))
for
ii
in
node
.
inputs
]
prog_str
.
append
(
str
(
node
))
prog_str
.
append
(
str
(
node
))
#print input_pretraining_gradients[4].owner.inputs
#print input_pretraining_gradients[4].owner.inputs
#print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
#print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
...
@@ -562,8 +594,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
...
@@ -562,8 +594,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
rng
=
N
.
random
.
RandomState
(
unittest_tools
.
fetch_seed
(
23904
))
rng
=
N
.
random
.
RandomState
(
unittest_tools
.
fetch_seed
(
23904
))
inputs
=
[
rng
.
rand
(
10
,
m
.
input_size
)
for
i
in
1
,
2
,
3
]
inputs
=
[
rng
.
rand
(
10
,
m
.
input_size
)
for
i
in
1
,
2
,
3
]
targets
=
N
.
asarray
([
0
,
3
,
4
,
2
,
3
,
4
,
4
,
2
,
1
,
0
])
targets
=
N
.
asarray
([
0
,
3
,
4
,
2
,
3
,
4
,
4
,
2
,
1
,
0
])
#print inputs
#print inputs
#print 'UNSUPERVISED PHASE'
#print 'UNSUPERVISED PHASE'
...
@@ -584,8 +616,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
...
@@ -584,8 +616,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
s0
,
s1
=
[
str
(
j
)
for
j
in
m
.
pretraining_update
(
*
inputs
)]
s0
,
s1
=
[
str
(
j
)
for
j
in
m
.
pretraining_update
(
*
inputs
)]
#print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
#print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
if
iters_per_unsup
==
3
:
if
iters_per_unsup
==
3
:
assert
s0
.
startswith
(
'0.927793'
)
#
'0.403044')
assert
s0
.
startswith
(
'0.927793'
)
#
'0.403044')
assert
s1
.
startswith
(
'0.068035'
)
#
'0.074898')
assert
s1
.
startswith
(
'0.068035'
)
#
'0.074898')
#print 'UNSUPERVISED took %.3fs'%(time.time() - t)
#print 'UNSUPERVISED took %.3fs'%(time.time() - t)
#print 'FINETUNING GRAPH'
#print 'FINETUNING GRAPH'
...
@@ -601,6 +633,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
...
@@ -601,6 +633,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
assert
19.7042
<
s0f
and
s0f
<
19.7043
assert
19.7042
<
s0f
and
s0f
<
19.7043
#print 'SUPERVISED took %.3fs'%( time.time() - t)
#print 'SUPERVISED took %.3fs'%( time.time() - t)
def
jtest_main
():
def
jtest_main
():
from
theano
import
gof
from
theano
import
gof
JTEST
=
theano
.
compile
.
mode
.
optdb
.
query
(
*
sys
.
argv
[
2
:])
JTEST
=
theano
.
compile
.
mode
.
optdb
.
query
(
*
sys
.
argv
[
2
:])
...
@@ -609,13 +642,17 @@ def jtest_main():
...
@@ -609,13 +642,17 @@ def jtest_main():
optimizer
=
eval
(
sys
.
argv
[
1
])
optimizer
=
eval
(
sys
.
argv
[
1
])
test_naacl_model
(
optimizer
,
10
,
10
,
realistic
=
False
)
test_naacl_model
(
optimizer
,
10
,
10
,
realistic
=
False
)
def
real_main
():
def
real_main
():
test_naacl_model
()
test_naacl_model
()
def
profile_main
():
def
profile_main
():
# This is the main function for profiling
# This is the main function for profiling
# We've renamed our original main() above to real_main()
# We've renamed our original main() above to real_main()
import
cProfile
,
pstats
,
StringIO
import
cProfile
import
pstats
import
StringIO
prof
=
cProfile
.
Profile
()
prof
=
cProfile
.
Profile
()
prof
=
prof
.
runctx
(
"real_main()"
,
globals
(),
locals
())
prof
=
prof
.
runctx
(
"real_main()"
,
globals
(),
locals
())
stream
=
StringIO
.
StringIO
()
stream
=
StringIO
.
StringIO
()
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
7885e618
This source diff could not be displayed because it is too large. You can
view the blob
instead.
theano/tests/test_gradient.py
浏览文件 @
7885e618
...
@@ -26,6 +26,7 @@ class testgrad_sources_inputs(unittest.TestCase):
...
@@ -26,6 +26,7 @@ class testgrad_sources_inputs(unittest.TestCase):
inputs
=
[
theano
.
tensor
.
vector
()]
inputs
=
[
theano
.
tensor
.
vector
()]
outputs
=
[
theano
.
tensor
.
vector
()]
outputs
=
[
theano
.
tensor
.
vector
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
x
,
=
inp
x
,
=
inp
gz
,
=
grads
gz
,
=
grads
...
@@ -43,14 +44,15 @@ class testgrad_sources_inputs(unittest.TestCase):
...
@@ -43,14 +44,15 @@ class testgrad_sources_inputs(unittest.TestCase):
def
make_node
(
self
,
*
inputs
):
def
make_node
(
self
,
*
inputs
):
outputs
=
[
theano
.
tensor
.
vector
()]
outputs
=
[
theano
.
tensor
.
vector
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inputs
,
grads
):
def
grad
(
self
,
inputs
,
grads
):
return
[
inputs
[
0
]
.
zeros_like
()
]
return
[
inputs
[
0
]
.
zeros_like
()
]
i
=
theano
.
tensor
.
vector
()
i
=
theano
.
tensor
.
vector
()
j
=
theano
.
tensor
.
vector
()
j
=
theano
.
tensor
.
vector
()
a1
=
retOne
()
.
make_node
(
i
)
a1
=
retOne
()
.
make_node
(
i
)
g
=
grad_sources_inputs
([(
a1
.
out
,
one
)],
None
)
g
=
grad_sources_inputs
([(
a1
.
out
,
one
)],
None
)
a2
=
retOne
()
.
make_node
(
i
,
j
)
a2
=
retOne
()
.
make_node
(
i
,
j
)
try
:
try
:
g
=
grad_sources_inputs
([(
a2
.
out
,
one
)],
None
)
g
=
grad_sources_inputs
([(
a2
.
out
,
one
)],
None
)
except
ValueError
,
e
:
except
ValueError
,
e
:
...
@@ -60,11 +62,13 @@ class testgrad_sources_inputs(unittest.TestCase):
...
@@ -60,11 +62,13 @@ class testgrad_sources_inputs(unittest.TestCase):
def
test_1in_1out
(
self
):
def
test_1in_1out
(
self
):
"""Test grad is called correctly for a 1-to-1 op"""
"""Test grad is called correctly for a 1-to-1 op"""
gval
=
theano
.
tensor
.
matrix
()
gval
=
theano
.
tensor
.
matrix
()
class
O
(
gof
.
op
.
Op
):
class
O
(
gof
.
op
.
Op
):
def
make_node
(
self
):
def
make_node
(
self
):
inputs
=
[
theano
.
tensor
.
matrix
()]
inputs
=
[
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
return
gval
,
return
gval
,
a1
=
O
()
.
make_node
()
a1
=
O
()
.
make_node
()
...
@@ -74,11 +78,13 @@ class testgrad_sources_inputs(unittest.TestCase):
...
@@ -74,11 +78,13 @@ class testgrad_sources_inputs(unittest.TestCase):
def
test_1in_Nout
(
self
):
def
test_1in_Nout
(
self
):
"""Test grad is called correctly for a 1-to-many op"""
"""Test grad is called correctly for a 1-to-many op"""
gval
=
theano
.
tensor
.
matrix
()
gval
=
theano
.
tensor
.
matrix
()
class
O
(
gof
.
op
.
Op
):
class
O
(
gof
.
op
.
Op
):
def
make_node
(
self
):
def
make_node
(
self
):
inputs
=
[
theano
.
tensor
.
matrix
()]
inputs
=
[
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
scalar
(),
theano
.
tensor
.
scalar
()]
outputs
=
[
theano
.
tensor
.
scalar
(),
theano
.
tensor
.
scalar
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
x
,
=
inp
x
,
=
inp
gz1
,
gz2
=
grads
gz1
,
gz2
=
grads
...
@@ -91,11 +97,13 @@ class testgrad_sources_inputs(unittest.TestCase):
...
@@ -91,11 +97,13 @@ class testgrad_sources_inputs(unittest.TestCase):
"""Test grad is called correctly for a many-to-1 op"""
"""Test grad is called correctly for a many-to-1 op"""
gval0
=
theano
.
tensor
.
scalar
()
gval0
=
theano
.
tensor
.
scalar
()
gval1
=
theano
.
tensor
.
scalar
()
gval1
=
theano
.
tensor
.
scalar
()
class
O
(
gof
.
op
.
Op
):
class
O
(
gof
.
op
.
Op
):
def
make_node
(
self
):
def
make_node
(
self
):
inputs
=
[
theano
.
tensor
.
scalar
(),
theano
.
tensor
.
scalar
()]
inputs
=
[
theano
.
tensor
.
scalar
(),
theano
.
tensor
.
scalar
()]
outputs
=
[
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
x0
,
x1
=
inp
x0
,
x1
=
inp
gz
,
=
grads
gz
,
=
grads
...
@@ -109,11 +117,13 @@ class testgrad_sources_inputs(unittest.TestCase):
...
@@ -109,11 +117,13 @@ class testgrad_sources_inputs(unittest.TestCase):
"""Test grad is called correctly for a many-to-many op"""
"""Test grad is called correctly for a many-to-many op"""
gval0
=
theano
.
tensor
.
matrix
()
gval0
=
theano
.
tensor
.
matrix
()
gval1
=
theano
.
tensor
.
matrix
()
gval1
=
theano
.
tensor
.
matrix
()
class
O
(
gof
.
op
.
Op
):
class
O
(
gof
.
op
.
Op
):
def
make_node
(
self
):
def
make_node
(
self
):
inputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
inputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
return
gval0
,
gval1
return
gval0
,
gval1
a1
=
O
()
.
make_node
()
a1
=
O
()
.
make_node
()
...
@@ -126,9 +136,11 @@ class testgrad_sources_inputs(unittest.TestCase):
...
@@ -126,9 +136,11 @@ class testgrad_sources_inputs(unittest.TestCase):
class
O
(
gof
.
op
.
Op
):
class
O
(
gof
.
op
.
Op
):
def
__init__
(
self
,
tst
):
def
__init__
(
self
,
tst
):
self
.
tst
=
tst
self
.
tst
=
tst
def
make_node
(
self
,
*
inputs
):
def
make_node
(
self
,
*
inputs
):
outputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inputs
,
g_out
):
def
grad
(
self
,
inputs
,
g_out
):
return
[
one
]
return
[
one
]
i
=
theano
.
tensor
.
matrix
()
i
=
theano
.
tensor
.
matrix
()
...
@@ -136,26 +148,29 @@ class testgrad_sources_inputs(unittest.TestCase):
...
@@ -136,26 +148,29 @@ class testgrad_sources_inputs(unittest.TestCase):
g
=
grad_sources_inputs
([(
a1
.
outputs
[
0
],
one
)],
None
)
g
=
grad_sources_inputs
([(
a1
.
outputs
[
0
],
one
)],
None
)
self
.
assertTrue
(
g
[
i
]
is
one
)
self
.
assertTrue
(
g
[
i
]
is
one
)
def
test_unimplemented_grad_func
():
def
test_unimplemented_grad_func
():
# tests that function compilation catches unimplemented grads in the graph
# tests that function compilation catches unimplemented grads in the graph
a
=
theano
.
tensor
.
vector
()
a
=
theano
.
tensor
.
vector
()
b
=
theano
.
gradient
.
grad_not_implemented
(
theano
.
tensor
.
add
,
0
,
a
)
b
=
theano
.
gradient
.
grad_not_implemented
(
theano
.
tensor
.
add
,
0
,
a
)
try
:
try
:
f
=
theano
.
function
([
a
],
b
,
on_unused_input
=
'ignore'
)
f
=
theano
.
function
([
a
],
b
,
on_unused_input
=
'ignore'
)
assert
0
assert
0
except
TypeError
:
except
TypeError
:
pass
pass
def
test_undefined_grad_func
():
def
test_undefined_grad_func
():
#tests that function compilation catches undefined grads in the graph
#tests that function compilation catches undefined grads in the graph
a
=
theano
.
tensor
.
vector
()
a
=
theano
.
tensor
.
vector
()
b
=
theano
.
gradient
.
grad_undefined
(
theano
.
tensor
.
add
,
0
,
a
)
b
=
theano
.
gradient
.
grad_undefined
(
theano
.
tensor
.
add
,
0
,
a
)
try
:
try
:
f
=
theano
.
function
([
a
],
b
,
on_unused_input
=
'ignore'
)
f
=
theano
.
function
([
a
],
b
,
on_unused_input
=
'ignore'
)
assert
0
assert
0
except
TypeError
:
except
TypeError
:
pass
pass
def
test_unimplemented_grad_grad
():
def
test_unimplemented_grad_grad
():
#tests that unimplemented grads are caught in the grad method
#tests that unimplemented grads are caught in the grad method
...
@@ -164,132 +179,137 @@ def test_unimplemented_grad_grad():
...
@@ -164,132 +179,137 @@ def test_unimplemented_grad_grad():
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
grad
(
self
,
inputs
,
output_grads
):
def
grad
(
self
,
inputs
,
output_grads
):
return
[
theano
.
gradient
.
grad_not_implemented
(
self
,
0
,
inputs
[
0
])
]
return
[
theano
.
gradient
.
grad_not_implemented
(
self
,
0
,
inputs
[
0
])
]
a
=
theano
.
tensor
.
scalar
()
a
=
theano
.
tensor
.
scalar
()
b
=
DummyOp
()(
a
)
b
=
DummyOp
()(
a
)
try
:
try
:
g
=
theano
.
gradient
.
grad
(
b
,
a
)
g
=
theano
.
gradient
.
grad
(
b
,
a
)
assert
False
assert
False
except
TypeError
:
except
TypeError
:
pass
pass
def
test_undefined_grad_grad
():
def
test_undefined_grad_grad
():
#tests that undefined grads are caught in the grad method
#tests that undefined grads are caught in the grad method
V
=
theano
.
tensor
.
TensorType
(
dtype
=
config
.
floatX
,
V
=
theano
.
tensor
.
TensorType
(
dtype
=
config
.
floatX
,
broadcastable
=
(
False
,
False
,
False
,
False
,
False
))()
broadcastable
=
(
False
,
False
,
False
,
False
,
False
))()
W
=
theano
.
tensor
.
TensorType
(
dtype
=
config
.
floatX
,
W
=
theano
.
tensor
.
TensorType
(
dtype
=
config
.
floatX
,
broadcastable
=
(
False
,
False
,
False
,
False
,
False
))()
broadcastable
=
(
False
,
False
,
False
,
False
,
False
))()
b
=
theano
.
tensor
.
vector
()
b
=
theano
.
tensor
.
vector
()
d
=
theano
.
tensor
.
ivector
()
d
=
theano
.
tensor
.
ivector
()
Z
=
conv3D
(
V
,
W
,
b
,
d
)
Z
=
conv3D
(
V
,
W
,
b
,
d
)
try
:
try
:
g
=
theano
.
gradient
.
grad
(
Z
.
sum
(),
d
)
g
=
theano
.
gradient
.
grad
(
Z
.
sum
(),
d
)
assert
False
assert
False
except
TypeError
:
except
TypeError
:
pass
pass
def
test_grad_name
():
def
test_grad_name
():
A
=
theano
.
tensor
.
matrix
(
'A'
)
A
=
theano
.
tensor
.
matrix
(
'A'
)
x
=
theano
.
tensor
.
vector
(
'x'
)
x
=
theano
.
tensor
.
vector
(
'x'
)
f
=
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
f
=
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
f
.
name
=
'f'
f
.
name
=
'f'
g
=
theano
.
tensor
.
grad
(
f
,
x
)
g
=
theano
.
tensor
.
grad
(
f
,
x
)
assert
g
.
name
==
'(df/dx)'
assert
g
.
name
==
'(df/dx)'
def
test_grad_duplicate_input
():
def
test_grad_duplicate_input
():
#test that the grad works when a variable
#test that the grad works when a variable
#appears in more than one place in a node's input list
#appears in more than one place in a node's input list
def
output
(
x
):
def
output
(
x
):
return
(
x
*
x
)
return
(
x
*
x
)
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vx
=
rng
.
randn
(
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,[
vx
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
])
def
test_grad_quadratic
():
def
test_grad_quadratic
():
#test the gradient on a tiny graph
#test the gradient on a tiny graph
def
cost
(
x
,
A
):
def
cost
(
x
,
A
):
return
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
return
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
cost
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
cost
,
[
vx
,
vA
])
def
test_grad_quadratic_vector
():
def
test_grad_quadratic_vector
():
#test the gradient on a small graph
#test the gradient on a small graph
def
output
(
x
,
A
):
def
output
(
x
,
A
):
return
theano
.
tensor
.
dot
(
x
*
x
,
A
)
return
theano
.
tensor
.
dot
(
x
*
x
,
A
)
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
,
vA
])
def
test_grad_cubic
():
def
test_grad_cubic
():
#test the gradient on a bigger graph
#test the gradient on a bigger graph
def
cost
(
x
,
A
):
def
cost
(
x
,
A
):
return
theano
.
tensor
.
dot
(
x
*
x
,
theano
.
tensor
.
dot
(
A
,
x
))
return
theano
.
tensor
.
dot
(
x
*
x
,
theano
.
tensor
.
dot
(
A
,
x
))
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
cost
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
cost
,[
vx
,
vA
])
def
test_grad_grad_quadratic
():
def
test_grad_grad_quadratic
():
#test the gradient on a graph constructed using the gradient
#test the gradient on a graph constructed using the gradient
def
output
(
x
,
A
):
def
output
(
x
,
A
):
orig_cost
=
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
orig_cost
=
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
return
theano
.
gradient
.
grad
(
orig_cost
,
x
)
return
theano
.
gradient
.
grad
(
orig_cost
,
x
)
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,[
vx
,
vA
])
def
test_grad_grad_cubic
():
def
test_grad_grad_cubic
():
#test the gradient on a bigger graph constructed using the gradient
#test the gradient on a bigger graph constructed using the gradient
def
output
(
x
,
A
):
def
output
(
x
,
A
):
orig_cost
=
theano
.
tensor
.
dot
(
x
*
x
,
theano
.
tensor
.
dot
(
A
,
x
))
orig_cost
=
theano
.
tensor
.
dot
(
x
*
x
,
theano
.
tensor
.
dot
(
A
,
x
))
return
theano
.
gradient
.
grad
(
orig_cost
,
x
)
return
theano
.
gradient
.
grad
(
orig_cost
,
x
)
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,[
vx
,
vA
])
def
test_grad_int
():
def
test_grad_int
():
...
@@ -300,11 +320,11 @@ def test_grad_int():
...
@@ -300,11 +320,11 @@ def test_grad_int():
b
=
theano
.
tensor
.
vector
()
b
=
theano
.
tensor
.
vector
()
def
make_grad_func
(
X
):
def
make_grad_func
(
X
):
Z
=
theano
.
tensor
.
dot
(
X
,
W
)
+
b
Z
=
theano
.
tensor
.
dot
(
X
,
W
)
+
b
H
=
theano
.
tensor
.
nnet
.
sigmoid
(
Z
)
H
=
theano
.
tensor
.
nnet
.
sigmoid
(
Z
)
cost
=
H
.
sum
()
cost
=
H
.
sum
()
g
=
gradient
.
grad
(
cost
,
X
)
g
=
gradient
.
grad
(
cost
,
X
)
return
theano
.
function
([
X
,
W
,
b
],
g
,
on_unused_input
=
'ignore'
)
return
theano
.
function
([
X
,
W
,
b
],
g
,
on_unused_input
=
'ignore'
)
int_func
=
make_grad_func
(
theano
.
tensor
.
imatrix
())
int_func
=
make_grad_func
(
theano
.
tensor
.
imatrix
())
#we have to use float64 as the float type to get the results to match
#we have to use float64 as the float type to get the results to match
...
@@ -314,17 +334,17 @@ def test_grad_int():
...
@@ -314,17 +334,17 @@ def test_grad_int():
m
=
5
m
=
5
d
=
3
d
=
3
n
=
4
n
=
4
rng
=
np
.
random
.
RandomState
([
2012
,
9
,
5
])
rng
=
np
.
random
.
RandomState
([
2012
,
9
,
5
])
int_type
=
theano
.
tensor
.
imatrix
()
.
dtype
int_type
=
theano
.
tensor
.
imatrix
()
.
dtype
float_type
=
'float64'
float_type
=
'float64'
X
=
np
.
cast
[
int_type
](
rng
.
randn
(
m
,
d
)
*
127.
)
X
=
np
.
cast
[
int_type
](
rng
.
randn
(
m
,
d
)
*
127.
)
W
=
np
.
cast
[
W
.
dtype
](
rng
.
randn
(
d
,
n
))
W
=
np
.
cast
[
W
.
dtype
](
rng
.
randn
(
d
,
n
))
b
=
np
.
cast
[
b
.
dtype
](
rng
.
randn
(
n
))
b
=
np
.
cast
[
b
.
dtype
](
rng
.
randn
(
n
))
int_result
=
int_func
(
X
,
W
,
b
)
int_result
=
int_func
(
X
,
W
,
b
)
float_result
=
float_func
(
np
.
cast
[
float_type
](
X
),
W
,
b
)
float_result
=
float_func
(
np
.
cast
[
float_type
](
X
),
W
,
b
)
assert
np
.
allclose
(
int_result
,
float_result
)
assert
np
.
allclose
(
int_result
,
float_result
)
...
@@ -333,23 +353,23 @@ def test_grad_disconnected():
...
@@ -333,23 +353,23 @@ def test_grad_disconnected():
#tests corner cases of gradient for shape and alloc
#tests corner cases of gradient for shape and alloc
x
=
theano
.
tensor
.
vector
(
name
=
'x'
)
x
=
theano
.
tensor
.
vector
(
name
=
'x'
)
total
=
x
.
sum
()
total
=
x
.
sum
()
total
.
name
=
'total'
total
.
name
=
'total'
num_elements
=
x
.
shape
[
0
]
num_elements
=
x
.
shape
[
0
]
num_elements
.
name
=
'num_elements'
num_elements
.
name
=
'num_elements'
silly_vector
=
theano
.
tensor
.
alloc
(
total
/
num_elements
,
num_elements
)
silly_vector
=
theano
.
tensor
.
alloc
(
total
/
num_elements
,
num_elements
)
silly_vector
.
name
=
'silly_vector'
silly_vector
.
name
=
'silly_vector'
cost
=
silly_vector
.
sum
()
cost
=
silly_vector
.
sum
()
cost
.
name
=
'cost'
cost
.
name
=
'cost'
#note that cost simplifies to be the same as "total"
#note that cost simplifies to be the same as "total"
g
=
gradient
.
grad
(
cost
,
x
,
add_names
=
False
)
g
=
gradient
.
grad
(
cost
,
x
,
add_names
=
False
)
#we still need to pass in x because it determines the shape of the output
#we still need to pass in x because it determines the shape of the output
f
=
theano
.
function
([
x
],
g
)
f
=
theano
.
function
([
x
],
g
)
rng
=
np
.
random
.
RandomState
([
2012
,
9
,
5
])
rng
=
np
.
random
.
RandomState
([
2012
,
9
,
5
])
x
=
np
.
cast
[
x
.
dtype
](
rng
.
randn
(
3
))
x
=
np
.
cast
[
x
.
dtype
](
rng
.
randn
(
3
))
g
=
f
(
x
)
g
=
f
(
x
)
assert
np
.
allclose
(
g
,
np
.
ones
(
x
.
shape
,
dtype
=
x
.
dtype
))
assert
np
.
allclose
(
g
,
np
.
ones
(
x
.
shape
,
dtype
=
x
.
dtype
))
def
test_disconnected_nan
():
def
test_disconnected_nan
():
...
@@ -361,27 +381,27 @@ def test_disconnected_nan():
...
@@ -361,27 +381,27 @@ def test_disconnected_nan():
class
Op1
(
theano
.
gof
.
Op
):
class
Op1
(
theano
.
gof
.
Op
):
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
return
theano
.
Apply
(
self
,
inputs
=
[
x
],
return
theano
.
Apply
(
self
,
inputs
=
[
x
],
outputs
=
[
x
.
type
(),
theano
.
tensor
.
scalar
()
])
outputs
=
[
x
.
type
(),
theano
.
tensor
.
scalar
()
])
def
connection_pattern
(
self
,
node
):
def
connection_pattern
(
self
,
node
):
return
[[
True
,
False
]]
return
[[
True
,
False
]]
def
grad
(
self
,
inputs
,
output_grads
):
def
grad
(
self
,
inputs
,
output_grads
):
return
[
inputs
[
0
]
.
zeros_like
()
]
return
[
inputs
[
0
]
.
zeros_like
()
]
# Op2 has two inputs, f and g
# Op2 has two inputs, f and g
# Its gradient with respect to g is not defined
# Its gradient with respect to g is not defined
class
Op2
(
theano
.
gof
.
Op
):
class
Op2
(
theano
.
gof
.
Op
):
def
make_node
(
self
,
f
,
g
):
def
make_node
(
self
,
f
,
g
):
return
theano
.
Apply
(
self
,
inputs
=
[
f
,
g
],
return
theano
.
Apply
(
self
,
inputs
=
[
f
,
g
],
outputs
=
[
theano
.
tensor
.
scalar
()
])
outputs
=
[
theano
.
tensor
.
scalar
()
])
def
grad
(
self
,
inputs
,
output_grads
):
def
grad
(
self
,
inputs
,
output_grads
):
return
[
inputs
[
0
]
.
zeros_like
(),
NullType
()()
]
return
[
inputs
[
0
]
.
zeros_like
(),
NullType
()()
]
x
=
theano
.
tensor
.
vector
()
x
=
theano
.
tensor
.
vector
()
f
,
g
=
Op1
()(
x
)
f
,
g
=
Op1
()(
x
)
cost
=
Op2
()(
f
,
g
)
cost
=
Op2
()(
f
,
g
)
# cost is differentiable wrt x
# cost is differentiable wrt x
# but we can't tell that without using Op1's connection pattern
# but we can't tell that without using Op1's connection pattern
...
@@ -394,7 +414,6 @@ def test_disconnected_nan():
...
@@ -394,7 +414,6 @@ def test_disconnected_nan():
# connection_pattern functionality worked correctly
# connection_pattern functionality worked correctly
def
test_sum_disconnected
():
def
test_sum_disconnected
():
# Tests that we can add DisconnectedType to other terms correctly
# Tests that we can add DisconnectedType to other terms correctly
...
@@ -402,7 +421,7 @@ def test_sum_disconnected():
...
@@ -402,7 +421,7 @@ def test_sum_disconnected():
y
=
x
*
2.
y
=
x
*
2.
z
=
x
+
1.
z
=
x
+
1.
cost
=
y
+
z
cost
=
y
+
z
theano
.
tensor
.
grad
(
cost
,
x
,
consider_constant
=
[
y
,
z
])
theano
.
tensor
.
grad
(
cost
,
x
,
consider_constant
=
[
y
,
z
])
# In an earlier version of theano, the above line would have failed
# In an earlier version of theano, the above line would have failed
# while trying to add two DisconnectedTypes
# while trying to add two DisconnectedTypes
...
...
theano/tests/test_rop.py
浏览文件 @
7885e618
...
@@ -47,7 +47,7 @@ class BreakRop(Op):
...
@@ -47,7 +47,7 @@ class BreakRop(Op):
out
[
0
]
=
x
out
[
0
]
=
x
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
return
[
grad_undefined
(
self
,
0
,
inp
[
0
])
]
return
[
grad_undefined
(
self
,
0
,
inp
[
0
])
]
def
R_op
(
self
,
inputs
,
eval_points
):
def
R_op
(
self
,
inputs
,
eval_points
):
return
[
None
]
return
[
None
]
...
@@ -330,21 +330,21 @@ class test_RopLop(RopLop_checker):
...
@@ -330,21 +330,21 @@ class test_RopLop(RopLop_checker):
m_
=
tensor
.
matrix
(
'm_'
)
m_
=
tensor
.
matrix
(
'm_'
)
v_
=
tensor
.
vector
(
'v_'
)
v_
=
tensor
.
vector
(
'v_'
)
mval
=
self
.
rng
.
uniform
(
size
=
(
3
,
7
))
.
astype
(
theano
.
config
.
floatX
)
mval
=
self
.
rng
.
uniform
(
size
=
(
3
,
7
))
.
astype
(
theano
.
config
.
floatX
)
vval
=
self
.
rng
.
uniform
(
size
=
(
7
,))
.
astype
(
theano
.
config
.
floatX
)
vval
=
self
.
rng
.
uniform
(
size
=
(
7
,))
.
astype
(
theano
.
config
.
floatX
)
m_val
=
self
.
rng
.
uniform
(
size
=
(
3
,
7
))
.
astype
(
theano
.
config
.
floatX
)
m_val
=
self
.
rng
.
uniform
(
size
=
(
3
,
7
))
.
astype
(
theano
.
config
.
floatX
)
v_val
=
self
.
rng
.
uniform
(
size
=
(
7
,))
.
astype
(
theano
.
config
.
floatX
)
v_val
=
self
.
rng
.
uniform
(
size
=
(
7
,))
.
astype
(
theano
.
config
.
floatX
)
rop_out1
=
tensor
.
Rop
([
m
,
v
,
m
+
v
],
[
m
,
v
],
[
m_
,
v_
])
rop_out1
=
tensor
.
Rop
([
m
,
v
,
m
+
v
],
[
m
,
v
],
[
m_
,
v_
])
assert
isinstance
(
rop_out1
,
list
)
assert
isinstance
(
rop_out1
,
list
)
assert
len
(
rop_out1
)
==
3
assert
len
(
rop_out1
)
==
3
rop_out2
=
tensor
.
Rop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
rop_out2
=
tensor
.
Rop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
assert
isinstance
(
rop_out2
,
tuple
)
assert
isinstance
(
rop_out2
,
tuple
)
assert
len
(
rop_out2
)
==
3
assert
len
(
rop_out2
)
==
3
lop_out1
=
tensor
.
Lop
([
m
,
v
,
m
+
v
],
(
m
,
v
),
[
m_
,
v_
])
lop_out1
=
tensor
.
Lop
([
m
,
v
,
m
+
v
],
(
m
,
v
),
[
m_
,
v_
])
assert
isinstance
(
lop_out1
,
tuple
)
assert
isinstance
(
lop_out1
,
tuple
)
assert
len
(
lop_out1
)
==
2
assert
len
(
lop_out1
)
==
2
lop_out2
=
tensor
.
Lop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
lop_out2
=
tensor
.
Lop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
assert
isinstance
(
lop_out2
,
list
)
assert
isinstance
(
lop_out2
,
list
)
assert
len
(
lop_out2
)
==
2
assert
len
(
lop_out2
)
==
2
...
@@ -353,4 +353,3 @@ class test_RopLop(RopLop_checker):
...
@@ -353,4 +353,3 @@ class test_RopLop(RopLop_checker):
all_outs
.
extend
(
o
)
all_outs
.
extend
(
o
)
f
=
theano
.
function
([
m
,
v
,
m_
,
v_
],
all_outs
)
f
=
theano
.
function
([
m
,
v
,
m_
,
v_
],
all_outs
)
f
(
mval
,
vval
,
m_val
,
v_val
)
f
(
mval
,
vval
,
m_val
,
v_val
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论