Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
7885e618
提交
7885e618
authored
9月 11, 2012
作者:
Ian Goodfellow
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
a lot of pep8
上级
c7d06ac9
隐藏空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
272 行增加
和
193 行删除
+272
-193
sigm.py
theano/tensor/nnet/sigm.py
+32
-20
randomstreams.py
theano/tensor/randomstreams.py
+2
-2
test_basic.py
theano/tensor/tests/test_basic.py
+0
-0
test_elemwise.py
theano/tensor/tests/test_elemwise.py
+35
-23
test_naacl09.py
theano/tensor/tests/test_naacl09.py
+110
-73
test_opt.py
theano/tensor/tests/test_opt.py
+0
-0
test_gradient.py
theano/tests/test_gradient.py
+86
-67
test_rop.py
theano/tests/test_rop.py
+7
-8
没有找到文件。
theano/tensor/nnet/sigm.py
浏览文件 @
7885e618
...
...
@@ -42,7 +42,7 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
assert
rval
.
type
.
dtype
.
find
(
'float'
)
!=
-
1
return
[
rval
]
return
[
rval
]
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
x
,
=
inp
...
...
@@ -69,7 +69,7 @@ sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
sigmoid_inplace
=
elemwise
.
Elemwise
(
ScalarSigmoid
(
scalar
.
transfer_type
(
0
)),
inplace_pattern
=
{
0
:
0
},
inplace_pattern
=
{
0
:
0
},
name
=
'sigmoid_inplace'
,
)
...
...
@@ -84,12 +84,15 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
if
x
>
30.0
:
return
x
return
numpy
.
log1p
(
numpy
.
exp
(
x
))
def
impl
(
self
,
x
):
return
ScalarSoftplus
.
static_impl
(
x
)
def
grad
(
self
,
inp
,
grads
):
x
,
=
inp
gz
,
=
grads
return
[
gz
*
scalar_sigmoid
(
x
)]
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
x
,
=
inp
z
,
=
out
...
...
@@ -103,27 +106,29 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return
"""
%(z)
s =
%(x)
s < -745.0 ? 0.0 :
%(x)
s > 16.0 ?
%(x)
s : log1p(exp(
%(x)
s));"""
%
locals
()
else
:
raise
NotImplementedError
(
'only floatingpoint is implemented'
)
def
c_code_cache_version
(
self
):
v
=
super
(
ScalarSoftplus
,
self
)
.
c_code_cache_version
()
if
v
:
return
(
2
,)
+
v
else
:
return
v
scalar_softplus
=
ScalarSoftplus
(
scalar
.
upgrade_to_float
,
name
=
'scalar_softplus'
)
scalar_softplus
=
ScalarSoftplus
(
scalar
.
upgrade_to_float
,
name
=
'scalar_softplus'
)
softplus
=
elemwise
.
Elemwise
(
scalar_softplus
,
name
=
'softplus'
)
pprint
.
assign
(
softplus
,
printing
.
FunctionPrinter
(
'softplus'
))
def
_skip_mul_1
(
r
):
if
r
.
owner
and
r
.
owner
.
op
==
tensor
.
mul
:
not_is_1
=
[
i
for
i
in
r
.
owner
.
inputs
if
not
_is_1
(
i
)
]
if
len
(
not_is_1
)
==
1
:
not_is_1
=
[
i
for
i
in
r
.
owner
.
inputs
if
not
_is_1
(
i
)]
if
len
(
not_is_1
)
==
1
:
return
not_is_1
[
0
]
logsigm_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log
,
(
sigmoid
,
'x'
)),
(
tensor
.
neg
,
(
softplus
,
(
tensor
.
neg
,
'x'
))),
allow_multiple_clients
=
True
,
allow_multiple_clients
=
True
,
skip_identities_fn
=
_skip_mul_1
)
...
...
@@ -139,21 +144,22 @@ def _is_1(expr):
log1msigm_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log
,
(
tensor
.
sub
,
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
dict
(
pattern
=
'y'
,
constraint
=
_is_1
),
(
sigmoid
,
'x'
))),
(
tensor
.
neg
,
(
softplus
,
'x'
)),
allow_multiple_clients
=
True
,
allow_multiple_clients
=
True
,
skip_identities_fn
=
_skip_mul_1
)
log1pexp_to_softplus
=
gof
.
PatternSub
(
(
tensor
.
log1p
,
(
tensor
.
exp
,
'x'
)),
(
softplus
,
'x'
),
allow_multiple_clients
=
True
)
allow_multiple_clients
=
True
)
opt
.
register_stabilize
(
logsigm_to_softplus
,
name
=
'logsigm_to_softplus'
)
opt
.
register_stabilize
(
log1msigm_to_softplus
,
name
=
'log1msigm_to_softplus'
)
opt
.
register_stabilize
(
log1pexp_to_softplus
,
name
=
'log1pexp_to_softplus'
)
opt
.
register_stabilize
(
logsigm_to_softplus
,
name
=
'logsigm_to_softplus'
)
opt
.
register_stabilize
(
log1msigm_to_softplus
,
name
=
'log1msigm_to_softplus'
)
opt
.
register_stabilize
(
log1pexp_to_softplus
,
name
=
'log1pexp_to_softplus'
)
def
is_1pexp
(
t
):
"""
...
...
@@ -247,7 +253,7 @@ def partition_num_or_denom(r, f):
else
:
neg_t
,
f_t
=
f_t
f_terms
.
append
(
f_t
)
neg
^=
neg_t
#
bit flip if neg_t is true
neg
^=
neg_t
#
bit flip if neg_t is true
return
f_terms
,
rest
,
neg
...
...
@@ -299,7 +305,8 @@ def local_exp_over_1_plus_exp(node):
#find all the exp() terms in the numerator
num
,
denom
=
node
.
inputs
num_exp_x
,
num_rest
,
num_neg
=
partition_num_or_denom
(
num
,
is_exp
)
denom_1pexp
,
denom_rest
,
denom_neg
=
partition_num_or_denom
(
denom
,
is_1pexp
)
denom_1pexp
,
denom_rest
,
denom_neg
=
partition_num_or_denom
(
denom
,
is_1pexp
)
sigmoids
=
[]
for
t
in
denom_1pexp
:
...
...
@@ -311,7 +318,7 @@ def local_exp_over_1_plus_exp(node):
# case: 1/(1+exp(x))
sigmoids
.
append
(
sigmoid
(
-
t
))
if
not
sigmoids
:
# we didn't find any. abort
if
not
sigmoids
:
# we didn't find any. abort
return
# put the new numerator together
new_num
=
sigmoids
+
[
tensor
.
exp
(
t
)
for
t
in
num_exp_x
]
+
num_rest
...
...
@@ -330,6 +337,7 @@ def local_exp_over_1_plus_exp(node):
else
:
return
[
new_num
/
tensor
.
mul
(
*
denom_rest
)]
def
parse_mul_tree
(
root
):
"""
Parse a tree of multiplications starting at the given root.
...
...
@@ -512,7 +520,7 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None,
sigm_minus_x
=
[]
if
full_tree
is
None
:
full_tree
=
tree
if
False
:
# Debug code.
if
False
:
# Debug code.
print
'<perform_sigm_times_exp>'
print
' full_tree =
%
s'
%
full_tree
print
' tree =
%
s'
%
tree
...
...
@@ -621,10 +629,13 @@ def local_inv_1_plus_exp(node):
if
nonconsts
[
0
]
.
owner
and
nonconsts
[
0
]
.
owner
.
op
==
tensor
.
exp
:
if
scalars
and
numpy
.
allclose
(
numpy
.
sum
(
scalars
),
1
):
return
opt
.
_fill_chain
(
sigmoid
(
tensor
.
neg
(
nonconsts
[
0
]
.
owner
.
inputs
[
0
])),
sigmoid
(
tensor
.
neg
(
nonconsts
[
0
]
.
owner
.
inputs
[
0
])),
scalar_inputs
)
# Registration is below, and conditional.
@gof.local_optimizer
([
tensor
.
sub
])
def
local_1msigmoid
(
node
):
"""
...
...
@@ -633,7 +644,7 @@ def local_1msigmoid(node):
if
node
.
op
==
tensor
.
sub
:
sub_l
,
sub_r
=
node
.
inputs
if
len
(
sub_r
.
clients
)
>
1
:
return
# graph is using both sigm and 1-sigm
return
# graph is using both sigm and 1-sigm
if
sub_r
.
owner
and
sub_r
.
owner
.
op
==
sigmoid
:
try
:
val_l
=
opt
.
get_constant_value
(
sub_l
)
...
...
@@ -686,13 +697,14 @@ if 0:
assert
t0
.
owner
.
op
==
div
t0top
,
t0bot
=
t0
.
owner
.
inputs
t1top
,
t1bot
=
t1
.
owner
.
inputs
rval
.
append
(
div
(
mul
(
*
(
t0top
+
t1top
)),
mul
(
*
(
t0bot
+
t1bot
))))
rval
.
append
(
div
(
mul
(
*
(
t0top
+
t1top
)),
mul
(
*
(
t0bot
+
t1bot
))))
if
len
(
rval
)
>
100
:
# This loop can be exponentially long.
# aborting
return
[]
elif
len
(
node
.
outputs
)
>
1
:
elif
len
(
node
.
outputs
)
>
1
:
return
[]
else
:
return
[
node
.
outputs
[
0
]]
theano/tensor/randomstreams.py
浏览文件 @
7885e618
...
...
@@ -136,7 +136,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
"""
def
__init__
(
self
,
seed
=
None
,
no_warn
=
False
):
def
__init__
(
self
,
seed
=
None
,
no_warn
=
False
):
""":type seed: None or int
:param seed: a default seed to initialize the RandomState
...
...
@@ -146,7 +146,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
"""
if
not
no_warn
:
deprecation_warning
()
super
(
RandomStreams
,
self
)
.
__init__
(
no_warn
=
True
)
super
(
RandomStreams
,
self
)
.
__init__
(
no_warn
=
True
)
self
.
random_state_variables
=
[]
self
.
default_instance_seed
=
seed
...
...
theano/tensor/tests/test_basic.py
浏览文件 @
7885e618
This source diff could not be displayed because it is too large. You can
view the blob
instead.
theano/tensor/tests/test_elemwise.py
浏览文件 @
7885e618
...
...
@@ -47,7 +47,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
#test that DimShuffle.infer_shape work correctly
x
=
TensorType
(
'float64'
,
ib
)(
'x'
)
e
=
DimShuffle
(
ib
,
shuffle
)(
x
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
assert
all
(
f
(
numpy
.
ones
(
xsh
)))
==
all
(
zsh
)
# Test when we drop a axis that is not broadcastable
...
...
@@ -125,7 +126,8 @@ class test_Broadcast(unittest.TestCase):
x
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
Elemwise
(
scalar
.
add
)(
x
,
y
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
assert
tuple
(
f
(
xv
,
yv
))
==
tuple
(
zv
.
shape
)
def
with_linker_inplace
(
self
,
linker
):
...
...
@@ -154,7 +156,8 @@ class test_Broadcast(unittest.TestCase):
x
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
TensorType
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
Elemwise
(
scalar
.
Add
(
scalar
.
transfer_type
(
0
)),
{
0
:
0
})(
x
,
y
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
xv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
xsh
))
yv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
ysh
))
zv
=
xv
+
yv
...
...
@@ -349,7 +352,8 @@ class test_CAReduce(unittest_tools.InferShapeTester):
e
=
tensor_op
(
x
,
axis
=
tosum
)
if
tosum
is
None
:
tosum
=
range
(
len
(
xsh
))
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
if
not
(
scalar_op
in
[
scalar
.
maximum
,
scalar
.
minimum
]
and
((
xsh
==
()
or
numpy
.
prod
(
xsh
)
==
0
))):
assert
all
(
f
(
xv
)
==
zv
.
shape
)
...
...
@@ -459,7 +463,8 @@ class test_Prod(unittest.TestCase):
# including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val
=
numpy
.
asarray
([[
1
,
2
,
3
],[
4
,
5
,
6
],[
7
,
8
,
9
]],
dtype
=
'float32'
)
x_val
=
numpy
.
asarray
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]],
dtype
=
'float32'
)
x
=
theano
.
tensor
.
dmatrix
()
# now with verify_grad
unittest_tools
.
verify_grad
(
Prod
(
axis
=
1
),
[
x_val
],
mode
=
self
.
mode
)
...
...
@@ -471,26 +476,28 @@ class test_Prod(unittest.TestCase):
unittest_tools
.
verify_grad
(
fn
,
[
x_val
],
mode
=
self
.
mode
)
def
test_verify_grad_with_zeros
(
self
):
# including zeros, as the case with zeros is important
# (and special cases: 1 zero in the row, more than 1 zero in the row)
x_val
=
numpy
.
asarray
([[
1.
,
2.
,
3.
],[
0.
,
5.
,
6.
],[
0.
,
0.
,
9.
]],
dtype
=
'float32'
)
x_val
=
numpy
.
asarray
([[
1.
,
2.
,
3.
],
[
0.
,
5.
,
6.
],
[
0.
,
0.
,
9.
]],
dtype
=
'float32'
)
x
=
theano
.
tensor
.
dmatrix
()
# sanity check
x2
=
theano
.
tensor
.
dmatrix
()
p
=
Prod
(
axis
=
1
)(
x
)
p2
=
Prod
(
axis
=
1
)(
x2
)
fn
=
theano
.
function
([
x
,
x2
],[
p
-
p2
],
mode
=
self
.
mode
)
fn
=
theano
.
function
([
x
,
x2
],
[
p
-
p2
],
mode
=
self
.
mode
)
#print "hand computed diff for each row"
x2_val
=
numpy
.
asarray
([[
1.
,
2.
,
3.003
],
[
0.003
,
5.
,
6
],
[
0.
,
0.
,
9.01
]])
x2_val
=
numpy
.
asarray
([[
1.
,
2.
,
3.003
],
[
0.003
,
5.
,
6
],
[
0.
,
0.
,
9.01
]])
#print fn(x_val, x2_val)
fn2
=
theano
.
function
([
x
],[
theano
.
tensor
.
grad
(
p
.
sum
(),
x
)],
mode
=
self
.
mode
)
fn2
=
theano
.
function
([
x
],
[
theano
.
tensor
.
grad
(
p
.
sum
(),
x
)],
mode
=
self
.
mode
)
#print "real grad"
#print fn2(x_val)
fn3
=
theano
.
function
([
x
],[
p
],
mode
=
self
.
mode
)
assert
numpy
.
allclose
(
fn3
(
x_val
),
[
6.
,
0.
,
0.
])
fn3
=
theano
.
function
([
x
],
[
p
],
mode
=
self
.
mode
)
assert
numpy
.
allclose
(
fn3
(
x_val
),
[
6.
,
0.
,
0.
])
# now with verify_grad
unittest_tools
.
verify_grad
(
Prod
(
axis
=
1
),
[
x_val
],
mode
=
self
.
mode
)
...
...
@@ -511,10 +518,10 @@ class test_Prod(unittest.TestCase):
def
test_prod_without_zeros
(
self
):
x
=
theano
.
tensor
.
dmatrix
()
x_val
=
numpy
.
array
([[
1
,
2
,
3
],[
0
,
5
,
6
],[
0
,
0
,
9
]],
dtype
=
'float32'
)
x_val
=
numpy
.
array
([[
1
,
2
,
3
],
[
0
,
5
,
6
],
[
0
,
0
,
9
]],
dtype
=
'float32'
)
pwz
=
ProdWithoutZeros
(
axis
=
1
)(
x
)
fn
=
theano
.
function
([
x
],
pwz
,
mode
=
self
.
mode
)
assert
numpy
.
allclose
(
fn
(
x_val
),
[
6
,
30
,
9
])
assert
numpy
.
allclose
(
fn
(
x_val
),
[
6
,
30
,
9
])
pwz_a0
=
ProdWithoutZeros
(
axis
=
0
)(
x
)
fn_a0
=
theano
.
function
([
x
],
pwz_a0
,
mode
=
self
.
mode
)
...
...
@@ -522,25 +529,30 @@ class test_Prod(unittest.TestCase):
def
test_other_grad_tests
(
self
):
x
=
theano
.
tensor
.
dmatrix
()
x_val1
=
numpy
.
array
([[
1
,
2
,
3
],[
0
,
5
,
6
],[
0
,
0
,
9
]],
dtype
=
'float32'
)
x_val2
=
numpy
.
array
([[
1
,
2
,
0
],[
0
,
5
,
6
],[
7
,
8
,
9
],[
9
,
10
,
0
]],
dtype
=
'float32'
)
x_val1
=
numpy
.
array
([[
1
,
2
,
3
],
[
0
,
5
,
6
],
[
0
,
0
,
9
]],
dtype
=
'float32'
)
x_val2
=
numpy
.
array
([[
1
,
2
,
0
],
[
0
,
5
,
6
],
[
7
,
8
,
9
],
[
9
,
10
,
0
]],
dtype
=
'float32'
)
rng
=
rng
=
numpy
.
random
.
RandomState
(
43
)
p
=
Prod
(
axis
=
1
)
grad_p
=
theano
.
tensor
.
grad
(
p
(
x
)
.
sum
(),
x
)
grad_fn
=
theano
.
function
([
x
],
grad_p
,
mode
=
self
.
mode
)
assert
numpy
.
allclose
(
grad_fn
(
x_val1
),
[[
6.
,
3.
,
2.
],[
30.
,
0.
,
0.
],[
0.
,
0.
,
0.
]])
assert
numpy
.
allclose
(
grad_fn
(
x_val2
),
[[
0.
,
0.
,
2.
],
[
30.
,
0.
,
0.
],
[
72.
,
63.
,
56.
],
[
0.
,
0.
,
90.
]])
assert
numpy
.
allclose
(
grad_fn
(
x_val1
),
[[
6.
,
3.
,
2.
],
[
30.
,
0.
,
0.
],
[
0.
,
0.
,
0.
]])
assert
numpy
.
allclose
(
grad_fn
(
x_val2
),
[[
0.
,
0.
,
2.
],
[
30.
,
0.
,
0.
],
[
72.
,
63.
,
56.
],
[
0.
,
0.
,
90.
]])
p_axis0
=
Prod
(
axis
=
0
)
grad_p_axis0
=
theano
.
tensor
.
grad
(
p_axis0
(
x
)
.
sum
(),
x
)
grad_fn_axis0
=
theano
.
function
([
x
],
grad_p_axis0
,
mode
=
self
.
mode
)
assert
numpy
.
allclose
(
grad_fn_axis0
(
x_val2
),
[[
0.
,
400.
,
0.
],[
63.
,
160.
,
0.
],
[
0.
,
100.
,
0.
],
[
0.
,
80.
,
0.
]])
assert
numpy
.
allclose
(
grad_fn_axis0
(
x_val2
),
[[
0.
,
400.
,
0.
],
[
63.
,
160.
,
0.
],
[
0.
,
100.
,
0.
],
[
0.
,
80.
,
0.
]])
tensor
.
verify_grad
(
p
,
[
x_val1
],
rng
=
rng
,
mode
=
self
.
mode
)
def
test_mul_without_zeros_zeros
(
self
):
a
=
numpy
.
zeros
((
3
,
3
))
a
=
numpy
.
zeros
((
3
,
3
))
x
=
theano
.
tensor
.
dmatrix
()
...
...
@@ -655,6 +667,7 @@ class T_sum_dtype(unittest.TestCase):
idx
+=
1
class
T_mean_dtype
(
unittest
.
TestCase
):
def
test_mean_default_dtype
(
self
):
"""
...
...
@@ -710,6 +723,7 @@ class T_mean_dtype(unittest.TestCase):
idx
+=
1
class
T_prod_dtype
(
unittest
.
TestCase
):
def
test_prod_default_dtype
(
self
):
"""
...
...
@@ -761,6 +775,7 @@ class T_prod_dtype(unittest.TestCase):
idx
+=
1
class
T_prod_without_zeros_dtype
(
unittest
.
TestCase
):
def
test_prod_without_zeros_default_dtype
(
self
):
"""
...
...
@@ -844,11 +859,8 @@ if __name__ == '__main__':
"""
if
__name__
==
'__main__'
:
t
=
TestElemwise
(
'setUp'
)
t
.
setUp
()
t
.
test_infer_shape
()
theano/tensor/tests/test_naacl09.py
浏览文件 @
7885e618
...
...
@@ -12,15 +12,19 @@ import sys
from
theano.tests
import
unittest_tools
from
numpy.testing.noseclasses
import
KnownFailureTest
def
cross_entropy
(
target
,
output
,
axis
=
1
):
"""
@todo: This is essentially duplicated as tensor.nnet.binary_crossentropy
@warning: OUTPUT and TARGET are reversed in tensor.nnet.binary_crossentropy
"""
return
-
T
.
mean
(
target
*
T
.
log
(
output
)
+
(
1
-
target
)
*
T
.
log
(
1
-
output
),
axis
=
axis
)
def
quadratic
(
target
,
output
,
axis
=
1
):
return
T
.
mean
(
T
.
sqr
(
target
-
output
),
axis
=
axis
)
class
QuadraticDenoisingAA
(
module
.
Module
):
"""Quadratic de-noising Auto-encoder
...
...
@@ -35,15 +39,15 @@ class QuadraticDenoisingAA(module.Module):
"""
def
__init__
(
self
,
input
=
None
,
input
=
None
,
# regularize = False,
tie_weights
=
False
,
n_quadratic_filters
=
1
,
_w1
=
None
,
_w2
=
None
,
_b1
=
None
,
_b2
=
None
,
_qfilters
=
None
,
tie_weights
=
False
,
n_quadratic_filters
=
1
,
_w1
=
None
,
_w2
=
None
,
_b1
=
None
,
_b2
=
None
,
_qfilters
=
None
,
activation_function
=
NN
.
sigmoid
,
reconstruction_cost_function
=
cross_entropy
):
"""
...
...
@@ -83,7 +87,8 @@ class QuadraticDenoisingAA(module.Module):
# PARAMETERS
if
_qfilters
is
None
:
#self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
self
.
qfilters
=
[(
T
.
dmatrix
(
'q
%
i'
%
i
))
for
i
in
xrange
(
n_quadratic_filters
)]
self
.
qfilters
=
[(
T
.
dmatrix
(
'q
%
i'
%
i
))
for
i
in
xrange
(
n_quadratic_filters
)]
else
:
#self.qfilters = [theano.Member(q) for q in _qfilters]
self
.
qfilters
=
[(
q
)
for
q
in
_qfilters
]
...
...
@@ -91,7 +96,8 @@ class QuadraticDenoisingAA(module.Module):
#self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
if
_w1
is
None
:
self
.
w1
=
(
T
.
matrix
(
'w1'
))
else
:
self
.
w1
=
(
_w1
)
else
:
self
.
w1
=
(
_w1
)
if
_w2
is
None
:
if
not
tie_weights
:
#self.w2 = theano.Member(T.matrix())
...
...
@@ -104,30 +110,30 @@ class QuadraticDenoisingAA(module.Module):
#self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
if
_b1
is
None
:
self
.
b1
=
(
T
.
vector
(
'b1'
))
else
:
self
.
b1
=
(
_b1
)
else
:
self
.
b1
=
(
_b1
)
#self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
if
_b2
is
None
:
self
.
b2
=
(
T
.
vector
(
'b2'
))
else
:
self
.
b2
=
(
_b2
)
else
:
self
.
b2
=
(
_b2
)
# # REGULARIZATION COST
# self.regularization = self.build_regularization()
### NOISELESS ###
# HIDDEN LAYER
def
_act
(
x
):
if
len
(
self
.
qfilters
)
>
0
:
qsum
=
10e-10
# helps to control the gradient in the square-root below
for
qf
in
self
.
qfilters
:
qsum
=
qsum
+
T
.
dot
(
x
,
qf
)
**
2
qsum
=
qsum
+
T
.
dot
(
x
,
qf
)
**
2
return
T
.
dot
(
x
,
self
.
w1
)
+
self
.
b1
+
T
.
sqrt
(
qsum
)
else
:
return
T
.
dot
(
x
,
self
.
w1
)
+
self
.
b1
self
.
hidden_activation
=
_act
(
self
.
input
)
#
noise-free hidden
self
.
hidden_activation
=
_act
(
self
.
input
)
#
noise-free hidden
self
.
hidden
=
self
.
hid_activation_function
(
self
.
hidden_activation
)
...
...
@@ -144,7 +150,6 @@ class QuadraticDenoisingAA(module.Module):
# if self.regularize:
# self.cost = self.cost + self.regularization
### WITH NOISE ###
self
.
corrupted_input
=
self
.
build_corrupted_input
()
...
...
@@ -165,7 +170,6 @@ class QuadraticDenoisingAA(module.Module):
# if self.regularize:
# self.ncost = self.ncost + self.regularization
# GRADIENTS AND UPDATES
if
self
.
tie_weights
:
self
.
params
=
[
self
.
w1
,
self
.
b1
,
self
.
b2
]
+
self
.
qfilters
...
...
@@ -173,7 +177,8 @@ class QuadraticDenoisingAA(module.Module):
self
.
params
=
[
self
.
w1
,
self
.
w2
,
self
.
b1
,
self
.
b2
]
+
self
.
qfilters
gradients
=
T
.
grad
(
self
.
ncost
,
self
.
params
)
updates
=
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
zip
(
self
.
params
,
gradients
))
updates
=
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
zip
(
self
.
params
,
gradients
))
# INTERFACE METHODS
#self.update = theano.Method(self.input, self.ncost, updates)
...
...
@@ -192,16 +197,17 @@ class QuadraticDenoisingAA(module.Module):
filter's initial range)
"""
if
(
input_size
is
None
)
^
(
hidden_size
is
None
):
raise
ValueError
(
"Must specify input_size and hidden_size or neither."
)
raise
ValueError
(
"Must specify input_size and hidden_size or neither."
)
super
(
QuadraticDenoisingAA
,
self
)
.
_instance_initialize
(
obj
,
{})
obj
.
random
.
initialize
()
R
=
N
.
random
.
RandomState
(
unittest_tools
.
fetch_seed
(
seed
))
if
input_size
is
not
None
:
sz
=
(
input_size
,
hidden_size
)
inf
=
1
/
N
.
sqrt
(
input_size
)
hif
=
1
/
N
.
sqrt
(
hidden_size
)
obj
.
w1
=
N
.
asarray
(
R
.
uniform
(
size
=
sz
,
low
=
-
inf
,
high
=
inf
),
inf
=
1
/
N
.
sqrt
(
input_size
)
hif
=
1
/
N
.
sqrt
(
hidden_size
)
obj
.
w1
=
N
.
asarray
(
R
.
uniform
(
size
=
sz
,
low
=-
inf
,
high
=
inf
),
dtype
=
config
.
floatX
)
if
not
self
.
tie_weights
:
obj
.
w2
=
N
.
asarray
(
...
...
@@ -257,14 +263,17 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA):
def
_instance_initialize
(
self
,
obj
,
input_size
,
hidden_size
,
noise_level
,
seed
,
lr
,
qfilter_relscale
):
# obj.l2_coef = 0.0
obj
.
noise_level
=
N
.
asarray
(
noise_level
,
dtype
=
config
.
floatX
)
super
(
SigmoidXEQuadraticDenoisingAA
,
self
)
.
_instance_initialize
(
obj
,
input_size
,
hidden_size
,
seed
,
lr
,
qfilter_relscale
)
super
(
SigmoidXEQuadraticDenoisingAA
,
self
)
.
_instance_initialize
(
obj
,
input_size
,
hidden_size
,
seed
,
lr
,
qfilter_relscale
)
QDAA
=
SigmoidXEQuadraticDenoisingAA
class
Loss01
(
object
):
def
loss_01
(
self
,
x
,
targ
):
return
N
.
mean
(
self
.
classify
(
x
)
!=
targ
)
class
Module_Nclass
(
module
.
FancyModule
):
def
_instance_initialize
(
mod_self
,
self
,
n_in
,
n_out
,
lr
,
seed
):
#self.component is the LogisticRegressionTemplate instance that built this guy.
...
...
@@ -280,29 +289,34 @@ class Module_Nclass(module.FancyModule):
self
.
output_dimension
=
n_out
def
__init__
(
self
,
x
=
None
,
targ
=
None
,
w
=
None
,
b
=
None
,
lr
=
None
,
regularize
=
False
):
super
(
Module_Nclass
,
self
)
.
__init__
()
#
boilerplate
super
(
Module_Nclass
,
self
)
.
__init__
()
#
boilerplate
#self.x = module.Member(x) if x is not None else T.matrix('input')
if
x
is
not
None
:
self
.
x
=
(
x
)
else
:
self
.
x
=
T
.
matrix
(
'input'
)
else
:
self
.
x
=
T
.
matrix
(
'input'
)
#self.targ = module.Member(targ) if targ is not None else T.lvector()
if
targ
is
not
None
:
self
.
targ
=
(
targ
)
else
:
self
.
targ
=
T
.
lvector
()
else
:
self
.
targ
=
T
.
lvector
()
#self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
if
w
is
not
None
:
self
.
w
=
(
w
)
else
:
self
.
w
=
(
T
.
dmatrix
())
else
:
self
.
w
=
(
T
.
dmatrix
())
#self.b = module.Member(b) if b is not None else module.Member(T.dvector())
if
b
is
not
None
:
self
.
b
=
(
b
)
else
:
self
.
b
=
(
T
.
dvector
())
else
:
self
.
b
=
(
T
.
dvector
())
#self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
if
lr
is
not
None
:
self
.
lr
=
(
lr
)
else
:
self
.
lr
=
(
T
.
dscalar
())
else
:
self
.
lr
=
(
T
.
dscalar
())
self
.
params
=
[
p
for
p
in
[
self
.
w
,
self
.
b
]
if
p
.
owner
is
None
]
...
...
@@ -341,13 +355,14 @@ class Module_Nclass(module.FancyModule):
#self.update = module.Method([self.input, self.targ], sum_xent,
#updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
class
ConvolutionalMLP
(
module
.
FancyModule
):
def
__init__
(
self
,
window_size
,
n_quadratic_filters
,
activation_function
,
reconstruction_cost_function
,
tie_weights
=
False
,
tie_weights
=
False
,
# _input,
# _targ
):
...
...
@@ -362,9 +377,9 @@ class ConvolutionalMLP(module.FancyModule):
self
.
input_representations
=
[]
self
.
input_representations
.
append
(
QDAA
(
input
=
self
.
inputs
[
0
],
tie_weights
=
tie_weights
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
tie_weights
=
tie_weights
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
reconstruction_cost_function
)
)
...
...
@@ -373,9 +388,9 @@ class ConvolutionalMLP(module.FancyModule):
self
.
input_representations
.
append
(
QDAA
(
input
=
i
,
tie_weights
=
tie_weights
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
tie_weights
=
tie_weights
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
reconstruction_cost_function
,
_w1
=
self
.
input_representations
[
0
]
.
w1
,
_w2
=
self
.
input_representations
[
0
]
.
w2
,
...
...
@@ -384,14 +399,16 @@ class ConvolutionalMLP(module.FancyModule):
_qfilters
=
self
.
input_representations
[
0
]
.
qfilters
)
)
assert
self
.
input_representations
[
-
1
]
.
w1
is
self
.
input_representations
[
0
]
.
w1
assert
self
.
input_representations
[
-
1
]
.
w1
is
self
.
input_representations
[
0
]
.
w1
self
.
input_representation
=
T
.
concatenate
([
i
.
hidden
for
i
in
self
.
input_representations
],
axis
=
1
)
self
.
input_representation
=
T
.
concatenate
([
i
.
hidden
for
i
in
self
.
input_representations
],
axis
=
1
)
self
.
hidden
=
QDAA
(
input
=
self
.
input_representation
,
tie_weights
=
tie_weights
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
input
=
self
.
input_representation
,
tie_weights
=
tie_weights
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
reconstruction_cost_function
)
self
.
output
=
Module_Nclass
(
x
=
self
.
hidden
.
hidden
,
targ
=
self
.
targ
)
...
...
@@ -408,11 +425,13 @@ class ConvolutionalMLP(module.FancyModule):
self
.
hidden
.
b1
,
self
.
hidden
.
b2
]
+
self
.
hidden
.
qfilters
input_pretraining_cost
=
sum
(
i
.
ncost
for
i
in
self
.
input_representations
)
input_pretraining_cost
=
sum
(
i
.
ncost
for
i
in
self
.
input_representations
)
hidden_pretraining_cost
=
self
.
hidden
.
ncost
input_pretraining_gradients
=
T
.
grad
(
input_pretraining_cost
,
input_pretraining_params
)
hidden_pretraining_gradients
=
T
.
grad
(
hidden_pretraining_cost
,
hidden_pretraining_params
)
hidden_pretraining_gradients
=
T
.
grad
(
hidden_pretraining_cost
,
hidden_pretraining_params
)
pretraining_updates
=
\
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
\
zip
(
input_pretraining_params
,
input_pretraining_gradients
)
\
...
...
@@ -428,8 +447,10 @@ class ConvolutionalMLP(module.FancyModule):
[
self
.
output
.
w
,
self
.
output
.
b
]
finetuning_cost
=
self
.
output
.
cost
finetuning_gradients
=
T
.
grad
(
finetuning_cost
,
finetuning_params
)
finetuning_updates
=
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
zip
(
finetuning_params
,
finetuning_gradients
))
self
.
finetuning_update
=
module
.
Method
(
self
.
inputs
+
[
self
.
targ
],
self
.
output
.
cost
,
finetuning_updates
)
finetuning_updates
=
dict
((
p
,
p
-
self
.
lr
*
g
)
for
p
,
g
in
zip
(
finetuning_params
,
finetuning_gradients
))
self
.
finetuning_update
=
module
.
Method
(
self
.
inputs
+
[
self
.
targ
],
self
.
output
.
cost
,
finetuning_updates
)
#self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
#self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
...
...
@@ -447,8 +468,10 @@ class ConvolutionalMLP(module.FancyModule):
# for layer in obj.layers:
# if layer.lr is None:
# layer.lr = lr
assert
self
.
input_representations
[
-
1
]
is
not
self
.
input_representations
[
0
]
assert
self
.
input_representations
[
-
1
]
.
w1
is
self
.
input_representations
[
0
]
.
w1
assert
self
.
input_representations
[
-
1
]
is
not
self
.
input_representations
[
0
]
assert
self
.
input_representations
[
-
1
]
.
w1
is
self
.
input_representations
[
0
]
.
w1
for
i
in
self
.
input_representations
:
# i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, seed=R.random_integers(2**30), noise_level=noise_level, qfilter_relscale=qfilter_relscale)
...
...
@@ -465,13 +488,16 @@ class ConvolutionalMLP(module.FancyModule):
assert
(
i
.
w2
==
self
.
input_representations
[
0
]
.
w2
)
.
all
()
assert
(
i
.
b1
==
self
.
input_representations
[
0
]
.
b1
)
.
all
()
assert
(
i
.
b2
==
self
.
input_representations
[
0
]
.
b2
)
.
all
()
assert
N
.
all
((
a
==
b
)
.
all
()
for
a
,
b
in
zip
(
i
.
qfilters
,
self
.
input_representations
[
0
]
.
qfilters
))
assert
N
.
all
((
a
==
b
)
.
all
()
for
a
,
b
in
zip
(
i
.
qfilters
,
self
.
input_representations
[
0
]
.
qfilters
))
self
.
hidden
.
initialize
(
input_size
=
(
len
(
self
.
inputs
)
*
self
.
input_representation_size
),
hidden_size
=
self
.
hidden_representation_size
,
noise_level
=
noise_level
,
seed
=
int
(
R
.
random_integers
(
2
**
30
)),
lr
=
lr
,
qfilter_relscale
=
qfilter_relscale
)
self
.
output
.
initialize
(
n_in
=
self
.
hidden_representation_size
,
n_out
=
self
.
output_size
,
lr
=
lr
,
seed
=
R
.
random_integers
(
2
**
30
))
self
.
output
.
initialize
(
n_in
=
self
.
hidden_representation_size
,
n_out
=
self
.
output_size
,
lr
=
lr
,
seed
=
R
.
random_integers
(
2
**
30
))
def
create
(
window_size
=
3
,
input_dimension
=
9
,
...
...
@@ -488,22 +514,24 @@ def create(window_size=3,
activation_function
=
T
.
tanh
architecture
=
ConvolutionalMLP
(
\
window_size
=
window_size
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
quadratic
,
tie_weights
=
False
window_size
=
window_size
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
quadratic
,
tie_weights
=
False
)
backup
=
config
.
warn
.
sum_div_dimshuffle_bug
config
.
warn
.
sum_div_dimshuffle_bug
=
False
try
:
model
=
architecture
.
make
(
input_size
=
input_dimension
,
input_representation_size
=
token_representation_size
,
hidden_representation_size
=
concatenated_representation_size
,
output_size
=
output_vocabsize
,
lr
=
lr
,
seed
=
seed
,
noise_level
=
noise_level
,
qfilter_relscale
=
qfilter_relscale
,
mode
=
compile_mode
)
model
=
architecture
.
make
(
input_size
=
input_dimension
,
input_representation_size
=
token_representation_size
,
hidden_representation_size
=
concatenated_representation_size
,
output_size
=
output_vocabsize
,
lr
=
lr
,
seed
=
seed
,
noise_level
=
noise_level
,
qfilter_relscale
=
qfilter_relscale
,
mode
=
compile_mode
)
finally
:
config
.
warn
.
sum_div_dimshuffle_bug
=
backup
return
model
def
create_realistic
(
window_size
=
3
,
#7,
def
create_realistic
(
window_size
=
3
,
# 7,
input_dimension
=
200
,
output_vocabsize
=
23
,
n_quadratic_filters
=
2
,
...
...
@@ -518,15 +546,17 @@ def create_realistic(window_size=3,#7,
activation_function
=
T
.
tanh
architecture
=
ConvolutionalMLP
(
\
window_size
=
window_size
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
quadratic
,
tie_weights
=
False
window_size
=
window_size
,
n_quadratic_filters
=
n_quadratic_filters
,
activation_function
=
activation_function
,
reconstruction_cost_function
=
quadratic
,
tie_weights
=
False
)
model
=
architecture
.
make
(
input_size
=
input_dimension
,
input_representation_size
=
token_representation_size
,
hidden_representation_size
=
concatenated_representation_size
,
output_size
=
output_vocabsize
,
lr
=
lr
,
seed
=
seed
,
noise_level
=
noise_level
,
qfilter_relscale
=
qfilter_relscale
,
mode
=
compile_mode
)
model
=
architecture
.
make
(
input_size
=
input_dimension
,
input_representation_size
=
token_representation_size
,
hidden_representation_size
=
concatenated_representation_size
,
output_size
=
output_vocabsize
,
lr
=
lr
,
seed
=
seed
,
noise_level
=
noise_level
,
qfilter_relscale
=
qfilter_relscale
,
mode
=
compile_mode
)
return
model
def
test_naacl_model
(
iters_per_unsup
=
3
,
iters_per_sup
=
3
,
optimizer
=
None
,
realistic
=
False
):
#print "BUILDING MODEL"
...
...
@@ -535,11 +565,12 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
if
optimizer
:
mode
=
theano
.
Mode
(
linker
=
'c|py'
,
optimizer
=
optimizer
)
else
:
mode
=
get_default_mode
()
else
:
mode
=
get_default_mode
()
if
mode
.
__class__
.
__name__
==
'DebugMode'
:
iters_per_unsup
=
1
iters_per_sup
=
1
iters_per_unsup
=
1
iters_per_sup
=
1
if
realistic
:
m
=
create_realistic
(
compile_mode
=
mode
)
...
...
@@ -552,7 +583,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
for
i
,
node
in
enumerate
(
m
.
pretraining_update
.
maker
.
fgraph
.
toposort
()):
idx_of_node
[
node
]
=
i
if
False
and
i
>
-
1
:
print
' '
,
i
,
node
,
[(
ii
,
idx_of_node
.
get
(
ii
.
owner
,
'IN'
))
for
ii
in
node
.
inputs
]
print
' '
,
i
,
node
,
[(
ii
,
idx_of_node
.
get
(
ii
.
owner
,
'IN'
))
for
ii
in
node
.
inputs
]
prog_str
.
append
(
str
(
node
))
#print input_pretraining_gradients[4].owner.inputs
#print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
...
...
@@ -562,8 +594,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
rng
=
N
.
random
.
RandomState
(
unittest_tools
.
fetch_seed
(
23904
))
inputs
=
[
rng
.
rand
(
10
,
m
.
input_size
)
for
i
in
1
,
2
,
3
]
targets
=
N
.
asarray
([
0
,
3
,
4
,
2
,
3
,
4
,
4
,
2
,
1
,
0
])
inputs
=
[
rng
.
rand
(
10
,
m
.
input_size
)
for
i
in
1
,
2
,
3
]
targets
=
N
.
asarray
([
0
,
3
,
4
,
2
,
3
,
4
,
4
,
2
,
1
,
0
])
#print inputs
#print 'UNSUPERVISED PHASE'
...
...
@@ -584,8 +616,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
s0
,
s1
=
[
str
(
j
)
for
j
in
m
.
pretraining_update
(
*
inputs
)]
#print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
if
iters_per_unsup
==
3
:
assert
s0
.
startswith
(
'0.927793'
)
#
'0.403044')
assert
s1
.
startswith
(
'0.068035'
)
#
'0.074898')
assert
s0
.
startswith
(
'0.927793'
)
#
'0.403044')
assert
s1
.
startswith
(
'0.068035'
)
#
'0.074898')
#print 'UNSUPERVISED took %.3fs'%(time.time() - t)
#print 'FINETUNING GRAPH'
...
...
@@ -601,6 +633,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
assert
19.7042
<
s0f
and
s0f
<
19.7043
#print 'SUPERVISED took %.3fs'%( time.time() - t)
def
jtest_main
():
from
theano
import
gof
JTEST
=
theano
.
compile
.
mode
.
optdb
.
query
(
*
sys
.
argv
[
2
:])
...
...
@@ -609,13 +642,17 @@ def jtest_main():
optimizer
=
eval
(
sys
.
argv
[
1
])
test_naacl_model
(
optimizer
,
10
,
10
,
realistic
=
False
)
def
real_main
():
test_naacl_model
()
def
profile_main
():
# This is the main function for profiling
# We've renamed our original main() above to real_main()
import
cProfile
,
pstats
,
StringIO
import
cProfile
import
pstats
import
StringIO
prof
=
cProfile
.
Profile
()
prof
=
prof
.
runctx
(
"real_main()"
,
globals
(),
locals
())
stream
=
StringIO
.
StringIO
()
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
7885e618
This source diff could not be displayed because it is too large. You can
view the blob
instead.
theano/tests/test_gradient.py
浏览文件 @
7885e618
...
...
@@ -26,6 +26,7 @@ class testgrad_sources_inputs(unittest.TestCase):
inputs
=
[
theano
.
tensor
.
vector
()]
outputs
=
[
theano
.
tensor
.
vector
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
x
,
=
inp
gz
,
=
grads
...
...
@@ -43,14 +44,15 @@ class testgrad_sources_inputs(unittest.TestCase):
def
make_node
(
self
,
*
inputs
):
outputs
=
[
theano
.
tensor
.
vector
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inputs
,
grads
):
return
[
inputs
[
0
]
.
zeros_like
()
]
return
[
inputs
[
0
]
.
zeros_like
()
]
i
=
theano
.
tensor
.
vector
()
j
=
theano
.
tensor
.
vector
()
a1
=
retOne
()
.
make_node
(
i
)
g
=
grad_sources_inputs
([(
a1
.
out
,
one
)],
None
)
a2
=
retOne
()
.
make_node
(
i
,
j
)
a2
=
retOne
()
.
make_node
(
i
,
j
)
try
:
g
=
grad_sources_inputs
([(
a2
.
out
,
one
)],
None
)
except
ValueError
,
e
:
...
...
@@ -60,11 +62,13 @@ class testgrad_sources_inputs(unittest.TestCase):
def
test_1in_1out
(
self
):
"""Test grad is called correctly for a 1-to-1 op"""
gval
=
theano
.
tensor
.
matrix
()
class
O
(
gof
.
op
.
Op
):
def
make_node
(
self
):
inputs
=
[
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
return
gval
,
a1
=
O
()
.
make_node
()
...
...
@@ -74,11 +78,13 @@ class testgrad_sources_inputs(unittest.TestCase):
def
test_1in_Nout
(
self
):
"""Test grad is called correctly for a 1-to-many op"""
gval
=
theano
.
tensor
.
matrix
()
class
O
(
gof
.
op
.
Op
):
def
make_node
(
self
):
inputs
=
[
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
scalar
(),
theano
.
tensor
.
scalar
()]
outputs
=
[
theano
.
tensor
.
scalar
(),
theano
.
tensor
.
scalar
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
x
,
=
inp
gz1
,
gz2
=
grads
...
...
@@ -91,11 +97,13 @@ class testgrad_sources_inputs(unittest.TestCase):
"""Test grad is called correctly for a many-to-1 op"""
gval0
=
theano
.
tensor
.
scalar
()
gval1
=
theano
.
tensor
.
scalar
()
class
O
(
gof
.
op
.
Op
):
def
make_node
(
self
):
inputs
=
[
theano
.
tensor
.
scalar
(),
theano
.
tensor
.
scalar
()]
outputs
=
[
theano
.
tensor
.
matrix
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
x0
,
x1
=
inp
gz
,
=
grads
...
...
@@ -109,11 +117,13 @@ class testgrad_sources_inputs(unittest.TestCase):
"""Test grad is called correctly for a many-to-many op"""
gval0
=
theano
.
tensor
.
matrix
()
gval1
=
theano
.
tensor
.
matrix
()
class
O
(
gof
.
op
.
Op
):
def
make_node
(
self
):
inputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
inputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inp
,
grads
):
return
gval0
,
gval1
a1
=
O
()
.
make_node
()
...
...
@@ -126,9 +136,11 @@ class testgrad_sources_inputs(unittest.TestCase):
class
O
(
gof
.
op
.
Op
):
def
__init__
(
self
,
tst
):
self
.
tst
=
tst
def
make_node
(
self
,
*
inputs
):
outputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
outputs
=
[
theano
.
tensor
.
matrix
(),
theano
.
tensor
.
matrix
()]
return
gof
.
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inputs
,
g_out
):
return
[
one
]
i
=
theano
.
tensor
.
matrix
()
...
...
@@ -136,26 +148,29 @@ class testgrad_sources_inputs(unittest.TestCase):
g
=
grad_sources_inputs
([(
a1
.
outputs
[
0
],
one
)],
None
)
self
.
assertTrue
(
g
[
i
]
is
one
)
def
test_unimplemented_grad_func
():
# tests that function compilation catches unimplemented grads in the graph
a
=
theano
.
tensor
.
vector
()
b
=
theano
.
gradient
.
grad_not_implemented
(
theano
.
tensor
.
add
,
0
,
a
)
try
:
f
=
theano
.
function
([
a
],
b
,
on_unused_input
=
'ignore'
)
f
=
theano
.
function
([
a
],
b
,
on_unused_input
=
'ignore'
)
assert
0
except
TypeError
:
pass
def
test_undefined_grad_func
():
#tests that function compilation catches undefined grads in the graph
a
=
theano
.
tensor
.
vector
()
b
=
theano
.
gradient
.
grad_undefined
(
theano
.
tensor
.
add
,
0
,
a
)
try
:
f
=
theano
.
function
([
a
],
b
,
on_unused_input
=
'ignore'
)
f
=
theano
.
function
([
a
],
b
,
on_unused_input
=
'ignore'
)
assert
0
except
TypeError
:
pass
def
test_unimplemented_grad_grad
():
#tests that unimplemented grads are caught in the grad method
...
...
@@ -164,132 +179,137 @@ def test_unimplemented_grad_grad():
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
grad
(
self
,
inputs
,
output_grads
):
return
[
theano
.
gradient
.
grad_not_implemented
(
self
,
0
,
inputs
[
0
])
]
return
[
theano
.
gradient
.
grad_not_implemented
(
self
,
0
,
inputs
[
0
])
]
a
=
theano
.
tensor
.
scalar
()
b
=
DummyOp
()(
a
)
try
:
g
=
theano
.
gradient
.
grad
(
b
,
a
)
g
=
theano
.
gradient
.
grad
(
b
,
a
)
assert
False
except
TypeError
:
pass
def
test_undefined_grad_grad
():
#tests that undefined grads are caught in the grad method
V
=
theano
.
tensor
.
TensorType
(
dtype
=
config
.
floatX
,
broadcastable
=
(
False
,
False
,
False
,
False
,
False
))()
broadcastable
=
(
False
,
False
,
False
,
False
,
False
))()
W
=
theano
.
tensor
.
TensorType
(
dtype
=
config
.
floatX
,
broadcastable
=
(
False
,
False
,
False
,
False
,
False
))()
broadcastable
=
(
False
,
False
,
False
,
False
,
False
))()
b
=
theano
.
tensor
.
vector
()
d
=
theano
.
tensor
.
ivector
()
Z
=
conv3D
(
V
,
W
,
b
,
d
)
Z
=
conv3D
(
V
,
W
,
b
,
d
)
try
:
g
=
theano
.
gradient
.
grad
(
Z
.
sum
(),
d
)
g
=
theano
.
gradient
.
grad
(
Z
.
sum
(),
d
)
assert
False
except
TypeError
:
pass
def
test_grad_name
():
A
=
theano
.
tensor
.
matrix
(
'A'
)
x
=
theano
.
tensor
.
vector
(
'x'
)
f
=
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
f
=
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
f
.
name
=
'f'
g
=
theano
.
tensor
.
grad
(
f
,
x
)
g
=
theano
.
tensor
.
grad
(
f
,
x
)
assert
g
.
name
==
'(df/dx)'
def
test_grad_duplicate_input
():
#test that the grad works when a variable
#appears in more than one place in a node's input list
def
output
(
x
):
return
(
x
*
x
)
return
(
x
*
x
)
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,[
vx
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
])
def
test_grad_quadratic
():
#test the gradient on a tiny graph
def
cost
(
x
,
A
):
return
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
def
cost
(
x
,
A
):
return
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
cost
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
cost
,
[
vx
,
vA
])
def
test_grad_quadratic_vector
():
#test the gradient on a small graph
def
output
(
x
,
A
):
return
theano
.
tensor
.
dot
(
x
*
x
,
A
)
def
output
(
x
,
A
):
return
theano
.
tensor
.
dot
(
x
*
x
,
A
)
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
,
vA
])
def
test_grad_cubic
():
#test the gradient on a bigger graph
def
cost
(
x
,
A
):
return
theano
.
tensor
.
dot
(
x
*
x
,
theano
.
tensor
.
dot
(
A
,
x
))
def
cost
(
x
,
A
):
return
theano
.
tensor
.
dot
(
x
*
x
,
theano
.
tensor
.
dot
(
A
,
x
))
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
cost
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
cost
,[
vx
,
vA
])
def
test_grad_grad_quadratic
():
#test the gradient on a graph constructed using the gradient
def
output
(
x
,
A
):
orig_cost
=
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
def
output
(
x
,
A
):
orig_cost
=
theano
.
tensor
.
dot
(
x
,
theano
.
tensor
.
dot
(
A
,
x
))
return
theano
.
gradient
.
grad
(
orig_cost
,
x
)
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,[
vx
,
vA
])
def
test_grad_grad_cubic
():
#test the gradient on a bigger graph constructed using the gradient
def
output
(
x
,
A
):
orig_cost
=
theano
.
tensor
.
dot
(
x
*
x
,
theano
.
tensor
.
dot
(
A
,
x
))
def
output
(
x
,
A
):
orig_cost
=
theano
.
tensor
.
dot
(
x
*
x
,
theano
.
tensor
.
dot
(
A
,
x
))
return
theano
.
gradient
.
grad
(
orig_cost
,
x
)
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
rng
=
np
.
random
.
RandomState
([
2012
,
8
,
28
])
vx
=
rng
.
randn
(
2
)
vA
=
rng
.
randn
(
2
,
2
)
vA
=
rng
.
randn
(
2
,
2
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,
[
vx
,
vA
])
theano
.
tests
.
unittest_tools
.
verify_grad
(
output
,[
vx
,
vA
])
def
test_grad_int
():
...
...
@@ -300,11 +320,11 @@ def test_grad_int():
b
=
theano
.
tensor
.
vector
()
def
make_grad_func
(
X
):
Z
=
theano
.
tensor
.
dot
(
X
,
W
)
+
b
Z
=
theano
.
tensor
.
dot
(
X
,
W
)
+
b
H
=
theano
.
tensor
.
nnet
.
sigmoid
(
Z
)
cost
=
H
.
sum
()
g
=
gradient
.
grad
(
cost
,
X
)
return
theano
.
function
([
X
,
W
,
b
],
g
,
on_unused_input
=
'ignore'
)
g
=
gradient
.
grad
(
cost
,
X
)
return
theano
.
function
([
X
,
W
,
b
],
g
,
on_unused_input
=
'ignore'
)
int_func
=
make_grad_func
(
theano
.
tensor
.
imatrix
())
#we have to use float64 as the float type to get the results to match
...
...
@@ -314,17 +334,17 @@ def test_grad_int():
m
=
5
d
=
3
n
=
4
rng
=
np
.
random
.
RandomState
([
2012
,
9
,
5
])
rng
=
np
.
random
.
RandomState
([
2012
,
9
,
5
])
int_type
=
theano
.
tensor
.
imatrix
()
.
dtype
float_type
=
'float64'
X
=
np
.
cast
[
int_type
](
rng
.
randn
(
m
,
d
)
*
127.
)
W
=
np
.
cast
[
W
.
dtype
](
rng
.
randn
(
d
,
n
))
X
=
np
.
cast
[
int_type
](
rng
.
randn
(
m
,
d
)
*
127.
)
W
=
np
.
cast
[
W
.
dtype
](
rng
.
randn
(
d
,
n
))
b
=
np
.
cast
[
b
.
dtype
](
rng
.
randn
(
n
))
int_result
=
int_func
(
X
,
W
,
b
)
float_result
=
float_func
(
np
.
cast
[
float_type
](
X
),
W
,
b
)
int_result
=
int_func
(
X
,
W
,
b
)
float_result
=
float_func
(
np
.
cast
[
float_type
](
X
),
W
,
b
)
assert
np
.
allclose
(
int_result
,
float_result
)
...
...
@@ -333,23 +353,23 @@ def test_grad_disconnected():
#tests corner cases of gradient for shape and alloc
x
=
theano
.
tensor
.
vector
(
name
=
'x'
)
x
=
theano
.
tensor
.
vector
(
name
=
'x'
)
total
=
x
.
sum
()
total
.
name
=
'total'
num_elements
=
x
.
shape
[
0
]
num_elements
.
name
=
'num_elements'
silly_vector
=
theano
.
tensor
.
alloc
(
total
/
num_elements
,
num_elements
)
silly_vector
=
theano
.
tensor
.
alloc
(
total
/
num_elements
,
num_elements
)
silly_vector
.
name
=
'silly_vector'
cost
=
silly_vector
.
sum
()
cost
.
name
=
'cost'
#note that cost simplifies to be the same as "total"
g
=
gradient
.
grad
(
cost
,
x
,
add_names
=
False
)
g
=
gradient
.
grad
(
cost
,
x
,
add_names
=
False
)
#we still need to pass in x because it determines the shape of the output
f
=
theano
.
function
([
x
],
g
)
rng
=
np
.
random
.
RandomState
([
2012
,
9
,
5
])
f
=
theano
.
function
([
x
],
g
)
rng
=
np
.
random
.
RandomState
([
2012
,
9
,
5
])
x
=
np
.
cast
[
x
.
dtype
](
rng
.
randn
(
3
))
g
=
f
(
x
)
assert
np
.
allclose
(
g
,
np
.
ones
(
x
.
shape
,
dtype
=
x
.
dtype
))
assert
np
.
allclose
(
g
,
np
.
ones
(
x
.
shape
,
dtype
=
x
.
dtype
))
def
test_disconnected_nan
():
...
...
@@ -361,27 +381,27 @@ def test_disconnected_nan():
class
Op1
(
theano
.
gof
.
Op
):
def
make_node
(
self
,
x
):
return
theano
.
Apply
(
self
,
inputs
=
[
x
],
outputs
=
[
x
.
type
(),
theano
.
tensor
.
scalar
()
])
outputs
=
[
x
.
type
(),
theano
.
tensor
.
scalar
()
])
def
connection_pattern
(
self
,
node
):
return
[[
True
,
False
]]
def
grad
(
self
,
inputs
,
output_grads
):
return
[
inputs
[
0
]
.
zeros_like
()
]
return
[
inputs
[
0
]
.
zeros_like
()
]
# Op2 has two inputs, f and g
# Its gradient with respect to g is not defined
class
Op2
(
theano
.
gof
.
Op
):
def
make_node
(
self
,
f
,
g
):
return
theano
.
Apply
(
self
,
inputs
=
[
f
,
g
],
outputs
=
[
theano
.
tensor
.
scalar
()
])
return
theano
.
Apply
(
self
,
inputs
=
[
f
,
g
],
outputs
=
[
theano
.
tensor
.
scalar
()
])
def
grad
(
self
,
inputs
,
output_grads
):
return
[
inputs
[
0
]
.
zeros_like
(),
NullType
()()
]
return
[
inputs
[
0
]
.
zeros_like
(),
NullType
()()
]
x
=
theano
.
tensor
.
vector
()
f
,
g
=
Op1
()(
x
)
cost
=
Op2
()(
f
,
g
)
cost
=
Op2
()(
f
,
g
)
# cost is differentiable wrt x
# but we can't tell that without using Op1's connection pattern
...
...
@@ -394,7 +414,6 @@ def test_disconnected_nan():
# connection_pattern functionality worked correctly
def
test_sum_disconnected
():
# Tests that we can add DisconnectedType to other terms correctly
...
...
@@ -402,7 +421,7 @@ def test_sum_disconnected():
y
=
x
*
2.
z
=
x
+
1.
cost
=
y
+
z
theano
.
tensor
.
grad
(
cost
,
x
,
consider_constant
=
[
y
,
z
])
theano
.
tensor
.
grad
(
cost
,
x
,
consider_constant
=
[
y
,
z
])
# In an earlier version of theano, the above line would have failed
# while trying to add two DisconnectedTypes
...
...
theano/tests/test_rop.py
浏览文件 @
7885e618
...
...
@@ -47,7 +47,7 @@ class BreakRop(Op):
out
[
0
]
=
x
def
grad
(
self
,
inp
,
grads
):
return
[
grad_undefined
(
self
,
0
,
inp
[
0
])
]
return
[
grad_undefined
(
self
,
0
,
inp
[
0
])
]
def
R_op
(
self
,
inputs
,
eval_points
):
return
[
None
]
...
...
@@ -330,21 +330,21 @@ class test_RopLop(RopLop_checker):
m_
=
tensor
.
matrix
(
'm_'
)
v_
=
tensor
.
vector
(
'v_'
)
mval
=
self
.
rng
.
uniform
(
size
=
(
3
,
7
))
.
astype
(
theano
.
config
.
floatX
)
mval
=
self
.
rng
.
uniform
(
size
=
(
3
,
7
))
.
astype
(
theano
.
config
.
floatX
)
vval
=
self
.
rng
.
uniform
(
size
=
(
7
,))
.
astype
(
theano
.
config
.
floatX
)
m_val
=
self
.
rng
.
uniform
(
size
=
(
3
,
7
))
.
astype
(
theano
.
config
.
floatX
)
m_val
=
self
.
rng
.
uniform
(
size
=
(
3
,
7
))
.
astype
(
theano
.
config
.
floatX
)
v_val
=
self
.
rng
.
uniform
(
size
=
(
7
,))
.
astype
(
theano
.
config
.
floatX
)
rop_out1
=
tensor
.
Rop
([
m
,
v
,
m
+
v
],
[
m
,
v
],
[
m_
,
v_
])
rop_out1
=
tensor
.
Rop
([
m
,
v
,
m
+
v
],
[
m
,
v
],
[
m_
,
v_
])
assert
isinstance
(
rop_out1
,
list
)
assert
len
(
rop_out1
)
==
3
rop_out2
=
tensor
.
Rop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
rop_out2
=
tensor
.
Rop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
assert
isinstance
(
rop_out2
,
tuple
)
assert
len
(
rop_out2
)
==
3
lop_out1
=
tensor
.
Lop
([
m
,
v
,
m
+
v
],
(
m
,
v
),
[
m_
,
v_
])
lop_out1
=
tensor
.
Lop
([
m
,
v
,
m
+
v
],
(
m
,
v
),
[
m_
,
v_
])
assert
isinstance
(
lop_out1
,
tuple
)
assert
len
(
lop_out1
)
==
2
lop_out2
=
tensor
.
Lop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
lop_out2
=
tensor
.
Lop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
assert
isinstance
(
lop_out2
,
list
)
assert
len
(
lop_out2
)
==
2
...
...
@@ -353,4 +353,3 @@ class test_RopLop(RopLop_checker):
all_outs
.
extend
(
o
)
f
=
theano
.
function
([
m
,
v
,
m_
,
v_
],
all_outs
)
f
(
mval
,
vval
,
m_val
,
v_val
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论