Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
fe026b97
提交
fe026b97
authored
7月 02, 2012
作者:
Eric Larsen
提交者:
Frederic
7月 26, 2012
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test_nnet.py: layout cleanup
上级
9c66685f
显示空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
177 行增加
和
169 行删除
+177
-169
test_nnet.py
theano/tensor/nnet/tests/test_nnet.py
+177
-169
没有找到文件。
theano/tensor/nnet/tests/test_nnet.py
浏览文件 @
fe026b97
...
@@ -22,42 +22,53 @@ from theano.tensor.nnet import (categorical_crossentropy,
...
@@ -22,42 +22,53 @@ from theano.tensor.nnet import (categorical_crossentropy,
CrossentropyCategorical1Hot
,
CrossentropyCategorical1Hot
,
CrossentropyCategorical1HotGrad
,
CrossentropyCategorical1HotGrad
,
sigmoid
,
softplus
,
sigmoid
,
softplus
,
Softmax
,
softmax
,
SoftmaxWithBias
,
softmax_grad
,
Softmax
,
softmax
,
SoftmaxWithBias
,
softmax_grad
,
softmax_with_bias
,
SoftmaxGrad
,
softmax_with_bias
,
SoftmaxGrad
,
Prepend_scalar_constant_to_each_row
,
Prepend_scalar_constant_to_each_row
,
Prepend_scalar_to_each_row
)
Prepend_scalar_to_each_row
)
from
theano.tensor
import
dmatrix
,
dvector
,
lvector
,
dscalar
from
theano.tensor
import
dmatrix
,
dvector
,
lvector
,
dscalar
class
T_sigmoid
(
unittest
.
TestCase
):
class
T_sigmoid
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
utt
.
seed_rng
()
utt
.
seed_rng
()
def
test_elemwise
(
self
):
def
test_elemwise
(
self
):
utt
.
verify_grad
(
sigmoid
,
[
numpy
.
random
.
rand
(
3
,
4
)])
utt
.
verify_grad
(
sigmoid
,
[
numpy
.
random
.
rand
(
3
,
4
)])
class
T_softplus
(
unittest
.
TestCase
):
class
T_softplus
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
utt
.
seed_rng
()
utt
.
seed_rng
()
def
test_elemwise
(
self
):
def
test_elemwise
(
self
):
utt
.
verify_grad
(
softplus
,
[
numpy
.
random
.
rand
(
3
,
4
)])
utt
.
verify_grad
(
softplus
,
[
numpy
.
random
.
rand
(
3
,
4
)])
class
T_Softmax
(
utt
.
InferShapeTester
):
class
T_Softmax
(
utt
.
InferShapeTester
):
def
test0
(
self
):
def
test0
(
self
):
def
f
(
a
):
def
f
(
a
):
return
softmax
(
a
)[:,
0
]
return
softmax
(
a
)[:,
0
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
def
test1
(
self
):
def
test1
(
self
):
def
f
(
a
):
def
f
(
a
):
return
softmax
(
a
)[:,
1
]
return
softmax
(
a
)[:,
1
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
def
test2
(
self
):
def
test2
(
self
):
def
f
(
a
):
def
f
(
a
):
return
softmax
(
a
)[:,
2
]
return
softmax
(
a
)[:,
2
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
def
test3
(
self
):
def
test3
(
self
):
def
f
(
a
):
def
f
(
a
):
return
softmax
(
a
)[:,
3
]
return
softmax
(
a
)[:,
3
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
def
test_infer_shape
(
self
):
def
test_infer_shape
(
self
):
admat
=
dmatrix
()
admat
=
dmatrix
()
...
@@ -71,6 +82,7 @@ class T_Softmax(utt.InferShapeTester):
...
@@ -71,6 +82,7 @@ class T_Softmax(utt.InferShapeTester):
xv
=
numpy
.
random
.
randn
(
6
)
.
astype
(
config
.
floatX
)
xv
=
numpy
.
random
.
randn
(
6
)
.
astype
(
config
.
floatX
)
assert
numpy
.
allclose
(
f
(
xv
),
numpy
.
exp
(
xv
)
/
numpy
.
exp
(
xv
)
.
sum
())
assert
numpy
.
allclose
(
f
(
xv
),
numpy
.
exp
(
xv
)
/
numpy
.
exp
(
xv
)
.
sum
())
def
test_vector_grad
(
self
):
def
test_vector_grad
(
self
):
def
f
(
a
):
def
f
(
a
):
return
softmax
(
a
)
return
softmax
(
a
)
...
@@ -81,43 +93,46 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
...
@@ -81,43 +93,46 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
def
test0
(
self
):
def
test0
(
self
):
def
f
(
a
,
b
):
def
f
(
a
,
b
):
return
softmax_with_bias
(
a
,
b
)[:,
0
]
return
softmax_with_bias
(
a
,
b
)[:,
0
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
numpy
.
random
.
rand
(
4
)])
numpy
.
random
.
rand
(
4
)])
def
test1
(
self
):
def
test1
(
self
):
def
f
(
a
,
b
):
def
f
(
a
,
b
):
return
softmax_with_bias
(
a
,
b
)[:,
1
]
return
softmax_with_bias
(
a
,
b
)[:,
1
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
numpy
.
random
.
rand
(
4
)])
numpy
.
random
.
rand
(
4
)])
def
test2
(
self
):
def
test2
(
self
):
def
f
(
a
,
b
):
def
f
(
a
,
b
):
return
softmax_with_bias
(
a
,
b
)[:,
2
]
return
softmax_with_bias
(
a
,
b
)[:,
2
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
numpy
.
random
.
rand
(
4
)])
numpy
.
random
.
rand
(
4
)])
def
test3
(
self
):
def
test3
(
self
):
def
f
(
a
,
b
):
def
f
(
a
,
b
):
return
softmax_with_bias
(
a
,
b
)[:,
3
]
return
softmax_with_bias
(
a
,
b
)[:,
3
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
numpy
.
random
.
rand
(
4
)])
numpy
.
random
.
rand
(
4
)])
def
test_broadcast
(
self
):
def
test_broadcast
(
self
):
#test that we don't raise an error during optimization for no good
#test that we don't raise an error during optimization for no good
#reason as softmax_with_bias don't support correctly some/all
#reason as softmax_with_bias don't support correctly some/all
#broadcasted inputs pattern
#broadcasted inputs pattern
initial_W
=
numpy
.
asarray
(
[[
0.1
,
0.1
,
0.1
],
\
initial_W
=
numpy
.
asarray
([[
0.1
,
0.1
,
0.1
],
\
[
0.1
,
0.1
,
0.1
],
\
[
0.1
,
0.1
,
0.1
],
\
[
0.1
,
0.1
,
0.1
]],
\
[
0.1
,
0.1
,
0.1
]],
\
dtype
=
theano
.
config
.
floatX
)
dtype
=
theano
.
config
.
floatX
)
W
=
theano
.
shared
(
value
=
initial_W
,
name
=
'W'
)
W
=
theano
.
shared
(
value
=
initial_W
,
name
=
'W'
)
vbias
=
theano
.
shared
(
value
=
0.1
,
name
=
'vbias'
)
#0.01
vbias
=
theano
.
shared
(
value
=
0.1
,
name
=
'vbias'
)
# 0.01
hid
=
T
.
vector
(
'hid'
)
hid
=
T
.
vector
(
'hid'
)
f
=
theano
.
function
([
hid
],
f
=
theano
.
function
([
hid
],
T
.
nnet
.
softmax
(
T
.
dot
(
hid
,
W
.
T
)
+
vbias
))
T
.
nnet
.
softmax
(
T
.
dot
(
hid
,
W
.
T
)
+
vbias
))
ops
=
[
node
.
op
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
ops
=
[
node
.
op
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
assert
softmax_with_bias
not
in
ops
assert
softmax_with_bias
not
in
ops
assert
softmax
in
ops
assert
softmax
in
ops
f
([
0
,
1
,
0
])
f
([
0
,
1
,
0
])
#print f.maker.fgraph.toposort()
#print f.maker.fgraph.toposort()
def
test_infer_shape
(
self
):
def
test_infer_shape
(
self
):
...
@@ -125,13 +140,14 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
...
@@ -125,13 +140,14 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
advec
=
dvector
()
advec
=
dvector
()
admat_val
=
numpy
.
random
.
rand
(
3
,
4
)
admat_val
=
numpy
.
random
.
rand
(
3
,
4
)
advec_val
=
numpy
.
random
.
rand
(
4
)
advec_val
=
numpy
.
random
.
rand
(
4
)
self
.
_compile_and_check
([
admat
,
advec
],
[
SoftmaxWithBias
()(
admat
,
advec
)],
self
.
_compile_and_check
([
admat
,
advec
],
[
SoftmaxWithBias
()(
admat
,
advec
)],
[
admat_val
,
advec_val
],
SoftmaxWithBias
)
[
admat_val
,
advec_val
],
SoftmaxWithBias
)
class
T_SoftmaxGrad
(
utt
.
InferShapeTester
):
class
T_SoftmaxGrad
(
utt
.
InferShapeTester
):
def
test_infer_shape
(
self
):
def
test_infer_shape
(
self
):
admat
=
dmatrix
()
admat
=
dmatrix
()
bdmat
=
dmatrix
()
bdmat
=
dmatrix
()
admat_val
=
numpy
.
random
.
rand
(
3
,
4
)
admat_val
=
numpy
.
random
.
rand
(
3
,
4
)
...
@@ -140,32 +156,41 @@ class T_SoftmaxGrad(utt.InferShapeTester):
...
@@ -140,32 +156,41 @@ class T_SoftmaxGrad(utt.InferShapeTester):
[
admat_val
,
bdmat_val
],
SoftmaxGrad
)
[
admat_val
,
bdmat_val
],
SoftmaxGrad
)
class
T_CrossentropySoftmax1Hot
(
unittest
.
TestCase
):
class
T_CrossentropySoftmax1Hot
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
utt
.
seed_rng
()
utt
.
seed_rng
()
def
test0
(
self
):
def
test0
(
self
):
y_idx
=
[
0
,
1
,
3
]
y_idx
=
[
0
,
1
,
3
]
def
f
(
a
,
b
):
def
f
(
a
,
b
):
return
crossentropy_softmax_1hot_with_bias
(
a
,
b
,
y_idx
)[
0
]
return
crossentropy_softmax_1hot_with_bias
(
a
,
b
,
y_idx
)[
0
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
),
numpy
.
random
.
rand
(
4
)])
numpy
.
random
.
rand
(
4
)])
def
test1
(
self
):
def
test1
(
self
):
y_idx
=
[
0
,
1
,
3
]
y_idx
=
[
0
,
1
,
3
]
def
f
(
a
):
def
f
(
a
):
return
crossentropy_softmax_1hot
(
a
,
y_idx
)[
0
]
return
crossentropy_softmax_1hot
(
a
,
y_idx
)[
0
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
3
,
4
)])
def
test_vector
(
self
):
def
test_vector
(
self
):
y_idx
=
[
3
]
y_idx
=
[
3
]
def
f
(
a
):
def
f
(
a
):
return
crossentropy_softmax_1hot
(
T
.
shape_padleft
(
a
),
y_idx
)[
0
]
return
crossentropy_softmax_1hot
(
T
.
shape_padleft
(
a
),
y_idx
)[
0
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
4
)])
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
4
)])
def
test_vectors
(
self
):
def
test_vectors
(
self
):
y_idx
=
[
3
]
y_idx
=
[
3
]
def
f
(
a
,
b
):
def
f
(
a
,
b
):
return
crossentropy_softmax_1hot
(
T
.
shape_padleft
(
a
)
+
b
,
y_idx
)[
0
]
return
crossentropy_softmax_1hot
(
T
.
shape_padleft
(
a
)
+
b
,
y_idx
)[
0
]
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
4
),
numpy
.
random
.
rand
(
4
)])
utt
.
verify_grad
(
f
,
[
numpy
.
random
.
rand
(
4
),
numpy
.
random
.
rand
(
4
)])
class
T_CrossentropySoftmax1HotWithBiasDx
(
utt
.
InferShapeTester
):
class
T_CrossentropySoftmax1HotWithBiasDx
(
utt
.
InferShapeTester
):
def
test0
(
self
):
def
test0
(
self
):
...
@@ -173,15 +198,18 @@ class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
...
@@ -173,15 +198,18 @@ class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
return
(
theano
.
tensor
.
nnet
.
crossentropy_softmax_1hot_with_bias_dx
(
return
(
theano
.
tensor
.
nnet
.
crossentropy_softmax_1hot_with_bias_dx
(
numpy
.
random
.
rand
(
10
),
# Gradient w.r.t. NLL.
numpy
.
random
.
rand
(
10
),
# Gradient w.r.t. NLL.
sm
,
# Softmax output.
sm
,
# Softmax output.
numpy
.
random
.
randint
(
low
=
0
,
high
=
5
,
size
=
10
)))
# Class indices.
numpy
.
random
.
randint
(
low
=
0
,
high
=
5
,
size
=
10
)))
# Class indices.
# Build a random softmax output whose rows sum to 1.
# Build a random softmax output whose rows sum to 1.
softmax_output
=
numpy
.
random
.
rand
(
10
,
5
)
softmax_output
=
numpy
.
random
.
rand
(
10
,
5
)
softmax_output
/=
softmax_output
.
sum
(
axis
=
1
)
.
reshape
(
10
,
1
)
softmax_output
/=
softmax_output
.
sum
(
axis
=
1
)
.
reshape
(
10
,
1
)
utt
.
verify_grad
(
f
,
[
softmax_output
])
utt
.
verify_grad
(
f
,
[
softmax_output
])
def
test1
(
self
):
def
test1
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
softmax_output
=
rng
.
rand
(
10
,
5
)
softmax_output
=
rng
.
rand
(
10
,
5
)
softmax_output
/=
softmax_output
.
sum
(
axis
=
1
)
.
reshape
(
10
,
1
)
softmax_output
/=
softmax_output
.
sum
(
axis
=
1
)
.
reshape
(
10
,
1
)
def
f
(
dy
):
def
f
(
dy
):
return
(
theano
.
tensor
.
nnet
.
crossentropy_softmax_1hot_with_bias_dx
(
return
(
theano
.
tensor
.
nnet
.
crossentropy_softmax_1hot_with_bias_dx
(
dy
,
dy
,
...
@@ -208,15 +236,18 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
...
@@ -208,15 +236,18 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
def
setUp
(
self
):
def
setUp
(
self
):
super
(
T_CrossentropySoftmaxArgmax1HotWithBias
,
self
)
.
setUp
()
super
(
T_CrossentropySoftmaxArgmax1HotWithBias
,
self
)
.
setUp
()
self
.
op
=
theano
.
tensor
.
nnet
.
crossentropy_softmax_argmax_1hot_with_bias
self
.
op
=
theano
.
tensor
.
nnet
.
crossentropy_softmax_argmax_1hot_with_bias
def
test0
(
self
):
def
test0
(
self
):
n_classes
=
5
n_classes
=
5
n_samples
=
3
n_samples
=
3
# First test gradient when getting a gradient on the NLL output.
# First test gradient when getting a gradient on the NLL output.
def
grad_on_nll
(
x
,
b
):
def
grad_on_nll
(
x
,
b
):
return
self
.
op
(
x
,
b
,
y_idx
=
numpy
.
random
.
randint
(
return
self
.
op
(
x
,
b
,
y_idx
=
numpy
.
random
.
randint
(
low
=
0
,
high
=
n_classes
,
size
=
n_samples
))[
0
]
low
=
0
,
high
=
n_classes
,
size
=
n_samples
))[
0
]
utt
.
verify_grad
(
grad_on_nll
,
[
numpy
.
random
.
rand
(
n_samples
,
n_classes
),
utt
.
verify_grad
(
grad_on_nll
,
[
numpy
.
random
.
rand
(
n_samples
,
n_classes
),
numpy
.
random
.
rand
(
n_classes
)])
numpy
.
random
.
rand
(
n_classes
)])
# Then test gradient when getting a gradient on the softmax output.
# Then test gradient when getting a gradient on the softmax output.
def
grad_on_softmax
(
x
,
b
):
def
grad_on_softmax
(
x
,
b
):
return
self
.
op
(
x
,
b
,
y_idx
=
numpy
.
random
.
randint
(
return
self
.
op
(
x
,
b
,
y_idx
=
numpy
.
random
.
randint
(
...
@@ -240,15 +271,15 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
...
@@ -240,15 +271,15 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
class
T_prepend
(
utt
.
InferShapeTester
):
class
T_prepend
(
utt
.
InferShapeTester
):
def
test0
(
self
):
x
=
tensor
.
matrix
(
'x'
)
def
test0
(
self
):
y
=
Prepend_scalar_constant_to_each_row
(
4.
)(
x
)
x
=
tensor
.
matrix
(
'x'
)
f
=
theano
.
function
([
x
],[
y
])
y
=
Prepend_scalar_constant_to_each_row
(
4.
)(
x
)
m
=
numpy
.
random
.
rand
(
3
,
5
)
f
=
theano
.
function
([
x
],
[
y
])
m
=
numpy
.
random
.
rand
(
3
,
5
)
my
=
f
(
m
)
my
=
f
(
m
)
self
.
assertTrue
(
my
.
shape
==
(
3
,
6
),
my
.
shape
)
self
.
assertTrue
(
my
.
shape
==
(
3
,
6
),
my
.
shape
)
self
.
assertTrue
(
numpy
.
all
(
my
[:,
0
]
==
4.0
))
self
.
assertTrue
(
numpy
.
all
(
my
[:,
0
]
==
4.0
))
def
test_infer_shape
(
self
):
def
test_infer_shape
(
self
):
admat
=
dmatrix
()
admat
=
dmatrix
()
...
@@ -262,15 +293,16 @@ class T_prepend(utt.InferShapeTester):
...
@@ -262,15 +293,16 @@ class T_prepend(utt.InferShapeTester):
class
T_prepend
(
utt
.
InferShapeTester
):
class
T_prepend
(
utt
.
InferShapeTester
):
def
test0
(
self
):
def
test0
(
self
):
"""basic functionality"""
"""basic functionality"""
x
=
tensor
.
matrix
(
'x'
)
x
=
tensor
.
matrix
(
'x'
)
y
=
Prepend_scalar_to_each_row
()(
5.
,
x
)
y
=
Prepend_scalar_to_each_row
()(
5.
,
x
)
f
=
theano
.
function
([
x
],
y
)
f
=
theano
.
function
([
x
],
y
)
m
=
numpy
.
ones
((
3
,
5
),
dtype
=
"float32"
)
m
=
numpy
.
ones
((
3
,
5
),
dtype
=
"float32"
)
my
=
f
(
m
)
my
=
f
(
m
)
self
.
assertTrue
(
my
.
shape
==
(
3
,
6
))
self
.
assertTrue
(
my
.
shape
==
(
3
,
6
))
self
.
assertTrue
(
numpy
.
all
(
my
[:,
0
]
==
5.0
))
self
.
assertTrue
(
numpy
.
all
(
my
[:,
0
]
==
5.0
))
def
test_infer_shape
(
self
):
def
test_infer_shape
(
self
):
admat
=
dmatrix
()
admat
=
dmatrix
()
...
@@ -300,24 +332,20 @@ class T_CrossentropyCategorical1HotGrad(utt.InferShapeTester):
...
@@ -300,24 +332,20 @@ class T_CrossentropyCategorical1HotGrad(utt.InferShapeTester):
class
T_CrossentropyCategorical1Hot
(
utt
.
InferShapeTester
):
class
T_CrossentropyCategorical1Hot
(
utt
.
InferShapeTester
):
def
test_grad
(
self
):
def
test_grad
(
self
):
x
=
tensor
.
matrix
(
'x'
)
x
=
tensor
.
matrix
(
'x'
)
one_of_n
=
tensor
.
lvector
(
'one_of_n'
)
one_of_n
=
tensor
.
lvector
(
'one_of_n'
)
op
=
crossentropy_categorical_1hot
op
=
crossentropy_categorical_1hot
xe
=
op
(
x
,
one_of_n
)
xe
=
op
(
x
,
one_of_n
)
f
=
theano
.
function
([
x
,
one_of_n
],
xe
)
f
=
theano
.
function
([
x
,
one_of_n
],
xe
)
x_val
=
numpy
.
asarray
([[
.
4
,
.
6
,
.
0
],
[
.
1
,
.
8
,
.
1
]],
x_val
=
numpy
.
asarray
([[
.
4
,
.
6
,
.
0
],
[
.
1
,
.
8
,
.
1
]],
dtype
=
config
.
floatX
)
dtype
=
config
.
floatX
)
xe_val
=
f
(
x_val
,
[
0
,
1
])
xe_val
=
f
(
x_val
,
[
0
,
1
])
assert
numpy
.
allclose
(
xe_val
,
-
numpy
.
log
([
.
4
,
.
8
]))
assert
numpy
.
allclose
(
xe_val
,
-
numpy
.
log
([
.
4
,
.
8
]))
def
oplike
(
x
):
def
oplike
(
x
):
return
op
(
x
,
[
0
,
1
])
return
op
(
x
,
[
0
,
1
])
tensor
.
verify_grad
(
oplike
,
[
x_val
],
rng
=
numpy
.
random
)
tensor
.
verify_grad
(
oplike
,
[
x_val
],
rng
=
numpy
.
random
)
...
@@ -336,7 +364,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -336,7 +364,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
x
=
tensor
.
matrix
(
'x'
)
x
=
tensor
.
matrix
(
'x'
)
one_of_n
=
tensor
.
lvector
(
'one_of_n'
)
one_of_n
=
tensor
.
lvector
(
'one_of_n'
)
op
=
crossentropy_categorical_1hot
op
=
crossentropy_categorical_1hot
xe
=
op
(
x
,
one_of_n
)
xe
=
op
(
x
,
one_of_n
)
fgraph
=
gof
.
FunctionGraph
(
fgraph
=
gof
.
FunctionGraph
(
...
@@ -348,7 +375,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -348,7 +375,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano
.
compile
.
mode
.
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
theano
.
compile
.
mode
.
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
assert
(
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
)
def
test_softmax_optimizations_vector
(
self
):
def
test_softmax_optimizations_vector
(
self
):
x
=
tensor
.
vector
(
'x'
)
x
=
tensor
.
vector
(
'x'
)
...
@@ -362,19 +390,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -362,19 +390,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano
.
compile
.
mode
.
optdb
.
query
(
theano
.
compile
.
mode
.
optdb
.
query
(
theano
.
compile
.
mode
.
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
theano
.
compile
.
mode
.
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
assert
(
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
)
def
test_softmax_optimizations_w_bias
(
self
):
def
test_softmax_optimizations_w_bias
(
self
):
x
=
tensor
.
matrix
(
'x'
)
x
=
tensor
.
matrix
(
'x'
)
b
=
tensor
.
vector
(
'b'
)
b
=
tensor
.
vector
(
'b'
)
one_of_n
=
tensor
.
lvector
(
'one_of_n'
)
one_of_n
=
tensor
.
lvector
(
'one_of_n'
)
op
=
crossentropy_categorical_1hot
op
=
crossentropy_categorical_1hot
xe
=
op
(
x
,
one_of_n
)
xe
=
op
(
x
,
one_of_n
)
fgraph
=
gof
.
FunctionGraph
(
fgraph
=
gof
.
FunctionGraph
(
[
x
,
b
,
one_of_n
],
[
x
,
b
,
one_of_n
],
[
op
(
softmax
(
x
+
b
),
one_of_n
)])
[
op
(
softmax
(
x
+
b
),
one_of_n
)])
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
#print 'BEFORE'
#print 'BEFORE'
...
@@ -394,7 +422,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -394,7 +422,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert
len
(
fgraph
.
toposort
())
==
2
assert
len
(
fgraph
.
toposort
())
==
2
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
assert
(
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
)
def
test_softmax_optimizations_w_bias2
(
self
):
def
test_softmax_optimizations_w_bias2
(
self
):
x
=
tensor
.
matrix
(
'x'
)
x
=
tensor
.
matrix
(
'x'
)
...
@@ -405,7 +434,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -405,7 +434,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
fgraph
=
gof
.
FunctionGraph
(
fgraph
=
gof
.
FunctionGraph
(
[
x
,
b
,
c
,
one_of_n
],
[
x
,
b
,
c
,
one_of_n
],
[
op
(
softmax
(
T
.
add
(
x
,
b
,
c
)),
one_of_n
)])
[
op
(
softmax
(
T
.
add
(
x
,
b
,
c
)),
one_of_n
)])
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
#print 'BEFORE'
#print 'BEFORE'
...
@@ -423,7 +452,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -423,7 +452,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert
len
(
fgraph
.
toposort
())
==
3
assert
len
(
fgraph
.
toposort
())
==
3
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
assert
(
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
)
def
test_softmax_optimizations_w_bias_vector
(
self
):
def
test_softmax_optimizations_w_bias_vector
(
self
):
x
=
tensor
.
vector
(
'x'
)
x
=
tensor
.
vector
(
'x'
)
...
@@ -432,7 +462,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -432,7 +462,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
op
=
crossentropy_categorical_1hot
op
=
crossentropy_categorical_1hot
fgraph
=
gof
.
FunctionGraph
(
fgraph
=
gof
.
FunctionGraph
(
[
x
,
b
,
one_of_n
],
[
x
,
b
,
one_of_n
],
[
op
(
softmax
(
x
+
b
),
one_of_n
)])
[
op
(
softmax
(
x
+
b
),
one_of_n
)])
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
#print 'BEFORE'
#print 'BEFORE'
#for node in fgraph.toposort():
#for node in fgraph.toposort():
...
@@ -448,15 +478,14 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -448,15 +478,14 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#print '===='
#print '===='
assert
len
(
fgraph
.
toposort
())
==
3
assert
len
(
fgraph
.
toposort
())
==
3
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
str
(
fgraph
.
outputs
[
0
]
.
owner
.
op
)
==
'OutputGuard'
assert
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
assert
(
fgraph
.
outputs
[
0
]
.
owner
.
inputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
)
def
test_softmax_grad_optimizations
(
self
):
def
test_softmax_grad_optimizations
(
self
):
x
=
tensor
.
matrix
(
'x'
)
x
=
tensor
.
matrix
(
'x'
)
one_of_n
=
tensor
.
lvector
(
'one_of_n'
)
one_of_n
=
tensor
.
lvector
(
'one_of_n'
)
op
=
crossentropy_categorical_1hot
op
=
crossentropy_categorical_1hot
xe
=
op
(
softmax
(
x
),
one_of_n
)
xe
=
op
(
softmax
(
x
),
one_of_n
)
sum_xe
=
tensor
.
sum
(
xe
)
sum_xe
=
tensor
.
sum
(
xe
)
g_x
=
tensor
.
grad
(
sum_xe
,
x
)
g_x
=
tensor
.
grad
(
sum_xe
,
x
)
fgraph
=
gof
.
FunctionGraph
(
fgraph
=
gof
.
FunctionGraph
(
...
@@ -474,8 +503,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -474,8 +503,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#for node in fgraph.toposort():
#for node in fgraph.toposort():
# print node.op, node.inputs
# print node.op, node.inputs
# the function has 9 ops because the dimshuffle and
elemwise{second} aren't getting
# the function has 9 ops because the dimshuffle and
lemwise{second}
# cleaned up as well as we'd like.
#
aren't getting
cleaned up as well as we'd like.
has_cx1hot
=
False
has_cx1hot
=
False
has_cx1hotdx
=
False
has_cx1hotdx
=
False
has_softmax
=
False
has_softmax
=
False
...
@@ -483,13 +512,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -483,13 +512,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for
node
in
fgraph
.
toposort
():
for
node
in
fgraph
.
toposort
():
if
node
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
:
if
node
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
:
has_cx1hot
=
True
has_cx1hot
=
True
if
node
.
op
==
crossentropy_softmax_1hot_with_bias_dx
:
if
node
.
op
==
crossentropy_softmax_1hot_with_bias_dx
:
has_cx1hotdx
=
True
has_cx1hotdx
=
True
if
node
.
op
==
softmax
:
if
node
.
op
==
softmax
:
has_softmax
=
True
has_softmax
=
True
if
node
.
op
==
softmax_grad
:
if
node
.
op
==
softmax_grad
:
has_softmaxdx
=
True
has_softmaxdx
=
True
assert
has_cx1hot
assert
has_cx1hot
assert
has_cx1hotdx
assert
has_cx1hotdx
assert
not
has_softmax
assert
not
has_softmax
...
@@ -517,8 +545,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -517,8 +545,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#for node in fgraph.toposort():
#for node in fgraph.toposort():
# print node.op, node.inputs
# print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second}
aren't getting
# the function has 9 ops because the dimshuffle and elemwise{second}
# cleaned up as well as we'd like.
#
aren't getting
cleaned up as well as we'd like.
has_cx1hot
=
False
has_cx1hot
=
False
has_cx1hotdx
=
False
has_cx1hotdx
=
False
has_softmax
=
False
has_softmax
=
False
...
@@ -526,13 +554,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -526,13 +554,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for
node
in
fgraph
.
toposort
():
for
node
in
fgraph
.
toposort
():
if
node
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
:
if
node
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
:
has_cx1hot
=
True
has_cx1hot
=
True
if
node
.
op
==
crossentropy_softmax_1hot_with_bias_dx
:
if
node
.
op
==
crossentropy_softmax_1hot_with_bias_dx
:
has_cx1hotdx
=
True
has_cx1hotdx
=
True
if
node
.
op
==
softmax
:
if
node
.
op
==
softmax
:
has_softmax
=
True
has_softmax
=
True
if
node
.
op
==
softmax_grad
:
if
node
.
op
==
softmax_grad
:
has_softmaxdx
=
True
has_softmaxdx
=
True
assert
has_cx1hot
assert
has_cx1hot
assert
has_cx1hotdx
assert
has_cx1hotdx
assert
not
has_softmax
assert
not
has_softmax
...
@@ -547,13 +574,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -547,13 +574,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
if
mode
==
theano
.
compile
.
mode
.
get_mode
(
'FAST_COMPILE'
):
if
mode
==
theano
.
compile
.
mode
.
get_mode
(
'FAST_COMPILE'
):
mode
=
'FAST_RUN'
mode
=
'FAST_RUN'
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
x_val
=
rng
.
randn
(
3
,
5
)
x_val
=
rng
.
randn
(
3
,
5
)
b_val
=
rng
.
randn
(
5
)
b_val
=
rng
.
randn
(
5
)
y_val
=
numpy
.
asarray
([
2
,
4
,
1
])
y_val
=
numpy
.
asarray
([
2
,
4
,
1
])
x
=
T
.
dmatrix
(
'x'
)
x
=
T
.
dmatrix
(
'x'
)
b
=
T
.
dvector
(
'b'
)
b
=
T
.
dvector
(
'b'
)
y
=
T
.
lvector
(
'y'
)
y
=
T
.
lvector
(
'y'
)
...
@@ -565,10 +589,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -565,10 +589,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
-
T
.
sum
(
T
.
log
(
softmax
(
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
]),
-
T
.
sum
(
T
.
log
(
softmax
(
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
]),
T
.
sum
(
-
T
.
log
(
softmax
(
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])
T
.
sum
(
-
T
.
log
(
softmax
(
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])
]
]
for
expr
in
expressions
:
for
expr
in
expressions
:
# Verify the optimizer worked on the expressions
# Verify the optimizer worked on the expressions
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode
)
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
try
:
try
:
...
@@ -579,7 +603,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -579,7 +603,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
raise
# Also verify the gradient wrt x
# Also verify the gradient wrt x
g
=
theano
.
function
([
x
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
g
=
theano
.
function
([
x
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
g
)
theano
.
printing
.
debugprint
(
g
)
try
:
try
:
...
@@ -591,13 +615,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -591,13 +615,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
## Test that a biased softmax is optimized correctly
## Test that a biased softmax is optimized correctly
bias_expressions
=
[
bias_expressions
=
[
T
.
sum
(
-
T
.
log
(
softmax
(
x
+
b
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
T
.
sum
(
-
T
.
log
(
softmax
(
x
+
b
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
-
T
.
sum
(
T
.
log
(
softmax
(
b
+
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
-
T
.
sum
(
T
.
log
(
softmax
(
b
+
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
-
T
.
sum
(
T
.
log
(
softmax
(
x
+
b
))[
T
.
arange
(
y
.
shape
[
0
]),
y
]),
-
T
.
sum
(
T
.
log
(
softmax
(
x
+
b
))[
T
.
arange
(
y
.
shape
[
0
]),
y
]),
T
.
sum
(
-
T
.
log
(
softmax
(
b
+
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])]
T
.
sum
(
-
T
.
log
(
softmax
(
b
+
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])]
for
expr
in
bias_expressions
:
for
expr
in
bias_expressions
:
f
=
theano
.
function
([
x
,
b
,
y
],
expr
,
mode
=
mode
)
f
=
theano
.
function
([
x
,
b
,
y
],
expr
,
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
try
:
try
:
...
@@ -606,8 +630,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -606,8 +630,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except
Exception
:
except
Exception
:
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
raise
raise
g
=
theano
.
function
([
x
,
b
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
g
=
theano
.
function
([
x
,
b
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
g
)
theano
.
printing
.
debugprint
(
g
)
try
:
try
:
...
@@ -625,7 +648,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -625,7 +648,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
T
.
mean
(
-
T
.
log
(
softmax
(
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])]
T
.
mean
(
-
T
.
log
(
softmax
(
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])]
for
expr
in
mean_expressions
:
for
expr
in
mean_expressions
:
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode
)
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
try
:
try
:
...
@@ -635,11 +658,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -635,11 +658,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
raise
raise
g
=
theano
.
function
([
x
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
g
=
theano
.
function
([
x
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
g
)
theano
.
printing
.
debugprint
(
g
)
try
:
try
:
assert
len
(
g
.
maker
.
fgraph
.
toposort
())
in
(
6
,
7
)
#there's an extra dimshuffle in there
assert
len
(
g
.
maker
.
fgraph
.
toposort
())
in
(
6
,
7
)
#there's an extra dimshuffle in there
# but I can't think of a good rule to get rid of it
# but I can't think of a good rule to get rid of it
g
(
x_val
,
y_val
)
g
(
x_val
,
y_val
)
except
Exception
:
except
Exception
:
...
@@ -647,13 +671,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -647,13 +671,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
raise
mean_bias_expressions
=
[
mean_bias_expressions
=
[
T
.
mean
(
-
T
.
log
(
softmax
(
x
+
b
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
T
.
mean
(
-
T
.
log
(
softmax
(
x
+
b
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
-
T
.
mean
(
T
.
log
(
softmax
(
b
+
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
-
T
.
mean
(
T
.
log
(
softmax
(
b
+
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
-
T
.
mean
(
T
.
log
(
softmax
(
x
+
b
))[
T
.
arange
(
y
.
shape
[
0
]),
y
]),
-
T
.
mean
(
T
.
log
(
softmax
(
x
+
b
))[
T
.
arange
(
y
.
shape
[
0
]),
y
]),
T
.
mean
(
-
T
.
log
(
softmax
(
b
+
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])]
T
.
mean
(
-
T
.
log
(
softmax
(
b
+
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])]
for
expr
in
mean_bias_expressions
:
for
expr
in
mean_bias_expressions
:
f
=
theano
.
function
([
x
,
b
,
y
],
expr
,
mode
=
mode
)
f
=
theano
.
function
([
x
,
b
,
y
],
expr
,
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
try
:
try
:
...
@@ -661,12 +685,11 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -661,12 +685,11 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except
Exception
:
except
Exception
:
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
raise
raise
g
=
theano
.
function
([
x
,
b
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
g
=
theano
.
function
([
x
,
b
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
g
)
theano
.
printing
.
debugprint
(
g
)
try
:
try
:
assert
len
(
g
.
maker
.
fgraph
.
toposort
())
in
(
6
,
7
)
assert
len
(
g
.
maker
.
fgraph
.
toposort
())
in
(
6
,
7
)
g
(
x_val
,
b_val
,
y_val
)
g
(
x_val
,
b_val
,
y_val
)
except
Exception
:
except
Exception
:
theano
.
printing
.
debugprint
(
g
)
theano
.
printing
.
debugprint
(
g
)
...
@@ -678,15 +701,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -678,15 +701,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
if
mode
==
theano
.
compile
.
mode
.
get_mode
(
'FAST_COMPILE'
):
if
mode
==
theano
.
compile
.
mode
.
get_mode
(
'FAST_COMPILE'
):
mode
=
'FAST_RUN'
mode
=
'FAST_RUN'
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
x_val
=
rng
.
randn
(
3
,
5
)
x_val
=
rng
.
randn
(
3
,
5
)
b_val
=
rng
.
randn
(
5
)
b_val
=
rng
.
randn
(
5
)
y_val
=
numpy
.
asarray
([
2
,
4
,
1
],
dtype
=
'int64'
)
y_val
=
numpy
.
asarray
([
2
,
4
,
1
],
dtype
=
'int64'
)
x
=
T
.
dmatrix
(
'x'
)
x
=
T
.
dmatrix
(
'x'
)
b
=
T
.
dvector
(
'b'
)
b
=
T
.
dvector
(
'b'
)
y
=
T
.
lvector
(
'y'
)
y
=
T
.
lvector
(
'y'
)
yi
=
T
.
cast
(
y
,
'int32'
)
yi
=
T
.
cast
(
y
,
'int32'
)
expressions
=
[
expressions
=
[
T
.
sum
(
-
T
.
log
(
softmax
(
x
)[
T
.
arange
(
yi
.
shape
[
0
]),
yi
])),
T
.
sum
(
-
T
.
log
(
softmax
(
x
)[
T
.
arange
(
yi
.
shape
[
0
]),
yi
])),
-
T
.
sum
(
T
.
log
(
softmax
(
x
)[
T
.
arange
(
yi
.
shape
[
0
]),
yi
])),
-
T
.
sum
(
T
.
log
(
softmax
(
x
)[
T
.
arange
(
yi
.
shape
[
0
]),
yi
])),
...
@@ -696,7 +717,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -696,7 +717,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for
expr
in
expressions
:
for
expr
in
expressions
:
# Verify the optimizer worked on the expressions
# Verify the optimizer worked on the expressions
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode
)
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
try
:
try
:
...
@@ -707,7 +728,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -707,7 +728,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
raise
# Also verify the gradient wrt x
# Also verify the gradient wrt x
g
=
theano
.
function
([
x
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
g
=
theano
.
function
([
x
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
if
verbose
:
if
verbose
:
theano
.
printing
.
debugprint
(
g
)
theano
.
printing
.
debugprint
(
g
)
try
:
try
:
...
@@ -717,7 +738,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -717,7 +738,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano
.
printing
.
debugprint
(
g
)
theano
.
printing
.
debugprint
(
g
)
raise
raise
def
test_optimize_xent_vector
(
self
):
def
test_optimize_xent_vector
(
self
):
verbose
=
0
verbose
=
0
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
...
@@ -743,8 +763,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -743,8 +763,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
-
T
.
sum
(
T
.
log
(
softmax
(
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
]))]
-
T
.
sum
(
T
.
log
(
softmax
(
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
]))]
for
expr
in
bias_expressions
:
for
expr
in
bias_expressions
:
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode
)
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode
)
if
verbose
:
print_graph
(
f
)
if
verbose
:
print_graph
(
f
)
try
:
try
:
prev
,
last
=
f
.
maker
.
fgraph
.
toposort
()[
-
2
:]
prev
,
last
=
f
.
maker
.
fgraph
.
toposort
()[
-
2
:]
assert
len
(
f
.
maker
.
fgraph
.
toposort
())
==
5
assert
len
(
f
.
maker
.
fgraph
.
toposort
())
==
5
...
@@ -752,7 +773,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -752,7 +773,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except
Exception
:
except
Exception
:
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
raise
raise
g
=
theano
.
function
([
x
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
g
=
theano
.
function
([
x
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
print_graph
(
g
)
print_graph
(
g
)
try
:
try
:
...
@@ -789,17 +809,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -789,17 +809,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
## Test that a biased softmax is optimized correctly
## Test that a biased softmax is optimized correctly
bias_expressions
=
[
bias_expressions
=
[
T
.
sum
(
-
T
.
log
(
softmax
(
x
+
b
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
T
.
sum
(
-
T
.
log
(
softmax
(
x
+
b
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
-
T
.
sum
(
T
.
log
(
softmax
(
b
+
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
-
T
.
sum
(
T
.
log
(
softmax
(
b
+
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
-
T
.
sum
(
T
.
log
(
softmax
(
x
+
b
))[
T
.
arange
(
y
.
shape
[
0
]),
y
]),
-
T
.
sum
(
T
.
log
(
softmax
(
x
+
b
))[
T
.
arange
(
y
.
shape
[
0
]),
y
]),
T
.
sum
(
-
T
.
log
(
softmax
(
b
+
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])]
T
.
sum
(
-
T
.
log
(
softmax
(
b
+
x
))[
T
.
arange
(
y
.
shape
[
0
]),
y
])]
for
expr
in
bias_expressions
:
for
expr
in
bias_expressions
:
f
=
theano
.
function
([
x
,
b
,
y
],
expr
,
mode
=
mode
)
f
=
theano
.
function
([
x
,
b
,
y
],
expr
,
mode
=
mode
)
if
verbose
:
print_graph
(
f
)
if
verbose
:
print_graph
(
f
)
try
:
try
:
prev
,
last
=
f
.
maker
.
fgraph
.
toposort
()[
-
2
:]
prev
,
last
=
f
.
maker
.
fgraph
.
toposort
()[
-
2
:]
assert
len
(
f
.
maker
.
fgraph
.
toposort
())
==
3
# [big_op, sum, dim_shuffle]
assert
len
(
f
.
maker
.
fgraph
.
toposort
())
==
3
# [big_op, sum, dim_shuffle]
f
(
x_val
,
b_val
,
y_val
)
f
(
x_val
,
b_val
,
y_val
)
except
Exception
:
except
Exception
:
theano
.
printing
.
debugprint
(
f
)
theano
.
printing
.
debugprint
(
f
)
...
@@ -808,7 +830,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -808,7 +830,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
backup
=
config
.
warn
.
sum_div_dimshuffle_bug
backup
=
config
.
warn
.
sum_div_dimshuffle_bug
config
.
warn
.
sum_div_dimshuffle_bug
=
False
config
.
warn
.
sum_div_dimshuffle_bug
=
False
try
:
try
:
g
=
theano
.
function
([
x
,
b
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
g
=
theano
.
function
([
x
,
b
,
y
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
finally
:
finally
:
config
.
warn
.
sum_div_dimshuffle_bug
=
backup
config
.
warn
.
sum_div_dimshuffle_bug
=
backup
...
@@ -830,13 +852,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -830,13 +852,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
if
mode
==
theano
.
compile
.
mode
.
get_mode
(
'FAST_COMPILE'
):
if
mode
==
theano
.
compile
.
mode
.
get_mode
(
'FAST_COMPILE'
):
mode
=
'FAST_RUN'
mode
=
'FAST_RUN'
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
x_val
=
rng
.
randn
(
3
,
5
)
x_val
=
rng
.
randn
(
3
,
5
)
b_val
=
rng
.
randn
(
5
)
b_val
=
rng
.
randn
(
5
)
y_val
=
numpy
.
asarray
([
2
,
4
,
1
])
y_val
=
numpy
.
asarray
([
2
,
4
,
1
])
x
=
T
.
dmatrix
(
'x'
)
x
=
T
.
dmatrix
(
'x'
)
b
=
T
.
dvector
(
'b'
)
b
=
T
.
dvector
(
'b'
)
y
=
T
.
lvector
(
'y'
)
y
=
T
.
lvector
(
'y'
)
...
@@ -878,7 +897,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -878,7 +897,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert
has_softmax
assert
has_softmax
assert
not
has_softmaxdx
assert
not
has_softmaxdx
## Cases to test
## Cases to test
expressions
=
[
expressions
=
[
a
*
T
.
sum
(
-
T
.
log
(
softmax
(
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
a
*
T
.
sum
(
-
T
.
log
(
softmax
(
x
)[
T
.
arange
(
y
.
shape
[
0
]),
y
])),
...
@@ -904,7 +922,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -904,7 +922,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for
expr
in
expressions
:
for
expr
in
expressions
:
# Verify the optimizer worked on the expressions
# Verify the optimizer worked on the expressions
f
=
theano
.
function
([
x
,
y
,
a
],
expr
,
mode
=
mode
)
f
=
theano
.
function
([
x
,
y
,
a
],
expr
,
mode
=
mode
)
try
:
try
:
assert
5
<=
len
(
f
.
maker
.
fgraph
.
toposort
())
<=
10
assert
5
<=
len
(
f
.
maker
.
fgraph
.
toposort
())
<=
10
validate_fn_graph
(
f
)
validate_fn_graph
(
f
)
...
@@ -914,7 +932,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -914,7 +932,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
raise
# Verify the gradient wrt x
# Verify the gradient wrt x
g
=
theano
.
function
([
x
,
y
,
a
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
g
=
theano
.
function
([
x
,
y
,
a
],
T
.
grad
(
expr
,
x
),
mode
=
mode
)
try
:
try
:
assert
5
<=
len
(
g
.
maker
.
fgraph
.
toposort
())
<=
12
assert
5
<=
len
(
g
.
maker
.
fgraph
.
toposort
())
<=
12
validate_grad_graph
(
g
)
validate_grad_graph
(
g
)
...
@@ -924,7 +942,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
...
@@ -924,7 +942,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
raise
# Verify the gradient when providing output gradient
# Verify the gradient when providing output gradient
h
=
theano
.
function
([
x
,
y
,
a
],
T
.
grad
(
expr
,
x
,
g_cost
=
a
*
x
.
sum
()),
mode
=
mode
)
h
=
theano
.
function
([
x
,
y
,
a
],
T
.
grad
(
expr
,
x
,
g_cost
=
a
*
x
.
sum
()),
mode
=
mode
)
try
:
try
:
assert
8
<=
len
(
h
.
maker
.
fgraph
.
toposort
())
<=
17
assert
8
<=
len
(
h
.
maker
.
fgraph
.
toposort
())
<=
17
validate_grad_graph
(
h
)
validate_grad_graph
(
h
)
...
@@ -944,7 +963,6 @@ def test_argmax_pushdown():
...
@@ -944,7 +963,6 @@ def test_argmax_pushdown():
fgraph
=
gof
.
FunctionGraph
(
fgraph
=
gof
.
FunctionGraph
(
[
x
],
[
x
],
[
out
])
[
out
])
theano
.
compile
.
mode
.
optdb
.
query
(
theano
.
compile
.
mode
.
optdb
.
query
(
theano
.
compile
.
mode
.
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
theano
.
compile
.
mode
.
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
...
@@ -981,14 +999,13 @@ def test_argmax_pushdown():
...
@@ -981,14 +999,13 @@ def test_argmax_pushdown():
assert
isinstance
(
fgraph
.
toposort
()[
2
]
.
op
.
scalar_op
,
theano
.
scalar
.
Maximum
)
assert
isinstance
(
fgraph
.
toposort
()[
2
]
.
op
.
scalar_op
,
theano
.
scalar
.
Maximum
)
assert
str
(
fgraph
.
toposort
()[
3
]
.
op
)
==
'OutputGuard'
assert
str
(
fgraph
.
toposort
()[
3
]
.
op
)
==
'OutputGuard'
def
test_argmax_pushdown_bias
():
def
test_argmax_pushdown_bias
():
x
=
tensor
.
dmatrix
()
x
=
tensor
.
dmatrix
()
b
=
tensor
.
dvector
()
b
=
tensor
.
dvector
()
out
=
tensor
.
argmax
(
softmax_with_bias
(
x
,
b
),
axis
=-
1
)
out
=
tensor
.
argmax
(
softmax_with_bias
(
x
,
b
),
axis
=-
1
)
fgraph
=
gof
.
FunctionGraph
(
fgraph
=
gof
.
FunctionGraph
(
[
x
,
b
],
[
x
,
b
],
[
out
])
[
out
])
theano
.
compile
.
mode
.
optdb
.
query
(
theano
.
compile
.
mode
.
optdb
.
query
(
...
@@ -1005,10 +1022,9 @@ def test_argmax_pushdown_bias():
...
@@ -1005,10 +1022,9 @@ def test_argmax_pushdown_bias():
x
=
tensor
.
dmatrix
()
x
=
tensor
.
dmatrix
()
b
=
tensor
.
dvector
()
b
=
tensor
.
dvector
()
out
=
tensor
.
max_and_argmax
(
softmax_with_bias
(
x
,
b
),
axis
=-
1
)[
0
]
out
=
tensor
.
max_and_argmax
(
softmax_with_bias
(
x
,
b
),
axis
=-
1
)[
0
]
fgraph
=
gof
.
FunctionGraph
(
fgraph
=
gof
.
FunctionGraph
(
[
x
,
b
],
[
x
,
b
],
[
out
])
[
out
])
backup
=
config
.
warn
.
argmax_pushdown_bug
backup
=
config
.
warn
.
argmax_pushdown_bug
...
@@ -1028,13 +1044,15 @@ def test_argmax_pushdown_bias():
...
@@ -1028,13 +1044,15 @@ def test_argmax_pushdown_bias():
assert
isinstance
(
fgraph
.
toposort
()[
1
]
.
op
.
scalar_op
,
theano
.
scalar
.
Maximum
)
assert
isinstance
(
fgraph
.
toposort
()[
1
]
.
op
.
scalar_op
,
theano
.
scalar
.
Maximum
)
assert
str
(
fgraph
.
toposort
()[
2
]
.
op
)
==
'OutputGuard'
assert
str
(
fgraph
.
toposort
()[
2
]
.
op
)
==
'OutputGuard'
def
test_asymptotic_32
():
def
test_asymptotic_32
():
"""
"""
This test makes sure that our functions behave sensibly when huge values are present
This test makes sure that our functions behave sensibly when
huge values are present
"""
"""
#TODO: consider adding the optimization of crossentropy into the current
mode for the
#TODO: consider adding the optimization of crossentropy into the current
# purpose of running this test
#
mode for the
purpose of running this test
for
dtype
in
'float32'
,
'float64'
:
for
dtype
in
'float32'
,
'float64'
:
if
dtype
==
'float32'
:
if
dtype
==
'float32'
:
...
@@ -1045,10 +1063,11 @@ def test_asymptotic_32():
...
@@ -1045,10 +1063,11 @@ def test_asymptotic_32():
x2
=
tensor
.
dvector
()
x2
=
tensor
.
dvector
()
y
=
tensor
.
lvector
()
y
=
tensor
.
lvector
()
c
=
categorical_crossentropy
(
softmax
(
x
+
x2
),
y
)
c
=
categorical_crossentropy
(
softmax
(
x
+
x2
),
y
)
f
=
theano
.
function
([
x
,
y
,
x2
],
[
c
.
sum
(),
tensor
.
grad
(
c
.
sum
(),
x
)],
mode
=
'FAST_RUN'
)
f
=
theano
.
function
([
x
,
y
,
x2
],
[
c
.
sum
(),
tensor
.
grad
(
c
.
sum
(),
x
)],
mode
=
'FAST_RUN'
)
if
0
:
if
0
:
for
i
,
n
in
enumerate
(
f
.
maker
.
fgraph
.
toposort
()):
for
i
,
n
in
enumerate
(
f
.
maker
.
fgraph
.
toposort
()):
print
i
,
n
print
i
,
n
xval
=
numpy
.
zeros
((
5
,
5
),
dtype
=
dtype
)
xval
=
numpy
.
zeros
((
5
,
5
),
dtype
=
dtype
)
...
@@ -1071,51 +1090,50 @@ def test_asymptotic_32():
...
@@ -1071,51 +1090,50 @@ def test_asymptotic_32():
#print cval, gxval
#print cval, gxval
assert
cval
>
61750000
assert
cval
>
61750000
assert
gxval
[
0
,
0
]
==
-
1.0
assert
gxval
[
0
,
0
]
==
-
1.0
assert
gxval
[
0
,
1
]
==
0.25
assert
gxval
[
0
,
1
]
==
0.25
class
Test_softmax_opt
:
class
Test_softmax_opt
:
# Test that expressions of softmax in terms of exponentiated things divided by row sums
# Test that expressions of softmax in terms of exponentiated things
# are replaced by softmax expressions.
# divided by row sums are replaced by softmax expressions.
#
#
# Softmax_grad isn't that interesting as an Op, but it
's the signature we look for when
# Softmax_grad isn't that interesting as an Op, but it
has the signature
#
trying to insert CrossEntropySoftmax... grad. So for now, we add softmax_grad to graphs.
#
we look for when trying to insert CrossEntropySoftmax... grad. So for now,
#
In future, we may modify the CrossEntropySoftmax...grad to look for the more basic
#
we add softmax_grad to graphs. In the future, we may modify the
# pattern.
#
CrossEntropySoftmax...grad to look for the more basic
pattern.
#
#
def
setUp
(
self
):
def
setUp
(
self
):
utt
.
seed_rng
()
utt
.
seed_rng
()
self
.
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
self
.
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
self
.
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
self
.
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
self
.
mode
=
self
.
mode
.
including
(
'canonicalize'
)
self
.
mode
=
self
.
mode
.
including
(
'canonicalize'
)
def
test_basic
(
self
):
def
test_basic
(
self
):
c
=
T
.
matrix
()
c
=
T
.
matrix
()
p_y
=
T
.
exp
(
c
)
/
T
.
exp
(
c
)
.
sum
(
axis
=
1
)
.
dimshuffle
(
0
,
'x'
)
p_y
=
T
.
exp
(
c
)
/
T
.
exp
(
c
)
.
sum
(
axis
=
1
)
.
dimshuffle
(
0
,
'x'
)
# test that function contains softmax and no div.
# test that function contains softmax and no div.
f
=
theano
.
function
([
c
],
p_y
,
mode
=
self
.
mode
)
f
=
theano
.
function
([
c
],
p_y
,
mode
=
self
.
mode
)
f_ops
=
[
n
.
op
for
n
in
f
.
maker
.
fgraph
.
toposort
()]
f_ops
=
[
n
.
op
for
n
in
f
.
maker
.
fgraph
.
toposort
()]
#print '--- f ='
#print '--- f ='
#printing.debugprint(f)
#printing.debugprint(f)
#print '==='
#print '==='
assert
len
(
f_ops
)
==
1
assert
len
(
f_ops
)
==
1
assert
softmax
in
f_ops
assert
softmax
in
f_ops
f
(
self
.
rng
.
rand
(
3
,
4
)
.
astype
(
config
.
floatX
))
f
(
self
.
rng
.
rand
(
3
,
4
)
.
astype
(
config
.
floatX
))
def
test_grad
(
self
):
def
test_grad
(
self
):
c
=
T
.
matrix
()
c
=
T
.
matrix
()
p_y
=
T
.
exp
(
c
)
/
T
.
exp
(
c
)
.
sum
(
axis
=
1
)
.
dimshuffle
(
0
,
'x'
)
p_y
=
T
.
exp
(
c
)
/
T
.
exp
(
c
)
.
sum
(
axis
=
1
)
.
dimshuffle
(
0
,
'x'
)
# test that function contains softmax and softmaxgrad
# test that function contains softmax and softmaxgrad
w
=
T
.
matrix
()
w
=
T
.
matrix
()
backup
=
config
.
warn
.
sum_div_dimshuffle_bug
backup
=
config
.
warn
.
sum_div_dimshuffle_bug
config
.
warn
.
sum_div_dimshuffle_bug
=
False
config
.
warn
.
sum_div_dimshuffle_bug
=
False
try
:
try
:
g
=
theano
.
function
([
c
,
w
],
T
.
grad
((
p_y
*
w
)
.
sum
(),
c
))
g
=
theano
.
function
([
c
,
w
],
T
.
grad
((
p_y
*
w
)
.
sum
(),
c
))
finally
:
finally
:
config
.
warn
.
sum_div_dimshuffle_bug
=
backup
config
.
warn
.
sum_div_dimshuffle_bug
=
backup
g_ops
=
[
n
.
op
for
n
in
g
.
maker
.
fgraph
.
toposort
()]
g_ops
=
[
n
.
op
for
n
in
g
.
maker
.
fgraph
.
toposort
()]
...
@@ -1127,7 +1145,7 @@ class Test_softmax_opt:
...
@@ -1127,7 +1145,7 @@ class Test_softmax_opt:
assert
len
(
g_ops
)
==
2
assert
len
(
g_ops
)
==
2
assert
softmax
in
g_ops
assert
softmax
in
g_ops
assert
softmax_grad
in
g_ops
assert
softmax_grad
in
g_ops
g
(
self
.
rng
.
rand
(
3
,
4
),
self
.
rng
.
uniform
(
.
5
,
1
,
(
3
,
4
)))
g
(
self
.
rng
.
rand
(
3
,
4
),
self
.
rng
.
uniform
(
.
5
,
1
,
(
3
,
4
)))
def
test_transpose_basic
(
self
):
def
test_transpose_basic
(
self
):
# this should be a transposed softmax
# this should be a transposed softmax
...
@@ -1135,14 +1153,14 @@ class Test_softmax_opt:
...
@@ -1135,14 +1153,14 @@ class Test_softmax_opt:
p_y
=
T
.
exp
(
c
)
/
T
.
exp
(
c
)
.
sum
(
axis
=
0
)
p_y
=
T
.
exp
(
c
)
/
T
.
exp
(
c
)
.
sum
(
axis
=
0
)
# test that function contains softmax and no div.
# test that function contains softmax and no div.
f
=
theano
.
function
([
c
],
p_y
)
f
=
theano
.
function
([
c
],
p_y
)
#printing.debugprint(f)
#printing.debugprint(f)
# test that function contains softmax and no div.
# test that function contains softmax and no div.
backup
=
config
.
warn
.
sum_div_dimshuffle_bug
backup
=
config
.
warn
.
sum_div_dimshuffle_bug
config
.
warn
.
sum_div_dimshuffle_bug
=
False
config
.
warn
.
sum_div_dimshuffle_bug
=
False
try
:
try
:
g
=
theano
.
function
([
c
],
T
.
grad
(
p_y
.
sum
(),
c
))
g
=
theano
.
function
([
c
],
T
.
grad
(
p_y
.
sum
(),
c
))
finally
:
finally
:
config
.
warn
.
sum_div_dimshuffle_bug
=
backup
config
.
warn
.
sum_div_dimshuffle_bug
=
backup
#printing.debugprint(g)
#printing.debugprint(g)
...
@@ -1169,15 +1187,5 @@ class Test_softmax_opt:
...
@@ -1169,15 +1187,5 @@ class Test_softmax_opt:
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc.
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc.
#if __name__ == '__main__':
# unittest.main()
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
t
=
T_CrossentropyCategorical1HotGrad
(
'setUp'
)
t
.
setUp
()
t
.
test_infer_shape
()
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论