testgroup / pytensor · Commits

Commit d2324428
authored April 24, 2009 by James Bergstra
added some simpler expression types and optimizations to nnet
Parent: 0601a868

Showing 2 changed files with 362 additions and 11 deletions

nnet.py  (theano/tensor/nnet.py)  +234 −11
test_nnet.py  (theano/tensor/tests/test_nnet.py)  +128 −0
theano/tensor/nnet.py
@@ -10,6 +10,7 @@ import basic as tensor
 import elemwise
 import numpy
 import opt
+from ..compile import optdb

 ############
 #
@@ -120,7 +121,7 @@ class SoftmaxWithBias(gof.Op):
    def grad(self, (x, b), (g_sm,)):
        sm = softmax_with_bias(x, b)
-       dx = SoftmaxWithBiasDx()(g_sm, sm)
+       dx = softmax_grad(g_sm, sm)
        db = tensor.sum(dx, axis=0)
        return dx, db
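The substitution is behavior-preserving: softmax_grad computes the standard row-wise softmax vector-Jacobian product, with sm = softmax(x) and upstream gradient g_sm giving dx[i,k] = sm[i,k] * (g_sm[i,k] - sum_j g_sm[i,j]*sm[i,j]). A minimal NumPy sketch of that formula (the math only, not the Op's implementation):

import numpy

def softmax_grad_rows(g_sm, sm):
    # dx = sm * (g_sm - row-wise inner product of g_sm and sm)
    return sm * (g_sm - (g_sm * sm).sum(axis=1, keepdims=True))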
@@ -254,14 +255,20 @@ class SoftmaxWithBias(gof.Op):
 softmax_with_bias = SoftmaxWithBias()

-class SoftmaxWithBiasDx(gof.Op):
+class SoftmaxGrad(gof.Op):
+    """Gradient wrt x of the Softmax Op"""
     nin = 2
     nout = 1
-    """Gradient wrt x of the SoftmaxWithBias Op"""
     def __init__(self, **kwargs):
         gof.Op.__init__(self, **kwargs)
     def __eq__(self, other):
         return type(self) == type(other)
     def __hash__(self):
         return hash(type(self))
     def make_node(self, dy, sm, **kwargs):
         dy = tensor.as_tensor_variable(dy)
         sm = tensor.as_tensor_variable(sm)
@@ -333,18 +340,82 @@ class SoftmaxWithBiasDx(gof.Op):
        }
    }
    ''' % dict(locals(), **sub)

softmax_grad = SoftmaxGrad()

def softmax(x, **kwargs):
    b = tensor.zeros_like(x[0,:])
    return softmax_with_bias(x, b, **kwargs)

class Softmax(gof.Op):
    """
    WRITEME
    """
    nin = 1
    nout = 1
    def __init__(self, **kwargs):
        gof.Op.__init__(self, **kwargs)
    def __eq__(self, other):
        return type(self) == type(other)
    def __hash__(self):
        return hash(type(self))
    def make_node(self, x):
        x = tensor.as_tensor_variable(x)
        if x.type.ndim != 2 \
                or x.type.dtype not in ['float32', 'float64']:
            raise ValueError('x must be 2-d tensor of floats')
        sm = x.type.make_variable()
        return gof.Apply(self, [x], [sm])
    def perform(self, node, input_storage, output_storage):
        x, = input_storage
        sm = numpy.zeros_like(x)
        for i in xrange(sm.shape[0]):
            row = x[i]
            sm[i] = numpy.exp(row - numpy.max(row))
            sm[i] *= 1.0 / numpy.sum(sm[i])
        output_storage[0][0] = sm
    def grad(self, (x,), (g_sm,)):
        sm = softmax(x)
        return [softmax_grad(g_sm, sm)]

softmax = Softmax()
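The perform method above uses the usual max-subtraction trick: softmax is invariant to shifting a row by a constant, so subtracting the row maximum before exponentiating changes nothing mathematically but keeps exp() from overflowing. The same computation, restated as a vectorized NumPy sketch:

import numpy

def softmax_rows(x):
    # exp(row - max(row)) keeps the largest exponent at 0, avoiding overflow;
    # the shift cancels out after normalization.
    e = numpy.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)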
@opt.register_specialize
@gof.local_optimizer([softmax])
def local_softmax_with_bias(node):
    if node.op == softmax:
        x, = node.inputs
        if x.owner and x.owner.op == tensor.add:
            vectors = []
            non_vectors = []
            for x_in in x.owner.inputs:
                if list(x_in.type.broadcastable) == [True, False] \
                        and isinstance(x_in.owner.op, tensor.DimShuffle):
                    assert len(x_in.owner.inputs) == 1
                    vectors.append(x_in.owner.inputs[0])
                else:
                    non_vectors.append(x_in)
            assert non_vectors #not empty
            if vectors:
                #we're in business...
                vector_sum = tensor.add(*vectors) if len(vectors) > 1 else vectors[0]
                non_vector_sum = tensor.add(*non_vectors) if len(non_vectors) > 1 else non_vectors[0]
                try:
                    sm_bias = softmax_with_bias(non_vector_sum, vector_sum)
                except:
                    #if our arguments have the wrong types, then forget about it
                    return
                return [sm_bias]
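The rewrite is value-preserving: summing broadcasted row vectors into the softmax argument is exactly the SoftmaxWithBias semantics. A NumPy spot-check of the identity the optimizer relies on, using an illustrative softmax_with_bias_ref stand-in (the names here are not Theano API):

import numpy

def softmax_rows(z):
    e = numpy.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def softmax_with_bias_ref(a, b):
    # reference semantics of SoftmaxWithBias: row-wise softmax of (a + b)
    return softmax_rows(a + b[None, :])

rng = numpy.random.RandomState(0)
m1, m2 = rng.rand(3, 4), rng.rand(3, 4)   # the "non_vectors"
v1, v2 = rng.rand(4), rng.rand(4)         # the broadcasted "vectors"

assert numpy.allclose(
    softmax_rows(m1 + m2 + v1 + v2),
    softmax_with_bias_ref(m1 + m2, v1 + v2))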
class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
    """A special compound L{Op} for the output of neural-net classifiers.

-    @type x: is a matrix of floats (32 or 64)
-    @type b: is a [row] vector of floats (32 or 64), length is number of cols in x
-    @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x
+    :type x: is a matrix of floats (32 or 64)
+    :type b: is a [row] vector of floats (32 or 64), length is number of cols in x
+    :type y_idx: a [column] vector of int (32 or 64), length is number of rows in x
+    :returns: row-wise NLL, softmax(x+b), row-wise argmax of (x+b)

    @precondition: every entry in y_idx is a valid (non-negative) column index into x
@@ -646,8 +717,124 @@ def crossentropy_softmax_max_and_argmax_1hot(x, y_idx, **kwargs):
    b = tensor.zeros_like(x[0,:])
    return crossentropy_softmax_max_and_argmax_1hot_with_bias(x, b, y_idx, **kwargs)

class MultinomialCrossentropy1Hot(gof.Op):
    pass

class CrossentropyCategorical1HotGrad(gof.Op):
    def __eq__(self, other):
        return type(self) == type(other)
    def __hash__(self):
        return hash(type(self))
    def make_node(self, g_y, coding_dist, true_one_of_n):
        return gof.Apply(self, [g_y, coding_dist, true_one_of_n],
                [coding_dist.type()])
    def perform(self, node, (g_y, coding_dist, true_one_of_n), (g_coding_strg,)):
        g_coding = numpy.zeros_like(coding_dist)
        for i in xrange(len(g_y)):
            g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i, true_one_of_n[i]]
        g_coding_strg[0] = g_coding

crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
class CrossentropyCategorical1Hot(gof.Op):
    """Compute the cross entropy between a coding distribution and
    a true distribution of the form [0, 0, ..., 0, 1, 0, ..., 0]

    .. math::

        y[i] = - \log(coding_dist[i, one_of_n[i]])

    :note:
    In the case that the coding distribution is the output of a softmax, an application of this
    Op will probably be optimized away in favour of one with a C implementation.
    """
    def __eq__(self, other):
        return type(self) == type(other)
    def __hash__(self):
        return hash(type(self))
    def make_node(self, coding_dist, true_one_of_n):
        """
        :type coding_dist: dense matrix
        :type true_one_of_n: lvector
        :rtype: dvector
        """
        _coding_dist = tensor.as_tensor_variable(coding_dist)
        _true_one_of_n = tensor.as_tensor_variable(true_one_of_n)
        if _coding_dist.type.ndim != 2:
            raise TypeError('matrix required for argument: coding_dist')
        if _true_one_of_n.type != tensor.lvector:
            raise TypeError('integer vector required for argument: true_one_of_n')
        return gof.Apply(self, [_coding_dist, _true_one_of_n], [tensor.dvector()])
    def perform(self, node, (coding, one_of_n), (y_out,)):
        y = numpy.zeros_like(coding[:, 0])
        for i in xrange(len(y)):
            y[i] = -numpy.log(coding[i, one_of_n[i]])
        y_out[0] = y
    def grad(self, (coding, one_of_n), (g_y,)):
        return [crossentropy_categorical_1hot_grad(g_y, coding, one_of_n), None]

crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
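Together the two Ops implement this forward/backward pair; a vectorized NumPy sketch of the same arithmetic as the perform methods above:

import numpy

def xent_1hot(coding, one_of_n):
    # y[i] = -log(coding[i, one_of_n[i]])
    rows = numpy.arange(len(one_of_n))
    return -numpy.log(coding[rows, one_of_n])

def xent_1hot_grad(g_y, coding, one_of_n):
    # only the labelled entry of each row receives gradient: -g_y[i] / p
    g_coding = numpy.zeros_like(coding)
    rows = numpy.arange(len(g_y))
    g_coding[rows, one_of_n] = -g_y / coding[rows, one_of_n]
    return g_coding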
@gof.optimizer
def crossentropy_to_crossentropy_with_softmax(env):
    #not a local optimization because we are replacing outputs from several nodes at once
    def search_make_one_sub():
        for node in env.toposort():
            if node.op == crossentropy_categorical_1hot:
                nll, = node.outputs
                sm, one_of_n = node.inputs
                if sm.owner and sm.owner.op == softmax:
                    x, = sm.owner.inputs
                    new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(
                            x, tensor.zeros_like(x[0]), one_of_n)
                    env.replace_all_validate([(nll, new_nll), (sm, new_sm)],
                            reason="Merge")
                    return True
                if sm.owner and sm.owner.op == softmax_with_bias:
                    x, b = sm.owner.inputs
                    new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(
                            x, b, one_of_n)
                    env.replace_all_validate([(nll, new_nll), (sm, new_sm)],
                            reason="Merge")
                    return True
        return False
    while search_make_one_sub():
        pass
    return

optdb.register('XentThing', crossentropy_to_crossentropy_with_softmax,
        60.00, 'fast_run', 'inplace', 'xent')
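The while search_make_one_sub(): pass loop is a fixed-point iteration: each pass applies at most one substitution and then restarts, since a replacement invalidates the topological order being walked, and the loop ends once a full pass finds nothing to rewrite. A toy illustration of the pattern with a hypothetical rewrite over nested tuples (not Theano's graph machinery):

def rewrite_to_fixed_point(exprs, try_one):
    # apply at most one substitution per scan; rescan until a pass is clean
    def search_make_one_sub():
        for i, e in enumerate(exprs):
            replacement = try_one(e)
            if replacement is not None:
                exprs[i] = replacement
                return True
        return False
    while search_make_one_sub():
        pass
    return exprs

def cancel_double_neg(e):
    # ('neg', ('neg', x)) -> x
    if isinstance(e, tuple) and e[0] == 'neg' \
            and isinstance(e[1], tuple) and e[1][0] == 'neg':
        return e[1][1]
    return None

print(rewrite_to_fixed_point([('neg', ('neg', ('neg', ('neg', 'x'))))],
        cancel_double_neg))   # -> ['x'] after two substitutions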
@gof.local_optimizer([softmax_grad])
def local_crossentropy_to_crossentropy_with_softmax_grad(node):
    if node.op == softmax_grad:
        g_coding_dist, coding_dist = node.inputs
        if g_coding_dist.owner and g_coding_dist.owner.op == crossentropy_categorical_1hot_grad:
            g_nll, coding_dist, true_one_of_n = g_coding_dist.owner.inputs
            dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, coding_dist, true_one_of_n)
            return [dx]
opt.register_specialize(local_crossentropy_to_crossentropy_with_softmax_grad)
@opt.register_specialize
@gof.local_optimizer([tensor._max_and_argmax])
def local_argmax_pushdown(node):
    if node.op == tensor._max_and_argmax:
        x_max, x_argmax = node.outputs
        x, axis = node.inputs
        #TODO: Make a list/set of monotonic ops...
        if x.owner and x.owner.op in (softmax, softplus, tensor.exp,
                tensor.log, tensor.tanh, sigmoid):
            pre_x, = x.owner.inputs
            return tensor._max_and_argmax(pre_x, axis)
        if x.owner and x.owner.op == softmax_with_bias:
            pre_x, pre_bias = x.owner.inputs
            return tensor._max_and_argmax(pre_x
                    + tensor.DimShuffle(pre_bias.broadcastable, ('x', 0))(pre_bias),
                    axis)
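The pushdown rests on monotonicity: a strictly increasing map applied before max-and-argmax does not change which element is largest, so the row-wise argmax can be computed on the pre-activation directly. A NumPy spot-check of that property for some of the ops listed above (log is skipped here only because it needs positive inputs):

import numpy

def softmax_rows(z):
    e = numpy.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

rng = numpy.random.RandomState(0)
x = rng.randn(5, 7)
sigmoid = lambda z: 1.0 / (1.0 + numpy.exp(-z))

for f in (numpy.exp, numpy.tanh, sigmoid, softmax_rows):
    # softmax also qualifies: exp is increasing and the row normalizer is shared
    assert (f(x).argmax(axis=1) == x.argmax(axis=1)).all()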
def binary_crossentropy(output, target):
@@ -660,6 +847,42 @@ def binary_crossentropy(output, target):
    """
    return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output))
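The same formula, restated in NumPy for reference (elementwise cross-entropy between predicted probabilities and 0/1 targets):

import numpy

def binary_crossentropy_ref(output, target):
    # -(t*log(o) + (1-t)*log(1-o)), elementwise
    return -(target * numpy.log(output)
            + (1.0 - target) * numpy.log(1.0 - output))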
def categorical_crossentropy(coding_dist, true_dist, axis=1):
    """Return the cross-entropy between an approximating distribution and a true distribution

    The cross entropy between two probability distributions measures the average number of bits
    needed to identify an event from a set of possibilities, if a coding scheme is used based
    on a given probability distribution q, rather than the "true" distribution p.

    Mathematically it is defined as follows:

    .. math::

        H(p, q) = - \sum_x p(x) \log(q(x))

    :type coding_dist: a dense matrix.
    :param coding_dist: Each slice along axis represents one distribution.

    :type true_dist: a dense matrix or sparse matrix or integer vector.
    :param true_dist: In the case of a matrix argument, each slice along axis represents one
        distribution. In the case of an integer vector argument, each element represents the
        position of the '1' in a 1-of-N encoding.

    :type axis: int
    :param axis: the dimension over which each distribution runs. (1 for row distributions, 0
        for column distributions)

    :rtype: dvector
    :returns: the cross entropy between each coding and true distribution.
    """
    if true_dist.ndim == 2:
        return -theano.sum(true_dist * log(coding_dist), axis=axis)
    else:
        return categorical_crossentropy_1hot(coding_dist.T if axis == 0 else coding_dist,
                true_dist)
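A NumPy restatement of the two branches, assuming row distributions (axis=1): a dense true_dist pays for every coordinate it puts mass on, while a 1-of-N integer vector keeps only the single labelled term of the sum.

import numpy

def categorical_crossentropy_ref(coding_dist, true_dist):
    # H(p, q) = -sum_x p(x) log(q(x)), computed row-wise
    true_dist = numpy.asarray(true_dist)
    if true_dist.ndim == 2:
        return -(true_dist * numpy.log(coding_dist)).sum(axis=1)
    # integer labels: only one term of the sum is nonzero
    rows = numpy.arange(len(true_dist))
    return -numpy.log(coding_dist[rows, true_dist])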
class Prepend_scalar_constant_to_each_row(gof.Op):
theano/tensor/tests/test_nnet.py
@@ -123,5 +123,133 @@ class T_solve(unittest.TestCase):
        #print numpy.dot(A,x)

class T_CrossentropyCategorical1Hot(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()

    def test_grad(self):
        x = tensor.matrix('x')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot
        xe = op(x, one_of_n)
        f = theano.function([x, one_of_n], xe)
        xe_val = f(numpy.asarray([[.4, .6, .0], [.1, .8, .1]]), [0, 1])
        assert numpy.allclose(xe_val, -numpy.log([.4, .8]))
        def oplike(x):
            return op(x, [0, 1])
        tensor.verify_grad(oplike, [numpy.asarray([[.4, .6, .0], [.1, .8, .1]])],
                rng=numpy.random)
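tensor.verify_grad checks the Op's symbolic gradient against finite differences. A minimal NumPy version of the same check for this Op's forward function (a sketch of the idea, not the verify_grad implementation):

import numpy

def xent_1hot(coding, one_of_n):
    rows = numpy.arange(len(one_of_n))
    return -numpy.log(coding[rows, one_of_n])

x = numpy.asarray([[.4, .6, .0], [.1, .8, .1]])
labels = [0, 1]
eps = 1e-6

# analytic: d(-log x[0,0]) / dx[0,0] = -1 / x[0,0]
analytic = -1.0 / x[0, 0]

# central finite difference of sum(xent) wrt x[0,0]
xp = x.copy(); xp[0, 0] += eps
xm = x.copy(); xm[0, 0] -= eps
numeric = (xent_1hot(xp, labels).sum() - xent_1hot(xm, labels).sum()) / (2 * eps)
assert abs(numeric - analytic) < 1e-4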
    def test_softmax_optimizations(self):
        x = tensor.matrix('x')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot
        xe = op(x, one_of_n)
        env = gof.Env([x, one_of_n], [op(softmax(x), one_of_n)])
        assert env.outputs[0].owner.op == op
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
        assert env.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

    def test_softmax_optimizations_w_bias(self):
        x = tensor.matrix('x')
        b = tensor.vector('b')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot
        xe = op(x, one_of_n)
        env = gof.Env([x, b, one_of_n], [op(softmax(x + b), one_of_n)])
        assert env.outputs[0].owner.op == op
        print 'BEFORE'
        for node in env.toposort():
            print node.op
        print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
        assert len(env.toposort()) == 1
        assert env.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
    def test_softmax_grad_optimizations(self):
        x = tensor.matrix('x')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot
        xe = op(softmax(x), one_of_n)
        sum_xe = tensor.sum(xe)
        g_x = tensor.grad(sum_xe, x)
        env = gof.Env([x, one_of_n], [g_x])
        print 'BEFORE'
        for node in env.toposort():
            print node.op
        print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
        print 'AFTER'
        for node in env.toposort():
            print node.op
        assert env.toposort()[3].op == crossentropy_softmax_argmax_1hot_with_bias
        assert env.toposort()[5].op == crossentropy_softmax_1hot_with_bias_dx
        assert len(env.toposort()) == 6
        #shorthand for actually checking what I really
def test_argmax_pushdown():
    x = tensor.dmatrix()
    env = gof.Env([x],
            [tensor.max(softmax(tensor.exp(tensor.tanh(sigmoid(x)))))])
    theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(env)
    #print 'AFTER'
    #for node in env.toposort():
    #    print node.op
    assert len(env.toposort()) == 1
    assert env.toposort()[0].op == tensor._max_and_argmax

def test_argmax_pushdown_bias():
    x = tensor.dmatrix()
    b = tensor.dvector()
    env = gof.Env([x, b], [tensor.max(softmax_with_bias(x, b))])
    theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(env)
    #print 'AFTER'
    #for node in env.toposort():
    #    print node.op
    assert len(env.toposort()) == 3

if __name__ == '__main__':
    unittest.main()