testgroup / pytensor · Commits
Commit 8f038cc6, authored Sep 21, 2015 by Pascal Lamblin
Merge pull request #3404 from kelvinxu/stack_trace
Add stack trace for nnet
Parents: cebafe0d, 8b239b25
Showing 1 changed file with 33 additions and 120 deletions.
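Nearly every hunk below applies the same pattern: where a graph optimization builds a replacement variable, it now calls `copy_stack_trace(source, destination)` so the replacement inherits the stack trace of the variable it replaces, and error messages keep pointing at the user's original code. A minimal sketch of that pattern, written in the same style as this file but not part of the commit (the rewrite exp(log(x)) -> x is invented for illustration):

# Hedged sketch, not from this commit: a local optimizer that preserves
# stack traces the way the diff below does. The rewrite exp(log(x)) -> x
# is invented for the example.
from theano import gof
from theano.tensor import basic as tensor
from theano.tensor.opt import copy_stack_trace


@gof.local_optimizer([tensor.exp])
def local_exp_of_log(node):
    if node.op == tensor.exp:
        inp = node.inputs[0]
        if inp.owner and inp.owner.op == tensor.log:
            ret = inp.owner.inputs[0]
            # Copy the trace of the variable being replaced onto its
            # replacement before handing it back to the optimizer.
            copy_stack_trace(node.outputs[0], ret)
            return [ret]

The returned list replaces the node's outputs, exactly as the rewritten optimizers in the hunks below do.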
theano/tensor/nnet/nnet.py (+33, -120)
@@ -21,8 +21,8 @@ from theano import gof
 from theano import scalar
 from theano.tensor import basic as tensor
 from theano.tensor import subtensor
-from theano.tensor import elemwise
 from theano.tensor import opt
+from theano.tensor.opt import copy_stack_trace
 from theano.compile import optdb
 from theano.gof import Apply
@@ -31,6 +31,7 @@ from theano.gradient import DisconnectedType
 from theano.gradient import grad_not_implemented
 from theano.tensor.type import values_eq_approx_remove_nan

+
 ############
 #
 # TENSOR OPS
@@ -113,7 +114,8 @@ class SoftmaxWithBias(gof.Op):
         # TODO: set error messages for failures in this code
-        # TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
+        # TODO: use this to accept float32 and int32:
+        # node.inputs[0].type.dtype_specs()[1]
         init_decl = """
         npy_intp* Nx = PyArray_DIMS(%(x)s);
         npy_intp Sx = 0;
@@ -634,16 +636,19 @@ def local_softmax_with_bias(node):
                 # we're in business...
                 if len(vectors) > 1:
                     vector_sum = tensor.add(*vectors)
+                    copy_stack_trace(x_in, vector_sum)
                 else:
                     vector_sum = vectors[0]

                 if len(non_vectors) > 1:
                     non_vector_sum = tensor.add(*non_vectors)
+                    copy_stack_trace(x_in, non_vector_sum)
                 else:
                     non_vector_sum = non_vectors[0]

                 try:
                     sm_bias = softmax_with_bias(non_vector_sum, vector_sum)
+                    copy_stack_trace(node.outputs[0], sm_bias)
                 except Exception:
                     # if our arguments have the wrong types, then
                     # forget about it
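Worth noting in this hunk: the intermediate sums take their trace from the input term `x_in` they were built from, while the final `sm_bias` takes the trace of the node output it replaces. That split is read off the diff itself; it is not stated anywhere in the commit.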
@@ -692,114 +697,6 @@ def softmax_simplifier(numerators, denominators):
     return numerators, denominators
 opt.local_mul_canonizer.add_simplifier(softmax_simplifier, 'softmax_simplifier')

-if 0:
-    @opt.register_specialize
-    @gof.local_optimizer([tensor.add])
-    def local_softmax_grad(node):
-        '''dy*sm - DimShuffle{0,'x'}(sum{1}(dy*sm))*sm -> softmax_grad(dy,sm)'''
-        # TODO what if the signs are changed?
-        # TODO and if a scalar is distributed before each of the terms?
-        # TODO 'dy' could also be a product
-        if node.op == tensor.add and node.out.ndim == 2:
-            add_inputs = node.inputs
-            # Trying to locate two nodes in the sum:
-            #   dy * sm, prod_term
-            #   - DimShuffle{0,'x'}(sum{1}(dy*sm))*sm
-            prod_term = None
-            other_terms = []
-            # First, prod_term
-            for add_in in add_inputs:
-                if (add_in.owner and
-                        add_in.owner.op == tensor.mul and
-                        prod_term is None):
-                    mul_inputs = add_in.owner.inputs
-                    if (len(mul_inputs) == 2 and
-                            all([mul_in.ndim == 2
-                                 for mul_in in mul_inputs])):
-                        prod_term = add_in
-                    else:
-                        other_terms.append(add_in)
-                else:
-                    other_terms.append(add_in)
-            if prod_term is None:
-                # print 'no prod_term'
-                return
-            assert len(other_terms) == len(add_inputs) - 1
-            ds_term = None
-            rest = []
-            for add_in in other_terms:
-                if add_in.owner and add_in.owner.op == tensor.neg:
-                    neg_input = add_in.owner.inputs[0]
-                    if neg_input.owner and neg_input.owner.op == tensor.mul:
-                        mul2_inputs = neg_input.owner.inputs
-                        if len(mul2_inputs) != 2:
-                            rest.append(add_in)
-                            # print 'len(mul2_inputs) =', len(mul2_inputs)
-                            continue
-                        # Try and find DimShuffle(Sum)
-                        maybe_ds = None
-                        for i, mul2_in in enumerate(mul2_inputs):
-                            if mul2_in.owner and isinstance(
-                                    mul2_in.owner.op, elemwise.DimShuffle):
-                                maybe_ds = mul2_in
-                                maybe_sm = mul2_inputs[1 - i]  # The other one
-                        if (maybe_ds is None or
-                                maybe_ds.ndim != 2 or
-                                maybe_sm.ndim != 2):
-                            rest.append(add_in)
-                            # print 'maybe_ds =', maybe_ds
-                            # if maybe_ds:
-                            #     print 'maybe_ds.ndim =', maybe_ds.ndim, ', maybe_sm.ndim =', maybe_sm.ndim
-                            continue
-                        if maybe_sm is mul_inputs[0]:
-                            maybe_dy = mul_inputs[1]
-                        elif maybe_sm is mul_inputs[1]:
-                            maybe_dy = mul_inputs[0]
-                        else:
-                            rest.append(add_in)
-                            # print 'maybe_sm, maybe_dy =', maybe_sm, maybe_dy
-                            # print 'mul_inputs =', mul_inputs
-                            continue
-                        ds_order = maybe_ds.owner.op.new_order
-                        ds_input = maybe_ds.owner.inputs[0]
-                        axis = None
-                        if ds_input.owner and isinstance(ds_input.owner.op,
                                                          elemwise.Sum):
-                            axis = ds_input.owner.op.axis
-                            sum_input = ds_input.owner.inputs[0]
-                        if ((ds_order != (0, 'x')) or (axis != (1,)) or
-                                (sum_input is not prod_term)):
-                            rest.append(add_in)
-                            # print 'ds_order =', ds_order
-                            # print 'axis =', axis
-                            # if axis is not None:
-                            #     print 'sum_input =', sum_input, ', prod_term =', prod_term
-                            # else:
-                            #     print 'ds_input.owner =', ds_input.owner
-                            # print 'add_in =', add_in
-                            continue
-                        ds_term = add_in
-                    else:
-                        # print 'neg_input.owner =', neg_input.owner
-                        rest.append(add_in)
-                else:
-                    # print 'add_in.owner =', add_in.owner
-                    rest.append(add_in)
-            if ds_term is None:
-                # print 'no ds_term'
-                return
-            if len(rest) == 0:
-                return [softmax_grad(maybe_dy, maybe_sm)]
-            else:
-                return [tensor.add(softmax_grad(maybe_dy, maybe_sm),
-                                   *rest)]


 class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
     """
@@ -1457,6 +1354,7 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
             g_nll, coding_dist, true_one_of_n = g_coding_dist.owner.inputs
             dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, coding_dist,
                                                         true_one_of_n)
+            copy_stack_trace(node.outputs[0], dx)
             return [dx]
@@ -1485,13 +1383,18 @@ def local_argmax_pushdown(node):
         if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp,
                                       tensor.log, tensor.tanh, sigmoid):
             pre_x, = x.owner.inputs
-            return tensor._max_and_argmax(pre_x, axis)
+            ret = tensor._max_and_argmax(pre_x, axis)
+            copy_stack_trace(x_max, ret)
+            return ret
         if x.owner and x.owner.op == softmax_with_bias:
             pre_x, pre_bias = x.owner.inputs
-            return tensor._max_and_argmax(pre_x +
-                                          tensor.DimShuffle(
-                                              pre_bias.broadcastable,
-                                              ('x', 0))(pre_bias), axis)
+            ret = tensor._max_and_argmax(pre_x +
+                                         tensor.DimShuffle(
+                                             pre_bias.broadcastable,
+                                             ('x', 0))(pre_bias), axis)
+            # copy both stack traces
+            copy_stack_trace(x_max, ret)
+            return ret


 # Utility function used by the two next optimizations
@@ -1585,9 +1488,12 @@ def local_advanced_indexing_crossentropy_onehot(node):
     # Check that rows == arange(labels.shape[0])
     if _check_rows_is_arange_len_labels(rows, labels):
         if labels.ndim == 1 and x_var.ndim == 2:
-            return [-crossentropy_softmax_argmax_1hot_with_bias(x_var,
-                                                                b_var,
-                                                                labels)[0]]
+            minus_ret = crossentropy_softmax_argmax_1hot_with_bias(x_var,
+                                                                   b_var,
+                                                                   labels)[0]
+            ret = -minus_ret
+            copy_stack_trace(node.outputs[0], [minus_ret, ret])
+            return [ret]


 @opt.register_specialize('fast_compile_gpu')
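Note the destination argument in the hunk above: `copy_stack_trace(node.outputs[0], [minus_ret, ret])` passes a list, so both the intermediate `minus_ret` and the negated result `ret` inherit the trace of the output they jointly replace. That the destination may be a list is inferred from this diff, not from documented API.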
@@ -1809,7 +1715,11 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
     # Dimension check before substitution
     if labels.ndim == 1 and x_var.ndim == 2:
-        return [crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels)]
+        ret = crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels)
+        # The stack trace is not added to output_grad, sm and labels at
+        # the moment but may need to be added at a future point
+        copy_stack_trace(node.outputs[0], ret)
+        return [ret]
     else:
         return
@@ -1825,6 +1735,7 @@ def graph_merge_softmax_with_crossentropy_softmax(node):
                 if big_client in [b_client[0] for b_client in b.clients]:
                     xx, bb, ll = big_client.inputs
                     mergeable_client = big_client.op(x, b, ll)
+                    copy_stack_trace(node.outputs[0], mergeable_client[1])
                     return [mergeable_client[1]]
@@ -1885,7 +1796,9 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
         msg = '`sm` and `dy` do not have the same shape.'
         dz = opt.Assert(msg)(dz, cond)

-        return [node.op(dz, sm, y_idx)]
+        ret = node.op(dz, sm, y_idx)
+        copy_stack_trace(node.outputs[0], ret)
+        return [ret]


 def binary_crossentropy(output, target):