Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1b1e2ec3
提交
1b1e2ec3
authored
3月 11, 2014
作者:
Nicholas Leonard
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
initial commit
上级
a915ac04
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
129 行增加
和
1 行删除
+129
-1
__init__.py
theano/__init__.py
+1
-1
gradient.py
theano/gradient.py
+75
-0
test_gradient.py
theano/tests/test_gradient.py
+53
-0
没有找到文件。
theano/__init__.py
浏览文件 @
1b1e2ec3
...
...
@@ -79,7 +79,7 @@ from theano.updates import Updates, OrderedUpdates
#we don't import by default as we don't want to force having scipy installed.
#import sparse
from
theano.gradient
import
Rop
,
Lop
,
grad
from
theano.gradient
import
Rop
,
Lop
,
grad
,
subgrad
if
config
.
device
.
startswith
(
'gpu'
)
or
config
.
init_gpu_device
.
startswith
(
'gpu'
):
import
theano.sandbox.cuda
...
...
theano/gradient.py
浏览文件 @
1b1e2ec3
...
...
@@ -543,6 +543,81 @@ def grad(cost, wrt, consider_constant=None,
rval
,
=
rval
return
rval
def subgrad(wrt, grad_end, known_grads=None, cost=None, details=False):
    """Compute gradients for a subgraph of a symbolic Theano graph.

    With respect to `wrt`, computes the gradients of `known_grads`,
    `cost`, or both, backpropagating only down to the variables listed
    in `grad_end` (which are treated as constants). All disconnected
    inputs are ignored.

    This can be useful when one needs to perform gradient descent
    iteratively (e.g. one layer at a time in an MLP), or when a
    particular operation is not differentiable in Theano (e.g.
    stochastic sampling from a multinomial). In the latter case, the
    gradient of the non-differentiable process can be approximated by a
    user-defined formula computed from the gradients at the outputs of
    the process; these are obtained by performing a subgrad from the
    cost (or previously known_grads) up to the outputs of the process
    (`grad_end`). The user-defined gradients of the process can then be
    fed into another subgrad as `known_grads`, together with any other
    cost function (e.g. weight decay), and so on.

    Parameters
    ----------
    wrt : list
        Variables with respect to which the gradients are computed.
    grad_end : list
        Theano variables at which to stop the backpropagation of
        gradients (passed as ``consider_constant`` to ``theano.grad``).
    known_grads : dict, optional
        Mapping of variable -> gradient in the forward part (near the
        cost) of the graph for which gradients are already known. These
        are propagated backwards up to the variables in `grad_end`.
    cost : Theano scalar, optional
        Additional cost for which to compute gradients, e.g. weight
        decay or an L1 constraint on an output.
    details : bool, optional
        When True, also return the individual gradient lists derived
        from `known_grads` and `cost` (in the same order as `wrt`).

    Returns
    -------
    list
        Gradients of `wrt` (same order), summing the `known_grads` and
        `cost` contributions. When `details` is True, returns the tuple
        ``(grads, kg_grads, cost_grads)`` where the last two are the
        separate contributions (either may be None when the
        corresponding source was not supplied).
    """
    # At least one gradient source must be supplied.
    assert ((cost is not None) or (known_grads is not None))
    assert isinstance(grad_end, list)
    assert isinstance(wrt, list)
    if known_grads is not None:
        assert isinstance(known_grads, dict)

    # Contribution backpropagated from the user-supplied gradients.
    kg_grads = None
    if known_grads is not None:
        kg_grads = list(theano.grad(cost=None, wrt=wrt,
                                    known_grads=known_grads,
                                    consider_constant=grad_end,
                                    disconnected_inputs='ignore'))

    # Contribution backpropagated from the (optional) extra cost.
    cost_grads = None
    if cost is not None:
        cost_grads = list(theano.grad(cost=cost, wrt=wrt,
                                      consider_constant=grad_end,
                                      disconnected_inputs='ignore'))

    if kg_grads is None:
        grads = cost_grads
    else:
        grads = kg_grads
        if cost_grads is not None:
            # Sum both contributions elementwise, per wrt variable.
            for i in range(len(grads)):
                grads[i] += cost_grads[i]

    if details:
        return grads, kg_grads, cost_grads
    return grads
def
_node_to_pattern
(
node
):
""" given an apply node, obtain its connection pattern
...
...
theano/tests/test_gradient.py
浏览文件 @
1b1e2ec3
...
...
@@ -553,6 +553,59 @@ def test_disconnected_cost_grad():
except
theano
.
gradient
.
DisconnectedInputError
:
return
raise
AssertionError
(
"A disconnected gradient has been ignored."
)
def test_subgrad():
    # Tests that theano.grad with no known_grads matches what happens
    # if you chain successive subgrads, one "layer" at a time.
    x = theano.tensor.fvector('x')
    t = theano.tensor.fvector('t')
    w1 = theano.shared(np.random.randn(3, 4))
    w2 = theano.shared(np.random.randn(4, 2))

    # Two-layer tanh network; squared error plus penalty terms.
    a1 = theano.tensor.tanh(theano.tensor.dot(x, w1))
    a2 = theano.tensor.tanh(theano.tensor.dot(a1, w2))
    cost2 = theano.tensor.sqr(a2 - t).sum()
    cost2 += theano.tensor.sqr(w2.sum())
    cost1 = theano.tensor.sqr(w1.sum())

    # Per-layer parameter groups, costs and backprop stop points,
    # ordered from the output layer back towards the input.
    params = [[w2, a1], [w1, x]]
    costs = [cost2, cost1]
    grad_ends = [[a1], [x]]

    inputs = [t, x]
    rng = np.random.RandomState([2012, 11, 15])
    values = [rng.randn(2), rng.randn(3)]
    values = [np.cast[ipt.dtype](value)
              for ipt, value in zip(inputs, values)]

    # Reference: gradients of the full cost computed in a single pass.
    wrt = [w2, a1, w1, x]
    cost = cost2 + cost1
    true_grads = theano.grad(cost, wrt)
    true_grads = theano.function(inputs, true_grads)
    true_grads = true_grads(*values)

    # Chain subgrads: each layer's gradients feed the next call as
    # known_grads. (Was a `for i in xrange(2)` index loop whose `cost`
    # variable shadowed the reference cost above; also dropped the
    # leftover debug prints.)
    from theano.gof.python25 import OrderedDict
    known_grad = None
    params2 = []
    for param, layer_cost, grad_end in zip(params, costs, grad_ends):
        pgrad = theano.subgrad(wrt=param, grad_end=grad_end,
                               known_grads=known_grad, cost=layer_cost)
        known_grad = OrderedDict(zip(param, pgrad))
        params2.extend(pgrad)
    pgrads = theano.function(inputs, params2)
    pgrads = pgrads(*values)

    # Chained subgrads must reproduce the one-pass gradients.
    for true_grad, pgrad in zip(true_grads, pgrads):
        assert (np.sum(np.abs(true_grad - pgrad)) < 0.00001)


if __name__ == '__main__':
    unittest.main()
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论