Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
8d9fa9e5
提交
8d9fa9e5
authored
11月 28, 2016
作者:
khaotik
提交者:
khaotik
1月 27, 2017
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cleaner grad() method for OpFromGraph
上级
788e8bac
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
69 行增加
和
59 行删除
+69
-59
builders.py
theano/compile/builders.py
+69
-59
没有找到文件。
theano/compile/builders.py
浏览文件 @
8d9fa9e5
...
...
@@ -37,25 +37,33 @@ class OpFromGraph(gof.Op):
replace
=
dict
(
izip
(
self
.
shared_inputs
,
shared_vars
)),
copy_inputs_over
=
False
)
(
internal_inputs
,
intern
al_outputs
,
(
local_inputs
,
loc
al_outputs
,
[
clone_d
,
update_d
,
update_expr
,
shared_inputs
])
=
new
assert
len
(
intern
al_inputs
)
==
len
(
inputs
)
+
len
(
self
.
shared_inputs
)
assert
len
(
intern
al_outputs
)
==
len
(
outputs
)
assert
len
(
loc
al_inputs
)
==
len
(
inputs
)
+
len
(
self
.
shared_inputs
)
assert
len
(
loc
al_outputs
)
==
len
(
outputs
)
assert
not
update_d
assert
not
update_expr
assert
not
shared_inputs
self
.
internal_inputs
=
intern
al_inputs
self
.
internal_outputs
=
intern
al_outputs
self
.
local_inputs
=
loc
al_inputs
self
.
local_outputs
=
loc
al_outputs
self
.
inputs
=
inputs
self
.
outputs
=
outputs
self
.
kwargs
=
kwargs
self
.
input_types
=
[
inp
.
type
for
inp
in
inputs
]
self
.
output_types
=
[
out
.
type
for
out
in
outputs
]
# used to cache gradient for subgraph
self
.
grad_ops
=
grad_overrides
# should be True after 1st call to grad()
self
.
cached_grad_ops
=
False
# grad_op: a functor takes form:
#
# def grad_op(inputs:list, ups_grads:list):
# return dns_grads:list
#
# This is used to cache gradient for subgraph
# for __init__, just set as grad_overrides
#
# grad_op should be build on the 1st call to grad()
# after which grad_op_is_cached should be True
self
.
grad_op
=
grad_overrides
self
.
grad_op_is_cached
=
False
def
__eq__
(
self
,
other
):
# TODO: recognize a copy
...
...
@@ -66,45 +74,45 @@ class OpFromGraph(gof.Op):
return
hash
(
type
(
self
))
def
grad
(
self
,
inputs
,
output_grads
):
if
self
.
cached_grad_ops
:
return
self
.
grad_ops
(
inputs
,
output_grads
)
upstream_grads
=
dict
(
izip
(
self
.
internal_outputs
,
output_grads
))
if
self
.
grad_ops
is
None
:
self
.
grad_ops
=
[]
grad_ops_l
=
self
.
grad_ops
if
isinstance
(
grad_ops_l
,
list
):
if
len
(
grad_ops_l
)
>
len
(
self
.
internal_inputs
):
if
self
.
grad_op_is_cached
:
return
self
.
grad_op
(
inputs
,
output_grads
)
if
self
.
grad_op
is
None
:
self
.
grad_op
=
[]
# we need to convert a list into a single funtor
if
isinstance
(
self
.
grad_op
,
list
):
grad_op_l
=
self
.
grad_op
if
len
(
grad_op_l
)
>
len
(
self
.
local_inputs
):
raise
ValueError
(
'Can override
%
d gradients at most, got
%
d'
%
(
len
(
self
.
internal_inputs
),
len
(
grad_ops_l
)))
if
len
(
grad_ops_l
)
<
len
(
self
.
internal_inputs
):
grad_ops_l
+=
[
None
]
*
(
len
(
self
.
internal_inputs
)
-
len
(
grad_ops_l
))
# It is normal if some inputs are not needed in order
# to compute the gradient, so we ignore them.
gs
=
[
go
if
go
else
type
(
self
)(
self
.
internal_inputs
+
output_grads
,
(
lambda
g
:
g
if
g
else
(
lambda
*
a
:
None
))(
theano
.
gradient
.
grad
(
cost
=
None
,
known_grads
=
upstream_grads
,
wrt
=
[
inp
],
disconnected_inputs
=
'ignore'
)
),
on_unused_input
=
'ignore'
)
for
go
,
inp
in
izip
(
grad_ops_l
,
self
.
internal_inputs
)]
# since OpFromGraphBase only accepts input sequence,
# additional filtering is needed
def
grad_ops
(
inps
,
grds
):
# nonlocal gs, grad_ops_l
return
[(
go
(
inps
,
grds
)
if
ov
else
go
(
*
(
inps
+
grds
)))
for
go
,
ov
in
izip
(
gs
,
grad_ops_l
)]
self
.
grad_ops
=
grad_ops
else
:
grad_ops
=
grad_ops_l
self
.
cached_grad_ops
=
True
return
grad_ops
(
inputs
,
output_grads
)
len
(
self
.
local_inputs
),
len
(
grad_op_l
)))
if
len
(
grad_op_l
)
<
len
(
self
.
local_inputs
):
grad_op_l
+=
[
None
]
*
(
len
(
self
.
local_inputs
)
-
len
(
grad_op_l
))
wrt
=
[
self
.
local_inputs
[
i
]
for
i
,
go
in
enumerate
(
grad_op_l
)
if
not
go
]
# compute non-overriding downsteam gradients from upstreams grads
# it's normal some input may be disconnected, thus the 'ignore'
ups_grads_d
=
dict
(
izip
(
self
.
local_outputs
,
output_grads
))
nat_dns_grads
=
iter
(
theano
.
gradient
.
grad
(
cost
=
None
,
known_grads
=
ups_grads_d
,
wrt
=
wrt
,
disconnected_inputs
=
'ignore'
))
# combine overriding gradients
dns_grads_l
=
[
go
(
self
.
local_inputs
,
output_grads
)
if
go
else
next
(
nat_dns_grads
)
for
go
in
grad_op_l
]
grad_ofg
=
type
(
self
)(
inputs
=
self
.
local_inputs
+
output_grads
,
outputs
=
dns_grads_l
,
inline
=
self
.
is_inline
,
on_unused_input
=
'ignore'
)
def
grad_op
(
inps
,
grds
):
return
grad_ofg
(
*
(
list
(
inps
)
+
list
(
grds
)))
self
.
grad_op
=
grad_op
self
.
grad_op_is_cached
=
True
return
self
.
grad_op
(
inputs
,
output_grads
)
def
make_node
(
self
,
*
inputs
):
for
input
,
type
in
zip
(
inputs
,
self
.
input_types
):
...
...
@@ -115,8 +123,8 @@ class OpFromGraph(gof.Op):
apply_node
=
gof
.
Apply
(
self
,
list
(
inputs
)
+
self
.
shared_inputs
,
[
type
()
for
type
in
self
.
output_types
])
apply_node
.
internal_inputs
=
self
.
intern
al_inputs
apply_node
.
internal_outputs
=
self
.
intern
al_outputs
apply_node
.
local_inputs
=
self
.
loc
al_inputs
apply_node
.
local_outputs
=
self
.
loc
al_outputs
return
apply_node
def
connection_pattern
(
self
,
node
):
...
...
@@ -125,12 +133,12 @@ class OpFromGraph(gof.Op):
"""
return
io_connection_pattern
(
self
.
internal_inputs
,
self
.
intern
al_outputs
)
self
.
local_inputs
,
self
.
loc
al_outputs
)
def
infer_shape
(
self
,
node
,
shapes
):
out_shp
=
theano
.
scan_module
.
scan_utils
.
infer_shape
(
self
.
intern
al_outputs
,
self
.
intern
al_inputs
,
self
.
loc
al_outputs
,
self
.
loc
al_inputs
,
shapes
)
# Clone the output shape so that shape are computed from outer inputs.
...
...
@@ -140,7 +148,7 @@ class OpFromGraph(gof.Op):
# But doing it multiple time could duplicate common subgraph between
# each shape call. Theano optimizer will clean this up later, but this
# will ask extra work to the optimizer.
repl
=
dict
(
zip
(
self
.
intern
al_inputs
,
node
.
inputs
))
repl
=
dict
(
zip
(
self
.
loc
al_inputs
,
node
.
inputs
))
cloned
=
theano
.
clone
(
reduce
(
tuple
.
__add__
,
out_shp
),
replace
=
repl
)
ret
=
[]
used
=
0
...
...
@@ -153,8 +161,8 @@ class OpFromGraph(gof.Op):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
if
not
hasattr
(
self
,
"fn"
)
and
impl
==
'py'
:
self
.
fn
=
orig_function
(
self
.
intern
al_inputs
,
self
.
intern
al_outputs
,
self
.
fn
=
orig_function
(
self
.
loc
al_inputs
,
self
.
loc
al_outputs
,
**
self
.
kwargs
)
def
perform
(
self
,
node
,
inputs
,
outputs
):
...
...
@@ -165,6 +173,7 @@ class OpFromGraph(gof.Op):
# we wont need this copy anymore
output
[
0
]
=
variable
.
copy
()
@gof.local_optimizer
([
OpFromGraph
])
def
inline_ofg_expansion
(
node
):
"""
...
...
@@ -178,9 +187,9 @@ def inline_ofg_expansion(node):
if
not
op
.
is_inline
:
return
False
return
theano
.
clone
(
op
.
intern
al_outputs
,
{
op
.
loc
al_outputs
,
{
u
:
v
for
u
,
v
in
izip
(
node
.
op
.
intern
al_inputs
,
node
.
inputs
)})
node
.
op
.
loc
al_inputs
,
node
.
inputs
)})
optdb
.
register
(
'inline_ofg_expansion'
,
...
...
@@ -191,6 +200,7 @@ optdb.register(
# we should let DebugMode know about it
ops_with_inner_function
[
OpFromGraph
]
=
'fn'
# API for OpFromGraph
def
op_from_graph
(
inputs
,
outputs
,
inline
=
False
,
grad_overrides
=
None
,
**
kwargs
...
...
@@ -214,7 +224,7 @@ def op_from_graph(
grad_overrides: None | function | list of (None|function), optional
Used to override default gradient routine.
Overriding function(s) must take two list of variable as inputs,
the original inputs and ups
tream
gradients
the original inputs and ups gradients
For different `grad_overrides`:
- `None` : will use default gradient routine.
...
...
@@ -225,8 +235,8 @@ def op_from_graph(
TODO:
- examples for a multi-layer mlp. where?
- __hash__, __eq__ otherwise won't merge, try
gof.opt.is_same_graph_with_merge(op1.
intern
al_outputs, op2,
intern
al_outputs)
gof.opt.is_same_graph_with_merge(op1.
loc
al_outputs, op2,
loc
al_outputs)
- c_code() to remove the double overhead?
- grad() make it support DisconnectedType and the new interface
- check how it works with updates.
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论