Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a1be5079
提交
a1be5079
authored
8月 21, 2012
作者:
Ian Goodfellow
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
implemented correct handling of unimplemented gradients
上级
7ebae191
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
155 行增加
和
15 行删除
+155
-15
function.py
theano/compile/function.py
+2
-1
function_module.py
theano/compile/function_module.py
+3
-0
gradient.py
theano/gradient.py
+123
-10
test_gradient.py
theano/tests/test_gradient.py
+27
-4
没有找到文件。
theano/compile/function.py
浏览文件 @
a1be5079
...
@@ -13,7 +13,6 @@ from profiling import ProfileStats
...
@@ -13,7 +13,6 @@ from profiling import ProfileStats
from
pfunc
import
pfunc
from
pfunc
import
pfunc
from
numpy
import
any
# to work in python 2.4
from
numpy
import
any
# to work in python 2.4
def
function
(
inputs
,
outputs
=
None
,
mode
=
None
,
updates
=
None
,
givens
=
None
,
def
function
(
inputs
,
outputs
=
None
,
mode
=
None
,
updates
=
None
,
givens
=
None
,
no_default_updates
=
False
,
accept_inplace
=
False
,
name
=
None
,
no_default_updates
=
False
,
accept_inplace
=
False
,
name
=
None
,
rebuild_strict
=
True
,
allow_input_downcast
=
None
,
profile
=
None
,
rebuild_strict
=
True
,
allow_input_downcast
=
None
,
profile
=
None
,
...
@@ -192,6 +191,8 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
...
@@ -192,6 +191,8 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
mode
=
mode
,
mode
=
mode
,
accept_inplace
=
accept_inplace
,
name
=
name
)
accept_inplace
=
accept_inplace
,
name
=
name
)
else
:
else
:
#note: pfunc will also call orig_function-- orig_function is a choke point
# that all compilation must pass through
fn
=
pfunc
(
params
=
inputs
,
fn
=
pfunc
(
params
=
inputs
,
outputs
=
outputs
,
outputs
=
outputs
,
mode
=
mode
,
mode
=
mode
,
...
...
theano/compile/function_module.py
浏览文件 @
a1be5079
...
@@ -15,6 +15,7 @@ import numpy
...
@@ -15,6 +15,7 @@ import numpy
import
theano
import
theano
from
theano
import
gof
from
theano
import
gof
from
theano.gof.python25
import
partial
from
theano.gof.python25
import
partial
from
theano.gradient
import
check_for_bad_grad
import
mode
as
mode_module
import
mode
as
mode_module
from
io
import
In
,
SymbolicInput
,
SymbolicInputKit
,
SymbolicOutput
from
io
import
In
,
SymbolicInput
,
SymbolicInputKit
,
SymbolicOutput
...
@@ -1336,6 +1337,8 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
...
@@ -1336,6 +1337,8 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
t1
=
time
.
time
()
t1
=
time
.
time
()
mode
=
mode_module
.
get_mode
(
mode
)
mode
=
mode_module
.
get_mode
(
mode
)
check_for_bad_grad
(
outputs
)
inputs
=
map
(
convert_function_input
,
inputs
)
inputs
=
map
(
convert_function_input
,
inputs
)
if
outputs
is
not
None
:
if
outputs
is
not
None
:
if
isinstance
(
outputs
,
(
list
,
tuple
)):
if
isinstance
(
outputs
,
(
list
,
tuple
)):
...
...
theano/gradient.py
浏览文件 @
a1be5079
"""Driver for gradient calculations."""
"""Driver for gradient calculations."""
__authors__
=
"James Bergstra, Razvan Pascanu, Arnaud Bergeron"
__authors__
=
"James Bergstra, Razvan Pascanu, Arnaud Bergeron
, Ian Goodfellow
"
__copyright__
=
"(c) 2011, Universite de Montreal"
__copyright__
=
"(c) 2011, Universite de Montreal"
__license__
=
"3-clause BSD License"
__license__
=
"3-clause BSD License"
__contact__
=
"theano-dev <theano-dev@googlegroups.com>"
__contact__
=
"theano-dev <theano-dev@googlegroups.com>"
...
@@ -11,9 +11,9 @@ import __builtin__
...
@@ -11,9 +11,9 @@ import __builtin__
import
logging
import
logging
import
warnings
import
warnings
_logger
=
logging
.
getLogger
(
'theano.gradient'
)
_logger
=
logging
.
getLogger
(
'theano.gradient'
)
import
sys
import
numpy
# for numeric_grad
import
numpy
# for numeric_grad
from
collections
import
deque
import
theano
import
theano
from
theano.raise_op
import
Raise
from
theano.raise_op
import
Raise
...
@@ -194,20 +194,131 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
...
@@ -194,20 +194,131 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
gmap
[
r
]
=
g_r
gmap
[
r
]
=
g_r
return
gmap
return
gmap
class BadGradOp(gof.Op):
    """
    An Op representing a gradient that cannot be computed.

    theano.tensor.grad checks the graphs it returns to ensure
    they do not contain these ops.
    theano.function also checks that the subgraph it implements
    does not contain these ops.
    """

    def __init__(self, exc, msg=""):
        """
        exc: the exception type to raise if a subgraph contains this op.
        msg: the message to include in the exception.
        """
        self.exc = exc
        self.msg = msg

    def __eq__(self, other):
        # All instances of the same subclass compare equal.
        return type(self) == type(other)

    def __hash__(self):
        return hash((type(self)))

    def __str__(self):
        return "BadGrad{%s, %s}" % (self.exc, self.msg)

    def make_node(self, x):
        # Output mirrors the input's type, so the op can stand in for
        # any would-be gradient term.
        return gof.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, out_storage):
        """ This should never be called"""
        raise AssertionError("A BadGradOp should never be compiled, " + \
                "and certainly not executed.")
        # Note: essentially, this op should just be NaNs_like(inputs[0])
        # but 0 * BadGradOp(x) + y optimizes to just y
        # so until we develop a way of symbolically representing a variable
        # that is always NaN and implement the logic for 0 * NaN = NaN, etc.
        # the only way we can guarantee correctness of a theano function
        # is to guarantee that its initial subgraph contained no BadGradOps

    def raise_exc(self):
        # Raise the configured exception type with the configured message.
        raise self.exc(self.msg)
class GradNotImplementedOp(BadGradOp):
    """ A BadGradOp representing a gradient that hasn't been implemented yet.
    """

    def __init__(self, op, x_pos):
        """
        op: A theano op whose grad is not implemented for some input
        x_pos: An int, giving the index in the op's input list of
            a variable for which the gradient is not implemented
            (if op has unimplemented gradients for several inputs,
            it must still return a separate UnimplementedGradOp for
            each)
        """
        assert isinstance(op, gof.Op)
        assert isinstance(x_pos, int)
        assert x_pos >= 0
        super(GradNotImplementedOp, self).__init__(NotImplementedError,
                "%s does not implement its gradient with respect to input %d" \
                % (str(type(op)), x_pos))
def grad_not_implemented(op, x_pos, x):
    """
    Return an un-computable symbolic variable of type `x.type`.

    If any call to tensor.grad results in an expression containing this
    un-computable variable, an exception (NotImplementedError) will be
    raised indicating that the gradient on the
    `x_pos`'th input of `op` has not been implemented. Likewise if
    any call to theano.function involves this variable.
    """
    return GradNotImplementedOp(op, x_pos)(x)
def check_for_bad_grad(variables):
    """
    variables: A gof.Variable or list thereof

    Raises an exception if any of the variables represents
    an expression involving a BadGradOp
    """
    # implemented using a deque rather than recursion because python recursion
    # limit is set low by default
    if not (isinstance(variables, list) or \
            isinstance(variables, gof.Variable)):
        raise TypeError("Expected gof.Variable or list thereof, got " + \
                str(type(variables)))
    if not isinstance(variables, list):
        variables = [variables]

    vars_to_check = deque(variables)
    already_checked = set([])

    while True:
        try:
            var = vars_to_check.pop()
        except IndexError:
            break
        if var not in already_checked:
            already_checked.update([var])
            assert isinstance(var, gof.Variable)
            node = var.owner
            if node is not None:
                op = node.op
                if isinstance(op, BadGradOp):
                    # Found an un-computable gradient: raise the exception
                    # recorded on the op (e.g. NotImplementedError).
                    op.raise_exc()
                # Continue the traversal through this node's inputs.
                vars_to_check.extendleft(node.inputs)
            #end if node is not None
        #end if not already_checked
    #end while
########################
########################
# R Operator
# R Operator
...
@@ -528,6 +639,8 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
...
@@ -528,6 +639,8 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
and
ret
[
-
1
]
.
name
is
None
:
and
ret
[
-
1
]
.
name
is
None
:
ret
[
-
1
]
.
name
=
'(d
%
s/d
%
s)'
%
(
cost
.
name
,
p
.
name
)
ret
[
-
1
]
.
name
=
'(d
%
s/d
%
s)'
%
(
cost
.
name
,
p
.
name
)
check_for_bad_grad
(
ret
)
return
format_as
(
using_list
,
using_tuple
,
ret
)
return
format_as
(
using_list
,
using_tuple
,
ret
)
...
...
theano/tests/test_gradient.py
浏览文件 @
a1be5079
...
@@ -251,13 +251,36 @@ class test_grad_sources_inputs(unittest.TestCase):
...
@@ -251,13 +251,36 @@ class test_grad_sources_inputs(unittest.TestCase):
self
.
assertTrue
(
g
[
a1
.
inputs
[
0
]]
==
6
)
self
.
assertTrue
(
g
[
a1
.
inputs
[
0
]]
==
6
)
self
.
assertTrue
(
g
[
a1
.
inputs
[
1
]]
==
11
)
self
.
assertTrue
(
g
[
a1
.
inputs
[
1
]]
==
11
)
def test_unimplemented_grad_func():
    # tests that function compilation catches unimplemented grads in the graph
    a = theano.tensor.vector()
    b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a)
    try:
        f = theano.function([a], b)
        assert 0
        # Note: it's important that the NotImplementedGradOp is caught
        # at COMPILATION time, not execution time.
        # If the uncomputable variable is, for example, multiplied by 0,
        # it could be optimized out of the final graph.
    except NotImplementedError:
        pass
def test_unimplemented_grad_grad():
    # tests that unimplemented grads are caught in the grad method

    class DummyOp(gof.Op):
        def make_node(self, x):
            return gof.Apply(self, [x], [x.type()])

        def grad(self, inputs, output_grads):
            return [theano.gradient.grad_not_implemented(self, 0, inputs[0])]

    a = theano.tensor.scalar()
    b = DummyOp()(a)

    try:
        g = theano.gradient.grad(b, a)
        assert False
    except NotImplementedError:
        pass
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论