Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
0ec49804
提交
0ec49804
authored
8月 31, 2012
作者:
Ian Goodfellow
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fixed bug where comments did not appear
added DisconnectedType
上级
17562537
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
69 行增加
和
16 行删除
+69
-16
op.txt
doc/extending/op.txt
+10
-3
gradient.py
theano/gradient.py
+42
-9
basic.py
theano/tensor/basic.py
+17
-4
没有找到文件。
doc/extending/op.txt
浏览文件 @
0ec49804
...
@@ -110,9 +110,16 @@ following methods:
...
@@ -110,9 +110,16 @@ following methods:
the gradient with respect to that input computed based on the symbolic gradients with
the gradient with respect to that input computed based on the symbolic gradients with
respect to each output.
respect to each output.
If the output is not differentiable with respect to any inputs,
If the output is not differentiable with respect to an input
then this method should be defined to return ``[None for i in
then this method should be defined to return a variable of type
inputs]``. If this method is not defined, then Theano assumes it has been
NullType for that input.
If an element of output_gradient is of type theano.gradient.DisconnectedType,
it means that the cost is not a function of this output. If any of the
op's inputs participate in the computation of only disconnected outputs,
then Op.grad should return DisconnectedType variables for those inputs.
If the grad method is not defined, then Theano assumes it has been
forgotten. Symbolic differentiation will fail on a graph that
forgotten. Symbolic differentiation will fail on a graph that
includes this Op.
includes this Op.
...
...
theano/gradient.py
浏览文件 @
0ec49804
...
@@ -70,7 +70,7 @@ def grad_not_implemented(op, x_pos, x, comment = ""):
...
@@ -70,7 +70,7 @@ def grad_not_implemented(op, x_pos, x, comment = ""):
return
NaNType
(
"This variable is NaN because the grad method for "
+
\
return
NaNType
(
"This variable is NaN because the grad method for "
+
\
"input "
+
str
(
x_pos
)
+
" ("
+
str
(
x
)
+
") of the "
+
str
(
op
)
+
" op is"
+
\
"input "
+
str
(
x_pos
)
+
" ("
+
str
(
x
)
+
") of the "
+
str
(
op
)
+
" op is"
+
\
" not implemented."
)()
" not implemented."
+
comment
)()
def
grad_undefined
(
op
,
x_pos
,
x
,
comment
=
""
):
def
grad_undefined
(
op
,
x_pos
,
x
,
comment
=
""
):
"""
"""
...
@@ -88,7 +88,31 @@ def grad_undefined(op, x_pos, x, comment = ""):
...
@@ -88,7 +88,31 @@ def grad_undefined(op, x_pos, x, comment = ""):
return
NaNType
(
"This variable is NaN because the gradient for "
+
\
return
NaNType
(
"This variable is NaN because the gradient for "
+
\
"input "
+
str
(
x_pos
)
+
" ("
+
str
(
x
)
+
") of the "
+
str
(
op
)
+
" op is"
+
\
"input "
+
str
(
x_pos
)
+
" ("
+
str
(
x
)
+
") of the "
+
str
(
op
)
+
" op is"
+
\
" mathematically undefined."
)()
" mathematically undefined."
+
comment
)()
class
DisconnectedType
(
theano
.
gof
.
type
.
Type
):
""" A type indicating that a variable is a result
of taking the gradient of c with respect to x
when c is not a function of x.
A symbolic placeholder for 0, but to convey
the extra information that this gradient is 0
because it is disconnected.
"""
def
filter
(
self
,
data
,
strict
=
False
,
allow_downcast
=
None
):
raise
AssertionError
(
"If you're assigning to a DisconnectedType you're"
" doing something wrong. It should only be used as "
"symbolic placeholder."
)
def
fiter_variable
(
self
,
other
):
raise
def
may_share_memory
(
a
,
b
):
return
False
def
value_eq
(
a
,
b
,
force_same_dtype
=
True
):
raise
########################
########################
...
@@ -378,7 +402,7 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = False,
...
@@ -378,7 +402,7 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = False,
#the gradient of the constants is 0
#the gradient of the constants is 0
for
const
in
consider_constant
:
for
const
in
consider_constant
:
grad_dict
[
const
]
=
const
.
zeros_like
()
grad_dict
[
const
]
=
DisconnectedType
()
()
#variables that do not influence the cost have zero gradient.
#variables that do not influence the cost have zero gradient.
#if wrt is such a variable, populate the grad_dict with this info
#if wrt is such a variable, populate the grad_dict with this info
...
@@ -400,12 +424,16 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = False,
...
@@ -400,12 +424,16 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = False,
raise
ValueError
(
"Invalid value for keyword "
raise
ValueError
(
"Invalid value for keyword "
"'disconnected_inputs', valid values are "
"'disconnected_inputs', valid values are "
"'ignore', 'warn' and 'raise'."
)
"'ignore', 'warn' and 'raise'."
)
grad_dict
[
elem
]
=
elem
.
zeros_like
()
grad_dict
[
elem
]
=
DisconnectedType
()
()
rval
=
_populate_grad_dict
(
var_to_node_to_idx
,
rval
=
_populate_grad_dict
(
var_to_node_to_idx
,
grad_dict
,
wrt
,
warn_type
,
grad_dict
,
wrt
,
warn_type
,
cost
.
name
)
cost
.
name
)
for
i
in
xrange
(
len
(
rval
)):
if
isinstance
(
rval
[
i
]
.
type
,
DisconnectedType
):
rval
[
i
]
=
wrt
[
i
]
.
zeros_like
()
if
using_tuple
:
if
using_tuple
:
rval
=
tuple
(
rval
)
rval
=
tuple
(
rval
)
elif
not
using_list
:
elif
not
using_list
:
...
@@ -468,12 +496,12 @@ def _populate_grad_dict(var_to_node_to_idx,\
...
@@ -468,12 +496,12 @@ def _populate_grad_dict(var_to_node_to_idx,\
grad_dict: a dictionary mapping variables to their gradients
grad_dict: a dictionary mapping variables to their gradients
should be populated by grad or grad_sources_inputs
should be populated by grad or grad_sources_inputs
grad should set gradients to
zeros_like
for
grad should set gradients to
DisconnectedType()()
for
variables to be considered constant, set the
variables to be considered constant, set the
gradient for the cost variable to g_cost, etc.
gradient for the cost variable to g_cost, etc.
both should set the gradient for disconnected
both should set the gradient for disconnected
inputs to
zeros_like
inputs to
DisconnectedType()
wrt: the minimal set of variables that must be included in grad_dict
wrt: the minimal set of variables that must be included in grad_dict
...
@@ -513,6 +541,11 @@ def _populate_grad_dict(var_to_node_to_idx,\
...
@@ -513,6 +541,11 @@ def _populate_grad_dict(var_to_node_to_idx,\
for
i
in
xrange
(
len
(
term_dict
[
node
])):
for
i
in
xrange
(
len
(
term_dict
[
node
])):
if
term_dict
[
node
][
i
]
is
None
:
if
term_dict
[
node
][
i
]
is
None
:
#we don't know what None means. in the past it has been used to
#mean undefined, zero, or disconnected. So for now we assume it is
#zero. Assuming it is zero prevents us from disconnecting NaNs above.
#eventually we should disallow this return type and force all ops
#to return the correct thing
term_dict
[
node
][
i
]
=
node
.
inputs
[
i
]
.
zeros_like
()
term_dict
[
node
][
i
]
=
node
.
inputs
[
i
]
.
zeros_like
()
if
warn_type
:
if
warn_type
:
...
@@ -560,8 +593,8 @@ def _populate_grad_dict(var_to_node_to_idx,\
...
@@ -560,8 +593,8 @@ def _populate_grad_dict(var_to_node_to_idx,\
grad_dict
[
var
]
.
name
=
'(d
%
s/d
%
s)'
%
(
cost_name
,
var
.
name
)
grad_dict
[
var
]
.
name
=
'(d
%
s/d
%
s)'
%
(
cost_name
,
var
.
name
)
else
:
else
:
#this variable is not connected to the cost in the computational
#this variable is not connected to the cost in the computational
#graph
so the gradient on it is zero
#graph
grad_dict
[
var
]
=
var
.
zeros_like
()
grad_dict
[
var
]
=
DisconnectedType
()
()
return
grad_dict
[
var
]
return
grad_dict
[
var
]
...
@@ -657,7 +690,7 @@ def grad_sources_inputs(sources, graph_inputs, warn_type = True):
...
@@ -657,7 +690,7 @@ def grad_sources_inputs(sources, graph_inputs, warn_type = True):
#according to the flag, possibly raise an error if wrt is disconnected
#according to the flag, possibly raise an error if wrt is disconnected
for
elem
in
wrt
:
for
elem
in
wrt
:
if
elem
not
in
var_to_node_to_idx
and
elem
not
in
outputs
:
if
elem
not
in
var_to_node_to_idx
and
elem
not
in
outputs
:
grad_dict
[
elem
]
=
elem
.
zeros_lik
e
()
grad_dict
[
elem
]
=
DisconnectedTyp
e
()
_populate_grad_dict
(
var_to_node_to_idx
,
_populate_grad_dict
(
var_to_node_to_idx
,
...
...
theano/tensor/basic.py
浏览文件 @
0ec49804
...
@@ -25,6 +25,7 @@ from theano.tensor.utils import hash_from_ndarray
...
@@ -25,6 +25,7 @@ from theano.tensor.utils import hash_from_ndarray
from
theano.scalar
import
ComplexError
,
IntegerDivisionError
from
theano.scalar
import
ComplexError
,
IntegerDivisionError
import
theano.scalar.sharedvar
import
theano.scalar.sharedvar
from
theano.gradient
import
grad_undefined
from
theano.gradient
import
grad_undefined
from
theano.gradient
import
DisconnectedType
### set up the external interface
### set up the external interface
from
elemwise
import
Elemwise
,
DimShuffle
,
CAReduce
,
Sum
from
elemwise
import
Elemwise
,
DimShuffle
,
CAReduce
,
Sum
...
@@ -2324,9 +2325,21 @@ class MaxAndArgmax(Op):
...
@@ -2324,9 +2325,21 @@ class MaxAndArgmax(Op):
x
,
axis
=
inp
x
,
axis
=
inp
g_max
,
g_max_idx
=
grads
g_max
,
g_max_idx
=
grads
# Check to see if the gradient on max is None
g_max_disconnected
=
isinstance
(
g_max
.
type
,
DisconnectedType
)
if
g_max
is
None
:
g_max_idx_disconnected
=
isinstance
(
g_max_idx
.
type
,
DisconnectedType
)
return
None
,
None
#if the op is totally disconnected, so are its inputs
if
g_max_disconnected
and
g_max_idx_disconnected
:
return
[
DisconnectedType
()(),
DisconnectedType
()()
]
axis_grad
=
grad_undefined
(
self
,
1
,
axis
,
"argmax is not defined for non-integer axes so"
" argmax(x, axis+eps) is undefined"
)
#if the max is disconnected but the argmax is not,
#the gradient on its inputs is zero
if
g_max_disconnected
:
return
[
x
.
zeros_like
(),
axis_grad
]
xmax
=
max
(
x
,
axis
)
xmax
=
max
(
x
,
axis
)
# Raise the g_max and xmax to the same number of dim as the input.
# Raise the g_max and xmax to the same number of dim as the input.
...
@@ -2346,7 +2359,7 @@ class MaxAndArgmax(Op):
...
@@ -2346,7 +2359,7 @@ class MaxAndArgmax(Op):
# Set the grad to the correct position.
# Set the grad to the correct position.
g_x
=
eq
(
xmax_pad
,
x
)
*
g_max_pad
g_x
=
eq
(
xmax_pad
,
x
)
*
g_max_pad
return
g_x
,
None
return
g_x
,
axis_grad
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
__class__
.
__name__
return
self
.
__class__
.
__name__
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论