Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
40bbb7da
提交
40bbb7da
authored
11月 16, 2012
作者:
David Warde-Farley
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1068 from goodfeli/fix_consider_constant
Fixes several issues with gradients and some other bugs
上级
87cd138e
83781003
隐藏空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
402 行增加
和
299 行删除
+402
-299
op.txt
doc/extending/op.txt
+2
-0
builders.py
theano/compile/builders.py
+6
-3
gradient.py
theano/gradient.py
+189
-238
scan_op.py
theano/scan_module/scan_op.py
+20
-11
basic.py
theano/tensor/basic.py
+54
-7
conv.py
theano/tensor/nnet/conv.py
+2
-3
test_gradient.py
theano/tests/test_gradient.py
+128
-30
test_rop.py
theano/tests/test_rop.py
+1
-7
没有找到文件。
doc/extending/op.txt
浏览文件 @
40bbb7da
...
@@ -249,6 +249,8 @@ following methods:
...
@@ -249,6 +249,8 @@ following methods:
1) They must be Variable instances.
1) They must be Variable instances.
2) When they are types that have dtypes, they must never have an integer dtype.
2) When they are types that have dtypes, they must never have an integer dtype.
The output gradients passed *to* Op.grad will also obey these constraints.
Integers are a tricky subject. Integers are the main reason for having DisconnectedType,
Integers are a tricky subject. Integers are the main reason for having DisconnectedType,
NullType or zero gradient. When you have an integer as an argument to your grad method,
NullType or zero gradient. When you have an integer as an argument to your grad method,
recall the definition of a derivative to help you decide what value to return:
recall the definition of a derivative to help you decide what value to return:
...
...
theano/compile/builders.py
浏览文件 @
40bbb7da
...
@@ -55,9 +55,12 @@ class OpFromGraph(gof.Op):
...
@@ -55,9 +55,12 @@ class OpFromGraph(gof.Op):
if
grad_depth
>
0
:
if
grad_depth
>
0
:
output_grads
=
[
t
()
for
t
in
self
.
output_types
]
output_grads
=
[
t
()
for
t
in
self
.
output_types
]
gd
=
G
.
grad_sources_inputs
(
zip
(
self
.
outputs
,
output_grads
),
# OpFromGraph doesn't implement a connection_pattern, so for now we regard
self
.
inputs
)
# all inputs and outputs as connected. This will compute the right numerical
gs
=
map
(
gd
.
get
,
self
.
inputs
)
# value for the gradients but could fail to raise the disconnected inputs error
# in some cases.
gs
=
G
.
grad
(
cost
=
None
,
known_grads
=
dict
(
zip
(
self
.
outputs
,
output_grads
)),
wrt
=
self
.
inputs
,
disconnected_inputs
=
'ignore'
)
self
.
grad_ops
=
[]
self
.
grad_ops
=
[]
for
g
in
gs
:
for
g
in
gs
:
if
g
is
None
:
if
g
is
None
:
...
...
theano/gradient.py
浏览文件 @
40bbb7da
...
@@ -13,9 +13,11 @@ import warnings
...
@@ -13,9 +13,11 @@ import warnings
_logger
=
logging
.
getLogger
(
'theano.gradient'
)
_logger
=
logging
.
getLogger
(
'theano.gradient'
)
import
numpy
# for numeric_grad
import
numpy
# for numeric_grad
np
=
numpy
import
theano
import
theano
from
itertools
import
izip
from
theano
import
gof
from
theano
import
gof
from
theano.gof
import
Variable
from
theano.gof
import
Variable
from
theano.gof.python25
import
all
from
theano.gof.python25
import
all
...
@@ -317,9 +319,6 @@ def Lop(f, wrt, eval_points, consider_constant=None,
...
@@ -317,9 +319,6 @@ def Lop(f, wrt, eval_points, consider_constant=None,
coordinates of the tensor element in the last
coordinates of the tensor element in the last
If `f` is a list/tuple, then return a list/tuple with the results.
If `f` is a list/tuple, then return a list/tuple with the results.
"""
"""
if
consider_constant
is
None
:
consider_constant
=
[]
if
type
(
eval_points
)
not
in
(
list
,
tuple
):
if
type
(
eval_points
)
not
in
(
list
,
tuple
):
eval_points
=
[
eval_points
]
eval_points
=
[
eval_points
]
...
@@ -333,50 +332,15 @@ def Lop(f, wrt, eval_points, consider_constant=None,
...
@@ -333,50 +332,15 @@ def Lop(f, wrt, eval_points, consider_constant=None,
f
=
list
(
f
)
f
=
list
(
f
)
grads
=
list
(
eval_points
)
grads
=
list
(
eval_points
)
for
elem
in
consider_constant
:
assert
elem
not
in
f
f
.
append
(
elem
)
grads
.
append
(
elem
.
zeros_like
())
if
not
isinstance
(
wrt
,
(
list
,
tuple
)):
if
not
isinstance
(
wrt
,
(
list
,
tuple
)):
wrt
=
[
wrt
]
wrt
=
[
wrt
]
arg1
=
zip
(
f
,
eval_points
)
assert
len
(
f
)
==
len
(
grads
)
arg2
=
list
(
wrt
)
known
=
dict
(
izip
(
f
,
grads
))
gmap
=
grad_sources_inputs
(
ret
=
grad
(
cost
=
None
,
known_grads
=
known
,
arg1
,
consider_constant
=
consider_constant
,
wrt
=
wrt
,
arg2
)
disconnected_inputs
=
disconnected_inputs
)
# Note : If p is not in gmap there can be several reasons, among which
# is the fact that p might not be part of the computational graph. A
# simple example is that for a+b for e.g. a[0] is not part of the graph,
# so Theano does not know how to compute TT.grad(TT.sum(a+b), a[0])
# such subtle cases can be fixed by a more careful implementation of the
# gradient, but for now Theano needs to throw an exception, and make the
# user aware that it does not know how to compute that gradient
ret
=
[]
for
p
in
wrt
:
if
p
in
gmap
:
ret
.
append
(
gmap
[
p
])
else
:
message
=
(
"Lop method was asked to compute the gradient "
"with respect to a variable that is not part of "
"the computational graph of the cost, or is used "
"only by a non-differentiable operator:
%
s"
%
p
)
if
disconnected_inputs
==
'ignore'
:
pass
elif
disconnected_inputs
==
'warn'
:
warnings
.
warn
(
message
,
stacklevel
=
1
)
elif
disconnected_inputs
==
'raise'
:
raise
ValueError
(
message
)
else
:
raise
ValueError
(
"Invalid value for keyword "
"'disconnected_inputs', valid values are "
"'ignore', 'warn' and 'raise'."
)
ret
.
append
(
p
.
zeros_like
())
return
format_as
(
using_list
,
using_tuple
,
ret
)
return
format_as
(
using_list
,
using_tuple
,
ret
)
...
@@ -386,9 +350,11 @@ def Lop(f, wrt, eval_points, consider_constant=None,
...
@@ -386,9 +350,11 @@ def Lop(f, wrt, eval_points, consider_constant=None,
#########################
#########################
def
grad
(
cost
,
wrt
,
g_cost
=
None
,
consider_constant
=
None
,
def
grad
(
cost
,
wrt
,
g_cost
=
None
,
consider_constant
=
None
,
disconnected_inputs
=
'raise'
,
add_names
=
True
):
disconnected_inputs
=
'raise'
,
add_names
=
True
,
known_grads
=
None
,
return_disconnected
=
'zero'
):
"""
"""
:type cost: Scalar (0-dimensional) Variable.
:type cost: Scalar (0-dimensional) Variable.
May optionally be None if known_grads is provided.
:type wrt: Variable or list of Variables.
:type wrt: Variable or list of Variables.
:type g_cost: Scalar Variable, or None.
:type g_cost: Scalar Variable, or None.
:param g_cost: an expression for the gradient through cost. The default is
:param g_cost: an expression for the gradient through cost. The default is
...
@@ -409,6 +375,20 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
...
@@ -409,6 +375,20 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
(d<cost.name>/d<wrt.name>) provided that both cost and wrt have
(d<cost.name>/d<wrt.name>) provided that both cost and wrt have
names
names
:type known_grads: dict
:param known_grads: If not None, a dictionary mapping variables to their
gradients. This is useful in the case where you know the
gradient on some variables but do not know the original
cost.
:type return_disconnected: string
:param return_disconnected:
'zero' : If wrt[i] is disconnected, return value i will be
wrt[i].zeros_like()
'None' : If wrt[i] is disconnected, return value i will be
None
'Disconnected' : returns variables of type DisconnectedType
:rtype: Variable or list/tuple of Variables (depending upon `wrt`)
:rtype: Variable or list/tuple of Variables (depending upon `wrt`)
:return: symbolic expression of gradient of `cost` with respect to `wrt`.
:return: symbolic expression of gradient of `cost` with respect to `wrt`.
...
@@ -422,29 +402,17 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
...
@@ -422,29 +402,17 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
if
tensor
is
None
:
if
tensor
is
None
:
from
theano
import
tensor
from
theano
import
tensor
if
isinstance
(
cost
.
type
,
NullType
):
if
cost
is
None
:
assert
known_grads
is
not
None
if
cost
is
not
None
and
isinstance
(
cost
.
type
,
NullType
):
raise
ValueError
(
"Can't differentiate a NaN cost."
raise
ValueError
(
"Can't differentiate a NaN cost."
"cost is NaN because "
+
\
"cost is NaN because "
+
\
cost
.
type
.
why_null
)
cost
.
type
.
why_null
)
if
cost
.
ndim
!=
0
:
if
cost
is
not
None
and
cost
.
ndim
!=
0
:
raise
TypeError
(
"cost must be a scalar."
)
raise
TypeError
(
"cost must be a scalar."
)
if
consider_constant
is
None
:
consider_constant
=
[]
else
:
# error checking on consider_constant: verify that it is a collection
# of theano variables
# this is important, if someone accidentally passes a nested data
# structure with theano variables at the leaves, only the root will
# be properly considered constant
if
not
hasattr
(
consider_constant
,
'__iter__'
):
raise
TypeError
(
'consider_constant must be an iterable collection,'
' got '
+
str
(
type
(
consider_constant
)))
for
elem
in
consider_constant
:
if
not
isinstance
(
elem
,
gof
.
Variable
):
raise
TypeError
(
'Elements of consider_constant must be '
'variables, but got '
+
str
(
type
(
elem
)))
if
isinstance
(
wrt
,
set
):
if
isinstance
(
wrt
,
set
):
raise
TypeError
(
"wrt must not be a set. sets have no defined "
raise
TypeError
(
"wrt must not be a set. sets have no defined "
...
@@ -461,7 +429,14 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
...
@@ -461,7 +429,14 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
raise
TypeError
(
"Expected Variable, got "
+
str
(
elem
)
+
raise
TypeError
(
"Expected Variable, got "
+
str
(
elem
)
+
" of type "
+
str
(
type
(
elem
)))
" of type "
+
str
(
type
(
elem
)))
var_to_node_to_idx
=
_populate_var_to_node_to_idx
([
cost
],
wrt
)
outputs
=
[]
if
cost
is
not
None
:
outputs
.
append
(
cost
)
if
known_grads
is
not
None
:
outputs
.
extend
(
known_grads
.
keys
())
var_to_node_to_idx
=
_populate_var_to_node_to_idx
(
outputs
,
wrt
,
consider_constant
)
# build a dict mapping var to the gradient of cost with respect to var
# build a dict mapping var to the gradient of cost with respect to var
grad_dict
=
{}
grad_dict
=
{}
...
@@ -469,49 +444,57 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
...
@@ -469,49 +444,57 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
# The gradient of the cost should default to 1 if the cost is of a
# The gradient of the cost should default to 1 if the cost is of a
# continuous dtype (float, for the moment, as complex are unsupported),
# continuous dtype (float, for the moment, as complex are unsupported),
# and should always be 0 if the cost is of discrete (integer) dtype.
# and should always be 0 if the cost is of discrete (integer) dtype.
if
getattr
(
cost
.
type
,
'dtype'
,
None
)
not
in
tensor
.
float_dtypes
:
if
cost
is
not
None
:
if
g_cost
is
None
:
g_cost
=
_float_ones_like
(
cost
)
# g_cost may be Disconnected or NullType. A creative use of the function,
# sure, but nonetheless one we can and should support. So before we try
# to cast it make sure it even has a dtype
if
hasattr
(
g_cost
.
type
,
'dtype'
)
and
cost
.
type
.
dtype
not
in
tensor
.
discrete_dtypes
:
# Here we enforce the constraint that floating point variables have
# the same dtype as their gradient.
g_cost
=
g_cost
.
astype
(
cost
.
type
.
dtype
)
# DO NOT enforce g_cost to be 0 if cost is an integer.
# This is to be enforced by the Op.grad method for the Op that outputs cost.
assert
g_cost
not
in
tensor
.
discrete_dtypes
grad_dict
[
cost
]
=
g_cost
else
:
if
g_cost
is
not
None
:
if
g_cost
is
not
None
:
try
:
raise
ValueError
(
"No cost node was specified, but a gradient"
cval
=
theano
.
get_constant_value
(
g_cost
)
" on it was."
)
if
cval
==
0
:
g_cost_is_zero
=
True
else
:
g_cost_is_zero
=
False
except
TypeError
:
g_cost_is_zero
=
False
if
not
g_cost_is_zero
:
raise
ValueError
(
"The gradient of a cost of non-continuous "
"dtype (here,
%
s), if it is defined, should be 0. "
"However, a value of
%
s was provided in the 'g_cost' "
"argument of theano.grad(). To remove this error, "
"you can simply omit the 'g_cost' argument, or "
"give it the default value of None."
%
(
getattr
(
g_cost
.
type
,
'dtype'
,
'no dtype defined'
),
g_cost
))
g_cost
=
tensor
.
zeros_like
(
cost
)
elif
g_cost
is
None
:
# cost.type.dtype is in tensor.float_dtypes at that point
g_cost
=
tensor
.
ones_like
(
cost
)
else
:
if
known_grads
is
not
None
:
# Cast the provided gradient so that it has the same dtype
for
var
in
known_grads
:
# as the cost.
g_var
=
known_grads
[
var
]
g_cost
=
g_cost
.
astype
(
cost
.
type
.
dtype
)
if
not
hasattr
(
g_var
,
'type'
):
raise
TypeError
(
'output grads must be theano variables.'
'Ambiguous whether
%
s should be made into tensor'
' or sparse theano variable'
%
str
(
type
(
g_var
)))
if
g_var
.
type
not
in
[
NullType
,
DisconnectedType
]
and
'float'
\
not
in
str
(
g_var
.
type
.
dtype
):
raise
TypeError
(
"Gradients must always be NullType, "
"DisconnectedType, or continuous, but grad was "
"given a known_grad of type "
+
str
(
g_var
.
type
))
# DO NOT check that these gradients are equal to 0 if var is int
# The gradient is allowed to be non-zero on var in that case
# Ops outputting var should not backpropagate its gradient further
# but that is enforced elsewhere (grep for only_connected_to_int)
grad_dict
[
var
]
=
g_var
grad_dict
[
cost
]
=
g_cost
# the gradient of the constants is 0
for
const
in
consider_constant
:
grad_dict
[
const
]
=
DisconnectedType
()()
# variables that do not influence the cost have zero gradient.
# variables that do not influence the cost have zero gradient.
# if wrt is such a variable, populate the grad_dict with this info
# if wrt is such a variable, populate the grad_dict with this info
# so that wrt not being in var_to_node_to_idx won't cause an error below
# so that wrt not being in var_to_node_to_idx won't cause an error below
# according to the flag, possibly raise an error if wrt is disconnected
# according to the flag, possibly raise an error if wrt is disconnected
for
elem
in
wrt
:
for
elem
in
wrt
:
if
elem
not
in
var_to_node_to_idx
and
elem
is
not
cost
:
if
elem
not
in
var_to_node_to_idx
and
elem
is
not
cost
\
and
elem
not
in
grad_dict
:
message
=
(
"grad method was asked to compute the gradient "
message
=
(
"grad method was asked to compute the gradient "
"with respect to a variable that is not part of "
"with respect to a variable that is not part of "
"the computational graph of the cost, or is used "
"the computational graph of the cost, or is used "
...
@@ -529,15 +512,15 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
...
@@ -529,15 +512,15 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
grad_dict
[
elem
]
=
DisconnectedType
()()
grad_dict
[
elem
]
=
DisconnectedType
()()
cost_name
=
None
cost_name
=
None
if
add_names
:
if
add_names
and
cost
is
not
None
:
cost_name
=
cost
.
name
cost_name
=
cost
.
name
# Make sure we didn't initialize the grad_dict with any ints
# Make sure we didn't initialize the grad_dict with any ints
# for non-int outputs
# The gradient may NEVER be an int, even if the variable is an int.
# Read the Op contract and talk to Ian Goodfellow before changing this!
for
var
in
grad_dict
:
for
var
in
grad_dict
:
g
=
grad_dict
[
var
]
g
=
grad_dict
[
var
]
if
(
hasattr
(
g
.
type
,
'dtype'
)
and
if
hasattr
(
g
.
type
,
'dtype'
):
getattr
(
var
.
type
,
'dtype'
,
''
)
in
tensor
.
float_dtypes
):
assert
g
.
type
.
dtype
in
tensor
.
float_dtypes
assert
g
.
type
.
dtype
in
tensor
.
float_dtypes
rval
=
_populate_grad_dict
(
var_to_node_to_idx
,
rval
=
_populate_grad_dict
(
var_to_node_to_idx
,
...
@@ -545,7 +528,12 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
...
@@ -545,7 +528,12 @@ def grad(cost, wrt, g_cost=None, consider_constant=None,
for
i
in
xrange
(
len
(
rval
)):
for
i
in
xrange
(
len
(
rval
)):
if
isinstance
(
rval
[
i
]
.
type
,
DisconnectedType
):
if
isinstance
(
rval
[
i
]
.
type
,
DisconnectedType
):
rval
[
i
]
=
_float_zeros_like
(
wrt
[
i
])
if
return_disconnected
==
'zero'
:
rval
[
i
]
=
_float_zeros_like
(
wrt
[
i
])
elif
return_disconnected
==
'None'
:
rval
[
i
]
=
None
else
:
assert
return_disconnected
==
'Disconnected'
if
using_tuple
:
if
using_tuple
:
rval
=
tuple
(
rval
)
rval
=
tuple
(
rval
)
...
@@ -592,15 +580,18 @@ def _node_to_pattern(node):
...
@@ -592,15 +580,18 @@ def _node_to_pattern(node):
return
connection_pattern
return
connection_pattern
def
_populate_var_to_node_to_idx
(
outputs
,
wrt
):
def
_populate_var_to_node_to_idx
(
outputs
,
wrt
,
consider_constant
):
"""
"""
Common code shared between grad and grad_sources_inputs
Helper function for grad function.
outputs: a list of variables we want to take gradients of
outputs: a list of variables we want to take gradients of
wrt: a list of variables we want to take the gradient with
wrt: a list of variables we want to take the gradient with
respect to.
respect to.
consider_constant: a list of variables not to backpropagate
through.
returns:
returns:
var_to_app_to_idx:
var_to_app_to_idx:
...
@@ -622,8 +613,30 @@ def _populate_var_to_node_to_idx(outputs, wrt):
...
@@ -622,8 +613,30 @@ def _populate_var_to_node_to_idx(outputs, wrt):
This set is exactly the set of variables that connect
This set is exactly the set of variables that connect
the variables in wrt to the cost being differentiated.
the variables in wrt to the cost being differentiated.
(A variable in consider_constant is not a function of
anything)
"""
"""
# Validate and format consider_constant
if
consider_constant
is
None
:
consider_constant
=
[]
else
:
# error checking on consider_constant: verify that it is a collection
# of theano variables
# this is important, if someone accidentally passes a nested data
# structure with theano variables at the leaves, only the root will
# be properly considered constant
try
:
iter
(
consider_constant
)
except
TypeError
:
raise
TypeError
(
'consider_constant must be an iterable collection,'
' got '
+
str
(
type
(
consider_constant
)))
for
elem
in
consider_constant
:
if
not
isinstance
(
elem
,
gof
.
Variable
):
raise
TypeError
(
'Elements of consider_constant must be '
'variables, but got '
+
str
(
type
(
elem
)))
# var_to_app_to_idx[var][node] = [i,j] means node has
# var_to_app_to_idx[var][node] = [i,j] means node has
# var as input at positions i and j
# var as input at positions i and j
var_to_app_to_idx
=
{}
var_to_app_to_idx
=
{}
...
@@ -638,9 +651,17 @@ def _populate_var_to_node_to_idx(outputs, wrt):
...
@@ -638,9 +651,17 @@ def _populate_var_to_node_to_idx(outputs, wrt):
accounted_for
=
set
([])
accounted_for
=
set
([])
def
account_for
(
var
):
def
account_for
(
var
):
# Don't visit the same variable twice
if
var
in
accounted_for
:
if
var
in
accounted_for
:
return
return
accounted_for
.
add
(
var
)
accounted_for
.
add
(
var
)
# Constants are not a function of anything
if
var
in
consider_constant
:
return
# Recursively add the variables that this variable is
# a function of.
if
var
.
owner
is
not
None
:
if
var
.
owner
is
not
None
:
app
=
var
.
owner
app
=
var
.
owner
...
@@ -699,11 +720,16 @@ def _populate_var_to_node_to_idx(outputs, wrt):
...
@@ -699,11 +720,16 @@ def _populate_var_to_node_to_idx(outputs, wrt):
return
var_to_app_to_idx
return
var_to_app_to_idx
class NullTypeGradError(TypeError):
    """
    Error raised when grad encounters a NullType.

    It derives from TypeError, so handlers written for TypeError
    still catch it.
    """
def
_populate_grad_dict
(
var_to_node_to_idx
,
def
_populate_grad_dict
(
var_to_node_to_idx
,
grad_dict
,
wrt
,
cost_name
=
None
):
grad_dict
,
wrt
,
cost_name
=
None
):
"""
"""
Common code shared between grad_sources_inputs and grad
Helper function for grad function.
var_to_node_to_idx: a dictionary mapping a variable to
var_to_node_to_idx: a dictionary mapping a variable to
a second dictionary.
a second dictionary.
...
@@ -712,7 +738,7 @@ def _populate_grad_dict(var_to_node_to_idx,
...
@@ -712,7 +738,7 @@ def _populate_grad_dict(var_to_node_to_idx,
node's input list
node's input list
grad_dict: a dictionary mapping variables to their gradients
grad_dict: a dictionary mapping variables to their gradients
should be populated by grad
or grad_sources_inputs
should be populated by grad
function.
grad should set gradients to DisconnectedType()() for
grad should set gradients to DisconnectedType()() for
variables to be considered constant, set the
variables to be considered constant, set the
...
@@ -779,38 +805,46 @@ def _populate_grad_dict(var_to_node_to_idx,
...
@@ -779,38 +805,46 @@ def _populate_grad_dict(var_to_node_to_idx,
inputs
=
[
try_to_copy_if_needed
(
ipt
)
for
ipt
in
inputs
]
inputs
=
[
try_to_copy_if_needed
(
ipt
)
for
ipt
in
inputs
]
# Build a list of output gradients with the same dtype as
# Build a list of output gradients with the same dtype as
# the corresponding output variable.
# the corresponding output variable.
# If an output is of a float dtype, we want to cast the
# If an output is of a float dtype, we want to cast the
# output gradient into the same dtype, to avoid having a
# output gradient into the same dtype, to avoid having a
# gradient graph with double precision (taking more memory,
# gradient graph with double precision (taking more memory,
# and more computation).
# and more computation).
# If an output is of an integer dtype, then we ensure the
# If an output is of an integer dtype, then we just leave it
# output gradient is zero, and that zero can be represented
# alone.
# in the same int dtype.
# DO NOT force integer variables to have zero grad. This causes
# If an output gradient is a NullType or DisconnectedType,
# bugs where we fail to detect disconnected or undefined gradients.
# then it will not have a dtype, and it will not be changed.
# DO NOT force integer variables to have integer dtype. This is
# a violation of the op contract.
new_output_grads
=
[]
new_output_grads
=
[]
for
o
,
og
in
zip
(
node
.
outputs
,
output_grads
):
for
o
,
og
in
zip
(
node
.
outputs
,
output_grads
):
o_dt
=
getattr
(
o
.
type
,
'dtype'
,
None
)
o_dt
=
getattr
(
o
.
type
,
'dtype'
,
None
)
og_dt
=
getattr
(
og
.
type
,
'dtype'
,
None
)
og_dt
=
getattr
(
og
.
type
,
'dtype'
,
None
)
if
og_dt
and
o_dt
in
theano
.
tensor
.
discrete_dtypes
:
if
o_dt
not
in
theano
.
tensor
.
discrete_dtypes
and
og_dt
and
o_dt
!=
og_dt
:
new_output_grads
.
append
(
o
.
zeros_like
())
elif
o_dt
and
og_dt
and
o_dt
!=
og_dt
:
new_output_grads
.
append
(
og
.
astype
(
o_dt
))
new_output_grads
.
append
(
og
.
astype
(
o_dt
))
else
:
else
:
new_output_grads
.
append
(
og
)
new_output_grads
.
append
(
og
)
# Make sure that, if new_output_grads[i] has a dtype:
# Make sure that, if new_output_grads[i] has a floating point dtype,
# - it is the same dtype as outputs[i]
# it is the same dtype as outputs[i]
# - if the dtype is an int, then new_output_grads[i] is 0.
for
o
,
ng
in
zip
(
node
.
outputs
,
new_output_grads
):
for
o
,
ng
in
zip
(
node
.
outputs
,
new_output_grads
):
o_dt
=
getattr
(
o
.
type
,
'dtype'
,
None
)
o_dt
=
getattr
(
o
.
type
,
'dtype'
,
None
)
ng_dt
=
getattr
(
ng
.
type
,
'dtype'
,
None
)
ng_dt
=
getattr
(
ng
.
type
,
'dtype'
,
None
)
if
ng_dt
:
if
ng_dt
is
not
None
and
o_dt
not
in
theano
.
tensor
.
discrete_dtypes
:
assert
ng_dt
==
o_dt
assert
ng_dt
==
o_dt
if
ng_dt
in
theano
.
tensor
.
discrete_dtypes
:
assert
theano
.
get_constant_value
(
ng
)
==
0
# Someone who had obviously not read the Op contract tried
# to modify this part of the function.
# If you ever think it is a good idea to make an integer
# valued gradient, please
# 1) Read the Op contract again
# 2) Talk to Ian Goodfellow
# (Both of these sources will tell you not to do it)
for
ng
in
new_output_grads
:
assert
getattr
(
ng
.
type
,
'dtype'
,
None
)
not
in
theano
.
tensor
.
discrete_dtypes
input_grads
=
node
.
op
.
grad
(
inputs
,
new_output_grads
)
input_grads
=
node
.
op
.
grad
(
inputs
,
new_output_grads
)
...
@@ -863,6 +897,7 @@ def _populate_grad_dict(var_to_node_to_idx,
...
@@ -863,6 +897,7 @@ def _populate_grad_dict(var_to_node_to_idx,
'the grad_undefined or grad_unimplemented helper '
'the grad_undefined or grad_unimplemented helper '
'functions.'
)
%
node
.
op
)
'functions.'
)
%
node
.
op
)
if
not
isinstance
(
term
.
type
,
if
not
isinstance
(
term
.
type
,
(
NullType
,
DisconnectedType
)):
(
NullType
,
DisconnectedType
)):
if
term
.
type
.
dtype
not
in
theano
.
tensor
.
float_dtypes
:
if
term
.
type
.
dtype
not
in
theano
.
tensor
.
float_dtypes
:
...
@@ -875,14 +910,9 @@ def _populate_grad_dict(var_to_node_to_idx,
...
@@ -875,14 +910,9 @@ def _populate_grad_dict(var_to_node_to_idx,
# it's not undefined or disconnected
# it's not undefined or disconnected
# The only other valid thing it can be is 0
# The only other valid thing it can be is 0
no_constant_value
=
True
is_zero
=
_is_zero
(
term
)
try
:
assert
is_zero
in
[
'yes'
,
'no'
,
'maybe'
]
constant_value
=
theano
.
get_constant_value
(
term
)
if
is_zero
==
'maybe'
:
no_constant_value
=
False
except
TypeError
:
pass
if
no_constant_value
:
msg
=
"
%
s.grad returned
%
s of type
%
s for input"
msg
=
"
%
s.grad returned
%
s of type
%
s for input"
msg
+=
"
%
d. This input's only connections to "
msg
+=
"
%
d. This input's only connections to "
msg
+=
"the cost through this op are via "
msg
+=
"the cost through this op are via "
...
@@ -896,8 +926,7 @@ def _populate_grad_dict(var_to_node_to_idx,
...
@@ -896,8 +926,7 @@ def _populate_grad_dict(var_to_node_to_idx,
msg
=
msg
%
(
str
(
node
.
op
),
str
(
term
),
msg
=
msg
%
(
str
(
node
.
op
),
str
(
term
),
str
(
type
(
term
)),
i
)
str
(
type
(
term
)),
i
)
raise
ValueError
(
msg
)
if
is_zero
==
'no'
:
if
constant_value
!=
0
:
msg
=
"
%
s.grad returned
%
s of type
%
s for input"
msg
=
"
%
s.grad returned
%
s of type
%
s for input"
msg
+=
"
%
d. Since this input is only connected "
msg
+=
"
%
d. Since this input is only connected "
msg
+=
"to integer-valued outputs, it should "
msg
+=
"to integer-valued outputs, it should "
...
@@ -905,7 +934,7 @@ def _populate_grad_dict(var_to_node_to_idx,
...
@@ -905,7 +934,7 @@ def _populate_grad_dict(var_to_node_to_idx,
msg
+=
"
%
s."
msg
+=
"
%
s."
msg
%
(
str
(
node
.
op
),
str
(
term
),
str
(
type
(
term
)),
msg
%
(
str
(
node
.
op
),
str
(
term
),
str
(
type
(
term
)),
i
,
str
(
constant_value
))
i
,
str
(
theano
.
get_constant_value
(
term
)
))
raise
ValueError
(
msg
)
raise
ValueError
(
msg
)
...
@@ -961,7 +990,7 @@ def _populate_grad_dict(var_to_node_to_idx,
...
@@ -961,7 +990,7 @@ def _populate_grad_dict(var_to_node_to_idx,
type
(
term
)))
type
(
term
)))
if
isinstance
(
term
.
type
,
NullType
):
if
isinstance
(
term
.
type
,
NullType
):
raise
Type
Error
(
"tensor.grad "
raise
NullTypeGrad
Error
(
"tensor.grad "
"encountered a NaN. "
+
\
"encountered a NaN. "
+
\
term
.
type
.
why_null
)
term
.
type
.
why_null
)
...
@@ -997,113 +1026,6 @@ def _populate_grad_dict(var_to_node_to_idx,
...
@@ -997,113 +1026,6 @@ def _populate_grad_dict(var_to_node_to_idx,
return
rval
return
rval
def grad_sources_inputs(sources, graph_inputs):
    """
    Back-propagate known gradients through a graph when the cost itself
    is not available.

    Used to compute the gradient of a cost with respect to all the
    variables between graph_inputs and cost, but in the special
    case where you don't know the cost, you only know its gradient
    on a set of intermediate values.

    A gradient source is a pair (``v``, ``g_v``), in which ``v`` is
    a `Variable`, and ``g_v`` is a `Variable` that is a gradient wrt
    ``v``. More specifically, ``g_v`` is the gradient of an external
    scalar cost, ``cost`` (that is not explicitly used), wrt ``v``.

    This function traverses the graph backward from the ``v`` sources,
    calling ``op.grad(...)`` for all ops with some non-None gradient
    on an output, to compute gradients of ``cost`` wrt intermediate
    variables and ``graph_inputs``.

    The ``op.grad(...)`` functions are called like this:

    .. code-block:: python

        op.grad(op.inputs[:], [total_gradient(v) for v in op.outputs])

    This call to ``op.grad`` should return a list or tuple: one symbolic
    gradient per input. These gradients represent the gradients of
    the same implicit ``cost`` mentioned above, wrt ``op.inputs``. Note
    that this is **not** the same as the gradient of ``op.outputs`` wrt
    ``op.inputs``.

    If ``op`` has a single input, then ``op.grad`` should return a list
    or tuple of length 1.

    For each input wrt to which ``op`` is not differentiable, it should
    return ``None`` instead of a `Variable` instance.

    If a source ``v`` receives a gradient from another source ``v2``,
    then the effective gradient on ``v`` is the sum of both gradients.

    :type sources: list of pairs of Variable: (v, gradient-on-v) to
                   initialize the total_gradient dictionary
    :param sources: gradients to back-propagate using chain rule.
                    A source whose dtype is not a float dtype has its
                    provided gradient replaced by ``v.zeros_like()``;
                    otherwise the gradient is cast to the dtype of ``v``.

    :type graph_inputs: list of Variable, or None
    :param graph_inputs: variables considered to be constant
                         (do not backpropagate through them). If None,
                         ``gof.graph.inputs(outputs)`` is used instead.

    :rtype: dictionary whose keys and values are of type Variable
    :return: mapping from each Variable encountered in the backward
             traversal to the gradient with respect to that Variable.
             Entries whose gradient is of DisconnectedType are replaced
             by ``_float_zeros_like(key)`` when the key has a
             ``zeros_like`` attribute.

    :raises TypeError: if one of the provided gradients has no ``type``
                       attribute (i.e. is not a theano variable).

    It is assumed that there is some objective J shared between all members of
    sources, so that for each v, gradient-on-v is the gradient of J with
    respect to v
    """

    # Split the (variable, gradient) pairs into two parallel tuples.
    outputs, output_grads = zip(*sources)

    # Refuse raw (non-symbolic) gradients: without a `type` we cannot know
    # which kind of theano variable they should be converted to.
    for output_grad in output_grads:
        if not hasattr(output_grad, 'type'):
            raise TypeError('output grads must be theano variables.'
                'Ambiguous whether %s should be made into tensor'
                ' or sparse theano variable' % str(type(output_grad)))

    if graph_inputs is None:
        graph_inputs = gof.graph.inputs(outputs)

    wrt = graph_inputs

    var_to_node_to_idx = _populate_var_to_node_to_idx(outputs, wrt)

    # build a dict mapping var to the gradient of cost with respect to var
    grad_dict = {}

    for output, output_grad in sources:
        # The gradient of the cost should always be 0 if the cost is of
        # discrete (integer) dtype.
        if getattr(output.type, 'dtype', '') not in theano.tensor.float_dtypes:
            output_grad = output.zeros_like()
        else:
            # Cast the provided gradient so that it has the same dtype
            # as the cost.
            output_grad = output_grad.astype(output.type.dtype)

        grad_dict[output] = output_grad

    # variables that do not influence the cost have zero gradient.
    # if wrt is such a variable, populate the grad_dict with this info
    # so that wrt not being in var_to_node_to_idx won't cause an error below
    # according to the flag, possibly raise an error if wrt is disconnected
    for elem in wrt:
        if elem not in var_to_node_to_idx and elem not in outputs:
            grad_dict[elem] = DisconnectedType()()

    # The return value is ignored here; the results are read back from
    # grad_dict below.
    _populate_grad_dict(var_to_node_to_idx, grad_dict, wrt)

    # post-process out the DisconnectedTypes
    for key in grad_dict:
        if isinstance(grad_dict[key].type, DisconnectedType):
            if hasattr(key, 'zeros_like'):
                grad_dict[key] = _float_zeros_like(key)

    return grad_dict
def
_float_zeros_like
(
x
):
def
_float_zeros_like
(
x
):
""" Like zeros_like, but forces the object to have
""" Like zeros_like, but forces the object to have
a floating point dtype """
a floating point dtype """
...
@@ -1634,3 +1556,32 @@ def hessian(cost, wrt, consider_constant=None,
...
@@ -1634,3 +1556,32 @@ def hessian(cost, wrt, consider_constant=None,
"script that generated the error)"
)
"script that generated the error)"
)
hessians
.
append
(
hess
)
hessians
.
append
(
hess
)
return
format_as
(
using_list
,
using_tuple
,
hessians
)
return
format_as
(
using_list
,
using_tuple
,
hessians
)
def
_is_zero
(
x
):
"""
Returns 'yes', 'no', or 'maybe' indicating whether x
is always 0.
'maybe' means that x is an expression that is complicated enough
that we can't tell that it simplifies to 0.
"""
if
not
hasattr
(
x
,
'type'
):
return
np
.
all
(
x
==
0.
)
if
isinstance
(
x
.
type
,
NullType
):
return
'no'
if
isinstance
(
x
.
type
,
DisconnectedType
):
return
'yes'
no_constant_value
=
True
try
:
constant_value
=
theano
.
get_constant_value
(
x
)
no_constant_value
=
False
except
TypeError
:
pass
if
no_constant_value
:
return
'maybe'
if
constant_value
!=
0.
:
return
'no'
return
'yes'
theano/scan_module/scan_op.py
浏览文件 @
40bbb7da
...
@@ -221,7 +221,8 @@ class Scan(PureOp):
...
@@ -221,7 +221,8 @@ class Scan(PureOp):
'following error has been encountered: The '
'following error has been encountered: The '
'
%
s
%
s (argument number
%
d) has dtype '
'
%
s
%
s (argument number
%
d) has dtype '
'
%
s and
%
d dimension(s). The corresponding slice
%
s '
'
%
s and
%
d dimension(s). The corresponding slice
%
s '
'however has dtype
%
s and
%
d dimension(s). This '
'however has dtype
%
s and
%
d dimension(s) (it should '
'have the same dtype and one fewer dimensions). This '
'should never happen, please '
'should never happen, please '
'report to theano-dev mailing list'
'report to theano-dev mailing list'
)
)
...
@@ -1261,11 +1262,9 @@ class Scan(PureOp):
...
@@ -1261,11 +1262,9 @@ class Scan(PureOp):
if
x
in
diff_inputs
]
if
x
in
diff_inputs
]
for
x
in
consider_inps
:
for
x
in
consider_inps
:
try
:
try
:
_gmp
=
gradient
.
grad_sources_inputs
(
gmp
[
x
]
=
gradient
.
grad
(
cost
=
None
,
[(
y
,
g_y
)],
known_grads
=
{
y
:
g_y
},
wrt
=
x
)
[
x
])
except
gradient
.
NullTypeGradError
:
gmp
[
x
]
=
_gmp
[
x
]
except
TypeError
:
# It means the gradient is undefined (which implies
# It means the gradient is undefined (which implies
# is connected)
# is connected)
gmp
[
x
]
=
x
gmp
[
x
]
=
x
...
@@ -1374,11 +1373,21 @@ class Scan(PureOp):
...
@@ -1374,11 +1373,21 @@ class Scan(PureOp):
self
.
inner_nitsot_outs
(
self_outputs
))
self
.
inner_nitsot_outs
(
self_outputs
))
def
compute_gradient
(
y
,
g_y
):
def
compute_gradient
(
y
,
g_y
):
gmp
=
gradient
.
grad_sources_inputs
(
if
'int'
in
str
(
g_y
.
dtype
):
[(
y
,
g_y
)],
raise
TypeError
(
"Gradients may never be integers but g_y "
[
x
for
x
in
theano
.
gof
.
graph
.
inputs
([
y
])
"has type "
+
str
(
g_y
.
type
))
if
x
in
diff_inputs
])
return
[
gmp
.
get
(
p
,
None
)
for
p
in
diff_inputs
]
wrt
=
[
x
for
x
in
theano
.
gof
.
graph
.
inputs
([
y
])
if
x
in
diff_inputs
]
grads
=
gradient
.
grad
(
cost
=
None
,
known_grads
=
{
y
:
g_y
},
wrt
=
wrt
,
consider_constant
=
wrt
,
disconnected_inputs
=
'ignore'
,
return_disconnected
=
'None'
)
gmp
=
dict
(
zip
(
wrt
,
grads
))
rval
=
[
gmp
.
get
(
p
,
None
)
for
p
in
diff_inputs
]
return
rval
dC_dinps_t
=
[
None
for
inp
in
diff_inputs
]
dC_dinps_t
=
[
None
for
inp
in
diff_inputs
]
disconnected_dC_dinps_t
=
[
True
for
inp
in
diff_inputs
]
disconnected_dC_dinps_t
=
[
True
for
inp
in
diff_inputs
]
dC_dXts
=
[]
dC_dXts
=
[]
...
...
theano/tensor/basic.py
浏览文件 @
40bbb7da
...
@@ -462,13 +462,27 @@ def _allclose(a, b, rtol=None, atol=None):
...
@@ -462,13 +462,27 @@ def _allclose(a, b, rtol=None, atol=None):
return
numpy
.
allclose
(
a
,
b
,
atol
=
atol_
,
rtol
=
rtol_
)
return
numpy
.
allclose
(
a
,
b
,
atol
=
atol_
,
rtol
=
rtol_
)
class NotConstantError(TypeError):
    """
    Error raised by get_constant_value when its argument is not
    constant.

    It currently subclasses TypeError so that existing callers, which
    expect get_constant_value to raise a TypeError in this situation,
    keep working. That is unsafe, though: a genuine bug inside
    get_constant_value could also surface as a TypeError and be
    mistaken for "not constant". Eventually this class should derive
    from Exception directly, and every user of get_constant_value
    should catch this more specific exception instead.
    """
def
get_constant_value
(
v
):
def
get_constant_value
(
v
):
"""return the constant scalar(0-D) value underlying variable `v`
"""return the constant scalar(0-D) value underlying variable `v`
If v is the output of dimshuffles, fills, allocs, rebroadcasts, cast
If v is the output of dimshuffles, fills, allocs, rebroadcasts, cast
this function digs through them.
this function digs through them.
If `v` is not some view of constant data, then raise a
Type
Error.
If `v` is not some view of constant data, then raise a
NotConstant
Error.
:note: There may be another function similar to this one in the
:note: There may be another function similar to this one in the
code, but I'm not sure where it is.
code, but I'm not sure where it is.
...
@@ -488,7 +502,7 @@ def get_constant_value(v):
...
@@ -488,7 +502,7 @@ def get_constant_value(v):
numpy
.
complex
(
data
)
# works for all numeric scalars
numpy
.
complex
(
data
)
# works for all numeric scalars
return
data
return
data
except
Exception
:
except
Exception
:
raise
Type
Error
(
raise
NotConstant
Error
(
'v.data is non-numeric, non-scalar, or has more than one'
'v.data is non-numeric, non-scalar, or has more than one'
' unique value'
,
v
)
' unique value'
,
v
)
if
v
.
owner
:
if
v
.
owner
:
...
@@ -516,9 +530,17 @@ def get_constant_value(v):
...
@@ -516,9 +530,17 @@ def get_constant_value(v):
v
.
owner
.
op
.
perform
(
v
.
owner
,
[
const
],
ret
)
v
.
owner
.
op
.
perform
(
v
.
owner
,
[
const
],
ret
)
return
ret
[
0
][
0
]
return
ret
[
0
][
0
]
if
isinstance
(
v
.
owner
.
op
,
Subtensor
)
and
v
.
ndim
==
0
:
if
isinstance
(
v
.
owner
.
op
,
Subtensor
)
and
v
.
ndim
==
0
:
if
isinstance
(
v
.
owner
.
inputs
[
0
],
TensorConstant
):
# This condition depends on Subtensor always embedding constant
return
v
.
owner
.
inputs
[
0
]
.
data
.
__getitem__
(
# indices in the Op rather than making them inputs to the Apply node
if
isinstance
(
v
.
owner
.
inputs
[
0
],
TensorConstant
)
and
\
len
(
v
.
owner
.
inputs
)
==
1
:
try
:
return
v
.
owner
.
inputs
[
0
]
.
data
.
__getitem__
(
tuple
(
v
.
owner
.
op
.
idx_list
))
tuple
(
v
.
owner
.
op
.
idx_list
))
except
IndexError
:
raise
IndexError
(
str
(
tuple
(
v
.
owner
.
op
.
idx_list
))
+
" is not a valid index into "
+
\
str
(
v
.
owner
.
inputs
[
0
]
.
data
))
# The index list 'idx_list' should have length the same
# The index list 'idx_list' should have length the same
# shape as the input.
# shape as the input.
...
@@ -3780,7 +3802,7 @@ class AdvancedIndexingError(TypeError):
...
@@ -3780,7 +3802,7 @@ class AdvancedIndexingError(TypeError):
class
Subtensor
(
Op
):
class
Subtensor
(
Op
):
"""Return a subtensor view
"""Return a subtensor view
The inputs array is the tensor x, followed by scalar integer
variabl
es.
The inputs array is the tensor x, followed by scalar integer
typ
es.
TODO: WRITEME: how are the scalar integer variables formatted?
TODO: WRITEME: how are the scalar integer variables formatted?
This class uses a relatively complex internal representation of the inputs
This class uses a relatively complex internal representation of the inputs
...
@@ -3789,7 +3811,7 @@ class Subtensor(Op):
...
@@ -3789,7 +3811,7 @@ class Subtensor(Op):
idx_list: instance variable TODO: WRITEME: is this a list or a tuple?
idx_list: instance variable TODO: WRITEME: is this a list or a tuple?
(old docstring gives two conflicting
(old docstring gives two conflicting
descriptions)
descriptions)
elements are either integers, theano scalars, or slices.
elements are either integers, theano scalar
type
s, or slices.
one element per "explicitly named dimension"
one element per "explicitly named dimension"
TODO: WRITEME: what is an "explicitly named dimension" ?
TODO: WRITEME: what is an "explicitly named dimension" ?
...
@@ -3798,7 +3820,11 @@ class Subtensor(Op):
...
@@ -3798,7 +3820,11 @@ class Subtensor(Op):
if slice:
if slice:
start/stop/step members of each slice are integer indices
start/stop/step members of each slice are integer indices
into the inputs array or None
into the inputs array or None
integer indices be actual integers or theano scalars
integer indices be actual integers or theano scalar types
Note that the idx_list defines the Op, so two Subtensor instances are
considered to be different Ops if they have different idx_list fields.
This means that the entries in it are theano Types, not theano Variables.
@todo: add support for advanced tensor indexing (in Subtensor_dx too).
@todo: add support for advanced tensor indexing (in Subtensor_dx too).
...
@@ -3816,6 +3842,17 @@ class Subtensor(Op):
...
@@ -3816,6 +3842,17 @@ class Subtensor(Op):
@staticmethod
@staticmethod
def
collapse
(
idxs
,
cond
):
def
collapse
(
idxs
,
cond
):
"""
idxs: a list of indices or slices.
cond: a callable that returns a bool
returns: idxs, with the slices flattened out into a list.
if cond is true for an entry, does not flatten it.
"""
ret
=
[]
ret
=
[]
def
helper
(
entry
):
def
helper
(
entry
):
...
@@ -3828,10 +3865,20 @@ class Subtensor(Op):
...
@@ -3828,10 +3865,20 @@ class Subtensor(Op):
for
idx
in
idxs
:
for
idx
in
idxs
:
helper
(
idx
)
helper
(
idx
)
return
ret
return
ret
@staticmethod
@staticmethod
def
convert
(
entry
,
slice_ok
=
True
):
def
convert
(
entry
,
slice_ok
=
True
):
"""
The "idx_list" field is unique to each Subtensor instance.
It is not unique to each Apply node, so it should not refer to
specific Variables. This method changes references to Variables
into references to Types.
TODO: WRITEME: This method also accepts "entry" already being a Type;
when would that happen?
"""
invalid_scal_types
=
[
scal
.
float64
,
scal
.
float32
]
invalid_scal_types
=
[
scal
.
float64
,
scal
.
float32
]
scal_types
=
[
scal
.
int64
,
scal
.
int32
,
scal
.
int16
,
scal
.
int8
]
scal_types
=
[
scal
.
int64
,
scal
.
int32
,
scal
.
int16
,
scal
.
int8
]
tensor_types
=
[
lscalar
,
iscalar
,
wscalar
,
bscalar
]
tensor_types
=
[
lscalar
,
iscalar
,
wscalar
,
bscalar
]
...
...
theano/tensor/nnet/conv.py
浏览文件 @
40bbb7da
...
@@ -801,10 +801,9 @@ class ConvOp(OpenMPOp):
...
@@ -801,10 +801,9 @@ class ConvOp(OpenMPOp):
# mimic what happens inside theano.grad: get the input gradient
# mimic what happens inside theano.grad: get the input gradient
# of the final cost wrt all variables involved.
# of the final cost wrt all variables involved.
tmp_gmap
=
theano
.
gradient
.
grad_sources_inputs
(
return
theano
.
gradient
.
grad
(
cost
=
None
,
[(
node
,
gz
)],
[
inputs
,
kerns
])
known_grads
=
{
node
:
gz
},
wrt
=
[
inputs
,
kerns
])
return
[
tmp_gmap
[
inputs
],
tmp_gmap
[
kerns
]]
if
self
.
dx
not
in
(
1
,
2
)
or
self
.
dy
not
in
(
1
,
2
):
if
self
.
dx
not
in
(
1
,
2
)
or
self
.
dy
not
in
(
1
,
2
):
raise
NotImplementedError
(
raise
NotImplementedError
(
...
...
theano/tests/test_gradient.py
浏览文件 @
40bbb7da
...
@@ -6,7 +6,6 @@ import unittest
...
@@ -6,7 +6,6 @@ import unittest
import
theano
import
theano
from
theano
import
gof
from
theano
import
gof
from
theano.gradient
import
grad_sources_inputs
from
theano
import
gradient
from
theano
import
gradient
from
theano.tensor.nnet.Conv3D
import
conv3D
from
theano.tensor.nnet.Conv3D
import
conv3D
from
theano
import
config
from
theano
import
config
...
@@ -16,6 +15,16 @@ from theano.gof.null_type import NullType
...
@@ -16,6 +15,16 @@ from theano.gof.null_type import NullType
one
=
theano
.
tensor
.
as_tensor_variable
(
1.
)
one
=
theano
.
tensor
.
as_tensor_variable
(
1.
)
def grad_sources_inputs(sources, inputs):
    """
    Emulate the old grad_sources_inputs function on top of the new
    theano.gradient.grad interface, so the existing tests can be kept
    as they are.

    sources: sequence of (variable, gradient) pairs; passed to grad as
        known_grads.
    inputs: variables to differentiate with respect to. If None, the
        graph inputs of the source variables are used.

    Returns a dict mapping each input to the gradient grad computed
    for it.
    """
    if inputs is None:
        source_vars = [pair[0] for pair in sources]
        inputs = theano.gof.graph.inputs(source_vars)

    input_grads = theano.gradient.grad(
        cost=None,
        known_grads=dict(sources),
        wrt=inputs,
        consider_constant=inputs)

    return dict(zip(inputs, input_grads))
class
testgrad_sources_inputs
(
unittest
.
TestCase
):
class
testgrad_sources_inputs
(
unittest
.
TestCase
):
def
test_retNone1
(
self
):
def
test_retNone1
(
self
):
...
@@ -369,35 +378,6 @@ class test_grad(unittest.TestCase):
...
@@ -369,35 +378,6 @@ class test_grad(unittest.TestCase):
# If we made it to here without an exception, then the
# If we made it to here without an exception, then the
# connection_pattern functionality worked correctly
# connection_pattern functionality worked correctly
def test_sum_disconnected(self):
    # Checks that DisconnectedType gradient terms can be added to
    # other terms correctly.
    x = theano.tensor.scalar()
    doubled = x * 2.
    shifted = x + 1.
    cost = doubled + shifted

    # An earlier version of theano failed on this call while trying
    # to add two DisconnectedTypes; building the gradient without an
    # exception is the whole test.
    theano.tensor.grad(cost, x, consider_constant=[doubled, shifted])
def test_output_grad_on_int(self):
    # When g_cost is given for a cost with a discrete dtype, it must
    # be equivalent to 0.
    x = theano.tensor.iscalar('x')
    y = x * 2

    # Zero-valued output gradients: these calls should work.
    accepted = [
        theano.tensor.constant(0),
        y.zeros_like(),
        y.zeros_like().astype('float64'),
    ]
    for g_cost in accepted:
        theano.grad(y, x, g_cost=g_cost)

    # A nonzero constant and an integer shared variable:
    # these calls should raise ValueError.
    rejected = [
        theano.tensor.constant(1),
        theano.shared(np.zeros((), dtype='int8')),
    ]
    for g_cost in rejected:
        self.assertRaises(ValueError, theano.grad, y, x, g_cost=g_cost)
def
test_downcast_dtype
(
self
):
def
test_downcast_dtype
(
self
):
# Test that the gradient of a cost wrt a float32 variable does not
# Test that the gradient of a cost wrt a float32 variable does not
# get upcasted to float64.
# get upcasted to float64.
...
@@ -418,6 +398,124 @@ class test_grad(unittest.TestCase):
...
@@ -418,6 +398,124 @@ class test_grad(unittest.TestCase):
# be downcasted to float32, so dc_dx should also be float32
# be downcasted to float32, so dc_dx should also be float32
assert
dc_dx
.
dtype
==
'float32'
assert
dc_dx
.
dtype
==
'float32'
def test_grad_constant(self):
    # The gradient must treat a Constant and a variable listed in
    # consider_constant in the same way.
    x = theano.tensor.scalar()
    y = theano.tensor.scalar()

    # Same expression twice: once with the variable x (declared
    # constant below) and once with the Constant `one`.
    sym_g_x = theano.tensor.grad(x + y, x, consider_constant=[x])
    sym_g_one = theano.tensor.grad(one + y, one)

    compute = theano.function([x, y], [sym_g_x, sym_g_one])
    g_x, g_one = compute(1, .5)

    if np.allclose(g_x, g_one):
        return

    raise AssertionError("Gradient using consider constant is " +
                         str(g_x) +
                         " but gradient with respect to the same Constant is " +
                         str(g_one))
def
test_known_grads
():
# Tests that the grad method with no known_grads
# matches what happens if you put its own known_grads
# in for each variable
full_range
=
theano
.
tensor
.
arange
(
10
)
x
=
theano
.
tensor
.
scalar
(
'x'
)
t
=
theano
.
tensor
.
iscalar
(
't'
)
ft
=
full_range
[
t
]
ft
.
name
=
'ft'
coeffs
=
theano
.
tensor
.
vector
(
'c'
)
ct
=
coeffs
[
t
]
ct
.
name
=
'ct'
p
=
x
**
ft
p
.
name
=
'p'
y
=
ct
*
p
y
.
name
=
'y'
cost
=
theano
.
tensor
.
sqr
(
y
)
cost
.
name
=
'cost'
layers
=
[
[
cost
],
[
y
],
[
ct
,
p
],
[
ct
,
x
,
ft
],
[
coeffs
,
t
,
full_range
,
x
]
]
inputs
=
[
coeffs
,
t
,
x
]
rng
=
np
.
random
.
RandomState
([
2012
,
11
,
15
])
values
=
[
rng
.
randn
(
10
),
rng
.
randint
(
10
),
rng
.
randn
()
]
values
=
[
np
.
cast
[
ipt
.
dtype
](
value
)
for
ipt
,
value
in
zip
(
inputs
,
values
)]
true_grads
=
theano
.
tensor
.
grad
(
cost
,
inputs
,
disconnected_inputs
=
'ignore'
)
true_grads
=
theano
.
function
(
inputs
,
true_grads
)
true_grads
=
true_grads
(
*
values
)
for
layer
in
layers
:
print
'Testing by separately computing '
,
layer
first
=
theano
.
tensor
.
grad
(
cost
,
layer
,
disconnected_inputs
=
'ignore'
)
known
=
dict
(
zip
(
layer
,
first
))
full
=
theano
.
tensor
.
grad
(
cost
=
None
,
known_grads
=
known
,
wrt
=
inputs
,
disconnected_inputs
=
'ignore'
)
full
=
theano
.
function
(
inputs
,
full
)
full
=
full
(
*
values
)
assert
len
(
true_grads
)
==
len
(
full
)
for
a
,
b
,
var
in
zip
(
true_grads
,
full
,
inputs
):
if
not
np
.
allclose
(
a
,
b
):
print
'Failure'
print
a
print
b
print
var
print
layer
for
v
in
known
:
print
v
,
':'
,
theano
.
function
(
inputs
,
known
[
v
])(
*
values
)
assert
False
def test_dxdx():
    # Checks that the gradient of a scalar with respect to itself
    # is 1.
    # An integer is used deliberately: people keep changing this
    # gradient to be 0 on integers, but under our interpretation of
    # the gradient as defined in the Op contract it should be 1.
    # If you feel the need to change this unit test you are probably
    # modifying the Op contract and should definitely get the approval
    # of multiple people on theano-dev.
    x = theano.tensor.iscalar()
    sym_grad = theano.tensor.grad(x, x)
    grad_value = sym_grad.eval({x: 12})
    assert np.allclose(grad_value, 1.)
def test_known_grads_integers():
    # Tests that known_grads works on integers: when the gradient on
    # an integer variable is supplied through known_grads, grad with
    # respect to that same variable must return the supplied gradient.
    x = theano.tensor.iscalar()
    g_expected = theano.tensor.scalar()

    g_grad = theano.gradient.grad(cost=None,
                                  known_grads={x: g_expected},
                                  wrt=x)

    # Only g_expected is a function input: the compiled function is
    # built and called without any value for x. (The original code
    # rebound the Python name x to -3 here; that value was never
    # used, so the dead assignment has been dropped.)
    f = theano.function([g_expected], g_grad)

    gv = np.cast[theano.config.floatX](.6)
    g_actual = f(gv)

    assert np.allclose(g_actual, gv)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
theano/tests/test_rop.py
浏览文件 @
40bbb7da
...
@@ -341,15 +341,9 @@ class test_RopLop(RopLop_checker):
...
@@ -341,15 +341,9 @@ class test_RopLop(RopLop_checker):
rop_out2
=
tensor
.
Rop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
rop_out2
=
tensor
.
Rop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
assert
isinstance
(
rop_out2
,
tuple
)
assert
isinstance
(
rop_out2
,
tuple
)
assert
len
(
rop_out2
)
==
3
assert
len
(
rop_out2
)
==
3
lop_out1
=
tensor
.
Lop
([
m
,
v
,
m
+
v
],
(
m
,
v
),
[
m_
,
v_
])
assert
isinstance
(
lop_out1
,
tuple
)
assert
len
(
lop_out1
)
==
2
lop_out2
=
tensor
.
Lop
((
m
,
v
,
m
+
v
),
[
m
,
v
],
[
m_
,
v_
])
assert
isinstance
(
lop_out2
,
list
)
assert
len
(
lop_out2
)
==
2
all_outs
=
[]
all_outs
=
[]
for
o
in
rop_out1
,
rop_out2
,
lop_out1
,
lop_out2
:
for
o
in
rop_out1
,
rop_out2
:
all_outs
.
extend
(
o
)
all_outs
.
extend
(
o
)
f
=
theano
.
function
([
m
,
v
,
m_
,
v_
],
all_outs
)
f
=
theano
.
function
([
m
,
v
,
m_
,
v_
],
all_outs
)
f
(
mval
,
vval
,
m_val
,
v_val
)
f
(
mval
,
vval
,
m_val
,
v_val
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论