Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
475e8ad9
提交
475e8ad9
authored
7月 08, 2013
作者:
Frederic
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
pep8
上级
c3d4ad81
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
52 行增加
和
50 行删除
+52
-50
gradient.py
theano/gradient.py
+49
-48
test_linalg.py
theano/sandbox/linalg/tests/test_linalg.py
+3
-2
没有找到文件。
theano/gradient.py
浏览文件 @
475e8ad9
...
@@ -78,10 +78,9 @@ def grad_not_implemented(op, x_pos, x, comment=""):
...
@@ -78,10 +78,9 @@ def grad_not_implemented(op, x_pos, x, comment=""):
gradient is not implemented.
gradient is not implemented.
"""
"""
return
(
NullType
(
return
(
NullType
((
(
"This variable is Null because the grad method for "
"This variable is Null because the grad method for "
"input
%
s (
%
s) of the
%
s op is not implemented.
%
s"
"input
%
s (
%
s) of the
%
s op is not implemented.
%
s"
)
%
(
x_pos
,
x
,
op
,
comment
)))()
)
%
(
x_pos
,
x
,
op
,
comment
)))()
...
@@ -341,8 +340,8 @@ def Lop(f, wrt, eval_points, consider_constant=None,
...
@@ -341,8 +340,8 @@ def Lop(f, wrt, eval_points, consider_constant=None,
known
=
dict
(
izip
(
f
,
grads
))
known
=
dict
(
izip
(
f
,
grads
))
ret
=
grad
(
cost
=
None
,
known_grads
=
known
,
ret
=
grad
(
cost
=
None
,
known_grads
=
known
,
consider_constant
=
consider_constant
,
wrt
=
wrt
,
consider_constant
=
consider_constant
,
wrt
=
wrt
,
disconnected_inputs
=
disconnected_inputs
)
disconnected_inputs
=
disconnected_inputs
)
return
format_as
(
using_list
,
using_tuple
,
ret
)
return
format_as
(
using_list
,
using_tuple
,
ret
)
...
@@ -352,8 +351,8 @@ def Lop(f, wrt, eval_points, consider_constant=None,
...
@@ -352,8 +351,8 @@ def Lop(f, wrt, eval_points, consider_constant=None,
#########################
#########################
def
grad
(
cost
,
wrt
,
consider_constant
=
None
,
def
grad
(
cost
,
wrt
,
consider_constant
=
None
,
disconnected_inputs
=
'raise'
,
add_names
=
True
,
disconnected_inputs
=
'raise'
,
add_names
=
True
,
known_grads
=
None
,
return_disconnected
=
'zero'
):
known_grads
=
None
,
return_disconnected
=
'zero'
):
"""
"""
:type cost: Scalar (0-dimensional) Variable.
:type cost: Scalar (0-dimensional) Variable.
May optionally be None if known_grads is provided.
May optionally be None if known_grads is provided.
...
@@ -406,17 +405,16 @@ def grad(cost, wrt, consider_constant=None,
...
@@ -406,17 +405,16 @@ def grad(cost, wrt, consider_constant=None,
if
cost
is
not
None
and
isinstance
(
cost
.
type
,
NullType
):
if
cost
is
not
None
and
isinstance
(
cost
.
type
,
NullType
):
raise
ValueError
(
"Can't differentiate a NaN cost."
raise
ValueError
(
"Can't differentiate a NaN cost."
"cost is NaN because "
+
\
"cost is NaN because "
+
cost
.
type
.
why_null
)
cost
.
type
.
why_null
)
if
cost
is
not
None
and
cost
.
ndim
!=
0
:
if
cost
is
not
None
and
cost
.
ndim
!=
0
:
raise
TypeError
(
"cost must be a scalar."
)
raise
TypeError
(
"cost must be a scalar."
)
if
isinstance
(
wrt
,
set
):
if
isinstance
(
wrt
,
set
):
raise
TypeError
(
"wrt must not be a set. sets have no defined "
raise
TypeError
(
"wrt must not be a set. sets have no defined "
"iteration order, so we can't return gradients in a matching
"
"iteration order, so we can't return gradients in a
"
"
order."
)
" matching
order."
)
using_list
=
isinstance
(
wrt
,
list
)
using_list
=
isinstance
(
wrt
,
list
)
using_tuple
=
isinstance
(
wrt
,
tuple
)
using_tuple
=
isinstance
(
wrt
,
tuple
)
...
@@ -426,7 +424,7 @@ def grad(cost, wrt, consider_constant=None,
...
@@ -426,7 +424,7 @@ def grad(cost, wrt, consider_constant=None,
for
elem
in
wrt
:
for
elem
in
wrt
:
if
not
isinstance
(
elem
,
Variable
):
if
not
isinstance
(
elem
,
Variable
):
raise
TypeError
(
"Expected Variable, got "
+
str
(
elem
)
+
raise
TypeError
(
"Expected Variable, got "
+
str
(
elem
)
+
" of type "
+
str
(
type
(
elem
)))
" of type "
+
str
(
type
(
elem
)))
outputs
=
[]
outputs
=
[]
if
cost
is
not
None
:
if
cost
is
not
None
:
...
@@ -435,7 +433,7 @@ def grad(cost, wrt, consider_constant=None,
...
@@ -435,7 +433,7 @@ def grad(cost, wrt, consider_constant=None,
outputs
.
extend
(
known_grads
.
keys
())
outputs
.
extend
(
known_grads
.
keys
())
var_to_app_to_idx
=
_populate_var_to_app_to_idx
(
var_to_app_to_idx
=
_populate_var_to_app_to_idx
(
outputs
,
wrt
,
consider_constant
)
outputs
,
wrt
,
consider_constant
)
# build a dict mapping var to the gradient of cost with respect to var
# build a dict mapping var to the gradient of cost with respect to var
grad_dict
=
OrderedDict
()
grad_dict
=
OrderedDict
()
...
@@ -452,7 +450,8 @@ def grad(cost, wrt, consider_constant=None,
...
@@ -452,7 +450,8 @@ def grad(cost, wrt, consider_constant=None,
# g_cost may be Disconnected or NullType. A creative use of the function,
# g_cost may be Disconnected or NullType. A creative use of the function,
# sure, but nonetheless one we can and should support. So before we try
# sure, but nonetheless one we can and should support. So before we try
# to cast it make sure it even has a dtype
# to cast it make sure it even has a dtype
if
hasattr
(
g_cost
.
type
,
'dtype'
)
and
cost
.
type
.
dtype
not
in
tensor
.
discrete_dtypes
:
if
(
hasattr
(
g_cost
.
type
,
'dtype'
)
and
cost
.
type
.
dtype
not
in
tensor
.
discrete_dtypes
):
# Here we enforce the constraint that floating point variables have
# Here we enforce the constraint that floating point variables have
# the same dtype as their gradient.
# the same dtype as their gradient.
g_cost
=
g_cost
.
astype
(
cost
.
type
.
dtype
)
g_cost
=
g_cost
.
astype
(
cost
.
type
.
dtype
)
...
@@ -471,8 +470,8 @@ def grad(cost, wrt, consider_constant=None,
...
@@ -471,8 +470,8 @@ def grad(cost, wrt, consider_constant=None,
'Ambiguous whether
%
s should be made into tensor'
'Ambiguous whether
%
s should be made into tensor'
' or sparse theano variable'
%
str
(
type
(
g_var
)))
' or sparse theano variable'
%
str
(
type
(
g_var
)))
if
not
isinstance
(
g_var
.
type
,
(
NullType
,
DisconnectedType
))
and
'float'
\
if
(
not
isinstance
(
g_var
.
type
,
(
NullType
,
DisconnectedType
))
and
not
in
str
(
g_var
.
type
.
dtype
):
'float'
not
in
str
(
g_var
.
type
.
dtype
)
):
raise
TypeError
(
"Gradients must always be NullType, "
raise
TypeError
(
"Gradients must always be NullType, "
"DisconnectedType, or continuous, but grad was "
"DisconnectedType, or continuous, but grad was "
"given a known_grad of type "
+
str
(
g_var
.
type
))
"given a known_grad of type "
+
str
(
g_var
.
type
))
...
@@ -728,11 +727,13 @@ def _populate_var_to_app_to_idx(outputs, wrt, consider_constant):
...
@@ -728,11 +727,13 @@ def _populate_var_to_app_to_idx(outputs, wrt, consider_constant):
return
var_to_app_to_idx
return
var_to_app_to_idx
class
NullTypeGradError
(
TypeError
):
class
NullTypeGradError
(
TypeError
):
"""
"""
Raised when grad encounters a NullType.
Raised when grad encounters a NullType.
"""
"""
class
DisconnectedInputError
(
ValueError
):
class
DisconnectedInputError
(
ValueError
):
"""
"""
Raised when grad is asked to compute the gradient
Raised when grad is asked to compute the gradient
...
@@ -740,8 +741,9 @@ class DisconnectedInputError(ValueError):
...
@@ -740,8 +741,9 @@ class DisconnectedInputError(ValueError):
disconnected_inputs='raise'.
disconnected_inputs='raise'.
"""
"""
def
_populate_grad_dict
(
var_to_app_to_idx
,
def
_populate_grad_dict
(
var_to_app_to_idx
,
grad_dict
,
wrt
,
cost_name
=
None
):
grad_dict
,
wrt
,
cost_name
=
None
):
"""
"""
Helper function for grad function.
Helper function for grad function.
...
@@ -783,7 +785,7 @@ def _populate_grad_dict(var_to_app_to_idx,
...
@@ -783,7 +785,7 @@ def _populate_grad_dict(var_to_app_to_idx,
# list of bools indicating if each output is connected to the cost
# list of bools indicating if each output is connected to the cost
outputs_connected
=
[
not
isinstance
(
g
.
type
,
DisconnectedType
)
outputs_connected
=
[
not
isinstance
(
g
.
type
,
DisconnectedType
)
for
g
in
output_grads
]
for
g
in
output_grads
]
connection_pattern
=
_node_to_pattern
(
node
)
connection_pattern
=
_node_to_pattern
(
node
)
...
@@ -840,7 +842,7 @@ def _populate_grad_dict(var_to_app_to_idx,
...
@@ -840,7 +842,7 @@ def _populate_grad_dict(var_to_app_to_idx,
# each destroyed input.
# each destroyed input.
try
:
try
:
dinputs
=
[
node
.
inputs
[
x
[
0
]]
for
x
in
dinputs
=
[
node
.
inputs
[
x
[
0
]]
for
x
in
node
.
op
.
destroy_map
.
values
()]
node
.
op
.
destroy_map
.
values
()]
except
AttributeError
:
except
AttributeError
:
dinputs
=
[]
dinputs
=
[]
...
@@ -899,11 +901,11 @@ def _populate_grad_dict(var_to_app_to_idx,
...
@@ -899,11 +901,11 @@ def _populate_grad_dict(var_to_app_to_idx,
if
input_grads
is
None
:
if
input_grads
is
None
:
raise
TypeError
(
"
%
s.grad returned NoneType, "
raise
TypeError
(
"
%
s.grad returned NoneType, "
"expected iterable."
%
str
(
node
.
op
))
"expected iterable."
%
str
(
node
.
op
))
if
len
(
input_grads
)
!=
len
(
inputs
):
if
len
(
input_grads
)
!=
len
(
inputs
):
raise
ValueError
((
"
%
s returned the wrong number of"
+
\
raise
ValueError
((
"
%
s returned the wrong number of"
+
" gradient terms."
)
%
str
(
node
.
op
))
" gradient terms."
)
%
str
(
node
.
op
))
# must convert to list in case the op returns a tuple
# must convert to list in case the op returns a tuple
# we won't be able to post-process out the Nones if it does that
# we won't be able to post-process out the Nones if it does that
...
@@ -926,7 +928,7 @@ def _populate_grad_dict(var_to_app_to_idx,
...
@@ -926,7 +928,7 @@ def _populate_grad_dict(var_to_app_to_idx,
# used to mean undefined, zero, or disconnected.
# used to mean undefined, zero, or disconnected.
# We therefore don't allow it because its usage has become
# We therefore don't allow it because its usage has become
# so muddied.
# so muddied.
raise
TypeError
((
'
%
s.grad returned None for'
+
\
raise
TypeError
((
'
%
s.grad returned None for'
+
' a gradient term, '
' a gradient term, '
'this is prohibited. Instead of None,'
'this is prohibited. Instead of None,'
'return zeros_like(input), DisconnectedType()(),'
'return zeros_like(input), DisconnectedType()(),'
...
@@ -964,7 +966,7 @@ def _populate_grad_dict(var_to_app_to_idx,
...
@@ -964,7 +966,7 @@ def _populate_grad_dict(var_to_app_to_idx,
msg
+=
"verifiably zeros."
msg
+=
"verifiably zeros."
msg
=
msg
%
(
str
(
node
.
op
),
str
(
term
),
msg
=
msg
%
(
str
(
node
.
op
),
str
(
term
),
str
(
type
(
term
)),
i
)
str
(
type
(
term
)),
i
)
if
is_zero
==
'no'
:
if
is_zero
==
'no'
:
msg
=
"
%
s.grad returned
%
s of type
%
s for input"
msg
=
"
%
s.grad returned
%
s of type
%
s for input"
...
@@ -980,8 +982,8 @@ def _populate_grad_dict(var_to_app_to_idx,
...
@@ -980,8 +982,8 @@ def _populate_grad_dict(var_to_app_to_idx,
#Check that op.connection_pattern matches the connectivity
#Check that op.connection_pattern matches the connectivity
#logic driving the op.grad method
#logic driving the op.grad method
for
i
,
packed
in
\
for
i
,
packed
in
enumerate
(
zip
(
inputs
,
input_grads
,
enumerate
(
zip
(
inputs
,
input_grads
,
inputs_connected
)):
inputs_connected
)):
ipt
,
ig
,
connected
=
packed
ipt
,
ig
,
connected
=
packed
actually_connected
=
\
actually_connected
=
\
not
isinstance
(
ig
.
type
,
DisconnectedType
)
not
isinstance
(
ig
.
type
,
DisconnectedType
)
...
@@ -1027,11 +1029,11 @@ def _populate_grad_dict(var_to_app_to_idx,
...
@@ -1027,11 +1029,11 @@ def _populate_grad_dict(var_to_app_to_idx,
if
not
isinstance
(
term
,
gof
.
Variable
):
if
not
isinstance
(
term
,
gof
.
Variable
):
raise
TypeError
(
"
%
s.grad returned
%
s, expected"
raise
TypeError
(
"
%
s.grad returned
%
s, expected"
" Variable instance."
%
(
str
(
node
.
op
),
" Variable instance."
%
(
str
(
node
.
op
),
type
(
term
)))
type
(
term
)))
if
isinstance
(
term
.
type
,
NullType
):
if
isinstance
(
term
.
type
,
NullType
):
raise
NullTypeGradError
(
"tensor.grad "
raise
NullTypeGradError
(
"tensor.grad "
"encountered a NaN. "
+
\
"encountered a NaN. "
+
term
.
type
.
why_null
)
term
.
type
.
why_null
)
#Don't try to sum up DisconnectedType placeholders
#Don't try to sum up DisconnectedType placeholders
...
@@ -1121,9 +1123,9 @@ class numeric_grad(object):
...
@@ -1121,9 +1123,9 @@ class numeric_grad(object):
# For now, we use a heuristic that catches very bad gradients, but is not
# For now, we use a heuristic that catches very bad gradients, but is not
# perfectly accurate.
# perfectly accurate.
type_eps
=
{
'float64'
:
1e-7
,
type_eps
=
{
'float64'
:
1e-7
,
'float32'
:
3e-4
,
'float32'
:
3e-4
,
numpy
.
dtype
(
'float64'
):
1e-7
,
numpy
.
dtype
(
'float64'
):
1e-7
,
numpy
.
dtype
(
'float32'
):
3e-4
}
numpy
.
dtype
(
'float32'
):
3e-4
}
def
__init__
(
self
,
f
,
pt
,
eps
=
None
,
out_type
=
None
):
def
__init__
(
self
,
f
,
pt
,
eps
=
None
,
out_type
=
None
):
"""Return the gradient of f at pt.
"""Return the gradient of f at pt.
...
@@ -1243,15 +1245,13 @@ class numeric_grad(object):
...
@@ -1243,15 +1245,13 @@ class numeric_grad(object):
"""
"""
if
len
(
g_pt
)
!=
len
(
self
.
gf
):
if
len
(
g_pt
)
!=
len
(
self
.
gf
):
raise
ValueError
(
raise
ValueError
(
'argument has wrong number of elements'
,
'argument has wrong number of elements'
,
len
(
g_pt
))
len
(
g_pt
))
errs
=
[]
errs
=
[]
for
i
,
(
a
,
b
)
in
enumerate
(
zip
(
g_pt
,
self
.
gf
)):
for
i
,
(
a
,
b
)
in
enumerate
(
zip
(
g_pt
,
self
.
gf
)):
if
a
.
shape
!=
b
.
shape
:
if
a
.
shape
!=
b
.
shape
:
raise
ValueError
(
raise
ValueError
(
'argument element
%
i has wrong shape
%
s'
%
(
'argument element
%
i has wrong shape
%
s'
%
(
i
,
str
((
a
.
shape
,
b
.
shape
))))
i
,
str
((
a
.
shape
,
b
.
shape
))))
errs
.
append
(
numeric_grad
.
abs_rel_err
(
a
,
b
))
errs
.
append
(
numeric_grad
.
abs_rel_err
(
a
,
b
))
return
errs
return
errs
...
@@ -1368,11 +1368,12 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
...
@@ -1368,11 +1368,12 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
def
function
(
inputs
,
output
):
def
function
(
inputs
,
output
):
if
mode
is
None
:
if
mode
is
None
:
f
=
compile
.
function
(
inputs
,
output
,
accept_inplace
=
True
,
f
=
compile
.
function
(
inputs
,
output
,
accept_inplace
=
True
,
allow_input_downcast
=
True
,
on_unused_input
=
'ignore'
)
allow_input_downcast
=
True
,
on_unused_input
=
'ignore'
)
else
:
else
:
f
=
compile
.
function
(
inputs
,
output
,
accept_inplace
=
True
,
f
=
compile
.
function
(
inputs
,
output
,
accept_inplace
=
True
,
allow_input_downcast
=
True
,
mode
=
mode
,
allow_input_downcast
=
True
,
mode
=
mode
,
on_unused_input
=
'ignore'
)
on_unused_input
=
'ignore'
)
return
f
return
f
tensor_pt
=
[
TensorType
(
tensor_pt
=
[
TensorType
(
...
@@ -1525,9 +1526,9 @@ def jacobian(expression, wrt, consider_constant=None,
...
@@ -1525,9 +1526,9 @@ def jacobian(expression, wrt, consider_constant=None,
rvals
=
[]
rvals
=
[]
for
inp
in
args
[
2
:]:
for
inp
in
args
[
2
:]:
rval
=
grad
(
expr
[
idx
],
rval
=
grad
(
expr
[
idx
],
inp
,
inp
,
consider_constant
=
consider_constant
,
consider_constant
=
consider_constant
,
disconnected_inputs
=
disconnected_inputs
)
disconnected_inputs
=
disconnected_inputs
)
rvals
.
append
(
rval
)
rvals
.
append
(
rval
)
return
rvals
return
rvals
# Computing the gradients does not affect the random seeds on any random
# Computing the gradients does not affect the random seeds on any random
...
@@ -1535,8 +1536,8 @@ def jacobian(expression, wrt, consider_constant=None,
...
@@ -1535,8 +1536,8 @@ def jacobian(expression, wrt, consider_constant=None,
# just backtracking over old values. (rp Jan 2012 - if anyone has a
# just backtracking over old values. (rp Jan 2012 - if anyone has a
# counter example please show me)
# counter example please show me)
jacobs
,
updates
=
theano
.
scan
(
inner_function
,
jacobs
,
updates
=
theano
.
scan
(
inner_function
,
sequences
=
arange
(
expression
.
shape
[
0
]),
sequences
=
arange
(
expression
.
shape
[
0
]),
non_sequences
=
[
expression
]
+
wrt
)
non_sequences
=
[
expression
]
+
wrt
)
assert
not
updates
,
\
assert
not
updates
,
\
(
"Scan has returned a list of updates. This should not "
(
"Scan has returned a list of updates. This should not "
"happen! Report this to theano-users (also include the "
"happen! Report this to theano-users (also include the "
...
@@ -1545,7 +1546,7 @@ def jacobian(expression, wrt, consider_constant=None,
...
@@ -1545,7 +1546,7 @@ def jacobian(expression, wrt, consider_constant=None,
def
hessian
(
cost
,
wrt
,
consider_constant
=
None
,
def
hessian
(
cost
,
wrt
,
consider_constant
=
None
,
disconnected_inputs
=
'raise'
):
disconnected_inputs
=
'raise'
):
"""
"""
:type cost: Scalar (0-dimensional) Variable.
:type cost: Scalar (0-dimensional) Variable.
:type wrt: Vector (1-dimensional tensor) 'Variable' or list of
:type wrt: Vector (1-dimensional tensor) 'Variable' or list of
...
...
theano/sandbox/linalg/tests/test_linalg.py
浏览文件 @
475e8ad9
...
@@ -204,8 +204,8 @@ def test_rop_lop():
...
@@ -204,8 +204,8 @@ def test_rop_lop():
rop_f
=
function
([
mx
,
mv
],
yv
)
rop_f
=
function
([
mx
,
mv
],
yv
)
sy
,
_
=
theano
.
scan
(
lambda
i
,
y
,
x
,
v
:
(
tensor
.
grad
(
y
[
i
],
x
)
*
v
)
.
sum
(),
sy
,
_
=
theano
.
scan
(
lambda
i
,
y
,
x
,
v
:
(
tensor
.
grad
(
y
[
i
],
x
)
*
v
)
.
sum
(),
sequences
=
tensor
.
arange
(
y
.
shape
[
0
]),
sequences
=
tensor
.
arange
(
y
.
shape
[
0
]),
non_sequences
=
[
y
,
mx
,
mv
])
non_sequences
=
[
y
,
mx
,
mv
])
scan_f
=
function
([
mx
,
mv
],
sy
)
scan_f
=
function
([
mx
,
mv
],
sy
)
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
...
@@ -561,6 +561,7 @@ class test_Eigh(test_Eig):
...
@@ -561,6 +561,7 @@ class test_Eigh(test_Eig):
class
test_Eigh_float32
(
test_Eigh
):
class
test_Eigh_float32
(
test_Eigh
):
dtype
=
'float32'
dtype
=
'float32'
def
test_matrix_inverse_solve
():
def
test_matrix_inverse_solve
():
if
not
imported_scipy
:
if
not
imported_scipy
:
raise
SkipTest
(
"Scipy needed for the Solve op."
)
raise
SkipTest
(
"Scipy needed for the Solve op."
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论