Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
d8c5ab33
提交
d8c5ab33
authored
11月 29, 2011
作者:
Razvan Pascanu
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #239 from goodfeli/to_rebase
rebased Razvan's Rop/Lop pull request It was checked by me and Ian.
上级
2cdfc327
8e0301c5
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
74 行增加
和
64 行删除
+74
-64
tensor_grad.py
theano/tensor/tensor_grad.py
+74
-64
没有找到文件。
theano/tensor/tensor_grad.py
浏览文件 @
d8c5ab33
"""Driver for gradient calculations."""
"""Driver for gradient calculations."""
__authors__
=
"James Bergstra, Razvan Pascanu"
__authors__
=
"James Bergstra, Razvan Pascanu"
__copyright__
=
"(c) 2011, Universite de Montreal"
__copyright__
=
"(c) 2011, Universite de Montreal"
__license__
=
"3-clause BSD License"
__license__
=
"3-clause BSD License"
__contact__
=
"theano-dev <theano-dev@googlegroups.com>"
__contact__
=
"theano-dev <theano-dev@googlegroups.com>"
__docformat__
=
"restructuredtext en"
__docformat__
=
"restructuredtext en"
...
@@ -11,7 +11,7 @@ import __builtin__
...
@@ -11,7 +11,7 @@ import __builtin__
import
logging
import
logging
import
warnings
import
warnings
import
numpy
#
for numeric_grad
import
numpy
#
for numeric_grad
import
theano
import
theano
from
theano.tensor
import
TensorType
,
TensorVariable
,
ones_like
,
\
from
theano.tensor
import
TensorType
,
TensorVariable
,
ones_like
,
\
...
@@ -26,6 +26,7 @@ _logger = logging.getLogger('theano.tensor.tensor_grad')
...
@@ -26,6 +26,7 @@ _logger = logging.getLogger('theano.tensor.tensor_grad')
# R Operator
# R Operator
########################
########################
def
Rop
(
f
,
wrt
,
eval_points
):
def
Rop
(
f
,
wrt
,
eval_points
):
"""
"""
Computes the R operation on `f` wrt to `wrt` evaluated at points given
Computes the R operation on `f` wrt to `wrt` evaluated at points given
...
@@ -50,16 +51,16 @@ def Rop(f, wrt, eval_points):
...
@@ -50,16 +51,16 @@ def Rop(f, wrt, eval_points):
If `wrt` is a list/tuple, then return a list/tuple with the results.
If `wrt` is a list/tuple, then return a list/tuple with the results.
"""
"""
using_list
=
isinstance
(
wrt
,
list
)
using_list
=
isinstance
(
f
,
list
)
using_tuple
=
isinstance
(
wrt
,
tuple
)
using_tuple
=
isinstance
(
f
,
tuple
)
if
not
(
using_list
or
using_tuple
):
if
not
isinstance
(
wrt
,
(
list
,
tuple
)
):
wrt
=
[
wrt
]
wrt
=
[
wrt
]
if
not
isinstance
(
eval_points
,
(
list
,
tuple
)):
if
not
isinstance
(
eval_points
,
(
list
,
tuple
)):
eval_points
=
[
eval_points
]
eval_points
=
[
eval_points
]
if
not
isinstance
(
f
,
(
list
,
tuple
)):
if
not
isinstance
(
f
,
(
list
,
tuple
)):
f
=
[
f
]
f
=
[
f
]
assert
len
(
wrt
)
==
len
(
eval_points
)
assert
len
(
wrt
)
==
len
(
eval_points
)
...
@@ -84,7 +85,7 @@ def Rop(f, wrt, eval_points):
...
@@ -84,7 +85,7 @@ def Rop(f, wrt, eval_points):
if
node
is
None
:
if
node
is
None
:
return
None
return
None
else
:
else
:
op
=
node
.
op
op
=
node
.
op
inputs
=
node
.
inputs
inputs
=
node
.
inputs
if
not
hasattr
(
op
,
'R_op'
):
if
not
hasattr
(
op
,
'R_op'
):
raise
Exception
((
' R_op was not implemented for
%
s'
raise
Exception
((
' R_op was not implemented for
%
s'
...
@@ -95,23 +96,24 @@ def Rop(f, wrt, eval_points):
...
@@ -95,23 +96,24 @@ def Rop(f, wrt, eval_points):
local_eval_points
=
[]
local_eval_points
=
[]
for
inp
in
inputs
:
for
inp
in
inputs
:
if
inp
in
wrt
:
if
inp
in
wrt
:
local_eval_points
.
append
(
eval_points
[
wrt
.
index
(
inp
)]
)
local_eval_points
.
append
(
eval_points
[
wrt
.
index
(
inp
)]
)
elif
inp
.
owner
is
None
:
elif
inp
.
owner
is
None
:
local_eval_points
.
append
(
zeros_like
(
inp
)
)
local_eval_points
.
append
(
zeros_like
(
inp
)
)
elif
inp
.
owner
in
seen_nodes
:
elif
inp
.
owner
in
seen_nodes
:
local_eval_points
.
append
(
local_eval_points
.
append
(
seen_nodes
[
inp
.
owner
][
inp
.
owner
.
outputs
.
index
(
inp
)
]
)
seen_nodes
[
inp
.
owner
][
inp
.
owner
.
outputs
.
index
(
inp
)
]
)
else
:
else
:
# We actually need to compute the R_op for this node
# We actually need to compute the R_op for this node
_traverse
(
inp
.
owner
)
_traverse
(
inp
.
owner
)
local_eval_points
.
append
(
local_eval_points
.
append
(
seen_nodes
[
inp
.
owner
][
inp
.
owner
.
outputs
.
index
(
inp
)
])
seen_nodes
[
inp
.
owner
][
inp
.
owner
.
outputs
.
index
(
inp
)])
for
x
,
y
in
zip
(
inputs
,
local_eval_points
):
for
x
,
y
in
zip
(
inputs
,
local_eval_points
):
if
y
is
not
None
:
if
y
is
not
None
:
assert
(
as_tensor_variable
(
x
)
.
type
==
as_tensor_variable
(
y
)
.
type
)
assert
(
as_tensor_variable
(
x
)
.
type
==
as_tensor_variable
(
y
)
.
type
)
seen_nodes
[
node
]
=
op
.
R_op
(
node
.
inputs
,
local_eval_points
)
seen_nodes
[
node
]
=
op
.
R_op
(
node
.
inputs
,
local_eval_points
)
return
None
return
None
...
@@ -123,12 +125,12 @@ def Rop(f, wrt, eval_points):
...
@@ -123,12 +125,12 @@ def Rop(f, wrt, eval_points):
rval
=
[]
rval
=
[]
for
out
in
f
:
for
out
in
f
:
if
out
in
wrt
:
if
out
in
wrt
:
rval
.
append
(
eval_points
[
wrt
.
index
(
out
)])
rval
.
append
(
eval_points
[
wrt
.
index
(
out
)])
elif
seen_nodes
[
out
.
owner
][
out
.
owner
.
outputs
.
index
(
out
)]
is
None
:
elif
seen_nodes
[
out
.
owner
][
out
.
owner
.
outputs
.
index
(
out
)]
is
None
:
raise
ValueError
((
'The function is not differentiable with '
raise
ValueError
((
'The function is not differentiable with '
'respect to the provided inputs !'
))
'respect to the provided inputs !'
))
else
:
else
:
rval
.
append
(
seen_nodes
[
out
.
owner
][
out
.
owner
.
outputs
.
index
(
out
)]
)
rval
.
append
(
seen_nodes
[
out
.
owner
][
out
.
owner
.
outputs
.
index
(
out
)])
if
len
(
rval
)
==
1
:
if
len
(
rval
)
==
1
:
if
using_list
:
if
using_list
:
...
@@ -170,14 +172,16 @@ def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False,
...
@@ -170,14 +172,16 @@ def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False,
consider_constant
=
[]
consider_constant
=
[]
if
not
isinstance
(
f
,
TensorVariable
):
if
not
isinstance
(
f
,
TensorVariable
):
raise
TypeError
(
'In tensor.Lop(), cost argument should be a TensorVariable.'
,
f
)
raise
TypeError
((
'In tensor.Lop(), cost argument should be '
'a TensorVariable.'
),
f
)
if
type
(
eval_points
)
not
in
(
list
,
tuple
):
if
type
(
eval_points
)
not
in
(
list
,
tuple
):
eval_points
=
[
eval_points
]
eval_points
=
[
eval_points
]
using_list
=
isinstance
(
f
,
list
)
using_list
=
isinstance
(
wrt
,
list
)
using_tuple
=
isinstance
(
f
,
tuple
)
using_tuple
=
isinstance
(
wrt
,
tuple
)
if
not
(
using_list
or
using_tuple
):
if
not
isinstance
(
f
,
(
list
,
tuple
)):
f
=
[
f
]
f
=
[
f
]
inputs
=
gof
.
graph
.
inputs
(
f
)
inputs
=
gof
.
graph
.
inputs
(
f
)
...
@@ -193,7 +197,8 @@ def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False,
...
@@ -193,7 +197,8 @@ def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False,
# such subtle cases can be fixed by a more careful implementation of the
# such subtle cases can be fixed by a more careful implementation of the
# gradient, but for now Theano needs to throw an exception, and make the
# gradient, but for now Theano needs to throw an exception, and make the
# user aware that it does not know how to compute that gradient
# user aware that it does not know how to compute that gradient
if
not
isinstance
(
wrt
,
(
list
,
tuple
)):
if
not
(
using_list
or
using_tuple
):
wrt
=
[
wrt
]
wrt
=
[
wrt
]
ret
=
[]
ret
=
[]
for
p
in
wrt
:
for
p
in
wrt
:
...
@@ -241,10 +246,11 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
...
@@ -241,10 +246,11 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
:type g_cost: Scalar `Variable`, or None
:type g_cost: Scalar `Variable`, or None
:param g_cost: an expression for the gradient through cost. The default is
:param g_cost: an expression for the gradient through cost. The default is
``ones_like(cost)``.
``ones_like(cost)``.
:param consider_constant: a list of expressions not to backpropagate through
:param consider_constant: a list of expressions not to backpropagate
through
:param warn_type: a value of True will cause warnings to be logged for any
Op that emits a
:param warn_type: a value of True will cause warnings to be logged for any
gradient that does not match its input type.
Op that emits a
gradient that does not match its input type.
:type disconnected_inputs: string
:type disconnected_inputs: string
:param disconnected_inputs: Defines the behaviour if some of the variables
:param disconnected_inputs: Defines the behaviour if some of the variables
...
@@ -285,7 +291,8 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
...
@@ -285,7 +291,8 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
if
not
isinstance
(
cost
,
TensorVariable
):
if
not
isinstance
(
cost
,
TensorVariable
):
raise
TypeError
(
'In tensor.grad(), cost argument should be a TensorVariable.'
,
cost
)
raise
TypeError
((
'In tensor.grad(), cost argument should be '
'a TensorVariable.'
),
cost
)
if
cost
.
type
.
ndim
:
if
cost
.
type
.
ndim
:
raise
TypeError
(
raise
TypeError
(
...
@@ -302,7 +309,6 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
...
@@ -302,7 +309,6 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
list
(
inputs
)
+
list
(
consider_constant
),
list
(
inputs
)
+
list
(
consider_constant
),
warn_type
=
warn_type
)
warn_type
=
warn_type
)
# Note : If p is not in gmap there can be several reasons, among which
# Note : If p is not in gmap there can be several reasons, among which
# is the fact that p might not be part of the computational graph. A
# is the fact that p might not be part of the computational graph. A
# simple example is that for a+b for e.g. a[0] is not part of the graph,
# simple example is that for a+b for e.g. a[0] is not part of the graph,
...
@@ -357,28 +363,29 @@ class numeric_grad(object):
...
@@ -357,28 +363,29 @@ class numeric_grad(object):
#
#
# There is a relationship between the step size and the function value and
# There is a relationship between the step size and the function value and
# the measurement error that is incurred due to rounding. The finite
# the measurement error that is incurred due to rounding. The finite
# difference we measure is delta = f(x0) - f(x0+eps)
# difference we measure is
# delta = f(x0) - f(x0+eps)
#
#
# For maximum precision, f should be close to zero.
# For maximum precision, f should be close to zero.
# For every power of 2 that f departs from zero, we lose a bit of
# For every power of 2 that f departs from zero, we lose a bit of
precision
#
precision
in delta.
# in delta.
#
#
# Even in this case of maximum accuracy, there is a tradeoff between
# Even in this case of maximum accuracy, there is a tradeoff between
# stepsize and measurement error. Taking small steps allows us to measure
# stepsize and measurement error.
# large derivatives accuractly, but longer steps are required to measure
# Taking small steps allows us to measure large derivatives accuractly,
# small derivatives accurately. However longer steps introduce bias into
# but longer steps are required to measure small derivatives accurately.
# our measurement in general for non-linear functions.
# However longer steps introduce bias into our measurement in general
# for non-linear functions.
#
#
# It would be interesting to have a version of numeric grad that used an
# It would be interesting to have a version of numeric grad that used an
# adaptive stepsize.
# adaptive stepsize.
#
#
# For now, we use a heuristic that catches very bad gradients, but is not
# For now, we use a heuristic that catches very bad gradients, but is not
# perfectly accurate.
# perfectly accurate.
type_eps
=
{
'float64'
:
1e-7
,
type_eps
=
{
'float64'
:
1e-7
,
'float32'
:
3e-4
,
'float32'
:
3e-4
,
numpy
.
dtype
(
'float64'
):
1e-7
,
numpy
.
dtype
(
'float64'
):
1e-7
,
numpy
.
dtype
(
'float32'
):
3e-4
}
numpy
.
dtype
(
'float32'
):
3e-4
}
def
__init__
(
self
,
f
,
pt
,
eps
=
None
):
def
__init__
(
self
,
f
,
pt
,
eps
=
None
):
"""Return the gradient of f at pt.
"""Return the gradient of f at pt.
...
@@ -413,13 +420,15 @@ class numeric_grad(object):
...
@@ -413,13 +420,15 @@ class numeric_grad(object):
dtypes
=
[
str
(
p
.
dtype
)
for
p
in
apt
]
dtypes
=
[
str
(
p
.
dtype
)
for
p
in
apt
]
# TODO: remove this eventually (why was this here in the first place ?)
# TODO: remove this eventually (why was this here in the first place ?)
# In the case of CSM, the arguments are a mixture of floats and integers...
# In the case of CSM, the arguments are a mixture of floats and
#if not dtypes == [dtypes[0]] * len(apt):
# integers...
#raise TypeError('All function arguments must have same dtype')
# if not dtypes == [dtypes[0]] * len(apt):
# raise TypeError('All function arguments must have same dtype')
total_size
=
__builtin__
.
sum
(
prod
(
sh
)
for
sh
in
shapes
)
total_size
=
__builtin__
.
sum
(
prod
(
sh
)
for
sh
in
shapes
)
working_dtype
=
__builtin__
.
min
((
self
.
type_eps
[
dt
],
dt
)
for
dt
in
dtypes
)[
1
]
working_dtype
=
__builtin__
.
min
((
self
.
type_eps
[
dt
],
dt
)
for
dt
in
dtypes
)[
1
]
#create un-initialized memory
#create un-initialized memory
x
=
numpy
.
ndarray
((
total_size
,),
dtype
=
working_dtype
)
x
=
numpy
.
ndarray
((
total_size
,),
dtype
=
working_dtype
)
...
@@ -428,16 +437,15 @@ class numeric_grad(object):
...
@@ -428,16 +437,15 @@ class numeric_grad(object):
if
eps
is
None
:
if
eps
is
None
:
eps
=
__builtin__
.
max
(
self
.
type_eps
[
dt
]
for
dt
in
dtypes
)
eps
=
__builtin__
.
max
(
self
.
type_eps
[
dt
]
for
dt
in
dtypes
)
#set up aliases so that apt[i] is backed by memory in x
#set up aliases so that apt[i] is backed by memory in x
# and self.gf is backed by memory in gx
# and self.gf is backed by memory in gx
cur_pos
=
0
cur_pos
=
0
self
.
gf
=
[]
self
.
gf
=
[]
for
i
,
p
in
enumerate
(
apt
):
for
i
,
p
in
enumerate
(
apt
):
p_size
=
prod
(
p
.
shape
)
p_size
=
prod
(
p
.
shape
)
# set up alias
# set up alias
apt
[
i
]
=
x
[
cur_pos
:
cur_pos
+
p_size
]
.
reshape
(
p
.
shape
)
apt
[
i
]
=
x
[
cur_pos
:
cur_pos
+
p_size
]
.
reshape
(
p
.
shape
)
self
.
gf
.
append
(
gx
[
cur_pos
:
cur_pos
+
p_size
]
.
reshape
(
p
.
shape
))
self
.
gf
.
append
(
gx
[
cur_pos
:
cur_pos
+
p_size
]
.
reshape
(
p
.
shape
))
# initialize with p's value
# initialize with p's value
apt
[
i
][
...
]
=
p
apt
[
i
][
...
]
=
p
cur_pos
+=
p_size
cur_pos
+=
p_size
...
@@ -452,7 +460,7 @@ class numeric_grad(object):
...
@@ -452,7 +460,7 @@ class numeric_grad(object):
x
[
i
]
+=
eps
x
[
i
]
+=
eps
f_eps
=
f
(
*
apt
)
f_eps
=
f
(
*
apt
)
gx
[
i
]
=
numpy
.
asarray
((
f_eps
-
f_x
)
/
eps
)
gx
[
i
]
=
numpy
.
asarray
((
f_eps
-
f_x
)
/
eps
)
if
packed_pt
:
if
packed_pt
:
self
.
gf
=
self
.
gf
[
0
]
self
.
gf
=
self
.
gf
[
0
]
...
@@ -536,7 +544,7 @@ class numeric_grad(object):
...
@@ -536,7 +544,7 @@ class numeric_grad(object):
def
verify_grad
(
fun
,
pt
,
n_tests
=
2
,
rng
=
None
,
eps
=
None
,
abs_tol
=
None
,
def
verify_grad
(
fun
,
pt
,
n_tests
=
2
,
rng
=
None
,
eps
=
None
,
abs_tol
=
None
,
rel_tol
=
None
,
mode
=
None
,
cast_to_output_type
=
False
):
rel_tol
=
None
,
mode
=
None
,
cast_to_output_type
=
False
):
""" Test a gradient by Finite Difference Method. Raise error on failure.
""" Test a gradient by Finite Difference Method. Raise error on failure.
Example:
Example:
...
@@ -571,7 +579,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
...
@@ -571,7 +579,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
there is an experimental verify_grad that covers that case as well by
there is an experimental verify_grad that covers that case as well by
using random projections.
using random projections.
"""
"""
assert
isinstance
(
pt
,
(
list
,
tuple
))
assert
isinstance
(
pt
,
(
list
,
tuple
))
pt
=
[
numpy
.
array
(
p
)
for
p
in
pt
]
pt
=
[
numpy
.
array
(
p
)
for
p
in
pt
]
for
i
,
p
in
enumerate
(
pt
):
for
i
,
p
in
enumerate
(
pt
):
...
@@ -579,7 +587,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
...
@@ -579,7 +587,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
raise
TypeError
((
'verify_grad can work only with floating point '
raise
TypeError
((
'verify_grad can work only with floating point '
'inputs, but input
%
i has dtype "
%
s".'
)
%
(
i
,
p
.
dtype
))
'inputs, but input
%
i has dtype "
%
s".'
)
%
(
i
,
p
.
dtype
))
_type_tol
=
dict
(
# relativ error tolerances for different types
_type_tol
=
dict
(
# relativ error tolerances for different types
float32
=
1e-2
,
float32
=
1e-2
,
float64
=
1e-4
)
float64
=
1e-4
)
...
@@ -589,10 +597,11 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
...
@@ -589,10 +597,11 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
rel_tol
=
__builtin__
.
max
(
_type_tol
[
str
(
p
.
dtype
)]
for
p
in
pt
)
rel_tol
=
__builtin__
.
max
(
_type_tol
[
str
(
p
.
dtype
)]
for
p
in
pt
)
if
rng
is
None
:
if
rng
is
None
:
raise
TypeError
(
'rng be instance of numpy.random.RandomState'
,
(
raise
TypeError
((
'rng should be a valid instance of '
' hint: Maybe you meant to call'
'numpy.random.RandomState. You may '
' theano.tests.unittest_tools.verify_grad instead of'
'want to use theano.tests.unittest'
' theano.tensor.verify_grad.'
))
'_tools.verify_grad instead of '
'theano.tensor.verify_grad.'
))
# We allow input downcast in function, because numeric_grad works in the
# We allow input downcast in function, because numeric_grad works in the
# most precise dtype used among the inputs, so we may need to cast some.
# most precise dtype used among the inputs, so we may need to cast some.
...
@@ -614,9 +623,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
...
@@ -614,9 +623,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
o_output
=
fun
(
*
tensor_pt
)
o_output
=
fun
(
*
tensor_pt
)
if
isinstance
(
o_output
,
list
):
if
isinstance
(
o_output
,
list
):
raise
NotImplementedError
(
'verify gradient on multiple outputs'
)
raise
NotImplementedError
((
'cant (yet) autotest gradient of fun '
# we could make loop over outputs making random projections R for
'with multiple outputs'
))
# each, but this doesn't handle the case where not all the outputs are
# we could make loop over outputs making random projections R for each,
# but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -JB.
# differentiable... so I leave this as TODO for now -JB.
o_fn
=
function
(
tensor_pt
,
o_output
)
o_fn
=
function
(
tensor_pt
,
o_output
)
...
@@ -630,15 +640,17 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
...
@@ -630,15 +640,17 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
# random_projection should not have elements too small,
# random_projection should not have elements too small,
# otherwise too much precision is lost in numerical gradient
# otherwise too much precision is lost in numerical gradient
def
random_projection
():
def
random_projection
():
plain
=
rng
.
rand
(
*
o_fn_out
.
shape
)
+
0.5
plain
=
rng
.
rand
(
*
o_fn_out
.
shape
)
+
0.5
if
cast_to_output_type
:
if
cast_to_output_type
:
return
numpy
.
array
(
plain
,
o_output
.
dtype
)
return
numpy
.
array
(
plain
,
o_output
.
dtype
)
return
plain
return
plain
t_r
=
shared
(
random_projection
())
t_r
=
shared
(
random_projection
())
#random projection of o onto t_r
# random projection of o onto t_r
# This sum() is defined above, it's not the builtin sum.
cost
=
theano
.
tensor
.
sum
(
t_r
*
o_output
)
cost
=
theano
.
tensor
.
sum
(
t_r
*
o_output
)
cost_fn
=
function
(
tensor_pt
,
cost
)
cost_fn
=
function
(
tensor_pt
,
cost
)
#todo-- determine if this is actually needed
#todo-- determine if this is actually needed
...
@@ -654,7 +666,6 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
...
@@ -654,7 +666,6 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
for
test_num
in
xrange
(
n_tests
):
for
test_num
in
xrange
(
n_tests
):
num_grad
=
numeric_grad
(
cost_fn
,
[
p
.
copy
()
for
p
in
pt
],
eps
)
num_grad
=
numeric_grad
(
cost_fn
,
[
p
.
copy
()
for
p
in
pt
],
eps
)
analytic_grad
=
grad_fn
(
*
[
p
.
copy
()
for
p
in
pt
])
analytic_grad
=
grad_fn
(
*
[
p
.
copy
()
for
p
in
pt
])
# Since `tensor_pt` is a list, `analytic_grad` should be one too.
# Since `tensor_pt` is a list, `analytic_grad` should be one too.
...
@@ -682,7 +693,6 @@ class GradientError(Exception):
...
@@ -682,7 +693,6 @@ class GradientError(Exception):
self
.
abs_tol
=
abs_tol
self
.
abs_tol
=
abs_tol
self
.
rel_tol
=
rel_tol
self
.
rel_tol
=
rel_tol
def
__str__
(
self
):
def
__str__
(
self
):
# args may have been inserted by e.g. makeTester
# args may have been inserted by e.g. makeTester
args_msg
=
", "
.
join
(
str
(
a
)
for
a
in
self
.
args
)
args_msg
=
", "
.
join
(
str
(
a
)
for
a
in
self
.
args
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论