Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
94a52915
提交
94a52915
authored
10月 22, 2011
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' of github.com:Theano/Theano
上级
73103598
03e6e398
显示空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
190 行增加
和
78 行删除
+190
-78
debugmode.py
theano/compile/debugmode.py
+3
-0
ops.py
theano/sandbox/linalg/ops.py
+35
-19
test_linalg.py
theano/sandbox/linalg/tests/test_linalg.py
+68
-3
__init__.py
theano/sparse/__init__.py
+4
-1
tensor_grad.py
theano/tensor/tensor_grad.py
+80
-55
没有找到文件。
theano/compile/debugmode.py
浏览文件 @
94a52915
...
@@ -294,6 +294,9 @@ class BadOptimization(DebugModeError):
...
@@ -294,6 +294,9 @@ class BadOptimization(DebugModeError):
print
>>
sio
,
self
.
old_graph
print
>>
sio
,
self
.
old_graph
print
>>
sio
,
" New Graph:"
print
>>
sio
,
" New Graph:"
print
>>
sio
,
self
.
new_graph
print
>>
sio
,
self
.
new_graph
print
>>
sio
,
""
print
>>
sio
,
"Hint: relax the tolerance by setting tensor.cmp_sloppy=1"
print
>>
sio
,
" or even tensor.cmp_sloppy=2 for less-strict comparison"
return
sio
.
getvalue
()
return
sio
.
getvalue
()
class
BadDestroyMap
(
DebugModeError
):
class
BadDestroyMap
(
DebugModeError
):
...
...
theano/sandbox/linalg/ops.py
浏览文件 @
94a52915
import
logging
logger
=
logging
.
getLogger
(
__name__
)
import
numpy
import
numpy
from
theano.gof
import
Op
,
Apply
from
theano.gof
import
Op
,
Apply
...
@@ -57,6 +60,7 @@ def hints(variable):
...
@@ -57,6 +60,7 @@ def hints(variable):
@local_optimizer
([])
@local_optimizer
([])
def
remove_hint_nodes
(
node
):
def
remove_hint_nodes
(
node
):
if
is_hint_node
(
node
):
if
is_hint_node
(
node
):
# transfer hints from graph to Feature
try
:
try
:
for
k
,
v
in
node
.
op
.
hints
:
for
k
,
v
in
node
.
op
.
hints
:
node
.
env
.
hints_feature
.
add_hint
(
node
.
inputs
[
0
],
k
,
v
)
node
.
env
.
hints_feature
.
add_hint
(
node
.
inputs
[
0
],
k
,
v
)
...
@@ -95,7 +99,7 @@ class HintsFeature(object):
...
@@ -95,7 +99,7 @@ class HintsFeature(object):
"""
"""
def
add_hint
(
self
,
r
,
k
,
v
):
def
add_hint
(
self
,
r
,
k
,
v
):
print
'adding hint'
,
r
,
k
,
v
logger
.
debug
(
'adding hint;
%
s,
%
s,
%
s'
%
(
r
,
k
,
v
))
self
.
hints
[
r
][
k
]
=
v
self
.
hints
[
r
][
k
]
=
v
def
ensure_init_r
(
self
,
r
):
def
ensure_init_r
(
self
,
r
):
...
@@ -171,9 +175,8 @@ def is_positive(v):
...
@@ -171,9 +175,8 @@ def is_positive(v):
return
True
return
True
#TODO: how to handle this - a registry?
#TODO: how to handle this - a registry?
# infer_hints on Ops?
# infer_hints on Ops?
print
'is_positive'
,
v
logger
.
debug
(
'is_positive:
%
s'
%
str
(
v
))
if
v
.
owner
and
v
.
owner
.
op
==
tensor
.
pow
:
if
v
.
owner
and
v
.
owner
.
op
==
tensor
.
pow
:
print
'try for pow'
,
v
,
v
.
owner
.
inputs
try
:
try
:
exponent
=
tensor
.
get_constant_value
(
v
.
owner
.
inputs
[
1
])
exponent
=
tensor
.
get_constant_value
(
v
.
owner
.
inputs
[
1
])
except
TypeError
:
except
TypeError
:
...
@@ -250,7 +253,6 @@ def local_log_prod_sqr(node):
...
@@ -250,7 +253,6 @@ def local_log_prod_sqr(node):
# we cannot always make this substitution because
# we cannot always make this substitution because
# the prod might include negative terms
# the prod might include negative terms
p
=
x
.
owner
.
inputs
[
0
]
p
=
x
.
owner
.
inputs
[
0
]
print
"AAA"
,
p
# p is the matrix we're reducing with prod
# p is the matrix we're reducing with prod
if
is_positive
(
p
):
if
is_positive
(
p
):
...
@@ -316,7 +318,7 @@ class Cholesky(Op):
...
@@ -316,7 +318,7 @@ class Cholesky(Op):
destr
=
'destructive'
destr
=
'destructive'
else
:
else
:
destr
=
'non-destructive'
destr
=
'non-destructive'
return
'Cholesky{
%
s,
%
s}'
%
(
lu
,
destr
)
return
'Cholesky{
%
s,
%
s}'
%
(
lu
,
destr
)
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
x
=
as_tensor_variable
(
x
)
x
=
as_tensor_variable
(
x
)
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
...
@@ -378,7 +380,10 @@ class Solve(Op):
...
@@ -378,7 +380,10 @@ class Solve(Op):
def
make_node
(
self
,
A
,
b
):
def
make_node
(
self
,
A
,
b
):
A
=
as_tensor_variable
(
A
)
A
=
as_tensor_variable
(
A
)
b
=
as_tensor_variable
(
b
)
b
=
as_tensor_variable
(
b
)
return
Apply
(
self
,
[
A
,
b
],
[
b
.
type
()])
otype
=
tensor
.
tensor
(
broadcastable
=
b
.
broadcastable
,
dtype
=
(
A
*
b
)
.
dtype
)
return
Apply
(
self
,
[
A
,
b
],
[
otype
])
def
perform
(
self
,
node
,
inputs
,
output_storage
):
def
perform
(
self
,
node
,
inputs
,
output_storage
):
A
,
b
=
inputs
A
,
b
=
inputs
#TODO: use the A_structure to go faster
#TODO: use the A_structure to go faster
...
@@ -394,40 +399,46 @@ class ExtractDiag(Op):
...
@@ -394,40 +399,46 @@ class ExtractDiag(Op):
self
.
view
=
view
self
.
view
=
view
if
self
.
view
:
if
self
.
view
:
self
.
view_map
=
{
0
:[
0
]}
self
.
view_map
=
{
0
:[
0
]}
self
.
perform
=
self
.
perform_view
else
:
self
.
perform
=
self
.
perform_noview
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
view
==
other
.
view
return
type
(
self
)
==
type
(
other
)
and
self
.
view
==
other
.
view
def
__hash__
(
self
):
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
view
)
return
hash
(
type
(
self
))
^
hash
(
self
.
view
)
def
make_node
(
self
,
_x
):
def
make_node
(
self
,
_x
):
x
=
as_tensor_variable
(
_x
)
x
=
as_tensor_variable
(
_x
)
if
x
.
type
.
ndim
!=
2
:
if
x
.
type
.
ndim
!=
2
:
raise
TypeError
(
'ExtractDiag only works on matrices'
,
_x
)
raise
TypeError
(
'ExtractDiag only works on matrices'
,
_x
)
return
Apply
(
self
,
[
x
],
[
tensor
.
vector
(
dtype
=
x
.
type
.
dtype
)])
return
Apply
(
self
,
[
x
],
[
tensor
.
vector
(
dtype
=
x
.
type
.
dtype
)])
def
perform_noview
(
self
,
node
,
(
x
,),
(
z
,)):
def
perform
(
self
,
node
,
ins
,
outs
):
x
,
=
ins
z
,
=
outs
#for some reason numpy.diag(x) is really slow
#for some reason numpy.diag(x) is really slow
N
,
M
=
x
.
shape
N
,
M
=
x
.
shape
assert
N
==
M
assert
N
==
M
rval
=
x
[
0
]
rval
=
x
[
0
]
rval
.
strides
=
(
x
.
strides
[
0
]
+
x
.
strides
[
1
],)
rval
.
strides
=
(
x
.
strides
[
0
]
+
x
.
strides
[
1
],)
z
[
0
]
=
rval
.
copy
()
if
self
.
view
:
def
perform_view
(
self
,
node
,
(
x
,),
(
z
,)):
N
,
M
=
x
.
shape
a
,
b
=
x
.
strides
assert
N
==
M
rval
=
x
[
0
]
rval
.
strides
=
a
+
b
,
z
[
0
]
=
rval
z
[
0
]
=
rval
else
:
z
[
0
]
=
rval
.
copy
()
def
__str__
(
self
):
def
__str__
(
self
):
return
'ExtractDiag{view=
%
s}'
%
self
.
view
return
'ExtractDiag{view=
%
s}'
%
self
.
view
def
grad
(
self
,
inputs
,
g_outputs
):
def
grad
(
self
,
inputs
,
g_outputs
):
return
[
alloc_diag
(
g_outputs
[
0
])]
return
[
alloc_diag
(
g_outputs
[
0
])]
extract_diag
=
ExtractDiag
()
def
infer_shape
(
self
,
node
,
shapes
):
x_s
,
=
shapes
return
[(
x_s
[
0
],)]
extract_diag
=
ExtractDiag
()
#TODO: optimization to insert ExtractDiag with view=True
#TODO: optimization to insert ExtractDiag with view=True
class
AllocDiag
(
Op
):
class
AllocDiag
(
Op
):
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
return
type
(
self
)
==
type
(
other
)
...
@@ -449,7 +460,10 @@ alloc_diag = AllocDiag()
...
@@ -449,7 +460,10 @@ alloc_diag = AllocDiag()
def
diag
(
x
):
def
diag
(
x
):
"""Numpy-compatibility method
"""Numpy-compatibility method
For vector `x`, return a zero matrix except for `x` as diagonal.
If `x` is a matrix, return its diagonal.
If `x` is a vector return a matrix with it as its diagonal.
* This method does not support the `k` argument that numpy supports.
"""
"""
xx
=
as_tensor_variable
(
x
)
xx
=
as_tensor_variable
(
x
)
if
xx
.
type
.
ndim
==
1
:
if
xx
.
type
.
ndim
==
1
:
...
@@ -461,6 +475,7 @@ def diag(x):
...
@@ -461,6 +475,7 @@ def diag(x):
class
Det
(
Op
):
class
Det
(
Op
):
"""matrix determinant
"""matrix determinant
TODO: move this op to another file that request scipy.
TODO: move this op to another file that request scipy.
"""
"""
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
...
@@ -488,6 +503,7 @@ def trace(X):
...
@@ -488,6 +503,7 @@ def trace(X):
"""
"""
return
extract_diag
(
X
)
.
sum
()
return
extract_diag
(
X
)
.
sum
()
def
spectral_radius_bound
(
X
,
log2_exponent
):
def
spectral_radius_bound
(
X
,
log2_exponent
):
"""
"""
Returns upper bound on the largest eigenvalue of square symmetrix matrix X.
Returns upper bound on the largest eigenvalue of square symmetrix matrix X.
...
...
theano/sandbox/linalg/tests/test_linalg.py
浏览文件 @
94a52915
from
pkg_resources
import
parse_version
as
V
import
numpy
import
numpy
import
theano
import
theano
from
theano
import
tensor
,
function
from
theano
import
tensor
,
function
from
theano.tensor.basic
import
_allclose
from
theano.tensor.basic
import
_allclose
from
theano.tests
import
unittest_tools
as
utt
from
theano
import
config
utt
.
seed_rng
()
try
:
try
:
import
scipy
import
scipy
if
scipy
.
__version__
<
'0.7'
:
if
V
(
scipy
.
__version__
)
<
V
(
'0.7'
)
:
raise
ImportError
()
raise
ImportError
()
use_scipy
=
True
use_scipy
=
True
except
ImportError
:
except
ImportError
:
...
@@ -17,11 +23,12 @@ from theano.sandbox.linalg.ops import (cholesky,
...
@@ -17,11 +23,12 @@ from theano.sandbox.linalg.ops import (cholesky,
matrix_inverse
,
matrix_inverse
,
#solve,
#solve,
#diag,
#diag,
#extract_diag,
ExtractDiag
,
extract_diag
,
#alloc_diag,
#alloc_diag,
det
,
det
,
#PSD_hint,
#PSD_hint,
#
trace,
trace
,
#spectral_radius_bound
#spectral_radius_bound
)
)
...
@@ -88,3 +95,61 @@ def test_det_grad():
...
@@ -88,3 +95,61 @@ def test_det_grad():
r
=
rng
.
randn
(
5
,
5
)
r
=
rng
.
randn
(
5
,
5
)
tensor
.
verify_grad
(
det
,
[
r
],
rng
=
numpy
.
random
)
tensor
.
verify_grad
(
det
,
[
r
],
rng
=
numpy
.
random
)
def
test_extract_diag
():
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
x
=
theano
.
tensor
.
matrix
()
g
=
extract_diag
(
x
)
f
=
theano
.
function
([
x
],
g
)
m
=
rng
.
rand
(
3
,
3
)
.
astype
(
config
.
floatX
)
v
=
numpy
.
diag
(
m
)
r
=
f
(
m
)
# The right diagonal is extracted
assert
(
r
==
v
)
.
all
()
m
=
rng
.
rand
(
2
,
3
)
.
astype
(
config
.
floatX
)
ok
=
False
try
:
r
=
f
(
m
)
except
Exception
:
ok
=
True
assert
ok
xx
=
theano
.
tensor
.
vector
()
ok
=
False
try
:
extract_diag
(
xx
)
except
TypeError
:
ok
=
True
assert
ok
f
=
theano
.
function
([
x
],
g
.
shape
)
topo
=
f
.
maker
.
env
.
toposort
()
assert
sum
([
node
.
op
.
__class__
==
ExtractDiag
for
node
in
topo
])
==
0
m
=
rng
.
rand
(
3
,
3
)
.
astype
(
config
.
floatX
)
assert
f
(
m
)
==
3
# not testing the view=True case since it is not used anywhere.
def
test_trace
():
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
x
=
theano
.
tensor
.
matrix
()
g
=
trace
(
x
)
f
=
theano
.
function
([
x
],
g
)
m
=
rng
.
rand
(
4
,
4
)
.
astype
(
config
.
floatX
)
v
=
numpy
.
trace
(
m
)
assert
v
==
f
(
m
)
xx
=
theano
.
tensor
.
vector
()
ok
=
False
try
:
trace
(
xx
)
except
TypeError
:
ok
=
True
assert
ok
theano/sparse/__init__.py
浏览文件 @
94a52915
from
pkg_resources
import
parse_version
as
V
import
sys
import
sys
try
:
try
:
import
scipy
import
scipy
enable_sparse
=
scipy
.
__version__
>=
'0.7'
enable_sparse
=
V
(
scipy
.
__version__
)
>=
V
(
'0.7'
)
if
not
enable_sparse
:
if
not
enable_sparse
:
sys
.
stderr
.
write
(
"WARNING: scipy version =
%
s."
sys
.
stderr
.
write
(
"WARNING: scipy version =
%
s."
" We request version >=0.7.0 for the sparse code as it has"
" We request version >=0.7.0 for the sparse code as it has"
...
...
theano/tensor/tensor_grad.py
浏览文件 @
94a52915
...
@@ -333,28 +333,34 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
...
@@ -333,28 +333,34 @@ def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
return
ret
return
ret
class
numeric_grad
:
class
numeric_grad
(
object
):
"""WRITEME"""
"""
Compute the numeric derivative of a scalar-valued function at a particular
point.
"""
# Note on step sizes and tolerances:
# Note on step sizes and tolerances:
#
#
# There is a relationship between the step size and the function value and
the measurement
# There is a relationship between the step size and the function value and
#
error that is incurred due to rounding. The finite difference we measure is
#
the measurement error that is incurred due to rounding. The finite
# delta = f(x0) - f(x0+eps)
# d
ifference we measure is d
elta = f(x0) - f(x0+eps)
#
#
# For maximum precision, f should be close to zero.
# For maximum precision, f should be close to zero.
# For every power of 2 that f departs from zero, we lose a bit of precision in delta.
# For every power of 2 that f departs from zero, we lose a bit of
# precision in delta.
#
#
# Even in this case of maximum accuracy, there is a tradeoff between
stepsize and
# Even in this case of maximum accuracy, there is a tradeoff between
#
measurement error.
#
stepsize and measurement error. Taking small steps allows us to measure
#
Taking small steps allows us to measure large derivatives accuractly, but longer steps
#
large derivatives accuractly, but longer steps are required to measure
#
are required to measure small derivatives accurately. However longer steps introduce
#
small derivatives accurately. However longer steps introduce bias into
#
bias into
our measurement in general for non-linear functions.
# our measurement in general for non-linear functions.
#
#
# It would be interesting to have a version of numeric grad that used an adaptive stepsize.
# It would be interesting to have a version of numeric grad that used an
# adaptive stepsize.
#
#
# For now, we use a heuristic that catches very bad gradients, but is not perfectly
# For now, we use a heuristic that catches very bad gradients, but is not
# accurate.
# perfectly accurate.
type_eps
=
{
'float64'
:
1e-7
,
type_eps
=
{
'float64'
:
1e-7
,
'float32'
:
3e-4
,
'float32'
:
3e-4
,
numpy
.
dtype
(
'float64'
):
1e-7
,
numpy
.
dtype
(
'float64'
):
1e-7
,
...
@@ -363,6 +369,9 @@ class numeric_grad:
...
@@ -363,6 +369,9 @@ class numeric_grad:
def
__init__
(
self
,
f
,
pt
,
eps
=
None
):
def
__init__
(
self
,
f
,
pt
,
eps
=
None
):
"""Return the gradient of f at pt.
"""Return the gradient of f at pt.
:param f: a differentiable function such that f(*pt) is a scalar
:param pt: an ndarray, a list of ndarrays or tuple of ndarrays
This function computes the gradient by a one-sided finite differences of a
This function computes the gradient by a one-sided finite differences of a
fixed step size (eps).
fixed step size (eps).
...
@@ -435,39 +444,46 @@ class numeric_grad:
...
@@ -435,39 +444,46 @@ class numeric_grad:
self
.
gf
=
self
.
gf
[
0
]
self
.
gf
=
self
.
gf
[
0
]
@staticmethod
@staticmethod
def
abs_rel_err
(
a
,
b
):
def
abs_rel_err
(
a
,
b
):
"""Return absolute and relative error between a and b.
"""Return absolute and relative error between a and b.
The relative error is a small number when a and b are close, relative to how big they are.
The relative error is a small number when a and b are close, relative
to how big they are.
Formulas used:
Formulas used:
abs_err = abs(a - b)
abs_err = abs(a - b)
rel_err = abs_err / (abs(a) + abs(b))
rel_err = abs_err / max(abs(a) + abs(b), 1e-8)
The denominator is clipped at 1e-8 to avoid dividing by 0 when a and b
are both close to 0.
The tuple (abs_err, rel_err) is returned
The tuple (abs_err, rel_err) is returned
"""
"""
abs_err
=
abs
(
a
-
b
)
abs_err
=
abs
(
a
-
b
)
rel_err
=
abs_err
/
(
abs
(
a
)
+
abs
(
b
)
)
rel_err
=
abs_err
/
numpy
.
maximum
(
abs
(
a
)
+
abs
(
b
),
1e-8
)
return
(
abs_err
,
rel_err
)
return
(
abs_err
,
rel_err
)
def
abs_rel_errors
(
self
,
g_pt
):
def
abs_rel_errors
(
self
,
g_pt
):
"""Return the abs and rel error of gradient estimate `g_pt`
"""Return the abs and rel error of gradient estimate `g_pt`
`g_pt` must be a list of ndarrays of the same length as self.gf,
otherwise a ValueError
`g_pt` must be a list of ndarrays of the same length as self.gf,
is raised.
otherwise a ValueError
is raised.
Corresponding ndarrays in `g_pt` and `self.gf` must have the same
shape or ValueError
Corresponding ndarrays in `g_pt` and `self.gf` must have the same
is raised.
shape or ValueError
is raised.
"""
"""
if
len
(
g_pt
)
!=
len
(
self
.
gf
):
if
len
(
g_pt
)
!=
len
(
self
.
gf
):
raise
ValueError
(
'argument has wrong number of elements'
,
len
(
g_pt
))
raise
ValueError
(
'argument has wrong number of elements'
,
len
(
g_pt
))
errs
=
[]
errs
=
[]
for
i
,
(
a
,
b
)
in
enumerate
(
zip
(
g_pt
,
self
.
gf
)):
for
i
,
(
a
,
b
)
in
enumerate
(
zip
(
g_pt
,
self
.
gf
)):
if
a
.
shape
!=
b
.
shape
:
if
a
.
shape
!=
b
.
shape
:
raise
ValueError
(
'argument element
%
i has wrong shape
%
s'
%
(
i
,
str
((
a
.
shape
,
raise
ValueError
(
b
.
shape
))))
'argument element
%
i has wrong shape
%
s'
%
(
errs
.
append
(
numeric_grad
.
abs_rel_err
(
a
,
b
))
i
,
str
((
a
.
shape
,
b
.
shape
))))
errs
.
append
(
numeric_grad
.
abs_rel_err
(
a
,
b
))
return
errs
return
errs
def
max_err
(
self
,
g_pt
,
abs_tol
,
rel_tol
):
def
max_err
(
self
,
g_pt
,
abs_tol
,
rel_tol
):
...
@@ -477,8 +493,8 @@ class numeric_grad:
...
@@ -477,8 +493,8 @@ class numeric_grad:
wrt the provided tolerances (abs_tol, rel_tol).
wrt the provided tolerances (abs_tol, rel_tol).
A value > 1 means both tolerances are exceeded.
A value > 1 means both tolerances are exceeded.
Return the argmax of min(abs_err / abs_tol, rel_err / rel_tol) over
g_pt,
Return the argmax of min(abs_err / abs_tol, rel_err / rel_tol) over
as well as abs_err and rel_err at this point.
g_pt,
as well as abs_err and rel_err at this point.
"""
"""
pos
=
[]
pos
=
[]
errs
=
[]
errs
=
[]
...
@@ -487,7 +503,11 @@ class numeric_grad:
...
@@ -487,7 +503,11 @@ class numeric_grad:
abs_rel_errs
=
self
.
abs_rel_errors
(
g_pt
)
abs_rel_errs
=
self
.
abs_rel_errors
(
g_pt
)
for
abs_err
,
rel_err
in
abs_rel_errs
:
for
abs_err
,
rel_err
in
abs_rel_errs
:
scaled_err
=
numpy
.
minimum
(
abs_err
/
abs_tol
,
rel_err
/
rel_tol
)
if
not
numpy
.
all
(
numpy
.
isfinite
(
abs_err
)):
raise
ValueError
(
'abs_err not finite'
,
repr
(
abs_err
))
if
not
numpy
.
all
(
numpy
.
isfinite
(
rel_err
)):
raise
ValueError
(
'rel_err not finite'
,
repr
(
rel_err
))
scaled_err
=
numpy
.
minimum
(
abs_err
/
abs_tol
,
rel_err
/
rel_tol
)
max_i
=
scaled_err
.
argmax
()
max_i
=
scaled_err
.
argmax
()
pos
.
append
(
max_i
)
pos
.
append
(
max_i
)
...
@@ -501,8 +521,8 @@ class numeric_grad:
...
@@ -501,8 +521,8 @@ class numeric_grad:
return
(
max_arg
,
pos
[
max_arg
],
abs_errs
[
max_arg
],
rel_errs
[
max_arg
])
return
(
max_arg
,
pos
[
max_arg
],
abs_errs
[
max_arg
],
rel_errs
[
max_arg
])
def
verify_grad
(
fun
,
pt
,
n_tests
=
2
,
rng
=
None
,
eps
=
None
,
abs_tol
=
None
,
rel_tol
=
None
,
def
verify_grad
(
fun
,
pt
,
n_tests
=
2
,
rng
=
None
,
eps
=
None
,
abs_tol
=
None
,
mode
=
None
,
cast_to_output_type
=
False
):
rel_tol
=
None
,
mode
=
None
,
cast_to_output_type
=
False
):
""" Test a gradient by Finite Difference Method. Raise error on failure.
""" Test a gradient by Finite Difference Method. Raise error on failure.
Example:
Example:
...
@@ -511,9 +531,9 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
...
@@ -511,9 +531,9 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
rng=numpy.random)
rng=numpy.random)
Raises an Exception if the difference between the analytic gradient and
Raises an Exception if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) of a
random
numerical gradient (computed through the Finite Difference Method) of a
projection of the fun's output to a scalar exceeds
random projection of the fun's output to a scalar exceeds the given
t
he given t
olerance.
tolerance.
:param fun: a Python function that takes Theano variables as inputs,
:param fun: a Python function that takes Theano variables as inputs,
and returns a Theano variable. For instance, an Op instance with
and returns a Theano variable. For instance, an Op instance with
...
@@ -521,19 +541,21 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
...
@@ -521,19 +541,21 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
:param pt: the list of numpy.ndarrays to use as input values.
:param pt: the list of numpy.ndarrays to use as input values.
These arrays must be either float32 or float64 arrays.
These arrays must be either float32 or float64 arrays.
:param n_tests: number of times to run the test
:param n_tests: number of times to run the test
:param rng: random number generator used to sample u, we test gradient of sum(u * fun) at pt
:param rng: random number generator used to sample u, we test gradient of
:param eps: stepsize used in the Finite Difference Method (Default None is type-dependent)
sum(u * fun) at pt
:param eps: stepsize used in the Finite Difference Method (Default None is
type-dependent)
:param abs_tol: absolute tolerance used as threshold for gradient comparison
:param abs_tol: absolute tolerance used as threshold for gradient comparison
:param rel_tol: relative tolerance used as threshold for gradient comparison
:param rel_tol: relative tolerance used as threshold for gradient comparison
:note: WARNING to unit-test writers: if `op` is a function that builds a
graph,
:note: WARNING to unit-test writers: if `op` is a function that builds a
try to make it a SMALL graph. Often verify grad is run in
graph,
try to make it a SMALL graph. Often verify grad is run in
debug mode, which can be very slow if it has to verify a lot
debug mode, which can be very slow if it has to verify a lot of
of
intermediate computations.
intermediate computations.
:note: This op does not support multiple outputs. In tests/test_scan.py
there is
:note: This op does not support multiple outputs. In tests/test_scan.py
an experimental verify_grad that covers that case as well by using random
there is an experimental verify_grad that covers that case as well by
projections.
using random
projections.
"""
"""
assert
isinstance
(
pt
,
(
list
,
tuple
))
assert
isinstance
(
pt
,
(
list
,
tuple
))
pt
=
[
numpy
.
array
(
p
)
for
p
in
pt
]
pt
=
[
numpy
.
array
(
p
)
for
p
in
pt
]
...
@@ -553,8 +575,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
...
@@ -553,8 +575,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
rel_tol
=
__builtin__
.
max
(
_type_tol
[
str
(
p
.
dtype
)]
for
p
in
pt
)
rel_tol
=
__builtin__
.
max
(
_type_tol
[
str
(
p
.
dtype
)]
for
p
in
pt
)
if
rng
is
None
:
if
rng
is
None
:
raise
TypeError
(
'rng should be a valid instance of numpy.random.RandomState.'
,
raise
TypeError
(
'rng be instance of numpy.random.RandomState'
,
(
'You may want to use theano.tests.unittest_tools.verify_grad instead of theano.tensor.verify_grad.'
)
' hint: Maybe you meant to call'
' theano.tests.unittest_tools.verify_grad instead of'
' theano.tensor.verify_grad.'
))
# We allow input downcast in function, because numeric_grad works in the
# We allow input downcast in function, because numeric_grad works in the
# most precise dtype used among the inputs, so we may need to cast some.
# most precise dtype used among the inputs, so we may need to cast some.
...
@@ -567,15 +591,18 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
...
@@ -567,15 +591,18 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
allow_input_downcast
=
True
,
mode
=
mode
)
allow_input_downcast
=
True
,
mode
=
mode
)
return
f
return
f
tensor_pt
=
[
TensorType
(
as_tensor_variable
(
p
)
.
dtype
,
as_tensor_variable
(
p
)
.
broadcastable
)(
name
=
'input
%
i'
%
i
)
for
i
,
p
in
enumerate
(
pt
)]
tensor_pt
=
[
TensorType
(
as_tensor_variable
(
p
)
.
dtype
,
as_tensor_variable
(
p
)
.
broadcastable
)(
name
=
'input
%
i'
%
i
)
for
i
,
p
in
enumerate
(
pt
)]
#fun can be either a function or an actual Op instance
#fun can be either a function or an actual Op instance
o_output
=
fun
(
*
tensor_pt
)
o_output
=
fun
(
*
tensor_pt
)
if
isinstance
(
o_output
,
list
):
if
isinstance
(
o_output
,
list
):
raise
NotImplementedError
(
'
cant (yet) autotest gradient of fun with
multiple outputs'
)
raise
NotImplementedError
(
'
verify gradient on
multiple outputs'
)
# we could make loop over outputs making random projections R for
each,
# we could make loop over outputs making random projections R for
# but this doesn't handle the case where not all the outputs are
#
each,
but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -JB.
# differentiable... so I leave this as TODO for now -JB.
o_fn
=
function
(
tensor_pt
,
o_output
)
o_fn
=
function
(
tensor_pt
,
o_output
)
...
@@ -597,18 +624,16 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
...
@@ -597,18 +624,16 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
t_r
=
shared
(
random_projection
())
t_r
=
shared
(
random_projection
())
#random projection of o onto t_r
#random projection of o onto t_r
cost
=
theano
.
tensor
.
sum
(
t_r
*
o_output
)
#This sum() is defined above, it's not the builtin sum.
cost
=
theano
.
tensor
.
sum
(
t_r
*
o_output
)
cost_fn
=
function
(
tensor_pt
,
cost
)
cost_fn
=
function
(
tensor_pt
,
cost
)
#todo-- determine if this is actually needed
#todo-- determine if this is actually needed
g_cost
=
as_tensor_variable
(
1.0
,
name
=
'g_cost'
)
g_cost
=
as_tensor_variable
(
1.0
,
name
=
'g_cost'
)
if
cast_to_output_type
:
if
cast_to_output_type
:
g_cost
=
cast
(
g_cost
,
o_output
.
dtype
)
g_cost
=
cast
(
g_cost
,
o_output
.
dtype
)
symbolic_grad
=
grad
(
cost
,
tensor_pt
,
g_cost
,
symbolic_grad
=
grad
(
cost
,
tensor_pt
,
g_cost
,
disconnected_inputs
=
'ignore'
)
disconnected_inputs
=
'ignore'
)
#if o_output.dtype in ['float32','float64']:
# assert all([x.dtype == o_output.dtype for x in symbolic_grad]),("Expected grad of type %s, got %s "%( symbolic_grad.dtype, o_output.dtyp))
grad_fn
=
function
(
tensor_pt
,
symbolic_grad
)
grad_fn
=
function
(
tensor_pt
,
symbolic_grad
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论