Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
e2202c3f
提交
e2202c3f
authored
9月 26, 2008
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
差异文件
merged
上级
8dd464d2
357efa53
隐藏空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
560 行增加
和
248 行删除
+560
-248
_test_tensor.py
_test_tensor.py
+93
-79
compile.py
compile.py
+24
-19
gradient.py
gradient.py
+0
-58
tensor.py
tensor.py
+421
-90
tensor_opt.py
tensor_opt.py
+22
-2
没有找到文件。
_test_tensor.py
浏览文件 @
e2202c3f
...
...
@@ -533,64 +533,6 @@ DotTester = make_tester(name = 'DotTester',
# rationale: it's tricky, and necessary everytime you want to verify
# gradient numerically
def
verify_grad
(
testcase
,
op
,
pt
,
n_tests
=
1
,
rng
=
numpy
.
random
,
eps
=
0.0000001
,
tol
=
0.0001
,
linker
=
'c&py'
):
"""testcase.failUnless(analytic gradient matches finite-diff gradient)"""
pt
=
[
numpy
.
asarray
(
p
)
for
p
in
pt
]
for
test_num
in
xrange
(
n_tests
):
# tensor_pt = [as_tensor(p,name='input %i'%i) for i,p in enumerate(pt)]
tensor_pt
=
[
constant
(
p
)
.
type
(
'input
%
i'
%
i
)
for
i
,
p
in
enumerate
(
pt
)]
#o = op.make_node(*[tpt.copy() for tpt in tensor_pt])
o
=
safe_make_node
(
op
,
*
[
tpt
.
copy
()
for
tpt
in
tensor_pt
])
if
hasattr
(
o
,
'outputs'
):
o_outputs
=
o
.
outputs
else
:
o_outputs
=
o
if
len
(
o_outputs
)
>
1
:
raise
NotImplementedError
(
'cant (yet) autotest gradient of op with multiple outputs'
)
# we could make loop over outputs making random projections R for each,
# but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -JB.
o_fn
=
function
(
tensor_pt
,
o_outputs
[
0
],
mode
=
compile
.
Mode
(
optimizer
=
None
,
linker
=
linker
))
o_fn_out
=
o_fn
(
*
pt
)
random_projection
=
rng
.
rand
(
*
o_fn_out
.
shape
)
t_r
=
as_tensor
(
random_projection
)
#random projection of o onto t_r
cost
=
sum
(
t_r
*
o_outputs
[
0
])
cost_fn
=
function
(
tensor_pt
,
cost
,
mode
=
compile
.
Mode
(
optimizer
=
None
,
linker
=
linker
))
num_grad
=
gradient
.
numeric_grad
(
cost_fn
,
pt
)
symbolic_grad
=
grad
(
cost
,
tensor_pt
,
as_tensor
(
1.0
,
name
=
'g_cost'
))
if
0
:
print
'-------'
print
'----------'
for
op
in
gof
.
graph
.
io_toposort
(
tensor_pt
,
symbolic_grad
):
print
op
grad_fn
=
function
(
tensor_pt
,
symbolic_grad
,
mode
=
compile
.
Mode
(
optimizer
=
None
,
linker
=
linker
))
analytic_grad
=
grad_fn
(
*
pt
)
if
not
isinstance
(
analytic_grad
,
(
list
,
tuple
)):
analytic_grad
=
[
analytic_grad
]
# if num_grad.max_err(analytic_grad) > 1.0e-4:
# print "aaaaaaaaaa"
# print gof.Env(tensor_pt, [cost])
# print gof.Env(tensor_pt, symbolic_grad)
# print analytic_grad
# print num_grad.gf
# print num_grad.max_err(analytic_grad)
# print "bbbbbbbbbb"
if
num_grad
.
max_err
(
analytic_grad
)
>
1.0e-4
:
raise
Exception
(
verify_grad
.
E_grad
)
verify_grad
.
E_grad
=
'gradient error exceeded tolerance'
#useful mostly for unit tests
...
...
@@ -945,29 +887,101 @@ class T_subtensor(unittest.TestCase):
class
T_Stack
(
unittest
.
TestCase
):
def
test_hstack
(
self
):
a
=
as_tensor
(
numpy
.
array
([[
1
,
2
,
3
],
[
4
,
5
,
6
]]))
b
=
as_tensor
(
numpy
.
array
([[
7
],
[
8
]]))
s
=
horizontal_stack
(
a
,
b
)
c
=
numpy
.
array
([[
1
,
2
,
3
,
7
],
[
4
,
5
,
6
,
8
]])
self
.
failUnless
((
eval_outputs
([
s
])
==
c
)
.
all
())
def
test_vstack
(
self
):
a
=
as_tensor
(
numpy
.
array
([[
1
,
2
,
3
],
[
4
,
5
,
6
]]))
b
=
as_tensor
(
numpy
.
array
([[
7
,
8
,
9
]]))
s
=
vertical_stack
(
a
,
b
)
c
=
numpy
.
array
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]])
self
.
failUnless
((
eval_outputs
([
s
])
==
c
)
.
all
())
class
T_Join_and_Split
(
unittest
.
TestCase
):
"""
Split is tested by each verify_grad method.
"""
class
Join1
(
Op
):
def
make_node
(
self
,
*
inputs
):
inputs
=
[
as_tensor
(
t
)
for
t
in
inputs
]
outputs
=
[
lscalar
()]
+
[
i
.
type
()
for
i
in
inputs
]
return
Apply
(
self
,
inputs
,
outputs
)
def
perform
(
self
,
node
,
inputs
,
outputs
):
outputs
[
0
][
0
]
=
1
for
i
,
o
in
zip
(
inputs
,
outputs
[
1
:]):
o
[
0
]
=
i
.
copy
()
def
grad
(
self
,
inputs
,
g_outputs
):
return
g_outputs
[
1
:]
def
setUp
(
self
):
Join
.
debug
=
False
def
test_join_scalar
(
self
):
a
=
as_tensor
(
1
)
b
=
as_tensor
(
2
)
try
:
s
=
join
(
0
,
a
,
b
)
except
:
return
self
.
fail
()
def
test_stack_scalar
(
self
):
a
=
as_tensor
(
1
)
b
=
as_tensor
(
2
)
c
=
as_tensor
(
3
)
s
=
stack
(
a
,
b
,
c
)
want
=
numpy
.
array
([
1
,
2
,
3
])
self
.
failUnless
((
eval_outputs
([
s
])
==
want
)
.
all
())
def
test_vstack_grad
(
self
):
def
test_join_vector
(
self
):
a
=
as_tensor
(
numpy
.
array
([
1
,
2
,
3
]))
b
=
as_tensor
(
numpy
.
array
([
7
,
8
,
9
]))
s
=
join
(
0
,
a
,
b
)
want
=
numpy
.
array
([
1
,
2
,
3
,
7
,
8
,
9
])
self
.
failUnless
((
eval_outputs
([
s
])
==
want
)
.
all
())
def
test_stack_vector
(
self
):
a
=
as_tensor
(
numpy
.
array
([
1
,
2
,
3
]))
b
=
as_tensor
(
numpy
.
array
([
7
,
8
,
9
]))
s
=
stack
(
a
,
b
)
want
=
numpy
.
array
([[
1
,
2
,
3
],[
7
,
8
,
9
]])
self
.
failUnless
((
eval_outputs
([
s
])
==
want
)
.
all
())
def
test_join_matrix0
(
self
):
a
=
as_tensor
(
numpy
.
array
([[
1
,
2
,
3
],
[
4
,
5
,
6
]]))
b
=
as_tensor
(
numpy
.
array
([[
7
,
8
,
9
]]))
s
=
vertical_stack
(
a
,
b
)
ga
,
gb
=
grad
(
sum
(
vertical_stack
(
a
,
b
)),
[
a
,
b
])
s
=
join
(
0
,
a
,
b
)
want
=
numpy
.
array
([[
1
,
2
,
3
],[
4
,
5
,
6
],[
7
,
8
,
9
]])
self
.
failUnless
((
eval_outputs
([
s
])
==
want
)
.
all
())
def
test_join_matrix1
(
self
):
av
=
numpy
.
array
([[
1
,
2
,
3
],
[
4
,
5
,
6
]],
dtype
=
'float32'
)
bv
=
numpy
.
array
([[
7
],
[
8
]],
dtype
=
'float32'
)
a
=
as_tensor
(
av
)
b
=
as_tensor
(
bv
)
s
=
join
(
1
,
a
,
b
)
want
=
numpy
.
array
([[
1
,
2
,
3
,
7
],
[
4
,
5
,
6
,
8
]],
dtype
=
'float32'
)
self
.
failUnless
((
eval_outputs
([
s
])
==
want
)
.
all
())
verify_grad
(
self
,
lambda
a
,
b
:
join
(
1
,
a
,
b
),
[
av
,
bv
],
eps
=
1.0e-4
,
tol
=
1.0e-3
)
def
test_join_matrixV
(
self
):
"""variable join axis"""
v
=
numpy
.
array
([[
1.
,
2.
,
3.
],
[
4.
,
5.
,
6.
]])
a
=
as_tensor
(
v
.
copy
())
b
=
as_tensor
(
v
.
copy
())
ax
=
lscalar
()
s
=
join
(
ax
,
a
,
b
)
f
=
function
([
ax
],
[
s
])
want
=
numpy
.
array
([[
1
,
2
,
3
],
[
4
,
5
,
6
]
,[
1
,
2
,
3
],
[
4
,
5
,
6
]])
got
=
f
(
0
)
self
.
failUnless
((
got
==
want
)
.
all
(),
(
got
,
want
))
want
=
numpy
.
array
([[
1
,
2
,
3
,
1
,
2
,
3
],
[
4
,
5
,
6
,
4
,
5
,
6
]])
got
=
f
(
1
)
self
.
failUnless
((
got
==
want
)
.
all
(),
(
got
,
want
))
verify_grad
(
self
,
lambda
a
,
b
:
join
(
0
,
a
,
b
),
[
v
,
2
*
v
])
verify_grad
(
self
,
lambda
a
,
b
:
join
(
1
,
a
,
b
),
[
v
,
2
*
v
])
gval
=
eval_outputs
([
ga
,
gb
])
self
.
failUnless
(
numpy
.
all
(
gval
[
0
]
==
1.0
))
self
.
failUnless
(
numpy
.
all
(
gval
[
1
]
==
1.0
))
class
_test_comparison
(
unittest
.
TestCase
):
...
...
@@ -1761,10 +1775,10 @@ class T_op_cache(unittest.TestCase):
self
.
failUnless
(
numpy
.
all
(
fn_py
(
a
)
==
fn_c_or_py
(
a
)))
if
__name__
==
'__main__'
:
if
1
:
if
0
:
unittest
.
main
()
else
:
testcase
=
t_dot
testcase
=
AbsInplaceTester
suite
=
unittest
.
TestLoader
()
suite
=
suite
.
loadTestsFromTestCase
(
testcase
)
...
...
compile.py
浏览文件 @
e2202c3f
"""Convenient driver of graph construction, optimization, and linking."""
import
copy_reg
import
cPickle
from
functools
import
partial
import
numpy
import
gof
import
sys
from
copy
import
copy
import
tensor_opt
def
check_equal
(
x
,
y
):
"""
...
...
@@ -57,6 +60,12 @@ predefined_linkers = {
default_linker
=
'c|py'
def
register_linker
(
name
,
linker
):
"""Add a `Linker` which can be referred to by `name` in `Mode`."""
if
name
in
predefined_linkers
:
raise
ValueError
(
'Linker name already taken:
%
s'
%
name
)
predefined_linkers
[
name
]
=
linker
# If a string is passed as the optimizer argument in the constructor
# for Mode, it will be used as the key to retrieve the real optimizer
...
...
@@ -64,13 +73,15 @@ default_linker = 'c|py'
predefined_optimizers
=
{
None
:
lambda
env
:
None
,
'merge'
:
gof
.
MergeOptimizer
(),
'math'
:
gof
.
MergeOptMerge
(
gof
.
PureThenInplaceOptimizer
(
tensor_opt
.
math_optimizer
,
tensor_opt
.
inplace_optimizer
))
}
default_optimizer
=
'merge'
def
register_optimizer
(
name
,
opt
):
"""Add a `Optimizer` which can be referred to by `name` in `Mode`."""
if
name
in
predefined_optimizers
:
raise
ValueError
(
'Optimizer name already taken:
%
s'
%
name
)
predefined_optimizers
[
name
]
=
opt
class
Mode
(
object
):
"""
...
...
@@ -110,15 +121,14 @@ class Mode(object):
# If a string is passed as the mode argument in function or
# FunctionMaker, the Mode will be taken from this dictionary using the
# string as the key
predefined_modes
=
{
'SANITY_CHECK'
:
Mode
(
'c&py'
,
'math'
),
'FAST_COMPILE'
:
Mode
(
'py'
,
'merge'
),
'FAST_RUN'
:
Mode
(
'c|py'
,
'math'
),
'EXPENSIVE_OPTIMIZATIONS'
:
Mode
(
'c|py'
,
'math'
),
}
default_mode
=
'FAST_RUN'
predefined_modes
=
{
'FAST_COMPILE'
:
Mode
(
'py'
,
'merge'
)}
default_mode
=
'FAST_COMPILE'
def
register_mode
(
name
,
mode
):
"""Add a `Mode` which can be referred to by `name` in `function`."""
if
name
in
predefined_modes
:
raise
ValueError
(
'Mode name already taken:
%
s'
%
name
)
predefined_modes
[
name
]
=
mode
...
...
@@ -508,9 +518,6 @@ class FunctionMaker(object):
return
fn
import
copy_reg
import
cPickle
def
_pickle_FunctionMaker
(
fm
):
return
(
_constructor_FunctionMaker
,
(
fm
.
inputs
,
fm
.
outputs
,
fm
.
mode
,
fm
.
accept_inplace
))
...
...
@@ -527,8 +534,6 @@ copy_reg.pickle(slice, _pickle_slice)
from
functools
import
partial
DUPLICATE
=
[
'DUPLICATE'
]
# unique id object used as a placeholder for duplicate entries
class
Function
(
object
):
...
...
gradient.py
浏览文件 @
e2202c3f
...
...
@@ -110,62 +110,4 @@ def grad_sources_inputs(sources, graph_inputs):
gmap
[
r
]
=
g_r
return
gmap
class
numeric_grad
:
def
__init__
(
self
,
f
,
pt
,
eps
=
1.0e-7
):
"""Return the gradient of f at pt.
This function computes the gradient by a one-sided finite differences of a
fixed step size (eps).
It is assumed that f(...) will return a scalar.
It is assumed that all f's inputs are numpy.ndarray objects.
"""
gf
=
[
numpy
.
ndarray
(
x
.
shape
)
for
x
in
pt
]
f_pt
=
f
(
*
pt
)
if
isinstance
(
f
,
(
list
,
tuple
)):
f_pt
=
[
numpy
.
copy
(
x
)
for
x
in
f_pt
]
else
:
f_pt
=
numpy
.
copy
(
f_pt
)
for
idx
in
xrange
(
len
(
gf
)):
if
len
(
pt
[
idx
]
.
shape
)
==
0
:
orig
=
pt
[
idx
]
pt
[
idx
]
=
numpy
.
asarray
(
pt
[
idx
]
+
eps
)
f_eps
=
f
(
*
pt
)
gf
[
idx
]
=
numpy
.
asarray
((
f_eps
-
f_pt
)
/
eps
)
pt
[
idx
]
=
orig
elif
len
(
pt
[
idx
]
.
shape
)
==
1
:
for
i
in
xrange
(
pt
[
idx
]
.
shape
[
0
]):
orig
=
pt
[
idx
][
i
]
pt
[
idx
][
i
]
=
pt
[
idx
][
i
]
+
eps
f_eps
=
f
(
*
pt
)
gf
[
idx
][
i
]
=
numpy
.
asarray
((
f_eps
-
f_pt
)
/
eps
)
pt
[
idx
][
i
]
=
orig
elif
len
(
pt
[
idx
]
.
shape
)
==
2
:
for
i
in
xrange
(
pt
[
idx
]
.
shape
[
0
]):
for
j
in
xrange
(
pt
[
idx
]
.
shape
[
1
]):
orig
=
pt
[
idx
][
i
,
j
]
pt
[
idx
][
i
,
j
]
=
pt
[
idx
][
i
,
j
]
+
eps
f_eps
=
f
(
*
pt
)
gf
[
idx
][
i
,
j
]
=
numpy
.
asarray
((
f_eps
-
f_pt
)
/
eps
)
pt
[
idx
][
i
,
j
]
=
orig
else
:
raise
NotImplementedError
()
self
.
gf
=
gf
@staticmethod
def
abs_rel_err
(
a
,
b
,
eps
=
1.0e-10
):
"""Return a small number when a and b are close, relative to how big they are"""
return
abs
(
a
-
b
)
/
(
abs
(
a
)
+
abs
(
b
)
+
eps
)
def
max_err
(
self
,
g_pt
):
"""Return the biggest relative error between g_pt and self.gf"""
assert
len
(
g_pt
)
==
len
(
self
.
gf
)
errs
=
[]
for
a
,
b
in
zip
(
g_pt
,
self
.
gf
):
errs
.
append
(
numpy
.
max
(
numeric_grad
.
abs_rel_err
(
a
,
b
)))
return
max
(
errs
)
tensor.py
浏览文件 @
e2202c3f
...
...
@@ -2,6 +2,7 @@
__docformat__
=
"restructuredtext en"
import
__builtin__
import
sys
# for sys.maxint
import
inspect
import
functools
...
...
@@ -20,6 +21,8 @@ import elemwise
import
scalar
as
scal
from
gof.python25
import
partial
import
compile
### set up the external interface
from
elemwise
import
Elemwise
,
DimShuffle
,
CAReduce
,
Sum
...
...
@@ -91,7 +94,7 @@ def constant(x):
except
:
raise
TypeError
(
"Could not convert
%
s to Tensor"
%
x
,
type
(
x
))
def
value
(
x
):
def
value
(
x
,
name
=
None
):
"""Return a symbolic `Value` with default value `x`
:Exceptions:
...
...
@@ -102,8 +105,12 @@ def value(x):
else
:
x_
=
numpy
.
asarray
(
x
)
try
:
return
TensorValue
(
Tensor
(
dtype
=
x_
.
dtype
,
if
name
is
None
:
return
TensorValue
(
Tensor
(
dtype
=
x_
.
dtype
,
broadcastable
=
[
d
==
1
for
d
in
x_
.
shape
]),
x_
)
else
:
return
TensorValue
(
Tensor
(
dtype
=
x_
.
dtype
,
broadcastable
=
[
d
==
1
for
d
in
x_
.
shape
]),
x_
,
name
=
name
)
except
:
raise
TypeError
(
"Could not convert
%
s to Tensor"
%
x
,
type
(
x
))
...
...
@@ -203,7 +210,7 @@ class Tensor(Type):
- `name`: str
A pretty name to identify this `Result` when printing and debugging
"""
"""
return
TensorResult
(
self
,
name
=
name
)
def
__str__
(
self
):
...
...
@@ -478,6 +485,18 @@ class _tensor_py_operators:
raise
TypeError
(
'Tensor does not support iteration. '
'Maybe you are using builtin.sum instead of theano.tensor.sum? (Maybe .max?)'
)
# CONVENIENT ACCESS TO TYPE PROPERTIES
ndim
=
property
(
lambda
self
:
self
.
type
.
ndim
)
"""The rank of this tensor."""
broadcastable
=
property
(
lambda
self
:
self
.
type
.
broadcastable
)
"""The broadcastable signature of this tensor.
See :doc:`broadcasting` for details.
"""
dtype
=
property
(
lambda
self
:
self
.
type
.
dtype
)
""" The dtype of this tensor. """
class
TensorResult
(
Result
,
_tensor_py_operators
):
...
...
@@ -1311,32 +1330,101 @@ class SetSubtensor(Subtensor):
x
.
__setitem__
(
cdata
,
y
)
out
[
0
]
=
x
class
Split
(
Op
):
"""Partition a `TensorResult` along some axis.
class
MakeVector
(
Op
):
"""WRITEME"""
def
__init__
(
self
,
stype
):
self
.
stype
=
stype
def
make_node
(
self
,
*
inputs
):
inputs
=
map
(
as_tensor
,
inputs
)
assert
all
(
a
.
type
==
self
.
stype
for
a
in
inputs
)
return
Apply
(
self
,
inputs
,
[
Tensor
(
broadcastable
=
(
False
,),
dtype
=
self
.
stype
.
dtype
)()])
def
perform
(
self
,
node
,
inputs
,
(
out
,)):
out
[
0
]
=
numpy
.
asarray
(
inputs
)
def
grad
(
self
,
inputs
,
(
gout
,)):
return
[
None
]
*
len
(
inputs
)
make_lvector
=
MakeVector
(
lscalar
)
"""WRITEME"""
.. python::
x = vector()
splits = lvector()
# you have to declare right away how many split_points there will be.
ra, rb, rc = split(x, axis=0, points=splits, n_splits=3)
f = compile([x, splits], [ra, rb, rc])
a, b, c = f([0,1,2,3,4,5,6], [3, 2, 1])
class
Concatenate
(
Op
):
#a == [0,1,2]
#b == [3, 4]
#c == [5]
"""
len_splits
=
None
"""A Split instance will have this many outputs, and require that the splits argument to
`perform` have exactly this many elements.
"""
def
__init__
(
self
,
len_splits
):
self
.
len_splits
=
int
(
len_splits
)
def
make_node
(
self
,
x
,
axis
,
splits
):
"""WRITEME"""
x
=
as_tensor
(
x
)
axis
=
as_tensor
(
axis
)
splits
=
as_tensor
(
splits
)
if
splits
.
type
!=
lvector
:
raise
TypeError
(
'splits must have type tensor.lvector'
,
splits
.
type
)
if
axis
.
type
!=
lscalar
:
raise
TypeError
(
'axis must have type lscalar'
,
axis
.
type
)
inputs
=
[
x
,
axis
,
splits
]
outputs
=
[
x
.
type
()
for
i
in
xrange
(
self
.
len_splits
)]
return
Apply
(
self
,
inputs
,
outputs
)
def
perform
(
self
,
node
,
(
x
,
axis
,
splits
),
outputs
):
"""WRITEME"""
try
:
len_along_axis
=
x
.
shape
[
axis
]
except
:
raise
ValueError
(
'Split.perform() with axis=(
%
s) is invalid for x.shape==(
%
s)'
%
(
axis
,
x
.
shape
))
if
len
(
splits
)
!=
self
.
len_splits
:
raise
ValueError
(
'In Split.perform(), len(splits) != len_splits.'
,
(
len
(
splits
),
self
.
len_splits
))
# Checking is done, lets roll the splitting algorithm!
# Basically we step along the given axis of x, extracting subtensors of size splits[i]
# as we go along.
general_key
=
[
slice
(
None
,
None
,
None
)
for
s
in
x
.
shape
]
lower_idx
=
0
for
i
in
xrange
(
self
.
len_splits
):
upper_idx
=
lower_idx
+
splits
[
i
]
general_key
[
axis
]
=
slice
(
lower_idx
,
upper_idx
,
None
)
outputs
[
i
][
0
]
=
x
.
__getitem__
(
general_key
)
.
copy
()
lower_idx
=
upper_idx
def
grad
(
self
,
(
x
,
axis
,
splits
),
g_outputs
):
"""Join the gradients along the axis that was used to split x."""
return
[
join
(
axis
,
*
g_outputs
),
None
,
None
]
class
Join
(
Op
):
"""
Concatenate two L{Tensor}s along the given axis.
These L{Tensor}s must have the same shape along all dimensions other than
this axis.
Concatenate two `TensorResult`s along some axis.
These tensors must have the same shape along all dimensions other than this axis.
Of course, TensorResult instances don't have a shape, so this error can't be caught until
runtime. See `perform()`.
.. python::
x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix()
u = tensor.vector()
r = join(0, x, y, z)
c = join(1, x, y, z)
join(2, x, y, z) # WRONG: the axis has to be an index into the shape
join(0, x, y) # WRONG: tensors have to have the same rank to be joined
"""
def
make_node
(
self
,
*
axis_and_tensors
):
"""
WRITEME
"""
axis
,
tensors
=
axis_and_tensors
[
0
],
axis_and_tensors
[
1
:]
as_tensor_args
=
[
as_tensor
(
x
)
for
x
in
tensors
]
dtypes
=
[
x
.
type
.
dtype
for
x
in
as_tensor_args
]
...
...
@@ -1364,17 +1452,36 @@ class Concatenate(Op):
bcastable
[:]
=
as_tensor_args
[
0
]
.
type
.
broadcastable
bcastable
[
axis
]
=
False
inputs
=
[
scal
.
as_scalar
(
axis
)]
+
as_tensor_args
inputs
=
[
as_tensor
(
axis
)]
+
as_tensor_args
if
inputs
[
0
]
.
type
!=
lscalar
:
raise
TypeError
(
'Axis could not be cast to lscalar'
,
axis
)
outputs
=
[
tensor
(
dtype
=
dtypes
[
0
],
broadcastable
=
bcastable
)]
return
Apply
(
self
,
inputs
,
outputs
)
def
perform
(
self
,
node
,
axis_and_tensors
,
(
out
,
)):
"""
WRITEME
"""
axis
,
tensors
=
axis_and_tensors
[
0
],
axis_and_tensors
[
1
:]
out
[
0
]
=
numpy
.
concatenate
(
tensors
,
axis
=
axis
)
def
grad
(
self
,
axis_and_tensors
,
(
gz
,)):
""" The gradient wrt a join op is a `Split`, used to partition the gradient along the
`axis` which was used for joining.
"""
axis
,
tensors
=
axis_and_tensors
[
0
],
axis_and_tensors
[
1
:]
if
'float'
in
tensors
[
0
]
.
dtype
or
'complex'
in
tensors
[
0
]
.
dtype
:
# assume that this isn't differentiable
split
=
Split
(
len
(
tensors
))
return
[
None
]
+
split
(
gz
,
axis
,
stack
(
*
[
shape
(
x
)[
axis
]
for
x
in
tensors
]))
else
:
# assume that this isn't differentiable
return
[
None
]
*
(
1
+
len
(
tensors
))
def
_native_grad
(
self
,
axis_and_tensors
,
(
gz
,)):
"""WRITEME"""
axis
,
tensors
=
axis_and_tensors
[
0
],
axis_and_tensors
[
1
:]
n_dims
=
len
(
shape
(
tensors
[
0
]))
sizes_along_axis
=
[
shape
(
x
)[
axis
]
for
x
in
tensors
]
...
...
@@ -1387,91 +1494,172 @@ class Concatenate(Op):
[
slice
(
None
)]
*
(
n_dims
-
axis
-
1
)]
\
for
k
in
range
(
len
(
sizes_along_axis
))]
def
get_vector_length
(
v
):
if
isinstance
(
v
,
gof
.
Constant
)
and
v
.
type
.
ndim
==
1
:
return
len
(
v
.
data
)
elif
v
.
owner
and
isinstance
(
v
.
owner
.
op
,
MakeVector
):
return
len
(
v
.
owner
.
inputs
)
elif
v
.
owner
and
v
.
owner
.
op
==
shape
:
return
v
.
owner
.
inputs
[
0
]
.
type
.
ndim
else
:
return
None
def
concatenate
(
tensors
,
axis
=
0
):
def
vec_length
(
self
,
node
):
assert
isinstance
(
node
.
owner
.
op
,
Join
)
if
node
.
ndim
!=
1
:
raise
TypeError
(
'argument must be symbolic vector'
)
inputs
=
node
.
owner
.
inputs
axis
,
tensors
=
inputs
[
0
],
inputs
[
1
]
# if v is a vector, axis must be 0
# the question is whether all the inputs are broadcastable.
if
all
(
i
.
broadcastable
[
0
]
for
i
in
tensors
):
return
len
(
tensors
)
@_redefine_asRoutine
(
Join
())
def
join
(
axis
,
*
tensors
):
"""
Convenience function to concatenate `Tensor`s along the given axis.
The `axis` parameter may either be an integer or an object that can be
converted to a scalar using `as_scalar`(`axis`). In the former case,
the axis is fixed at construction, while in the latter it may vary over
time depending on the value of the `axis` variable.
:Parameters:
- `tensors` : list of tensors (or list-like)
A list of tensors to be concatenated along the given axis.
- `axis` : int (symbolic or literal)
On which dimension should the tensors be joined? The `axis` must be a valid index into
the shape of the tensors to be concatenated.
The `axis` parameter may either be an integer or an object that can be converted to a
scalar using `as_scalar`(`axis`). In the former case, the axis is fixed at construction,
while in the latter it may vary over time depending on the value of the `axis` variable.
The shapes of the tensors to be concatenated must be all identical, except in the dimension
(`axis`) on which they are to be joined.
"""
@constructor
def
leftpad_shape
(
tensor
,
n_ones
):
"""Reshape `tensor` by left-padding the shape with `n_ones` 1s"""
pattern
=
[
'x'
]
*
n_ones
+
[
i
for
i
in
range
(
tensor
.
type
.
ndim
)]
return
DimShuffle
(
tensor
.
broadcastable
,
pattern
)(
tensor
)
@constructor
def
stack
(
*
tensors
):
"""Insert the arguments as slices into a tensor of 1 rank greater.
EXAMPLE
"""
return
join
(
0
,
*
[
leftpad_shape
(
t
,
1
)
for
t
in
tensors
])
@constructor
def
concatenate
(
tensor_list
,
axis
=
0
):
"""Alias for `join`(axis, *tensor_list).
This function is similar to `join`, but uses the signature of numpy's concatenate function.
This function
:Exceptions:
- `TypeError` : the tensor_list must be a tuple or list
"""
# Check someone did not make the common mistake to do something like:
# c = concatenate(x, y)
# instead of
# c = concatenate((x, y))
if
not
isinstance
(
tensor
s
,
(
tuple
,
list
)):
if
not
isinstance
(
tensor
_list
,
(
tuple
,
list
)):
raise
TypeError
(
"The 'tensors' argument must be either a tuple "
"or a list, make sure you did not forget () or [] around "
"arguments of concatenate."
,
tensors
)
# Ensure we only create one instance of 'Concatenate', to simplify the
# merging job.
if
not
hasattr
(
concatenate
,
'obj'
):
concatenate
.
obj
=
Concatenate
()
return
concatenate
.
obj
(
axis
,
*
tensors
)
return
join
(
axis
,
*
tensor_list
)
class
VerticalStack
(
Op
):
"""
Vertically stack two L{Tensor}s.
Stack two L{Tensor}s along the first axis (row wise). These
L{Tensor}s must have the same shape along all dimensions but the
first.
def
get_vector_length
(
v
):
"""Return the run-time length of a symbolic vector.
:Parameters:
- `v` : A rank-1 Tensor result.
:Exceptions:
- `TypeError` : `v` hasn't the proper type.
- `ValueError` : No special case applies, the length is not known.
In general this is not possible, but for a number of special cases the length can be
determined at compile / graph-construction time. This function implements these special
cases.
@attention: Because we use vstack as the implementation, if the
inputs have 1-dimension, the output will have 2-dimensions.
"""
def
make_node
(
self
,
x
,
y
):
x
=
as_tensor
(
x
)
y
=
as_tensor
(
y
)
assert
x
.
type
.
dtype
==
y
.
type
.
dtype
if
x
.
type
.
broadcastable
[
1
:]
!=
y
.
type
.
broadcastable
[
1
:]:
raise
NotImplementedError
inputs
=
[
x
,
y
]
bcastable
=
(
False
,
)
+
x
.
type
.
broadcastable
[
1
:]
outputs
=
[
tensor
(
dtype
=
x
.
type
.
dtype
,
broadcastable
=
bcastable
)]
return
Apply
(
self
,
inputs
,
outputs
)
def
perform
(
self
,
node
,
(
x
,
y
),
(
out
,
)):
assert
x
.
ndim
==
y
.
ndim
# Make sure every dimension (save the first) is the same
for
i
in
range
(
x
.
ndim
):
assert
i
==
0
or
x
.
shape
[
i
]
==
y
.
shape
[
i
]
out
[
0
]
=
numpy
.
vstack
([
x
,
y
])
def
grad
(
self
,
(
x
,
y
),
(
gz
,)):
if
v
.
ndim
!=
1
:
raise
TypeError
(
'argument must be symbolic vector'
)
if
isinstance
(
v
,
gof
.
Constant
)
and
v
.
type
.
ndim
==
1
:
return
len
(
v
.
data
)
if
v
.
owner
and
isinstance
(
v
.
owner
.
op
,
join
):
try
:
return
join
.
vec_length
(
v
)
except
:
pass
if
v
.
owner
and
v
.
owner
.
op
==
shape
:
return
v
.
owner
.
inputs
[
0
]
.
type
.
ndim
raise
ValueError
(
"length not known"
)
if
0
:
#vertical and horizontal stacking are deprecated. Better to use stack() and join().
class
VerticalStack
(
Op
):
"""
@todo: Make VSplit (or this grad implementation) its own L{Op},
that way we can do more sanity-checking::
Vertically stack two L{Tensor}s.
Stack two L{Tensor}s along the first axis (row wise). These
L{Tensor}s must have the same shape along all dimensions but the
first.
@attention: Because we use vstack as the implementation, if the
inputs have 1-dimension, the output will have 2-dimensions.
"""
def
make_node
(
self
,
x
,
y
):
x
=
as_tensor
(
x
)
y
=
as_tensor
(
y
)
assert
x
.
type
.
dtype
==
y
.
type
.
dtype
if
x
.
type
.
broadcastable
[
1
:]
!=
y
.
type
.
broadcastable
[
1
:]:
raise
NotImplementedError
inputs
=
[
x
,
y
]
bcastable
=
(
False
,
)
+
x
.
type
.
broadcastable
[
1
:]
outputs
=
[
tensor
(
dtype
=
x
.
type
.
dtype
,
broadcastable
=
bcastable
)]
return
Apply
(
self
,
inputs
,
outputs
)
def
perform
(
self
,
node
,
(
x
,
y
),
(
out
,
)):
assert
x
.
ndim
==
y
.
ndim
# Make sure every dimension (save the first) is the same
for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
etc...
for
i
in
range
(
x
.
ndim
):
assert
i
==
0
or
x
.
shape
[
i
]
==
y
.
shape
[
i
]
out
[
0
]
=
numpy
.
vstack
([
x
,
y
])
def
grad
(
self
,
(
x
,
y
),
(
gz
,)):
"""
@todo: Make VSplit (or this grad implementation) its own L{Op},
that way we can do more sanity-checking::
assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same
for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
etc...
"""
xs
=
shape
(
x
)
ys
=
shape
(
y
)
return
gz
[:
xs
[
0
]],
gz
[
xs
[
0
]:]
vertical_stack
=
VerticalStack
()
def
horizontal_stack
(
x
,
y
):
"""
xs
=
shape
(
x
)
ys
=
shape
(
y
)
return
gz
[:
xs
[
0
]],
gz
[
xs
[
0
]:]
vertical_stack
=
VerticalStack
()
Horizontally stack two L{Tensor}s.
Stack two L{Tensor}s along the second axis (column wise). These
L{Tensor}s must have the same shape along all dimensions but the
second.
def
horizontal_stack
(
x
,
y
):
"""
Horizontally stack two L{Tensor}s.
Stack two L{Tensor}s along the second axis (column wise). These
L{Tensor}s must have the same shape along all dimensions but the
second.
@note: Unlike VerticalStack, we assume that the L{Tensor}s have
two dimensions.
"""
assert
x
.
type
.
ndim
==
2
assert
y
.
type
.
ndim
==
2
return
transpose
(
vertical_stack
(
x
.
T
,
y
.
T
))
class
MakeVector
(
Op
):
"""WRITEME"""
def
__init__
(
self
,
stype
):
self
.
stype
=
stype
def
make_node
(
self
,
*
inputs
):
inputs
=
map
(
as_tensor
,
inputs
)
assert
all
(
a
.
type
==
self
.
stype
for
a
in
inputs
)
return
Apply
(
self
,
inputs
,
[
Tensor
(
broadcastable
=
(
False
,),
dtype
=
self
.
stype
.
dtype
)()])
def
perform
(
self
,
node
,
inputs
,
(
out
,)):
out
[
0
]
=
numpy
.
asarray
(
inputs
)
def
grad
(
self
,
inputs
,
(
gout
,)):
return
[
None
]
*
len
(
inputs
)
make_lvector
=
MakeVector
(
lscalar
)
"""WRITEME"""
@note: Unlike VerticalStack, we assume that the L{Tensor}s have
two dimensions.
"""
assert
x
.
type
.
ndim
==
2
assert
y
.
type
.
ndim
==
2
return
transpose
(
vertical_stack
(
x
.
T
,
y
.
T
))
else
:
pass
#########################
...
...
@@ -1842,3 +2030,146 @@ def grad(cost, wrt, g_cost=None):
else
:
return
gmap
.
get
(
wrt
,
zero
(
wrt
))
class
numeric_grad
:
"""WRITEME"""
def
__init__
(
self
,
f
,
pt
,
eps
=
1.0e-7
):
"""Return the gradient of f at pt.
This function computes the gradient by a one-sided finite differences of a
fixed step size (eps).
It is assumed that f(...) will return a scalar.
It is assumed that all f's inputs are numpy.ndarray objects.
"""
def
prod
(
inputs
):
rval
=
1
for
i
in
inputs
:
rval
*=
i
return
rval
packed_pt
=
False
if
not
isinstance
(
pt
,
(
list
,
tuple
)):
pt
=
[
pt
]
packed_pt
=
True
apt
=
[
numpy
.
array
(
p
)
for
p
in
pt
]
shapes
=
[
p
.
shape
for
p
in
apt
]
dtypes
=
[
str
(
p
.
dtype
)
for
p
in
apt
]
if
not
dtypes
==
[
dtypes
[
0
]]
*
len
(
apt
):
raise
TypeError
(
'All function arguments must have same dtype'
)
total_size
=
__builtin__
.
sum
(
prod
(
sh
)
for
sh
in
shapes
)
#create un-initialized memory
x
=
numpy
.
ndarray
((
total_size
,),
dtype
=
dtypes
[
0
])
gx
=
numpy
.
ndarray
((
total_size
,),
dtype
=
dtypes
[
0
])
#set up aliases so that apt[i] is backed by memory in x
# and self.gf is backed by memory in gx
cur_pos
=
0
self
.
gf
=
[]
for
i
,
p
in
enumerate
(
apt
):
p_size
=
prod
(
p
.
shape
)
# set up alias
apt
[
i
]
=
x
[
cur_pos
:
cur_pos
+
p_size
]
.
reshape
(
p
.
shape
)
self
.
gf
.
append
(
gx
[
cur_pos
:
cur_pos
+
p_size
]
.
reshape
(
p
.
shape
))
# initialize with p's value
apt
[
i
][:]
=
p
cur_pos
+=
p_size
f_x
=
f
(
*
[
p
.
copy
()
for
p
in
apt
])
# now iterate over the elements of x, and call f on apt.
x_copy
=
x
.
copy
()
for
i
in
xrange
(
total_size
):
x
[:]
=
x_copy
x
[
i
]
+=
eps
f_eps
=
f
(
*
apt
)
gx
[
i
]
=
numpy
.
asarray
((
f_eps
-
f_x
)
/
eps
)
if
packed_pt
:
self
.
gf
=
self
.
gf
[
0
]
@staticmethod
def
abs_rel_err
(
a
,
b
,
eps
=
1.0e-10
):
"""Return a small number when a and b are close, relative to how big they are"""
return
abs
(
a
-
b
)
/
(
abs
(
a
)
+
abs
(
b
)
+
eps
)
def
max_err
(
self
,
g_pt
):
"""Return the biggest relative error between g_pt and self.gf"""
assert
len
(
g_pt
)
==
len
(
self
.
gf
)
errs
=
[]
for
a
,
b
in
zip
(
g_pt
,
self
.
gf
):
errs
.
append
(
numpy
.
max
(
numeric_grad
.
abs_rel_err
(
a
,
b
)))
return
numpy
.
max
(
errs
)
def
verify_grad
(
testcase
,
op
,
pt
,
n_tests
=
1
,
rng
=
numpy
.
random
,
eps
=
1.0e-7
,
tol
=
0.0001
,
linker
=
'c&py'
):
""" WRITEME
testcase.failUnless(analytic gradient matches finite-diff gradient)
"""
pt
=
[
numpy
.
array
(
p
)
for
p
in
pt
]
#print "PT", pt
def
function
(
inputs
,
output
):
return
compile
.
function
(
inputs
,
output
,
mode
=
compile
.
Mode
(
optimizer
=
None
,
linker
=
linker
),
accept_inplace
=
True
)
for
test_num
in
xrange
(
n_tests
):
tensor_pt
=
[
value
(
p
.
copy
(),
name
=
'input
%
i'
%
i
)
for
i
,
p
in
enumerate
(
pt
)]
#op can be either a function or an actual Op instance
#print "OP", op
#print "TENSOR PT", tensor_pt
o_output
=
op
(
*
tensor_pt
)
if
isinstance
(
o_output
,
list
)
>
1
:
raise
NotImplementedError
(
'cant (yet) autotest gradient of op with multiple outputs'
)
# we could make loop over outputs making random projections R for each,
# but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -JB.
o_fn
=
function
(
tensor_pt
,
o_output
)
#print "PT B", pt
o_fn_out
=
o_fn
(
*
[
p
.
copy
()
for
p
in
pt
])
#print "PT C", pt
random_projection
=
rng
.
rand
(
*
o_fn_out
.
shape
)
t_r
=
as_tensor
(
random_projection
)
#random projection of o onto t_r
cost
=
sum
(
t_r
*
o_output
)
cost_fn
=
function
(
tensor_pt
,
cost
)
num_grad
=
numeric_grad
(
cost_fn
,
[
p
.
copy
()
for
p
in
pt
],
eps
)
symbolic_grad
=
grad
(
cost
,
tensor_pt
,
as_tensor
(
1.0
,
name
=
'g_cost'
))
if
0
:
print
'----------'
for
op
in
gof
.
graph
.
io_toposort
(
tensor_pt
,
symbolic_grad
):
print
op
grad_fn
=
function
(
tensor_pt
,
symbolic_grad
)
#print "PT D", pt
analytic_grad
=
grad_fn
(
*
pt
)
#print "PT Z", pt
if
not
isinstance
(
analytic_grad
,
(
list
,
tuple
)):
analytic_grad
=
[
analytic_grad
]
max_err
=
num_grad
.
max_err
(
analytic_grad
)
if
max_err
>
tol
:
#print 'analytic grad', analytic_grad
#print 'numeric grad', num_grad.gf
raise
Exception
(
verify_grad
.
E_grad
,
(
max_err
,
tol
))
verify_grad
.
E_grad
=
'gradient error exceeded tolerance'
"""This error is raised when a gradient is calculated, but incorrect."""
tensor_opt.py
浏览文件 @
e2202c3f
...
...
@@ -8,6 +8,7 @@ import numpy as N
import
operator
import
itertools
import
sys
import
compile
#to register the optimizer built by this file
# Utilities
...
...
@@ -32,9 +33,10 @@ gemm_pattern_1 = gof.PatternSub((T._sub_inplace,
# gemm: (d,a,b,c,s) -> d = d*s + a*dot(b,c)
# Transforms dot(a, b) into gemm(zeros(2)(hstack(shape(a)[:1], shape(b)[1:])), 1.0, a, b, 1.0)
# The construction of the 'gemm' node may fail if, for example, a and b are not both matrices.
dot_to_gemm
=
gof
.
PatternSub
((
T
.
dot
,
'a'
,
'b'
),
(
T
.
gemm
,
(
T
.
Zeros
(
2
),
(
T
.
vertical_
stack
,
(
T
.
stack
,
(
T
.
Subtensor
([
slice
(
0
,
1
)]),
(
T
.
shape
,
'a'
)),
(
T
.
Subtensor
([
slice
(
1
,
2
)]),
(
T
.
shape
,
'b'
)))),
T
.
constant
(
1.0
),
'a'
,
'b'
,
T
.
constant
(
1.0
)),
...
...
@@ -231,7 +233,15 @@ def local_subtensor_make_vector(node):
If the index or slice is constant.
"""
if
not
opt
.
check_chain
(
node
,
T
.
Subtensor
,
T
.
MakeVector
):
if
not
opt
.
check_chain
(
node
,
T
.
Subtensor
,
T
.
Join
):
return
False
joined_r
=
node
.
inputs
[
0
]
try
:
#check that join is being used to join scalars
veclen
=
T
.
join
.
vec_length
(
joined_r
)
except
:
return
False
idxlist
=
node
.
op
.
idx_list
...
...
@@ -644,6 +654,16 @@ def _math_optimizer():
math_optimizer
=
_math_optimizer
()
compile
.
register_optimizer
(
'math'
,
gof
.
MergeOptMerge
(
gof
.
PureThenInplaceOptimizer
(
math_optimizer
,
inplace_optimizer
)))
compile
.
register_mode
(
'SANITY_CHECK'
,
compile
.
Mode
(
'c&py'
,
'math'
))
compile
.
register_mode
(
'FAST_RUN'
,
compile
.
Mode
(
'c|py'
,
'math'
))
compile
.
register_mode
(
'EXPENSIVE_OPTIMIZATIONS'
,
compile
.
Mode
(
'c|py'
,
'math'
))
# @gof.local_optimizer
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论