Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a87e9bb0
提交
a87e9bb0
authored
10月 31, 2011
作者:
goodfeli
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #176 from jaberg/Composite_fixes
Composite fixes
上级
d63ae441
2b95f6a2
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
12 个修改的文件
包含
211 行增加
和
80 行删除
+211
-80
function_module.py
theano/compile/function_module.py
+3
-0
cc.py
theano/gof/cc.py
+7
-2
cmodule.py
theano/gof/cmodule.py
+4
-3
op.py
theano/gof/op.py
+9
-1
elemwise.py
theano/sandbox/cuda/elemwise.py
+13
-11
opt.py
theano/sandbox/cuda/opt.py
+12
-4
basic.py
theano/scalar/basic.py
+0
-0
test_basic.py
theano/scalar/tests/test_basic.py
+4
-0
basic.py
theano/tensor/basic.py
+9
-20
elemwise.py
theano/tensor/elemwise.py
+8
-7
opt.py
theano/tensor/opt.py
+68
-30
test_opt.py
theano/tensor/tests/test_opt.py
+74
-2
没有找到文件。
theano/compile/function_module.py
浏览文件 @
a87e9bb0
...
...
@@ -994,6 +994,8 @@ class FunctionMaker(object):
try
:
theano
.
config
.
compute_test_value
=
"off"
start_optimizer
=
time
.
time
()
add_stack_trace_on_call
=
gof
.
Op
.
add_stack_trace_on_call
gof
.
Op
.
add_stack_trace_on_call
=
False
optimizer
(
env
)
end_optimizer
=
time
.
time
()
...
...
@@ -1007,6 +1009,7 @@ class FunctionMaker(object):
insert_deepcopy
(
env
,
inputs
,
outputs
+
additional_outputs
)
finally
:
theano
.
config
.
compute_test_value
=
compute_test_value_orig
gof
.
Op
.
add_stack_trace_on_call
=
add_stack_trace_on_call
# initialize the linker
if
not
hasattr
(
linker
,
'accept'
):
...
...
theano/gof/cc.py
浏览文件 @
a87e9bb0
...
...
@@ -853,7 +853,8 @@ class CLinker(link.Linker):
libraries
=
self
.
libraries
()
)
@staticmethod
def
cmodule_key_
(
env
,
no_recycling
,
compile_args
=
[],
libraries
=
[]):
def
cmodule_key_
(
env
,
no_recycling
,
compile_args
=
[],
libraries
=
[],
insert_config_md5
=
True
):
"""
Do the actual computation of cmodule_key in a static method
to allow it to be reused in scalar.Composite.__eq__
...
...
@@ -871,11 +872,15 @@ class CLinker(link.Linker):
sig
=
[
'CLinker.cmodule_key'
]
# will be cast to tuple on return
if
compile_args
is
not
None
:
sig
.
append
(
tuple
(
compile_args
))
if
libraries
is
not
None
:
sig
.
append
(
tuple
(
libraries
))
# IMPORTANT: The 'md5' prefix is used to isolate the compilation
# parameters from the rest of the key. If you want to add more key
# elements, they should be before this md5 hash if and only if they
# can lead to a different compiled file with the same source code.
sig
.
append
(
'md5:'
+
theano
.
configparser
.
get_config_md5
())
if
insert_config_md5
:
sig
.
append
(
'md5:'
+
theano
.
configparser
.
get_config_md5
())
else
:
sig
.
append
(
'md5: <omitted>'
)
# technically this should only be appended for gcc-compiled Ops
# and the flags of other compilers should be inserted here... but it's not clear how to
...
...
theano/gof/cmodule.py
浏览文件 @
a87e9bb0
...
...
@@ -955,9 +955,10 @@ class ModuleCache(object):
if
found
==
0
:
msg
=
'Key not found in unpickled KeyData file'
if
key_data
.
keys
:
# This is only to make debugging in pdb easier, by providing
# the offending key in the local context.
other_key
=
key_data
.
keys
.
__iter__
()
.
next
()
# This is to make debugging in pdb easier, by providing
# the offending keys in the local context.
key_data_keys
=
list
(
key_data
.
keys
)
## import pdb; pdb.set_trace()
elif
found
>
1
:
msg
=
'Multiple equal keys found in unpickled KeyData file'
if
msg
:
...
...
theano/gof/op.py
浏览文件 @
a87e9bb0
...
...
@@ -310,6 +310,13 @@ class PureOp(object):
"""
add_stack_trace_on_call
=
True
"""This class variable governs whether __call__ adds a stack trace to the node it creates.
The tag trace is meant to connect a node to the line a user typed. It is nice for
debugging. It does not make as much sense during optimizations to store this information.
"""
#############
# make_node #
#############
...
...
@@ -367,7 +374,8 @@ class PureOp(object):
"""
node
=
self
.
make_node
(
*
inputs
,
**
kwargs
)
self
.
add_tag_trace
(
node
)
if
self
.
add_stack_trace_on_call
:
self
.
add_tag_trace
(
node
)
if
config
.
compute_test_value
!=
'off'
:
run_perform
=
True
...
...
theano/sandbox/cuda/elemwise.py
浏览文件 @
a87e9bb0
...
...
@@ -38,6 +38,10 @@ class NaiveAlgo(object):
:param scalar_op: the scalar operation to execute on each element.
:param sync: if True, will wait after the kernel launch and check for error call.
"""
if
scalar_op
.
c_support_code_apply
(
node
=
None
,
nodename
=
"nodename"
):
raise
ValueError
((
'It is currently not possible to auto-generate'
' a GPU implementation for an elementwise Op with support'
' code'
),
scalar_op
)
self
.
scalar_op
=
scalar_op
self
.
sync
=
sync
self
.
inplace_pattern
=
inplace_pattern
...
...
@@ -799,12 +803,15 @@ nd_collapse_[i]=0;
def
c_support_code_apply
(
self
,
node
,
nodename
):
nd
=
node
.
outputs
[
0
]
.
type
.
ndim
return
""
.
join
(
[
self
.
c_src_kernel
(
node
,
nodename
,
x
)
for
x
in
xrange
(
1
,
nd
+
1
)]
+
[
self
.
c_src_kernel_Ccontiguous
(
node
,
nodename
),
self
.
c_src_callkernel
(
node
,
nodename
),
])
defines
=
"""
#define INTDIV_POW2(a, b) (a >> b)
#define INTMOD_POW2(a, b) (a & ((1<<b)-1))
"""
kernels
=
""
.
join
(
[
self
.
c_src_kernel
(
node
,
nodename
,
x
)
for
x
in
xrange
(
1
,
nd
+
1
)]
+
[
self
.
c_src_kernel_Ccontiguous
(
node
,
nodename
)],
+
[
self
.
c_src_callkernel
(
node
,
nodename
)])
return
defines
+
kernels
def
c_code
(
self
,
node
,
nodename
,
inputs
,
outputs
,
sub
):
d
=
dict
(
sub
)
...
...
@@ -951,8 +958,3 @@ nd_collapse_[i]=0;
#print sio.getvalue()
return
sio
.
getvalue
()
def
c_support_code
(
self
):
return
"""
#define INTDIV_POW2(a, b) (a >> b)
#define INTMOD_POW2(a, b) (a & ((1<<b)-1))
"""
theano/sandbox/cuda/opt.py
浏览文件 @
a87e9bb0
...
...
@@ -37,13 +37,13 @@ gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2,
optdb
.
register
(
'gpu_opt'
,
gpu_seqopt
,
optdb
.
__position__
.
get
(
'add_destroy_handler'
,
49.5
)
-
1
,
'gpu'
)
'gpu'
,
'fast_run'
)
# This second pass is needed as the fusion can put all the non float32 code
# inside the elemwise. When it there is no float64 op, this is working.
optdb
.
register
(
'gpu_after_fusion'
,
ProxyDB
(
gpu_seqopt
),
optdb
.
__position__
.
get
(
'elemwise_fusion'
,
71
)
+
.
1
,
'gpu'
)
'gpu'
,
'fast_run'
)
def
register_opt
(
*
tags
,
**
kwargs
):
def
f
(
local_opt
):
...
...
@@ -144,7 +144,11 @@ def local_gpu_elemwise_0(node):
if
numpy
.
all
([
o
.
type
.
dtype
==
'float32'
for
o
in
node
.
outputs
]):
# Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later
new_op
=
GpuElemwise
(
node
.
op
.
scalar_op
)
try
:
new_op
=
GpuElemwise
(
node
.
op
.
scalar_op
)
except
ValueError
:
# This happens when scalar_op requires support code
return
False
# first establish that float32 can store all inputs
upcastable
=
set
([
'float32'
,
'int8'
,
'int16'
,
'uint8'
,
'uint16'
])
...
...
@@ -188,7 +192,11 @@ def local_gpu_elemwise_1(node):
elemwise_node
=
host_i
.
owner
# Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later
new_op
=
GpuElemwise
(
elemwise_node
.
op
.
scalar_op
)
try
:
new_op
=
GpuElemwise
(
elemwise_node
.
op
.
scalar_op
)
except
ValueError
:
# This happens when scalar_op requires support code
return
False
if
all
([
i
.
dtype
==
'float32'
for
i
in
elemwise_node
.
inputs
]):
gpu_elemwise
=
new_op
(
*
[
gpu_from_host
(
i
)
for
i
in
elemwise_node
.
inputs
])
gpu_elemwise
=
split_huge_add_or_mul
(
gpu_elemwise
.
owner
)
...
...
theano/scalar/basic.py
浏览文件 @
a87e9bb0
差异被折叠。
点击展开。
theano/scalar/tests/test_basic.py
浏览文件 @
a87e9bb0
...
...
@@ -208,5 +208,9 @@ class test_div(unittest.TestCase):
assert
isinstance
((
a
/
c
)
.
owner
.
op
,
TrueDiv
)
# Testing of Composite is done in tensor/tests/test_opt.py
# in test_fusion, TestCompositeCodegen
if
__name__
==
'__main__'
:
unittest
.
main
()
theano/tensor/basic.py
浏览文件 @
a87e9bb0
...
...
@@ -1081,6 +1081,7 @@ def tensor4(name=None, dtype=None):
tensor4s
,
ftensor4s
,
dtensor4s
,
itensor4s
,
ltensor4s
=
_multi
(
tensor4
,
ftensor4
,
dtensor4
,
itensor4
,
ltensor4
)
class
_tensor_py_operators
:
#UNARY
def
__abs__
(
self
):
return
abs_
(
self
)
...
...
@@ -1370,10 +1371,14 @@ class _tensor_py_operators:
def
get_constant_value
(
self
):
return
get_constant_value
(
self
)
class
TensorVariable
(
_tensor_py_operators
,
Variable
):
"""Subclass to add the tensor operators to the basic `Variable` class."""
TensorType
.
Variable
=
TensorVariable
class
TensorConstantSignature
(
tuple
):
"""A Signature object for comparing TensorConstant instances
...
...
@@ -1497,7 +1502,8 @@ class TensorValue(_tensor_py_operators, Value):
Tensor
=
TensorType
#QUESTION: why are we doing this!?
# This bizarre push-import avoids a circular dependency.
elemwise
.
as_tensor_variable
=
as_tensor_variable
elemwise
.
TensorType
=
TensorType
elemwise
.
TensorVariable
=
TensorVariable
...
...
@@ -1505,29 +1511,10 @@ elemwise.TensorConstant = TensorConstant
elemwise
.
TensorValue
=
TensorValue
#########################
# Utilities
#########################
def
_elemwise
(
scalar_op
,
name
,
doc_prefix
=
''
):
straight
=
elemwise
.
Elemwise
(
scalar_op
,
name
=
name
)
inplace_scalar_op
=
scalar_op
.
__class__
(
scal
.
transfer_type
(
0
))
inplace
=
elemwise
.
Elemwise
(
inplace_scalar_op
,
{
0
:
0
},
name
=
name
+
"_inplace"
)
# don't add the inplace versions, they aren't supposed to be part of the user interface
_constructor_list
.
append
(
straight
)
# This is here so that gen_oplist can detect which module declared these variables.
straight
.
__module__
=
'tensor'
inplace
.
__module__
=
'tensor'
if
doc_prefix
:
straight
.
__doc__
=
doc_prefix
+
'
\n
'
+
straight
.
__doc__
return
straight
,
inplace
def
_redefine
(
real_symbol_value
,
module
=
'tensor'
):
"""Replace the value associated with a function symbol.
...
...
@@ -1538,12 +1525,14 @@ def _redefine(real_symbol_value, module='tensor'):
return
real_symbol_value
return
decorator
def
_redefine_asRoutine
(
real_symbol_value
):
real_symbol_value
.
__epydoc_asRoutine
=
True
def
decorator
(
f
):
return
real_symbol_value
return
decorator
def
_scal_elemwise_with_nfunc
(
nfunc
,
nin
,
nout
):
"""
Replace a symbol definition with an elementwise version of the
...
...
theano/tensor/elemwise.py
浏览文件 @
a87e9bb0
...
...
@@ -793,7 +793,7 @@ class Elemwise(Op):
rval
.
append
(
tuple
(
oshp
))
return
rval
def
_c_all
(
self
,
node
,
name
,
inames
,
onames
,
sub
):
def
_c_all
(
self
,
node
,
n
oden
ame
,
inames
,
onames
,
sub
):
_inames
=
inames
_onames
=
onames
...
...
@@ -901,7 +901,7 @@ class Elemwise(Op):
Apply
(
self
.
scalar_op
,
[
Scalar
(
dtype
=
input
.
type
.
dtype
)()
for
input
in
node
.
inputs
],
[
Scalar
(
dtype
=
output
.
type
.
dtype
)()
for
output
in
node
.
outputs
]),
name
+
'_scalar_'
,
n
oden
ame
+
'_scalar_'
,
[
"
%
s_i"
%
s
for
s
in
_inames
],
[
"
%
s_i"
%
s
for
s
in
onames
],
sub
)
...
...
@@ -922,19 +922,20 @@ class Elemwise(Op):
sub
=
sub
)
return
decl
,
checks
,
alloc
,
loop
def
c_code
(
self
,
node
,
name
,
inames
,
onames
,
sub
):
code
=
"
\n
"
.
join
(
self
.
_c_all
(
node
,
name
,
inames
,
onames
,
sub
))
def
c_code
(
self
,
node
,
n
oden
ame
,
inames
,
onames
,
sub
):
code
=
"
\n
"
.
join
(
self
.
_c_all
(
node
,
n
oden
ame
,
inames
,
onames
,
sub
))
return
code
def
c_headers
(
self
):
return
[
'<vector>'
,
'<algorithm>'
]
def
c_support_code
(
self
):
support_code
=
self
.
scalar_op
.
c_support_code
()
def
c_support_code_apply
(
self
,
node
,
nodename
):
support_code
=
self
.
scalar_op
.
c_support_code_apply
(
node
,
nodename
+
'_scalar_'
)
return
support_code
def
c_code_cache_version_apply
(
self
,
node
):
version
=
[
5
]
# the version corresponding to the c code in this Op
version
=
[
6
]
# the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node
=
Apply
(
self
.
scalar_op
,
...
...
theano/tensor/opt.py
浏览文件 @
a87e9bb0
...
...
@@ -629,18 +629,23 @@ class ShapeFeature(object):
"""
def
shape_ir
(
self
,
i
,
r
):
#TODO: Write a doc string for this method
"""Return symbolic r.shape[i] for tensor variable r, int i"""
if
hasattr
(
r
.
type
,
"broadcastable"
)
and
r
.
type
.
broadcastable
[
i
]:
return
self
.
lscalar_one
else
:
return
Shape_i
(
i
)
.
make_node
(
r
)
.
outputs
[
0
]
def
shape_tuple
(
self
,
r
):
#TODO: Write a doc string for this method
"""Return a tuple of symbolic shape vars for tensor variable r"""
return
tuple
([
self
.
shape_ir
(
i
,
r
)
for
i
in
xrange
(
r
.
ndim
)])
def
default_infer_shape
(
self
,
node
,
i_shapes
):
"""Return a list of shape tuple or None for the outputs of node.
This function is used for Ops that don't implement infer_shape.
Ops that do implement infer_shape should use the i_shapes parameter,
but this default implementation ignores it.
"""
rval
=
[]
for
r
in
node
.
outputs
:
try
:
...
...
@@ -650,16 +655,21 @@ class ShapeFeature(object):
return
rval
def
unpack
(
self
,
s_i
):
"""Return a symbolic integer scalar for the shape element s_i.
The s_i argument was produced by the infer_shape() of an Op subclass.
"""
# unpack the s_i that the Op returned
assert
s_i
is
not
None
if
s_i
==
1
:
# don't make the optimizer merge a zillion ones together
# by always returning the same object to represent 1
return
self
.
lscalar_one
if
type
(
s_i
)
in
(
int
,
long
)
or
isinstance
(
s_i
,
numpy
.
integer
):
# this shape is a constant
assert
s_i
>=
0
return
T
.
constant
(
s_i
,
dtype
=
'int64'
)
if
type
(
s_i
)
in
(
tuple
,
list
):
if
type
(
s_i
)
in
(
tuple
,
list
):
# this dimension is the same as many of the inputs
# which tells us that if one of the inputs is known,
# the others all become known.
...
...
@@ -676,11 +686,19 @@ class ShapeFeature(object):
s_i
,
type
(
s_i
),
getattr
(
s_i
,
'type'
,
None
))
def
set_shape
(
self
,
r
,
s
):
"""Assign the shape `s` to previously un-shaped variable `r`.
:type r: a variable
:type s: None or a tuple of symbolic integers
"""
assert
r
not
in
self
.
shape_of
,
'r already in shape_of'
if
s
is
None
:
self
.
shape_of
[
r
]
=
s
else
:
self
.
shape_of
[
r
]
=
tuple
([
self
.
unpack
(
s_i
)
for
s_i
in
s
])
shape_vars
=
[
self
.
unpack
(
s_i
)
for
s_i
in
s
]
self
.
shape_of
[
r
]
=
tuple
(
shape_vars
)
for
sv
in
shape_vars
:
self
.
shape_of_reverse_index
.
setdefault
(
sv
,
set
())
.
add
(
r
)
def
update_shape
(
self
,
r
,
other_r
):
'''Replace shape of r by shape of other_r.
...
...
@@ -692,16 +710,17 @@ class ShapeFeature(object):
assert
other_r
in
self
.
shape_of
,
(
'other_r not in shape_of'
,
other_r
)
other_shape
=
self
.
shape_of
[
other_r
]
# If other_shape has no information, call is pointless.
if
other_shape
is
None
:
return
if
r
in
self
.
shape_of
:
r_shape
=
self
.
shape_of
[
r
]
else
:
# If no info is known on r's shape, use other_shape
self
.
shape_of
[
r
]
=
other_shape
return
# If other_shape has no information, use r_shape
if
other_shape
is
None
:
self
.
shape_of
[
r
]
=
r_shape
for
sv
in
other_shape
:
self
.
shape_of_reverse_index
.
setdefault
(
sv
,
set
())
.
add
(
r
)
return
# Merge other_shape with r_shape, giving the priority to other_shape
...
...
@@ -711,14 +730,16 @@ class ShapeFeature(object):
# For now, we consider 2 cases of uninformative other_shape[i]:
# - Shape_i(i)(other_r);
# - Shape_i(i)(r).
if
(
ps
.
owner
and
isinstance
(
getattr
(
ps
.
owner
,
'op'
,
None
),
Shape_i
)
and
ps
.
owner
.
op
.
i
==
i
and
ps
.
owner
.
inputs
[
0
]
in
(
r
,
other_r
)):
if
(
ps
.
owner
and
isinstance
(
getattr
(
ps
.
owner
,
'op'
,
None
),
Shape_i
)
and
ps
.
owner
.
op
.
i
==
i
and
ps
.
owner
.
inputs
[
0
]
in
(
r
,
other_r
)):
merged_shape
.
append
(
r_shape
[
i
])
else
:
merged_shape
.
append
(
other_shape
[
i
])
self
.
shape_of
[
r
]
=
tuple
(
merged_shape
)
for
sv
in
self
.
shape_of
[
r
]:
self
.
shape_of_reverse_index
.
setdefault
(
sv
,
set
())
.
add
(
r
)
def
set_shape_i
(
self
,
r
,
i
,
s_i
):
'''Replace element i of shape_of[r] by s_i'''
...
...
@@ -733,14 +754,16 @@ class ShapeFeature(object):
else
:
new_shape
.
append
(
s_j
)
self
.
shape_of
[
r
]
=
tuple
(
new_shape
)
for
sv
in
self
.
shape_of
[
r
]:
self
.
shape_of_reverse_index
.
setdefault
(
sv
,
set
())
.
add
(
r
)
def
init_r
(
self
,
r
):
'''Register r's shape in the shape_of dictionary.'''
if
r
not
in
self
.
shape_of
:
try
:
self
.
set_shape
(
r
,
self
.
shape_tuple
(
r
))
except
AttributeError
:
self
.
set_shape
(
r
,
None
)
except
AttributeError
:
#XXX: where would this come from?
self
.
set_shape
(
r
,
None
)
def
make_vector_shape
(
self
,
r
):
return
make_vector
(
*
self
.
shape_of
[
r
])
...
...
@@ -757,8 +780,15 @@ class ShapeFeature(object):
self
.
lscalar_one
=
T
.
constant
(
1
,
dtype
=
'int64'
)
assert
self
.
lscalar_one
.
type
==
T
.
lscalar
self
.
shape_of
=
{}
# Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self
.
scheduled
=
{}
# Variable ->
self
.
shape_of
=
{}
# Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self
.
scheduled
=
{}
# Variable ->
self
.
shape_of_reverse_index
=
{}
# shape var -> graph v
for
node
in
env
.
toposort
():
self
.
on_import
(
env
,
node
)
...
...
@@ -798,9 +828,11 @@ class ShapeFeature(object):
# this is packed information
# an element of o_shapes is either None or a tuple
# elements of the tuple can be either strings, or ints
if
len
(
o_shapes
)
!=
len
(
node
.
outputs
):
raise
Exception
(
'len(o_shapes) = '
+
str
(
len
(
o_shapes
))
+
' != len(node.outputs) = '
+
str
(
len
(
node
.
outputs
)))
raise
Exception
(
'len(o_shapes) = '
+
str
(
len
(
o_shapes
))
+
' != len(node.outputs) = '
+
str
(
len
(
node
.
outputs
)))
for
r
,
s
in
zip
(
node
.
outputs
,
o_shapes
):
self
.
set_shape
(
r
,
s
)
...
...
@@ -818,23 +850,28 @@ class ShapeFeature(object):
# the shape of new_r. Say that r is *scheduled*.
# At that point, node is no longer a client of r, but of new_r
for
(
shpnode
,
idx
)
in
(
r
.
clients
+
[(
node
,
i
)]):
if
isinstance
(
getattr
(
shpnode
,
'op'
,
None
),
Shape_i
):
if
isinstance
(
getattr
(
shpnode
,
'op'
,
None
),
Shape_i
):
self
.
scheduled
[
shpnode
]
=
new_r
# In case 2, if r is a variable that we've scheduled for shape update, then we
# should cancel it.
# TODO: store some kind of reverse index?
for
k
,
v
in
self
.
scheduled
.
items
():
if
v
==
r
:
del
self
.
scheduled
[
k
]
unscheduled
=
[
k
for
k
,
v
in
self
.
scheduled
.
items
()
if
v
==
r
]
for
k
in
unscheduled
:
del
self
.
scheduled
[
k
]
# In either case, r could be in shape_of.values(), that is, r itself
# is the shape of something. In that case, we want to update
# the value in shape_of, to keep it up-to-date.
for
k
,
v
in
self
.
shape_of
.
iteritems
():
if
v
is
not
None
:
for
ii
,
vi
in
enumerate
(
v
):
if
vi
==
r
:
self
.
set_shape_i
(
k
,
ii
,
new_r
)
for
v
in
self
.
shape_of_reverse_index
.
get
(
r
,
[]):
# The reverse index is only approximate. It is not updated on
# deletion of variables, or on change_input so it might be the
# case that there are a few extra `v`'s in it that no longer have
# a shape of r or possibly have been deleted from shape_of
# entirely. The important thing is that it permits to recall
# all variables with r in their shape.
for
ii
,
svi
in
enumerate
(
self
.
shape_of
.
get
(
v
,
[])):
if
svi
==
r
:
self
.
set_shape_i
(
v
,
ii
,
new_r
)
self
.
shape_of_reverse_index
[
r
]
=
set
()
class
ShapeOptimizer
(
Optimizer
):
"""Optimizer that serves to add ShapeFeature as an env feature.
...
...
@@ -926,6 +963,7 @@ def local_track_shape_i(node):
if
node
in
shape_feature
.
scheduled
:
assert
isinstance
(
node
.
op
,
Shape_i
)
replacement
=
shape_feature
.
scheduled
[
node
]
# XXX: what the heck is up with node.op.i ???
return
[
shape_feature
.
shape_of
[
replacement
][
node
.
op
.
i
]]
@register_specialize
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
a87e9bb0
...
...
@@ -767,8 +767,8 @@ class test_fusion(unittest.TestCase):
cases
=
[
(
fx
+
fy
+
fz
,(
fx
,
fy
,
fz
),(
fxv
,
fyv
,
fzv
),
1
,
fxv
+
fyv
+
fzv
,
'float32'
),
#0
(
fx
*
fy
*
fz
,(
fx
,
fy
,
fz
),(
fxv
,
fyv
,
fzv
),
1
,
fxv
*
fyv
*
fzv
,
'float32'
),
#1
(
fx
+
fy
*
fz
,(
fx
,
fy
,
fz
),(
fxv
,
fyv
,
fzv
),
1
,
fxv
+
fyv
*
fzv
,
'float32'
),
(
fx
*
fy
+
fz
,(
fx
,
fy
,
fz
),(
fxv
,
fyv
,
fzv
),
1
,
fxv
*
fyv
+
fzv
,
'float32'
),
(
fx
+
fy
*
fz
,(
fx
,
fy
,
fz
),(
fxv
,
fyv
,
fzv
),
1
,
fxv
+
fyv
*
fzv
,
'float32'
),
#2
(
fx
*
fy
+
fz
,(
fx
,
fy
,
fz
),(
fxv
,
fyv
,
fzv
),
1
,
fxv
*
fyv
+
fzv
,
'float32'
),
#3
(
fw
+
fx
+
fy
+
fz
,(
fw
,
fx
,
fy
,
fz
),(
fwv
,
fxv
,
fyv
,
fzv
),
1
,
fwv
+
fxv
+
fyv
+
fzv
,
'float32'
),
((
fw
+
fx
)
+
(
fy
+
fz
),(
fw
,
fx
,
fy
,
fz
),(
fwv
,
fxv
,
fyv
,
fzv
),
1
,
fwv
+
fxv
+
fyv
+
fzv
,
'float32'
),
#5
(((
fw
+
fx
)
+
fy
)
+
fz
,(
fw
,
fx
,
fy
,
fz
),(
fwv
,
fxv
,
fyv
,
fzv
),
1
,
fwv
+
fxv
+
fyv
+
fzv
,
'float32'
),
...
...
@@ -891,11 +891,19 @@ class test_fusion(unittest.TestCase):
t1
=
time
.
time
()
out
=
out
.
get_value
()
#print "CASE2/3", f.maker.env.toposort()
#print 'CASE2/3', f.maker.env
#print 'CASE2/3', f.maker.env.toposort()[3].op.scalar_op.env
times
[
id
]
=
t1
-
t0
atol
=
1e-8
if
out_dtype
==
'float32'
:
atol
=
1e-6
if
not
numpy
.
allclose
(
out
,
answer
*
nb_repeat
,
atol
=
atol
):
fail1
.
append
(
id
)
print
val_inputs
print
out
print
answer
*
nb_repeat
#assert 0
topo
=
f
.
maker
.
env
.
toposort
()
if
gpu
:
import
theano.sandbox.cuda
as
cuda
...
...
@@ -1109,6 +1117,70 @@ class test_fusion(unittest.TestCase):
# cases[id]=None #to remove g, that link to out that link to the ndarray!
#g.owner.inputs[0] is out... make owner a weakref?
class
TestCompositeCodegen
(
unittest
.
TestCase
):
"""
Test The Composite Ops code generation in a case where there is multiple
scalar ops with support code.
"""
def
setUp
(
self
):
class
TimesN
(
theano
.
scalar
.
basic
.
UnaryScalarOp
):
def
__init__
(
self
,
n
,
*
args
,
**
kwargs
):
self
.
n
=
n
theano
.
scalar
.
basic
.
UnaryScalarOp
.
__init__
(
self
,
*
args
,
**
kwargs
)
def
impl
(
self
,
x
):
return
x
*
self
.
n
def
c_support_code_apply
(
self
,
node
,
nodename
):
n
=
str
(
self
.
n
)
return
"""
float
%(nodename)
s_timesn(float x) { return x *
%(n)
s; }
"""
%
locals
()
def
c_code
(
self
,
node
,
name
,
(
x
,
),
(
z
,
),
sub
):
return
"
%(z)
s =
%(name)
s_timesn(
%(x)
s);"
%
locals
()
upgrade_to_float
=
theano
.
scalar
.
basic
.
upgrade_to_float
self
.
scal_times_2
=
TimesN
(
2
,
upgrade_to_float
,
name
=
'times_2'
)
self
.
times_2
=
theano
.
tensor
.
elemwise
.
Elemwise
(
self
.
scal_times_2
,
name
=
'times_2'
)
self
.
scal_times_3
=
TimesN
(
3
,
upgrade_to_float
,
name
=
'times_3'
)
self
.
times_3
=
theano
.
tensor
.
elemwise
.
Elemwise
(
self
.
scal_times_3
,
name
=
'times_3'
)
self
.
x
=
fvector
()
def
test_nested_composite
(
self
):
y
=
self
.
times_2
(
self
.
x
)
z
=
self
.
times_3
(
y
)
f
=
function
([
self
.
x
],
z
)
assert
len
(
f
.
maker
.
env
.
toposort
())
==
1
fval
=
f
([
1
,
2
,
3
])
assert
numpy
.
all
(
fval
==
[
6
,
12
,
18
])
def
test_nested_gpu
(
self
):
import
theano.sandbox.cuda
as
cuda
if
not
cuda
.
cuda_available
:
raise
SkipTest
(
"cuda not available"
)
import
theano.sandbox.cuda.opt
y
=
self
.
times_2
(
self
.
x
)
z
=
self
.
times_3
(
y
)
f
=
theano
.
function
([
self
.
x
],
cuda
.
gpu_from_host
(
z
))
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
2
assert
topo
[
1
]
.
op
==
cuda
.
gpu_from_host
# topo1 is doing the composite work on the CPU. Auto-generation of
# GPU code for ops with support code is not possible.
fval
=
numpy
.
asarray
(
f
([
1
,
2
,
3
]))
assert
numpy
.
all
(
fval
==
[
6
,
12
,
18
]),
fval
def
test_log1p
():
m
=
theano
.
config
.
mode
if
m
==
'FAST_COMPILE'
:
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论