Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9bc05a38
提交
9bc05a38
authored
4月 10, 2017
作者:
Tim Cooijmans
提交者:
Reyhane Askari
8月 25, 2017
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
define and use with_stack_trace
上级
592e7c75
隐藏空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
110 行增加
和
61 行删除
+110
-61
opt.py
theano/gof/opt.py
+28
-0
basic_ops.py
theano/gpuarray/basic_ops.py
+8
-7
opt.py
theano/gpuarray/opt.py
+18
-12
test_opt.py
theano/gpuarray/tests/test_opt.py
+49
-39
opt_uncanonicalize.py
theano/tensor/opt_uncanonicalize.py
+7
-3
没有找到文件。
theano/gof/opt.py
浏览文件 @
9bc05a38
...
...
@@ -2948,6 +2948,34 @@ def copy_stack_trace(from_var, to_var):
to_var
.
tag
.
trace
=
getattr
(
to_var
.
tag
,
'trace'
,
[])
+
tr
def with_stack_trace(from_var, to_var):
    """
    Copy stack traces onto `to_var` and hand `to_var` back to the caller.

    This is a thin wrapper around `copy_stack_trace` that returns the
    destination, which lets the copy be written inline as part of an
    expression (e.g. directly in a ``return`` statement).

    Parameters
    ----------
    from_var
        Tensor variable or list of tensor variables whose stack traces
        are copied.
    to_var
        Tensor variable or list of tensor variables that receive the
        stack traces.

    Returns
    -------
    tensor variable or list of tensor variables
        The `to_var` argument itself, after `copy_stack_trace` has been
        applied to it with `from_var` as the source.

    Notes
    -----
    The stack trace is assumed to be a list of lists of tuples. Each
    tuple contains the filename, line number, function name and so on.
    Each list of tuples contains the tuples belonging to a particular
    variable.

    """
    copy_stack_trace(from_var, to_var)
    return to_var
def
check_stack_trace
(
f_or_fgraph
,
ops_to_check
=
'last'
,
bug_print
=
'raise'
):
"""
This function checks if the outputs of specific ops of a compiled graph
...
...
theano/gpuarray/basic_ops.py
浏览文件 @
9bc05a38
...
...
@@ -15,6 +15,7 @@ from theano.tensor.basic import (
from
theano.gof
import
HideC
,
COp
,
ParamsType
from
theano.gof.utils
import
MethodNotDefined
from
theano.gof.opt
import
with_stack_trace
from
collections
import
deque
...
...
@@ -75,11 +76,11 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor
if
isinstance
(
x
.
type
,
tensor
.
TensorType
):
return
GpuFromHost
(
context_name
)(
x
)
return
with_stack_trace
(
x
,
GpuFromHost
(
context_name
)(
x
)
)
# Try _as_GpuArrayVariable if possible
if
hasattr
(
x
,
'_as_GpuArrayVariable'
):
return
x
.
_as_GpuArrayVariable
(
context_name
)
return
with_stack_trace
(
x
,
x
.
_as_GpuArrayVariable
(
context_name
)
)
# If it didn't work try for a constant
ctx
=
get_context
(
context_name
)
...
...
@@ -88,13 +89,13 @@ def as_gpuarray_variable(x, context_name):
if
x
.
context
.
ptr
!=
ctx
.
ptr
:
x
=
x
.
transfer
(
ctx
)
x
=
gpuarray
.
asarray
(
x
,
context
=
ctx
)
x
=
with_stack_trace
(
x
,
gpuarray
.
asarray
(
x
,
context
=
ctx
)
)
bcast
=
[(
s
==
1
)
for
s
in
x
.
shape
]
return
GpuArrayConstant
(
GpuArrayType
(
dtype
=
x
.
dtype
,
broadcastable
=
bcast
,
context_name
=
context_name
),
x
)
return
with_stack_trace
(
x
,
GpuArrayConstant
(
GpuArrayType
(
dtype
=
x
.
dtype
,
broadcastable
=
bcast
,
context_name
=
context_name
),
x
)
)
def
infer_context_name
(
*
vars
):
...
...
theano/gpuarray/opt.py
浏览文件 @
9bc05a38
...
...
@@ -15,7 +15,7 @@ from theano.compile.ops import shape_i
from
theano.gof
import
(
local_optimizer
,
EquilibriumDB
,
TopoOptimizer
,
LocalGroupDB
,
SequenceDB
,
Optimizer
,
DB
,
toolbox
,
graph
)
from
theano.gof.opt
import
LocalMetaOptimizer
,
copy_stack_trace
from
theano.gof.opt
import
LocalMetaOptimizer
,
copy_stack_trace
,
with_stack_trace
from
theano.ifelse
import
IfElse
from
theano.misc.ordered_set
import
OrderedSet
...
...
@@ -421,6 +421,8 @@ class GraphToGPU(Optimizer):
if
isinstance
(
new_ops
,
theano
.
Op
):
outputs
=
new_ops
(
*
[
mapping
[
i
]
for
i
in
node
.
inputs
],
return_list
=
True
)
for
old_output
,
new_output
in
zip
(
node
.
outputs
,
outputs
):
copy_stack_trace
(
old_output
,
new_output
)
elif
not
new_ops
:
newnode
=
node
.
clone_with_new_inputs
([
mapping
.
get
(
i
)
for
i
in
node
.
inputs
])
outputs
=
newnode
.
outputs
...
...
@@ -461,7 +463,7 @@ class GraphToGPU(Optimizer):
new_o
.
owner
.
inputs
[
0
]
.
type
==
o
.
type
):
new_o
=
new_o
.
owner
.
inputs
[
0
]
else
:
new_o
=
safe_to_cpu
(
new_o
)
new_o
=
with_stack_trace
(
o
,
safe_to_cpu
(
new_o
)
)
new_nodes
.
append
(
new_o
)
fgraph
.
replace_all_validate
(
zip
(
fgraph
.
outputs
,
new_nodes
),
reason
=
self
.
__class__
.
__name__
)
...
...
@@ -692,8 +694,6 @@ def local_gpu_contiguous_gpu_contiguous(node):
if
isinstance
(
node
.
op
,
GpuContiguous
):
inp
=
node
.
inputs
[
0
]
if
inp
.
owner
and
isinstance
(
inp
.
owner
.
op
,
GpuContiguous
):
if
not
getattr
(
inp
.
tag
,
'trace'
,
None
):
copy_stack_trace
(
node
.
outputs
[
0
],
inp
)
return
[
inp
]
...
...
@@ -1220,7 +1220,7 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
op
.
scalar_op
,
axis
=
op
.
axis
,
dtype
=
odtype
,
acc_dtype
=
adtype
)
gvar
=
greduce
(
x
)
gvar
=
with_stack_trace
(
outputs
,
greduce
(
x
)
)
# We need to have the make node called, otherwise the mask can
# be None
if
(
op2
is
GpuCAReduceCPY
or
...
...
@@ -1260,22 +1260,27 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
dtype
=
getattr
(
op
,
'dtype'
,
outputs
[
0
]
.
dtype
),
acc_dtype
=
getattr
(
op
,
'acc_dtype'
,
None
))
reshaped_x
=
x
.
reshape
(
tensor
.
stack
(
new_in_shp
))
gpu_reshaped_x
=
as_gpuarray_variable
(
reshaped_x
,
context_name
)
gvar
=
greduce
(
gpu_reshaped_x
)
reshaped_x
=
with_stack_trace
(
outputs
,
x
.
reshape
(
tensor
.
stack
(
new_in_shp
)))
gpu_reshaped_x
=
with_stack_trace
(
outputs
,
as_gpuarray_variable
(
reshaped_x
,
context_name
))
gvar
=
with_stack_trace
(
outputs
,
greduce
(
gpu_reshaped_x
))
# We need to have the make node called, otherwise the mask can
# be None
reshaped_gpu_inputs
=
[
gpu_reshaped_x
]
if
greduce
.
supports_c_code
(
reshaped_gpu_inputs
):
reduce_reshaped_x
=
greduce
(
gpu_reshaped_x
)
reduce_reshaped_x
=
with_stack_trace
(
outputs
,
greduce
(
gpu_reshaped_x
))
if
reduce_reshaped_x
.
ndim
!=
outputs
[
0
]
.
ndim
:
out_shp
=
[]
for
i
in
range
(
x
.
ndim
):
if
i
not
in
op
.
axis
:
out_shp
.
append
(
shape_i
(
x
,
i
))
unreshaped_reduce
=
GpuReshape
(
len
(
out_shp
))(
reduce_reshaped_x
,
tensor
.
stack
(
out_shp
))
unreshaped_reduce
=
with_stack_trace
(
outputs
,
GpuReshape
(
len
(
out_shp
))(
reduce_reshaped_x
,
tensor
.
stack
(
out_shp
)))
else
:
unreshaped_reduce
=
reduce_reshaped_x
return
[
unreshaped_reduce
]
...
...
@@ -2398,7 +2403,8 @@ def local_gpu_elemwise_careduce(node):
props
=
node
.
op
.
_props_dict
()
props
[
"pre_scalar_op"
]
=
scalar
.
basic
.
sqr
out
=
GpuCAReduceCuda
(
**
props
)(
inp
)
return
[
out
]
return
with_stack_trace
(
node
.
outputs
,
out
)
@local_optimizer
(
None
)
...
...
theano/gpuarray/tests/test_opt.py
浏览文件 @
9bc05a38
...
...
@@ -14,7 +14,7 @@ import theano.gpuarray
from
..
import
basic_ops
from
..type
import
GpuArrayType
,
gpuarray_shared_constructor
,
get_context
from
..basic_ops
import
(
GpuAlloc
,
GpuAllocEmpty
,
GpuReshape
,
GpuFromHost
,
host_from_gpu
)
GpuAlloc
,
GpuAllocEmpty
,
GpuReshape
,
GpuFromHost
,
HostFromGpu
,
host_from_gpu
)
from
..blas
import
GpuGemm
from
..elemwise
import
(
GpuCAReduceCuda
,
GpuCAReduceCPY
,
GpuElemwise
,
Elemwise
,
max_inputs_to_GpuElemwise
)
...
...
@@ -28,6 +28,16 @@ from theano.tensor.nnet import abstract_conv
from
theano.gpuarray
import
dnn
,
blas
def _check_stack_trace(thing):
    """
    Run `check_stack_trace` on `thing` with a filter that skips some ops.

    The filter passed as ``ops_to_check`` returns False for Shape_i,
    IfElse, GpuFromHost, HostFromGpu and GpuElemwise, and True for
    everything else.

    Parameters
    ----------
    thing
        Forwarded unchanged as the first argument of `check_stack_trace`.

    Returns
    -------
    Whatever `check_stack_trace` returns.

    """
    def _needs_trace(candidate):
        # Anything that is not an Op is assumed to be a node carrying
        # its Op in the `.op` attribute.
        actual_op = (candidate if isinstance(candidate, theano.gof.Op)
                     else candidate.op)
        skipped_op_types = (theano.compile.ops.Shape_i,
                            theano.ifelse.IfElse,
                            GpuFromHost,
                            HostFromGpu,
                            GpuElemwise)
        return not isinstance(actual_op, skipped_op_types)

    return check_stack_trace(thing, ops_to_check=_needs_trace)
def
test_local_assert
():
x
=
theano
.
tensor
.
fmatrix
()
a
=
theano
.
tensor
.
opt
.
assert_op
(
x
,
theano
.
tensor
.
eq
(
x
,
0
)
.
any
())
...
...
@@ -71,8 +81,8 @@ def test_local_gpu_contiguous_gpu_contiguous():
if
isinstance
(
node
.
op
,
basic_ops
.
GpuContiguous
)])
assert
1
==
len
([
node
for
node
in
f2
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
node
.
op
,
basic_ops
.
GpuContiguous
)])
assert
check_stack_trace
(
f1
,
ops_to_check
=
'all'
)
assert
check_stack_trace
(
f2
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f1
)
assert
_check_stack_trace
(
f2
)
def
test_local_gpu_contiguous
():
...
...
@@ -82,7 +92,7 @@ def test_local_gpu_contiguous():
assert
1
==
len
([
node
for
node
in
f
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
node
.
op
,
basic_ops
.
GpuContiguous
)])
f
([[
2.
]])
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
def
test_flatten
():
...
...
@@ -100,7 +110,7 @@ def test_flatten():
assert
res
.
shape
==
val
.
flatten
()
.
shape
assert
GpuReshape
in
[
type
(
node
.
op
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
f
=
theano
.
function
([
m
],
m
.
flatten
(
ndim
=
2
),
mode
=
mode_with_gpu
.
excluding
(
"local_useless_reshape"
))
...
...
@@ -110,7 +120,7 @@ def test_flatten():
assert
res
.
shape
==
val
.
shape
assert
GpuReshape
in
[
type
(
node
.
op
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
m
=
theano
.
tensor
.
tensor3
()
f
=
theano
.
function
([
m
],
m
.
flatten
(
ndim
=
2
),
mode
=
mode_with_gpu
)
...
...
@@ -120,7 +130,7 @@ def test_flatten():
assert
res
.
shape
==
val
.
reshape
(
10
,
-
1
)
.
shape
assert
GpuReshape
in
[
type
(
node
.
op
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
def
test_reduce
():
...
...
@@ -133,7 +143,7 @@ def test_reduce():
f
=
theano
.
function
([
m
],
getattr
(
m
,
method
)(
axis
=
0
,
**
param
),
mode
=
mode_with_gpu
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
val
=
np
.
random
.
rand
(
10
,
11
)
.
astype
(
"float32"
)
res
=
f
(
val
)
utt
.
assert_allclose
(
res
,
getattr
(
val
,
method
)(
axis
=
0
))
...
...
@@ -165,7 +175,7 @@ def test_local_gpualloc_memset_0():
assert
len
(
topo
)
==
1
assert
isinstance
(
topo
[
0
]
.
op
,
theano
.
tensor
.
Alloc
)
assert
(
np
.
asarray
(
f
(
6
))
==
0
)
.
all
()
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test with 0 from CPU op.
# Should be transferred as it is used by another op.
...
...
@@ -175,7 +185,7 @@ def test_local_gpualloc_memset_0():
assert
len
(
topo
)
==
3
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAlloc
)
assert
(
np
.
asarray
(
f
(
6
))
==
0
)
.
all
()
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test with 0
a
=
GpuAlloc
(
test_ctx_name
)(
z
,
i
)
...
...
@@ -184,7 +194,7 @@ def test_local_gpualloc_memset_0():
assert
len
(
topo
)
==
1
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAlloc
)
and
topo
[
0
]
.
op
.
memset_0
assert
(
np
.
asarray
(
f
(
6
))
==
0
)
.
all
()
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test with 1
a
=
GpuAlloc
(
test_ctx_name
)(
o
,
i
)
...
...
@@ -194,7 +204,7 @@ def test_local_gpualloc_memset_0():
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAlloc
)
assert
not
topo
[
0
]
.
op
.
memset_0
assert
(
np
.
asarray
(
f
(
6
))
==
1
)
.
all
()
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test with 1, 1
a
=
GpuAlloc
(
test_ctx_name
)(
ones
,
i
)
...
...
@@ -204,7 +214,7 @@ def test_local_gpualloc_memset_0():
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAlloc
)
assert
not
topo
[
0
]
.
op
.
memset_0
assert
(
np
.
asarray
(
f
(
2
))
==
1
)
.
all
()
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
def
test_local_gpualloc_empty
():
...
...
@@ -220,7 +230,7 @@ def test_local_gpualloc_empty():
assert
isinstance
(
topo
[
0
]
.
op
,
theano
.
tensor
.
AllocEmpty
)
# This returns uninitialized data, so we can only check the shape
assert
f
(
3
)
.
shape
==
(
3
,)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test with vector
# Should be moved
...
...
@@ -231,7 +241,7 @@ def test_local_gpualloc_empty():
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAllocEmpty
)
# This returns uninitialized data, so we can only check the shape
assert
f
(
3
)
.
shape
==
(
3
,)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test with matrix
a
=
tensor
.
AllocEmpty
(
'float32'
)(
i
,
ii
)
...
...
@@ -241,7 +251,7 @@ def test_local_gpualloc_empty():
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAllocEmpty
)
# This returns uninitialized data, so we can only check the shape
assert
f
(
3
,
4
)
.
shape
==
(
3
,
4
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
def
test_rebroadcast
():
...
...
@@ -259,7 +269,7 @@ def test_rebroadcast():
assert
isinstance
(
rebr
.
inputs
[
0
]
.
type
,
GpuArrayType
)
assert
isinstance
(
rebr
.
outputs
[
0
]
.
type
,
GpuArrayType
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
class
TestSpecifyShape
(
test_basic
.
TestSpecifyShape
):
mode
=
mode_with_gpu
...
...
@@ -284,7 +294,7 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
theano
.
ifelse
.
ifelse
(
cond
,
x
.
mean
(),
x
.
sum
()),
mode
=
mode_with_gpu
)
assert
f
(
np
.
float32
([
1
,
2
,
3
]),
0
)
==
6
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
x
=
tensor
.
vector
()
cond
=
tensor
.
scalar
()
...
...
@@ -292,7 +302,7 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
theano
.
ifelse
.
ifelse
(
cond
,
x
.
mean
(),
x
.
sum
()),
mode
=
mode_with_gpu
)
assert
f
(
np
.
float32
([
1
,
2
,
3
]),
0
)
==
6
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
def
test_lifter_with_shared_var
(
self
):
x
=
tensor
.
lscalar
(
'x'
)
...
...
@@ -315,7 +325,7 @@ def test_print_op():
assert
isinstance
(
topo
[
1
]
.
op
,
theano
.
printing
.
Print
)
assert
isinstance
(
topo
[
2
]
.
op
,
GpuElemwise
)
assert
topo
[
3
]
.
op
==
host_from_gpu
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
f
(
np
.
random
.
random
((
5
,
5
))
.
astype
(
'float32'
))
...
...
@@ -336,7 +346,7 @@ def test_pdbbreakpoint_op():
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
isinstance
(
topo
[
-
2
]
.
op
,
GpuElemwise
)
assert
topo
[
-
1
]
.
op
==
host_from_gpu
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
def
test_local_gpu_elemwise_careduce
():
...
...
@@ -346,7 +356,7 @@ def test_local_gpu_elemwise_careduce():
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
len
(
topo
)
==
3
assert
topo
[
1
]
.
op
.
pre_scalar_op
==
theano
.
scalar
.
sqr
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
data
=
np
.
random
.
rand
(
3
,
4
)
.
astype
(
theano
.
config
.
floatX
)
utt
.
assert_allclose
(
f
(
data
),
(
data
*
data
)
.
sum
())
...
...
@@ -355,7 +365,7 @@ def test_local_gpu_elemwise_careduce():
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
len
(
topo
)
==
3
assert
topo
[
1
]
.
op
.
pre_scalar_op
==
theano
.
scalar
.
sqr
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
utt
.
assert_allclose
(
f
(
data
),
(
data
*
data
)
.
sum
(
axis
=
1
))
...
...
@@ -374,7 +384,7 @@ def test_local_lift_dot22scalar():
y_val
=
np
.
random
.
random
((
3
,
4
))
.
astype
(
theano
.
config
.
floatX
)
a_val
=
0.5
utt
.
assert_allclose
(
f_cpu
(
x_val
,
y_val
,
a_val
),
f_gpu
(
x_val
,
y_val
,
a_val
))
assert
check_stack_trace
(
f_gpu
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f_gpu
)
def
test_local_gpu_subtensor
():
...
...
@@ -384,7 +394,7 @@ def test_local_gpu_subtensor():
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
any
([
type
(
node
.
op
)
is
tensor
.
Subtensor
for
node
in
topo
])
assert
not
any
([
isinstance
(
node
.
op
,
GpuSubtensor
)
for
node
in
topo
])
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test graph input.
t
=
tensor
.
fmatrix
()
...
...
@@ -392,7 +402,7 @@ def test_local_gpu_subtensor():
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
any
([
type
(
node
.
op
)
is
tensor
.
Subtensor
for
node
in
topo
])
assert
not
any
([
isinstance
(
node
.
op
,
GpuSubtensor
)
for
node
in
topo
])
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test multiple use of the input
# We want the subtensor to be on the GPU to prevent multiple transfer.
...
...
@@ -401,7 +411,7 @@ def test_local_gpu_subtensor():
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
not
any
([
type
(
node
.
op
)
is
tensor
.
Subtensor
for
node
in
topo
])
assert
any
([
isinstance
(
node
.
op
,
GpuSubtensor
)
for
node
in
topo
])
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test multiple use of the input + input as output
# We want the subtensor to be on the GPU to prevent multiple transfer.
...
...
@@ -410,7 +420,7 @@ def test_local_gpu_subtensor():
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
not
any
([
type
(
node
.
op
)
is
tensor
.
Subtensor
for
node
in
topo
])
assert
any
([
isinstance
(
node
.
op
,
GpuSubtensor
)
for
node
in
topo
])
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test shared forced on CPU and we do computation on the output of
# the subtensor.
...
...
@@ -423,7 +433,7 @@ def test_local_gpu_subtensor():
# If it were just a little bit smarter, it could wrongly move it to the GPU.
# If it were super smart, it would know it should not move it to the GPU.
assert
any
([
isinstance
(
node
.
op
,
tensor
.
Elemwise
)
for
node
in
topo
])
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
def
test_local_gpu_elemwise
():
...
...
@@ -445,7 +455,7 @@ def test_local_gpu_elemwise():
assert
sum
(
isinstance
(
node
.
op
,
GpuElemwise
)
for
node
in
topo
)
==
1
assert
sum
(
type
(
node
.
op
)
==
tensor
.
Elemwise
for
node
in
topo
)
==
0
utt
.
assert_allclose
(
f
(
a_v
,
b_v
,
c_v
),
a_v
+
b_v
+
c_v
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Now test with the composite already on the cpu before we move it
# to the gpu
...
...
@@ -459,7 +469,7 @@ def test_local_gpu_elemwise():
assert
sum
(
isinstance
(
node
.
op
,
GpuElemwise
)
for
node
in
topo
)
==
1
assert
sum
(
type
(
node
.
op
)
==
tensor
.
Elemwise
for
node
in
topo
)
==
0
utt
.
assert_allclose
(
f
(
a_v
,
b_v
,
c_v
),
a_v
+
b_v
+
c_v
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
return
# Not yet implemented
# Test multiple output
...
...
@@ -477,7 +487,7 @@ def test_local_gpu_elemwise():
utt
.
assert_allclose
(
out
[
0
],
a_v
)
utt
.
assert_allclose
(
out
[
1
],
c_v
)
utt
.
assert_allclose
(
out
[
2
],
b_v
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test multiple output
out_s
=
theano
.
scalar
.
Composite
([
a_s
,
b_s
,
c_s
],
[
a_s
+
b_s
,
a_s
*
b_s
])
...
...
@@ -489,7 +499,7 @@ def test_local_gpu_elemwise():
out
=
f
(
a_v
,
b_v
,
c_v
)
utt
.
assert_allclose
(
out
[
0
],
a_v
+
b_v
)
utt
.
assert_allclose
(
out
[
1
],
a_v
*
c_v
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
# Test non-contiguous input
c
=
gpuarray_shared_constructor
(
np
.
asarray
(
c_v
,
dtype
=
'float32'
))
...
...
@@ -498,7 +508,7 @@ def test_local_gpu_elemwise():
out
=
f
(
a_v
,
b_v
)
utt
.
assert_allclose
(
out
[
0
],
a_v
[::
2
]
+
b_v
[::
2
])
utt
.
assert_allclose
(
out
[
1
],
a_v
[::
2
]
*
c_v
[::
2
])
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
def
test_many_arg_elemwise
():
...
...
@@ -575,7 +585,7 @@ def test_local_lift_abstractconv_gpu_shape():
b
=
tensor
.
ftensor4
()
c
=
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradWeights
()(
a
,
b
,
s
)
f
=
theano
.
function
([
s
,
a
,
b
],
c
,
mode
=
mode_with_gpu
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
finally
:
theano
.
config
.
on_opt_error
=
prev
...
...
@@ -606,7 +616,7 @@ def test_local_assert_no_cpu_op():
try
:
theano
.
config
.
assert_no_cpu_op
=
'ignore'
f
=
theano
.
function
([],
out
,
mode
=
mode_local_assert
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
finally
:
theano
.
config
.
assert_no_cpu_op
=
old
...
...
@@ -618,7 +628,7 @@ def test_no_complex():
stft_out
=
tensor
.
exp
(
width_var
*
freq_var
)
*
signal_var
f
=
theano
.
function
([
width_var
,
freq_var
,
signal_var
],
stft_out
,
mode
=
mode_with_gpu
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
@utt.assertFailure_fast
...
...
@@ -637,7 +647,7 @@ def test_local_lift_solve():
A_val
=
np
.
random
.
uniform
(
-
0.4
,
0.4
,
(
5
,
5
))
.
astype
(
"float32"
)
b_val
=
np
.
random
.
uniform
(
-
0.4
,
0.4
,
(
5
,
3
))
.
astype
(
"float32"
)
utt
.
assert_allclose
(
f_cpu
(
A_val
,
b_val
),
f_gpu
(
A_val
,
b_val
))
assert
check_stack_trace
(
f_gpu
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f_gpu
)
def
test_gpu_solve_not_inplace
():
...
...
@@ -703,7 +713,7 @@ def test_local_gpua_advanced_incsubtensor():
w
=
tensor
.
set_subtensor
(
w
[
tensor
.
eq
(
y
,
1.0
)
.
nonzero
()],
100
)
w
=
tensor
.
set_subtensor
(
w
[
tensor
.
eq
(
y
,
-
1.0
)
.
nonzero
()],
0
)
f
=
theano
.
function
([
target
],
w
)
assert
check_stack_trace
(
f
,
ops_to_check
=
'all'
)
assert
_check_stack_trace
(
f
)
def
test_batched_dot_lifter
():
...
...
theano/tensor/opt_uncanonicalize.py
浏览文件 @
9bc05a38
...
...
@@ -43,6 +43,7 @@ from theano.tensor import DimShuffle, Subtensor
from
theano.tensor.opt
import
register_uncanonicalize
from
theano
import
scalar
as
scal
from
theano.gof.opt
import
copy_stack_trace
,
with_stack_trace
_logger
=
logging
.
getLogger
(
'theano.tensor.opt'
)
...
...
@@ -57,10 +58,13 @@ def local_max_and_argmax(node):
axis
=
node
.
op
.
get_params
(
node
)
if
len
(
node
.
outputs
[
1
]
.
clients
)
==
0
:
new
=
CAReduce
(
scal
.
maximum
,
axis
)(
node
.
inputs
[
0
])
copy_stack_trace
(
node
.
outputs
[
0
],
new
)
return
[
new
,
None
]
if
len
(
node
.
outputs
[
0
]
.
clients
)
==
0
:
return
[
None
,
T
.
Argmax
(
axis
)(
node
.
inputs
[
0
])]
new
=
T
.
Argmax
(
axis
)(
node
.
inputs
[
0
])
copy_stack_trace
(
node
.
outputs
[
0
],
new
)
return
[
None
,
new
]
@register_uncanonicalize
...
...
@@ -84,8 +88,8 @@ def local_max_to_min(node):
max
.
owner
.
op
.
scalar_op
==
scal
.
maximum
):
neg
=
max
.
owner
.
inputs
[
0
]
if
neg
.
owner
and
neg
.
owner
.
op
==
T
.
neg
:
return
[
CAReduce
(
scal
.
minimum
,
max
.
owner
.
op
.
axis
)(
neg
.
owner
.
inputs
[
0
]
)]
new
=
CAReduce
(
scal
.
minimum
,
max
.
owner
.
op
.
axis
)(
neg
.
owner
.
inputs
[
0
])
return
[
with_stack_trace
(
node
.
outputs
[
0
],
new
)]
return
False
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论