testgroup / pytensor · Commits

Commit 7a6d676f, authored Sep 03, 2017 by Frédéric Bastien, committed by GitHub on Sep 03, 2017.

Merge pull request #5688 from cooijmanstim/gpuarray-stack-trace

    gpuarray: keep stack trace

Parents: 47ac5f99, 9f8b5561

Showing 9 changed files with 144 additions and 24 deletions.
Changed files:

  theano/gof/graph.py                   +54   -0
  theano/gof/opt.py                     +21   -1
  theano/gpuarray/basic_ops.py           +3   -2
  theano/gpuarray/dnn.py                 +8   -1
  theano/gpuarray/opt.py                +39  -13
  theano/gpuarray/opt_util.py            +4   -0
  theano/gpuarray/tests/test_opt.py      +0   -0
  theano/tensor/blas.py                  +8   -4
  theano/tensor/opt_uncanonicalize.py    +7   -3
theano/gof/graph.py

```diff
@@ -4,6 +4,7 @@ Node classes (`Apply`, `Variable`) and expression graph algorithms.
 from __future__ import absolute_import, print_function, division

 from collections import deque
+import contextlib
 from copy import copy
 from itertools import count
@@ -390,6 +391,8 @@ class Variable(Node):
         self.name = name
         self.auto_name = 'auto_' + str(next(self.__count__))

+        Variable.notify_construction_observers(self)
+
     def __str__(self):
         """Return a str representation of the Variable.
@@ -536,6 +539,22 @@ class Variable(Node):
             d["tag"] = t
         return d

+    # refer to doc in nodes_constructed.
+    construction_observers = []
+
+    @classmethod
+    def append_construction_observer(cls, observer):
+        cls.construction_observers.append(observer)
+
+    @classmethod
+    def remove_construction_observer(cls, observer):
+        cls.construction_observers.remove(observer)
+
+    @classmethod
+    def notify_construction_observers(cls, instance):
+        for observer in cls.construction_observers:
+            observer(instance)
+

 class Constant(Variable):
     """
@@ -1426,3 +1445,38 @@ def is_in_ancestors(l_node, f_node):
             todo.append(cur)
             todo.extend(i.owner for i in cur.inputs if i.owner)
     return False
+
+
+@contextlib.contextmanager
+def nodes_constructed():
+    """
+    A contextmanager that is used in inherit_stack_trace and keeps track
+    of all the newly created variable nodes inside an optimization. A list
+    of new_nodes is instantiated but will be filled in a lazy manner (when
+    Variable.notify_construction_observers is called).
+
+    `observer` is the entity that updates the new_nodes list.
+
+    construction_observers is a list inside Variable class and contains
+    a list of observer functions. The observer functions inside
+    construction_observers are only called when a variable node is
+    instantiated (where Variable.notify_construction_observers is called).
+    When the observer function is called, a new variable node is added to
+    the new_nodes list.
+
+    Parameters
+    ----------
+    new_nodes
+        A list of all the variable nodes that are created inside the optimization.
+
+    yields
+        new_nodes list.
+    """
+    new_nodes = []
+
+    def observer(node):
+        new_nodes.append(node)
+    Variable.append_construction_observer(observer)
+    yield new_nodes
+    Variable.remove_construction_observer(observer)
```
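The hook is deliberately generic: any callable appended to `Variable.construction_observers` runs once per `Variable` built while it is registered, and `nodes_constructed` simply registers a collector for the duration of a block. A minimal usage sketch, with throwaway variables invented for the illustration:

```python
# Usage sketch (illustration only, not part of the commit).
import theano.tensor as T
from theano.gof.graph import nodes_constructed

x = T.vector('x')    # created outside the context: not observed
y = T.vector('y')

with nodes_constructed() as new_nodes:
    z = (x + y) * 2  # every Variable built in here is observed lazily

# new_nodes now holds z and its intermediates (e.g. x + y), but not x or y.
print(len(new_nodes))
```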
theano/gof/opt.py

```diff
@@ -6,6 +6,7 @@ amount of useful generic optimization tools.
 from __future__ import absolute_import, print_function, division

 from collections import deque, defaultdict, OrderedDict
+import contextlib
 import copy
 import inspect
 import logging
@@ -2902,7 +2903,7 @@ def pre_greedy_local_optimizer(list_optimizations, out):
 def copy_stack_trace(from_var, to_var):
     """
     Copies the stack trace from one or more tensor variables to
-    one or more tensor variables.
+    one or more tensor variables and returns the destination variables.

     Parameters
     ----------
@@ -2946,6 +2947,25 @@ def copy_stack_trace(from_var, to_var):
         # Copy over stack traces from from_var to each variable to
         # to_var, including the stack_trace of the to_var before
         to_var.tag.trace = getattr(to_var.tag, 'trace', []) + tr
+    return to_var
+
+
+@contextlib.contextmanager
+def inherit_stack_trace(from_var):
+    """
+    Contextmanager that copies the stack trace from one or more variable nodes to all
+    variable nodes constructed in the body. new_nodes is the list of all the newly created
+    variable nodes inside an optimization that is managed by graph.nodes_constructed().
+
+    Parameters
+    ----------
+    from_var
+        Variable node or a list of variable nodes to copy stack traces from.
+
+    """
+    with graph.nodes_constructed() as new_nodes:
+        yield
+        copy_stack_trace(from_var, new_nodes)


 def check_stack_trace(f_or_fgraph, ops_to_check='last', bug_print='raise'):
```
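Together, these helpers give the idiom used at every call site below: wrap the body of an optimizer in `inherit_stack_trace(node.outputs)`, and each replacement variable inherits the traces of the outputs being replaced. A hedged sketch of the effect, with a fake trace entry standing in for the real formatted stack frames:

```python
# Sketch (illustration only, not part of the commit): replacement variables
# built inside the context inherit the stack trace of the ones they replace.
import theano.tensor as T
from theano.gof.opt import inherit_stack_trace

x = T.vector('x')
old = x * 2                               # stand-in for node.outputs[0]
old.tag.trace = [['<user stack frame>']]  # fake entry for the illustration

with inherit_stack_trace([old]):
    new = x + x                           # stand-in replacement graph

print(old.tag.trace[0] in new.tag.trace)  # True: the trace was inherited
```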
theano/gpuarray/basic_ops.py

```diff
@@ -15,6 +15,7 @@ from theano.tensor.basic import (
 from theano.gof import HideC, COp, ParamsType
 from theano.gof.utils import MethodNotDefined
+from theano.gof.opt import copy_stack_trace

 from collections import deque
@@ -75,11 +76,11 @@ def as_gpuarray_variable(x, context_name):
     # If we couldn't deal with transfers, then maybe it's a tensor
     if isinstance(x.type, tensor.TensorType):
-        return GpuFromHost(context_name)(x)
+        return copy_stack_trace(x, GpuFromHost(context_name)(x))

     # Try _as_GpuArrayVariable if possible
     if hasattr(x, '_as_GpuArrayVariable'):
-        return x._as_GpuArrayVariable(context_name)
+        return copy_stack_trace(x, x._as_GpuArrayVariable(context_name))

     # If it didn't work try for a constant
     ctx = get_context(context_name)
```
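Both call sites rely on the change above that makes `copy_stack_trace` return its destination, so the copy can be chained inline around a constructor. The same one-liner pattern in isolation, with a variable invented for the sketch:

```python
# Sketch of the inline pattern (illustration only): build a new variable,
# copy the source's trace onto it, and return it in a single expression.
import theano.tensor as T
from theano.gof.opt import copy_stack_trace

src = T.vector('src')                 # hypothetical source variable
dst = copy_stack_trace(src, src * 2)  # works because copy_stack_trace returns to_var
print(hasattr(dst.tag, 'trace'))
```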
theano/gpuarray/dnn.py

```diff
@@ -18,6 +18,7 @@ from theano.gradient import DisconnectedType, grad_not_implemented
 from theano.gof import Optimizer, local_optimizer, COp, ParamsType, EnumList
 from theano.gof.cmodule import GCC_compiler
 from theano.gof.type import CDataType, Generic
+from theano.gof.opt import inherit_stack_trace
 from theano.compile import optdb
 from theano.compile.ops import shape_i, shape_i_op
 from theano.tensor.nnet import LogSoftmax, SoftmaxGrad
@@ -3127,8 +3128,10 @@ def local_abstractconv_cudnn(node):
     if node.op.unshared:
         return None
     if isinstance(node.op, AbstractConv2d):
+        with inherit_stack_trace(node.outputs):
             return local_abstractconv_cudnn_graph(node.op, ctx,
                                                   node.inputs, node.outputs)
     elif isinstance(node.op, AbstractConv3d):
+        with inherit_stack_trace(node.outputs):
             return local_abstractconv3d_cudnn_graph(node.op, ctx,
                                                     node.inputs, node.outputs)
@@ -3352,8 +3355,10 @@ def local_abstractconv_gw_cudnn(node):
     if node.op.unshared:
         return None
     if isinstance(node.op, AbstractConv2d_gradWeights):
+        with inherit_stack_trace(node.outputs):
             return local_abstractconv_cudnn_graph(node.op, ctx,
                                                   node.inputs, node.outputs)
     elif isinstance(node.op, AbstractConv3d_gradWeights):
+        with inherit_stack_trace(node.outputs):
             return local_abstractconv3d_cudnn_graph(node.op, ctx,
                                                     node.inputs, node.outputs)
@@ -3365,8 +3370,10 @@ def local_abstractconv_gi_cudnn(node):
     if node.op.unshared:
         return None
     if isinstance(node.op, AbstractConv2d_gradInputs):
+        with inherit_stack_trace(node.outputs):
             return local_abstractconv_cudnn_graph(node.op, ctx,
                                                   node.inputs, node.outputs)
     elif isinstance(node.op, AbstractConv3d_gradInputs):
+        with inherit_stack_trace(node.outputs):
             return local_abstractconv3d_cudnn_graph(node.op, ctx,
                                                     node.inputs, node.outputs)
@@ -3384,7 +3391,6 @@ def local_dnn_convgw_inplace(node, inputs):
 def local_dnn_convgi_inplace(node, inputs):
     return [GpuDnnConvGradI(algo=node.op.algo, inplace=True,
                             num_groups=node.op.num_groups)(*inputs)]

-
 optdb.register('local_dnna_conv_inplace',
                tensor.opt.in2out(local_dnn_conv_inplace,
                                  local_dnn_convgw_inplace,
@@ -3654,6 +3660,7 @@ def local_dnn_reduction(node):
     if not cudnn.cudnnReduceTensorOp_t.has_alias(node.op.scalar_op.name):
         return

+    with inherit_stack_trace(node.outputs):
         return (GpuDnnReduction(node.op.scalar_op.name,
                                 node.op.axis,
                                 node.op.acc_dtype,
```
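Note that these rewrites wrap calls to helper functions that build the replacement graph internally; the observers live on the `Variable` class itself, so construction is tracked no matter how deeply nested the building code is. A toy demonstration, with a hypothetical helper:

```python
# Sketch (illustration only): construction is tracked even when it happens
# inside a helper, because the observer list lives on the Variable class.
import theano.tensor as T
from theano.gof.graph import nodes_constructed

def build_replacement(v):
    # hypothetical helper, standing in for the cudnn graph builders above
    return T.exp(v) + 1

x = T.vector('x')
with nodes_constructed() as new_nodes:
    out = build_replacement(x)

print(out in new_nodes)  # True: nested construction was observed
```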
theano/gpuarray/opt.py

```diff
@@ -15,7 +15,8 @@ from theano.compile.ops import shape_i
 from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
                         LocalGroupDB,
                         SequenceDB, Optimizer, DB, toolbox, graph)
-from theano.gof.opt import LocalMetaOptimizer
+from theano.gof.opt import (LocalMetaOptimizer, copy_stack_trace,
+                            inherit_stack_trace)
 from theano.ifelse import IfElse
 from theano.misc.ordered_set import OrderedSet
@@ -252,12 +253,25 @@ def op_lifter(OP, cuda_only=False):
                 # This is needed as sometimes new_op inherits from OP.
                 if new_op and new_op != node.op:
                     if isinstance(new_op, theano.Op):
-                        return [safe_to_cpu(o) for o in
-                                new_op(*node.inputs, return_list=True)]
+                        new_outputs = new_op(*node.inputs, return_list=True)
+                        to_cpu_fn = safe_to_cpu
                     elif isinstance(new_op, (tuple, list)):
-                        return [safe_to_cpu(o) for o in new_op]
+                        new_outputs = new_op
+                        to_cpu_fn = safe_to_cpu
                     else:  # suppose it is a variable on the GPU
-                        return [new_op.transfer('cpu')]
+                        new_outputs = [new_op]
+
+                        def to_cpu_fn(x):
+                            return x.transfer('cpu')
+                    # copy stack traces onto gpu outputs
+                    # also copy the stack traces onto HostFromGpu outputs
+                    on_cpu = []
+                    for old_output, new_output in zip(node.outputs, new_outputs):
+                        copy_stack_trace(old_output, new_output)
+                        cpu = to_cpu_fn(new_output)
+                        on_cpu.append(cpu)
+                        copy_stack_trace(old_output, cpu)
+                    return on_cpu
             return False
         local_opt.__name__ = maker.__name__
         return local_optimizer(OP)(local_opt)
@@ -419,6 +433,9 @@ class GraphToGPU(Optimizer):
             elif isinstance(new_ops, theano.Variable):
                 outputs = [new_ops]

+            for old_output, new_output in zip(node.outputs, outputs):
+                copy_stack_trace(old_output, new_output)
+
             if new_ops:
                 node_created[lopt] += len(graph.ops([mapping[i] for i in node.inputs], outputs))
                 if any([getattr(old_o, 'dtype', None) != getattr(new_o, 'dtype', None)
@@ -451,7 +468,7 @@ class GraphToGPU(Optimizer):
                     new_o.owner.inputs[0].type == o.type):
                 new_o = new_o.owner.inputs[0]
             else:
-                new_o = safe_to_cpu(new_o)
+                new_o = copy_stack_trace(o, safe_to_cpu(new_o))
             new_nodes.append(new_o)
         fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes),
                                     reason=self.__class__.__name__)
@@ -650,7 +667,8 @@ def local_gpualloc_memset_0(node):
             inp.data.size == 1 and
             (np.asarray(inp.data) == 0).all()):
         new_op = GpuAlloc(node.op.context_name, memset_0=True)
-        return [new_op(*node.inputs)]
+        with inherit_stack_trace(node.outputs):
+            return new_op(*node.inputs, return_list=True)


 # Don't register by default.
@@ -659,10 +677,9 @@ def local_gpua_alloc_empty_to_zeros(node):
     if isinstance(node.op, GpuAllocEmpty):
         context_name = infer_context_name(*node.inputs)
         z = np.asarray(0, dtype=node.outputs[0].dtype)
-        return [GpuAlloc(context_name)(as_gpuarray_variable(z, context_name),
-                                       *node.inputs)]
+        with inherit_stack_trace(node.outputs):
+            return [GpuAlloc(context_name)(as_gpuarray_variable(z, context_name),
+                                           *node.inputs)]

 optdb.register('local_gpua_alloc_empty_to_zeros',
                theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros),
                # After move to gpu and merge2, before inplace.
@@ -1206,6 +1223,7 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
                          op.scalar_op, axis=op.axis,
                          dtype=odtype,
                          acc_dtype=adtype)
+        with inherit_stack_trace(outputs):
             gvar = greduce(x)
         # We need to have the make node called, otherwise the mask can
         # be None
@@ -1246,11 +1264,12 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
                          dtype=getattr(op, 'dtype', outputs[0].dtype),
                          acc_dtype=getattr(op, 'acc_dtype', None))
+        with inherit_stack_trace(outputs):
             reshaped_x = x.reshape(tensor.stack(new_in_shp))
             gpu_reshaped_x = as_gpuarray_variable(reshaped_x, context_name)
-            gvar = greduce(gpu_reshaped_x)
             # We need to have the make node called, otherwise the mask can
             # be None
+            gvar = greduce(gpu_reshaped_x)
             reshaped_gpu_inputs = [gpu_reshaped_x]
             if greduce.supports_c_code(reshaped_gpu_inputs):
                 reduce_reshaped_x = greduce(gpu_reshaped_x)
@@ -1260,7 +1279,8 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
                 for i in range(x.ndim):
                     if i not in op.axis:
                         out_shp.append(shape_i(x, i))
                 unreshaped_reduce = GpuReshape(len(out_shp))(reduce_reshaped_x,
                                                              tensor.stack(out_shp))
             else:
                 unreshaped_reduce = reduce_reshaped_x
@@ -1305,6 +1325,7 @@ def local_gpua_gemm(op, context_name, inputs, outputs):
 def local_gpua_gemmbatch(op, context_name, inputs, outputs):
     if inputs[0].dtype not in ['float16', 'float32', 'float64']:
         return
+    with inherit_stack_trace(outputs):
         a, b = inputs
         # Since GpuGemmBatch only supports 3D inputs and output,
         # we need to add broadcastable dims to the inputs, and drop
@@ -1378,6 +1399,7 @@ def local_gpua_dot22(op, context_name, inputs, outputs):
 @op_lifter([tensor.blas.Dot22Scalar])
 @register_opt2([tensor.blas.Dot22Scalar], 'fast_compile')
 def local_gpua_dot22scalar(op, context_name, inputs, outputs):
+    with inherit_stack_trace(outputs):
         x, y, a = inputs
         x = as_gpuarray_variable(x, context_name)
         y = as_gpuarray_variable(y, context_name)
@@ -2392,6 +2414,7 @@ def local_gpu_elemwise_careduce(node):
         props = node.op._props_dict()
         props["pre_scalar_op"] = scalar.basic.sqr
         out = GpuCAReduceCuda(**props)(inp)
+        with inherit_stack_trace(node.outputs):
             return [out]
@@ -2583,6 +2606,7 @@ def local_gpu_solve(op, context_name, inputs, outputs):
 @local_optimizer([GpuCusolverSolve], inplace=True)
 def local_inplace_gpu_solve(node):
     if isinstance(node.op, GpuCusolverSolve) and not node.op.inplace:
+        with inherit_stack_trace(node.outputs):
             return [GpuCusolverSolve(A_structure=node.op.A_structure,
                                      trans=node.op.trans,
                                      inplace=True)(*node.inputs)]
@@ -2622,6 +2646,7 @@ register_opt2([slinalg.Solve], 'fast_compile', name='matrix_ops_db2')(matrix_ops
 @local_optimizer([GpuCholesky], inplace=True)
 def local_inplace_gpu_cholesky(node):
     if isinstance(node.op, GpuCholesky) and not node.op.inplace:
+        with inherit_stack_trace(node.outputs):
             return [node.op.clone_inplace()(*node.inputs)]
@@ -2705,6 +2730,7 @@ def local_gpu_magma_matrix_inverse(op, context_name, inputs, outputs):
 @local_optimizer([GpuMagmaMatrixInverse])
 def local_inplace_gpu_magma_matrix_inverse(node):
     if isinstance(node.op, GpuMagmaMatrixInverse) and not node.op.inplace:
+        with inherit_stack_trace(node.outputs):
             return [node.op.clone_inplace()(*node.inputs)]
```
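The payoff is testable: `check_stack_trace`, whose signature appears in the theano/gof/opt.py hunk above, reports whether the ops of a compiled function still carry stack traces. A sketch of the kind of check the (collapsed) test file presumably performs:

```python
# Verification sketch (illustration only, not from the collapsed test file):
# compile a small function and check the last op still carries a stack trace.
import theano
import theano.tensor as T
from theano.gof.opt import check_stack_trace

x = T.matrix('x')
y = T.matrix('y')
f = theano.function([x, y], T.dot(x, y), mode='FAST_RUN')
print(check_stack_trace(f, ops_to_check='last'))
```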
theano/gpuarray/opt_util.py

```diff
@@ -5,6 +5,7 @@ import numpy as np
 from theano import tensor, scalar as scal, Constant
 from theano.gof import local_optimizer
+from theano.gof.opt import inherit_stack_trace
 from theano.tensor import (DimShuffle, get_scalar_constant_value,
                            NotScalarConstantError)
@@ -184,6 +185,7 @@ def alpha_merge(cls, alpha_in, beta_in):
             except NotScalarConstantError:
                 inputs[alpha_in] = lr * targ.inputs[alpha_in]
                 inputs[beta_in] = lr * targ.inputs[beta_in]
+            with inherit_stack_trace(node.outputs):
                 return maker(targ, *inputs)
         return opt
     return wrapper
@@ -272,6 +274,7 @@ def output_merge(cls, alpha_in, beta_in, out_in):
             inputs = list(targ.inputs)
             inputs[out_in] = W
             inputs[beta_in] = _one.clone()
+            with inherit_stack_trace(node.outputs):
                 return maker(targ, *inputs)
         return opt
     return wrapper
@@ -326,6 +329,7 @@ def inplace_allocempty(op, idx):
                 len(alloc.clients) > 1):
             alloc_op = GpuAllocEmpty(alloc.owner.op.dtype,
                                      alloc.owner.op.context_name)
             inputs[idx] = alloc_op(*alloc.owner.inputs)
+        with inherit_stack_trace(node.outputs):
             return maker(node, inputs)
         return opt
     return wrapper
```
theano/gpuarray/tests/test_opt.py

(diff collapsed on the original page; contents not shown)
theano/tensor/blas.py

```diff
@@ -146,6 +146,7 @@ from theano.gof import (utils, Op, view_roots,
                         EquilibriumOptimizer, Apply,
                         ReplacementDidntRemovedError)
 from theano.gof.params_type import ParamsType
+from theano.gof.opt import inherit_stack_trace
 from theano.printing import pprint, FunctionPrinter, debugprint
 from theano.compile.mode import optdb
 import theano.scalar
@@ -1625,17 +1626,14 @@ def local_dot_to_dot22(node):
         return
     if y.type.dtype in ['float16', 'float32', 'float64', 'complex64', 'complex128']:
+        with inherit_stack_trace(node.outputs):
             if x.ndim == 2 and y.ndim == 2:
-                # print "local_dot_to_dot22: MM"
                 return [_dot22(*node.inputs)]
             if x.ndim == 2 and y.ndim == 1:
-                # print "local_dot_to_dot22: MV"
                 return [_dot22(x, y.dimshuffle(0, 'x')).dimshuffle(0)]
             if x.ndim == 1 and y.ndim == 2:
-                # print "local_dot_to_dot22: VM"
                 return [_dot22(x.dimshuffle('x', 0), y).dimshuffle(1)]
             if x.ndim == 1 and y.ndim == 1:
-                # print "local_dot_to_dot22: VV"
                 return [_dot22(x.dimshuffle('x', 0),
                                y.dimshuffle(0, 'x')).dimshuffle()]
@@ -1646,18 +1644,21 @@ def local_dot_to_dot22(node):
 @local_optimizer([gemm_no_inplace], inplace=True)
 def local_inplace_gemm(node):
     if node.op == gemm_no_inplace:
+        with inherit_stack_trace(node.outputs):
             return [gemm_inplace(*node.inputs)]


 @local_optimizer([gemv_no_inplace], inplace=True)
 def local_inplace_gemv(node):
     if node.op == gemv_no_inplace:
+        with inherit_stack_trace(node.outputs):
             return [gemv_inplace(*node.inputs)]


 @local_optimizer([ger], inplace=True)
 def local_inplace_ger(node):
     if node.op == ger:
+        with inherit_stack_trace(node.outputs):
             return [ger_destructive(*node.inputs)]
@@ -1666,6 +1667,7 @@ def local_gemm_to_gemv(node):
     """GEMM acting on row or column matrices -> GEMV."""
     if node.op == gemm_no_inplace:
         z, a, x, y, b = node.inputs
+        with inherit_stack_trace(node.outputs):
             if z.broadcastable == x.broadcastable == (True, False):
                 r = gemv_no_inplace(z.dimshuffle(1), a, y.T, x.dimshuffle(1), b)
                 return [r.dimshuffle('x', 0)]
@@ -1680,6 +1682,7 @@ def local_gemm_to_ger(node):
     if node.op == gemm_no_inplace:
         z, a, x, y, b = node.inputs
         if x.broadcastable[1] and y.broadcastable[0]:
+            with inherit_stack_trace(node.outputs):
                 # x and y are both vectors so this might qualifies for a GER
                 xv = x.dimshuffle(0)
                 yv = y.dimshuffle(1)
@@ -1708,6 +1711,7 @@ def local_dot22_to_ger_or_gemv(node):
     """dot22 computing an outer-product -> GER."""
     if node.op == _dot22:
+        with inherit_stack_trace(node.outputs):
             x, y = node.inputs
             xb = x.broadcastable
             yb = y.broadcastable
```
theano/tensor/opt_uncanonicalize.py

```diff
@@ -43,6 +43,7 @@ from theano.tensor import DimShuffle, Subtensor
 from theano.tensor.opt import register_uncanonicalize
 from theano import scalar as scal
+from theano.gof.opt import copy_stack_trace

 _logger = logging.getLogger('theano.tensor.opt')
@@ -57,10 +58,13 @@ def local_max_and_argmax(node):
         axis = node.op.get_params(node)
         if len(node.outputs[1].clients) == 0:
             new = CAReduce(scal.maximum, axis)(node.inputs[0])
+            copy_stack_trace(node.outputs[0], new)
             return [new, None]

         if len(node.outputs[0].clients) == 0:
-            return [None, T.Argmax(axis)(node.inputs[0])]
+            new = T.Argmax(axis)(node.inputs[0])
+            copy_stack_trace(node.outputs[0], new)
+            return [None, new]


 @register_uncanonicalize
@@ -84,8 +88,8 @@ def local_max_to_min(node):
             max.owner.op.scalar_op == scal.maximum):
         neg = max.owner.inputs[0]
         if neg.owner and neg.owner.op == T.neg:
-            return [CAReduce(scal.minimum,
-                             max.owner.op.axis)(neg.owner.inputs[0])]
+            new = CAReduce(scal.minimum,
+                           max.owner.op.axis)(neg.owner.inputs[0])
+            return [copy_stack_trace(node.outputs[0], new)]

     return False
```
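For reference, what all of these rewrites preserve is the list of formatted stack frames that Theano records on `variable.tag.trace` when a variable is created; a quick way to see it (exact contents vary by version):

```python
# Illustration (not part of the commit): inspect the trace being preserved.
import theano.tensor as T

x = T.vector('x')
y = T.max(-x)   # the kind of graph local_max_to_min rewrites into a min
print(getattr(y.tag, 'trace', []))  # frames captured where y was defined
```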