testgroup / pytensor · Commits

Commit d0dfb0be
Authored June 13, 2016 by sentient07

Cleaned up and fixed pep8

Parent: c3e8f153

Showing 5 changed files with 55 additions and 97 deletions (+55 -97)
[Hunks rendered below with no +/- lines contain only whitespace changes, which this capture does not preserve.]
theano/gpuarray/dnn.py        +8   -8
theano/gpuarray/extra_ops.py  +3   -2
theano/gpuarray/nerv.py       +1   -1
theano/gpuarray/opt.py        +42  -85
theano/gpuarray/opt_util.py   +1   -1
theano/gpuarray/dnn.py

@@ -25,7 +25,7 @@ from theano.tensor.signal.pool import (
 from . import pygpu
 from .type import get_context, gpu_context_type, list_contexts, GpuArrayType
 from .basic_ops import (as_gpuarray_variable, infer_context_name,
-                        gpu_contiguous, GpuAllocEmpty, gpu_alloc_empty,
+                        gpu_contiguous, gpu_alloc_empty,
                         empty_like)
 from .elemwise import GpuElemwise

@@ -942,8 +942,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
     shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
     shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
     out = gpu_alloc_empty(img.dtype, ctx_name)(shape_i(img, 0, fgraph),
                                                shape_i(kerns, 1, fgraph),
                                                shape2, shape3)
     desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                           conv_mode=conv_mode, precision=precision)(kerns.shape)
     return gpu_dnn_conv_gradI()(kerns, img, out, desc)

@@ -1412,11 +1412,11 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
 @local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
                   AbstractConv2d_gradInputs])
 @register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
                 AbstractConv2d_gradInputs], 'fast_compile')
 def local_abstractconv_cudnn_graph(op, context_name, inputs):
     if (not isinstance(op, (AbstractConv2d,
                             AbstractConv2d_gradWeights,
                             AbstractConv2d_gradInputs))):
         return None
     inp1 = inputs[0]

@@ -1462,8 +1462,8 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs):
 @local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
                   AbstractConv2d_gradInputs])
 def local_abstractconv_cudnn(node):
-    ctx = infer_context(*node.inputs)
-    return local_abstractconv_dnn_graph(node.op, ctx, node.inputs)
+    ctx = infer_context_name(*node.inputs)
+    return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs)
 conv_groupopt.register('local_abstractconv_cudnn_graph',
                        local_abstractconv_cudnn_graph, 20,
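The last hunk above is the substantive fix in this file: local_abstractconv_cudnn referred to two names that do not exist, infer_context and local_abstractconv_dnn_graph. Because Python resolves global names at call time, such a function imports cleanly and only raises NameError the first time the optimizer fires. A minimal, self-contained sketch of the fixed call chain (stand-in functions, not Theano's real ones):

    def infer_context_name(*variables):
        # stand-in for theano.gpuarray.basic_ops.infer_context_name
        return "dev0"

    def local_abstractconv_cudnn_graph(op, ctx, inputs):
        # stand-in for the optimizer defined earlier in this diff
        return [op, ctx, inputs]

    def local_abstractconv_cudnn(node_op, node_inputs):
        # the fixed version resolves to names that actually exist
        ctx = infer_context_name(*node_inputs)
        return local_abstractconv_cudnn_graph(node_op, ctx, node_inputs)

    print(local_abstractconv_cudnn("conv_op", ["img", "kerns"]))
    # -> ['conv_op', 'dev0', ['img', 'kerns']]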
theano/gpuarray/extra_ops.py

@@ -9,7 +9,7 @@ except ImportError:
     pass
 from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
-                        infer_context_name, GpuFromHost)
+                        infer_context_name)
 from .opt import register_opt, op_lifter, register_opt2

@@ -450,10 +450,11 @@ class GpuCumsum(GpuKernelBase, Op):
         """ % locals()
         return super(GpuCumsum, self).c_support_code_struct(node, nodename) + code

 @register_opt('fast_compile')
 @op_lifter([CumsumOp])
 @register_opt2([CumsumOp], 'fast_compile')
-def use_gpu_cumsumop(op, ctx_name, inputs,):
+def use_gpu_cumsumop(op, ctx_name, inputs):
     if inputs[0].dtype == 'float32':
         axis = op.axis
         x = inputs[0]
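Both changes here are straight pep8/pyflakes cleanups: GpuFromHost was imported but never used, and the def carried a stray trailing comma in its parameter list. For readers unfamiliar with the decorator stack on use_gpu_cumsumop, here is a simplified, hypothetical re-implementation of the pattern (the real op_lifter and register_opt2 live in .opt and do more): op_lifter adapts a function written against (op, context_name, inputs) so a graph walker can invoke it with a whole node.

    def op_lifter(op_classes):
        def decorator(maker):
            def local_opt(node):
                # only fire on the op types the maker was registered for
                if not isinstance(node.op, tuple(op_classes)):
                    return False
                return maker(node.op, "dev0", node.inputs)
            local_opt.__name__ = maker.__name__
            return local_opt
        return decorator

    class CumsumOp(object):          # stand-in op
        axis = 0

    class Node(object):              # stand-in graph node
        def __init__(self, op, inputs):
            self.op = op
            self.inputs = inputs

    @op_lifter([CumsumOp])
    def use_gpu_cumsumop(op, ctx_name, inputs):
        return "GpuCumsum(axis=%s) of %s on %s" % (op.axis, inputs[0], ctx_name)

    print(use_gpu_cumsumop(Node(CumsumOp(), ["x"])))
    # -> GpuCumsum(axis=0) of x on dev0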
theano/gpuarray/nerv.py

@@ -10,7 +10,7 @@ from theano.scalar import as_scalar, constant
 from . import opt
 from .basic_ops import (as_gpuarray_variable, GpuAllocEmpty,
-                        infer_context_name)
+                        infer_context_name, gpu_alloc_empty)
 from .type import gpu_context_type
 from .opt_util import alpha_merge, output_merge
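The added gpu_alloc_empty import matches the call-site convention visible in dnn.py above, where gpu_alloc_empty(dtype, ctx_name)(*shape) first builds a configured allocation op and then applies it to the shape arguments. A toy sketch of that factory pattern, with stand-in classes rather than the real ops:

    class GpuAllocEmpty(object):
        def __init__(self, dtype, context_name):
            self.dtype = dtype
            self.context_name = context_name

        def __call__(self, *shape):
            # the real op builds an uninitialized GPU array; we just describe it
            return "empty %s tensor %r on %s" % (self.dtype, shape,
                                                 self.context_name)

    def gpu_alloc_empty(dtype, context_name):
        # hypothetical stand-in for the helper imported from .basic_ops
        return GpuAllocEmpty(dtype, context_name)

    print(gpu_alloc_empty("float32", "dev0")(16, 3, 32, 32))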
theano/gpuarray/opt.py

@@ -8,15 +8,15 @@ from six import itervalues, iteritems
 from six.moves import xrange

 import theano
-from theano.compat import OrderedDict
 from theano import tensor, scalar, gof, config
 from theano.compile import optdb
 from theano.compile.ops import shape_i
 from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
                         SequenceDB, Optimizer, DB, toolbox, graph)
-from theano.gof.opt import ChangeTracker, NavigatorOptimizer
+from theano.gof.opt import NavigatorOptimizer
 from theano.gof.optdb import LocalGroupDB
 from theano.ifelse import IfElse
+from theano.misc.ordered_set import OrderedSet
 from theano.scalar.basic import Scalar, Pow, Cast
 from theano.scan_module import scan_utils, scan_op, scan_opt

@@ -30,7 +30,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
 from theano.tests.breakpoint import PdbBreakpoint
 from .type import (GpuArrayType, GpuArrayConstant, get_context,
-                   ContextNotDefined, GpuArrayVariable, GpuArraySharedVariable)
+                   ContextNotDefined, GpuArraySharedVariable, GpuArrayVariable)
 from .basic_ops import (as_gpuarray_variable, infer_context_name,
                         host_from_gpu, GpuToGpu,
                         HostFromGpu, GpuFromHost,

@@ -55,8 +55,6 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
                         GpuAdvancedIncSubtensor1_dev20)
 from .opt_util import alpha_merge, output_merge

 _logger = logging.getLogger("theano.gpuarray.opt")

@@ -195,7 +193,7 @@ def op_lifter(OP, cuda_only=False):
             try:
                 new_op = maker(node.op, context_name, node.inputs)
             except TypeError:
                 # Pass the outputs so that the Local Optimizers don't need to
                 # build the nodes again.
                 new_op = maker(node.op, context_name, node.inputs,
                                node.outputs)
             # This is needed as sometimes new_op inherits from OP.

@@ -263,7 +261,6 @@ class GraphToGPU(NavigatorOptimizer):
         self.local_optimizers_all = local_optimizers_all
         self.local_optimizers_map = local_optimizers_map
         self.failure_callback = None
-        self.new_opts = []

     def add_requirements(self, fgraph):
         fgraph.attach_feature(toolbox.ReplaceValidate())

@@ -281,13 +278,10 @@ class GraphToGPU(NavigatorOptimizer):
     def apply(self, fgraph):
         mapping = {}
-        start_nb_nodes = len(fgraph.apply_nodes)
-        max_nb_nodes = len(fgraph.apply_nodes)
-        io_toposort_timing = []
-        nb_nodes = []
         time_opts = {}
         node_created = {}
         process_count = {}
+        io_toposort_timing = []

         # Building a new graph
         # Iterating through inputs of graph
         for i in fgraph.inputs:

@@ -299,7 +293,7 @@ class GraphToGPU(NavigatorOptimizer):
             if isinstance(i, theano.Constant):
                 mapping[i] = i
         for node in fgraph.toposort():
             for lopt in (self.local_optimizers_all +
                          self.local_optimizers_map.get(type(node.op), []) +
                          self.local_optimizers_map.get(node.op, [])):
                 process_count.setdefault(lopt, 0)

@@ -307,29 +301,25 @@ class GraphToGPU(NavigatorOptimizer):
                 node_created.setdefault(lopt, 0)

         t_topo = time.time()
-        topo = fgraph.toposort()
+        fgraph.toposort()
         time_topo = time.time() - t_topo
+        io_toposort_timing.append(time_topo - t_topo)

         for node in fgraph.toposort():
-            t0 = time.time()
             if isinstance(node.op, HostFromGpu):
                 mapping[node.outputs[0]] = node.inputs[0]
                 continue
             # Move only if any of the inputs are on the GPU.
-            move_to_GPU = True
-            '''
-            if any([isinstance(i, GpuArrayVariable) or
-                    isinstance(i, GpuArraySharedVariable)
-                    for i in [mapping[v] for v in node.inputs] +
-                    node.outputs]):
-                move_to_GPU = True
-            '''
-            out_clients = [o.clients for o in node.outputs]
+            move_to_GPU = False
+            if any([isinstance(i, GpuArrayVariable) or
+                    isinstance(i, GpuArraySharedVariable)
+                    for i in [mapping[v] for v in node.inputs] +
+                    node.outputs]):
+                move_to_GPU = True

             context_name = None
             for i in [mapping[i] for i in node.inputs]:

@@ -340,29 +330,28 @@ class GraphToGPU(NavigatorOptimizer):
             new_ops = None
             outputs = []
-            ex_opt_time = None
             # Apply the lifter
             for lopt in (self.local_optimizers_all +
                          self.local_optimizers_map.get(type(node.op), []) +
                          self.local_optimizers_map.get(node.op, [])):
-                process_count[lopt] += 1
                 if move_to_GPU:
                     t_opt = time.time()
                     try:
                         new_ops = lopt.transform(
                             node.op, context_name,
                             [mapping[i] for i in node.inputs])
                     except TypeError:
                         # Updating again because else we'd be counting
                         # time for two except clauses
                         t_opt = time.time()
                         new_ops = lopt.transform(node.op, context_name,
                                                  [mapping[i] for i in node.inputs],
                                                  node.outputs)
                     finally:
                         t_opt2 = time.time()
+                        time_opts[lopt] += t_opt2 - t_opt
                     if new_ops:
+                        process_count[lopt] += 1
                         break
             if not new_ops:
                 newnode = node.clone_with_new_inputs([mapping.get(i)

@@ -385,9 +374,7 @@ class GraphToGPU(NavigatorOptimizer):
                                                      return_list=True)
             if new_ops:
-                node_created[lopt] += len(theano.gof.graph.ops(
-                    [mapping[i] for i in node.inputs], outputs))
-                self.new_opts.append(lopt)
-                time_opts[lopt] = t_opt2 - t_opt
+                node_created[lopt] += len(graph.ops(
+                    [mapping[i] for i in node.inputs], outputs))
             for new_o, old_o in zip(outputs, node.outputs):
                 mapping[old_o] = new_o

@@ -402,47 +389,26 @@ class GraphToGPU(NavigatorOptimizer):
                 new_nodes.append(new_o)
         fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes))
-        end_nb_nodes = len(fgraph.apply_nodes)
-        return (self, start_nb_nodes, end_nb_nodes, max_nb_nodes,
-                io_toposort_timing, nb_nodes, time_opts, node_created)
+        return (self, io_toposort_timing, time_opts, node_created,
+                process_count)

     @staticmethod
     def print_profile(stream, prof, level=0):
-        (opt, start_nb_nodes, end_nb_nodes, max_nb_nodes, io_toposort_timing,
-         nb_nodes, time_opts, node_created) = prof
+        (opt, io_toposort_timing,
+         time_opts, node_created, process_count) = prof
         blanc = ('    ' * level)
         print(blanc, "GraphToGPUOptimizer", end=' ', file=stream)
         print(blanc, getattr(opt, "name",
                              getattr(opt, "__name__", "")), file=stream)
-        print(blanc, "  nb nodes (start, end, max) %d %d %d" %
-              (start_nb_nodes, end_nb_nodes, max_nb_nodes), file=stream)
         print(blanc, "  time io_toposort %.3fs" % sum(
               io_toposort_timing), file=stream)
-        s = sum([time_opts[o] for o in opt.new_opts])
-        print(blanc, "Total time taken by local optimizers %.3fs " % s,
-              file=stream)
-        # Build a dictionary of opt and time taken
-        opt_time_dict = dict()
-        for o in opt.new_opts:
-            if o not in opt_time_dict:
-                opt_time_dict[o] = time_opts[o]
-            else:
-                opt_time_dict[o] += time_opts[o]
-        # print time per each optimizer
-        for k, v in opt_time_dict.iteritems():
-            print(blanc, "Local Optimizer :" + str(k) +
-                  " takes time : %.3f" % v, file=stream)
+        s = sum([v for k, v in time_opts.iteritems()])
+        print(blanc, "  time in local optimizers %.3fs" % s, file=stream)
         count_opt = []
         not_used = []
         not_used_time = 0
-        process_count = {}
-        for o in (opt.new_opts):
-            process_count.setdefault(o, 0)
-            process_count[o] + 1
         for o, count in iteritems(process_count):
             if count > 0:

@@ -454,13 +420,13 @@ class GraphToGPU(NavigatorOptimizer):
         if count_opt:
             print(blanc,
-                  '  times - times applied - nb node created - name:',
+                  '  times - times applied - Node created - name:',
                   file=stream)
             count_opt.sort()
             for (t, count, n_created, o) in count_opt[::-1]:
                 print(blanc, '  %.3fs - %d - %d - %s' % (
                       t, count, n_created, o), file=stream)
-        print(blanc, '  %.3fs - in %d optimization that where not used (display only those with a runtime > 0)' % (
+        print(blanc, '  %.3fs - in %d optimization that were not used (display only those with a runtime > 0)' % (
               not_used_time, len(not_used)), file=stream)
         not_used.sort(key=lambda nu: (nu[0], str(nu[1])))
         for (t, o) in not_used[::-1]:

@@ -469,7 +435,6 @@ class GraphToGPU(NavigatorOptimizer):
                 print(blanc + "  ", '%.3fs - %s' % (t, o), file=stream)
             print(file=stream)

     @staticmethod
     def merge_profile(prof1, prof2):
         # (opt, loop_timing, loop_process_count, max_nb_nodes,

@@ -491,8 +456,7 @@ class GraphToGPU(NavigatorOptimizer):
         local_optimizers_map = merge_dict(prof1[0].local_optimizers_map,
                                           prof2[0].local_optimizers_map)
         new_opt = GraphToGPU(local_optimizers, local_optimizers_map)

         def merge_list(l1, l2):
             l = copy.copy(l1)

@@ -501,23 +465,17 @@ class GraphToGPU(NavigatorOptimizer):
                     l[idx] += nb
                 else:
                     l.append(nb)
             return l

-        max_nb_nodes = max(prof1[3], prof2[3])
-        io_toposort_timing = merge_list(prof1[4], prof2[4])
-        nb_nodes = merge_list(prof1[5], prof2[5])
-        time_opts = merge_dict(prof1[6], prof2[6])
-        node_created = merge_dict(prof1[7], prof2[7])
+        io_toposort_timing = merge_list(prof1[1], prof2[1])
+        time_opts = merge_dict(prof1[2], prof2[2])
+        node_created = merge_dict(prof1[3], prof2[3])
+        process_count = merge_dict(prof1[4], prof2[4])
         return (new_opt,
-                max_nb_nodes,
                 io_toposort_timing,
-                nb_nodes,
                 time_opts,
-                node_created)
+                node_created,
+                process_count)

 @local_optimizer([GpuFromHost, GpuToGpu, HostFromGpu])

@@ -917,7 +875,7 @@ def local_gpuajoin_1(node):
 @op_lifter([tensor.Split])
 @register_opt2([tensor.Split], 'fast_compile')
 def local_gpua_split(op, context_name, inputs):
     # TODO use props
     return GpuSplit(op.len_splits)

@@ -1009,7 +967,7 @@ def local_advincsub1_gpua_inplace(node):
 @register_opt2([tensor.CAReduce, tensor.Sum, tensor.elemwise.Prod],
                'fast_compile')
 def local_gpua_careduce(op, context_name, inputs, outputs):
     if isinstance(op.scalar_op, (scalar.Add, scalar.Mul,
                                  scalar.Maximum, scalar.Minimum)):
         ctx = get_context(context_name)
         if ctx.kind == b'opencl':

@@ -1233,7 +1191,6 @@ def local_assert(op, context_name, inputs):
                              *inputs[1:]))]

 @register_opt('fast_compile')
 @op_lifter([ConvOp])
 def local_error_convop(op, context_name, inputs):
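Most of the churn in this file is the GraphToGPU profiling contract: apply() returns one flat tuple, and print_profile() and merge_profile() must unpack or index it with exactly the same positions, which is why dropping start_nb_nodes, max_nb_nodes and nb_nodes touches all three methods at once. A reduced, runnable sketch of that contract (hypothetical names, not the real optimizer):

    import time

    def apply_sketch(nodes):
        io_toposort_timing = []
        time_opts = {}
        node_created = {}
        process_count = {}
        t0 = time.time()
        sorted(nodes)                        # stands in for fgraph.toposort()
        io_toposort_timing.append(time.time() - t0)
        for _ in nodes:                      # stands in for the lifting loop
            process_count["lift"] = process_count.get("lift", 0) + 1
            node_created["lift"] = node_created.get("lift", 0) + 1
            time_opts.setdefault("lift", 0.0)
        # the tuple layout every consumer must agree on
        return ("opt", io_toposort_timing, time_opts, node_created,
                process_count)

    def print_profile_sketch(prof):
        # unpacks in exactly the order apply_sketch() returned
        (opt, io_toposort_timing, time_opts, node_created,
         process_count) = prof
        print("  time io_toposort %.3fs" % sum(io_toposort_timing))
        print("  time in local optimizers %.3fs" % sum(time_opts.values()))

    print_profile_sketch(apply_sketch([3, 1, 2]))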
theano/gpuarray/opt_util.py

@@ -325,7 +325,7 @@ def inplace_allocempty(op, idx):
             isinstance(alloc.owner.op, GpuAllocEmpty) and
             len(alloc.clients) > 1):
         alloc_op = gpu_alloc_empty(alloc.owner.op.dtype,
                                    alloc.owner.op.context_name)
         inputs[idx] = alloc_op(*alloc.owner.inputs)
         return maker(node, inputs)
     return opt
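This hunk appears unchanged apart from whitespace in this capture, but the guard it shows is worth reading: a GpuAllocEmpty result that feeds more than one consumer must not be reused by an inplace rewrite, so a fresh allocation op is built for the inplace consumer. A toy illustration of why that guard exists, with stand-in classes:

    class AllocResult(object):
        def __init__(self, clients):
            self.clients = clients

    def private_alloc(alloc):
        # mirrors the diff's branch: re-create the allocation only when shared
        if len(alloc.clients) > 1:
            return AllocResult(clients=[])   # fresh buffer for the inplace op
        return alloc

    shared = AllocResult(clients=["consumer_a", "consumer_b"])
    assert private_alloc(shared) is not shared   # don't clobber the other user

    single = AllocResult(clients=["consumer_a"])
    assert private_alloc(single) is single       # safe to reuse in place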