testgroup / pytensor · Commits

Commit 96f7cdf1
Authored Nov 16, 2016 by Frederic Bastien
Parent: 40d372f4

Add missing GraphToGPULocalOptDB and use it.

Showing 6 changed files with 107 additions and 34 deletions.
theano/gof/__init__.py               +1   -1
theano/gof/opt.py                    +42  -0
theano/gof/optdb.py                  +4   -2
theano/gpuarray/dnn.py               +25  -10
theano/gpuarray/opt.py               +33  -19
theano/gpuarray/tests/test_pool.py   +2   -2
theano/gof/__init__.py

@@ -66,7 +66,7 @@ from theano.gof.opt import (
     OpKeyOptimizer
 )
 from theano.gof.optdb import \
-    DB, Query, \
+    DB, LocalGroupDB, Query, \
     EquilibriumDB, SequenceDB, ProxyDB
 from theano.gof.toolbox import \
theano/gof/opt.py

@@ -1384,6 +1384,48 @@ class LocalOptGroup(LocalOptimizer):
         opt.add_requirements(fgraph)


+class GraphToGPULocalOptGroup(LocalOptGroup):
+    """
+    This is the equivalent of LocalOptGroup for GraphToGPU
+    """
+    def __init__(self, *optimizers, **kwargs):
+        super(GraphToGPULocalOptGroup, self).__init__(*optimizers, **kwargs)
+        assert self.apply_all_opts is False
+
+    def transform(self, op, context_name, inputs, outputs):
+        if len(self.opts) == 0:
+            return
+        fgraph = outputs[0].fgraph
+        repl = None
+        while True:
+            opts = (self.track_map[type(op)] + self.track_map[op] +
+                    self.track_map[None])
+            new_repl = None
+            for opt in opts:
+                opt_start = time.time()
+                new_repl = opt.transform(op, context_name, inputs, outputs)
+                opt_finish = time.time()
+                if self.profile:
+                    self.time_opts[opt] += opt_start - opt_finish
+                    self.process_count[opt] += 1
+                if not new_repl:
+                    continue
+                else:
+                    if self.profile:
+                        self.node_created[opt] += len(
+                            graph.ops(fgraph.variables, new_repl))
+                        self.applied_true[opt] += 1
+                    break  # break from the for loop over optimization.
+            if not new_repl:
+                # No optimization applied in the last iteration
+                return repl
+            # only 1 iteration or we are at the start of the graph.
+            if not self.apply_all_opts or not new_repl[0].owner:
+                return new_repl
+            if len(new_repl) > 1:
+                s = set([v.owner for v in new_repl])
+                assert len(s) == 1
+            repl = new_repl
+            node = repl[0].owner
+
+
 class OpSub(LocalOptimizer):
     """
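For orientation, here is a minimal sketch (not part of this commit) of a local optimizer written against the GraphToGPU signature that GraphToGPULocalOptGroup.transform dispatches to. The name my_pool_lifter and its trivial body are hypothetical; the (op, context_name, inputs, outputs) signature and the local_optimizer wrapping follow the usage shown in the gpuarray diffs below.

    # Sketch only: a GraphToGPU-style local optimizer. Unlike a regular
    # LocalOptimizer, it receives the op and its inputs/outputs directly
    # instead of an fgraph Apply node.
    from theano.gof import local_optimizer
    from theano.tensor.signal import pool

    @local_optimizer([pool.Pool])
    def my_pool_lifter(op, context_name, inputs, outputs):
        # Return a list of replacement variables, or None to decline.
        if op.ndim not in (2, 3):
            return None
        return [op(*inputs)]  # hypothetical: rebuild the same op unchanged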
theano/gof/optdb.py

@@ -405,12 +405,14 @@ class LocalGroupDB(DB):

     """
-    def __init__(self, apply_all_opts=False, profile=False):
+    def __init__(self, apply_all_opts=False, profile=False,
+                 local_opt=opt.LocalOptGroup):
         super(LocalGroupDB, self).__init__()
         self.failure_callback = None
         self.apply_all_opts = apply_all_opts
         self.profile = profile
         self.__position__ = {}
+        self.local_opt = local_opt

     def register(self, name, obj, *tags, **kwargs):
         super(LocalGroupDB, self).register(name, obj, *tags)

@@ -429,7 +431,7 @@ class LocalGroupDB(DB):
         opts = list(super(LocalGroupDB, self).query(*tags, **kwtags))
         opts.sort(key=lambda obj: (self.__position__[obj.name], obj.name))
-        ret = opt.LocalOptGroup(*opts,
+        ret = self.local_opt(*opts,
                              apply_all_opts=self.apply_all_opts,
                              profile=self.profile)
         return ret
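The effect of the new local_opt hook shows up later in this commit: a LocalGroupDB can now be told which group class to build when queried. A minimal sketch of the two variants, mirroring how pool_db and pool_db2 are constructed in theano/gpuarray/opt.py below:

    import theano
    from theano.gof import LocalGroupDB

    # Default behaviour is unchanged: queries build a plain LocalOptGroup.
    db = LocalGroupDB()

    # With the new parameter, queries build a GraphToGPULocalOptGroup
    # instead, exactly as pool_db2 does in this commit.
    db2 = LocalGroupDB(local_opt=theano.gof.opt.GraphToGPULocalOptGroup)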
theano/gpuarray/dnn.py

@@ -38,7 +38,7 @@ from .elemwise import GpuElemwise
 # These don't exist in gpuarray
 # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
 from .nnet import GpuSoftmax
-from .opt import (gpu_seqopt, register_opt,
+from .opt import (gpu_seqopt, register_opt, pool_db, pool_db2,
                   op_lifter, register_opt2)
 from .opt_util import alpha_merge, output_merge, inplace_allocempty, pad_dims, unpad_dims

@@ -2736,9 +2736,6 @@ def local_dnn_convi_output_merge(node, *inputs):
     return [gpu_dnn_conv_gradI(algo=node.op.algo)(*inputs)]


-@register_opt('cudnn', 'fast_compile')
-@op_lifter([Pool])
-@register_opt2([Pool], 'fast_compile', 'cudnn')
 def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn()

@@ -2758,11 +2755,16 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
         img_padded = pad_dims(img, 2, nd)
         ret_padded = dnn_pool(img_padded, ws, stride=stride, pad=pad, mode=mode)
         return unpad_dims(ret_padded, img, 2, nd)

+pool_db.register("local_gpua_pool_dnn_alternative",
+                 op_lifter([Pool])(local_gpua_pool_dnn_alternative),
+                 'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                 position=0)
+pool_db2.register("local_gpua_pool_dnn_alternative",
+                  local_optimizer([Pool])(local_gpua_pool_dnn_alternative),
+                  'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                  position=0)

-@register_opt('cudnn', 'fast_compile')
-@op_lifter([MaxPoolGrad])
-@register_opt2([MaxPoolGrad], 'fast_compile', 'cudnn')
 def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn()

@@ -2797,11 +2799,16 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
                                stride,
                                pad)
         return unpad_dims(ret_padded, inp, 2, nd)

+pool_db.register("local_gpua_pool_dnn_grad_stride",
+                 op_lifter([MaxPoolGrad])(local_gpua_pool_dnn_grad_stride),
+                 'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                 position=0)
+pool_db2.register("local_gpua_pool_dnn_grad_stride",
+                  local_optimizer([MaxPoolGrad])(local_gpua_pool_dnn_grad_stride),
+                  'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                  position=0)

-@register_opt('cudnn', 'fast_compile')
-@op_lifter([AveragePoolGrad])
-@register_opt2([AveragePoolGrad], 'fast_compile', 'cudnn')
 def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn()

@@ -2832,6 +2839,14 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
                                stride,
                                pad)
         return unpad_dims(ret_padded, inp, 2, nd)

+pool_db.register("local_gpua_avg_pool_dnn_grad_stride",
+                 op_lifter([AveragePoolGrad])(local_gpua_avg_pool_dnn_grad_stride),
+                 'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                 position=0)
+pool_db2.register("local_gpua_avg_pool_dnn_grad_stride",
+                  local_optimizer([AveragePoolGrad])(local_gpua_avg_pool_dnn_grad_stride),
+                  'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                  position=0)

 @register_opt('cudnn', 'fast_compile')
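Each decorator trio removed above is replaced by an explicit pair of registrations: the pool_db entry wraps the function with op_lifter (ordinary node-based lifting), while the pool_db2 entry wraps it with local_optimizer so the same function can run under the GraphToGPU signature. A sketch of the pattern with a hypothetical lifter my_dnn_lifter; the tags are illustrative:

    from theano.gof import local_optimizer
    from theano.tensor.signal.pool import Pool
    from theano.gpuarray.opt import op_lifter, pool_db, pool_db2

    def my_dnn_lifter(op, ctx_name, inputs, outputs):
        return None  # placeholder body: decline every node

    # position=0 places it ahead of the generic gpuarray lifters, which
    # theano/gpuarray/opt.py registers at position=1.
    pool_db.register("my_dnn_lifter", op_lifter([Pool])(my_dnn_lifter),
                     'gpuarray', 'cudnn', position=0)
    pool_db2.register("my_dnn_lifter", local_optimizer([Pool])(my_dnn_lifter),
                      'gpuarray', 'cudnn', position=0)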
theano/gpuarray/opt.py

@@ -13,6 +13,7 @@ from theano import tensor, scalar, gof, config
 from theano.compile import optdb
 from theano.compile.ops import shape_i
 from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
+                        LocalGroupDB,
                         SequenceDB, Optimizer, DB, toolbox, graph)
 from theano.ifelse import IfElse
 from theano.misc.ordered_set import OrderedSet

@@ -129,6 +130,9 @@ def register_opt2(tracks, *tags, **kwargs):
     '''
     def f(local_opt):
         name = (kwargs and kwargs.pop('name')) or local_opt.__name__
-        opt = theano.gof.local_optimizer(tracks)(local_opt)
+        if isinstance(local_opt, theano.gof.DB):
+            opt = local_opt
+        else:
+            opt = theano.gof.local_optimizer(tracks)(local_opt)
         gpu_optimizer2.register(name, opt, 'fast_run', 'gpuarray', *tags)
         return local_opt

@@ -1592,15 +1596,8 @@ def local_gpua_lift_abstractconv_graph(op, context_name, inputs, outputs):
         return [op(*inps)]


-@register_opt()
-@op_lifter([pool.Pool])
-@register_opt2([pool.Pool])
 def local_gpu_pool(op, ctx_name, inputs, outputs):
-    from .dnn import dnn_available
     assert op.__props__ == ('ignore_border', 'mode', 'ndim')
-    if op.ignore_border and dnn_available(ctx_name):
-        return
     inp, ws, stride, pad = inputs
     nd = op.ndim
     if nd not in (2, 3):

@@ -1615,16 +1612,23 @@ def local_gpu_pool(op, ctx_name, inputs, outputs):
         inp_padded = pad_dims(inp, 2, nd)
         ret_padded = op(inp_padded, ws, stride, pad)
         return unpad_dims(ret_padded, inp, 2, nd)

+pool_db = LocalGroupDB()
+pool_db2 = LocalGroupDB(local_opt=theano.gof.opt.GraphToGPULocalOptGroup)
+pool_db2.__name__ = "pool_db2"
+lifter = op_lifter([pool.Pool])(local_gpu_pool)
+pool_db.register("local_gpu_pool", lifter,
+                 'gpuarray', 'fast_compile', 'fast_run',
+                 position=1)
+pool_db2.register("local_gpu_pool",
+                  local_optimizer([pool.Pool])(local_gpu_pool),
+                  'gpuarray', 'fast_compile', 'fast_run',
+                  position=1)
+register_opt('fast_compile', name='pool_db')(pool_db)
+register_opt2([pool.Pool], 'fast_compile', name='pool_db2')(pool_db2)

-@register_opt()
-@op_lifter([pool.MaxPoolGrad])
-@register_opt2([pool.MaxPoolGrad])
 def local_gpu_max_pool_grad(op, ctx_name, inputs, outputs):
-    from .dnn import dnn_available
     assert op.__props__ == ('ignore_border', 'mode', 'ndim')
-    if op.ignore_border and dnn_available(ctx_name):
-        return
     inp, out, out_grad, ws, stride, pad = inputs
     nd = op.ndim

@@ -1645,16 +1649,18 @@ def local_gpu_max_pool_grad(op, ctx_name, inputs, outputs):
         ret_padded = op(inp_padded, out_padded, out_grad_padded,
                         ws, stride, pad)
         return unpad_dims(ret_padded, inp, 2, nd)

+lifter = op_lifter([pool.MaxPoolGrad])(local_gpu_max_pool_grad)
+pool_db.register("local_gpu_max_pool_grad", lifter,
+                 'gpuarray', 'fast_compile', 'fast_run',
+                 position=1)
+pool_db2.register("local_gpu_max_pool_grad",
+                  local_optimizer([pool.MaxPoolGrad])(local_gpu_max_pool_grad),
+                  'gpuarray', 'fast_compile', 'fast_run',
+                  position=1)

-@register_opt()
-@op_lifter([pool.AveragePoolGrad])
-@register_opt2([pool.AveragePoolGrad])
 def local_gpu_average_pool_grad(op, ctx_name, inputs, outputs):
-    from .dnn import dnn_available
     assert op.__props__ == ('ignore_border', 'mode', 'ndim')
-    if op.ignore_border and dnn_available(ctx_name):
-        return
     inp, out_grad, ws, stride, pad = inputs
     nd = op.ndim

@@ -1673,6 +1679,14 @@ def local_gpu_average_pool_grad(op, ctx_name, inputs, outputs):
         ret_padded = op(inp_padded, out_grad_padded,
                         ws, stride, pad)
         return unpad_dims(ret_padded, inp, 2, nd)

+lifter = op_lifter([pool.AveragePoolGrad])(local_gpu_average_pool_grad)
+pool_db.register("local_gpu_average_pool_grad", lifter,
+                 'gpuarray', 'fast_compile', 'fast_run',
+                 position=1)
+pool_db2.register("local_gpu_average_pool_grad",
+                  local_optimizer([pool.AveragePoolGrad])(local_gpu_average_pool_grad),
+                  'gpuarray', 'fast_compile', 'fast_run',
+                  position=1)

 @register_opt()
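Once handed to register_opt/register_opt2, the two databases act as single grouped optimizations. Querying pool_db2 now yields a GraphToGPULocalOptGroup whose transform tries members in position order and keeps the first replacement. A minimal sketch, assuming the usual '+tag' query convention from theano.gof.optdb:

    from theano.gpuarray.opt import pool_db, pool_db2

    # Members registered at position=0 (the cuDNN lifters in dnn.py) are
    # tried before the generic gpuarray ones at position=1.
    group = pool_db2.query('+fast_run')   # a GraphToGPULocalOptGroup
    plain = pool_db.query('+fast_run')    # a regular LocalOptGroup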
theano/gpuarray/tests/test_pool.py

@@ -54,7 +54,7 @@ def test_pool2d():
     ref_mode = copy.copy(mode_without_gpu)
     ref_mode.check_py_code = False
-    gpu_mode = copy.copy(mode_with_gpu)
+    gpu_mode = copy.copy(mode_with_gpu).excluding("cudnn")
     gpu_mode.check_py_code = False

     for shp in shps:

@@ -147,7 +147,7 @@ def test_pool3d():
     ref_mode = copy.copy(mode_without_gpu)
     ref_mode.check_py_code = False
-    gpu_mode = copy.copy(mode_with_gpu)
+    gpu_mode = copy.copy(mode_with_gpu).excluding("cudnn")
     gpu_mode.check_py_code = False

     for shp in shps:
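The test change is needed because the cuDNN and generic pooling lifters now live in the same grouped optimization: Mode.excluding returns a copy of the mode whose optimizer query filters out anything carrying the given tag, so these tests keep exercising the plain gpuarray pooling ops rather than the cuDNN ones. A minimal sketch:

    from theano.compile import get_default_mode

    # excluding() does not mutate the mode; it returns a new Mode whose
    # optimizer skips optimizations registered with the 'cudnn' tag.
    mode = get_default_mode().excluding('cudnn')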