testgroup / pytensor · Commits

Commit 658bf2ef
Authored Jul 03, 2015 by Xavier Bouthillier
Add optimizations and relative tests
Parent: ed4e0095
Showing 4 changed files with 347 additions and 6 deletions.
theano/sandbox/cuda/opt.py             +108 −6
theano/sandbox/cuda/tests/test_opt.py   +35 −0
theano/sandbox/opt.py                  +171 −0
theano/sandbox/tests/test_opt.py        +33 −0
theano/sandbox/cuda/opt.py
@@ -21,6 +21,7 @@ from theano.gof import (local_optimizer, EquilibriumDB, ProxyDB,
                         Optimizer, toolbox)
 from theano.gof.opt import LocalMetaOptimizer
 from theano.sandbox.cuda import as_cuda_ndarray_variable
+from theano.sandbox.opt import register_meta_opt
 from theano.sandbox.cuda.basic_ops import (
     gpu_eye, gpu_contiguous,
     gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
@@ -45,6 +46,9 @@ from theano.sandbox.cuda.blas import gpu_ger_no_inplace
 from theano.sandbox.cuda.blas import (GpuDownsampleFactorMax,
                                       GpuDownsampleFactorMaxGrad, GpuDownsampleFactorMaxGradGrad)
+from theano.sandbox.blocksparse import SparseBlockGemv, SparseBlockOuter
+from theano.sandbox.cuda.blocksparse import GpuSparseBlockGemv, GpuSparseBlockOuter
+
 from theano.sandbox.cuda.nnet import (
     GpuCrossentropySoftmaxArgmax1HotWithBias,
     GpuCrossentropySoftmax1HotWithBiasDx,
@@ -266,8 +270,8 @@ def local_gpu_elemwise_0(node):
                           'uint16'])
         # case 1 - all inputs are already float32
         if all([i.type.dtype == 'float32' for i in node.inputs]):
-            # TODO: change this when fusion makes Elemwise with multiple
-            # outputs
+            # TODO: change this when fusion makes Elemwise with
+            # multiple outputs
             gpu_elemwise = new_op(*(gpu_from_host(i)
                                     for i in node.inputs))
         # case 2 - it is still ok if some inputs were upcast to float32
@@ -741,10 +745,10 @@ def local_gpu_gemm(node):
...
@@ -741,10 +745,10 @@ def local_gpu_gemm(node):
y_on_gpu
=
(
y
.
owner
and
isinstance
(
y
.
owner
.
op
,
HostFromGpu
))
y_on_gpu
=
(
y
.
owner
and
isinstance
(
y
.
owner
.
op
,
HostFromGpu
))
z_on_gpu
=
(
z
.
owner
and
isinstance
(
z
.
owner
.
op
,
HostFromGpu
))
z_on_gpu
=
(
z
.
owner
and
isinstance
(
z
.
owner
.
op
,
HostFromGpu
))
if
x_on_gpu
or
y_on_gpu
or
z_on_gpu
:
if
x_on_gpu
or
y_on_gpu
or
z_on_gpu
:
return
[
host_from_gpu
(
gpu_gemm_no_inplace
(
as_cuda_ndarray_variable
(
z
),
return
[
host_from_gpu
(
gpu_gemm_no_inplace
(
gpu_from_host
(
z
),
a
,
a
,
as_cuda_ndarray_variable
(
x
),
gpu_from_host
(
x
),
as_cuda_ndarray_variable
(
y
),
gpu_from_host
(
y
),
b
))]
b
))]
return
False
return
False
@@ -996,7 +1000,8 @@ def local_gpu_advanced_subtensor1(node):
     if node.op.__class__ is tensor.AdvancedSubtensor1:
         x = node.inputs[0]
         coords = node.inputs[1:]
-        if x.owner and isinstance(x.owner.op, HostFromGpu) and x.dtype == "float32":
+        if (x.owner and isinstance(x.owner.op, HostFromGpu) and
+                x.dtype == "float32"):
             gpu_x, = x.owner.inputs
             return [host_from_gpu(GpuAdvancedSubtensor1()(gpu_x, *coords))]
     return False
@@ -2420,4 +2425,101 @@ optdb.register('gpu_scanOp_make_inplace',
                'inplace',
                'scan')
+
+# XXX: these optimisations were badly broken and now require a working
+# beta param (could only be a 0/1 thing for outer_merge, but
+# alpha_merge needs the full range).
+# @register_opt()
+# @alpha_merge(GpuSparseBlockOuter, alpha_in=5, beta_in=?, nd=4)
+# def local_merge_blocksparse_alpha(node, *inputs):
+#     """
+#     GpuElemwise{mul}(lr, GpuSparseBlockOuter) -> GpuSparseBlockOuter(..., alpha=lr)
+#     """
+#     return [gpu_sparse_block_outer(*inputs)]
+
+# @register_opt()
+# @output_merge(GpuSparseBlockOuter, alpha_in=5, beta_in=?, out_in=0, nd=4)
+# def local_merge_blocksparse_output(node, *inputs):
+#     return [gpu_sparse_block_outer(*inputs)]
+
+
+def _owner_isinstance(inp, test_class):
+    """
+    Tests whether the input has an owner and whether its owner's op is
+    of type `test_class`.
+    """
+    return bool(inp.owner) and isinstance(inp.owner.op, test_class)
+
+
+def _clear_host_from_gpu(inputs):
+    """
+    Replace any HostFromGpu by its input.
+    """
+    clean_inputs = []
+
+    for inp in inputs:
+        if _owner_isinstance(inp, HostFromGpu):
+            clean_inputs.append(inp.owner.inputs[0])
+        else:
+            clean_inputs.append(inp)
+
+    return clean_inputs
+
+
+@register_meta_opt(SparseBlockGemv, ["gpu_opt", "gpu_local_optimizations"],
+                   0., 'fast_run', 'fast_compile', 'gpu')
+@local_optimizer([SparseBlockGemv, GpuFromHost])
+def gpu_sparse_block_gemv_opt(node):
+    """
+    SparseBlockGemv(HostFromGpu(input)) ->
+    HostFromGpu(GpuSparseBlockGemv(input))
+    or
+    GpuFromHost(SparseBlockGemv) -> GpuSparseBlockGemv
+    """
+    if isinstance(node.op, SparseBlockGemv) and \
+            any(_owner_isinstance(inp, HostFromGpu) for inp in node.inputs):
+        inputs = _clear_host_from_gpu(node.inputs)
+        return [host_from_gpu(GpuSparseBlockGemv(node.op.inplace)(*inputs))]
+    elif isinstance(node.op, GpuFromHost) and \
+            _owner_isinstance(node.inputs[0], SparseBlockGemv):
+        meta_node = node.inputs[0].owner
+        inputs = _clear_host_from_gpu(meta_node.inputs)
+        return [GpuSparseBlockGemv(meta_node.op.inplace)(*inputs)]
+
+
+@register_meta_opt(SparseBlockOuter, ["gpu_opt", "gpu_local_optimizations"],
+                   0., 'fast_run', 'fast_compile', 'gpu')
+@local_optimizer([SparseBlockOuter, GpuFromHost])
+def gpu_sparse_block_outer_opt(node):
+    """
+    SparseBlockOuter(HostFromGpu(input)) ->
+    HostFromGpu(GpuSparseBlockOuter(input))
+    or
+    GpuFromHost(SparseBlockOuter) -> GpuSparseBlockOuter
+    """
+    if isinstance(node.op, SparseBlockOuter) and \
+            any(_owner_isinstance(inp, HostFromGpu) for inp in node.inputs):
+        inputs = _clear_host_from_gpu(node.inputs)
+        return [host_from_gpu(GpuSparseBlockOuter(node.op.inplace)(*inputs))]
+    elif isinstance(node.op, GpuFromHost) and \
+            _owner_isinstance(node.inputs[0], SparseBlockOuter):
+        meta_node = node.inputs[0].owner
+        inputs = _clear_host_from_gpu(meta_node.inputs)
+        return [GpuSparseBlockOuter(meta_node.op.inplace)(*inputs)]
+
+
 import theano.sandbox.cuda.extra_ops
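
Both new optimizations follow the same two-case pattern: either strip HostFromGpu transfers from the inputs of a SparseBlockGemv/SparseBlockOuter node and wrap the GPU op's result in host_from_gpu, or absorb a GpuFromHost sitting on top of the meta op. Below is a minimal self-contained sketch of the input-cleaning step; FakeApply, FakeVariable and the HostFromGpu class here are illustrative stand-ins for Theano's graph types, not code from this commit.

# Illustrative stand-ins for Theano's Apply/Variable graph structure.
# FakeApply, FakeVariable and this HostFromGpu are hypothetical names
# used only to demonstrate the helpers above; they are not Theano classes.
class FakeApply(object):
    def __init__(self, op, inputs):
        self.op = op          # the op that produced the output variable
        self.inputs = inputs  # the variables the op consumed


class FakeVariable(object):
    def __init__(self, owner=None):
        self.owner = owner    # FakeApply that produced it, or None


class HostFromGpu(object):    # stand-in for the GPU-to-host transfer op
    pass


def _owner_isinstance(inp, test_class):
    # True when `inp` was produced by an op of type `test_class`.
    return bool(inp.owner) and isinstance(inp.owner.op, test_class)


def _clear_host_from_gpu(inputs):
    # Replace every HostFromGpu(x) by x itself; pass other inputs through.
    return [inp.owner.inputs[0] if _owner_isinstance(inp, HostFromGpu)
            else inp
            for inp in inputs]


gpu_var = FakeVariable()
transferred = FakeVariable(owner=FakeApply(HostFromGpu(), [gpu_var]))
plain = FakeVariable()
assert _clear_host_from_gpu([transferred, plain]) == [gpu_var, plain]

With the inputs cleaned this way, the rewrite only has to rebuild the node with the GPU implementation and reattach a single host transfer at the output, as the docstrings above describe.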
theano/sandbox/cuda/tests/test_opt.py
@@ -29,6 +29,9 @@ from theano.sandbox.cuda import basic_ops
 from theano.sandbox.cuda.type import CudaNdarrayType
 from theano.scalar.basic_scipy import erfinv
+from theano.sandbox.blocksparse import sparse_block_dot
+from theano.sandbox.cuda.blocksparse import GpuSparseBlockGemv, GpuSparseBlockOuter
+
 if theano.config.mode == 'FAST_COMPILE':
     mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
     mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
@@ -740,6 +743,37 @@ def test_local_gpu_dot_to_dot22dot():
     cmp((3, 4), (4,))


+def test_blocksparse_gpu_gemv_opt():
+    b = tensor.fmatrix()
+    W = tensor.ftensor4()
+    h = tensor.ftensor3()
+    iIdx = tensor.lmatrix()
+    oIdx = tensor.lmatrix()
+
+    o = sparse_block_dot(W, h, iIdx, b, oIdx)
+
+    f = theano.function([W, h, iIdx, b, oIdx], o, mode=mode_with_gpu)
+
+    assert isinstance(f.maker.fgraph.toposort()[-2].op, GpuSparseBlockGemv)
+
+
+def test_blocksparse_gpu_outer_opt():
+    b = tensor.fmatrix()
+    W = tensor.ftensor4()
+    h = tensor.ftensor3()
+    iIdx = tensor.lmatrix()
+    oIdx = tensor.lmatrix()
+
+    o = sparse_block_dot(W, h, iIdx, b, oIdx)
+
+    theano.printing.debugprint(tensor.grad(o.sum(), wrt=W))
+
+    f = theano.function([W, h, iIdx, b, oIdx],
+                        [o, tensor.grad(o.sum(), wrt=W)],
+                        mode=mode_with_gpu)
+
+    assert isinstance(f.maker.fgraph.toposort()[-2].op, GpuSparseBlockOuter)
+
+
 class test_diag(theano.tensor.tests.test_nlinalg.test_diag):
     mode = mode_with_gpu
     shared = staticmethod(cuda.shared_constructor)
@@ -751,6 +785,7 @@ class test_diag(theano.tensor.tests.test_nlinalg.test_diag):
                               self).__init__(name)


 if __name__ == '__main__':
     test_gpualloc()
     test_opt_gpujoin_onlyajoin()
theano/sandbox/opt.py (new file, mode 100644)
"""
Optimizations addressing the ops in sandbox root directory
"""
import
bisect
import
logging
from
theano.compile
import
optdb
from
theano.gof
import
local_optimizer
,
EquilibriumDB
from
theano.tensor.opt
import
register_specialize
from
theano.sandbox.blocksparse
import
(
SparseBlockGemv
,
SparseBlockOuter
,
sparse_block_gemv
,
sparse_block_outer
,
sparse_block_gemv_inplace
,
sparse_block_outer_inplace
,
CpuSparseBlockGemv
,
CpuSparseBlockOuter
)
_logger
=
logging
.
getLogger
(
'theano.sandbox.opt'
)
def
_db_exists
(
db
,
db_name
):
"""
Tests whether the full path from `db_name[0]` down to
`db_name[-1]` exists.
Parameters
----------
db: `theano.gof.optdb.DB`
A dataset of optimisations or sub-datasets.
db_name: list or tuple of strings
Names of datasets from given one `db[db_name[0]]` down
to the dataset of interest where to register.
ex: ['level_1_dataset', 'level_2_dataset']
"""
if
len
(
db_name
)
==
1
:
return
db_name
[
0
]
in
db
.
_names
return
db_name
[
0
]
in
db
.
_names
and
_db_exists
(
db
[
db_name
[
0
]],
db_name
[
1
:])
def
_db_register
(
db
,
db_name
,
*
args
):
"""
Registers an object in last datasets given in db_name. `db_name[-1]`
is deep in the hierarchy of `db`.
Parameters
----------
db: `theano.gof.optdb.DB`
A dataset of optimisations or sub-datasets.
db_name: list or tuple of strings
Names of datasets from given one `db[db_name[0]]` down
to the dataset of interest where to register.
ex: ['level_1_dataset', 'level_2_dataset']
"""
if
len
(
db_name
)
==
0
:
return
db
.
register
(
*
args
)
return
_db_register
(
db
[
db_name
[
0
]],
db_name
[
1
:],
*
args
)
def
_db_positions
(
db
,
db_name
,
positions
=
()):
"""
Returns the list of positions of all databases from `db_name[0]`
down to `db_name[-1]`. The path is hierarchical, hence `db_name[0]`
is in `db`, `db_name[1]` is in `db[db_name[0]]`, etc.
Parameters
----------
db: `theano.gof.optdb.DB`
A dataset of optimisations or sub-datasets.
db_name: list or tuple of strings
Names of datasets from given one `db[db_name[0]]` down
to the dataset of interests.
ex: ['level_1_dataset', 'level_2_dataset']
"""
if
len
(
db_name
)
==
0
:
return
positions
db_position
=
db
.
__position__
.
get
(
db_name
[
0
],
0.
)
return
_db_positions
(
db
[
db_name
[
0
]],
db_name
[
1
:],
positions
+
(
db_position
,
))
def
register_meta_opt
(
op_class
,
db_name
,
position
,
*
args
):
"""
Registers a given optimization under given database name and saves
optimization information in `op_class.registered_opts`.
Parameters
----------
op_class: `theano.gof.Op`
A meta Op which have multiple implementations available
for optimization.
db_name: string, list or tuple of strings
A string if optimization is inserted in `theano.compile.optdb`
directly. List is used to insert an optimization deep inside a
hierarchy of optimization databases.
position: int or float
Position of the optimisation in the target dataset.
(Position in deep database if not optdb)
*args
Arguments to register the optimization.
"""
if
isinstance
(
db_name
,
str
):
db_name
=
[
db_name
]
def
call
(
local_meta_opt
):
if
not
_db_exists
(
optdb
,
db_name
):
# TODO: Would another default DB be better?
_db_register
(
optdb
,
db_name
[:
-
2
],
db_name
[
-
1
],
EquilibriumDB
(),
position
,
*
args
)
_db_register
(
optdb
,
db_name
,
local_meta_opt
.
__name__
,
local_meta_opt
,
*
args
)
positions
=
_db_positions
(
optdb
,
db_name
)
idx
=
bisect
.
bisect_left
((
positions
,
local_meta_opt
),
op_class
.
registered_opts
)
op_class
.
registered_opts
.
insert
(
idx
,
(
positions
,
local_meta_opt
.
__name__
))
return
local_meta_opt
return
call
@register_meta_opt
(
SparseBlockGemv
,
[
"meta_cpu"
],
51.0
,
"fast_run"
,
"fast_compile"
)
@local_optimizer
([
SparseBlockGemv
])
def
cpu_sparse_block_gemv_opt
(
node
):
"""
SparseBlockGemv -> CpuSparseBlockGemv
"""
return
[
CpuSparseBlockGemv
(
node
.
op
.
inplace
)(
*
node
.
inputs
)]
@register_meta_opt
(
SparseBlockOuter
,
[
"meta_cpu"
],
51.0
,
"fast_run"
,
"fast_compile"
)
@local_optimizer
([
SparseBlockOuter
])
def
cpu_sparse_block_outer_opt
(
node
):
"""
SparseBlockOuter -> CpuSparseBlockOuter
"""
return
[
CpuSparseBlockOuter
(
node
.
op
.
inplace
)(
*
node
.
inputs
)]
@register_specialize
@local_optimizer
([
sparse_block_gemv
],
inplace
=
True
)
def
local_inplace_block_sparse_gemv
(
node
):
"""
SparseBlockGemv(inplace=False) -> SparseBlockGemv(inplace=True)
"""
return
[
sparse_block_gemv_inplace
(
*
node
.
inputs
)]
@register_specialize
@local_optimizer
([
sparse_block_outer
],
inplace
=
True
)
def
local_inplace_block_sparse_outer
(
node
):
"""
SparseBlockOuter(inplace=False) -> SparseBlockOuter(inplace=True)
"""
return
[
sparse_block_outer_inplace
(
*
node
.
inputs
)]
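
The `_db_*` helpers above implement a recursive walk over a hierarchy of optimization databases keyed by a path of names. A toy model of that walk follows, using a hypothetical `DictDB` built on plain dicts rather than `theano.gof.optdb.DB`; it is a sketch of the idea only, not the actual Theano API.

# Toy model of the recursive database-path walk behind _db_exists and
# _db_register. DictDB is a hypothetical stand-in, not a Theano class.
class DictDB(object):
    def __init__(self):
        self._names = {}            # name -> sub-database or optimizer

    def __getitem__(self, name):
        return self._names[name]

    def register(self, name, obj):
        self._names[name] = obj


def db_exists(db, path):
    # Does db[path[0]][path[1]]... exist all the way down?
    if len(path) == 1:
        return path[0] in db._names
    return path[0] in db._names and db_exists(db[path[0]], path[1:])


def db_register(db, path, name, obj):
    # Descend along `path`, then register `obj` in the innermost database.
    if len(path) == 0:
        return db.register(name, obj)
    return db_register(db[path[0]], path[1:], name, obj)


root = DictDB()
root.register('meta_cpu', DictDB())           # create the sub-database
db_register(root, ['meta_cpu'], 'cpu_sparse_block_gemv_opt', object())
assert db_exists(root, ['meta_cpu'])

`register_meta_opt` builds on the same walk: it creates a missing `EquilibriumDB` level on first use, registers the local optimizer inside it, and inserts `(positions, name)` into `op_class.registered_opts` so the available implementations stay ordered by database position.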
theano/sandbox/tests/test_opt.py (new file, mode 100644)
import theano
from theano import tensor
from theano.sandbox.blocksparse import (CpuSparseBlockGemv,
                                        CpuSparseBlockOuter,
                                        sparse_block_dot)


def test_blocksparse_cpu_gemv_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], o)

    assert isinstance(f.maker.fgraph.toposort()[-1].op, CpuSparseBlockGemv)


def test_blocksparse_cpu_outer_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    theano.printing.debugprint(tensor.grad(o.sum(), wrt=W))

    f = theano.function([W, h, iIdx, b, oIdx],
                        [o, tensor.grad(o.sum(), wrt=W)])

    assert isinstance(f.maker.fgraph.toposort()[-1].op, CpuSparseBlockOuter)
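
For reference, a minimal direct runner for the two new CPU tests, assuming a Theano checkout with this commit applied; this block is illustrative and not part of the commit itself.

# Hypothetical direct runner for the new CPU tests; not part of the
# commit. Assumes a theano with this commit applied is importable.
if __name__ == '__main__':
    from theano.sandbox.tests import test_opt

    test_opt.test_blocksparse_cpu_gemv_opt()
    test_opt.test_blocksparse_cpu_outer_opt()
    print('blocksparse CPU meta-optimization tests passed')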