testgroup / pytensor · Commits

Commit 70e25931, authored Feb 27, 2014 by abergeron

Merge pull request #1756 from nouiz/mixed

Fix gpu crash and faster optimization

Parents: 85209fbb 745b5559

Showing 12 changed files with 63 additions and 60 deletions (+63 / -60)
doc/tutorial/multi_cores.txt                             +3   -3
theano/gof/__init__.py                                   +0   -1
theano/gof/opt.py                                        +11  -35
theano/gof/optdb.py                                      +0   -1
theano/sandbox/cuda/blas.py                              +1   -1
theano/sandbox/cuda/conv.cu                              +1   -0
theano/sandbox/cuda/opt.py                               +4   -4
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py      +1   -0
theano/sandbox/gpuarray/opt.py                           +1   -2
theano/scan_module/scan_opt.py                           +0   -1
theano/tensor/blas.py                                    +41  -12
theano/tensor/opt.py                                     +0   -0
doc/tutorial/multi_cores.txt

@@ -18,7 +18,7 @@ those operations will run in parallel in Theano.
 The most frequent way to control the number of threads used is via the
 ``OMP_NUM_THREADS`` environment variable. Set it to the number of
 threads you want to use before starting the python process. Some BLAS
-implementation support other enviroment variable.
+implementations support other enviroment variables.

 Parallel element wise ops with OpenMP

@@ -27,8 +27,8 @@ Parallel element wise ops with OpenMP
 Because element wise ops work on every tensor entry independently they
 can be easily parallelized using OpenMP.
-To use OpenMP you must set the ``openmp`` flag to ``True`` in Theano
-configuration.
+To use OpenMP you must set the ``openmp`` :ref:`flag <libdoc_config>`
+to ``True``.
 You can use the flag ``openmp_elemwise_minsize`` to set the minimum
 tensor size for which the operation is parallelized because for short
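
A minimal usage sketch of the behaviour this documentation hunk describes (the names ``OMP_NUM_THREADS``, ``openmp`` and ``openmp_elemwise_minsize`` come from the tutorial text above; the concrete values and the set-flags-at-runtime style are assumptions, not part of the commit):

    # Sketch: control BLAS/OpenMP threading as the tutorial describes.
    # OMP_NUM_THREADS must be set before the threaded libraries are loaded.
    import os
    os.environ.setdefault("OMP_NUM_THREADS", "4")

    import theano
    theano.config.openmp = True                     # enable OpenMP element-wise ops
    theano.config.openmp_elemwise_minsize = 200000  # only parallelize large tensors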
theano/gof/__init__.py

@@ -62,7 +62,6 @@ from theano.gof.opt import (Optimizer, optimizer, SeqOptimizer,
                             LocalOptimizer, local_optimizer, LocalOptGroup,
                             OpSub, OpRemove, PatternSub,
                             NavigatorOptimizer, TopoOptimizer, EquilibriumOptimizer,
-                            InplaceOptimizer, PureThenInplaceOptimizer,
                             OpKeyOptimizer)
 from theano.gof.optdb import \
theano/gof/opt.py

@@ -131,6 +131,9 @@ class FromFunctionOptimizer(Optimizer):
+    def __call__(self, *args, **kwargs):
+        return self.fn(*args, **kwargs)
+
     def __str__(self):
         return self.__name__

 def optimizer(f):
     """decorator for FromFunctionOptimizer"""

@@ -626,7 +629,10 @@ class MergeOptimizer(Optimizer):
         print >> stream, blanc, "  replace_time", replace_time
         print >> stream, blanc, "  validate_time", validate_time
         print >> stream, blanc, "  callback_time", callback_time
-        print >> stream, blanc, "  callback_times", callbacks_time
+        print >> stream, blanc, "  callbacks_time"
+        for i in sorted(callbacks_time.iteritems(), key=lambda a: a[1]):
+            if i[1] > 0:
+                print i
         print >> stream, blanc, "  nb_merged", nb_merged
         print >> stream, blanc, "  nb_constant", nb_constant

@@ -1490,7 +1496,6 @@ class EquilibriumOptimizer(NavigatorOptimizer):
     def __init__(self,
                  optimizers,
                  failure_callback=None,
-                 max_depth=None,
                  max_use_ratio=None):
         """
         :param optimizers: list or set of local or global optimizations to

@@ -1499,8 +1504,6 @@ class EquilibriumOptimizer(NavigatorOptimizer):
         :param max_use_ratio: each optimizer can be applied at most
             (size of graph * this number) times
-        :param max_depth: TODO what does this do? (EquilibriumDB sets it to 5)
         """
         super(EquilibriumOptimizer, self).__init__(

@@ -1520,7 +1523,6 @@ class EquilibriumOptimizer(NavigatorOptimizer):
                 self.local_optimizers_map.setdefault(c, []).append(opt)
             else:
                 self.global_optimizers.append(opt)
-        self.max_depth = max_depth
         self.max_use_ratio = max_use_ratio
         assert self.max_use_ratio is not None, (
                 'max_use_ratio has to be a number')

@@ -1723,11 +1725,13 @@ class EquilibriumOptimizer(NavigatorOptimizer):
             for (t, count, opt) in count_opt[::-1]:
                 print >> stream, blanc, '  %.3fs - %d - %s' % (t, count, opt)
-            print >> stream, blanc, ' %.3fs - in %d optimization that where not used' % (
+            print >> stream, blanc, ' %.3fs - in %d optimization that where not used (display only those with a runtime > 0)' % (
                 not_used_time, len(not_used))
             not_used.sort()
             for (t, opt) in not_used[::-1]:
-                print >> stream, blanc + "  ", '  %.3fs - %s' % (t, opt)
+                if t > 0:
+                    # Skip opt that have 0 times, they probably wasn't even tried.
+                    print >> stream, blanc + "  ", '  %.3fs - %s' % (t, opt)
             print >> stream

     @staticmethod

@@ -1899,31 +1903,3 @@ def pre_greedy_local_optimizer(list_optimizations, out):
     final_outs, optimized_nodes = local_recursive_function(
         list_optimizations, out, {}, 0)
     return final_outs[out_index]
-
-
-############
-### Misc ###
-############
-
-class InplaceOptimizer(Optimizer):
-
-    def __init__(self, inplace):
-        self.inplace = inplace
-
-    def apply(self, fgraph):
-        self.inplace(fgraph)
-
-    def add_requirements(self, fgraph):
-        fgraph.attach_feature(dh.DestroyHandler())
-
-
-class PureThenInplaceOptimizer(Optimizer):
-
-    def __init__(self, pure, inplace):
-        self.pure = pure
-        self.inplace = inplace
-
-    def apply(self, fgraph):
-        self.pure(fgraph)
-        fgraph.attach_feature(dh.DestroyHandler())
-        self.inplace(fgraph)
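
A minimal sketch (not part of the commit) of why the added FromFunctionOptimizer.__call__ is convenient: functions wrapped with the @optimizer decorator become FromFunctionOptimizer instances, and __call__ lets them still be invoked like the plain function they wrap. The optimizer name below is hypothetical.

    from theano.gof.opt import optimizer

    @optimizer
    def my_noop_opt(fgraph):
        # hypothetical global optimization that does nothing
        pass

    # With __call__ delegating to the wrapped function, both spellings work
    # on a FunctionGraph `fg`:
    #     my_noop_opt.optimize(fg)   # the Optimizer interface
    #     my_noop_opt(fg)            # the plain-function interface added here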
theano/gof/optdb.py

@@ -194,7 +194,6 @@ class EquilibriumDB(DB):
     def query(self, *tags, **kwtags):
         opts = super(EquilibriumDB, self).query(*tags, **kwtags)
         return opt.EquilibriumOptimizer(
             opts,
-            max_depth=5,
             max_use_ratio=config.optdb.max_use_ratio,
             failure_callback=opt.NavigatorOptimizer.warn_inplace)
theano/sandbox/cuda/blas.py

@@ -671,7 +671,7 @@ class GpuConv(GpuOp):
     def c_code_cache_version(self):
         # raise this whenever modifying any of the support_code_files
-        return (0, 20)
+        return (0, 21)

     def c_support_code_apply(self, node, nodename):
         # REMEMBER TO RAISE c_code_cache_version when changing any of
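
The version bump above follows Theano's C-code caching convention: the tuple returned by c_code_cache_version keys the compiled-module cache, so it must change whenever the generated or supporting C code changes. A minimal sketch of that convention (the Op below is hypothetical, not from this commit):

    from theano.gof import Op

    class MyCOp(Op):
        # ... c_code / c_support_code for the Op would go here ...
        def c_code_cache_version(self):
            # Bump this tuple whenever the C code changes, otherwise stale
            # compiled modules are reused from the cache.
            return (0, 1)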
theano/sandbox/cuda/conv.cu

@@ -1018,6 +1018,7 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern,
        (version == 3 || version == 4 || version == 5 || version == -1) &&
        out_wid <= max_threads_dim0 &&  //Maximum of X threads by block.x
        (kern_len + 2 * kern_len - 2) * img_wid_padded * sizeof(float) + kern_size_byte < shared_avail &&  //their is only 16k of shared memory
+       (kern_len > 1 || (img_size_padded_byte + kern_size_byte) <= shared_avail) &&
        !work_complete)  //conv_full_patch_stack_padded
     {
         //version 3 without split
theano/sandbox/cuda/opt.py

@@ -14,7 +14,7 @@ import theano.ifelse
 from theano.compile import optdb
 from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
-                        Optimizer, toolbox, DestroyHandler)
+                        Optimizer, toolbox)
 from theano.gof.python25 import all, any
 from theano.sandbox.cuda.basic_ops import (device_properties, gpu_eye,

@@ -62,7 +62,7 @@ optdb.register('gpu_opt',
 # inside the elemwise. When there is no float64 op, this is working.
 optdb.register('gpu_after_fusion',
                ProxyDB(gpu_seqopt),
-               optdb.__position__.get('elemwise_fusion', 71) + .1,
+               optdb.__position__.get('elemwise_fusion', 49) + .1,
                'gpu')

@@ -88,7 +88,6 @@ class InputToGpuOptimizer(Optimizer):
     def add_requirements(self, fgraph):
         fgraph.attach_feature(toolbox.ReplaceValidate())
-        fgraph.attach_feature(DestroyHandler())

     def apply(self, fgraph):
         for input in fgraph.inputs:

@@ -1339,9 +1338,10 @@ gpu_local_elemwise_fusion = tensor.opt.local_elemwise_fusion_op(
     max_inputs_to_GpuElemwise)
 if config.gpu.local_elemwise_fusion:
     _logger.debug("enabling optimization fusion of gpu elemwise in fast_run")
+    #Must be after cpu fusion at 40, gpu at 48.5 and before AddDestroyHandler at 49.5
     optdb.register('gpu_elemwise_fusion',
                    tensor.opt.FusionOptimizer(gpu_local_elemwise_fusion),
-                   71.00, 'fast_run', 'fusion',
+                   49, 'fast_run', 'fusion',
                    'local_elemwise_fusion', 'gpu')
 else:
     _logger.debug(("not enabling optimization fusion of gpu elemwise in "
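
The two position changes above (71 down to 49) rely on optdb's numeric ordering: a pass registered with a smaller position runs earlier in the optimization pipeline. A minimal sketch of that registration pattern (the names my_gpu_db and 'my_gpu_after_fusion_example' are hypothetical; the 'elemwise_fusion' anchor and the AddDestroyHandler-at-49.5 ordering are taken from this commit's comments):

    from theano.compile import optdb
    from theano.gof import SequenceDB, ProxyDB

    my_gpu_db = SequenceDB()  # hypothetical container of GPU optimizations

    # Position ~49.1 places this pass just after 'elemwise_fusion' (if present)
    # and before the AddDestroyHandler step at 49.5.
    optdb.register('my_gpu_after_fusion_example',
                   ProxyDB(my_gpu_db),
                   optdb.__position__.get('elemwise_fusion', 49) + .1,
                   'gpu')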
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py

@@ -679,6 +679,7 @@ def test_full():
             #Test more than maxThreadsDim0
             , ((2, 4, 13, 1050), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1))
             , ((2, 4, 1050, 13), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1))
+            , ((1, 1, 44800, 1), (6, 1, 1, 1), (1, 1), (1, 1), (1, 1))  #This caused crash
             ]
 #        shapes=shapes[:277]
theano/sandbox/gpuarray/opt.py

@@ -5,7 +5,7 @@ from theano import tensor, scalar
 from theano.compile import optdb
 from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
-                        Optimizer, toolbox, DestroyHandler,
+                        Optimizer, toolbox,
                         InconsistencyError, EquilibriumOptimizer)
 from theano.gof.python25 import all, any

@@ -90,7 +90,6 @@ class InputToGpuOptimizer(Optimizer):
     def add_requirements(self, fgraph):
         fgraph.attach_feature(toolbox.ReplaceValidate())
-        fgraph.attach_feature(DestroyHandler())

     def apply(self, fgraph):
         for input in fgraph.inputs:
theano/scan_module/scan_opt.py

@@ -1509,7 +1509,6 @@ class PushOutDot1(gof.Optimizer):
     def add_requirements(self, fgraph):
         fgraph.attach_feature(toolbox.ReplaceValidate())
-        fgraph.attach_feature(DestroyHandler())

     def apply(self, fgraph):
theano/tensor/blas.py

@@ -139,7 +139,7 @@ except ImportError:
     pass

 from theano.configparser import config, AddConfigVar, StrParam
-from theano.gof import (utils, Op, view_roots, DestroyHandler,
+from theano.gof import (utils, Op, view_roots,
                         local_optimizer, Optimizer, InconsistencyError,
                         toolbox, SequenceDB, EquilibriumOptimizer, Apply,

@@ -1488,7 +1488,6 @@ class GemmOptimizer(Optimizer):
     def add_requirements(self, fgraph):
         fgraph.attach_feature(toolbox.ReplaceValidate())
-        fgraph.attach_feature(DestroyHandler())

     def apply(self, fgraph):
         did_something = True

@@ -1501,9 +1500,21 @@ class GemmOptimizer(Optimizer):
         time_factor_can = 0
         time_factor_list = 0
         time_toposort = 0
+        if fgraph.profile:
+            validate_before = fgraph.profile.validate_time
+            callbacks_before = fgraph.execute_callbacks_times.copy()
+            callback_before = fgraph.execute_callbacks_time
+
+        class Updater:
+            def on_import(self, fgraph, new_node, reason):
+                if new_node is not node:
+                    nodelist.append(new_node)
+        u = Updater()
+        fgraph.attach_feature(u)
         while did_something:
             nb_iter += 1
             t0 = time.time()
-            nodelist = list(fgraph.toposort())
+            nodelist = theano.gof.graph.io_toposort(fgraph.inputs, fgraph.outputs)
             time_toposort += time.time() - t0
             did_something = False
             nodelist.reverse()

@@ -1546,16 +1557,30 @@ class GemmOptimizer(Optimizer):
                     except ReplacementDidntRemovedError, e:
                         nb_replacement_didn_t_remove += 1
                         self.warned = True
-            nb_iter += 1
+        fgraph.remove_feature(u)
+        if fgraph.profile:
+            validate_time = fgraph.profile.validate_time - validate_before
+            callback_time = fgraph.execute_callbacks_time - callback_before
+            callbacks_time = {}
+            for k, v in fgraph.execute_callbacks_times.iteritems():
+                if k in callbacks_before:
+                    callbacks_time[k] = v - callbacks_before[k]
+                else:
+                    callbacks_time[k] = v
+        else:
+            validate_time = None
+            callback_time = None
+            callbacks_time = {}
         return (self, nb_iter, nb_replacement, nb_replacement_didn_t_remove,
                 nb_inconsistency_make, nb_inconsistency_replace,
                 time_canonicalize, time_factor_can,
-                time_factor_list, time_toposort)
+                time_factor_list, time_toposort, validate_time,
+                callback_time, callbacks_time,)

     @staticmethod
     def print_profile(stream, prof, level=0):
         blanc = ('    ' * level)
         #1946.912556s - ('gemm_optimizer', 'GemmOptimizer', 1)
         print >> stream, blanc, "GemmOptimizer"
         print >> stream, blanc, " nb_iter", prof[1]
         print >> stream, blanc, " nb_replacement", prof[2]

@@ -1566,6 +1591,12 @@ class GemmOptimizer(Optimizer):
         print >> stream, blanc, " time_factor_can", prof[7]
         print >> stream, blanc, " time_factor_list", prof[8]
         print >> stream, blanc, " time_toposort", prof[9]
+        print >> stream, blanc, " validate_time", prof[10]
+        print >> stream, blanc, " callback_time", prof[11]
+        print >> stream, blanc, " callbacks_time"
+        for i in sorted(prof[12].iteritems(), key=lambda a: a[1]):
+            if i[1] > 0:
+                print i


 class Dot22(GemmRelated):

@@ -1816,17 +1847,15 @@ blas_optdb.register('local_gemm_to_gemv',
                     15, 'fast_run')

-# After destroyhandler is in but before we try to make elemwise things inplace
-# Try to make gemm inplace
-# Also, need to make the gemm optimisation(step 70) happen before the
-# fusion of elemwise(step 71)
+# After destroyhandler(49.5) but before we try to make elemwise things
+# inplace (75)
 blas_opt_inplace = in2out(local_inplace_gemm,
                           local_inplace_gemv,
                           local_inplace_ger,
                           name="blas_opt_inplace")
 optdb.register('InplaceBlasOpt',
-               blas_opt_inplace,
-               70.0, 'fast_run', 'inplace')
+               blas_opt_inplace, 70.0, 'fast_run', 'inplace',
+               'blas_opt_inplace')


 class Dot22Scalar(GemmRelated):
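
A minimal sketch (made-up numbers, Python 2 like the surrounding code) of how the extended profile tuple returned by GemmOptimizer.apply lines up with print_profile above: prof[10], prof[11] and prof[12] carry the validate_time, callback_time and per-callback times added in this commit.

    import sys
    from theano.tensor.blas import GemmOptimizer

    prof = (None,                    # prof[0]: the optimizer instance itself
            3, 12, 0, 0, 0,          # nb_iter, nb_replacement, didn't-remove, inconsistencies
            0.50, 0.10, 0.10, 0.20,  # time_canonicalize .. time_toposort (prof[6:10])
            0.05, 0.02,              # validate_time, callback_time (prof[10:12])
            {'ReplaceValidate': 0.02})  # callbacks_time dict (prof[12])
    GemmOptimizer.print_profile(sys.stdout, prof)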
theano/tensor/opt.py

(Diff collapsed in this view: 0 additions, 0 deletions.)