Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
f0bd940e
提交
f0bd940e
authored
10月 19, 2015
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3477 from nouiz/crash_gpu
Crash gpu and opt speed up
上级
dab522df
7fce44ca
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
15 个修改的文件
包含
57 行增加
和
27 行删除
+57
-27
optimization.txt
doc/extending/optimization.txt
+3
-3
mode.py
theano/compile/mode.py
+10
-1
cc.py
theano/gof/cc.py
+2
-2
opt.py
theano/gof/opt.py
+0
-0
optdb.py
theano/gof/optdb.py
+14
-7
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+1
-1
cudnn_helper.h
theano/sandbox/cuda/cudnn_helper.h
+3
-3
dnn.py
theano/sandbox/cuda/dnn.py
+1
-1
opt.py
theano/sandbox/cuda/opt.py
+1
-1
type.py
theano/sandbox/cuda/type.py
+1
-1
basic.py
theano/tensor/basic.py
+2
-2
opt.py
theano/tensor/opt.py
+0
-0
downsample.py
theano/tensor/signal/downsample.py
+4
-1
test_downsample.py
theano/tensor/signal/tests/test_downsample.py
+10
-0
test_opt.py
theano/tensor/tests/test_opt.py
+5
-4
没有找到文件。
doc/extending/optimization.txt
浏览文件 @
f0bd940e
...
...
@@ -212,11 +212,11 @@ optimization you wrote. For example, consider the following:
Nothing happened here. The reason is: ``add(y, z) != add(y,
z)``. That is the case for efficiency reasons. To fix this problem we
first need to merge the parts of the graph that represent the same
computation, using the ``
merge_o
ptimizer`` defined in
computation, using the ``
MergeO
ptimizer`` defined in
``theano.gof.opt``.
>>> from theano.gof.opt import
merge_o
ptimizer
>>>
merge_optimizer
.optimize(e) # doctest: +ELLIPSIS
>>> from theano.gof.opt import
MergeO
ptimizer
>>>
MergeOptimizer()
.optimize(e) # doctest: +ELLIPSIS
(0, ..., None, None, {}, 1, 0)
>>> e
[true_div(mul(*1 -> add(y, z), x), *1)]
...
...
theano/compile/mode.py
浏览文件 @
f0bd940e
...
...
@@ -198,8 +198,17 @@ optdb.register('merge1', gof.MergeOptimizer(),
0
,
'fast_run'
,
'fast_compile'
,
'merge'
)
# rearranges elemwise expressions
optdb
.
register
(
'canonicalize'
,
gof
.
EquilibriumDB
(),
optdb
.
register
(
'canonicalize'
,
gof
.
EquilibriumDB
(
ignore_newtrees
=
False
),
1
,
'fast_run'
,
'fast_compile'
)
# Register in the canonizer Equilibrium as a clean up opt the merge opt.
# Without this, as the equilibrium have ignore_newtrees=False, we
# won't merge all nodes if it is set as a global optimizer with
# final_opt=True.
# We need a new instance of MergeOptimizer to don't have its name
# changed by other usage of it.
optdb
[
'canonicalize'
]
.
register
(
"merge"
,
gof
.
opt
.
MergeOptimizer
(),
'fast_run'
,
"fast_compile"
,
cleanup
=
True
)
optdb
.
register
(
'merge1.2'
,
gof
.
MergeOptimizer
(),
1.2
,
'fast_run'
,
'fast_compile'
,
'merge'
)
...
...
theano/gof/cc.py
浏览文件 @
f0bd940e
...
...
@@ -547,6 +547,7 @@ class CLinker(link.Linker):
if
no_recycling
is
None
:
no_recycling
=
[]
if
self
.
fgraph
is
not
None
and
self
.
fgraph
is
not
fgraph
:
# A linker can be tied to only one FunctionGraph.
return
type
(
self
)(
self
.
schedule
)
.
accept
(
fgraph
,
no_recycling
)
self
.
fgraph
=
fgraph
self
.
fetch_variables
()
...
...
@@ -1750,14 +1751,13 @@ class OpWiseCLinker(link.LocalLinker):
if
no_recycling
is
None
:
no_recycling
=
[]
if
self
.
fgraph
is
not
None
and
self
.
fgraph
is
not
fgraph
:
# A linker can be tied to only one FunctionGraph.
return
type
(
self
)(
fallback_on_perform
=
self
.
fallback_on_perform
,
allow_gc
=
self
.
allow_gc
,
nice_errors
=
self
.
nice_errors
,
schedule
=
self
.
schedule
,
)
.
accept
(
fgraph
,
no_recycling
)
# raise Exception("Cannot accept from a Linker that is
# already tied to another FunctionGraph.")
self
.
fgraph
=
fgraph
self
.
no_recycling
=
no_recycling
return
self
...
...
theano/gof/opt.py
浏览文件 @
f0bd940e
差异被折叠。
点击展开。
theano/gof/optdb.py
浏览文件 @
f0bd940e
...
...
@@ -268,28 +268,35 @@ class EquilibriumDB(DB):
super
(
EquilibriumDB
,
self
)
.
__init__
()
self
.
ignore_newtrees
=
ignore_newtrees
self
.
__final__
=
{}
self
.
__cleanup__
=
{}
def
register
(
self
,
name
,
obj
,
*
tags
,
**
kwtags
):
if
'final_opt'
in
kwtags
:
final_opt
=
kwtags
[
'final_opt'
]
kwtags
.
pop
(
'final_opt'
,
None
)
else
:
final_opt
=
False
final_opt
=
kwtags
.
pop
(
'final_opt'
,
False
)
cleanup
=
kwtags
.
pop
(
'cleanup'
,
False
)
# An opt should not be final and clean up
assert
not
(
final_opt
and
cleanup
)
super
(
EquilibriumDB
,
self
)
.
register
(
name
,
obj
,
*
tags
,
**
kwtags
)
self
.
__final__
[
name
]
=
final_opt
self
.
__cleanup__
[
name
]
=
cleanup
def
query
(
self
,
*
tags
,
**
kwtags
):
_opts
=
super
(
EquilibriumDB
,
self
)
.
query
(
*
tags
,
**
kwtags
)
final_opts
=
[
o
for
o
in
_opts
if
self
.
__final__
.
get
(
o
.
name
,
False
)]
opts
=
[
o
for
o
in
_opts
if
o
not
in
final_opts
]
cleanup_opts
=
[
o
for
o
in
_opts
if
self
.
__cleanup__
.
get
(
o
.
name
,
False
)]
opts
=
[
o
for
o
in
_opts
if
o
not
in
final_opts
and
o
not
in
cleanup_opts
]
if
len
(
final_opts
)
==
0
:
final_opts
=
None
if
len
(
cleanup_opts
)
==
0
:
cleanup_opts
=
None
return
opt
.
EquilibriumOptimizer
(
opts
,
max_use_ratio
=
config
.
optdb
.
max_use_ratio
,
ignore_newtrees
=
self
.
ignore_newtrees
,
failure_callback
=
opt
.
NavigatorOptimizer
.
warn_inplace
,
final_optimizers
=
final_opts
)
final_optimizers
=
final_opts
,
cleanup_optimizers
=
cleanup_opts
)
class
SequenceDB
(
DB
):
...
...
theano/sandbox/cuda/basic_ops.py
浏览文件 @
f0bd940e
...
...
@@ -3622,7 +3622,7 @@ class GpuAllocEmpty(GpuOp):
const_shp
=
tensor
.
get_scalar_constant_value
(
s
)
except
tensor
.
NotScalarConstantError
:
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
)
)
bcast
.
append
(
1
==
const_shp
)
otype
=
CudaNdarrayType
(
dtype
=
'float32'
,
broadcastable
=
bcast
)
output
=
otype
()
return
sh
,
output
...
...
theano/sandbox/cuda/cudnn_helper.h
浏览文件 @
f0bd940e
...
...
@@ -48,7 +48,7 @@ cudnnSetTensorNdDescriptor(
int
nbDims
,
const
int
dimA
[],
const
int
strideA
[])
{
if
(
n
d
Dims
!=
4
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
n
b
Dims
!=
4
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
return
cudnnSetTensor4dDescriptorEx
(
tensorDesc
,
dataType
,
dimA
[
0
],
dimA
[
1
],
dimA
[
2
],
dimA
[
3
],
...
...
@@ -204,7 +204,7 @@ cudnnSetPoolingNdDescriptor(
int
nbDims
,
const
int
windowDimA
[],
const
int
paddingA
[],
const
in
strideA
[])
{
const
in
t
strideA
[])
{
if
(
nbDims
!=
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
paddingA
[
0
]
!=
0
||
paddingA
[
1
]
!=
0
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
return
cudnnSetPoolingDescriptor
(
poolingDesc
,
mode
,
...
...
@@ -223,7 +223,7 @@ cudnnGetPoolingNdDescriptor(
int
strideA
[])
{
int
win0
,
win1
,
str0
,
str1
;
cudnnStatus_t
err
;
if
(
n
d
DimsRequested
<
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
n
b
DimsRequested
<
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
err
=
cudnnGetPoolingDescriptor
(
poolingDesc
,
mode
,
&
win0
,
&
win1
,
&
str0
,
&
str1
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
return
err
;
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
f0bd940e
...
...
@@ -1760,7 +1760,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad}
pad
(pad_h, pad_w) padding information.
(pad_h, pad_w) padding information.
pad_h is the number of zero-valued pixels added to each of the top and
bottom borders.
pad_w is the number of zero-valued pixels added to each of the left
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
f0bd940e
...
...
@@ -104,7 +104,7 @@ optdb.register('gpu_after_fusion',
'gpu'
)
# Register merge_optimizer as a global opt
gpu_optimizer
.
register
(
'gpu_merge'
,
theano
.
gof
.
opt
.
merge_optimizer
,
gpu_optimizer
.
register
(
'gpu_merge'
,
theano
.
gof
.
opt
.
MergeOptimizer
()
,
'fast_run'
,
'fast_compile'
,
final_opt
=
True
)
...
...
theano/sandbox/cuda/type.py
浏览文件 @
f0bd940e
...
...
@@ -81,7 +81,7 @@ class CudaNdarrayType(Type):
raise
TypeError
(
'
%
s only supports dtype float32 for now. Tried '
'using dtype
%
s for variable
%
s'
%
(
self
.
__class__
.
__name__
,
dtype
,
name
))
self
.
broadcastable
=
tuple
(
broadcastable
)
self
.
broadcastable
=
tuple
(
b
ool
(
b
)
for
b
in
b
roadcastable
)
self
.
name
=
name
self
.
dtype_specs
()
# error checking is done there
...
...
theano/tensor/basic.py
浏览文件 @
f0bd940e
...
...
@@ -2673,7 +2673,7 @@ class Alloc(gof.Op):
const_shp
=
get_scalar_constant_value
(
s
)
except
NotScalarConstantError
:
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
)
)
bcast
.
append
(
1
==
const_shp
)
return
sh
,
bcast
def
make_node
(
self
,
value
,
*
shape
):
...
...
@@ -6037,7 +6037,7 @@ class AllocEmpty(gof.Op):
const_shp
=
get_scalar_constant_value
(
s
)
except
NotScalarConstantError
:
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
)
)
bcast
.
append
(
1
==
const_shp
)
otype
=
TensorType
(
dtype
=
self
.
dtype
,
broadcastable
=
bcast
)
output
=
otype
()
return
sh
,
output
...
...
theano/tensor/opt.py
浏览文件 @
f0bd940e
差异被折叠。
点击展开。
theano/tensor/signal/downsample.py
浏览文件 @
f0bd940e
...
...
@@ -256,7 +256,10 @@ class DownsampleFactorMax(Op):
raise
TypeError
()
# TODO: consider restricting the dtype?
x
=
tensor
.
as_tensor_variable
(
x
)
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
# If the input shape are broadcastable we can have 0 in the output shape
broad
=
x
.
broadcastable
[:
2
]
+
(
False
,
False
)
out
=
tensor
.
TensorType
(
x
.
dtype
,
broad
)
return
gof
.
Apply
(
self
,
[
x
],
[
out
()])
def
perform
(
self
,
node
,
inp
,
out
):
x
,
=
inp
...
...
theano/tensor/signal/tests/test_downsample.py
浏览文件 @
f0bd940e
...
...
@@ -801,6 +801,16 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
[
image_val
,
maxout_val
,
gz_val
],
MaxPoolGrad
,
warn
=
False
)
# checking with broadcastable input
image
=
tensor
.
tensor
(
dtype
=
'float64'
,
broadcastable
=
(
False
,
False
,
True
,
True
))
image_val
=
rng
.
rand
(
4
,
6
,
1
,
1
)
self
.
_compile_and_check
(
[
image
],
[
DownsampleFactorMax
((
2
,
2
),
ignore_border
=
True
,
padding
=
(
0
,
0
))(
image
)],
[
image_val
],
DownsampleFactorMax
)
def
test_opt_max_to_average
(
self
):
im
=
theano
.
tensor
.
tensor4
()
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
f0bd940e
...
...
@@ -481,7 +481,7 @@ class test_canonize(unittest.TestCase):
mode
=
compile
.
mode
.
get_default_mode
()
opt
=
gof
.
Query
([
"canonicalize"
])
opt
=
opt
.
including
(
'ShapeOpt'
)
opt
=
opt
.
including
(
'ShapeOpt'
,
'local_fill_to_alloc'
)
opt
=
opt
.
excluding
(
'local_elemwise_fusion'
)
mode
=
mode
.
__class__
(
linker
=
mode
.
linker
,
optimizer
=
opt
)
...
...
@@ -4021,7 +4021,8 @@ class T_Rebroadcast(unittest.TestCase):
class
T_useless_elemwise
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
mode
=
theano
.
compile
.
get_default_mode
()
.
including
(
'canonicalize'
)
self
.
mode
=
theano
.
compile
.
get_default_mode
()
.
including
(
'canonicalize'
,
'local_fill_to_alloc'
)
def
test_eq
(
self
):
x
=
T
.
dmatrix
()
...
...
@@ -4545,7 +4546,7 @@ class T_local_erfc(unittest.TestCase):
# test that we work without the mul
f
=
theano
.
function
([
x
],
T
.
exp
(
T
.
neg
(
T
.
sqr
(
x
)))
/
T
.
erfc
(
x
),
mode
=
mode
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
3
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
2
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
f
.
maker
.
fgraph
.
outputs
[
0
]
.
dtype
==
theano
.
config
.
floatX
assert
all
(
numpy
.
isfinite
(
f
(
val
)))
...
...
@@ -4558,7 +4559,7 @@ class T_local_erfc(unittest.TestCase):
# test that we work without the sqr and neg
f
=
theano
.
function
([
x
],
T
.
exp
(
T
.
mul
(
-
1
,
x
,
x
))
/
T
.
erfc
(
x
),
mode
=
mode
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
2
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
1
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
f
.
maker
.
fgraph
.
outputs
[
0
]
.
dtype
==
theano
.
config
.
floatX
assert
all
(
numpy
.
isfinite
(
f
(
val
)))
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论