Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
efb4786e
提交
efb4786e
authored
9月 29, 2016
作者:
Frédéric Bastien
提交者:
GitHub
9月 29, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5032 from nouiz/simplify
Speed up the canonizer for big list of num/denum
上级
53ba24bb
f4f8b257
显示空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
92 行增加
和
55 行删除
+92
-55
fg.py
theano/gof/fg.py
+22
-23
opt.py
theano/gpuarray/opt.py
+9
-10
type.py
theano/gpuarray/type.py
+31
-1
opt.py
theano/tensor/opt.py
+30
-11
test_gc.py
theano/tensor/tests/test_gc.py
+0
-10
没有找到文件。
theano/gof/fg.py
浏览文件 @
efb4786e
...
...
@@ -6,7 +6,6 @@ types that it can raise.
"""
from
__future__
import
absolute_import
,
print_function
,
division
from
collections
import
OrderedDict
import
sys
import
time
import
traceback
...
...
@@ -260,7 +259,7 @@ class FunctionGraph(utils.object2):
"""
return
r
.
clients
def
__add_client
s__
(
self
,
r
,
new_clients
):
def
__add_client
__
(
self
,
r
,
new_client
):
"""
Updates the list of clients of r with new_client.
...
...
@@ -268,20 +267,18 @@ class FunctionGraph(utils.object2):
----------
r
Variable.
new_client
s
List of (node, i) pairs
such that node.inputs[i] is r.
new_client
(node, i) pair
such that node.inputs[i] is r.
"""
if
set
(
r
.
clients
)
.
intersection
(
set
(
new_clients
)):
print
(
'ERROR: clients intersect!'
,
file
=
sys
.
stderr
)
print
(
' RCLIENTS of'
,
r
,
[(
n
,
i
,
type
(
n
),
id
(
n
))
for
n
,
i
in
r
.
clients
],
file
=
sys
.
stderr
)
print
(
' NCLIENTS of'
,
r
,
[(
n
,
i
,
type
(
n
),
id
(
n
))
for
n
,
i
in
new_clients
],
file
=
sys
.
stderr
)
assert
not
set
(
r
.
clients
)
.
intersection
(
set
(
new_clients
))
r
.
clients
+=
new_clients
# No need to do the assert as it is always True. The logic
# that calls __add_client__ is valid. When the client list is
# long, the check is time consuming, so we don't enable it by
# default.
# assert not new_client in r.clients
r
.
clients
.
append
(
new_client
)
def
__remove_client
s__
(
self
,
r
,
clients
_to_remove
,
def
__remove_client
__
(
self
,
r
,
client
_to_remove
,
prune
=
True
,
reason
=
None
):
"""
Removes client_to_remove from the clients list of r.
...
...
@@ -296,8 +293,8 @@ class FunctionGraph(utils.object2):
----------
r : Variable
The clients of r will be removed.
client
s_to_remove : List of (op, i) pairs
List of (op, i) pairs
such that node.inputs[i] is not r anymore.
client
_to_remove : (op, i) pair
(op, i) pair
such that node.inputs[i] is not r anymore.
prune : bool
If prune is True, remove r from this fgraph if it doesn't
have any clients left.
...
...
@@ -311,9 +308,11 @@ class FunctionGraph(utils.object2):
clients_to_remove and prune=True will remove r.
"""
for
entry
in
clients_to_remove
:
r
.
clients
.
remove
(
entry
)
assert
entry
not
in
r
.
clients
# an op,i pair should be unique
if
client_to_remove
:
r
.
clients
.
remove
(
client_to_remove
)
# entry should be unique in r.clients. No need to assert it as it is
# already asserted in __add_client__.
# assert entry not in r.clients
if
r
.
clients
:
return
False
if
not
prune
:
...
...
@@ -333,7 +332,7 @@ class FunctionGraph(utils.object2):
self
.
execute_callbacks
(
'on_prune'
,
apply_node
,
reason
)
for
i
,
input
in
enumerate
(
apply_node
.
inputs
):
self
.
__remove_client
s__
(
input
,
[(
apply_node
,
i
)]
,
self
.
__remove_client
__
(
input
,
(
apply_node
,
i
)
,
reason
=
reason
)
# variable should not have any clients.
# assert not variable.clients
...
...
@@ -431,7 +430,7 @@ class FunctionGraph(utils.object2):
if
input
not
in
self
.
variables
:
self
.
__setup_r__
(
input
)
self
.
variables
.
add
(
input
)
self
.
__add_client
s__
(
input
,
[(
node
,
i
)]
)
self
.
__add_client
__
(
input
,
(
node
,
i
)
)
assert
node
.
fgraph
is
self
self
.
execute_callbacks
(
'on_import'
,
node
,
reason
)
...
...
@@ -470,15 +469,15 @@ class FunctionGraph(utils.object2):
return
self
.
__import_r__
(
new_r
,
reason
=
reason
)
self
.
__add_client
s__
(
new_r
,
[(
node
,
i
)]
)
prune
=
self
.
__remove_client
s__
(
r
,
[(
node
,
i
)]
,
False
)
self
.
__add_client
__
(
new_r
,
(
node
,
i
)
)
prune
=
self
.
__remove_client
__
(
r
,
(
node
,
i
)
,
False
)
# Precondition: the substitution is semantically valid
# However it may introduce cycles to the graph, in which case the
# transaction will be reverted later.
self
.
execute_callbacks
(
'on_change_input'
,
node
,
i
,
r
,
new_r
,
reason
=
reason
)
if
prune
:
self
.
__remove_client
s__
(
r
,
[]
,
True
,
reason
=
reason
)
self
.
__remove_client
__
(
r
,
None
,
True
,
reason
=
reason
)
# replace #
def
replace
(
self
,
r
,
new_r
,
reason
=
None
,
verbose
=
None
):
...
...
theano/gpuarray/opt.py
浏览文件 @
efb4786e
...
...
@@ -29,18 +29,21 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
from
theano.tests.breakpoint
import
PdbBreakpoint
from
.type
import
(
GpuArrayType
,
GpuArrayConstant
,
get_context
,
ContextNotDefined
)
ContextNotDefined
,
move_to_gpu
)
from
.basic_ops
import
(
as_gpuarray_variable
,
infer_context_name
,
host_from_gpu
,
GpuToGpu
,
HostFromGpu
,
GpuFromHost
,
GpuSplit
,
GpuContiguous
,
gpu_contiguous
,
GpuAlloc
,
GpuAllocEmpty
,
GpuReshape
,
GpuEye
,
gpu_join
,
GpuJoin
,
gpu_alloc_empty
,
gpu_alloc
,
gpu_from_host
)
GpuEye
,
gpu_join
,
GpuJoin
,
gpu_alloc_empty
,
gpu_alloc
,
gpu_from_host
)
from
.blas
import
(
gpu_dot22
,
GpuGemm
,
GpuGer
,
GpuGemmBatch
,
gpugemm_no_inplace
,
gpugemm_inplace
,
gpugemmbatch_no_inplace
,
gpugemm_no_inplace
,
gpugemm_inplace
,
gpugemmbatch_no_inplace
,
gpugemv_no_inplace
,
gpugemv_inplace
)
from
.blocksparse
import
(
GpuSparseBlockGemv
,
GpuSparseBlockOuter
,
gpu_sparse_block_outer
,
gpu_sparse_block_outer_inplace
,
gpu_sparse_block_outer
,
gpu_sparse_block_outer_inplace
,
gpu_sparse_block_gemv
,
gpu_sparse_block_gemv_inplace
)
from
.nnet
import
(
gpu_crossentropy_softmax_1hot_with_bias_dx
,
gpu_crossentropy_softmax_argmax_1hot_with_bias
,
...
...
@@ -239,9 +242,8 @@ class InputToGpuOptimizer(Optimizer):
target
=
getattr
(
input
.
tag
,
'target'
,
None
)
if
target
==
'cpu'
:
continue
# Do not move *int* scalar to the GPU.
if
(
isinstance
(
input
.
type
,
tensor
.
TensorType
)
and
input
.
ndim
==
0
and
'int'
in
input
.
dtype
):
not
move_to_gpu
(
input
)
):
continue
try
:
...
...
@@ -297,10 +299,7 @@ class GraphToGPU(Optimizer):
# Iterating through inputs of graph
target
=
infer_context_name
(
*
fgraph
.
inputs
)
for
i
in
fgraph
.
inputs
:
# Do not move *int* scalar to the GPU.
if
(
isinstance
(
i
.
type
,
tensor
.
TensorType
)
and
(
i
.
ndim
>
0
or
'int'
not
in
i
.
dtype
)
and
"complex"
not
in
i
.
dtype
):
if
isinstance
(
i
.
type
,
tensor
.
TensorType
)
and
move_to_gpu
(
i
):
mapping
[
i
]
=
i
.
transfer
(
getattr
(
i
.
tag
,
'target'
,
target
))
else
:
mapping
[
i
]
=
i
...
...
theano/gpuarray/type.py
浏览文件 @
efb4786e
...
...
@@ -22,6 +22,26 @@ except ImportError:
_context_reg
=
{}
def
move_to_gpu
(
data
):
"""
Do we want to move this computation to the GPU?
Currently, we don't move complex and scalar int.
Parameters
----------
data : numpy.ndarray or TensorVariable
(it must have dtype and ndim parameter)
"""
# We don't support complex on the GPU
if
str
(
data
.
dtype
)
in
tensor
.
basic
.
complex_dtypes
:
return
False
# We don't want scalar int on the GPU.
if
data
.
ndim
==
0
and
str
(
data
.
dtype
)
in
tensor
.
basic
.
discrete_dtypes
:
return
False
return
True
class
ContextNotDefined
(
ValueError
):
pass
...
...
@@ -561,16 +581,22 @@ class GpuArraySharedVariable(_operators, SharedVariable):
GpuArrayType
.
SharedVariable
=
GpuArraySharedVariable
notset
=
object
()
def
gpuarray_shared_constructor
(
value
,
name
=
None
,
strict
=
False
,
allow_downcast
=
None
,
borrow
=
False
,
broadcastable
=
None
,
target
=
None
):
broadcastable
=
None
,
target
=
notset
):
"""
SharedVariable constructor for GpuArrayType.
See :func:`theano.shared`.
:target: default None
The device target. Because None is itself a valid target, we use
a `notset` sentinel object as the default to distinguish "argument
not passed" from an explicit None.
"""
if
target
==
'gpu'
or
target
==
'cpu'
:
raise
TypeError
(
'not for me'
)
...
...
@@ -578,6 +604,10 @@ def gpuarray_shared_constructor(value, name=None, strict=False,
if
not
isinstance
(
value
,
(
numpy
.
ndarray
,
pygpu
.
gpuarray
.
GpuArray
)):
raise
TypeError
(
'ndarray or GpuArray required'
)
if
target
is
notset
:
target
=
None
if
not
move_to_gpu
(
value
):
raise
TypeError
(
'We do not move that data by default to the GPU'
)
try
:
get_context
(
target
)
except
ContextNotDefined
:
...
...
theano/tensor/opt.py
浏览文件 @
efb4786e
...
...
@@ -4751,12 +4751,16 @@ class Canonizer(gof.LocalOptimizer):
numeric constant. If v is a plain Variable, returns None.
"""
if
isinstance
(
v
,
Variable
):
try
:
# As the constant folding is in the canonicalize phase,
# we don't need to check the whole graph each time.
return
get_scalar_constant_value
(
v
,
only_process_constants
=
True
)
except
NotScalarConstantError
:
if
isinstance
(
v
,
Constant
):
if
getattr
(
v
.
tag
,
'unique_value'
,
None
)
is
not
None
:
data
=
v
.
tag
.
unique_value
else
:
data
=
v
.
data
if
data
.
ndim
==
0
:
return
data
else
:
return
None
elif
isinstance
(
v
,
Variable
):
return
None
else
:
return
v
...
...
@@ -4790,6 +4794,21 @@ class Canonizer(gof.LocalOptimizer):
| [a, b], [c, d] -> [a, b], [c, d]
"""
ln
=
len
(
num
)
ld
=
len
(
denum
)
if
(
ld
>
2
and
ln
>
2
):
# Faster version for "big" inputs.
while
True
:
s
=
set
(
num
)
# Inputs can appear multiple times
redo
=
len
(
s
)
!=
len
(
num
)
inter
=
s
.
intersection
(
denum
)
for
v
in
inter
:
num
.
remove
(
v
)
denum
.
remove
(
v
)
if
not
redo
or
not
inter
:
break
else
:
for
v
in
list
(
num
):
if
v
in
denum
:
num
.
remove
(
v
)
...
...
@@ -4815,9 +4834,8 @@ class Canonizer(gof.LocalOptimizer):
| [x, 2, y], [z, 2] -> [x, y], [z]
"""
# Lists representing the numerator and denominator
num
,
denum
=
list
(
orig_num
),
list
(
orig_denum
)
num
,
denum
=
[],
[]
# Lists representing the *constant* elements of num and denum
numct
,
denumct
=
[],
[]
...
...
@@ -4826,15 +4844,16 @@ class Canonizer(gof.LocalOptimizer):
ct
=
self
.
get_constant
(
v
)
if
ct
is
not
None
:
# We found a constant in the numerator!
# We remove it from num
num
.
remove
(
v
)
# We add it to numct
numct
.
append
(
ct
)
else
:
num
.
append
(
v
)
for
v
in
orig_denum
:
ct
=
self
.
get_constant
(
v
)
if
ct
is
not
None
:
denum
.
remove
(
v
)
denumct
.
append
(
ct
)
else
:
denum
.
append
(
v
)
if
self
.
use_reciprocal
or
num
:
# This will calculate either:
...
...
theano/tensor/tests/test_gc.py
浏览文件 @
efb4786e
...
...
@@ -89,16 +89,6 @@ def test_gc_never_pickles_temporaries():
# assert that f() didn't cause the function to grow
# allow_gc should leave the function un-changed by calling
if
len_pre_f
!=
len_post_f
:
for
i
in
range
(
len_pre_f
//
100
):
p1
=
pre_f
[
i
*
100
:(
i
+
1
)
*
100
]
p2
=
post_f
[
i
*
100
:(
i
+
1
)
*
100
]
if
p1
!=
p2
:
print
(
i
)
print
(
"p1"
)
print
(
p1
)
print
(
"p2"
)
print
(
p2
)
assert
len_pre_f
==
len_post_f
,
(
len_pre_f
,
len_post_f
)
# assert that g() didn't cause g to grow because temporaries
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论