Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
de4be8be
提交
de4be8be
authored
10月 01, 2015
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Document sandbox/gpuarray/opt_util.py
上级
0ad41ce5
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
205 行增加
和
27 行删除
+205
-27
dnn.py
theano/sandbox/gpuarray/dnn.py
+6
-6
nerv.py
theano/sandbox/gpuarray/nerv.py
+2
-2
opt_util.py
theano/sandbox/gpuarray/opt_util.py
+197
-19
没有找到文件。
theano/sandbox/gpuarray/dnn.py
浏览文件 @
de4be8be
...
@@ -1264,39 +1264,39 @@ optdb.register('local_dnna_conv_inplace',
...
@@ -1264,39 +1264,39 @@ optdb.register('local_dnna_conv_inplace',
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@alpha_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
,
nd
=
4
)
@alpha_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
)
def
local_dnn_conv_alpha_merge
(
node
,
*
inputs
):
def
local_dnn_conv_alpha_merge
(
node
,
*
inputs
):
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@alpha_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
,
nd
=
4
)
@alpha_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
)
def
local_dnn_convw_alpha_merge
(
node
,
*
inputs
):
def
local_dnn_convw_alpha_merge
(
node
,
*
inputs
):
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@alpha_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
,
nd
=
4
)
@alpha_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
)
def
local_dnn_convi_alpha_merge
(
node
,
*
inputs
):
def
local_dnn_convi_alpha_merge
(
node
,
*
inputs
):
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@output_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
,
nd
=
4
)
@output_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
)
def
local_dnn_conv_output_merge
(
node
,
*
inputs
):
def
local_dnn_conv_output_merge
(
node
,
*
inputs
):
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@output_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
,
nd
=
4
)
@output_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
)
def
local_dnn_convw_output_merge
(
node
,
*
inputs
):
def
local_dnn_convw_output_merge
(
node
,
*
inputs
):
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@output_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
,
nd
=
4
)
@output_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
)
def
local_dnn_convi_output_merge
(
node
,
*
inputs
):
def
local_dnn_convi_output_merge
(
node
,
*
inputs
):
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
...
...
theano/sandbox/gpuarray/nerv.py
浏览文件 @
de4be8be
...
@@ -176,13 +176,13 @@ def local_dot_to_gemm16(node):
...
@@ -176,13 +176,13 @@ def local_dot_to_gemm16(node):
@opt.register_opt
()
@opt.register_opt
()
@alpha_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
,
nd
=
2
)
@alpha_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
)
def
local_gemm16_alpha_merge
(
node
,
*
inputs
):
def
local_gemm16_alpha_merge
(
node
,
*
inputs
):
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
@opt.register_opt
()
@opt.register_opt
()
@output_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
,
nd
=
2
)
@output_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
)
def
local_gemm16_output_merge
(
node
,
*
inputs
):
def
local_gemm16_output_merge
(
node
,
*
inputs
):
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
...
...
theano/sandbox/gpuarray/opt_util.py
浏览文件 @
de4be8be
...
@@ -14,16 +14,28 @@ _one = scal.constant(numpy.asarray(1.0, dtype='float64'))
...
@@ -14,16 +14,28 @@ _one = scal.constant(numpy.asarray(1.0, dtype='float64'))
def
grab_cpu_scalar
(
v
,
nd
):
def
grab_cpu_scalar
(
v
,
nd
):
"""
Get a scalar variable value from the tree at `v`.
This function will dig through transfers and dimshuffles to get
the constant value. If no such constant is found, it returns None.
Parameters
----------
v : variable
Theano variable to extract the constant value from.
nd : int
Expected number of dimensions for the variable (for
broadcasted constants).
"""
if
v
.
owner
is
not
None
:
if
v
.
owner
is
not
None
:
n
=
v
.
owner
n
=
v
.
owner
if
(
isinstance
(
n
.
op
,
GpuDimShuffle
)
and
elif
(
isinstance
(
n
.
op
,
(
GpuDimShuffle
,
DimShuffle
))
and
n
.
op
.
new_order
==
(
'x'
,)
*
nd
):
return
grab_cpu_scalar
(
n
.
inputs
[
0
])
elif
(
isinstance
(
n
.
op
,
DimShuffle
)
and
n
.
op
.
new_order
==
(
'x'
,)
*
nd
):
n
.
op
.
new_order
==
(
'x'
,)
*
nd
):
return
grab_cpu_scalar
(
n
.
inputs
[
0
])
return
grab_cpu_scalar
(
n
.
inputs
[
0
]
,
n
.
inputs
[
0
]
.
ndim
)
elif
isinstance
(
n
.
op
,
GpuFromHost
):
elif
isinstance
(
n
.
op
,
(
GpuFromHost
,
HostFromGpu
)
):
return
grab_cpu_scalar
(
n
.
inputs
[
0
],
nd
=
nd
)
return
grab_cpu_scalar
(
n
.
inputs
[
0
],
nd
)
else
:
else
:
return
None
return
None
else
:
else
:
...
@@ -33,10 +45,24 @@ def grab_cpu_scalar(v, nd):
...
@@ -33,10 +45,24 @@ def grab_cpu_scalar(v, nd):
def
find_node
(
v
,
cls
,
ignore_clients
=
False
):
def
find_node
(
v
,
cls
,
ignore_clients
=
False
):
# This digs through possibly redundant transfers to for the node
"""
# that has the op class specified. If ignore_clients is False (the
Find the node that has an op of type `cls` in `v`.
# default) it will only dig through nodes that have a single
# client.
This digs through possibly redundant transfers to look for the node
that has the type `cls`. If `ignore_clients` is False (the
default) it will only dig through nodes that have a single client
to avoid duplicating computations.
Parameters
----------
v : variable
The variable to dig through
cls : Op class
The type of the node we are looking for
ignore_clients : bool, optional
Whether to ignore multiple clients or not.
"""
if
v
.
owner
is
not
None
and
(
ignore_clients
or
len
(
v
.
clients
)
==
1
):
if
v
.
owner
is
not
None
and
(
ignore_clients
or
len
(
v
.
clients
)
==
1
):
if
isinstance
(
v
.
owner
.
op
,
cls
):
if
isinstance
(
v
.
owner
.
op
,
cls
):
return
v
.
owner
return
v
.
owner
...
@@ -50,8 +76,20 @@ def find_node(v, cls, ignore_clients=False):
...
@@ -50,8 +76,20 @@ def find_node(v, cls, ignore_clients=False):
def
is_equal
(
var
,
val
):
def
is_equal
(
var
,
val
):
# Returns True if var is always equal to val (python value), False
"""
# otherwise (including if var is not constant)
Returns True if `var` is always equal to `val`.
This will only return True if the variable will always be equal to
the value. If it might not be true in some cases then it returns False.
Parameters
----------
var : variable
Variable to compare
val : value
Python value
"""
try
:
try
:
v
=
get_scalar_constant_value
(
var
)
v
=
get_scalar_constant_value
(
var
)
return
v
==
val
return
v
==
val
...
@@ -59,7 +97,57 @@ def is_equal(var, val):
...
@@ -59,7 +97,57 @@ def is_equal(var, val):
return
False
return
False
def
alpha_merge
(
cls
,
alpha_in
,
beta_in
,
nd
):
def
alpha_merge
(
cls
,
alpha_in
,
beta_in
):
"""
Decorator to merge multiplication by a scalar on the output.
This will find a pattern of scal * <yourop>(some, params, alpha,
beta) and update it so that the scalar multiplication happens as
part of your op.
The op needs to accept an alpha and a beta scalar which act this way:
out = Op() * alpha + out_like * beta
Where out_like is a buffer that has the same size as the output
and gets added to the "real" output of the operation. An example
of an operation that respects this pattern is GEMM from blas.
The decorated function must have this signature:
maker(node, *inputs)
The `node` argument you receive is the original apply node that
contains your op. You should use it to grab relevant properties
for your op so that the new version performs the same computation.
The `*inputs` parameter contains the new inputs for your op. You
MUST use those inputs instead of the ones on `node`. Note that
this function can be as simple as:
def maker(node, *inputs):
return node.op(*inputs)
Parameters
----------
cls : op class
The class of the op you want to merge
alpha_in : int
The input index for the alpha scalar for your op (in node.inputs).
beta_in : int
The input index for the beta scalar for your op (in node.inputs).
Returns
-------
This returns an unregistered local optimizer that has the same
name as the decorated function.
Notes
-----
This was factored out since the code to deal with intervening
transfers and correctness in the presence of different values of
alpha and beta scaling factors is not trivial.
"""
def
wrapper
(
maker
):
def
wrapper
(
maker
):
@local_optimizer
([
GpuElemwise
])
@local_optimizer
([
GpuElemwise
])
@wraps
(
maker
)
@wraps
(
maker
)
...
@@ -70,11 +158,12 @@ def alpha_merge(cls, alpha_in, beta_in, nd):
...
@@ -70,11 +158,12 @@ def alpha_merge(cls, alpha_in, beta_in, nd):
targ
=
find_node
(
node
.
inputs
[
0
],
cls
)
targ
=
find_node
(
node
.
inputs
[
0
],
cls
)
if
targ
is
None
:
if
targ
is
None
:
targ
=
find_node
(
node
.
inputs
[
1
],
cls
)
targ
=
find_node
(
node
.
inputs
[
1
],
cls
)
lr
=
grab_cpu_scalar
(
node
.
inputs
[
0
],
nd
=
nd
)
if
targ
is
None
:
return
lr
=
grab_cpu_scalar
(
node
.
inputs
[
0
],
nd
=
targ
.
ndim
)
else
:
else
:
lr
=
grab_cpu_scalar
(
node
.
inputs
[
1
],
nd
=
nd
)
lr
=
grab_cpu_scalar
(
node
.
inputs
[
1
],
nd
=
targ
.
ndim
)
if
(
lr
is
None
or
targ
is
None
or
if
lr
is
None
or
lr
.
dtype
!=
targ
.
outputs
[
0
]
.
dtype
:
lr
.
dtype
!=
targ
.
outputs
[
0
]
.
dtype
):
return
None
return
None
inputs
=
list
(
targ
.
inputs
)
inputs
=
list
(
targ
.
inputs
)
try
:
try
:
...
@@ -96,7 +185,62 @@ def alpha_merge(cls, alpha_in, beta_in, nd):
...
@@ -96,7 +185,62 @@ def alpha_merge(cls, alpha_in, beta_in, nd):
return
wrapper
return
wrapper
def
output_merge
(
cls
,
alpha_in
,
beta_in
,
out_in
,
nd
):
def
output_merge
(
cls
,
alpha_in
,
beta_in
,
out_in
):
"""
Decorator to merge addition by a value on the output.
This will find a pattern of val + <yourop>(some, params, alpha,
beta, out_like) and update it so that the addition happens as
part of your op.
The op needs to accept an alpha and a beta scalar which act this way:
out = Op() * alpha + out_like * beta
Where out_like is a buffer that has the same size as the output
and gets added to the "real" output of the operation. An example
of an operation that respects this pattern is GEMM from blas.
The decorated function must have this signature:
maker(node, *inputs)
The `node` argument you receive is the original apply node that
contains your op. You should use it to grab relevant properties
for your op so that the new version performs the same computation.
The `*inputs` parameter contains the new inputs for your op. You
MUST use those inputs instead of the ones on `node`. Note that
this function can be as simple as:
def maker(node, *inputs):
return node.op(*inputs)
Parameters
----------
cls : op class
The class of the op you want to merge
alpha_in : int
The input index for the alpha scalar for your op (in node.inputs).
beta_in : int
The input index for the beta scalar for your op (in node.inputs).
out_in : int
The input index for the out_like input for your op (in node.inputs).
Returns
-------
This returns an unregistered local optimizer that has the same
name as the decorated function.
Notes
-----
This was factored out since the code to deal with intervening
transfers and correctness in the presence of different values of
alpha and beta scaling factors is not trivial.
This also correctly handles the case where the added value is
broadcasted (by not performing the replacement).
"""
def
wrapper
(
maker
):
def
wrapper
(
maker
):
@local_optimizer
([
GpuElemwise
])
@local_optimizer
([
GpuElemwise
])
@wraps
(
maker
)
@wraps
(
maker
)
...
@@ -129,6 +273,40 @@ def output_merge(cls, alpha_in, beta_in, out_in, nd):
...
@@ -129,6 +273,40 @@ def output_merge(cls, alpha_in, beta_in, out_in, nd):
def
inplace_allocempty
(
op
,
idx
):
def
inplace_allocempty
(
op
,
idx
):
"""
Wrapper to make an inplace optimization that deals with AllocEmpty.
This will duplicate the alloc input if it has more than one client
to allow the op to work on it inplace.
The decorated function must have this signature:
maker(node, inputs)
The `node` argument you receive is the original apply node that
contains your op. You should use it to grab relevant properties
for your op so that the new version performs the same computation.
You should also switch the op to work inplace. The `inputs`
parameter contains the new inputs for your op. You MUST use
those inputs instead of the ones on `node`. Note that this
function can be as simple as:
def maker(node, inputs):
return node.op.__class__(inplace=True)(*inputs)
Parameters
----------
op : op class
The op class to look for to make inplace
idx : int
The index of the (possibly) AllocEmpty input (in node.inputs).
Returns
-------
This returns an unregistered inplace local optimizer that has the
same name as the decorated function.
"""
def
wrapper
(
maker
):
def
wrapper
(
maker
):
@local_optimizer
([
op
],
inplace
=
True
)
@local_optimizer
([
op
],
inplace
=
True
)
@wraps
(
maker
)
@wraps
(
maker
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论