Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
c1c7efe7
提交
c1c7efe7
authored
8月 20, 2014
作者:
f0k
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
GpuCorrMM: Removed pad='auto', added pad='half' and pad='full', added shape…
GpuCorrMM: Removed pad='auto', added pad='half' and pad='full', added shape management for gradients
上级
e5b3c89d
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
101 行增加
和
25 行删除
+101
-25
blas.py
theano/sandbox/cuda/blas.py
+99
-23
opt.py
theano/sandbox/cuda/opt.py
+1
-1
test_conv_cuda_ndarray.py
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+1
-1
没有找到文件。
theano/sandbox/cuda/blas.py
浏览文件 @
c1c7efe7
...
@@ -514,8 +514,8 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -514,8 +514,8 @@ class BaseGpuCorrMM(GpuOp):
if
len
(
subsample
)
!=
2
:
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
raise
ValueError
(
"subsample must have two elements"
)
self
.
subsample
=
subsample
self
.
subsample
=
subsample
if
(
pad
!=
"auto"
)
and
(
len
(
pad
)
!=
2
):
if
(
pad
not
in
(
"half"
,
"full"
)
)
and
(
len
(
pad
)
!=
2
):
raise
ValueError
(
"pad must be '
auto'
or have two elements"
)
raise
ValueError
(
"pad must be '
half', 'full',
or have two elements"
)
self
.
pad
=
pad
self
.
pad
=
pad
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
...
@@ -569,7 +569,7 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -569,7 +569,7 @@ class BaseGpuCorrMM(GpuOp):
for
f
in
files
]
for
f
in
files
]
return
reduce
(
str
.
__add__
,
codes
)
return
reduce
(
str
.
__add__
,
codes
)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
):
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
"""
"""
This generates the C code for GpuCorrMM (direction="forward"),
This generates the C code for GpuCorrMM (direction="forward"),
GpuCorrMM_gradWeights (direction="backprop weights"), and
GpuCorrMM_gradWeights (direction="backprop weights"), and
...
@@ -591,12 +591,26 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -591,12 +591,26 @@ class BaseGpuCorrMM(GpuOp):
(swapping the first two dimensions) and store results in bottom.
(swapping the first two dimensions) and store results in bottom.
:param sub: Dictionary of substitutions useable to help generating the
:param sub: Dictionary of substitutions useable to help generating the
C code.
C code.
:param height: If self.subsample[0] != 1, a variable giving the height
of the filters for direction="backprop weights" or the height of the
input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the height of the filters
for direction="backprop weights".
Ignored otherwise.
:param width: If self.subsample[1] != 1, a variable giving the width
of the filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the width of the filters
for direction="backprop weights".
Ignored otherwise.
"""
"""
if
self
.
border_mode
!=
"valid"
:
if
self
.
border_mode
!=
"valid"
:
raise
ValueError
(
"mode must be 'valid'"
)
raise
ValueError
(
"mode must be 'valid'"
)
dH
,
dW
=
self
.
subsample
dH
,
dW
=
self
.
subsample
if
self
.
pad
==
"
auto
"
:
if
self
.
pad
==
"
half
"
:
padH
=
padW
=
-
1
padH
=
padW
=
-
1
elif
self
.
pad
==
"full"
:
padH
=
padW
=
-
2
else
:
else
:
padH
,
padW
=
self
.
pad
padH
,
padW
=
self
.
pad
if
direction
==
"forward"
:
if
direction
==
"forward"
:
...
@@ -611,6 +625,21 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -611,6 +625,21 @@ class BaseGpuCorrMM(GpuOp):
else
:
else
:
raise
ValueError
(
"direction must be one of 'forward', "
raise
ValueError
(
"direction must be one of 'forward', "
"'backprop weights', 'backprop inputs'"
)
"'backprop weights', 'backprop inputs'"
)
# When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size.
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
not
height
:
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
else
:
height
=
'NULL'
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
if
not
width
:
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
else
:
width
=
'NULL'
sub
=
sub
.
copy
()
sub
=
sub
.
copy
()
sub
.
update
(
locals
())
sub
.
update
(
locals
())
...
@@ -630,23 +659,58 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -630,23 +659,58 @@ class BaseGpuCorrMM(GpuOp):
CudaNdarray * out2 = NULL;
CudaNdarray * out2 = NULL;
// Obtain or infer kernel width and height
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
int kH, kW;
int kH, kW;
if (direction != 1) {
if (direction != 1) {
// weight is an input variable, we can just read its shape
kH = CudaNdarray_HOST_DIMS(weights)[2];
kH = CudaNdarray_HOST_DIMS(weights)[2];
kW = CudaNdarray_HOST_DIMS(weights)[3];
kW = CudaNdarray_HOST_DIMS(weights)[3];
}
}
else {
else {
kH = CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH;
if ((dH != 1) || (padH == -1)) {
kW = CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW;
// vertical subsampling or half padding, kernel height is specified
kH =
%(height)
s;
}
else if (padH == -2) {
// vertical full padding, we can infer the kernel height
kH = 2 - CudaNdarray_HOST_DIMS(bottom)[2] + (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH;
}
else {
// explicit padding, we can infer the kernel height
kH = CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH;
}
if ((dW != 1) || (padW == -1)) {
kW =
%(width)
s;
}
else if (padW == -2) {
kW = 2 - CudaNdarray_HOST_DIMS(bottom)[3] + (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW;
}
else {
kW = CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW;
}
}
}
// Auto-padding if requested
// Auto-padding if requested
if (padH < 0) {
if (padH == -1) { // vertical half padding
padH = kH / 2;
}
else if (padH == -2) { // vertical full padding
padH = kH - 1;
padH = kH - 1;
}
}
if (padW < 0) {
else if (padH < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
%(fail)
s
}
if (padW == -1) { // horizontal half padding
padW = kW / 2;
}
else if (padW == -2) { // horizontal full padding
padW = kW - 1;
padW = kW - 1;
}
}
else if (padW < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
%(fail)
s
}
// Infer output shape
// Infer output shape
int out_dim[4];
int out_dim[4];
...
@@ -672,8 +736,8 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -672,8 +736,8 @@ class BaseGpuCorrMM(GpuOp):
// height and width: bottom = (top - 1) * sample + weights - 2*pad
// height and width: bottom = (top - 1) * sample + weights - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + CudaNdarray_HOST_DIMS(weights)[2] - 2*padH;
out_dim[2] = (
dH != 1) ?
%(height)
s : (
CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + CudaNdarray_HOST_DIMS(weights)[2] - 2*padH;
out_dim[3] = (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + CudaNdarray_HOST_DIMS(weights)[3] - 2*padW;
out_dim[3] = (
dW != 1) ?
%(width)
s : (
CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + CudaNdarray_HOST_DIMS(weights)[3] - 2*padW;
break;
break;
default:
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
...
@@ -727,7 +791,7 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -727,7 +791,7 @@ class GpuCorrMM(BaseGpuCorrMM):
pad
=
(
0
,
0
)):
pad
=
(
0
,
0
)):
"""
"""
:param border_mode: currently supports "valid" only; "full" can be
:param border_mode: currently supports "valid" only; "full" can be
simulated by setting `pad="
auto
"` (at the cost of performance), or
simulated by setting `pad="
full
"` (at the cost of performance), or
by using `GpuCorrMM_gradInputs`
by using `GpuCorrMM_gradInputs`
:param subsample: the subsample operation applied to each output image.
:param subsample: the subsample operation applied to each output image.
Should be a tuple with 2 elements.
Should be a tuple with 2 elements.
...
@@ -736,8 +800,9 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -736,8 +800,9 @@ class GpuCorrMM(BaseGpuCorrMM):
Set to `(1, 1)` to disable subsampling.
Set to `(1, 1)` to disable subsampling.
:param pad: the width of a border of implicit zeros to pad the input
:param pad: the width of a border of implicit zeros to pad the input
image with. Should be a tuple with 2 elements giving the numbers of
image with. Should be a tuple with 2 elements giving the numbers of
rows and columns to pad on each side, or "auto" to set the padding
rows and columns to pad on each side, or "half" to set the padding
to `(kernel_rows - 1, kernel_columns - 1)` at runtime.
to `(kernel_rows // 2, kernel_columns // 2)`, or "full" to set the
padding to `(kernel_rows - 1, kernel_columns - 1)` at runtime.
Set to `(0, 0)` to disable padding.
Set to `(0, 0)` to disable padding.
:note: Currently, the Op requires the inputs, filters and outputs to be
:note: Currently, the Op requires the inputs, filters and outputs to be
...
@@ -770,9 +835,9 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -770,9 +835,9 @@ class GpuCorrMM(BaseGpuCorrMM):
top
,
=
grads
top
,
=
grads
top
=
gpu_contiguous
(
top
)
top
=
gpu_contiguous
(
top
)
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
weights
,
top
)
weights
,
top
,
bottom
.
shape
[
-
2
:]
)
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
bottom
,
top
)
bottom
,
top
,
weights
.
shape
[
-
2
:]
)
return
d_bottom
,
d_weights
return
d_bottom
,
d_weights
...
@@ -787,23 +852,30 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -787,23 +852,30 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
pad
=
(
0
,
0
)):
pad
=
(
0
,
0
)):
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
def
make_node
(
self
,
img
,
topgrad
):
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
img
=
as_cuda_ndarray_variable
(
img
)
img
=
as_cuda_ndarray_variable
(
img
)
topgrad
=
as_cuda_ndarray_variable
(
topgrad
)
topgrad
=
as_cuda_ndarray_variable
(
topgrad
)
if
img
.
type
.
ndim
!=
4
:
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
raise
TypeError
(
'img must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
self
.
subsample
!=
(
1
,
1
)
or
self
.
pad
==
"half"
:
if
shape
is
None
:
raise
ValueError
(
'shape must be given if subsample != (1, 1) or pad == "half"'
)
height_width
=
[
shape
[
0
],
shape
[
1
]]
else
:
height_width
=
[]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
False
,
False
]
return
Apply
(
self
,
[
img
,
topgrad
],
[
CudaNdarrayType
(
broadcastable
)()])
return
Apply
(
self
,
[
img
,
topgrad
]
+
height_width
,
[
CudaNdarrayType
(
broadcastable
)()])
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
bottom
,
top
=
inp
bottom
,
top
=
inp
[:
2
]
height
,
width
=
inp
[
2
:]
or
(
None
,
None
)
weights
,
=
out_
weights
,
=
out_
direction
=
"backprop weights"
direction
=
"backprop weights"
return
super
(
GpuCorrMM_gradWeights
,
self
)
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
)
return
super
(
GpuCorrMM_gradWeights
,
self
)
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
)
class
GpuCorrMM_gradInputs
(
BaseGpuCorrMM
):
class
GpuCorrMM_gradInputs
(
BaseGpuCorrMM
):
...
@@ -817,23 +889,27 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -817,23 +889,27 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
pad
=
(
0
,
0
)):
pad
=
(
0
,
0
)):
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
def
make_node
(
self
,
kern
,
topgrad
):
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
kern
=
as_cuda_ndarray_variable
(
kern
)
kern
=
as_cuda_ndarray_variable
(
kern
)
topgrad
=
as_cuda_ndarray_variable
(
topgrad
)
topgrad
=
as_cuda_ndarray_variable
(
topgrad
)
if
kern
.
type
.
ndim
!=
4
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
raise
TypeError
(
'kern must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
self
.
subsample
!=
(
1
,
1
)
and
shape
is
None
:
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
)
height_width
=
[
shape
[
0
],
shape
[
1
]]
if
self
.
subsample
!=
(
1
,
1
)
else
[]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
False
,
False
]
False
,
False
]
return
Apply
(
self
,
[
kern
,
topgrad
],
[
CudaNdarrayType
(
broadcastable
)()])
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
[
CudaNdarrayType
(
broadcastable
)()])
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
weights
,
top
=
inp
weights
,
top
=
inp
[:
2
]
height
,
width
=
inp
[
2
:]
or
(
None
,
None
)
bottom
,
=
out_
bottom
,
=
out_
direction
=
"backprop inputs"
direction
=
"backprop inputs"
return
super
(
GpuCorrMM_gradInputs
,
self
)
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
)
return
super
(
GpuCorrMM_gradInputs
,
self
)
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
)
##
##
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
c1c7efe7
...
@@ -1357,7 +1357,7 @@ def local_conv_gemm(node):
...
@@ -1357,7 +1357,7 @@ def local_conv_gemm(node):
pad
=
(
0
,
0
)
pad
=
(
0
,
0
)
if
(
border_mode
==
'full'
)
and
(
subsample
!=
(
1
,
1
)):
if
(
border_mode
==
'full'
)
and
(
subsample
!=
(
1
,
1
)):
# need to simulate this via a padded valid convolution
# need to simulate this via a padded valid convolution
pad
=
'
auto
'
pad
=
'
full
'
border_mode
=
'valid'
border_mode
=
'valid'
if
(
border_mode
==
'valid'
):
if
(
border_mode
==
'valid'
):
# need to flip the kernel for valid convolution
# need to flip the kernel for valid convolution
...
...
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
浏览文件 @
c1c7efe7
...
@@ -914,7 +914,7 @@ def test_gemm_grads():
...
@@ -914,7 +914,7 @@ def test_gemm_grads():
i
=
cuda_tensor4
()
i
=
cuda_tensor4
()
k
=
cuda_tensor4
()
k
=
cuda_tensor4
()
pad
=
'
auto
'
if
mode
==
'full'
else
(
0
,
0
)
pad
=
'
full
'
if
mode
==
'full'
else
(
0
,
0
)
# TODO: also test custom pad values
# TODO: also test custom pad values
corr_op
=
theano
.
sandbox
.
cuda
.
blas
.
GpuCorrMM
(
corr_op
=
theano
.
sandbox
.
cuda
.
blas
.
GpuCorrMM
(
'valid'
,
subsample
,
pad
)(
i
,
k
)
'valid'
,
subsample
,
pad
)(
i
,
k
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论