Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
c072d669
提交
c072d669
authored
12月 19, 2016
作者:
Frédéric Bastien
提交者:
GitHub
12月 19, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5267 from gvtulder/f-abstractconv-differences
Minor inconsistency in AbstractConv_gradInput implementations
上级
1a42bf9b
7f1c3677
隐藏空白字符变更
内嵌
并排
正在显示
24 个修改的文件
包含
2241 行增加
和
440 行删除
+2241
-440
blas.py
theano/gpuarray/blas.py
+199
-90
corr3d_gemm.c
theano/gpuarray/corr3d_gemm.c
+53
-3
corr_gemm.c
theano/gpuarray/corr_gemm.c
+42
-2
dnn.py
theano/gpuarray/dnn.py
+30
-23
dnn_fwd.c
theano/gpuarray/dnn_fwd.c
+14
-5
dnn_gi.c
theano/gpuarray/dnn_gi.c
+56
-5
dnn_gw.c
theano/gpuarray/dnn_gw.c
+56
-5
test_abstractconv.py
theano/gpuarray/tests/test_abstractconv.py
+95
-0
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+19
-24
blas.py
theano/sandbox/cuda/blas.py
+177
-85
corr3d_gemm.cu
theano/sandbox/cuda/corr3d_gemm.cu
+50
-3
corr_gemm.cu
theano/sandbox/cuda/corr_gemm.cu
+48
-2
dnn.py
theano/sandbox/cuda/dnn.py
+23
-14
dnn_fwd.c
theano/sandbox/cuda/dnn_fwd.c
+18
-7
dnn_gi.c
theano/sandbox/cuda/dnn_gi.c
+55
-5
dnn_gw.c
theano/sandbox/cuda/dnn_gw.c
+55
-5
test_abstractconv.py
theano/sandbox/cuda/tests/test_abstractconv.py
+95
-0
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+86
-78
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+403
-4
corr.py
theano/tensor/nnet/corr.py
+72
-22
corr3d.py
theano/tensor/nnet/corr3d.py
+81
-30
corr3d_gemm.c
theano/tensor/nnet/corr3d_gemm.c
+28
-4
corr_gemm.c
theano/tensor/nnet/corr_gemm.c
+26
-3
test_abstract_conv.py
theano/tensor/nnet/tests/test_abstract_conv.py
+460
-21
没有找到文件。
theano/gpuarray/blas.py
浏览文件 @
c072d669
...
@@ -528,7 +528,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -528,7 +528,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# Raise this whenever modifying the code below.
# Raise this whenever modifying the code below.
return
(
2
,)
return
(
7
,)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
"""
"""
...
@@ -558,19 +558,19 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -558,19 +558,19 @@ class BaseGpuCorrMM(CGpuKernelBase):
sub
sub
Dictionary of substitutions useable to help generating the C code.
Dictionary of substitutions useable to help generating the C code.
height
height
If self.subsample[0] != 1, a variable giving the height of the
Required if self.subsample[0] != 1, a variable giving the height of
filters for direction="backprop weights" or the height of the input
the filters for direction="backprop weights" or the height of the
images for direction="backprop inputs".
i
nput i
mages for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the height of the
Required if self.border_mode == 'half', a variable giving the height
filters for direction="backprop weights".
of the
filters for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
width
width
If self.subsample[1] != 1, a variable giving the width of the
Required if self.subsample[1] != 1, a variable giving the width of
filters for direction="backprop weights" or the width of the
the
filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs".
input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the width of the
Required if self.border_mode == 'half', a variable giving the width
filters for direction="backprop weights".
of the
filters for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
"""
"""
dH
,
dW
=
self
.
subsample
dH
,
dW
=
self
.
subsample
...
@@ -599,18 +599,18 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -599,18 +599,18 @@ class BaseGpuCorrMM(CGpuKernelBase):
# When subsampling, we cannot unambiguously infer the height and width
# When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given.
# of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size.
# Similarly, when pad="half", we cannot infer the weight size.
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
height
:
if
not
height
:
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
else
:
else
:
height
=
'0'
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
if
not
width
:
height
=
'-1'
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
if
width
:
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
else
:
else
:
width
=
'0'
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
width
=
'-1'
sync
=
""
sync
=
""
if
config
.
gpuarray
.
sync
:
if
config
.
gpuarray
.
sync
:
sync
=
"""
sync
=
"""
...
@@ -643,15 +643,15 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -643,15 +643,15 @@ class BaseGpuCorrMM(CGpuKernelBase):
// Obtain or infer kernel width and height
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
// (we need to know it early to be able to handle auto-padding)
size_t kH, kW;
size_t kH, kW
, dil_kH, dil_kW
;
if (direction != 1) {
if (direction != 1) {
// weight is an input variable, we can just read its shape
// weight is an input variable, we can just read its shape
kH = PyGpuArray_DIMS(weights)[2];
kH = PyGpuArray_DIMS(weights)[2];
kW = PyGpuArray_DIMS(weights)[3];
kW = PyGpuArray_DIMS(weights)[3];
}
}
else {
else {
if (
(dH != 1) || (padH == -1)
) {
if (
%(height)
s != -1
) {
//
vertical subsampling or half padding, kernel height is specified
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
kH =
%(height)
s;
}
}
else if (padH == -2) {
else if (padH == -2) {
...
@@ -662,7 +662,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -662,7 +662,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
}
}
if (
(dW != 1) || (padW == -1)
) {
if (
%(width)
s != -1
) {
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW == -2) {
...
@@ -674,8 +674,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -674,8 +674,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
}
}
// Implicit dilated kernel size
// Implicit dilated kernel size
size_t
dil_kH = (kH - 1) * dilH + 1;
dil_kH = (kH - 1) * dilH + 1;
size_t
dil_kW = (kW - 1) * dilW + 1;
dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH == -1) { // vertical half padding
...
@@ -700,7 +700,9 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -700,7 +700,9 @@ class BaseGpuCorrMM(CGpuKernelBase):
}
}
// Infer output shape and type
// Infer output shape and type
size_t out_dim[4];
// The inferred shape can be negative.
long long out_dim[4];
size_t out_dim_size[4];
int out_typecode;
int out_typecode;
PyGpuContextObject *out_context;
PyGpuContextObject *out_context;
switch(direction) {
switch(direction) {
...
@@ -713,6 +715,20 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -713,6 +715,20 @@ class BaseGpuCorrMM(CGpuKernelBase):
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1;
out_typecode = bottom->ga.typecode;
out_typecode = bottom->ga.typecode;
out_context = bottom->context;
out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)
s
}
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width)
...
@@ -723,27 +739,60 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -723,27 +739,60 @@ class BaseGpuCorrMM(CGpuKernelBase):
out_dim[3] = kW; // how convenient
out_dim[3] = kW; // how convenient
out_typecode = top->ga.typecode;
out_typecode = top->ga.typecode;
out_context = top->context;
out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (
dH !=
1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[2] = (
%(height)
s != -
1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
dW !=
1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[3] = (
%(width)
s != -
1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_typecode = top->ga.typecode;
out_typecode = top->ga.typecode;
out_context = top->context;
out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weight shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
break;
break;
default:
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
%(fail)
s
%(fail)
s
}
}
out_dim_size[0] = (size_t)out_dim[0];
out_dim_size[1] = (size_t)out_dim[1];
out_dim_size[2] = (size_t)out_dim[2];
out_dim_size[3] = (size_t)out_dim[3];
// Prepare output array
// Prepare output array
if (theano_prep_output(&
%(out)
s, 4, out_dim, out_typecode, GA_C_ORDER, out_context) != 0)
if (theano_prep_output(&
%(out)
s, 4, out_dim
_size
, out_typecode, GA_C_ORDER, out_context) != 0)
{
{
PyErr_Format(PyExc_RuntimeError,
PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of
%%
l
d x
%%
ld x
%%
ld x
%%
ld",
"BaseGpuCorrMM: Failed to allocate output of
%%
l
ld x
%%
lld x
%%
lld x
%%
l
ld",
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)
s
%(fail)
s
}
}
...
@@ -875,15 +924,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -875,15 +924,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
raise
TypeError
(
'img must be 4D tensor'
)
raise
TypeError
(
'img must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
s
elf
.
subsample
!=
(
1
,
1
)
or
self
.
border_mode
==
"half"
:
if
s
hape
is
None
:
if
s
hape
is
None
:
if
s
elf
.
subsample
!=
(
1
,
1
)
or
self
.
border_mode
==
"half"
:
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
' or border_mode == "half"'
)
' or border_mode == "half"'
)
height_width
=
[]
else
:
height_width
=
[
shape
[
0
],
shape
[
1
]]
height_width
=
[
shape
[
0
],
shape
[
1
]]
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
else
:
height_width
=
[]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
False
,
False
]
...
@@ -946,10 +995,12 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -946,10 +995,12 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
raise
TypeError
(
'kern must be 4D tensor'
)
raise
TypeError
(
'kern must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
self
.
subsample
!=
(
1
,
1
)
and
shape
is
None
:
if
shape
is
None
:
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
)
if
self
.
subsample
!=
(
1
,
1
):
height_width
=
[
shape
[
0
],
shape
[
1
]]
if
self
.
subsample
!=
(
1
,
1
)
else
[]
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
)
if
height_width
:
height_width
=
[]
else
:
height_width
=
[
shape
[
0
],
shape
[
1
]]
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
...
@@ -1074,7 +1125,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1074,7 +1125,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying the code below.
# raise this whenever modifying the code below.
return
(
2
,)
return
(
7
,)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
,
depth
=
None
):
height
=
None
,
width
=
None
,
depth
=
None
):
...
@@ -1105,26 +1156,26 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1105,26 +1156,26 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
sub
sub
Dictionary of substitutions useable to help generating the C code.
Dictionary of substitutions useable to help generating the C code.
height
height
If self.subsample[0] != 1, a variable giving the height of the
Required if self.subsample[0] != 1, a variable giving the height of
filters for direction="backprop weights" or the height of the input
the filters for direction="backprop weights" or the height of the
images for direction="backprop inputs".
i
nput i
mages for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the height of the
Required if self.border_mode == 'half', a variable giving the height
filters for direction="backprop weights".
of the
filters for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
width
width
If self.subsample[1] != 1, a variable giving the width of the
Required if self.subsample[1] != 1, a variable giving the width of
filters for direction="backprop weights" or the width of the
the
filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs".
input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the width of the
Required if self.border_mode == 'half', a variable giving the width
filters for direction="backprop weights".
of the
filters for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
depth
depth
If self.subsample[2] != 1, a variable giving the depth of the
Required if self.subsample[2] != 1, a variable giving the depth of
filters for direction="backprop weights" or the depth of the
the
filters for direction="backprop weights" or the depth of the
input images for direction="backprop inputs".
input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the depth of the
Required if self.border_mode == 'half', a variable giving the depth
filters for direction="backprop weights".
of the
filters for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
"""
"""
dH
,
dW
,
dD
=
self
.
subsample
dH
,
dW
,
dD
=
self
.
subsample
...
@@ -1153,24 +1204,24 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1153,24 +1204,24 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
# When subsampling, we cannot unambiguously infer the height and width
# When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given.
# of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size.
# Similarly, when pad="half", we cannot infer the weight size.
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
height
:
if
not
height
:
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
else
:
else
:
height
=
'0'
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
if
not
width
:
height
=
'-1'
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
if
width
:
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
else
:
else
:
width
=
'0'
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
if
((
direction
!=
0
)
and
(
dD
!=
1
))
or
((
direction
==
1
)
and
(
padD
==
-
1
)):
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
if
not
depth
:
width
=
'-1'
raise
ValueError
(
"depth must be given for backprop with horizontal sampling or pad='half'"
)
if
depth
:
depth
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
depth
depth
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
depth
else
:
else
:
depth
=
'0'
if
((
direction
!=
0
)
and
(
dD
!=
1
))
or
((
direction
==
1
)
and
(
padD
==
-
1
)):
raise
ValueError
(
"depth must be given for backprop with horizontal sampling or pad='half'"
)
depth
=
'-1'
sync
=
""
sync
=
""
if
config
.
gpuarray
.
sync
:
if
config
.
gpuarray
.
sync
:
sync
=
"""
sync
=
"""
...
@@ -1206,7 +1257,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1206,7 +1257,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// Obtain or infer kernel height, width and depth
// Obtain or infer kernel height, width and depth
// (we need to know it early to be able to handle auto-padding)
// (we need to know it early to be able to handle auto-padding)
size_t kH, kW, kD;
size_t kH, kW, kD
, dil_kH, dil_kW, dil_kD
;
if (direction != 1) {
if (direction != 1) {
// weight is an input variable, we can just read its shape
// weight is an input variable, we can just read its shape
kH = PyGpuArray_DIMS(weights)[2];
kH = PyGpuArray_DIMS(weights)[2];
...
@@ -1214,8 +1265,8 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1214,8 +1265,8 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
kD = PyGpuArray_DIMS(weights)[4];
kD = PyGpuArray_DIMS(weights)[4];
}
}
else {
else {
if (
(dH != 1) || (padH == -1)
) {
if (
%(height)
s != -1
) {
//
vertical subsampling or half padding, kernel height is specified
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
kH =
%(height)
s;
}
}
else if (padH == -2) {
else if (padH == -2) {
...
@@ -1226,7 +1277,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1226,7 +1277,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
}
}
if (
(dW != 1) || (padW == -1)
) {
if (
%(width)
s != -1
) {
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW == -2) {
...
@@ -1235,7 +1286,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1235,7 +1286,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
else {
else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
if (
(dD != 1) || (padD == -1)
) {
if (
%(depth)
s != -1
) {
kD =
%(depth)
s;
kD =
%(depth)
s;
}
}
else if (padD == -2) {
else if (padD == -2) {
...
@@ -1247,9 +1298,9 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1247,9 +1298,9 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
}
}
// Implicit dilated kernel size
// Implicit dilated kernel size
size_t
dil_kH = (kH - 1) * dilH + 1;
dil_kH = (kH - 1) * dilH + 1;
size_t
dil_kW = (kW - 1) * dilW + 1;
dil_kW = (kW - 1) * dilW + 1;
size_t
dil_kD = (kD - 1) * dilD + 1;
dil_kD = (kD - 1) * dilD + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH == -1) { // vertical half padding
...
@@ -1284,7 +1335,9 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1284,7 +1335,9 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
}
}
// Infer output shape and type
// Infer output shape and type
size_t out_dim[5];
// The inferred shape can be negative.
long long out_dim[5];
size_t out_dim_size[5];
int out_typecode;
int out_typecode;
PyGpuContextObject *out_context;
PyGpuContextObject *out_context;
switch(direction) {
switch(direction) {
...
@@ -1298,6 +1351,22 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1298,6 +1351,22 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
out_dim[4] = (PyGpuArray_DIMS(bottom)[4] + 2*padD - ((PyGpuArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1;
out_dim[4] = (PyGpuArray_DIMS(bottom)[4] + 2*padD - ((PyGpuArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1;
out_typecode = bottom->ga.typecode;
out_typecode = bottom->ga.typecode;
out_context = bottom->context;
out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(bottom)[4],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(weights)[4],
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4]);
%(fail)
s
}
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width, depth)
// output is weights: (num_filters, num_channels, height, width, depth)
...
@@ -1309,28 +1378,66 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1309,28 +1378,66 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
out_dim[4] = kD;
out_dim[4] = kD;
out_typecode = top->ga.typecode;
out_typecode = top->ga.typecode;
out_context = top->context;
out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(bottom)[4],
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3],
PyGpuArray_DIMS(top)[4]);
%(fail)
s
}
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width, depth)
// output is bottom: (batchsize, num_channels, height, width, depth)
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (
dH !=
1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[2] = (
%(height)
s != -
1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
dW !=
1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[3] = (
%(width)
s != -
1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (
dD !=
1) ?
%(depth)
s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
out_dim[4] = (
%(depth)
s != -
1) ?
%(depth)
s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
out_typecode = top->ga.typecode;
out_typecode = top->ga.typecode;
out_context = top->context;
out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(weights)[4],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3],
PyGpuArray_DIMS(top)[4]);
%(fail)
s
}
break;
break;
default:
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2
\\
n");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2
\\
n");
%(fail)
s
%(fail)
s
}
}
out_dim_size[0] = (size_t)out_dim[0];
out_dim_size[1] = (size_t)out_dim[1];
out_dim_size[2] = (size_t)out_dim[2];
out_dim_size[3] = (size_t)out_dim[3];
out_dim_size[4] = (size_t)out_dim[4];
// Prepare output array
// Prepare output array
if (theano_prep_output(&
%(out)
s, 5, out_dim, out_typecode, GA_C_ORDER, out_context) != 0)
if (theano_prep_output(&
%(out)
s, 5, out_dim
_size
, out_typecode, GA_C_ORDER, out_context) != 0)
{
{
PyErr_Format(PyExc_RuntimeError,
PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of
%%
l
d x
%%
ld x
%%
ld x
%%
ld x
%%
ld",
"BaseGpuCorrMM: Failed to allocate output of
%%
l
ld x
%%
lld x
%%
lld x
%%
lld x
%%
l
ld",
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4]);
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4]);
%(fail)
s
%(fail)
s
}
}
...
@@ -1464,16 +1571,16 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
...
@@ -1464,16 +1571,16 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
raise
TypeError
(
'img must be 5D tensor'
)
raise
TypeError
(
'img must be 5D tensor'
)
if
topgrad
.
type
.
ndim
!=
5
:
if
topgrad
.
type
.
ndim
!=
5
:
raise
TypeError
(
'topgrad must be 5D tensor'
)
raise
TypeError
(
'topgrad must be 5D tensor'
)
if
s
elf
.
subsample
!=
(
1
,
1
,
1
)
or
self
.
border_mode
==
"half"
:
if
s
hape
is
None
:
if
s
hape
is
None
:
if
s
elf
.
subsample
!=
(
1
,
1
,
1
)
or
self
.
border_mode
==
"half"
:
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
' or border_mode == "half"'
)
' or border_mode == "half"'
)
height_width_depth
=
[]
else
:
height_width_depth
=
[
shape
[
0
],
shape
[
1
],
shape
[
2
]]
height_width_depth
=
[
shape
[
0
],
shape
[
1
],
shape
[
2
]]
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
2
]
.
ndim
==
0
assert
shape
[
2
]
.
ndim
==
0
else
:
height_width_depth
=
[]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
,
False
]
False
,
False
,
False
]
...
@@ -1536,10 +1643,12 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
...
@@ -1536,10 +1643,12 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
raise
TypeError
(
'kern must be 5D tensor'
)
raise
TypeError
(
'kern must be 5D tensor'
)
if
topgrad
.
type
.
ndim
!=
5
:
if
topgrad
.
type
.
ndim
!=
5
:
raise
TypeError
(
'topgrad must be 5D tensor'
)
raise
TypeError
(
'topgrad must be 5D tensor'
)
if
self
.
subsample
!=
(
1
,
1
,
1
)
and
shape
is
None
:
if
shape
is
None
:
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
)
if
self
.
subsample
!=
(
1
,
1
,
1
):
height_width_depth
=
[
shape
[
0
],
shape
[
1
],
shape
[
2
]]
if
self
.
subsample
!=
(
1
,
1
,
1
)
else
[]
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
)
if
height_width_depth
:
height_width_depth
=
[]
else
:
height_width_depth
=
[
shape
[
0
],
shape
[
1
],
shape
[
2
]]
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
2
]
.
ndim
==
0
assert
shape
[
2
]
.
ndim
==
0
...
...
theano/gpuarray/corr3d_gemm.c
浏览文件 @
c072d669
...
@@ -425,9 +425,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -425,9 +425,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
size_t
dil_kD
=
(
kD
-
1
)
*
dilD
+
1
;
const
size_t
dil_kD
=
(
kD
-
1
)
*
dilD
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const
size_t
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_kH
)
/
dH
+
1
;
const
size_t
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
size_t
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_kW
)
/
dW
+
1
;
const
size_t
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
const
size_t
topDepth
=
(
bottomDepth
+
2
*
padD
-
dil_kD
)
/
dD
+
1
;
const
size_t
topDepthNoDD
=
(
bottomDepth
+
2
*
padD
-
dil_kD
);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const
size_t
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
size_t
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
const
size_t
topDepth
=
_CONV_FLOORDIV_X
(
topDepthNoDD
,
dD
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
...
@@ -479,6 +487,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -479,6 +487,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
PyGpuArrayObject
*
output
;
PyGpuArrayObject
*
output
;
if
(
direction
==
0
)
{
// forward pass
if
(
direction
==
0
)
{
// forward pass
output
=
top
;
output
=
top
;
if
(
batchSize
==
0
||
nChannels
==
0
||
nFilters
==
0
)
{
err
=
GpuArray_memset
(
&
output
->
ga
,
0
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorr3dMM could not fill the output with zeros: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
Py_DECREF
(
col
);
return
output
;
}
// valid correlation: im3d2col, then gemm
// valid correlation: im3d2col, then gemm
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
...
@@ -530,6 +549,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -530,6 +549,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
}
}
else
if
(
direction
==
1
)
{
// backprop wrt. weights
else
if
(
direction
==
1
)
{
// backprop wrt. weights
output
=
weight
;
output
=
weight
;
if
(
batchSize
==
0
||
nChannels
==
0
||
nFilters
==
0
)
{
err
=
GpuArray_memset
(
&
output
->
ga
,
0
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorr3dMM grad wrt. weights could not fill the output with zeros: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
Py_DECREF
(
col
);
return
output
;
}
// valid convolution: im3col, then gemm
// valid convolution: im3col, then gemm
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
...
@@ -581,9 +611,29 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -581,9 +611,29 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
return
NULL
;
return
NULL
;
}
}
}
}
if
(
batchSize
==
0
)
{
err
=
GpuArray_memset
(
&
weight
->
ga
,
0
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorr3dMM grad weights could not fill the output with zeros: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
else
if
(
direction
==
2
)
{
// backprop wrt. inputs
else
if
(
direction
==
2
)
{
// backprop wrt. inputs
output
=
bottom
;
output
=
bottom
;
if
(
batchSize
==
0
||
nChannels
==
0
||
nFilters
==
0
)
{
err
=
GpuArray_memset
(
&
output
->
ga
,
0
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorr3dMM grad wrt. inputs could not fill the output with zeros: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
Py_DECREF
(
col
);
return
output
;
}
// full convolution: gemm, then col2im3d
// full convolution: gemm, then col2im3d
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
...
...
theano/gpuarray/corr_gemm.c
浏览文件 @
c072d669
...
@@ -360,8 +360,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -360,8 +360,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
size_t
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth)
// top: (batchSize, nFilters, topHeight, topWidth)
const
size_t
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_kH
)
/
dH
+
1
;
const
size_t
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
size_t
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_kW
)
/
dW
+
1
;
const
size_t
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const
size_t
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
size_t
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
...
@@ -411,6 +418,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -411,6 +418,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
PyGpuArrayObject
*
output
;
PyGpuArrayObject
*
output
;
if
(
direction
==
0
)
{
// forward pass
if
(
direction
==
0
)
{
// forward pass
output
=
top
;
output
=
top
;
if
(
batchSize
==
0
||
nChannels
==
0
||
nFilters
==
0
)
{
err
=
GpuArray_memset
(
&
output
->
ga
,
0
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM could not fill the output with zeros: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
Py_DECREF
(
col
);
return
output
;
}
// valid correlation: im2col, then gemm
// valid correlation: im2col, then gemm
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
...
@@ -462,6 +480,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -462,6 +480,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
}
}
else
if
(
direction
==
1
)
{
// backprop wrt. weights
else
if
(
direction
==
1
)
{
// backprop wrt. weights
output
=
weight
;
output
=
weight
;
if
(
batchSize
==
0
||
nChannels
==
0
||
nFilters
==
0
)
{
err
=
GpuArray_memset
(
&
output
->
ga
,
0
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad wrt. weights could not fill the output with zeros: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
Py_DECREF
(
col
);
return
output
;
}
// valid convolution: im2col, then gemm
// valid convolution: im2col, then gemm
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
...
@@ -516,6 +545,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -516,6 +545,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
}
}
else
if
(
direction
==
2
)
{
// backprop wrt. inputs
else
if
(
direction
==
2
)
{
// backprop wrt. inputs
output
=
bottom
;
output
=
bottom
;
if
(
batchSize
==
0
||
nChannels
==
0
||
nFilters
==
0
)
{
err
=
GpuArray_memset
(
&
output
->
ga
,
0
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad wrt. inputs could not fill the output with zeros: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
Py_DECREF
(
col
);
return
output
;
}
// full convolution: gemm, then col2im
// full convolution: gemm, then col2im
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
...
...
theano/gpuarray/dnn.py
浏览文件 @
c072d669
...
@@ -24,7 +24,8 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
...
@@ -24,7 +24,8 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
AbstractConv3d
,
AbstractConv3d
,
AbstractConv3d_gradWeights
,
AbstractConv3d_gradWeights
,
AbstractConv3d_gradInputs
,
AbstractConv3d_gradInputs
,
get_conv_output_shape
)
get_conv_output_shape
,
assert_conv_shape
)
from
theano.tensor.signal.pool
import
(
from
theano.tensor.signal.pool
import
(
Pool
,
MaxPoolGrad
,
AveragePoolGrad
)
Pool
,
MaxPoolGrad
,
AveragePoolGrad
)
from
.
import
pygpu
from
.
import
pygpu
...
@@ -979,11 +980,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -979,11 +980,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
kerns
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
shape2
=
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
out_shp
=
(
shape_i
(
kerns
,
1
,
fgraph
),
shape3
=
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
shape_i
(
img
,
1
,
fgraph
),
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
,
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
)
shape_i
(
img
,
1
,
fgraph
),
shape2
,
shape3
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
*
out_shp
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
'cross'
,
precision
=
precision
)(
out
.
shape
)
conv_mode
=
'cross'
,
precision
=
precision
)(
out
.
shape
)
conv
=
gpu_dnn_conv_gradW
()(
img
,
kerns
,
out
,
desc
)
conv
=
gpu_dnn_conv_gradW
()(
img
,
kerns
,
out
,
desc
)
...
@@ -997,11 +999,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -997,11 +999,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img
=
gpu_contiguous
(
img
)
# cudnn v2 rc3 need contiguous data
img
=
gpu_contiguous
(
img
)
# cudnn v2 rc3 need contiguous data
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
shape2
=
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
out_shp
=
(
shape_i
(
img
,
0
,
fgraph
),
shape3
=
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
shape_i
(
kerns
,
1
,
fgraph
),
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
shape_i
(
img
,
0
,
fgraph
),
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
,
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
)
shape2
,
shape3
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
*
out_shp
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
conv_mode
,
precision
=
precision
)(
kerns
.
shape
)
conv_mode
=
conv_mode
,
precision
=
precision
)(
kerns
.
shape
)
return
gpu_dnn_conv_gradI
()(
kerns
,
img
,
out
,
desc
)
return
gpu_dnn_conv_gradI
()(
kerns
,
img
,
out
,
desc
)
...
@@ -1021,6 +1024,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -1021,6 +1024,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out_shp
=
get_conv_output_shape
(
ishape
,
kshape
,
out_shp
=
get_conv_output_shape
(
ishape
,
kshape
,
desc_op
.
border_mode
,
desc_op
.
border_mode
,
desc_op
.
subsample
)
desc_op
.
subsample
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
*
out_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
*
out_shp
)
return
gpu_dnn_conv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
return
gpu_dnn_conv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
...
@@ -1094,12 +1098,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
...
@@ -1094,12 +1098,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
kerns
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
,
4
))
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
,
4
))
shape2
=
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
out_shp
=
(
shape_i
(
kerns
,
1
,
fgraph
),
shape3
=
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
shape_i
(
img
,
1
,
fgraph
),
shape4
=
shape_i
(
img
,
4
,
fgraph
)
-
shape_i
(
kerns
,
4
,
fgraph
)
+
1
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
,
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
,
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
img
,
4
,
fgraph
)
-
shape_i
(
kerns
,
4
,
fgraph
)
+
1
)
shape_i
(
img
,
1
,
fgraph
),
shape2
,
shape3
,
shape4
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
*
out_shp
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
conv_mode
=
'cross'
,
precision
=
precision
)(
out
.
shape
)
conv_mode
=
'cross'
,
precision
=
precision
)(
out
.
shape
)
conv
=
gpu_dnn_conv_gradW
()(
img
,
kerns
,
out
,
desc
)
conv
=
gpu_dnn_conv_gradW
()(
img
,
kerns
,
out
,
desc
)
...
@@ -1113,12 +1118,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
...
@@ -1113,12 +1118,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
img
=
gpu_contiguous
(
img
)
# cudnn v2 rc3 need contiguous data
img
=
gpu_contiguous
(
img
)
# cudnn v2 rc3 need contiguous data
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
,
4
))
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
,
4
))
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
shape2
=
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
out_shp
=
(
shape_i
(
img
,
0
,
fgraph
),
shape3
=
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
shape_i
(
kerns
,
1
,
fgraph
),
shape4
=
shape_i
(
img
,
4
,
fgraph
)
+
shape_i
(
kerns
,
4
,
fgraph
)
-
1
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
,
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
shape_i
(
img
,
0
,
fgraph
),
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
,
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
img
,
4
,
fgraph
)
+
shape_i
(
kerns
,
4
,
fgraph
)
-
1
)
shape2
,
shape3
,
shape4
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
*
out_shp
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
conv_mode
=
conv_mode
,
precision
=
precision
)(
kerns
.
shape
)
conv_mode
=
conv_mode
,
precision
=
precision
)(
kerns
.
shape
)
return
gpu_dnn_conv_gradI
()(
kerns
,
img
,
out
,
desc
)
return
gpu_dnn_conv_gradI
()(
kerns
,
img
,
out
,
desc
)
...
@@ -1138,6 +1144,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
...
@@ -1138,6 +1144,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
out_shp
=
get_conv_output_shape
(
ishape
,
kshape
,
out_shp
=
get_conv_output_shape
(
ishape
,
kshape
,
desc_op
.
border_mode
,
desc_op
.
border_mode
,
desc_op
.
subsample
)
desc_op
.
subsample
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
*
out_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtype
)(
*
out_shp
)
return
gpu_dnn_conv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
return
gpu_dnn_conv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
...
...
theano/gpuarray/dnn_fwd.c
浏览文件 @
c072d669
...
@@ -39,11 +39,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -39,11 +39,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return
1
;
return
1
;
}
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
switch
(
input
->
ga
.
typecode
)
{
switch
(
input
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
case
GA_DOUBLE
:
alpha_p
=
(
void
*
)
&
alpha
;
alpha_p
=
(
void
*
)
&
alpha
;
...
@@ -71,6 +66,20 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -71,6 +66,20 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return
1
;
return
1
;
#endif
#endif
if
(
PyGpuArray_DIMS
(
input
)[
0
]
==
0
||
PyGpuArray_DIMS
(
kerns
)[
0
]
==
0
||
PyGpuArray_DIMS
(
kerns
)[
1
]
==
0
)
{
int
err2
=
GpuArray_memset
(
&
(
*
output
)
->
ga
,
0
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv could not fill the output with zeros: %d"
,
err2
);
return
1
;
}
return
0
;
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
*
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
if
(
c_set_tensorNd
(
*
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
return
1
;
...
...
theano/gpuarray/dnn_gi.c
浏览文件 @
c072d669
...
@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return
1
;
return
1
;
}
}
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
switch
(
im
->
ga
.
typecode
)
{
switch
(
im
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
case
GA_DOUBLE
:
alpha_p
=
(
void
*
)
&
alpha
;
alpha_p
=
(
void
*
)
&
alpha
;
...
@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return
1
;
return
1
;
#endif
#endif
if
(
PyGpuArray_DIMS
(
im
)[
0
]
==
0
||
PyGpuArray_DIMS
(
kerns
)[
0
]
==
0
||
PyGpuArray_DIMS
(
kerns
)[
1
]
==
0
)
{
int
err2
=
GpuArray_memset
(
&
(
*
input
)
->
ga
,
0
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv grad wrt. inputs could not fill the output with zeros: %d"
,
err2
);
return
1
;
}
return
0
;
}
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
*
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
if
(
c_set_tensorNd
(
*
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
return
1
;
...
@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_enter
(
c
->
ctx
);
cuda_enter
(
c
->
ctx
);
int
expected_output_dims
[
5
]
=
{
0
};
err
=
cudnnGetConvolutionNdForwardOutputDim
(
desc
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_NDIM
(
im
),
expected_output_dims
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error computing convolution output dim: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
if
(
PyGpuArray_NDIM
(
im
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ld"
,
expected_output_dims
[
0
],
expected_output_dims
[
1
],
expected_output_dims
[
2
],
expected_output_dims
[
3
],
PyGpuArray_DIMS
(
output
)[
0
],
PyGpuArray_DIMS
(
output
)[
1
],
PyGpuArray_DIMS
(
output
)[
2
],
PyGpuArray_DIMS
(
output
)[
3
]);
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
else
if
(
PyGpuArray_NDIM
(
im
)
==
5
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
])
||
(
PyGpuArray_DIMS
(
output
)[
4
]
!=
expected_output_dims
[
4
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld"
,
expected_output_dims
[
0
],
expected_output_dims
[
1
],
expected_output_dims
[
2
],
expected_output_dims
[
3
],
expected_output_dims
[
4
],
PyGpuArray_DIMS
(
output
)[
0
],
PyGpuArray_DIMS
(
output
)[
1
],
PyGpuArray_DIMS
(
output
)[
2
],
PyGpuArray_DIMS
(
output
)[
3
],
PyGpuArray_DIMS
(
output
)[
4
]);
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
#ifdef CHOOSE_ALGO
#ifdef CHOOSE_ALGO
#ifndef CHOOSE_ONCE
#ifndef CHOOSE_ONCE
reuse_algo
=
1
;
reuse_algo
=
1
;
...
...
theano/gpuarray/dnn_gw.c
浏览文件 @
c072d669
...
@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return
1
;
return
1
;
}
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
switch
(
input
->
ga
.
typecode
)
{
switch
(
input
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
case
GA_DOUBLE
:
alpha_p
=
(
void
*
)
&
alpha
;
alpha_p
=
(
void
*
)
&
alpha
;
...
@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return
1
;
return
1
;
#endif
#endif
if
(
PyGpuArray_DIMS
(
input
)[
0
]
==
0
||
PyGpuArray_DIMS
(
km
)[
0
]
==
0
||
PyGpuArray_DIMS
(
km
)[
1
]
==
0
)
{
int
err2
=
GpuArray_memset
(
&
(
*
kerns
)
->
ga
,
0
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv grad wrt. weights could not fill the output with zeros: %d"
,
err2
);
return
1
;
}
return
0
;
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
return
1
;
...
@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_enter
(
c
->
ctx
);
cuda_enter
(
c
->
ctx
);
int
expected_output_dims
[
5
]
=
{
0
};
err
=
cudnnGetConvolutionNdForwardOutputDim
(
desc
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_NDIM
(
input
),
expected_output_dims
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error computing convolution output dim: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
if
(
PyGpuArray_NDIM
(
input
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%dx%ld"
" but received gradient with shape %ldx%ldx%dx%ld"
,
expected_output_dims
[
0
],
expected_output_dims
[
1
],
expected_output_dims
[
2
],
expected_output_dims
[
3
],
PyGpuArray_DIMS
(
output
)[
0
],
PyGpuArray_DIMS
(
output
)[
1
],
PyGpuArray_DIMS
(
output
)[
2
],
PyGpuArray_DIMS
(
output
)[
3
]);
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
else
if
(
PyGpuArray_NDIM
(
input
)
==
5
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
])
||
(
PyGpuArray_DIMS
(
output
)[
4
]
!=
expected_output_dims
[
4
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld"
,
expected_output_dims
[
0
],
expected_output_dims
[
1
],
expected_output_dims
[
2
],
expected_output_dims
[
3
],
expected_output_dims
[
4
],
PyGpuArray_DIMS
(
output
)[
0
],
PyGpuArray_DIMS
(
output
)[
1
],
PyGpuArray_DIMS
(
output
)[
2
],
PyGpuArray_DIMS
(
output
)[
3
],
PyGpuArray_DIMS
(
output
)[
4
]);
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
#ifdef CHOOSE_ALGO
#ifdef CHOOSE_ALGO
#ifndef CHOOSE_ONCE
#ifndef CHOOSE_ONCE
reuse_algo
=
1
;
reuse_algo
=
1
;
...
...
theano/gpuarray/tests/test_abstractconv.py
浏览文件 @
c072d669
from
__future__
import
absolute_import
,
print_function
,
division
from
__future__
import
absolute_import
,
print_function
,
division
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
from
nose.tools
import
assert_raises
import
numpy
import
numpy
...
@@ -49,6 +50,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
...
@@ -49,6 +50,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
)
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
),
expect_error
=
False
):
if
not
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn_available
.
msg
)
if
fd
!=
(
1
,
1
):
raise
SkipTest
(
"Doesn't have CUDNN implementation"
)
mode
=
mode_with_gpu
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
filter_dilation
=
fd
)
else
:
assert_raises
((
RuntimeError
,
ValueError
),
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
ref
=
None
,
filter_dilation
=
fd
)
class
TestDnnConv3d
(
test_abstract_conv
.
BaseTestConv3d
):
class
TestDnnConv3d
(
test_abstract_conv
.
BaseTestConv3d
):
@classmethod
@classmethod
...
@@ -82,6 +108,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
...
@@ -82,6 +108,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
)
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
,
1
),
expect_error
=
False
):
if
not
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn_available
.
msg
)
if
fd
!=
(
1
,
1
,
1
):
raise
SkipTest
(
"Doesn't have CUDNN implementation"
)
mode
=
mode_with_gpu
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
filter_dilation
=
fd
)
else
:
assert_raises
((
RuntimeError
,
ValueError
),
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
ref
=
None
,
filter_dilation
=
fd
)
class
TestCorrMMConv2d
(
test_abstract_conv
.
BaseTestConv2d
):
class
TestCorrMMConv2d
(
test_abstract_conv
.
BaseTestConv2d
):
@classmethod
@classmethod
...
@@ -115,6 +166,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
...
@@ -115,6 +166,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
target_op
=
GpuCorrMM_gradInputs
,
target_op
=
GpuCorrMM_gradInputs
,
filter_dilation
=
fd
)
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
),
expect_error
=
False
):
mode
=
self
.
mode
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorrMM_gradInputs
,
filter_dilation
=
fd
)
else
:
assert_raises
(
ValueError
,
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorrMM_gradInputs
,
ref
=
None
,
filter_dilation
=
fd
)
class
TestCorrMMConv3d
(
test_abstract_conv
.
BaseTestConv3d
):
class
TestCorrMMConv3d
(
test_abstract_conv
.
BaseTestConv3d
):
@classmethod
@classmethod
...
@@ -148,6 +221,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
...
@@ -148,6 +221,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
target_op
=
GpuCorr3dMM_gradInputs
,
target_op
=
GpuCorr3dMM_gradInputs
,
filter_dilation
=
fd
)
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
,
1
),
expect_error
=
False
):
mode
=
self
.
mode
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorr3dMM_gradInputs
,
filter_dilation
=
fd
)
else
:
assert_raises
(
ValueError
,
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorr3dMM_gradInputs
,
ref
=
None
,
filter_dilation
=
fd
)
class
TestDnnConvTypes
(
test_abstract_conv
.
TestConvTypes
):
class
TestDnnConvTypes
(
test_abstract_conv
.
TestConvTypes
):
def
setUp
(
self
):
def
setUp
(
self
):
...
...
theano/gpuarray/tests/test_dnn.py
浏览文件 @
c072d669
...
@@ -12,6 +12,7 @@ import theano.tensor as T
...
@@ -12,6 +12,7 @@ import theano.tensor as T
import
theano.tests.unittest_tools
as
utt
import
theano.tests.unittest_tools
as
utt
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
from
theano.tensor.nnet.abstract_conv
import
get_conv_output_shape
from
..
import
dnn
from
..
import
dnn
from
..basic_ops
import
GpuAllocEmpty
from
..basic_ops
import
GpuAllocEmpty
...
@@ -628,56 +629,50 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -628,56 +629,50 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(
1
,
1
,
1
),
(
2
,
2
,
2
)],
[(
1
,
1
,
1
),
(
2
,
2
,
2
)],
'none'
)
'none'
)
def
_test_conv_gradw
(
self
,
img
,
kerns
,
out
,
img_val
,
kern_vals
,
border_mode
,
conv_mode
,
subsample
):
def
_test_conv_gradw
(
self
,
img
,
topgrad
,
kerns
,
img_shape
,
kerns_shape
,
border_mode
,
conv_mode
,
subsample
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
topgrad_shape
=
get_conv_output_shape
(
img_shape
,
kerns_shape
,
border_mode
,
subsample
)
img_val
=
numpy
.
asarray
(
img_val
=
numpy
.
asarray
(
img_val
,
numpy
.
random
.
rand
(
*
img_shape
)
,
dtype
=
theano
.
config
.
floatX
dtype
=
theano
.
config
.
floatX
)
)
kern
_vals
=
numpy
.
asarray
(
topgrad
_vals
=
numpy
.
asarray
(
kern_vals
,
numpy
.
random
.
rand
(
*
topgrad_shape
)
,
dtype
=
theano
.
config
.
floatX
dtype
=
theano
.
config
.
floatX
)
)
temp_img
=
img
.
dimshuffle
(
1
,
0
,
2
,
3
)
kerns_vals
=
numpy
.
zeros
(
kerns_shape
,
dtype
=
theano
.
config
.
floatX
)
temp_kerns
=
kerns
kerns_shape
=
theano
.
shared
(
numpy
.
asarray
(
kerns_shape
))
if
conv_mode
==
'conv'
:
temp_kerns
=
temp_kerns
[:,
:,
::
-
1
,
::
-
1
]
temp_kerns
=
temp_kerns
.
dimshuffle
(
1
,
0
,
2
,
3
)
shape
=
(
kern_vals
.
shape
[
1
],
img_val
.
shape
[
1
],
img_val
.
shape
[
2
]
-
kern_vals
.
shape
[
2
]
+
1
,
img_val
.
shape
[
3
]
-
kern_vals
.
shape
[
3
]
+
1
)
out_vals
=
numpy
.
zeros
(
shape
,
dtype
=
theano
.
config
.
floatX
)
desc
=
dnn
.
GpuDnnConvDesc
(
desc
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
conv_mode
=
conv_mode
,
conv_mode
=
conv_mode
,
precision
=
set_precision
(
theano
.
config
.
floatX
)
precision
=
set_precision
(
theano
.
config
.
floatX
)
)(
out
.
shape
)
)(
kerns_
shape
)
conv_grad_w
=
dnn
.
GpuDnnConvGradW
()(
conv_grad_w
=
dnn
.
GpuDnnConvGradW
()(
temp_
img
,
img
,
t
emp_kerns
,
t
opgrad
,
out
,
kerns
,
desc
,
desc
,
)
)
self
.
_compile_and_check
(
self
.
_compile_and_check
(
[
temp_img
,
temp_kerns
,
out
],
[
img
,
topgrad
,
kerns
],
[
conv_grad_w
],
[
conv_grad_w
],
[
img_val
,
kern_vals
,
out
_vals
],
[
img_val
,
topgrad_vals
,
kerns
_vals
],
dnn
.
GpuDnnConvGradW
dnn
.
GpuDnnConvGradW
)
)
@parameterized.expand
(
product
(
border_modes
,
conv_modes
),
utt
.
custom_name_func
)
@parameterized.expand
(
product
(
border_modes
,
conv_modes
),
utt
.
custom_name_func
)
def
test_conv_gradw
(
self
,
border_mode
,
conv_mode
):
def
test_conv_gradw
(
self
,
border_mode
,
conv_mode
):
self
.
_test_conv_gradw
(
T
.
tensor4
(
'img'
),
self
.
_test_conv_gradw
(
T
.
tensor4
(
'img'
),
T
.
tensor4
(
'topgrad'
),
T
.
tensor4
(
'kerns'
),
T
.
tensor4
(
'kerns'
),
T
.
tensor4
(
'out'
),
(
5
,
2
,
6
,
13
),
numpy
.
random
.
rand
(
2
,
5
,
6
,
8
),
(
1
,
2
,
3
,
7
),
numpy
.
random
.
rand
(
2
,
1
,
5
,
6
),
border_mode
,
border_mode
,
conv_mode
,
conv_mode
,
(
1
,
1
))
(
1
,
1
))
...
...
theano/sandbox/cuda/blas.py
浏览文件 @
c072d669
...
@@ -922,7 +922,7 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -922,7 +922,7 @@ class BaseGpuCorrMM(GpuOp):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
0
,
26
)
return
(
0
,
30
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -960,19 +960,19 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -960,19 +960,19 @@ class BaseGpuCorrMM(GpuOp):
sub
sub
Dictionary of substitutions useable to help generating the C code.
Dictionary of substitutions useable to help generating the C code.
height
height
If self.subsample[0] != 1, a variable giving the height of the
Required if self.subsample[0] != 1, a variable giving the height of
filters for direction="backprop weights" or the height of the input
the filters for direction="backprop weights" or the height of the
images for direction="backprop inputs".
i
nput i
mages for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the height of the
Required if self.border_mode == 'half', a variable giving the height
filters for direction="backprop weights".
of the
filters for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
width
width
If self.subsample[1] != 1, a variable giving the width of the
Required if self.subsample[1] != 1, a variable giving the width of
filters for direction="backprop weights" or the width of the
the
filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs".
input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the width of the
Required if self.border_mode == 'half', a variable giving the width
filters for direction="backprop weights".
of the
filters for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
"""
"""
dH
,
dW
=
self
.
subsample
dH
,
dW
=
self
.
subsample
...
@@ -1001,18 +1001,18 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1001,18 +1001,18 @@ class BaseGpuCorrMM(GpuOp):
# When subsampling, we cannot unambiguously infer the height and width
# When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given.
# of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size.
# Similarly, when pad="half", we cannot infer the weight size.
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
height
:
if
not
height
:
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
else
:
else
:
height
=
'NULL'
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
if
not
width
:
height
=
'-1'
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
if
width
:
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
else
:
else
:
width
=
'NULL'
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
width
=
'-1'
sub
=
sub
.
copy
()
sub
=
sub
.
copy
()
sub
.
update
(
locals
())
sub
.
update
(
locals
())
...
@@ -1035,15 +1035,15 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1035,15 +1035,15 @@ class BaseGpuCorrMM(GpuOp):
// Obtain or infer kernel width and height
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
// (we need to know it early to be able to handle auto-padding)
int kH, kW;
int kH, kW
, dil_kH, dil_kW
;
if (direction != 1) {
if (direction != 1) {
// weight is an input variable, we can just read its shape
// weight is an input variable, we can just read its shape
kH = CudaNdarray_HOST_DIMS(weights)[2];
kH = CudaNdarray_HOST_DIMS(weights)[2];
kW = CudaNdarray_HOST_DIMS(weights)[3];
kW = CudaNdarray_HOST_DIMS(weights)[3];
}
}
else {
else {
if (
(dH != 1) || (padH == -1)
) {
if (
%(height)
s != -1
) {
//
vertical subsampling or half padding, kernel height is specified
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
kH =
%(height)
s;
}
}
else if (padH == -2) {
else if (padH == -2) {
...
@@ -1054,7 +1054,7 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1054,7 +1054,7 @@ class BaseGpuCorrMM(GpuOp):
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
}
}
if (
(dW != 1) || (padW == -1)
) {
if (
%(width)
s != -1
) {
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW == -2) {
...
@@ -1066,8 +1066,8 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1066,8 +1066,8 @@ class BaseGpuCorrMM(GpuOp):
}
}
// Implicit dilated kernel size
// Implicit dilated kernel size
int
dil_kH = (kH - 1) * dilH + 1;
dil_kH = (kH - 1) * dilH + 1;
int
dil_kW = (kW - 1) * dilW + 1;
dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH == -1) { // vertical half padding
...
@@ -1101,6 +1101,20 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1101,6 +1101,20 @@ class BaseGpuCorrMM(GpuOp):
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[0];
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - ((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1;
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - ((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - ((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - ((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
CudaNdarray_HOST_DIMS(bottom)[0], CudaNdarray_HOST_DIMS(bottom)[1],
CudaNdarray_HOST_DIMS(bottom)[2], CudaNdarray_HOST_DIMS(bottom)[3],
CudaNdarray_HOST_DIMS(weights)[0], CudaNdarray_HOST_DIMS(weights)[1],
CudaNdarray_HOST_DIMS(weights)[2], CudaNdarray_HOST_DIMS(weights)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)
s
}
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width)
...
@@ -1109,14 +1123,42 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1109,14 +1123,42 @@ class BaseGpuCorrMM(GpuOp):
out_dim[1] = CudaNdarray_HOST_DIMS(bottom)[1];
out_dim[1] = CudaNdarray_HOST_DIMS(bottom)[1];
out_dim[2] = kH; // already inferred further above
out_dim[2] = kH; // already inferred further above
out_dim[3] = kW; // how convenient
out_dim[3] = kW; // how convenient
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
CudaNdarray_HOST_DIMS(bottom)[0], CudaNdarray_HOST_DIMS(bottom)[1],
CudaNdarray_HOST_DIMS(bottom)[2], CudaNdarray_HOST_DIMS(bottom)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
CudaNdarray_HOST_DIMS(top)[0], CudaNdarray_HOST_DIMS(top)[1],
CudaNdarray_HOST_DIMS(top)[2], CudaNdarray_HOST_DIMS(top)[3]);
%(fail)
s
}
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weight shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
CudaNdarray_HOST_DIMS(weights)[0], CudaNdarray_HOST_DIMS(weights)[1],
CudaNdarray_HOST_DIMS(weights)[2], CudaNdarray_HOST_DIMS(weights)[3],
CudaNdarray_HOST_DIMS(top)[0], CudaNdarray_HOST_DIMS(top)[1],
CudaNdarray_HOST_DIMS(top)[2], CudaNdarray_HOST_DIMS(top)[3]);
%(fail)
s
}
break;
break;
default:
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
...
@@ -1274,15 +1316,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -1274,15 +1316,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
raise
TypeError
(
'img must be 4D tensor'
)
raise
TypeError
(
'img must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
s
elf
.
subsample
!=
(
1
,
1
)
or
self
.
border_mode
==
"half"
:
if
s
hape
is
None
:
if
s
hape
is
None
:
if
s
elf
.
subsample
!=
(
1
,
1
)
or
self
.
border_mode
==
"half"
:
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
' or border_mode == "half"'
)
' or border_mode == "half"'
)
height_width
=
[]
else
:
height_width
=
[
shape
[
0
],
shape
[
1
]]
height_width
=
[
shape
[
0
],
shape
[
1
]]
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
else
:
height_width
=
[]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
False
,
False
]
...
@@ -1343,10 +1385,12 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -1343,10 +1385,12 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
raise
TypeError
(
'kern must be 4D tensor'
)
raise
TypeError
(
'kern must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
self
.
subsample
!=
(
1
,
1
)
and
shape
is
None
:
if
shape
is
None
:
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
)
if
self
.
subsample
!=
(
1
,
1
):
height_width
=
[
shape
[
0
],
shape
[
1
]]
if
self
.
subsample
!=
(
1
,
1
)
else
[]
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
)
if
height_width
:
height_width
=
[]
else
:
height_width
=
[
shape
[
0
],
shape
[
1
]]
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
...
@@ -1469,7 +1513,7 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1469,7 +1513,7 @@ class BaseGpuCorr3dMM(GpuOp):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
0
,
2
5
)
return
(
0
,
2
9
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -1510,26 +1554,26 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1510,26 +1554,26 @@ class BaseGpuCorr3dMM(GpuOp):
sub
sub
Dictionary of substitutions useable to help generating the C code.
Dictionary of substitutions useable to help generating the C code.
height
height
If self.subsample[0] != 1, a variable giving the height
Required if self.subsample[0] != 1, a variable giving the height of
of
the filters for direction="backprop weights" or the height of the
the filters for direction="backprop weights" or the height of the
input images for direction="backprop inputs".
input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the height of the filters
Required if self.border_mode == 'half', a variable giving the height
for direction="backprop weights".
of the filters
for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
width
width
If self.subsample[1] != 1, a variable giving the width
Required if self.subsample[1] != 1, a variable giving the width of
of
the filters for direction="backprop weights" or the width of the
the filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs".
input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the width of the filters
Required if self.border_mode == 'half', a variable giving the width
for direction="backprop weights".
of the filters
for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
depth
depth
If self.subsample[2] != 1, a variable giving the depth
Required if self.subsample[2] != 1, a variable giving the depth of
of
the filters for direction="backprop weights" or the depth of the
the filters for direction="backprop weights" or the depth of the
input images for direction="backprop inputs".
input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the depth of the filters
Required if self.border_mode == 'half', a variable giving the depth
for direction="backprop weights".
of the filters
for direction="backprop weights".
Ignored otherwise
.
Not required otherwise, but if a value is given this will be checked
.
"""
"""
dH
,
dW
,
dD
=
self
.
subsample
dH
,
dW
,
dD
=
self
.
subsample
...
@@ -1558,24 +1602,24 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1558,24 +1602,24 @@ class BaseGpuCorr3dMM(GpuOp):
# When subsampling, we cannot unambiguously infer the height and width
# When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given.
# of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size.
# Similarly, when pad="half", we cannot infer the weight size.
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
height
:
if
not
height
:
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
else
:
else
:
height
=
'NULL'
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
if
not
width
:
height
=
'-1'
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
if
width
:
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
else
:
else
:
width
=
'NULL'
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
if
((
direction
!=
0
)
and
(
dD
!=
1
))
or
((
direction
==
1
)
and
(
padD
==
-
1
)):
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
if
not
depth
:
width
=
'-1'
raise
ValueError
(
"depth must be given for backprop with horizontal sampling or pad='half'"
)
if
depth
:
depth
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
depth
depth
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
depth
else
:
else
:
depth
=
'NULL'
if
((
direction
!=
0
)
and
(
dD
!=
1
))
or
((
direction
==
1
)
and
(
padD
==
-
1
)):
raise
ValueError
(
"depth must be given for backprop with horizontal sampling or pad='half'"
)
depth
=
'-1'
sub
=
sub
.
copy
()
sub
=
sub
.
copy
()
sub
.
update
(
locals
())
sub
.
update
(
locals
())
...
@@ -1601,7 +1645,7 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1601,7 +1645,7 @@ class BaseGpuCorr3dMM(GpuOp):
// Obtain or infer kernel width and height
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
// (we need to know it early to be able to handle auto-padding)
int kH, kW, kD;
int kH, kW, kD
, dil_kH, dil_kW, dil_kD
;
if (direction != 1)
if (direction != 1)
{
{
// weight is an input variable, we can just read its shape
// weight is an input variable, we can just read its shape
...
@@ -1611,9 +1655,9 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1611,9 +1655,9 @@ class BaseGpuCorr3dMM(GpuOp):
}
}
else
else
{
{
if (
(dH != 1) || (padH == -1)
)
if (
%(height)
s != -1
)
{
{
//
vertical subsampling or half padding, kernel height is specified
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
kH =
%(height)
s;
}
}
else if (padH == -2)
else if (padH == -2)
...
@@ -1626,7 +1670,7 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1626,7 +1670,7 @@ class BaseGpuCorr3dMM(GpuOp):
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
}
}
if (
(dW != 1) || (padW == -1)
)
if (
%(width)
s != -1
)
{
{
kW =
%(width)
s;
kW =
%(width)
s;
}
}
...
@@ -1638,7 +1682,7 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1638,7 +1682,7 @@ class BaseGpuCorr3dMM(GpuOp):
{
{
kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
if (
(dD != 1) || (padD == -1)
)
if (
%(depth)
s != -1
)
{
{
kD =
%(depth)
s;
kD =
%(depth)
s;
}
}
...
@@ -1653,9 +1697,9 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1653,9 +1697,9 @@ class BaseGpuCorr3dMM(GpuOp):
}
}
// Implicit dilated kernel size
// Implicit dilated kernel size
int
dil_kH = (kH - 1) * dilH + 1;
dil_kH = (kH - 1) * dilH + 1;
int
dil_kW = (kW - 1) * dilW + 1;
dil_kW = (kW - 1) * dilW + 1;
int
dil_kD = (kD - 1) * dilD + 1;
dil_kD = (kD - 1) * dilD + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1)
if (padH == -1)
...
@@ -1707,6 +1751,22 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1707,6 +1751,22 @@ class BaseGpuCorr3dMM(GpuOp):
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - ((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1;
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - ((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - ((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - ((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1;
out_dim[4] = (CudaNdarray_HOST_DIMS(bottom)[4] + 2*padD - ((CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1;
out_dim[4] = (CudaNdarray_HOST_DIMS(bottom)[4] + 2*padD - ((CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
CudaNdarray_HOST_DIMS(bottom)[0], CudaNdarray_HOST_DIMS(bottom)[1],
CudaNdarray_HOST_DIMS(bottom)[2], CudaNdarray_HOST_DIMS(bottom)[3],
CudaNdarray_HOST_DIMS(bottom)[4],
CudaNdarray_HOST_DIMS(weights)[0], CudaNdarray_HOST_DIMS(weights)[1],
CudaNdarray_HOST_DIMS(weights)[2], CudaNdarray_HOST_DIMS(weights)[3],
CudaNdarray_HOST_DIMS(weights)[4],
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4]);
%(fail)
s
}
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width, depth)
// output is weights: (num_filters, num_channels, height, width, depth)
...
@@ -1716,23 +1776,53 @@ class BaseGpuCorr3dMM(GpuOp):
...
@@ -1716,23 +1776,53 @@ class BaseGpuCorr3dMM(GpuOp):
out_dim[2] = kH; // already inferred further above
out_dim[2] = kH; // already inferred further above
out_dim[3] = kW; // how convenient
out_dim[3] = kW; // how convenient
out_dim[4] = kD;
out_dim[4] = kD;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
CudaNdarray_HOST_DIMS(bottom)[0], CudaNdarray_HOST_DIMS(bottom)[1],
CudaNdarray_HOST_DIMS(bottom)[2], CudaNdarray_HOST_DIMS(bottom)[3],
CudaNdarray_HOST_DIMS(bottom)[4],
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4],
CudaNdarray_HOST_DIMS(top)[0], CudaNdarray_HOST_DIMS(top)[1],
CudaNdarray_HOST_DIMS(top)[2], CudaNdarray_HOST_DIMS(top)[3],
CudaNdarray_HOST_DIMS(top)[4]);
%(fail)
s
}
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width, depth)
// output is bottom: (batchsize, num_channels, height, width, depth)
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (dD != 1) ?
%(depth)
s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
out_dim[4] = (
%(depth)
s != -1) ?
%(depth)
s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4],
CudaNdarray_HOST_DIMS(weights)[0], CudaNdarray_HOST_DIMS(weights)[1],
CudaNdarray_HOST_DIMS(weights)[2], CudaNdarray_HOST_DIMS(weights)[3],
CudaNdarray_HOST_DIMS(weights)[4],
CudaNdarray_HOST_DIMS(top)[0], CudaNdarray_HOST_DIMS(top)[1],
CudaNdarray_HOST_DIMS(top)[2], CudaNdarray_HOST_DIMS(top)[3],
CudaNdarray_HOST_DIMS(top)[4]);
%(fail)
s
}
break;
break;
default:
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2
\\
n");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2
\\
n");
%(fail)
s
%(fail)
s
}
}
// Prepare output array
// Prepare output array
if (!(
%(out)
s
if (!(
%(out)
s
&&
%(out)
s->nd == 5
&&
%(out)
s->nd == 5
...
@@ -1876,16 +1966,16 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
...
@@ -1876,16 +1966,16 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
raise
TypeError
(
'img must be 5D tensor'
)
raise
TypeError
(
'img must be 5D tensor'
)
if
topgrad
.
type
.
ndim
!=
5
:
if
topgrad
.
type
.
ndim
!=
5
:
raise
TypeError
(
'topgrad must be 5D tensor'
)
raise
TypeError
(
'topgrad must be 5D tensor'
)
if
s
elf
.
subsample
!=
(
1
,
1
,
1
)
or
self
.
border_mode
==
"half"
:
if
s
hape
is
None
:
if
s
hape
is
None
:
if
s
elf
.
subsample
!=
(
1
,
1
,
1
)
or
self
.
border_mode
==
"half"
:
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
' or border_mode == "half"'
)
' or border_mode == "half"'
)
height_width_depth
=
[]
else
:
height_width_depth
=
[
shape
[
0
],
shape
[
1
],
shape
[
2
]]
height_width_depth
=
[
shape
[
0
],
shape
[
1
],
shape
[
2
]]
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
2
]
.
ndim
==
0
assert
shape
[
2
]
.
ndim
==
0
else
:
height_width_depth
=
[]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
,
False
]
False
,
False
,
False
]
...
@@ -1943,10 +2033,12 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
...
@@ -1943,10 +2033,12 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
raise
TypeError
(
'kern must be 5D tensor'
)
raise
TypeError
(
'kern must be 5D tensor'
)
if
topgrad
.
type
.
ndim
!=
5
:
if
topgrad
.
type
.
ndim
!=
5
:
raise
TypeError
(
'topgrad must be 5D tensor'
)
raise
TypeError
(
'topgrad must be 5D tensor'
)
if
self
.
subsample
!=
(
1
,
1
,
1
)
and
shape
is
None
:
if
shape
is
None
:
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
)
if
self
.
subsample
!=
(
1
,
1
,
1
):
height_width_depth
=
[
shape
[
0
],
shape
[
1
],
shape
[
2
]]
if
self
.
subsample
!=
(
1
,
1
,
1
)
else
[]
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
)
if
height_width_depth
:
height_width_depth
=
[]
else
:
height_width_depth
=
[
shape
[
0
],
shape
[
1
],
shape
[
2
]]
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
2
]
.
ndim
==
0
assert
shape
[
2
]
.
ndim
==
0
...
...
theano/sandbox/cuda/corr3d_gemm.cu
浏览文件 @
c072d669
...
@@ -429,9 +429,17 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
...
@@ -429,9 +429,17 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1;
const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = int((bottomHeight + 2*padH - dil_kH) / dH) + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = int((bottomWidth + 2*padW - dil_kW) / dW) + 1;
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepth = int((bottomDepth + 2*padD - dil_kD) / dD) + 1;
const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
#undef _CONV_FLOORDIV
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
...
@@ -478,6 +486,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
...
@@ -478,6 +486,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
if (direction == 0)
if (direction == 0)
{ // forward pass
{ // forward pass
output = top;
output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im2col, then gemm
// valid correlation: im2col, then gemm
// Iterate over batch
// Iterate over batch
for (int n = 0; n < batchSize; n++)
for (int n = 0; n < batchSize; n++)
...
@@ -527,6 +548,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
...
@@ -527,6 +548,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
{
{
// backprop wrt. weights
// backprop wrt. weights
output = weight;
output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. weights could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im2col, then gemm
// valid convolution: im2col, then gemm
// Iterate over batch
// Iterate over batch
for (int n = 0; n < batchSize; n++)
for (int n = 0; n < batchSize; n++)
...
@@ -578,6 +612,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
...
@@ -578,6 +612,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
{
{
// backprop wrt. inputs
// backprop wrt. inputs
output = bottom;
output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. inputs could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im3d
// full convolution: gemm, then col2im3d
// Iterate over batch
// Iterate over batch
for (int n = 0; n < batchSize; n++)
for (int n = 0; n < batchSize; n++)
...
...
theano/sandbox/cuda/corr_gemm.cu
浏览文件 @
c072d669
...
@@ -333,8 +333,15 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
...
@@ -333,8 +333,15 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
#undef _CONV_FLOORDIV
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
...
@@ -377,6 +384,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
...
@@ -377,6 +384,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
CudaNdarray *output;
CudaNdarray *output;
if (direction == 0) { // forward pass
if (direction == 0) { // forward pass
output = top;
output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im2col, then gemm
// valid correlation: im2col, then gemm
// Iterate over batch
// Iterate over batch
for (int n = 0; n < batchSize; n++) {
for (int n = 0; n < batchSize; n++) {
...
@@ -445,6 +465,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
...
@@ -445,6 +465,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
}
}
else if (direction == 1) { // backprop wrt. weights
else if (direction == 1) { // backprop wrt. weights
output = weight;
output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. weights could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im2col, then gemm
// valid convolution: im2col, then gemm
// Iterate over batch
// Iterate over batch
for (int n = 0; n < batchSize; n++) {
for (int n = 0; n < batchSize; n++) {
...
@@ -513,6 +546,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
...
@@ -513,6 +546,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
}
}
else if (direction == 2) { // backprop wrt. inputs
else if (direction == 2) { // backprop wrt. inputs
output = bottom;
output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. inputs could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im
// full convolution: gemm, then col2im
// Iterate over batch
// Iterate over batch
for (int n = 0; n < batchSize; n++) {
for (int n = 0; n < batchSize; n++) {
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
c072d669
...
@@ -14,7 +14,8 @@ from theano.gof.type import CDataType
...
@@ -14,7 +14,8 @@ from theano.gof.type import CDataType
from
theano.compile
import
optdb
from
theano.compile
import
optdb
from
theano.compile.ops
import
shape_i
from
theano.compile.ops
import
shape_i
from
theano.tensor.nnet
import
LogSoftmax
,
SoftmaxGrad
from
theano.tensor.nnet
import
LogSoftmax
,
SoftmaxGrad
from
theano.tensor.nnet.abstract_conv
import
get_conv_output_shape
from
theano.tensor.nnet.abstract_conv
import
(
get_conv_output_shape
,
assert_conv_shape
)
from
theano.tensor.signal.pool
import
(
from
theano.tensor.signal.pool
import
(
Pool
,
MaxPoolGrad
,
AveragePoolGrad
)
Pool
,
MaxPoolGrad
,
AveragePoolGrad
)
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.type
import
CudaNdarrayType
...
@@ -1132,10 +1133,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -1132,10 +1133,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
kerns
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
shape2
=
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
out_shp
=
(
shape_i
(
kerns
,
1
,
fgraph
),
shape3
=
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
shape_i
(
img
,
1
,
fgraph
),
out
=
gpu_alloc_empty
(
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
,
shape_i
(
img
,
1
,
fgraph
),
shape2
,
shape3
)
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
*
out_shp
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
'cross'
,
precision
=
precision
)(
img
.
shape
,
conv_mode
=
'cross'
,
precision
=
precision
)(
img
.
shape
,
out
.
shape
)
out
.
shape
)
...
@@ -1149,10 +1152,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -1149,10 +1152,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img
=
gpu_contiguous
(
img
)
img
=
gpu_contiguous
(
img
)
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
shape2
=
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
out_shp
=
(
shape_i
(
img
,
0
,
fgraph
),
shape3
=
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
shape_i
(
kerns
,
1
,
fgraph
),
out
=
gpu_alloc_empty
(
shape_i
(
img
,
0
,
fgraph
),
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
,
shape_i
(
kerns
,
1
,
fgraph
),
shape2
,
shape3
)
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
*
out_shp
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
conv_mode
,
precision
=
precision
)(
out
.
shape
,
conv_mode
=
conv_mode
,
precision
=
precision
)(
out
.
shape
,
kerns
.
shape
)
kerns
.
shape
)
...
@@ -1170,6 +1175,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -1170,6 +1175,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out_shp
=
GpuDnnConv
.
get_out_shape
(
img
.
shape
,
kerns
.
shape
,
out_shp
=
GpuDnnConv
.
get_out_shape
(
img
.
shape
,
kerns
.
shape
,
desc_op
.
border_mode
,
desc_op
.
border_mode
,
desc_op
.
subsample
)
desc_op
.
subsample
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
*
out_shp
)
out
=
gpu_alloc_empty
(
*
out_shp
)
return
GpuDnnConv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
return
GpuDnnConv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
...
@@ -1248,11 +1254,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
...
@@ -1248,11 +1254,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns
=
kerns
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
kerns
=
kerns
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
,
4
))
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
,
4
))
shape2
=
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
out_shp
=
(
shape_i
(
kerns
,
1
,
fgraph
),
shape3
=
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
shape_i
(
img
,
1
,
fgraph
),
shape4
=
shape_i
(
img
,
4
,
fgraph
)
-
shape_i
(
kerns
,
4
,
fgraph
)
+
1
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
,
out
=
gpu_alloc_empty
(
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
,
shape_i
(
img
,
1
,
fgraph
),
shape2
,
shape3
,
shape4
)
shape_i
(
img
,
4
,
fgraph
)
-
shape_i
(
kerns
,
4
,
fgraph
)
+
1
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
*
out_shp
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
conv_mode
=
'cross'
,
precision
=
precision
)(
img
.
shape
,
conv_mode
=
'cross'
,
precision
=
precision
)(
img
.
shape
,
out
.
shape
)
out
.
shape
)
...
@@ -1271,6 +1279,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
...
@@ -1271,6 +1279,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
out_shp
=
GpuDnnConv3d
.
get_out_shape
(
img
.
shape
,
kerns
.
shape
,
out_shp
=
GpuDnnConv3d
.
get_out_shape
(
img
.
shape
,
kerns
.
shape
,
desc_op
.
border_mode
,
desc_op
.
border_mode
,
desc_op
.
subsample
)
desc_op
.
subsample
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
gpu_alloc_empty
(
*
out_shp
)
out
=
gpu_alloc_empty
(
*
out_shp
)
return
GpuDnnConv3d
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
return
GpuDnnConv3d
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
...
...
theano/sandbox/cuda/dnn_fwd.c
浏览文件 @
c072d669
...
@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return
1
;
return
1
;
}
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_filterNd
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
int
nb_dim
=
CudaNdarray_NDIM
(
input
);
int
nb_dim
=
CudaNdarray_NDIM
(
input
);
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
...
@@ -30,8 +25,24 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -30,8 +25,24 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return
1
;
return
1
;
#endif
#endif
if
(
c_set_tensorNd
(
*
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
if
(
CudaNdarray_DIMS
(
input
)[
0
]
==
0
||
CudaNdarray_DIMS
(
kerns
)[
0
]
==
0
||
CudaNdarray_DIMS
(
kerns
)[
1
]
==
0
)
{
return
1
;
cudaError_t
err2
=
cudaMemset
((
*
output
)
->
devdata
,
0
,
CudaNdarray_SIZE
(
*
output
)
*
sizeof
(
real
));
if
(
err2
!=
cudaSuccess
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv could not fill the output with zeros: %s"
,
cudaGetErrorString
(
err2
));
return
1
;
}
return
0
;
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_filterNd
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
*
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
{
{
size_t
worksize
;
size_t
worksize
;
...
...
theano/sandbox/cuda/dnn_gi.c
浏览文件 @
c072d669
...
@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return
1
;
return
1
;
}
}
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
if
(
c_set_filterNd
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
int
nb_dim
=
CudaNdarray_NDIM
(
output
);
int
nb_dim
=
CudaNdarray_NDIM
(
output
);
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
...
@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return
1
;
return
1
;
#endif
#endif
if
(
CudaNdarray_DIMS
(
im
)[
0
]
==
0
||
CudaNdarray_DIMS
(
kerns
)[
0
]
==
0
||
CudaNdarray_DIMS
(
kerns
)[
1
]
==
0
)
{
cudaError_t
err2
=
cudaMemset
((
*
input
)
->
devdata
,
0
,
CudaNdarray_SIZE
(
*
input
)
*
sizeof
(
real
));
if
(
err2
!=
cudaSuccess
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv grad wrt. inputs could not fill the output with zeros: %s"
,
cudaGetErrorString
(
err2
));
return
1
;
}
return
0
;
}
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
if
(
c_set_filterNd
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
*
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
if
(
c_set_tensorNd
(
*
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
return
1
;
int
expected_output_dims
[
5
]
=
{
0
};
err
=
cudnnGetConvolutionNdForwardOutputDim
(
desc
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
nb_dim
,
expected_output_dims
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error computing convolution output dim: %s"
,
cudnnGetErrorString
(
err
));
return
1
;
}
if
(
nb_dim
==
4
)
{
if
((
CudaNdarray_HOST_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ld"
,
(
long
int
)
expected_output_dims
[
0
],
(
long
int
)
expected_output_dims
[
1
],
(
long
int
)
expected_output_dims
[
2
],
(
long
int
)
expected_output_dims
[
3
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
0
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
1
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
2
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
3
]);
return
1
;
}
}
else
if
(
nb_dim
==
5
)
{
if
((
CudaNdarray_HOST_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
4
]
!=
expected_output_dims
[
4
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld"
,
(
long
int
)
expected_output_dims
[
0
],
(
long
int
)
expected_output_dims
[
1
],
(
long
int
)
expected_output_dims
[
2
],
(
long
int
)
expected_output_dims
[
3
],
(
long
int
)
expected_output_dims
[
4
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
0
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
1
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
2
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
3
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
4
]);
return
1
;
}
}
{
{
size_t
worksize
;
size_t
worksize
;
void
*
workspace
;
void
*
workspace
;
...
...
theano/sandbox/cuda/dnn_gw.c
浏览文件 @
c072d669
...
@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return
1
;
return
1
;
}
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
int
nb_dim
=
CudaNdarray_NDIM
(
output
);
int
nb_dim
=
CudaNdarray_NDIM
(
output
);
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
...
@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return
1
;
return
1
;
#endif
#endif
if
(
CudaNdarray_DIMS
(
input
)[
0
]
==
0
||
CudaNdarray_DIMS
(
km
)[
0
]
==
0
||
CudaNdarray_DIMS
(
km
)[
1
]
==
0
)
{
cudaError_t
err2
=
cudaMemset
((
*
kerns
)
->
devdata
,
0
,
CudaNdarray_SIZE
(
*
kerns
)
*
sizeof
(
real
));
if
(
err2
!=
cudaSuccess
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv grad wrt. weights could not fill the output with zeros: %s"
,
cudaGetErrorString
(
err2
));
return
1
;
}
return
0
;
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
if
(
c_set_filterNd
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filterNd
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
return
1
;
int
expected_output_dims
[
5
]
=
{
0
};
err
=
cudnnGetConvolutionNdForwardOutputDim
(
desc
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
nb_dim
,
expected_output_dims
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error computing convolution output dim: %s"
,
cudnnGetErrorString
(
err
));
return
1
;
}
if
(
nb_dim
==
4
)
{
if
((
CudaNdarray_HOST_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%dx%ld"
" but received gradient with shape %ldx%ldx%dx%ld"
,
(
long
int
)
expected_output_dims
[
0
],
(
long
int
)
expected_output_dims
[
1
],
(
long
int
)
expected_output_dims
[
2
],
(
long
int
)
expected_output_dims
[
3
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
0
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
1
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
2
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
3
]);
return
1
;
}
}
else
if
(
nb_dim
==
5
)
{
if
((
CudaNdarray_HOST_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
])
||
(
CudaNdarray_HOST_DIMS
(
output
)[
4
]
!=
expected_output_dims
[
4
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld"
,
(
long
int
)
expected_output_dims
[
0
],
(
long
int
)
expected_output_dims
[
1
],
(
long
int
)
expected_output_dims
[
2
],
(
long
int
)
expected_output_dims
[
3
],
(
long
int
)
expected_output_dims
[
4
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
0
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
1
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
2
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
3
],
(
long
int
)
CudaNdarray_HOST_DIMS
(
output
)[
4
]);
return
1
;
}
}
{
{
size_t
worksize
;
size_t
worksize
;
void
*
workspace
;
void
*
workspace
;
...
...
theano/sandbox/cuda/tests/test_abstractconv.py
浏览文件 @
c072d669
...
@@ -13,6 +13,7 @@ from theano.sandbox.cuda.blas import (
...
@@ -13,6 +13,7 @@ from theano.sandbox.cuda.blas import (
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
,
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
,
GpuCorr3dMM
,
GpuCorr3dMM_gradWeights
,
GpuCorr3dMM_gradInputs
)
GpuCorr3dMM
,
GpuCorr3dMM_gradWeights
,
GpuCorr3dMM_gradInputs
)
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
from
nose.tools
import
assert_raises
import
theano.sandbox.cuda
as
cuda
import
theano.sandbox.cuda
as
cuda
if
not
cuda
.
cuda_available
:
if
not
cuda
.
cuda_available
:
...
@@ -57,6 +58,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
...
@@ -57,6 +58,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
filter_dilation
=
fd
)
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
),
expect_error
=
False
):
if
fd
!=
(
1
,
1
):
raise
SkipTest
(
"No dilation implementation for cuDNN ConvOp."
)
if
not
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
mode
=
mode_with_gpu
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
filter_dilation
=
fd
)
else
:
assert_raises
((
RuntimeError
,
ValueError
),
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
ref
=
None
,
filter_dilation
=
fd
)
class
TestDnnConv3d
(
test_abstract_conv
.
BaseTestConv3d
):
class
TestDnnConv3d
(
test_abstract_conv
.
BaseTestConv3d
):
@classmethod
@classmethod
...
@@ -91,6 +117,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
...
@@ -91,6 +117,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
filter_flip
=
flip
,
target_op
=
GpuDnnConv3dGradI
,
filter_flip
=
flip
,
target_op
=
GpuDnnConv3dGradI
,
filter_dilation
=
fd
)
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
,
1
),
expect_error
=
False
):
if
fd
!=
(
1
,
1
,
1
):
raise
SkipTest
(
"No dilation implementation for cuDNN ConvOp."
)
if
not
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
mode
=
mode_with_gpu
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
filter_dilation
=
fd
)
else
:
assert_raises
((
RuntimeError
,
ValueError
),
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
ref
=
None
,
filter_dilation
=
fd
)
class
TestCorrMMConv2d
(
test_abstract_conv
.
BaseTestConv2d
):
class
TestCorrMMConv2d
(
test_abstract_conv
.
BaseTestConv2d
):
@classmethod
@classmethod
...
@@ -124,6 +175,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
...
@@ -124,6 +175,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
target_op
=
GpuCorrMM_gradInputs
,
target_op
=
GpuCorrMM_gradInputs
,
filter_dilation
=
fd
)
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
),
expect_error
=
False
):
mode
=
self
.
mode
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorrMM_gradInputs
,
filter_dilation
=
fd
)
else
:
assert_raises
(
ValueError
,
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorrMM_gradInputs
,
ref
=
None
,
filter_dilation
=
fd
)
class
TestCorrMMConv3d
(
test_abstract_conv
.
BaseTestConv3d
):
class
TestCorrMMConv3d
(
test_abstract_conv
.
BaseTestConv3d
):
@classmethod
@classmethod
...
@@ -157,6 +230,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
...
@@ -157,6 +230,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
target_op
=
GpuCorr3dMM_gradInputs
,
target_op
=
GpuCorr3dMM_gradInputs
,
filter_dilation
=
fd
)
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
,
1
),
expect_error
=
False
):
mode
=
self
.
mode
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorr3dMM_gradInputs
,
filter_dilation
=
fd
)
else
:
assert_raises
(
ValueError
,
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorr3dMM_gradInputs
,
ref
=
None
,
filter_dilation
=
fd
)
class
TestDnnConvTypes
(
test_abstract_conv
.
TestConvTypes
):
class
TestDnnConvTypes
(
test_abstract_conv
.
TestConvTypes
):
def
setUp
(
self
):
def
setUp
(
self
):
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
c072d669
...
@@ -4,6 +4,7 @@ import os
...
@@ -4,6 +4,7 @@ import os
import
sys
import
sys
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
from
nose_parameterized
import
parameterized
from
itertools
import
chain
,
product
from
itertools
import
chain
,
product
import
six.moves.cPickle
as
pickle
import
six.moves.cPickle
as
pickle
from
six
import
StringIO
from
six
import
StringIO
...
@@ -16,6 +17,7 @@ import theano.tensor as T
...
@@ -16,6 +17,7 @@ import theano.tensor as T
import
theano.tests.unittest_tools
as
utt
import
theano.tests.unittest_tools
as
utt
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
from
theano.tensor.nnet.abstract_conv
import
get_conv_output_shape
import
theano.sandbox.cuda.dnn
as
dnn
import
theano.sandbox.cuda.dnn
as
dnn
from
theano.sandbox.cuda.basic_ops
import
GpuAllocEmpty
,
gpu_alloc_empty
from
theano.sandbox.cuda.basic_ops
import
GpuAllocEmpty
,
gpu_alloc_empty
from
theano.sandbox.cuda
import
float32_shared_constructor
as
shared
from
theano.sandbox.cuda
import
float32_shared_constructor
as
shared
...
@@ -979,98 +981,104 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -979,98 +981,104 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnConv3d
dnn
.
GpuDnnConv3d
)
)
def
test_conv_gradw
(
self
):
def
_test_conv_gradw
(
self
,
img
,
topgrad
,
kerns
,
img_shape
,
kerns_shape
,
border_mode
,
conv_mode
,
subsample
):
if
not
dnn
.
dnn_available
():
if
not
dnn
.
dnn_available
():
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
img
=
T
.
ftensor4
(
'img'
)
kerns
=
T
.
ftensor4
(
'kerns'
)
topgrad_shape
=
get_conv_output_shape
(
img_shape
,
kerns_shape
,
out
=
T
.
ftensor4
(
'out'
)
border_mode
,
subsample
)
img_val
=
numpy
.
asarray
(
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
5
,
6
,
8
),
numpy
.
random
.
rand
(
*
img_shape
),
dtype
=
'float32'
dtype
=
theano
.
config
.
floatX
)
)
kern
_vals
=
numpy
.
asarray
(
topgrad
_vals
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
1
,
5
,
6
),
numpy
.
random
.
rand
(
*
topgrad_shape
),
dtype
=
'float32'
dtype
=
theano
.
config
.
floatX
)
)
for
params
in
product
(
kerns_vals
=
numpy
.
zeros
(
kerns_shape
,
dtype
=
theano
.
config
.
floatX
)
[
'valid'
,
'full'
,
'half'
],
kerns_shape
=
theano
.
shared
(
numpy
.
asarray
(
kerns_shape
))
[(
1
,
1
)],
# strides besides (1, 1)
topgrad_shape
=
theano
.
shared
(
numpy
.
asarray
(
topgrad_shape
))
[
'conv'
,
'cross'
]
desc
=
dnn
.
GpuDnnConvDesc
(
):
border_mode
=
border_mode
,
temp_img
=
img
.
dimshuffle
(
1
,
0
,
2
,
3
)
subsample
=
subsample
,
temp_kerns
=
kerns
conv_mode
=
conv_mode
if
params
[
2
]
==
'conv'
:
)(
topgrad_shape
,
kerns_shape
)
temp_kerns
=
temp_kerns
[:,
:,
::
-
1
,
::
-
1
]
conv_grad_w
=
dnn
.
GpuDnnConvGradW
()(
temp_kerns
=
temp_kerns
.
dimshuffle
(
1
,
0
,
2
,
3
)
img
,
shape
=
(
topgrad
,
kern_vals
.
shape
[
1
],
img_val
.
shape
[
1
],
kerns
,
img_val
.
shape
[
2
]
-
kern_vals
.
shape
[
2
]
+
1
,
desc
,
img_val
.
shape
[
3
]
-
kern_vals
.
shape
[
3
]
+
1
)
)
self
.
_compile_and_check
(
out_vals
=
numpy
.
zeros
(
shape
,
dtype
=
'float32'
)
[
img
,
topgrad
,
kerns
],
desc
=
dnn
.
GpuDnnConvDesc
(
[
conv_grad_w
],
border_mode
=
params
[
0
],
[
img_val
,
topgrad_vals
,
kerns_vals
],
subsample
=
params
[
1
],
dnn
.
GpuDnnConvGradW
conv_mode
=
params
[
2
]
)
)(
temp_img
.
shape
,
out
.
shape
)
conv_grad_w
=
dnn
.
GpuDnnConvGradW
()(
border_modes
=
[
'valid'
,
'full'
,
'half'
]
temp_img
,
conv_modes
=
[
'conv'
,
'cross'
]
temp_kerns
,
out
,
desc
,
)
self
.
_compile_and_check
(
[
temp_img
,
temp_kerns
,
out
],
[
conv_grad_w
],
[
img_val
,
kern_vals
,
out_vals
],
dnn
.
GpuDnnConvGradW
)
def
test_conv3d_gradw
(
self
):
@parameterized.expand
(
product
(
border_modes
,
conv_modes
),
utt
.
custom_name_func
)
def
test_conv_gradw
(
self
,
border_mode
,
conv_mode
):
self
.
_test_conv_gradw
(
T
.
tensor4
(
'img'
),
T
.
tensor4
(
'topgrad'
),
T
.
tensor4
(
'kerns'
),
(
5
,
2
,
6
,
13
),
(
1
,
2
,
3
,
7
),
border_mode
,
conv_mode
,
(
1
,
1
))
def
_test_conv3d_gradw
(
self
,
img
,
topgrad
,
kerns
,
img_shape
,
kerns_shape
,
border_mode
,
conv_mode
,
subsample
):
if
not
(
cuda
.
dnn
.
dnn_available
()
and
dnn
.
version
()
>=
(
2000
,
2000
)):
if
not
(
cuda
.
dnn
.
dnn_available
()
and
dnn
.
version
()
>=
(
2000
,
2000
)):
raise
SkipTest
(
'"cuDNN 3D convolution requires cuDNN v2'
)
raise
SkipTest
(
'"cuDNN 3D convolution requires cuDNN v2'
)
img
=
T
.
ftensor5
(
'img'
)
kerns
=
T
.
ftensor5
(
'kerns'
)
topgrad_shape
=
get_conv_output_shape
(
img_shape
,
kerns_shape
,
out
=
T
.
ftensor5
(
'out'
)
border_mode
,
subsample
)
img_val
=
numpy
.
asarray
(
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
9
,
2
,
4
,
8
,
13
),
numpy
.
random
.
rand
(
*
img_shape
),
dtype
=
'float32'
dtype
=
theano
.
config
.
floatX
)
)
kern
_vals
=
numpy
.
asarray
(
topgrad
_vals
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
11
,
2
,
3
,
1
,
4
),
numpy
.
random
.
rand
(
*
topgrad_shape
),
dtype
=
'float32'
dtype
=
theano
.
config
.
floatX
)
)
for
params
in
product
(
kerns_vals
=
numpy
.
zeros
(
kerns_shape
,
dtype
=
theano
.
config
.
floatX
)
[
'valid'
,
'full'
,
'half'
],
kerns_shape
=
theano
.
shared
(
numpy
.
asarray
(
kerns_shape
))
[(
1
,
1
,
1
),
(
2
,
2
,
2
)],
topgrad_shape
=
theano
.
shared
(
numpy
.
asarray
(
topgrad_shape
))
[
'conv'
,
'cross'
]
desc
=
dnn
.
GpuDnnConvDesc
(
):
border_mode
=
border_mode
,
out_vals
=
numpy
.
zeros
(
subsample
=
subsample
,
dnn
.
GpuDnnConv3d
.
get_out_shape
(
img_val
.
shape
,
kern_vals
.
shape
,
conv_mode
=
conv_mode
border_mode
=
params
[
0
],
)(
topgrad_shape
,
kerns_shape
)
subsample
=
params
[
1
]),
conv_grad_w
=
dnn
.
GpuDnnConv3dGradW
()(
dtype
=
'float32'
)
img
,
topgrad
,
kerns
,
desc
,
)
self
.
_compile_and_check
(
[
img
,
topgrad
,
kerns
],
[
conv_grad_w
],
[
img_val
,
topgrad_vals
,
kerns_vals
],
dnn
.
GpuDnnConv3dGradW
)
desc
=
dnn
.
GpuDnnConvDesc
(
@parameterized.expand
(
product
(
border_modes
,
conv_modes
),
utt
.
custom_name_func
)
border_mode
=
params
[
0
],
def
test_conv3d_gradw
(
self
,
border_mode
,
conv_mode
):
subsample
=
params
[
1
],
self
.
_test_conv3d_gradw
(
T
.
tensor5
(
'img'
),
conv_mode
=
params
[
2
]
T
.
tensor5
(
'topgrad'
),
)(
img
.
shape
,
out
.
shape
)
T
.
tensor5
(
'kerns'
),
conv_grad_w
=
dnn
.
GpuDnnConv3dGradW
()(
(
5
,
2
,
6
,
13
,
21
),
img
,
(
1
,
2
,
3
,
7
,
9
),
out
,
border_mode
,
kerns
,
conv_mode
,
desc
,
(
1
,
1
,
1
))
)
self
.
_compile_and_check
(
[
img
,
out
,
kerns
],
[
conv_grad_w
],
[
img_val
,
out_vals
,
kern_vals
],
dnn
.
GpuDnnConv3dGradW
)
def
test_conv_gradi
(
self
):
def
test_conv_gradi
(
self
):
if
not
dnn
.
dnn_available
():
if
not
dnn
.
dnn_available
():
...
...
theano/tensor/nnet/abstract_conv.py
浏览文件 @
c072d669
...
@@ -11,6 +11,7 @@ import theano
...
@@ -11,6 +11,7 @@ import theano
from
theano.tensor
import
as_tensor_variable
,
patternbroadcast
from
theano.tensor
import
as_tensor_variable
,
patternbroadcast
from
theano.tensor
import
get_scalar_constant_value
,
NotScalarConstantError
from
theano.tensor
import
get_scalar_constant_value
,
NotScalarConstantError
from
theano.tensor.opt
import
Assert
from
theano.gof
import
Apply
,
Op
from
theano.gof
import
Apply
,
Op
from
six.moves
import
xrange
from
six.moves
import
xrange
...
@@ -51,11 +52,11 @@ def get_conv_output_shape(image_shape, kernel_shape,
...
@@ -51,11 +52,11 @@ def get_conv_output_shape(image_shape, kernel_shape,
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
If it is a tuple, its two (or three) elements respectively correspond
If it is a tuple, its two (or three) elements respectively correspond
to the padding on height and width (and possibly depth) axis.
to the padding on height and width (and possibly depth) axis.
subsample: tuple of int (symbolic or numeric). Its or three elements
subsample: tuple of int (symbolic or numeric). Its
two
or three elements
respectively correspond to the subsampling on height and width (and
respectively correspond to the subsampling on height and width (and
possibly depth) axis.
possibly depth) axis.
filter_dilation: tuple of int (symbolic or numeric). Its two
elements
filter_dilation: tuple of int (symbolic or numeric). Its two
or three
correspond respectively to the dilation on height and width axis.
elements
correspond respectively to the dilation on height and width axis.
Returns
Returns
-------
-------
...
@@ -137,6 +138,374 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
...
@@ -137,6 +138,374 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
return
out_shp
return
out_shp
def get_conv_gradweights_shape(image_shape, top_shape,
                               border_mode, subsample,
                               filter_dilation=None):
    """
    Try to infer the kernel shape of a convolution gradWeights.

    The two leading entries of the result are copied from the inputs
    (``top_shape[1]`` and ``image_shape[1]``). Each remaining entry is
    computed independently, one convolution axis at a time, by
    `get_conv_gradweights_shape_1axis`; that helper yields None for an axis
    whose kernel size cannot be determined (subsample != 1, border_mode
    'half', or an undefined input).

    Parameters
    ----------
    image_shape: tuple of int (symbolic or numeric) corresponding to the
        input image shape. Its four (or five) elements must correspond
        respectively to: batch size, number of input channels, height and
        width (and possibly depth) of the image. None where undefined.
    top_shape: tuple of int (symbolic or numeric) corresponding to the top
        image shape. Its four (or five) elements must correspond
        respectively to: batch size, number of output channels, height and
        width (and possibly depth) of the image. None where undefined.
    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
        or numeric). A tuple is indexed per convolution axis; any other
        value is applied unchanged to every axis.
    subsample: tuple of int (symbolic or numeric). One subsampling factor
        per convolution axis; its length defines the number of axes.
    filter_dilation: tuple of int (symbolic or numeric), one dilation factor
        per convolution axis. Defaults to all ones when None.

    Returns
    -------
    kernel_shape: tuple corresponding to the kernel shape:
        ``(top_shape[1], image_shape[1])`` followed by one kernel size per
        convolution axis. None where undefined.

    """
    in_channels, img_dims = image_shape[1], image_shape[2:]
    out_channels, top_dims = top_shape[1], top_shape[2:]

    if filter_dilation is None:
        filter_dilation = numpy.ones(len(subsample), dtype='int')

    # Only a tuple border_mode is unpacked per axis; strings and scalars
    # are passed through as-is to every axis.
    per_axis_mode = isinstance(border_mode, tuple)

    kernel_dims = []
    for axis in range(len(subsample)):
        mode = border_mode[axis] if per_axis_mode else border_mode
        kernel_dims.append(get_conv_gradweights_shape_1axis(
            img_dims[axis], top_dims[axis], mode,
            subsample[axis], filter_dilation[axis]))

    return (out_channels, in_channels) + tuple(kernel_dims)
def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode,
                                     subsample, dilation):
    """
    Try to infer the kernel shape of a convolution gradWeights on one axis.

    The kernel size can only be computed exactly when subsample is 1 and
    border_mode is not 'half'. If subsample is not 1 or border_mode is
    'half', this function returns None.

    Parameters
    ----------
    image_shape: int or None. Corresponds to the input image shape on a
        given axis. None if undefined.
    top_shape: int or None. Corresponds to the top shape on a given axis.
        None if undefined.
    border_mode: string or int. If it is a string, it must be
        'valid', 'half' or 'full'. If it is an integer, it must correspond to
        the padding on the considered axis.
    subsample: int. It must correspond to the subsampling on the
        considered axis.
    dilation: int. It must correspond to the dilation on the
        considered axis.

    Returns
    -------
    kernel_shape: int or None. Corresponds to the kernel shape on a given
        axis. None if undefined.

    Raises
    ------
    ValueError
        If `border_mode` is an explicit padding smaller than zero.

    """
    arguments = (image_shape, top_shape, border_mode, subsample, dilation)
    if any(arg is None for arg in arguments):
        return None
    if subsample != 1 or border_mode == "half":
        return None

    # `span` is the dilated kernel extent minus one, i.e.
    # (kernel_shape - 1) * dilation, recovered from the forward relation
    # top = image + 2 * pad - span (valid because subsample == 1).
    if border_mode == "full":
        span = top_shape - image_shape
    elif border_mode == "valid":
        span = image_shape - top_shape
    else:
        if border_mode < 0:
            raise ValueError("border_mode must be >= 0")
        span = image_shape + 2 * border_mode - top_shape

    if dilation > 1:
        # Floor division keeps the result an integer; true division would
        # turn the kernel size into a float.
        span = span // dilation

    return span + 1
def get_conv_gradinputs_shape(kernel_shape, top_shape,
                              border_mode, subsample,
                              filter_dilation=None):
    """
    Try to infer the image shape of a convolution gradInputs.

    The two leading entries of the result are copied from the inputs
    (``top_shape[0]`` and ``kernel_shape[1]``). Each remaining entry is
    computed independently, one convolution axis at a time, by
    `get_conv_gradinputs_shape_1axis`; that helper yields None for an axis
    whose image size cannot be determined (subsample != 1 or an undefined
    input).

    Parameters
    ----------
    kernel_shape: tuple of int (symbolic or numeric) corresponding to the
        kernel shape. Its four (or five) elements must correspond respectively
        to: number of output channels, number of input channels, height and
        width (and possibly depth) of the kernel. None where undefined.
    top_shape: tuple of int (symbolic or numeric) corresponding to the top
        image shape. Its four (or five) elements must correspond respectively
        to: batch size, number of output channels, height and width (and
        possibly depth) of the image. None where undefined.
    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
        or numeric). A tuple is indexed per convolution axis; any other
        value is applied unchanged to every axis.
    subsample: tuple of int (symbolic or numeric). One subsampling factor
        per convolution axis; its length defines the number of axes.
    filter_dilation: tuple of int (symbolic or numeric), one dilation factor
        per convolution axis. Defaults to all ones when None.

    Returns
    -------
    image_shape: tuple corresponding to the input image shape:
        ``(top_shape[0], kernel_shape[1])`` followed by one image size per
        convolution axis. None where undefined.

    """
    batch_size, top_dims = top_shape[0], top_shape[2:]
    in_channels, kernel_dims = kernel_shape[1], kernel_shape[2:]

    if filter_dilation is None:
        filter_dilation = numpy.ones(len(subsample), dtype='int')

    # Only a tuple border_mode is unpacked per axis; strings and scalars
    # are passed through as-is to every axis.
    per_axis_mode = isinstance(border_mode, tuple)

    image_dims = []
    for axis in range(len(subsample)):
        mode = border_mode[axis] if per_axis_mode else border_mode
        image_dims.append(get_conv_gradinputs_shape_1axis(
            kernel_dims[axis], top_dims[axis], mode,
            subsample[axis], filter_dilation[axis]))

    return (batch_size, in_channels) + tuple(image_dims)
def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode,
                                    subsample, dilation):
    """
    Try to infer the image shape of a convolution gradInputs on one axis.

    The image size can only be computed exactly when subsample is 1.
    If subsample is not 1, this function returns None.

    Parameters
    ----------
    kernel_shape: int or None. Corresponds to the kernel shape on a given
        axis. None if undefined.
    top_shape: int or None. Corresponds to the top shape on a given axis.
        None if undefined.
    border_mode: string or int. If it is a string, it must be
        'valid', 'half' or 'full'. If it is an integer, it must correspond to
        the padding on the considered axis.
    subsample: int. It must correspond to the subsampling on the
        considered axis.
    dilation: int. It must correspond to the dilation on the
        considered axis.

    Returns
    -------
    image_shape: int or None. Corresponds to the input image shape on a
        given axis. None if undefined.

    Raises
    ------
    ValueError
        If `border_mode` is an explicit padding smaller than zero.

    """
    if None in [kernel_shape, top_shape, border_mode, subsample, dilation]:
        return None
    if subsample != 1:
        return None

    # Extent covered by the kernel once the dilation gaps are included.
    dilated_extent = (kernel_shape - 1) * dilation + 1

    # Translate the border mode into an explicit padding amount.
    if border_mode == "valid":
        padding = 0
    elif border_mode == "full":
        padding = dilated_extent - 1
    elif border_mode == "half":
        padding = dilated_extent // 2
    else:
        padding = border_mode
        if padding < 0:
            raise ValueError("border_mode must be >= 0")

    # General relation:
    #   image_shape = (top_shape - 1) * s - 2 * pad + dil_kernel_shape + a
    # with 0 <= a < subsample; subsample == 1 here, so a == 0.
    # The zero-padding case is spelled out separately so that symbolic
    # shapes produce the smallest possible graph.
    if padding == 0:
        return (top_shape + dilated_extent - 1)
    return (top_shape - 2 * padding + dilated_extent - 1)
def check_conv_gradinputs_shape(image_shape, kernel_shape, output_shape,
                                border_mode, subsample,
                                filter_dilation=None):
    """
    Check whether the given image, kernel and output shapes are consistent.

    Parameters
    ----------
    image_shape: tuple of int (symbolic or numeric) corresponding to the input
        image shape. Its four (or five) elements must correspond respectively
        to: batch size, number of input channels, height and width (and
        possibly depth) of the image. None where undefined.
    kernel_shape: tuple of int (symbolic or numeric) corresponding to the
        kernel shape. Its four (or five) elements must correspond respectively
        to: number of output channels, number of input channels, height and
        width (and possibly depth) of the kernel. None where undefined.
    output_shape: tuple of int (symbolic or numeric) corresponding to the
        output shape. Its four (or five) elements must correspond respectively
        to: batch size, number of output channels, height and width
        (and possibly depth) of the output. None where undefined.
    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
        or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
        If it is a tuple, its two (or three) elements respectively correspond
        to the padding on height and width (and possibly depth) axis.
    subsample: tuple of int (symbolic or numeric). Its two or three elements
        respectively correspond to the subsampling on height and width (and
        possibly depth) axis.
    filter_dilation: tuple of int (symbolic or numeric). Its two or three
        elements correspond respectively to the dilation on height and
        width (and possibly depth) axis.

    Returns
    -------
    Returns False if the three shapes do not have the same length, if
    `subsample` or `filter_dilation` does not have one entry per convolution
    axis, or if a convolution with the given input shape, kernel shape and
    parameters would not have produced the given output shape.
    Returns True in all other cases: if the given output shape matches the
    computed output shape, but also if a dimension could not be checked
    because it is None or is not a scalar constant.

    """
    image_shape = tuple(image_shape)
    kernel_shape = tuple(kernel_shape)
    output_shape = tuple(output_shape)

    # Structural checks: all shapes share one rank, and the per-axis
    # parameters have exactly one entry per convolution axis.
    rank = len(image_shape)
    if rank != len(kernel_shape) or rank != len(output_shape):
        return False
    if rank - 2 != len(subsample):
        return False
    if filter_dilation is not None and rank - 2 != len(filter_dilation):
        return False

    # Output shape a forward convolution would produce.
    predicted_shape = get_conv_output_shape(
        image_shape, kernel_shape, border_mode, subsample, filter_dilation)

    def dims_agree(given, computed):
        # Unknown dimensions cannot contradict anything.
        if given is None or computed is None:
            return True
        try:
            given = get_scalar_constant_value(given)
            computed = get_scalar_constant_value(computed)
            return int(given) == int(computed)
        except NotScalarConstantError:
            # no answer possible, accept for now
            return True

    # Stop at the first dimension that provably disagrees.
    for given, computed in zip(output_shape, predicted_shape):
        if not dims_agree(given, computed):
            return False
    return True
def assert_conv_shape(shape):
    """Validate a convolution shape, adding run-time checks where needed.

    The first two dimensions must be larger than or equal to zero. The
    remaining (convolution) dimensions must be larger than zero.

    Parameters
    ----------
    shape: tuple of int (symbolic or numeric) corresponding to the input,
        output or kernel shape of a convolution. For input and output, the
        first elements should be the batch size and number of channels. For
        kernels, the first two elements should contain the channel counts.
        The remaining dimensions are the convolution dimensions.

    Returns
    -------
    Returns a tuple similar to the given `shape`. Elements for which
    `get_scalar_constant_value` succeeds are checked immediately and kept
    unchanged. Elements that are not constant are wrapped in an `Assert` op
    that checks the dimension at run time.

    Raises
    ------
    ValueError
        If a constant element of `shape` is invalid.

    """
    checked = []
    for axis, dim in enumerate(shape):
        # The two leading dimensions may be zero; the others may not.
        leading = axis < 2
        try:
            const_dim = get_scalar_constant_value(dim)
        except NotScalarConstantError:
            # Not known at graph-construction time: defer the check.
            if leading:
                guard = Assert('The convolution would produce an invalid shape (dim[%d] < 0).' % axis)
                checked.append(guard(dim, theano.tensor.ge(dim, 0)))
            else:
                guard = Assert('The convolution would produce an invalid shape (dim[%d] <= 0).' % axis)
                checked.append(guard(dim, theano.tensor.gt(dim, 0)))
            continue

        if leading:
            if const_dim < 0:
                raise ValueError('The convolution would produce an invalid shape (dim[%d]: %d < 0).' % (axis, const_dim))
        elif const_dim <= 0:
            raise ValueError('The convolution would produce an invalid shape (dim[%d]: %d <= 0).' % (axis, const_dim))
        checked.append(dim)
    return tuple(checked)
def assert_shape(x, expected_shape, msg='Unexpected shape.'):
    """Wrap `x` in an `Assert` that checks its shape.

    Parameters
    ----------
    x : Tensor
        x will be wrapped in an `Assert`.
    expected_shape : tuple or list
        The expected shape of `x`. The size of a dimension can be None,
        which means it will not be checked.
    msg : str
        The error message of the `Assert`.

    Returns
    -------
    Tensor
        `x` wrapped in an `Assert` whose conditions compare each checked
        dimension of `x.shape` with the corresponding entry of
        `expected_shape`.
        If `expected_shape` is None or contains only Nones, the function
        will return `x` directly.

    """
    if expected_shape is None:
        return x

    actual_shape = x.shape
    # One equality condition per dimension with a known expected size.
    conditions = [theano.tensor.eq(actual_shape[axis], expected_shape[axis])
                  for axis in range(x.ndim)
                  if expected_shape[axis] is not None]

    if not conditions:
        return x
    return Assert(msg)(x, *conditions)
def
conv2d
(
input
,
def
conv2d
(
input
,
filters
,
filters
,
input_shape
=
None
,
input_shape
=
None
,
...
@@ -782,7 +1151,7 @@ def conv3d_grad_wrt_weights(input,
...
@@ -782,7 +1151,7 @@ def conv3d_grad_wrt_weights(input,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
)
filter_dilation
=
filter_dilation
)
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[
:
-
3
])
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[
-
3
:
])
def
bilinear_kernel_2D
(
ratio
,
normalize
=
True
):
def
bilinear_kernel_2D
(
ratio
,
normalize
=
True
):
...
@@ -1209,6 +1578,13 @@ class AbstractConv(BaseAbstractConv):
...
@@ -1209,6 +1578,13 @@ class AbstractConv(BaseAbstractConv):
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
img
=
assert_shape
(
img
,
self
.
imshp
,
'AbstractConv shape mismatch: shape of '
'image does not match given imshp.'
)
kern
=
assert_shape
(
kern
,
self
.
kshp
,
'AbstractConv shape mismatch: shape of '
'filters does not match given kshp.'
)
broadcastable
=
[
img
.
broadcastable
[
0
],
broadcastable
=
[
img
.
broadcastable
[
0
],
kern
.
broadcastable
[
0
]]
+
([
False
]
*
self
.
convdim
)
kern
.
broadcastable
[
0
]]
+
([
False
]
*
self
.
convdim
)
output
=
img
.
type
.
clone
(
broadcastable
=
broadcastable
)()
output
=
img
.
type
.
clone
(
broadcastable
=
broadcastable
)()
...
@@ -1420,6 +1796,10 @@ class AbstractConv_gradWeights(BaseAbstractConv):
...
@@ -1420,6 +1796,10 @@ class AbstractConv_gradWeights(BaseAbstractConv):
if
topgrad
.
type
.
ndim
!=
2
+
self
.
convdim
:
if
topgrad
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'topgrad must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
raise
TypeError
(
'topgrad must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
img
=
assert_shape
(
img
,
self
.
imshp
,
'AbstractConv_gradWeights shape mismatch: shape of '
'image does not match given imshp.'
)
shape
=
as_tensor_variable
(
shape
)
shape
=
as_tensor_variable
(
shape
)
broadcastable
=
[
topgrad
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
broadcastable
[
1
],
img
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
img
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
...
@@ -1655,6 +2035,10 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -1655,6 +2035,10 @@ class AbstractConv_gradInputs(BaseAbstractConv):
if
topgrad
.
type
.
ndim
!=
2
+
self
.
convdim
:
if
topgrad
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'topgrad must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
raise
TypeError
(
'topgrad must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
kern
=
assert_shape
(
kern
,
self
.
kshp
,
'AbstractConv_gradInputs shape mismatch: shape of '
'filters does not match given kshp.'
)
shape
=
as_tensor_variable
(
shape
)
shape
=
as_tensor_variable
(
shape
)
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
kern
.
type
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
...
@@ -1675,6 +2059,21 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -1675,6 +2059,21 @@ class AbstractConv_gradInputs(BaseAbstractConv):
'"valid", "full", "half", an integer or a tuple of'
'"valid", "full", "half", an integer or a tuple of'
' integers'
.
format
(
mode
))
' integers'
.
format
(
mode
))
imshp
=
self
.
imshp
[:]
if
self
.
imshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
fallback_imshp
=
([
topgrad
.
shape
[
0
],
kern
.
shape
[
1
]]
+
[
shape
[
i
]
for
i
in
range
(
self
.
convdim
)])
imshp
=
[
fallback_imshp
[
i
]
if
imshp
[
i
]
is
None
else
imshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
expected_topgrad_shape
=
get_conv_output_shape
(
imshp
,
kern
.
shape
,
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)
if
not
tuple
(
expected_topgrad_shape
)
==
tuple
(
topgrad
.
shape
):
raise
ValueError
(
'invalid input_shape for gradInputs: the given input_shape '
'would produce an output of shape {}, but the given topgrad '
'has shape {}'
.
format
(
tuple
(
expected_topgrad_shape
),
tuple
(
topgrad
.
shape
)))
dil_kernshp
=
tuple
((
kern
.
shape
[
i
+
2
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
dil_kernshp
=
tuple
((
kern
.
shape
[
i
+
2
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
for
i
in
range
(
self
.
convdim
))
pad
=
(
0
,)
*
self
.
convdim
pad
=
(
0
,)
*
self
.
convdim
...
...
theano/tensor/nnet/corr.py
浏览文件 @
c072d669
...
@@ -123,7 +123,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -123,7 +123,7 @@ class BaseCorrMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
1
,
self
.
openmp
,
blas_header_version
())
return
(
5
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -234,17 +234,17 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -234,17 +234,17 @@ class BaseCorrMM(gof.OpenMPOp):
# When subsampling, we cannot unambiguously infer the height and width
# When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given.
# of bottom and weights from top, so we require them to be given.
# Similarly, when border_mode="half", we cannot infer the weight size.
# Similarly, when border_mode="half", we cannot infer the weight size.
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
height
:
if
not
height
:
raise
ValueError
(
"height must be given for backprop with vertical sampling or border_mode='half'"
)
height
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
height
height
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
height
else
:
else
:
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
raise
ValueError
(
"height must be given for backprop with vertical sampling or border_mode='half'"
)
height
=
'-1'
height
=
'-1'
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
if
width
:
if
not
width
:
raise
ValueError
(
"width must be given for backprop with horizontal sampling or border_mode='half'"
)
width
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
width
width
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
width
else
:
else
:
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
raise
ValueError
(
"width must be given for backprop with horizontal sampling or border_mode='half'"
)
width
=
'-1'
width
=
'-1'
sub
=
sub
.
copy
()
sub
=
sub
.
copy
()
sub
.
update
(
locals
())
sub
.
update
(
locals
())
...
@@ -268,15 +268,15 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -268,15 +268,15 @@ class BaseCorrMM(gof.OpenMPOp):
// Obtain or infer kernel width and height
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
// (we need to know it early to be able to handle auto-padding)
int kH, kW;
int kH, kW
, dil_kH, dil_kW
;
if (direction != 1) {
if (direction != 1) {
// weight is an input variable, we can just read its shape
// weight is an input variable, we can just read its shape
kH = PyArray_DIMS(weights)[2];
kH = PyArray_DIMS(weights)[2];
kW = PyArray_DIMS(weights)[3];
kW = PyArray_DIMS(weights)[3];
}
}
else {
else {
if (
(dH != 1) || (padH == -1)
) {
if (
%(height)
s != -1
) {
//
vertical subsampling or half padding, kernel height is specified
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
kH =
%(height)
s;
}
}
else if (padH == -2) {
else if (padH == -2) {
...
@@ -287,7 +287,8 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -287,7 +287,8 @@ class BaseCorrMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
}
}
if ((dW != 1) || (padW == -1)) {
if (
%(width)
s != -1) {
// kernel width is specified (perhaps horizontal subsampling or half padding)
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW == -2) {
...
@@ -299,8 +300,8 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -299,8 +300,8 @@ class BaseCorrMM(gof.OpenMPOp):
}
}
// Implicit dilated kernel size
// Implicit dilated kernel size
int
dil_kH = (kH - 1) * dilH + 1;
dil_kH = (kH - 1) * dilH + 1;
int
dil_kW = (kW - 1) * dilW + 1;
dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH == -1) { // vertical half padding
...
@@ -334,6 +335,21 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -334,6 +335,21 @@ class BaseCorrMM(gof.OpenMPOp):
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3]);
%(fail)
s
}
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width)
...
@@ -342,14 +358,44 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -342,14 +358,44 @@ class BaseCorrMM(gof.OpenMPOp):
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[3] = (npy_intp)kW; // how convenient
out_dim[3] = (npy_intp)kW; // how convenient
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
break;
break;
default:
default:
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2
\\
n");
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2
\\
n");
...
@@ -491,13 +537,13 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -491,13 +537,13 @@ class CorrMM_gradWeights(BaseCorrMM):
raise
TypeError
(
'img must be 4D tensor'
)
raise
TypeError
(
'img must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
s
elf
.
subsample
!=
(
1
,
1
)
or
self
.
border_mode
==
"half"
:
if
s
hape
is
None
:
if
s
hape
is
None
:
if
s
elf
.
subsample
!=
(
1
,
1
)
or
self
.
border_mode
==
"half"
:
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
' or border_mode == "half"'
)
' or border_mode == "half"'
)
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
else
:
height_width
=
[]
height_width
=
[]
else
:
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
False
,
False
]
...
@@ -588,9 +634,13 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -588,9 +634,13 @@ class CorrMM_gradInputs(BaseCorrMM):
raise
TypeError
(
'kern must be 4D tensor'
)
raise
TypeError
(
'kern must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
self
.
subsample
!=
(
1
,
1
)
and
shape
is
None
:
if
shape
is
None
:
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
)
if
self
.
subsample
!=
(
1
,
1
):
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
if
self
.
subsample
!=
(
1
,
1
)
else
[]
raise
ValueError
(
'shape must be given if subsample != (1, 1)'
)
height_width
=
[]
else
:
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
False
,
False
]
False
,
False
]
...
...
theano/tensor/nnet/corr3d.py
浏览文件 @
c072d669
...
@@ -123,7 +123,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
...
@@ -123,7 +123,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
1
,
self
.
openmp
,
blas_header_version
())
return
(
5
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -241,23 +241,23 @@ class BaseCorr3dMM(gof.OpenMPOp):
...
@@ -241,23 +241,23 @@ class BaseCorr3dMM(gof.OpenMPOp):
# When subsampling, we cannot unambiguously infer the height and width
# When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given.
# of bottom and weights from top, so we require them to be given.
# Similarly, when border_mode="half", we cannot infer the weight size.
# Similarly, when border_mode="half", we cannot infer the weight size.
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
height
:
if
not
height
:
raise
ValueError
(
"height must be given for backprop with vertical sampling or border_mode='half'"
)
height
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
height
height
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
height
else
:
else
:
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
raise
ValueError
(
"height must be given for backprop with vertical sampling or border_mode='half'"
)
height
=
'-1'
height
=
'-1'
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
if
width
:
if
not
width
:
raise
ValueError
(
"width must be given for backprop with horizontal sampling or border_mode='half'"
)
width
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
width
width
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
width
else
:
else
:
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
raise
ValueError
(
"width must be given for backprop with horizontal sampling or border_mode='half'"
)
width
=
'-1'
width
=
'-1'
if
((
direction
!=
0
)
and
(
dD
!=
1
))
or
((
direction
==
1
)
and
(
padD
==
-
1
)):
if
depth
:
if
not
depth
:
raise
ValueError
(
"depth must be given for backprop with depth sampling or border_mode='half'"
)
depth
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
depth
depth
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
depth
else
:
else
:
if
((
direction
!=
0
)
and
(
dD
!=
1
))
or
((
direction
==
1
)
and
(
padD
==
-
1
)):
raise
ValueError
(
"depth must be given for backprop with depth sampling or border_mode='half'"
)
depth
=
'-1'
depth
=
'-1'
sub
=
sub
.
copy
()
sub
=
sub
.
copy
()
sub
.
update
(
locals
())
sub
.
update
(
locals
())
...
@@ -284,7 +284,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
...
@@ -284,7 +284,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
// Obtain or infer kernel width, height and depth
// Obtain or infer kernel width, height and depth
// (we need to know it early to be able to handle auto-padding)
// (we need to know it early to be able to handle auto-padding)
int kH, kW, kD;
int kH, kW, kD
, dil_kH, dil_kW, dil_kD
;
if (direction != 1) {
if (direction != 1) {
// weight is an input variable, we can just read its shape
// weight is an input variable, we can just read its shape
kH = PyArray_DIMS(weights)[2];
kH = PyArray_DIMS(weights)[2];
...
@@ -292,8 +292,8 @@ class BaseCorr3dMM(gof.OpenMPOp):
...
@@ -292,8 +292,8 @@ class BaseCorr3dMM(gof.OpenMPOp):
kD = PyArray_DIMS(weights)[4];
kD = PyArray_DIMS(weights)[4];
}
}
else {
else {
if (
(dH != 1) || (padH == -1)
) {
if (
%(height)
s != -1
) {
//
vertical subsampling or half padding, kernel height is specified
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
kH =
%(height)
s;
}
}
else if (padH == -2) {
else if (padH == -2) {
...
@@ -304,7 +304,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
...
@@ -304,7 +304,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
}
}
if (
(dW != 1) || (padW == -1)
) {
if (
%(width)
s != -1
) {
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW == -2) {
...
@@ -313,7 +313,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
...
@@ -313,7 +313,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
else {
else {
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
if (
(dD != 1) || (padD == -1)
) {
if (
%(depth)
s != -1
) {
kD =
%(depth)
s;
kD =
%(depth)
s;
}
}
else if (padD == -2) {
else if (padD == -2) {
...
@@ -325,9 +325,9 @@ class BaseCorr3dMM(gof.OpenMPOp):
...
@@ -325,9 +325,9 @@ class BaseCorr3dMM(gof.OpenMPOp):
}
}
// Implicit dilated kernel size
// Implicit dilated kernel size
int
dil_kH = (kH - 1) * dilH + 1;
dil_kH = (kH - 1) * dilH + 1;
int
dil_kW = (kW - 1) * dilW + 1;
dil_kW = (kW - 1) * dilW + 1;
int
dil_kD = (kD - 1) * dilD + 1;
dil_kD = (kD - 1) * dilD + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH == -1) { // vertical half padding
...
@@ -372,6 +372,23 @@ class BaseCorr3dMM(gof.OpenMPOp):
...
@@ -372,6 +372,23 @@ class BaseCorr3dMM(gof.OpenMPOp):
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
out_dim[4] = (npy_intp)((PyArray_DIMS(bottom)[4] + 2*padD - ((PyArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1);
out_dim[4] = (npy_intp)((PyArray_DIMS(bottom)[4] + 2*padD - ((PyArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"Corr3dMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(bottom)[4],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(weights)[4],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3], (long int)out_dim[4]);
%(fail)
s
}
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width, depth)
// output is weights: (num_filters, num_channels, height, width, depth)
...
@@ -381,15 +398,49 @@ class BaseCorr3dMM(gof.OpenMPOp):
...
@@ -381,15 +398,49 @@ class BaseCorr3dMM(gof.OpenMPOp):
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[3] = (npy_intp)kW; // how convenient
out_dim[3] = (npy_intp)kW; // how convenient
out_dim[4] = (npy_intp)kD;
out_dim[4] = (npy_intp)kD;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"Corr3dMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(bottom)[4],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3], (long int)out_dim[4],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
(long int)PyArray_DIMS(top)[4]);
%(fail)
s
}
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width, depth)
// output is bottom: (batchsize, num_channels, height, width, depth)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[4] = (npy_intp)((dD != 1) ?
%(depth)
s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
out_dim[4] = (npy_intp)((
%(depth)
s != -1) ?
%(depth)
s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"Corr3dMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3], (long int)out_dim[4],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(weights)[4],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
(long int)PyArray_DIMS(top)[4]);
%(fail)
s
}
break;
break;
default:
default:
PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2
\\
n");
PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2
\\
n");
...
@@ -533,15 +584,15 @@ class Corr3dMM_gradWeights(BaseCorr3dMM):
...
@@ -533,15 +584,15 @@ class Corr3dMM_gradWeights(BaseCorr3dMM):
raise
TypeError
(
'img must be 5D tensor'
)
raise
TypeError
(
'img must be 5D tensor'
)
if
topgrad
.
type
.
ndim
!=
5
:
if
topgrad
.
type
.
ndim
!=
5
:
raise
TypeError
(
'topgrad must be 5D tensor'
)
raise
TypeError
(
'topgrad must be 5D tensor'
)
if
s
elf
.
subsample
!=
(
1
,
1
,
1
)
or
self
.
border_mode
==
"half"
:
if
s
hape
is
None
:
if
s
hape
is
None
:
if
s
elf
.
subsample
!=
(
1
,
1
,
1
)
or
self
.
border_mode
==
"half"
:
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
' or border_mode == "half"'
)
' or border_mode == "half"'
)
height_width_depth
=
[]
else
:
height_width_depth
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
height_width_depth
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
2
])
.
astype
(
'int64'
)]
as_tensor_variable
(
shape
[
2
])
.
astype
(
'int64'
)]
else
:
height_width_depth
=
[]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
,
False
]
False
,
False
,
False
]
...
@@ -638,14 +689,14 @@ class Corr3dMM_gradInputs(BaseCorr3dMM):
...
@@ -638,14 +689,14 @@ class Corr3dMM_gradInputs(BaseCorr3dMM):
raise
TypeError
(
'kern must be 5D tensor'
)
raise
TypeError
(
'kern must be 5D tensor'
)
if
topgrad
.
type
.
ndim
!=
5
:
if
topgrad
.
type
.
ndim
!=
5
:
raise
TypeError
(
'topgrad must be 5D tensor'
)
raise
TypeError
(
'topgrad must be 5D tensor'
)
if
self
.
subsample
!=
(
1
,
1
,
1
)
and
shape
is
None
:
if
shape
is
None
:
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
)
if
self
.
subsample
!=
(
1
,
1
,
1
):
if
self
.
subsample
!=
(
1
,
1
,
1
):
raise
ValueError
(
'shape must be given if subsample != (1, 1, 1)'
)
height_width_depth
=
[]
else
:
height_width_depth
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
height_width_depth
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
2
])
.
astype
(
'int64'
)]
as_tensor_variable
(
shape
[
2
])
.
astype
(
'int64'
)]
else
:
height_width_depth
=
[]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
False
,
False
,
False
]
False
,
False
,
False
]
...
...
theano/tensor/nnet/corr3d_gemm.c
浏览文件 @
c072d669
...
@@ -188,9 +188,17 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -188,9 +188,17 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
int
dil_kD
=
(
kD
-
1
)
*
dilD
+
1
;
const
int
dil_kD
=
(
kD
-
1
)
*
dilD
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const
int
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_kH
)
/
dH
+
1
;
const
int
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
int
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_kW
)
/
dW
+
1
;
const
int
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
const
int
topDepth
=
(
bottomDepth
+
2
*
padD
-
dil_kD
)
/
dD
+
1
;
const
int
topDepthNoDD
=
(
bottomDepth
+
2
*
padD
-
dil_kD
);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
const
int
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
int
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
const
int
topDepth
=
_CONV_FLOORDIV_X
(
topDepthNoDD
,
dD
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
...
@@ -245,7 +253,23 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -245,7 +253,23 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
char
Trans
=
'T'
;
char
Trans
=
'T'
;
PyArrayObject
*
output
;
PyArrayObject
*
output
;
if
(
direction
==
0
)
{
// forward pass
if
(
batchSize
==
0
||
nChannels
==
0
||
nFilters
==
0
)
{
switch
(
direction
)
{
case
0
:
output
=
top
;
break
;
case
1
:
output
=
weight
;
break
;
case
2
:
output
=
bottom
;
break
;
default:
return
NULL
;
}
PyArray_FILLWBYTE
(
output
,
0
);
}
else
if
(
direction
==
0
)
{
// forward pass
output
=
top
;
output
=
top
;
// valid correlation: im3d2col, then gemm
// valid correlation: im3d2col, then gemm
// Iterate over batch
// Iterate over batch
...
...
theano/tensor/nnet/corr_gemm.c
浏览文件 @
c072d669
...
@@ -164,8 +164,15 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -164,8 +164,15 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
int
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth)
// top: (batchSize, nFilters, topHeight, topWidth)
const
int
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_kH
)
/
dH
+
1
;
const
int
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
int
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_kW
)
/
dW
+
1
;
const
int
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
const
int
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
int
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
...
@@ -219,7 +226,23 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -219,7 +226,23 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
char
Trans
=
'T'
;
char
Trans
=
'T'
;
PyArrayObject
*
output
;
PyArrayObject
*
output
;
if
(
direction
==
0
)
{
// forward pass
if
(
batchSize
==
0
||
nChannels
==
0
||
nFilters
==
0
)
{
switch
(
direction
)
{
case
0
:
output
=
top
;
break
;
case
1
:
output
=
weight
;
break
;
case
2
:
output
=
bottom
;
break
;
default:
return
NULL
;
}
PyArray_FILLWBYTE
(
output
,
0
);
}
else
if
(
direction
==
0
)
{
// forward pass
output
=
top
;
output
=
top
;
// valid correlation: im2col, then gemm
// valid correlation: im2col, then gemm
// Iterate over batch
// Iterate over batch
...
...
theano/tensor/nnet/tests/test_abstract_conv.py
浏览文件 @
c072d669
...
@@ -10,7 +10,12 @@ from theano import tensor
...
@@ -10,7 +10,12 @@ from theano import tensor
from
theano.gof.opt
import
check_stack_trace
from
theano.gof.opt
import
check_stack_trace
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet
import
corr
,
corr3d
,
abstract_conv
as
conv
from
theano.tensor.nnet
import
corr
,
corr3d
,
abstract_conv
as
conv
from
theano.tensor.nnet.abstract_conv
import
get_conv_output_shape
from
theano.tensor.nnet.abstract_conv
import
(
get_conv_output_shape
,
get_conv_gradweights_shape
,
get_conv_gradinputs_shape
,
check_conv_gradinputs_shape
,
assert_conv_shape
,
assert_shape
)
from
theano.tensor.nnet.abstract_conv
import
AbstractConv2d
from
theano.tensor.nnet.abstract_conv
import
AbstractConv2d
from
theano.tensor.nnet.abstract_conv
import
AbstractConv2d_gradInputs
from
theano.tensor.nnet.abstract_conv
import
AbstractConv2d_gradInputs
from
theano.tensor.nnet.abstract_conv
import
AbstractConv2d_gradWeights
from
theano.tensor.nnet.abstract_conv
import
AbstractConv2d_gradWeights
...
@@ -133,6 +138,198 @@ class TestGetConvOutShape(unittest.TestCase):
...
@@ -133,6 +138,198 @@ class TestGetConvOutShape(unittest.TestCase):
self
.
assertTrue
(
test4_params
==
(
3
,
4
,
6
,
4
,
10
))
self
.
assertTrue
(
test4_params
==
(
3
,
4
,
6
,
4
,
10
))
class TestConvGradInputsShape(unittest.TestCase):
    """Consistency checks between the forward and gradient shape helpers.

    The tests sweep small image sizes, kernel sizes, border modes,
    subsampling factors and dilations, and compare the results of
    ``get_conv_output_shape``, ``check_conv_gradinputs_shape``,
    ``get_conv_gradinputs_shape`` and ``get_conv_gradweights_shape``
    against each other.
    """

    def test_check_shape(self):
        # check_conv_gradinputs_shape must accept the output shape computed
        # by get_conv_output_shape and reject perturbed versions of it.
        for i in range(1, 20):
            for k in range(1, 10):
                for b in ('valid', 'half', 'full', (0, 2)):
                    for s in (1, 2, 3):
                        for d in (1, 2, 3):
                            image_shape = (59, 61, i, i)
                            kernel_shape = (67, 61, k, k)

                            # compute the output that these inputs and
                            # parameters would produce
                            computed_shape = get_conv_output_shape(
                                image_shape, kernel_shape, b, (s, s), (d, d))
                            # this should be accepted
                            self.assertTrue(check_conv_gradinputs_shape(
                                image_shape, kernel_shape, computed_shape,
                                b, (s, s), (d, d)))

                            # one or more None should also be accepted
                            trial_shape = (None, None, computed_shape[2], None)
                            self.assertTrue(check_conv_gradinputs_shape(
                                image_shape, kernel_shape, trial_shape,
                                b, (s, s), (d, d)))

                            # the batch size and number of filters are
                            # important
                            trial_shape = (1, 1,
                                           computed_shape[2],
                                           computed_shape[3])
                            self.assertFalse(check_conv_gradinputs_shape(
                                image_shape, kernel_shape, trial_shape,
                                b, (s, s), (d, d)))

                            # outputs that are too large or too small should
                            # be rejected
                            for o in (-3, -2, -1, 1, 2, 3):
                                trial_shape = (computed_shape[0],
                                               computed_shape[1],
                                               computed_shape[2] + o,
                                               computed_shape[3] + o)
                                self.assertFalse(check_conv_gradinputs_shape(
                                    image_shape, kernel_shape, trial_shape,
                                    b, (s, s), (d, d)))

    def test_get_shape(self):
        # The gradient shape helpers must invert get_conv_output_shape when
        # the answer is unambiguous, and report None for the spatial axes
        # otherwise.
        for i in range(1, 20):
            for k in range(1, 10):
                for b in ('valid', 'half', 'full', (0, 2)):
                    for d in (1, 2, 3):
                        image_shape = (59, 61, i, i)
                        kernel_shape = (67, 61, k, k)

                        # compute the output that these inputs and
                        # parameters would produce
                        output_shape = get_conv_output_shape(
                            image_shape, kernel_shape, b, (1, 1), (d, d))

                        # compute the image_shape given this output_shape
                        computed_image_shape = get_conv_gradinputs_shape(
                            kernel_shape, output_shape, b, (1, 1), (d, d))
                        self.assertEqual(computed_image_shape, image_shape)

                        # if subsample > 1, the shape should be None
                        computed_image_shape = get_conv_gradinputs_shape(
                            kernel_shape, output_shape, b, (2, 3), (d, d))
                        image_shape_with_None = image_shape[:2] + (None, None)
                        self.assertEqual(computed_image_shape,
                                         image_shape_with_None)

                        # compute the kernel_shape given this output_shape
                        computed_kernel_shape = get_conv_gradweights_shape(
                            image_shape, output_shape, b, (1, 1), (d, d))

                        # if border_mode == 'half', the shape should be None
                        if b == 'half':
                            kernel_shape_with_None = (kernel_shape[:2] +
                                                      (None, None))
                            self.assertEqual(computed_kernel_shape,
                                             kernel_shape_with_None)
                        else:
                            self.assertEqual(computed_kernel_shape,
                                             kernel_shape)

                        # if subsample > 1, the shape should be None.
                        # The first argument is the image shape, exactly as
                        # in the call above; passing kernel_shape here only
                        # worked because both tuples hold 61 at index 1.
                        computed_kernel_shape = get_conv_gradweights_shape(
                            image_shape, output_shape, b, (2, 3), (d, d))
                        kernel_shape_with_None = kernel_shape[:2] + (None, None)
                        self.assertEqual(computed_kernel_shape,
                                         kernel_shape_with_None)
class TestAssertConvShape(unittest.TestCase):
    """Run-time behaviour of ``assert_conv_shape`` on symbolic shapes."""

    def test_basic(self):
        # One symbolic scalar per shape entry, fed straight through
        # assert_conv_shape by the compiled function.
        dims = tuple(tensor.iscalar() for _ in range(4))
        checked = theano.function(dims, assert_conv_shape(dims))

        # These shapes are returned unchanged.
        for accepted in ([1, 2, 3, 4], [0, 0, 1, 1]):
            self.assertEqual(accepted, checked(*accepted))

        # These shapes must raise: a zero in either of the last two entries,
        # or a negative value in any of the positions tried.
        for rejected in ((3, 3, 3, 0),
                         (3, 3, 0, 3),
                         (3, 3, -1, 3),
                         (3, -1, 3, 3),
                         (-1, 3, 3, 3)):
            assert_raises(AssertionError, checked, *rejected)
class TestAssertShape(unittest.TestCase):
    """Shape assertions raised by ``assert_shape`` and the conv builders.

    Every test compiles a function from a graph that was given static shape
    information, then calls it with arrays that disagree with that
    information and expects an ``AssertionError``.
    """

    @staticmethod
    def _zeros(*dims):
        # float32 array of zeros with the requested dimensions
        return numpy.zeros(dims, dtype='float32')

    def test_basic(self):
        data = tensor.tensor4()
        dim1 = tensor.iscalar()
        dim2 = tensor.iscalar()
        # Only axes 1 and 2 get a symbolic expected size; axes 0 and 3 are
        # passed as None.
        wanted = [None, dim1, dim2, None]
        fn = theano.function([data, dim1, dim2], assert_shape(data, wanted))

        value = self._zeros(3, 5, 7, 11)
        # matching sizes: the array is passed through
        self.assertEqual(0, numpy.sum(fn(value, 5, 7)))

        # any disagreement on axis 1 or axis 2 must raise
        for bad_dim1, bad_dim2 in ((5, 0), (5, 9), (0, 7), (7, 7)):
            assert_raises(AssertionError, fn, value, bad_dim1, bad_dim2)

    def test_shape_check_conv2d(self):
        images = tensor.tensor4()
        filters = tensor.tensor4()

        result = conv.conv2d(images, filters,
                             input_shape=(3, 5, 7, 11),
                             filter_shape=(7, 5, 3, 3))
        fn = theano.function([images, filters], result)

        # input array disagrees with the declared input_shape
        bad_images = self._zeros(3, 5, 9, 11)
        good_filters = self._zeros(7, 5, 3, 3)
        assert_raises(AssertionError, fn, bad_images, good_filters)

        # filter array disagrees with the declared filter_shape
        good_images = self._zeros(3, 5, 7, 11)
        bad_filters = self._zeros(7, 5, 2, 2)
        assert_raises(AssertionError, fn, good_images, bad_filters)

    def test_shape_check_conv3d(self):
        images = tensor.tensor5()
        filters = tensor.tensor5()

        result = conv.conv3d(images, filters,
                             input_shape=(3, 5, 7, 11, 13),
                             filter_shape=(7, 5, 3, 3, 3))
        fn = theano.function([images, filters], result)

        # input array disagrees with the declared input_shape
        bad_images = self._zeros(3, 5, 9, 11, 13)
        good_filters = self._zeros(7, 5, 3, 3, 3)
        assert_raises(AssertionError, fn, bad_images, good_filters)

        # filter array disagrees with the declared filter_shape
        good_images = self._zeros(3, 5, 7, 11, 13)
        bad_filters = self._zeros(7, 5, 2, 2, 2)
        assert_raises(AssertionError, fn, good_images, bad_filters)

    def test_shape_check_conv2d_grad_wrt_inputs(self):
        output_grad = tensor.tensor4()
        filters = tensor.tensor4()

        result = conv.conv2d_grad_wrt_inputs(output_grad, filters,
                                             input_shape=(None, None, 7, 11),
                                             filter_shape=(7, 5, 3, 3))
        fn = theano.function([output_grad, filters], result)

        # filter array disagrees with the declared filter_shape
        grad_value = self._zeros(3, 6, 5, 9)
        filter_value = self._zeros(7, 6, 3, 3)
        assert_raises(AssertionError, fn, grad_value, filter_value)

    def test_shape_check_conv3d_grad_wrt_inputs(self):
        output_grad = tensor.tensor5()
        filters = tensor.tensor5()

        result = conv.conv3d_grad_wrt_inputs(
            output_grad, filters,
            input_shape=(None, None, 7, 11, 13),
            filter_shape=(7, 5, 3, 3, 3))
        fn = theano.function([output_grad, filters], result)

        # filter array disagrees with the declared filter_shape
        grad_value = self._zeros(3, 6, 5, 9, 11)
        filter_value = self._zeros(7, 6, 3, 3, 3)
        assert_raises(AssertionError, fn, grad_value, filter_value)

    def test_shape_check_conv2d_grad_wrt_weights(self):
        images = tensor.tensor4()
        output_grad = tensor.tensor4()

        result = conv.conv2d_grad_wrt_weights(images, output_grad,
                                              filter_shape=(None, None, 3, 3),
                                              input_shape=(3, 5, 7, 11))
        fn = theano.function([images, output_grad], result)

        # input array disagrees with the declared input_shape
        image_value = self._zeros(3, 6, 7, 11)
        grad_value = self._zeros(3, 7, 5, 9)
        assert_raises(AssertionError, fn, image_value, grad_value)

    def test_shape_check_conv3d_grad_wrt_weights(self):
        images = tensor.tensor5()
        output_grad = tensor.tensor5()

        result = conv.conv3d_grad_wrt_weights(
            images, output_grad,
            filter_shape=(None, None, 3, 3, 3),
            input_shape=(3, 5, 7, 11, 13))
        fn = theano.function([images, output_grad], result)

        # input array disagrees with the declared input_shape
        image_value = self._zeros(3, 6, 7, 11, 13)
        grad_value = self._zeros(3, 7, 5, 9, 11)
        assert_raises(AssertionError, fn, image_value, grad_value)
class
BaseTestConv
(
object
):
class
BaseTestConv
(
object
):
def
get_output_shape
(
self
,
inputs_shape
,
filters_shape
,
def
get_output_shape
(
self
,
inputs_shape
,
filters_shape
,
subsample
,
border_mode
,
filter_dilation
):
subsample
,
border_mode
,
filter_dilation
):
...
@@ -211,7 +408,7 @@ class BaseTestConv(object):
...
@@ -211,7 +408,7 @@ class BaseTestConv(object):
res_ref
=
numpy
.
array
(
f_ref
())
res_ref
=
numpy
.
array
(
f_ref
())
res
=
numpy
.
array
(
f
())
res
=
numpy
.
array
(
f
())
utt
.
assert_allclose
(
res_ref
,
res
)
utt
.
assert_allclose
(
res_ref
,
res
)
if
verify_grad
:
if
verify_grad
and
inputs_val
.
size
>
0
and
filters_val
.
size
>
0
and
res
.
size
>
0
:
utt
.
verify_grad
(
conv_op
(
border_mode
=
border_mode
,
utt
.
verify_grad
(
conv_op
(
border_mode
=
border_mode
,
imshp
=
imshp
,
kshp
=
kshp
,
imshp
=
imshp
,
kshp
=
kshp
,
subsample
=
subsample
,
subsample
=
subsample
,
...
@@ -277,7 +474,7 @@ class BaseTestConv(object):
...
@@ -277,7 +474,7 @@ class BaseTestConv(object):
filter_dilation
=
filter_dilation
)
filter_dilation
=
filter_dilation
)
return
conv_op
(
inputs_val
,
output_val
,
filters_shape
[
2
:])
return
conv_op
(
inputs_val
,
output_val
,
filters_shape
[
2
:])
if
verify_grad
:
if
verify_grad
and
inputs_val
.
size
>
0
and
output_val
.
size
>
0
and
res
.
size
>
0
:
utt
.
verify_grad
(
abstract_conv_gradweight
,
utt
.
verify_grad
(
abstract_conv_gradweight
,
[
inputs_val
,
output_val
],
[
inputs_val
,
output_val
],
mode
=
mode
,
eps
=
1
)
mode
=
mode
,
eps
=
1
)
...
@@ -314,11 +511,16 @@ class BaseTestConv(object):
...
@@ -314,11 +511,16 @@ class BaseTestConv(object):
imshp
=
imshp
,
kshp
=
kshp
,
imshp
=
imshp
,
kshp
=
kshp
,
filter_dilation
=
filter_dilation
)
filter_dilation
=
filter_dilation
)
c
=
c
(
filters
,
output
,
inputs_shape
[
2
:])
c
=
c
(
filters
,
output
,
inputs_shape
[
2
:])
c_ref
=
ref
(
filters
,
output
,
inputs_shape
,
border_mode
=
border_mode
,
subsample
=
subsample
,
conv_mode
=
conv_mode
,
filter_dilation
=
filter_dilation
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
# ref is set to None for the inconsistent-shape tests.
# The reference function also raises an exception, which would
# mask the exception generated by the target implementation.
if
ref
is
not
None
:
c_ref
=
ref
(
filters
,
output
,
inputs_shape
,
border_mode
=
border_mode
,
subsample
=
subsample
,
conv_mode
=
conv_mode
,
filter_dilation
=
filter_dilation
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
if
target_op
is
not
None
:
if
target_op
is
not
None
:
assert
any
([
isinstance
(
n
.
op
,
target_op
)
for
n
assert
any
([
isinstance
(
n
.
op
,
target_op
)
for
n
...
@@ -326,9 +528,11 @@ class BaseTestConv(object):
...
@@ -326,9 +528,11 @@ class BaseTestConv(object):
if
check_trace
:
if
check_trace
:
assert_true
(
check_stack_trace
(
f
,
ops_to_check
=
target_op
))
assert_true
(
check_stack_trace
(
f
,
ops_to_check
=
target_op
))
res_ref
=
numpy
.
array
(
f_ref
())
res
=
numpy
.
array
(
f
())
res
=
numpy
.
array
(
f
())
utt
.
assert_allclose
(
res_ref
,
res
)
if
ref
is
not
None
:
res_ref
=
numpy
.
array
(
f_ref
())
utt
.
assert_allclose
(
res_ref
,
res
)
def
abstract_conv_gradinputs
(
filters_val
,
output_val
):
def
abstract_conv_gradinputs
(
filters_val
,
output_val
):
conv_op
=
gradInputs_fn
(
border_mode
=
border_mode
,
conv_op
=
gradInputs_fn
(
border_mode
=
border_mode
,
...
@@ -336,7 +540,7 @@ class BaseTestConv(object):
...
@@ -336,7 +540,7 @@ class BaseTestConv(object):
filter_dilation
=
filter_dilation
)
filter_dilation
=
filter_dilation
)
return
conv_op
(
filters_val
,
output_val
,
inputs_shape
[
2
:])
return
conv_op
(
filters_val
,
output_val
,
inputs_shape
[
2
:])
if
verify_grad
:
if
verify_grad
and
filters_val
.
size
>
0
and
output_val
.
size
>
0
and
res
.
size
>
0
:
utt
.
verify_grad
(
abstract_conv_gradinputs
,
utt
.
verify_grad
(
abstract_conv_gradinputs
,
[
filters_val
,
output_val
],
[
filters_val
,
output_val
],
mode
=
mode
,
eps
=
1
)
mode
=
mode
,
eps
=
1
)
...
@@ -351,13 +555,14 @@ class BaseTestConv(object):
...
@@ -351,13 +555,14 @@ class BaseTestConv(object):
for
(
i
,
f
)
in
zip
(
self
.
inputs_shapes
,
self
.
filters_shapes
):
for
(
i
,
f
)
in
zip
(
self
.
inputs_shapes
,
self
.
filters_shapes
):
for
provide_shape
in
self
.
provide_shape
:
for
provide_shape
in
self
.
provide_shape
:
yield
(
self
.
tcase
,
i
,
f
,
ds
,
db
,
dflip
,
provide_shape
)
yield
(
self
.
tcase
,
i
,
f
,
ds
,
db
,
dflip
,
provide_shape
)
for
fd
in
self
.
filters_dilations
:
if
min
(
i
)
>
0
and
min
(
f
)
>
0
:
for
s
in
self
.
subsamples
:
for
fd
in
self
.
filters_dilations
:
for
b
in
self
.
border_modes
:
for
s
in
self
.
subsamples
:
yield
(
self
.
tcase
,
i
,
f
,
s
,
b
,
dflip
,
for
b
in
self
.
border_modes
:
dprovide_shape
,
fd
)
yield
(
self
.
tcase
,
i
,
f
,
s
,
b
,
dflip
,
for
flip
in
self
.
filter_flip
:
dprovide_shape
,
fd
)
yield
(
self
.
tcase
,
i
,
f
,
ds
,
db
,
flip
,
dprovide_shape
)
for
flip
in
self
.
filter_flip
:
yield
(
self
.
tcase
,
i
,
f
,
ds
,
db
,
flip
,
dprovide_shape
)
class
BaseTestConv2d
(
BaseTestConv
):
class
BaseTestConv2d
(
BaseTestConv
):
...
@@ -365,12 +570,15 @@ class BaseTestConv2d(BaseTestConv):
...
@@ -365,12 +570,15 @@ class BaseTestConv2d(BaseTestConv):
def
setup_class
(
cls
):
def
setup_class
(
cls
):
# This tests can run even when theano.config.blas.ldflags is empty.
# This tests can run even when theano.config.blas.ldflags is empty.
cls
.
inputs_shapes
=
[(
8
,
1
,
6
,
6
),
(
8
,
1
,
8
,
8
),
(
2
,
1
,
7
,
7
),
cls
.
inputs_shapes
=
[(
8
,
1
,
6
,
6
),
(
8
,
1
,
8
,
8
),
(
2
,
1
,
7
,
7
),
(
6
,
1
,
10
,
11
),
(
2
,
1
,
6
,
5
),
(
1
,
5
,
9
,
9
)]
(
6
,
1
,
10
,
11
),
(
2
,
1
,
6
,
5
),
(
1
,
5
,
9
,
9
),
(
0
,
1
,
6
,
6
),
(
1
,
0
,
6
,
6
),
(
1
,
1
,
6
,
6
)]
cls
.
filters_shapes
=
[(
5
,
1
,
2
,
2
),
(
4
,
1
,
3
,
3
),
(
2
,
1
,
3
,
3
),
cls
.
filters_shapes
=
[(
5
,
1
,
2
,
2
),
(
4
,
1
,
3
,
3
),
(
2
,
1
,
3
,
3
),
(
1
,
1
,
2
,
3
),
(
4
,
1
,
1
,
3
),
(
4
,
5
,
3
,
2
)]
(
1
,
1
,
2
,
3
),
(
4
,
1
,
1
,
3
),
(
4
,
5
,
3
,
2
),
(
1
,
1
,
2
,
2
),
(
1
,
0
,
2
,
2
),
(
0
,
1
,
2
,
2
)]
cls
.
subsamples
=
[(
1
,
1
),
(
2
,
2
),
(
2
,
4
)]
cls
.
subsamples
=
[(
1
,
1
),
(
2
,
2
),
(
2
,
4
)]
cls
.
default_subsamples
=
(
1
,
1
)
cls
.
default_subsamples
=
(
1
,
1
)
cls
.
filters_dilations
=
[(
1
,
1
),
(
1
,
2
),
(
2
,
1
)]
cls
.
filters_dilations
=
[(
1
,
1
),
(
1
,
2
),
(
2
,
1
)]
cls
.
default_filters_dilations
=
(
1
,
1
)
cls
.
border_modes
=
[
"valid"
,
"half"
,
"full"
,
(
0
,
0
),
(
1
,
1
),
(
5
,
5
),
(
5
,
2
)]
cls
.
border_modes
=
[
"valid"
,
"half"
,
"full"
,
(
0
,
0
),
(
1
,
1
),
(
5
,
5
),
(
5
,
2
)]
cls
.
default_border_mode
=
(
0
,
0
)
cls
.
default_border_mode
=
(
0
,
0
)
cls
.
filter_flip
=
[
True
,
False
]
cls
.
filter_flip
=
[
True
,
False
]
...
@@ -379,6 +587,62 @@ class BaseTestConv2d(BaseTestConv):
...
@@ -379,6 +587,62 @@ class BaseTestConv2d(BaseTestConv):
cls
.
default_provide_shape
=
True
cls
.
default_provide_shape
=
True
cls
.
shared
=
staticmethod
(
theano
.
compile
.
shared
)
cls
.
shared
=
staticmethod
(
theano
.
compile
.
shared
)
def
test_gradinput_arbitrary_output_shapes
(
self
):
# this computes the grad wrt inputs for an output shape
# that the forward convolution would not produce
input_shape
=
(
2
,
1
,
7
,
7
)
filter_shape
=
(
2
,
1
,
3
,
3
)
for
output_shape
in
[(
2
,
2
,
8
,
8
),
(
2
,
2
,
9
,
9
),
(
2
,
2
,
12
,
12
)]:
for
border_mode
in
[
"valid"
,
"half"
,
"full"
]:
computed_shape
=
get_conv_output_shape
(
input_shape
,
filter_shape
,
border_mode
,
self
.
default_subsamples
,
self
.
default_filters_dilations
)
# is this a valid combination?
if
tuple
(
computed_shape
)
==
output_shape
:
yield
(
self
.
tcase_gi
,
input_shape
,
filter_shape
,
output_shape
,
self
.
default_subsamples
,
border_mode
,
True
,
True
,
self
.
default_filters_dilations
,
False
)
else
:
# expect an error
yield
(
self
.
tcase_gi
,
input_shape
,
filter_shape
,
output_shape
,
self
.
default_subsamples
,
border_mode
,
True
,
True
,
self
.
default_filters_dilations
,
True
)
def
test_gradinput_impossible_output_shapes
(
self
):
def
run_for_output_offsets
(
image_shape
,
kernel_shape
,
s
,
border_mode
,
d
):
# outputs that are too large or too small should be rejected
for
o
in
(
-
3
,
-
1
,
1
,
2
):
output_shape
=
(
1
,
1
,
computed_shape
[
2
]
+
o
,
computed_shape
[
3
]
+
o
)
# expect an error
self
.
tcase_gi
(
image_shape
,
kernel_shape
,
output_shape
,
(
s
,
s
),
border_mode
,
True
,
True
,
(
d
,
d
),
True
)
for
(
i
,
k
)
in
((
1
,
1
),
(
1
,
2
),
(
2
,
1
),
(
4
,
2
),
(
4
,
3
),
(
7
,
3
),
(
9
,
5
)):
for
border_mode
in
(
'valid'
,
'half'
,
'full'
,
(
0
,
2
)):
for
(
s
,
d
)
in
((
1
,
1
),
(
1
,
2
),
(
2
,
1
),
(
2
,
2
),
(
3
,
1
),
(
1
,
3
)):
image_shape
=
(
1
,
1
,
i
,
i
)
kernel_shape
=
(
1
,
1
,
k
,
k
)
# compute the output that these inputs and parameters would produce
computed_shape
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
border_mode
,
(
s
,
s
),
(
d
,
d
))
yield
(
run_for_output_offsets
,
image_shape
,
kernel_shape
,
s
,
border_mode
,
d
)
def
run_fwd
(
self
,
inputs_shape
,
filters_shape
,
def
run_fwd
(
self
,
inputs_shape
,
filters_shape
,
conv_fn
=
conv
.
conv2d
,
conv_op
=
conv
.
AbstractConv2d
,
conv_fn
=
conv
.
conv2d
,
conv_op
=
conv
.
AbstractConv2d
,
ref
=
conv2d_corr
,
**
kwargs
):
ref
=
conv2d_corr
,
**
kwargs
):
...
@@ -438,6 +702,26 @@ class TestCorrConv2d(BaseTestConv2d):
...
@@ -438,6 +702,26 @@ class TestCorrConv2d(BaseTestConv2d):
filter_flip
=
flip
,
target_op
=
CorrMM_gradInputs
,
filter_flip
=
flip
,
target_op
=
CorrMM_gradInputs
,
check_trace
=
True
,
filter_dilation
=
fd
)
check_trace
=
True
,
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
),
expect_error
=
False
):
# This tests can run even when theano.config.blas.ldflags is empty.
if
(
not
theano
.
config
.
cxx
or
theano
.
config
.
mode
==
"FAST_COMPILE"
):
raise
SkipTest
(
"Need blas to test conv2d"
)
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
CorrMM_gradInputs
,
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
assert_raises
(
ValueError
,
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
CorrMM_gradInputs
,
ref
=
None
,
check_trace
=
True
,
filter_dilation
=
fd
)
class
TestAbstractConvNoOptim
(
BaseTestConv2d
):
class
TestAbstractConvNoOptim
(
BaseTestConv2d
):
@classmethod
@classmethod
...
@@ -477,6 +761,25 @@ class TestAbstractConvNoOptim(BaseTestConv2d):
...
@@ -477,6 +761,25 @@ class TestAbstractConvNoOptim(BaseTestConv2d):
check_trace
=
True
,
filter_dilation
=
fd
,
check_trace
=
True
,
filter_dilation
=
fd
,
mode
=
mode
)
mode
=
mode
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
),
expect_error
=
False
):
mode
=
theano
.
Mode
(
optimizer
=
None
)
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
None
,
check_trace
=
True
,
filter_dilation
=
fd
,
mode
=
mode
)
else
:
assert_raises
(
ValueError
,
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
None
,
check_trace
=
True
,
filter_dilation
=
fd
,
ref
=
None
,
mode
=
mode
)
class
TestCpuConv2d
(
BaseTestConv2d
):
class
TestCpuConv2d
(
BaseTestConv2d
):
@classmethod
@classmethod
...
@@ -592,16 +895,47 @@ class TestCpuConv2d(BaseTestConv2d):
...
@@ -592,16 +895,47 @@ class TestCpuConv2d(BaseTestConv2d):
check_trace
=
True
,
check_trace
=
True
,
filter_dilation
=
fd
)
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
),
expect_error
=
False
):
if
fd
!=
(
1
,
1
):
raise
SkipTest
(
"No dilation implementation for basic cpu ConvOp."
)
mode
=
self
.
mode
if
not
flip
:
return
if
b
not
in
((
0
,
0
),
'valid'
,
'full'
):
return
if
(
not
provide_shape
)
and
(
s
!=
(
1
,
1
))
and
(
b
==
'full'
):
return
if
((
s
[
0
]
not
in
(
1
,
2
))
or
(
s
[
1
]
not
in
(
1
,
2
)))
and
(
b
==
'full'
):
return
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
(
ConvOp
,
ConvTransp3D
),
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
# we do not check for inconsistent shapes,
# because this older implementation does not check that
raise
SkipTest
(
'Inconsistent shapes are not tested for old cpu ConvOp.'
)
class
BaseTestConv3d
(
BaseTestConv
):
class
BaseTestConv3d
(
BaseTestConv
):
@classmethod
@classmethod
def
setup_class
(
cls
):
def
setup_class
(
cls
):
# This tests can run even when theano.config.blas.ldflags is empty.
# This tests can run even when theano.config.blas.ldflags is empty.
cls
.
inputs_shapes
=
[(
2
,
1
,
5
,
5
,
5
),
(
1
,
2
,
7
,
5
,
6
)]
cls
.
inputs_shapes
=
[(
2
,
1
,
5
,
5
,
5
),
(
1
,
2
,
7
,
5
,
6
),
cls
.
filters_shapes
=
[(
2
,
1
,
2
,
2
,
2
),
(
1
,
2
,
2
,
1
,
3
)]
(
0
,
1
,
5
,
5
,
5
),
(
1
,
0
,
5
,
5
,
5
),
(
1
,
1
,
5
,
5
,
5
)]
cls
.
filters_shapes
=
[(
2
,
1
,
2
,
2
,
2
),
(
1
,
2
,
2
,
1
,
3
),
(
1
,
1
,
2
,
2
,
2
),
(
1
,
0
,
2
,
2
,
2
),
(
0
,
1
,
2
,
2
,
2
)]
cls
.
subsamples
=
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
1
,
2
,
3
)]
cls
.
subsamples
=
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
1
,
2
,
3
)]
cls
.
default_subsamples
=
(
1
,
1
,
1
)
cls
.
default_subsamples
=
(
1
,
1
,
1
)
cls
.
filters_dilations
=
[(
1
,
1
,
1
),
(
1
,
2
,
1
),
(
2
,
1
,
2
)]
cls
.
filters_dilations
=
[(
1
,
1
,
1
),
(
1
,
2
,
1
),
(
2
,
1
,
2
)]
cls
.
default_filters_dilations
=
(
1
,
1
,
1
)
cls
.
border_modes
=
[
"valid"
,
"half"
,
"full"
,
(
0
,
0
,
0
),
(
2
,
2
,
3
)]
cls
.
border_modes
=
[
"valid"
,
"half"
,
"full"
,
(
0
,
0
,
0
),
(
2
,
2
,
3
)]
cls
.
default_border_mode
=
(
0
,
0
,
0
)
cls
.
default_border_mode
=
(
0
,
0
,
0
)
cls
.
filter_flip
=
[
True
,
False
]
cls
.
filter_flip
=
[
True
,
False
]
...
@@ -610,6 +944,64 @@ class BaseTestConv3d(BaseTestConv):
...
@@ -610,6 +944,64 @@ class BaseTestConv3d(BaseTestConv):
cls
.
default_provide_shape
=
True
cls
.
default_provide_shape
=
True
cls
.
shared
=
staticmethod
(
theano
.
compile
.
shared
)
cls
.
shared
=
staticmethod
(
theano
.
compile
.
shared
)
def
test_gradinput_arbitrary_output_shapes
(
self
):
# this computes the grad wrt inputs for an output shape
# that the forward convolution would not produce
input_shape
=
(
2
,
1
,
7
,
7
,
7
)
filter_shape
=
(
1
,
1
,
3
,
3
,
3
)
for
output_shape
in
[(
2
,
1
,
8
,
8
,
8
),
(
2
,
1
,
9
,
9
,
9
),
(
2
,
1
,
12
,
12
,
12
)]:
for
border_mode
in
[
"valid"
,
"half"
,
"full"
]:
# compute the output that these inputs and parameters would produce
computed_shape
=
get_conv_output_shape
(
input_shape
,
filter_shape
,
border_mode
,
self
.
default_subsamples
,
self
.
default_filters_dilations
)
# is this a valid combination?
if
tuple
(
computed_shape
)
==
output_shape
:
yield
(
self
.
tcase_gi
,
input_shape
,
filter_shape
,
output_shape
,
self
.
default_subsamples
,
border_mode
,
True
,
True
,
self
.
default_filters_dilations
,
False
)
else
:
# expect an error
yield
(
self
.
tcase_gi
,
input_shape
,
filter_shape
,
output_shape
,
self
.
default_subsamples
,
border_mode
,
True
,
True
,
self
.
default_filters_dilations
,
True
)
def
test_gradinput_impossible_output_shapes
(
self
):
def
run_for_output_offsets
(
image_shape
,
kernel_shape
,
s
,
border_mode
,
d
):
# outputs that are too large or too small should be rejected
for
o
in
(
-
3
,
-
1
,
1
,
2
):
output_shape
=
(
1
,
1
,
computed_shape
[
2
]
+
o
,
computed_shape
[
3
]
+
o
,
computed_shape
[
4
]
+
o
)
# expect an error
self
.
tcase_gi
(
image_shape
,
kernel_shape
,
output_shape
,
(
s
,
s
),
border_mode
,
True
,
True
,
(
d
,
d
),
True
)
for
(
i
,
k
)
in
((
1
,
1
),
(
1
,
2
),
(
2
,
1
),
(
4
,
2
),
(
4
,
3
),
(
7
,
3
),
(
9
,
5
)):
for
border_mode
in
(
'valid'
,
'half'
,
'full'
,
(
0
,
2
,
1
)):
for
(
s
,
d
)
in
((
1
,
1
),
(
1
,
2
),
(
2
,
1
),
(
2
,
2
),
(
3
,
1
),
(
1
,
3
)):
image_shape
=
(
1
,
1
,
i
,
i
,
i
)
kernel_shape
=
(
1
,
1
,
k
,
k
,
k
)
# compute the output that these inputs and parameters would produce
computed_shape
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
border_mode
,
(
s
,
s
,
s
),
(
d
,
d
,
d
))
yield
(
run_for_output_offsets
,
image_shape
,
kernel_shape
,
s
,
border_mode
,
d
)
def
run_fwd
(
self
,
inputs_shape
,
filters_shape
,
def
run_fwd
(
self
,
inputs_shape
,
filters_shape
,
conv_fn
=
conv
.
conv3d
,
conv_op
=
conv
.
AbstractConv3d
,
conv_fn
=
conv
.
conv3d
,
conv_op
=
conv
.
AbstractConv3d
,
ref
=
conv3d_corr
,
**
kwargs
):
ref
=
conv3d_corr
,
**
kwargs
):
...
@@ -669,6 +1061,26 @@ class TestCorrConv3d(BaseTestConv3d):
...
@@ -669,6 +1061,26 @@ class TestCorrConv3d(BaseTestConv3d):
filter_flip
=
flip
,
target_op
=
Corr3dMM_gradInputs
,
filter_flip
=
flip
,
target_op
=
Corr3dMM_gradInputs
,
check_trace
=
True
,
filter_dilation
=
fd
)
check_trace
=
True
,
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
,
1
),
expect_error
=
False
):
# This test can run even when theano.config.blas.ldflags is empty.
if
(
not
theano
.
config
.
cxx
or
theano
.
config
.
mode
==
"FAST_COMPILE"
):
raise
SkipTest
(
"Need blas to test conv3d"
)
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
Corr3dMM_gradInputs
,
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
assert_raises
(
ValueError
,
self
.
run_gradinput
,
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
Corr3dMM_gradInputs
,
ref
=
None
,
check_trace
=
True
,
filter_dilation
=
fd
)
class
TestCpuConv3d
(
BaseTestConv3d
):
class
TestCpuConv3d
(
BaseTestConv3d
):
@classmethod
@classmethod
...
@@ -687,6 +1099,9 @@ class TestCpuConv3d(BaseTestConv3d):
...
@@ -687,6 +1099,9 @@ class TestCpuConv3d(BaseTestConv3d):
raise
SkipTest
(
"No dilation implementation for basic cpu Conv3D."
)
raise
SkipTest
(
"No dilation implementation for basic cpu Conv3D."
)
if
not
theano
.
config
.
cxx
:
if
not
theano
.
config
.
cxx
:
raise
SkipTest
(
"Need cxx to test conv2d"
)
raise
SkipTest
(
"Need cxx to test conv2d"
)
if
min
(
i
)
==
0
or
min
(
f
)
==
0
:
raise
SkipTest
(
'Not tested for old cpu Conv3D.'
)
mode
=
self
.
mode
mode
=
self
.
mode
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
fwd_OK
=
True
fwd_OK
=
True
...
@@ -770,6 +1185,30 @@ class TestCpuConv3d(BaseTestConv3d):
...
@@ -770,6 +1185,30 @@ class TestCpuConv3d(BaseTestConv3d):
check_trace
=
True
,
check_trace
=
True
,
filter_dilation
=
fd
)
filter_dilation
=
fd
)
def
tcase_gi
(
self
,
i
,
f
,
o
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
,
1
),
expect_error
=
False
):
if
fd
!=
(
1
,
1
,
1
):
raise
SkipTest
(
"No dilation implementation for basic cpu Conv3D."
)
mode
=
self
.
mode
if
min
(
i
)
==
0
or
min
(
f
)
==
0
or
min
(
o
)
==
0
:
raise
SkipTest
(
'Not tested for old cpu Conv3D.'
)
if
b
not
in
((
0
,
0
,
0
),
'valid'
):
return
if
not
expect_error
:
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
False
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
ConvTransp3D
,
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
# we do not check for inconsistent shapes,
# because this older implementation does not check that
raise
SkipTest
(
'Inconsistent shapes are not tested for old cpu Conv3D.'
)
def
test_constant_shapes
():
def
test_constant_shapes
():
# Check that the `imshp` and `kshp` parameters of the AbstractConv Ops
# Check that the `imshp` and `kshp` parameters of the AbstractConv Ops
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论