Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
e514f4d3
提交
e514f4d3
authored
8月 25, 2017
作者:
Vikram
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
GPU code and some more tests
上级
cd10a53c
显示空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
112 行增加
和
70 行删除
+112
-70
blas.py
theano/gpuarray/blas.py
+58
-38
corr_gemm.c
theano/gpuarray/c_code/corr_gemm.c
+33
-30
test_gemmcorr.py
theano/gpuarray/tests/test_gemmcorr.py
+8
-0
corr.py
theano/tensor/nnet/corr.py
+2
-2
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+11
-0
没有找到文件。
theano/gpuarray/blas.py
浏览文件 @
e514f4d3
...
@@ -468,16 +468,33 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -468,16 +468,33 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
if
isinstance
(
border_mode
,
integer_types
):
if
isinstance
(
border_mode
,
integer_types
):
border_mode
=
(
border_mode
,
border_mode
)
if
border_mode
<
0
:
if
isinstance
(
border_mode
,
tuple
):
raise
ValueError
(
pad_h
,
pad_w
=
map
(
int
,
border_mode
)
'invalid border_mode {}, which must be a '
border_mode
=
(
pad_h
,
pad_w
)
'non-negative integer'
.
format
(
border_mode
))
if
not
((
isinstance
(
border_mode
,
tuple
)
and
min
(
border_mode
)
>=
0
)
or
border_mode
=
((
border_mode
,
border_mode
),)
*
2
border_mode
in
(
'valid'
,
'full'
,
'half'
)):
elif
isinstance
(
border_mode
,
tuple
):
if
len
(
border_mode
)
!=
2
:
raise
ValueError
(
'invalid border_mode {} which must be a '
'tuple of length 2'
.
format
(
border_mode
))
border
=
()
for
mode
in
border_mode
:
if
isinstance
(
mode
,
integer_types
)
and
mode
>=
0
:
border
+=
((
mode
,
mode
),)
elif
isinstance
(
mode
,
tuple
)
and
len
(
mode
)
==
2
and
\
min
(
mode
)
>=
0
:
border
+=
((
int
(
mode
[
0
]),
int
(
mode
[
1
])),)
else
:
raise
ValueError
(
'invalid border mode {}. The tuple can only contain '
'integers or tuples of length 2'
.
format
(
border_mode
))
border_mode
=
border
elif
border_mode
not
in
(
'valid'
,
'full'
,
'half'
):
raise
ValueError
(
raise
ValueError
(
'invalid border_mode {}, which must be either '
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a
pair of
'
'"valid", "full", "half", an integer or a
tuple
'
'
integers
'
.
format
(
border_mode
))
'
of length 2
'
.
format
(
border_mode
))
self
.
border_mode
=
border_mode
self
.
border_mode
=
border_mode
if
len
(
subsample
)
!=
2
:
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
raise
ValueError
(
"subsample must have two elements"
)
...
@@ -495,7 +512,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -495,7 +512,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
pad
(
self
):
def
pad
(
self
):
if
self
.
border_mode
!=
'valid'
:
if
self
.
border_mode
!=
'valid'
:
return
self
.
border_mode
return
self
.
border_mode
return
(
0
,
0
)
return
(
(
0
,
0
),)
*
2
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s,
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s,
%
s,
%
s,
%
s}'
%
(
...
@@ -537,7 +554,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -537,7 +554,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# Raise this whenever modifying the C code (including the file).
# Raise this whenever modifying the C code (including the file).
return
(
1
1
,)
return
(
1
2
,)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
"""
"""
...
@@ -587,14 +604,14 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -587,14 +604,14 @@ class BaseGpuCorrMM(CGpuKernelBase):
numgroups
=
self
.
num_groups
numgroups
=
self
.
num_groups
unshared
=
int
(
self
.
unshared
)
unshared
=
int
(
self
.
unshared
)
if
self
.
border_mode
==
"half"
:
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
-
1
elif
self
.
border_mode
==
"full"
:
elif
self
.
border_mode
==
"full"
:
padH
=
padW
=
-
2
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
-
2
elif
isinstance
(
self
.
border_mode
,
tuple
):
elif
isinstance
(
self
.
border_mode
,
tuple
):
padH
,
padW
=
self
.
border_mode
(
padH_l
,
padH_r
),
(
padW_l
,
padW_r
)
=
self
.
border_mode
else
:
else
:
assert
self
.
border_mode
==
"valid"
assert
self
.
border_mode
==
"valid"
padH
=
padW
=
0
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
0
if
direction
==
"forward"
:
if
direction
==
"forward"
:
direction
=
0
direction
=
0
out
=
top
out
=
top
...
@@ -613,13 +630,13 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -613,13 +630,13 @@ class BaseGpuCorrMM(CGpuKernelBase):
if
height
:
if
height
:
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
else
:
else
:
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
_l
==
-
1
)):
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
height
=
'-1'
height
=
'-1'
if
width
:
if
width
:
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
else
:
else
:
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
_l
==
-
1
)):
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
width
=
'-1'
width
=
'-1'
...
@@ -635,8 +652,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -635,8 +652,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
size_t dW =
%(dW)
s;
size_t dW =
%(dW)
s;
size_t dilH =
%(dilH)
s;
size_t dilH =
%(dilH)
s;
size_t dilW =
%(dilW)
s;
size_t dilW =
%(dilW)
s;
int padH =
%(padH)
s;
int padH_l =
%(padH_l)
s;
int padW =
%(padW)
s;
int padH_r =
%(padH_r)
s;
int padW_l =
%(padW_l)
s;
int padW_r =
%(padW_r)
s;
int numgroups =
%(numgroups)
s;
int numgroups =
%(numgroups)
s;
int unshared =
%(unshared)
s;
int unshared =
%(unshared)
s;
...
@@ -662,22 +681,22 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -662,22 +681,22 @@ class BaseGpuCorrMM(CGpuKernelBase):
// kernel height is specified (perhaps vertical subsampling or half padding)
// kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
kH =
%(height)
s;
}
}
else if (padH == -2) {
else if (padH
_l == -2 || padH_r
== -2) {
// vertical full padding, we can infer the kernel height
// vertical full padding, we can infer the kernel height
kH = (2 - PyGpuArray_DIMS(bottom)[2] + (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1;
kH = (2 - PyGpuArray_DIMS(bottom)[2] + (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1;
}
}
else {
else {
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] +
2*padH
- (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
kH = (PyGpuArray_DIMS(bottom)[2] +
padH_l + padH_r
- (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
}
}
if (
%(width)
s != -1) {
if (
%(width)
s != -1) {
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW
_l == -2 || padW_r
== -2) {
kW = (2 - PyGpuArray_DIMS(bottom)[3] + (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (2 - PyGpuArray_DIMS(bottom)[3] + (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
else {
else {
kW = (PyGpuArray_DIMS(bottom)[3] +
2*padW
- (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (PyGpuArray_DIMS(bottom)[3] +
padW_l + padW_r
- (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
}
}
...
@@ -686,23 +705,23 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -686,23 +705,23 @@ class BaseGpuCorrMM(CGpuKernelBase):
dil_kW = (kW - 1) * dilW + 1;
dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH
_l == -1 || padH_r
== -1) { // vertical half padding
padH = dil_kH / 2;
padH
_l = padH_r
= dil_kH / 2;
}
}
else if (padH == -2) { // vertical full padding
else if (padH
_l == -2 || padH_r
== -2) { // vertical full padding
padH = dil_kH - 1;
padH
_l = padH_r
= dil_kH - 1;
}
}
else if (padH < 0) {
else if (padH
_l < 0 || padH_r
< 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
%(fail)
s
%(fail)
s
}
}
if (padW == -1) { // horizontal half padding
if (padW
_l == -1 || padW_r
== -1) { // horizontal half padding
padW = dil_kW / 2;
padW
_l = padW_r
= dil_kW / 2;
}
}
else if (padW == -2) { // horizontal full padding
else if (padW
_l == -2 || padW_r
== -2) { // horizontal full padding
padW = dil_kW - 1;
padW
_l = padW_r
= dil_kW - 1;
}
}
else if (padW < 0) {
else if (padW
_l < 0 || padW_r
< 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
%(fail)
s
%(fail)
s
}
}
...
@@ -718,11 +737,11 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -718,11 +737,11 @@ class BaseGpuCorrMM(CGpuKernelBase):
switch(direction) {
switch(direction) {
case 0: // forward pass
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom +
2*pad
- ((weight-1)*dil + 1)) / sample + 1
// height and width: top = (bottom +
pad_l + pad_r
- ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = PyGpuArray_DIMS(bottom)[0];
out_dim[0] = PyGpuArray_DIMS(bottom)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[0];
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] +
2*padH
- ((PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1;
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] +
padH_l + padH_r
- ((PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] +
2*padW
- ((PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] +
padW_l + padW_r
- ((PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1;
out_typecode = bottom->ga.typecode;
out_typecode = bottom->ga.typecode;
out_context = bottom->context;
out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
...
@@ -810,8 +829,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -810,8 +829,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[1] = PyGpuArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1 -
2*padH
;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1 -
padH_l - padH_r
;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1 -
2*padW
;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1 -
padW_l - padW_r
;
out_typecode = top->ga.typecode;
out_typecode = top->ga.typecode;
out_context = top->context;
out_context = top->context;
if (unshared) {
if (unshared) {
...
@@ -884,7 +903,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -884,7 +903,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
}
}
// Call GPU code
// Call GPU code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups, unshared);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW,
padH_l, padH_r, padW_l, padW_r, numgroups, unshared);
if (out2==NULL){
if (out2==NULL){
%(fail)
s
%(fail)
s
}
}
...
...
theano/gpuarray/c_code/corr_gemm.c
浏览文件 @
e514f4d3
...
@@ -42,7 +42,7 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
...
@@ -42,7 +42,7 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
dilation_h
,
const
ga_size
dilation_w
,
const
ga_size
dilation_h
,
const
ga_size
dilation_w
,
const
ga_size
pad_h
,
const
ga_size
pad_w
,
const
ga_size
pad_h
l
,
const
ga_size
pad_wl
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
height_col
,
const
ga_size
width_col
,
const
ga_size
height_col
,
const
ga_size
width_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col
,
...
@@ -57,8 +57,8 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
...
@@ -57,8 +57,8 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
const
ga_size
w_col
=
index
%
width_col
;
const
ga_size
w_col
=
index
%
width_col
;
const
ga_size
c_im
=
h_index
/
height_col
;
const
ga_size
c_im
=
h_index
/
height_col
;
const
ga_size
c_col
=
c_im
*
kernel_h
*
kernel_w
;
const
ga_size
c_col
=
c_im
*
kernel_h
*
kernel_w
;
const
ga_size
h_offset
=
h_col
*
stride_h
-
pad_h
;
const
ga_size
h_offset
=
h_col
*
stride_h
-
pad_h
l
;
const
ga_size
w_offset
=
w_col
*
stride_w
-
pad_w
;
const
ga_size
w_offset
=
w_col
*
stride_w
-
pad_w
l
;
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col_ptr
=
data_col
;
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col_ptr
=
data_col
;
data_col_ptr
+=
(
c_col
*
height_col
+
h_col
)
*
width_col
+
w_col
;
data_col_ptr
+=
(
c_col
*
height_col
+
h_col
)
*
width_col
+
w_col
;
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_im_ptr
=
data_im
+
data_im_offset
;
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_im_ptr
=
data_im
+
data_im_offset
;
...
@@ -86,7 +86,7 @@ KERNEL void im2col_kernel(const ga_size n,
...
@@ -86,7 +86,7 @@ KERNEL void im2col_kernel(const ga_size n,
// data_im_offset is an offset of elements in the array
// data_im_offset is an offset of elements in the array
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
pad_h
,
const
ga_size
pad_w
,
const
ga_size
pad_h
l
,
const
ga_size
pad_wl
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
height_col
,
const
ga_size
width_col
,
const
ga_size
height_col
,
const
ga_size
width_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col
,
...
@@ -101,8 +101,8 @@ KERNEL void im2col_kernel(const ga_size n,
...
@@ -101,8 +101,8 @@ KERNEL void im2col_kernel(const ga_size n,
const
ga_size
w_col
=
index
%
width_col
;
const
ga_size
w_col
=
index
%
width_col
;
const
ga_size
c_im
=
h_index
/
height_col
;
const
ga_size
c_im
=
h_index
/
height_col
;
const
ga_size
c_col
=
c_im
*
kernel_h
*
kernel_w
;
const
ga_size
c_col
=
c_im
*
kernel_h
*
kernel_w
;
const
ga_size
h_offset
=
h_col
*
stride_h
-
pad_h
;
const
ga_size
h_offset
=
h_col
*
stride_h
-
pad_h
l
;
const
ga_size
w_offset
=
w_col
*
stride_w
-
pad_w
;
const
ga_size
w_offset
=
w_col
*
stride_w
-
pad_w
l
;
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col_ptr
=
data_col
;
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col_ptr
=
data_col
;
data_col_ptr
+=
(
c_col
*
height_col
+
h_col
)
*
width_col
+
w_col
;
data_col_ptr
+=
(
c_col
*
height_col
+
h_col
)
*
width_col
+
w_col
;
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_im_ptr
=
data_im
+
data_im_offset
;
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_im_ptr
=
data_im
+
data_im_offset
;
...
@@ -127,7 +127,7 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
...
@@ -127,7 +127,7 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
channels
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
channels
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
dilation_h
,
const
ga_size
dilation_w
,
const
ga_size
dilation_h
,
const
ga_size
dilation_w
,
const
ga_size
pad_h
,
const
ga_size
pad_w
,
const
ga_size
pad_h
l
,
const
ga_size
pad_wl
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
height_col
,
const
ga_size
width_col
,
const
ga_size
height_col
,
const
ga_size
width_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_im
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_im
,
...
@@ -141,8 +141,8 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
...
@@ -141,8 +141,8 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
for
(
ga_size
index
=
GID_0
*
LDIM_0
+
LID_0
;
for
(
ga_size
index
=
GID_0
*
LDIM_0
+
LID_0
;
index
<
(
n
);
index
+=
LDIM_0
*
GDIM_0
)
{
index
<
(
n
);
index
+=
LDIM_0
*
GDIM_0
)
{
DTYPE_INPUT_0
val
=
0
;
DTYPE_INPUT_0
val
=
0
;
const
ga_size
w_im
=
index
%
width
+
pad_w
;
const
ga_size
w_im
=
index
%
width
+
pad_w
l
;
const
ga_size
h_im
=
(
index
/
width
)
%
height
+
pad_h
;
const
ga_size
h_im
=
(
index
/
width
)
%
height
+
pad_h
l
;
const
ga_size
c_im
=
index
/
(
width
*
height
);
const
ga_size
c_im
=
index
/
(
width
*
height
);
ga_size
kernel_extent_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
ga_size
kernel_extent_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
ga_size
kernel_extent_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
ga_size
kernel_extent_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
...
@@ -177,7 +177,7 @@ KERNEL void col2im_kernel(const ga_size n,
...
@@ -177,7 +177,7 @@ KERNEL void col2im_kernel(const ga_size n,
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_col
,
const
ga_size
offset_col
,
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_col
,
const
ga_size
offset_col
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
channels
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
channels
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
pad_h
,
const
ga_size
pad_w
,
const
ga_size
pad_h
l
,
const
ga_size
pad_wl
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
height_col
,
const
ga_size
width_col
,
const
ga_size
height_col
,
const
ga_size
width_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_im
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_im
,
...
@@ -191,8 +191,8 @@ KERNEL void col2im_kernel(const ga_size n,
...
@@ -191,8 +191,8 @@ KERNEL void col2im_kernel(const ga_size n,
for
(
ga_size
index
=
GID_0
*
LDIM_0
+
LID_0
;
for
(
ga_size
index
=
GID_0
*
LDIM_0
+
LID_0
;
index
<
(
n
);
index
+=
LDIM_0
*
GDIM_0
)
{
index
<
(
n
);
index
+=
LDIM_0
*
GDIM_0
)
{
DTYPE_INPUT_0
val
=
0
;
DTYPE_INPUT_0
val
=
0
;
const
ga_size
w_im
=
index
%
width
+
pad_w
;
const
ga_size
w_im
=
index
%
width
+
pad_w
l
;
const
ga_size
h_im
=
(
index
/
width
)
%
height
+
pad_h
;
const
ga_size
h_im
=
(
index
/
width
)
%
height
+
pad_h
l
;
const
ga_size
c_im
=
index
/
(
width
*
height
);
const
ga_size
c_im
=
index
/
(
width
*
height
);
// compute the start and end of the output
// compute the start and end of the output
const
ga_size
w_col_start
=
const
ga_size
w_col_start
=
...
@@ -254,15 +254,16 @@ int rgemm(cb_order o, cb_transpose tA, cb_transpose tB,
...
@@ -254,15 +254,16 @@ int rgemm(cb_order o, cb_transpose tA, cb_transpose tB,
int
im2col
(
GpuArray
*
data_im
,
const
size_t
data_im_offset
,
const
size_t
channels
,
int
im2col
(
GpuArray
*
data_im
,
const
size_t
data_im_offset
,
const
size_t
channels
,
const
size_t
height
,
const
size_t
width
,
const
size_t
kernel_h
,
const
size_t
kernel_w
,
const
size_t
height
,
const
size_t
width
,
const
size_t
kernel_h
,
const
size_t
kernel_w
,
const
size_t
dilation_h
,
const
size_t
dilation_w
,
const
size_t
dilation_h
,
const
size_t
dilation_w
,
const
size_t
pad_h
,
const
size_t
pad_w
,
const
size_t
pad_hl
,
const
size_t
pad_hr
,
const
size_t
pad_wl
,
const
size_t
pad_wr
,
const
size_t
stride_h
,
const
size_t
stride_w
,
const
size_t
stride_h
,
const
size_t
stride_w
,
GpuArray
*
data_col
)
{
GpuArray
*
data_col
)
{
// We are going to launch channels * height_col * width_col kernels, each
// We are going to launch channels * height_col * width_col kernels, each
// kernel responsible for copying a single-channel grid.
// kernel responsible for copying a single-channel grid.
size_t
dil_kernel_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
size_t
dil_kernel_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
size_t
dil_kernel_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
size_t
dil_kernel_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
size_t
height_col
=
(
height
+
2
*
pad_h
-
dil_kernel_h
)
/
stride_h
+
1
;
size_t
height_col
=
(
height
+
pad_hl
+
pad_hr
-
dil_kernel_h
)
/
stride_h
+
1
;
size_t
width_col
=
(
width
+
2
*
pad_w
-
dil_kernel_w
)
/
stride_w
+
1
;
size_t
width_col
=
(
width
+
pad_wl
+
pad_wr
-
dil_kernel_w
)
/
stride_w
+
1
;
size_t
num_kernels
=
channels
*
height_col
*
width_col
;
size_t
num_kernels
=
channels
*
height_col
*
width_col
;
int
err
;
int
err
;
if
(
dilation_h
!=
1
||
dilation_w
!=
1
)
{
if
(
dilation_h
!=
1
||
dilation_w
!=
1
)
{
...
@@ -270,7 +271,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
...
@@ -270,7 +271,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
1
,
&
num_kernels
,
0
,
1
,
&
num_kernels
,
0
,
num_kernels
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
,
num_kernels
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
,
height
,
width
,
kernel_h
,
kernel_w
,
height
,
width
,
kernel_h
,
kernel_w
,
dilation_h
,
dilation_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
height_col
,
dilation_h
,
dilation_w
,
pad_h
l
,
pad_wl
,
stride_h
,
stride_w
,
height_col
,
width_col
,
data_col
->
data
,
data_col
->
offset
);
width_col
,
data_col
->
data
,
data_col
->
offset
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -282,7 +283,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
...
@@ -282,7 +283,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
1
,
&
num_kernels
,
0
,
1
,
&
num_kernels
,
0
,
num_kernels
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
,
num_kernels
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
,
height
,
width
,
kernel_h
,
kernel_w
,
height
,
width
,
kernel_h
,
kernel_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
height_col
,
pad_h
l
,
pad_wl
,
stride_h
,
stride_w
,
height_col
,
width_col
,
data_col
->
data
,
data_col
->
offset
);
width_col
,
data_col
->
data
,
data_col
->
offset
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -296,12 +297,12 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
...
@@ -296,12 +297,12 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
int
col2im
(
GpuArray
*
data_col
,
const
size_t
channels
,
int
col2im
(
GpuArray
*
data_col
,
const
size_t
channels
,
const
size_t
height
,
const
size_t
width
,
const
size_t
patch_h
,
const
size_t
patch_w
,
const
size_t
height
,
const
size_t
width
,
const
size_t
patch_h
,
const
size_t
patch_w
,
const
size_t
dilation_h
,
const
size_t
dilation_w
,
const
size_t
dilation_h
,
const
size_t
dilation_w
,
const
size_t
pad_h
,
const
size_t
pad_w
,
const
size_t
stride_h
,
const
size_t
pad_h
l
,
const
size_t
pad_hr
,
const
size_t
pad_wl
,
const
size_t
pad_wr
,
const
size_t
stride_w
,
GpuArray
*
data_im
,
const
size_t
data_im_offset
)
{
const
size_t
stride_
h
,
const
size_t
stride_
w
,
GpuArray
*
data_im
,
const
size_t
data_im_offset
)
{
size_t
dil_patch_h
=
(
patch_h
-
1
)
*
dilation_h
+
1
;
size_t
dil_patch_h
=
(
patch_h
-
1
)
*
dilation_h
+
1
;
size_t
dil_patch_w
=
(
patch_w
-
1
)
*
dilation_w
+
1
;
size_t
dil_patch_w
=
(
patch_w
-
1
)
*
dilation_w
+
1
;
size_t
height_col
=
(
height
+
2
*
pad_h
-
dil_patch_h
)
/
stride_h
+
1
;
size_t
height_col
=
(
height
+
pad_hl
+
pad_hr
-
dil_patch_h
)
/
stride_h
+
1
;
size_t
width_col
=
(
width
+
2
*
pad_w
-
dil_patch_w
)
/
stride_w
+
1
;
size_t
width_col
=
(
width
+
pad_wl
+
pad_wr
-
dil_patch_w
)
/
stride_w
+
1
;
size_t
num_kernels
=
channels
*
height
*
width
;
size_t
num_kernels
=
channels
*
height
*
width
;
// To avoid involving atomic operations, we will launch one kernel per
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
// bottom dimension, and then in the kernel add up the top dimensions.
...
@@ -311,7 +312,7 @@ int col2im(GpuArray *data_col, const size_t channels,
...
@@ -311,7 +312,7 @@ int col2im(GpuArray *data_col, const size_t channels,
1
,
&
num_kernels
,
0
,
1
,
&
num_kernels
,
0
,
num_kernels
,
data_col
->
data
,
data_col
->
offset
,
num_kernels
,
data_col
->
data
,
data_col
->
offset
,
height
,
width
,
channels
,
patch_h
,
patch_w
,
height
,
width
,
channels
,
patch_h
,
patch_w
,
dilation_h
,
dilation_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
pad_h
l
,
pad_wl
,
stride_h
,
stride_w
,
height_col
,
width_col
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
);
height_col
,
width_col
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -323,7 +324,7 @@ int col2im(GpuArray *data_col, const size_t channels,
...
@@ -323,7 +324,7 @@ int col2im(GpuArray *data_col, const size_t channels,
1
,
&
num_kernels
,
0
,
1
,
&
num_kernels
,
0
,
num_kernels
,
data_col
->
data
,
data_col
->
offset
,
num_kernels
,
data_col
->
data
,
data_col
->
offset
,
height
,
width
,
channels
,
patch_h
,
patch_w
,
height
,
width
,
channels
,
patch_h
,
patch_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
pad_h
l
,
pad_wl
,
stride_h
,
stride_w
,
height_col
,
width_col
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
);
height_col
,
width_col
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -347,8 +348,10 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -347,8 +348,10 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dW
=
1
,
const
size_t
dW
=
1
,
const
size_t
dilH
=
1
,
const
size_t
dilH
=
1
,
const
size_t
dilW
=
1
,
const
size_t
dilW
=
1
,
const
size_t
padH
=
0
,
const
size_t
padH_l
=
0
,
const
size_t
padW
=
0
,
const
size_t
padH_r
=
0
,
const
size_t
padW_l
=
0
,
const
size_t
padW_r
=
0
,
const
size_t
numgroups
=
1
,
const
size_t
numgroups
=
1
,
const
size_t
unshared
=
0
)
const
size_t
unshared
=
0
)
{
{
...
@@ -443,8 +446,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -443,8 +446,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
size_t
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth)
// top: (batchSize, nFilters, topHeight, topWidth)
const
size_t
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
size_t
topHeightNoDH
=
(
bottomHeight
+
padH_l
+
padH_r
-
dil_kH
);
const
size_t
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
const
size_t
topWidthNoDW
=
(
bottomWidth
+
padW_l
+
padW_r
-
dil_kW
);
// the above values might be negative so we need to use Python-like
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
// note: this macro implements Python's // for negative x only
...
@@ -558,7 +561,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -558,7 +561,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_bottom_stride
,
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
padH
_l
,
padH_r
,
padW_l
,
padW_r
,
dH
,
dW
,
&
col
->
ga
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
return
NULL
;
return
NULL
;
...
@@ -618,7 +621,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -618,7 +621,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_bottom_stride
,
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
padH
_l
,
padH_r
,
padW_l
,
padW_r
,
dH
,
dW
,
&
col
->
ga
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
return
NULL
;
return
NULL
;
...
@@ -712,7 +715,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -712,7 +715,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
}
}
// col2im back to the data
// col2im back to the data
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
kH
,
kW
,
dilH
,
dilW
,
padH
_l
,
padH_r
,
padW_l
,
padW_r
,
dH
,
dW
,
&
bottom
->
ga
,
n
*
batch_bottom_stride
);
dH
,
dW
,
&
bottom
->
ga
,
n
*
batch_bottom_stride
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
...
...
theano/gpuarray/tests/test_gemmcorr.py
浏览文件 @
e514f4d3
...
@@ -12,6 +12,7 @@ from ..type import gpuarray_shared_constructor
...
@@ -12,6 +12,7 @@ from ..type import gpuarray_shared_constructor
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
from
theano.tensor.nnet.tests.test_abstract_conv
import
TestAsymmetricPadding
class
TestCorrMM
(
unittest
.
TestCase
):
class
TestCorrMM
(
unittest
.
TestCase
):
...
@@ -272,3 +273,10 @@ class TestUnsharedGpuCorr2d(TestUnsharedConv):
...
@@ -272,3 +273,10 @@ class TestUnsharedGpuCorr2d(TestUnsharedConv):
conv2d_op
=
GpuCorrMM
conv2d_op
=
GpuCorrMM
conv2d_gradw_op
=
GpuCorrMM_gradWeights
conv2d_gradw_op
=
GpuCorrMM_gradWeights
conv2d_gradi_op
=
GpuCorrMM_gradInputs
conv2d_gradi_op
=
GpuCorrMM_gradInputs
class
TestAsymmetricGpu
(
TestAsymmetricPadding
):
mode
=
mode_with_gpu
.
excluding
(
'cudnn'
)
conv2d_op
=
GpuCorrMM
conv2d_gradw_op
=
GpuCorrMM_gradWeights
conv2d_gradi_op
=
GpuCorrMM_gradInputs
theano/tensor/nnet/corr.py
浏览文件 @
e514f4d3
...
@@ -195,7 +195,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -195,7 +195,7 @@ class BaseCorrMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
9
,
self
.
openmp
,
blas_header_version
())
return
(
10
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -439,7 +439,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -439,7 +439,7 @@ class BaseCorrMM(gof.OpenMPOp):
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights = (bottom +
2*pad
- (top - 1) * sample - 1) / dil + 1
// height and width: weights = (bottom +
pad_l + pad_r
- (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
if (unshared){
if (unshared){
odim = 6;
odim = 6;
...
...
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
e514f4d3
...
@@ -11,6 +11,7 @@ import theano.tensor as T
...
@@ -11,6 +11,7 @@ import theano.tensor as T
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
from
theano.tensor.nnet.tests.test_abstract_conv
import
TestAsymmetricPadding
class
TestCorr2D
(
utt
.
InferShapeTester
):
class
TestCorr2D
(
utt
.
InferShapeTester
):
...
@@ -462,6 +463,16 @@ class TestUnsharedCorr2d(TestUnsharedConv):
...
@@ -462,6 +463,16 @@ class TestUnsharedCorr2d(TestUnsharedConv):
conv2d_gradi_op
=
corr
.
CorrMM_gradInputs
conv2d_gradi_op
=
corr
.
CorrMM_gradInputs
class
TestAsymmetricCorr
(
TestAsymmetricPadding
):
if
theano
.
config
.
mode
==
"FAST_COMPILE"
:
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
.
excluding
(
'gpuarray'
)
else
:
mode
=
None
conv2d_op
=
corr
.
CorrMM
conv2d_gradw_op
=
corr
.
CorrMM_gradWeights
conv2d_gradi_op
=
corr
.
CorrMM_gradInputs
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
t
=
TestCorr2D
(
'setUp'
)
t
=
TestCorr2D
(
'setUp'
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论