Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
30dd8bdc
提交
30dd8bdc
authored
11月 26, 2016
作者:
Gijs van Tulder
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
CorrMM should use height/width if it is given.
上级
676cf29d
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
103 行增加
和
179 行删除
+103
-179
blas.py
theano/gpuarray/blas.py
+14
-55
corr3d_gemm.c
theano/gpuarray/corr3d_gemm.c
+11
-3
corr_gemm.c
theano/gpuarray/corr_gemm.c
+9
-2
blas.py
theano/sandbox/cuda/blas.py
+14
-54
corr3d_gemm.cu
theano/sandbox/cuda/corr3d_gemm.cu
+11
-3
corr_gemm.cu
theano/sandbox/cuda/corr_gemm.cu
+9
-2
corr.py
theano/tensor/nnet/corr.py
+7
-25
corr3d.py
theano/tensor/nnet/corr3d.py
+8
-30
corr3d_gemm.c
theano/tensor/nnet/corr3d_gemm.c
+11
-3
corr_gemm.c
theano/tensor/nnet/corr_gemm.c
+9
-2
没有找到文件。
theano/gpuarray/blas.py
浏览文件 @
30dd8bdc
...
...
@@ -528,7 +528,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
c_code_cache_version
(
self
):
# Raise this whenever modifying the code below.
return
(
3
,)
return
(
4
,)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
"""
...
...
@@ -650,8 +650,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
kW = PyGpuArray_DIMS(weights)[3];
}
else {
if (
(dH != 1) || (padH == -1)
) {
//
vertical subsampling or half padding, kernel height is specified
if (
%(height)
s != -1
) {
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
}
else if (padH == -2) {
...
...
@@ -662,7 +662,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
}
if (
(dW != 1) || (padW == -1)
) {
if (
%(width)
s != -1
) {
kW =
%(width)
s;
}
else if (padW == -2) {
...
...
@@ -671,15 +671,6 @@ class BaseGpuCorrMM(CGpuKernelBase):
else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if ((
%(height)
s != -1 &&
%(height)
s != kH) ||
(
%(width)
s != -1 &&
%(width)
s != kW))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed kernel shape
%%
lldx
%%
lld "
"does not match given shape
%%
lldx
%%
lld",
(long long)kH, (long long)kW, (long long)
%(height)
s, (long long)
%(width)
s);
%(fail)
s
}
}
// Implicit dilated kernel size
...
...
@@ -738,20 +729,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (
dH !=
1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
dW !=
1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[2] = (
%(height)
s != -
1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
%(width)
s != -
1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_typecode = top->ga.typecode;
out_context = top->context;
if ((
%(height)
s != -1 &&
%(height)
s != out_dim[2]) ||
(
%(width)
s != -1 &&
%(width)
s != out_dim[3]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed output shape
%%
lldx
%%
lld "
"does not match given shape
%%
lldx
%%
lld",
(long long)out_dim[2], (long long)out_dim[3],
(long long)
%(height)
s, (long long)
%(width)
s);
%(fail)
s
}
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
...
...
@@ -1105,7 +1086,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
def
c_code_cache_version
(
self
):
# raise this whenever modifying the code below.
return
(
3
,)
return
(
4
,)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
,
depth
=
None
):
...
...
@@ -1245,8 +1226,8 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
kD = PyGpuArray_DIMS(weights)[4];
}
else {
if (
(dH != 1) || (padH == -1)
) {
//
vertical subsampling or half padding, kernel height is specified
if (
%(height)
s != -1
) {
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
}
else if (padH == -2) {
...
...
@@ -1257,7 +1238,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
}
if (
(dW != 1) || (padW == -1)
) {
if (
%(width)
s != -1
) {
kW =
%(width)
s;
}
else if (padW == -2) {
...
...
@@ -1266,7 +1247,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if (
(dD != 1) || (padD == -1)
) {
if (
%(depth)
s != -1
) {
kD =
%(depth)
s;
}
else if (padD == -2) {
...
...
@@ -1275,17 +1256,6 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
else {
kD = (PyGpuArray_DIMS(bottom)[4] + 2*padD - (PyGpuArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
}
if ((
%(height)
s != -1 &&
%(height)
s != kH) ||
(
%(width)
s != -1 &&
%(width)
s != kW) ||
(
%(depth)
s != -1 &&
%(depth)
s != kD))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed kernel shape
%%
lldx
%%
lldx
%%
lld "
"does not match given shape
%%
lldx
%%
lldx
%%
lld",
(long long)kH, (long long)kW, (long long)kD,
(long long)
%(height)
s, (long long)
%(width)
s, (long long)
%(depth)
s);
%(fail)
s
}
}
// Implicit dilated kernel size
...
...
@@ -1357,22 +1327,11 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (
dH !=
1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
dW !=
1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (
dD !=
1) ?
%(depth)
s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
out_dim[2] = (
%(height)
s != -
1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
%(width)
s != -
1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (
%(depth)
s != -
1) ?
%(depth)
s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
out_typecode = top->ga.typecode;
out_context = top->context;
if ((
%(height)
s != -1 &&
%(height)
s != out_dim[2]) ||
(
%(width)
s != -1 &&
%(width)
s != out_dim[3]) ||
(
%(depth)
s != -1 &&
%(depth)
s != out_dim[4]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed output shape
%%
lldx
%%
lldx
%%
lld "
"does not match given shape
%%
lldx
%%
lldx
%%
lld",
(long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4],
(long long)
%(height)
s, (long long)
%(width)
s, (long long)
%(depth)
s);
%(fail)
s
}
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2
\\
n");
...
...
theano/gpuarray/corr3d_gemm.c
浏览文件 @
30dd8bdc
...
...
@@ -425,9 +425,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
size_t
dil_kD
=
(
kD
-
1
)
*
dilD
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const
size_t
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_kH
)
/
dH
+
1
;
const
size_t
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_kW
)
/
dW
+
1
;
const
size_t
topDepth
=
(
bottomDepth
+
2
*
padD
-
dil_kD
)
/
dD
+
1
;
const
size_t
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
size_t
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
const
size_t
topDepthNoDD
=
(
bottomDepth
+
2
*
padD
-
dil_kD
);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const
size_t
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
size_t
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
const
size_t
topDepth
=
_CONV_FLOORDIV_X
(
topDepthNoDD
,
dD
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
...
...
theano/gpuarray/corr_gemm.c
浏览文件 @
30dd8bdc
...
...
@@ -360,8 +360,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth)
const
size_t
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_kH
)
/
dH
+
1
;
const
size_t
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_kW
)
/
dW
+
1
;
const
size_t
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
size_t
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const
size_t
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
size_t
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
...
...
theano/sandbox/cuda/blas.py
浏览文件 @
30dd8bdc
...
...
@@ -922,7 +922,7 @@ class BaseGpuCorrMM(GpuOp):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
0
,
2
7
)
return
(
0
,
2
8
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
...
@@ -1042,8 +1042,8 @@ class BaseGpuCorrMM(GpuOp):
kW = CudaNdarray_HOST_DIMS(weights)[3];
}
else {
if (
(dH != 1) || (padH == -1)
) {
//
vertical subsampling or half padding, kernel height is specified
if (
%(height)
s != -1
) {
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
}
else if (padH == -2) {
...
...
@@ -1054,7 +1054,7 @@ class BaseGpuCorrMM(GpuOp):
// explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
}
if (
(dW != 1) || (padW == -1)
) {
if (
%(width)
s != -1
) {
kW =
%(width)
s;
}
else if (padW == -2) {
...
...
@@ -1063,15 +1063,6 @@ class BaseGpuCorrMM(GpuOp):
else {
kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if ((
%(height)
s != -1 &&
%(height)
s != kH) ||
(
%(width)
s != -1 &&
%(width)
s != kW))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed kernel shape
%%
dx
%%
d "
"does not match given shape
%%
dx
%%
d",
kH, kW,
%(height)
s,
%(width)
s);
%(fail)
s
}
}
// Implicit dilated kernel size
...
...
@@ -1124,18 +1115,8 @@ class BaseGpuCorrMM(GpuOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
if ((
%(height)
s != -1 &&
%(height)
s != out_dim[2]) ||
(
%(width)
s != -1 &&
%(width)
s != out_dim[3]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed output shape
%%
dx
%%
d "
"does not match given shape
%%
dx
%%
d",
out_dim[2], out_dim[3],
%(height)
s,
%(width)
s);
%(fail)
s
}
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
...
...
@@ -1500,7 +1481,7 @@ class BaseGpuCorr3dMM(GpuOp):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
0
,
2
6
)
return
(
0
,
2
7
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
...
@@ -1642,9 +1623,9 @@ class BaseGpuCorr3dMM(GpuOp):
}
else
{
if (
(dH != 1) || (padH == -1)
)
if (
%(height)
s != -1
)
{
//
vertical subsampling or half padding, kernel height is specified
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
}
else if (padH == -2)
...
...
@@ -1657,7 +1638,7 @@ class BaseGpuCorr3dMM(GpuOp):
// explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
}
if (
(dW != 1) || (padW == -1)
)
if (
%(width)
s != -1
)
{
kW =
%(width)
s;
}
...
...
@@ -1669,7 +1650,7 @@ class BaseGpuCorr3dMM(GpuOp):
{
kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if (
(dD != 1) || (padD == -1)
)
if (
%(depth)
s != -1
)
{
kD =
%(depth)
s;
}
...
...
@@ -1681,16 +1662,6 @@ class BaseGpuCorr3dMM(GpuOp):
{
kD = (CudaNdarray_HOST_DIMS(bottom)[4] + 2*padD - (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD - 1) / dilD+ 1;
}
if ((
%(height)
s != -1 &&
%(height)
s != kH) ||
(
%(width)
s != -1 &&
%(width)
s != kW) ||
(
%(depth)
s != -1 &&
%(depth)
s != kD))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed kernel shape
%%
dx
%%
dx
%%
d "
"does not match given shape
%%
dx
%%
dx
%%
d",
kH, kW, kD,
%(height)
s,
%(width)
s,
%(depth)
s);
%(fail)
s
}
}
// Implicit dilated kernel size
...
...
@@ -1763,20 +1734,9 @@ class BaseGpuCorr3dMM(GpuOp):
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (dD != 1) ?
%(depth)
s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
if ((
%(height)
s != -1 &&
%(height)
s != out_dim[2]) ||
(
%(width)
s != -1 &&
%(width)
s != out_dim[3]) ||
(
%(depth)
s != -1 &&
%(depth)
s != out_dim[4]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed output shape
%%
dx
%%
dx
%%
d "
"does not match given shape
%%
dx
%%
dx
%%
d",
out_dim[2], out_dim[3], out_dim[4],
%(height)
s,
%(width)
s,
%(depth)
s);
%(fail)
s
}
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (
%(depth)
s != -1) ?
%(depth)
s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2
\\
n");
...
...
theano/sandbox/cuda/corr3d_gemm.cu
浏览文件 @
30dd8bdc
...
...
@@ -429,9 +429,17 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = int((bottomHeight + 2*padH - dil_kH) / dH) + 1;
const int topWidth = int((bottomWidth + 2*padW - dil_kW) / dW) + 1;
const int topDepth = int((bottomDepth + 2*padD - dil_kD) / dD) + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
#undef _CONV_FLOORDIV
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
...
...
theano/sandbox/cuda/corr_gemm.cu
浏览文件 @
30dd8bdc
...
...
@@ -333,8 +333,15 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
#undef _CONV_FLOORDIV
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
...
...
theano/tensor/nnet/corr.py
浏览文件 @
30dd8bdc
...
...
@@ -123,7 +123,7 @@ class BaseCorrMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
2
,
self
.
openmp
,
blas_header_version
())
return
(
3
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
...
@@ -275,8 +275,8 @@ class BaseCorrMM(gof.OpenMPOp):
kW = PyArray_DIMS(weights)[3];
}
else {
if (
(dH != 1) || (padH == -1)
) {
//
vertical subsampling or half padding, kernel height is specified
if (
%(height)
s != -1
) {
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
}
else if (padH == -2) {
...
...
@@ -287,7 +287,8 @@ class BaseCorrMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
}
if ((dW != 1) || (padW == -1)) {
if (
%(width)
s != -1) {
// kernel width is specified (perhaps horizontal subsampling or half padding)
kW =
%(width)
s;
}
else if (padW == -2) {
...
...
@@ -296,15 +297,6 @@ class BaseCorrMM(gof.OpenMPOp):
else {
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if ((
%(height)
s != -1 &&
%(height)
s != kH) ||
(
%(width)
s != -1 &&
%(width)
s != kW))
{
PyErr_Format(PyExc_ValueError,
"BaseCorrMM: computed kernel shape
%%
lldx
%%
lld "
"does not match given shape
%%
lldx
%%
lld",
(long long)kH, (long long)kW, (long long)
%(height)
s, (long long)
%(width)
s);
%(fail)
s
}
}
// Implicit dilated kernel size
...
...
@@ -357,18 +349,8 @@ class BaseCorrMM(gof.OpenMPOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
if ((
%(height)
s != -1 &&
%(height)
s != out_dim[2]) ||
(
%(width)
s != -1 &&
%(width)
s != out_dim[3]))
{
PyErr_Format(PyExc_ValueError,
"BaseCorrMM: computed output shape
%%
lldx
%%
lld "
"does not match given shape
%%
lldx
%%
lld",
(long long)out_dim[2], (long long)out_dim[3],
(long long)
%(height)
s, (long long)
%(width)
s);
%(fail)
s
}
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2
\\
n");
...
...
theano/tensor/nnet/corr3d.py
浏览文件 @
30dd8bdc
...
...
@@ -123,7 +123,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
2
,
self
.
openmp
,
blas_header_version
())
return
(
3
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
...
@@ -292,8 +292,8 @@ class BaseCorr3dMM(gof.OpenMPOp):
kD = PyArray_DIMS(weights)[4];
}
else {
if (
(dH != 1) || (padH == -1)
) {
//
vertical subsampling or half padding, kernel height is specified
if (
%(height)
s != -1
) {
//
kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
}
else if (padH == -2) {
...
...
@@ -304,7 +304,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
}
if (
(dW != 1) || (padW == -1)
) {
if (
%(width)
s != -1
) {
kW =
%(width)
s;
}
else if (padW == -2) {
...
...
@@ -313,7 +313,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
else {
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if (
(dD != 1) || (padD == -1)
) {
if (
%(depth)
s != -1
) {
kD =
%(depth)
s;
}
else if (padD == -2) {
...
...
@@ -322,17 +322,6 @@ class BaseCorr3dMM(gof.OpenMPOp):
else {
kD = (PyArray_DIMS(bottom)[4] + 2*padD - (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
}
if ((
%(height)
s != -1 &&
%(height)
s != kH) ||
(
%(width)
s != -1 &&
%(width)
s != kW) ||
(
%(depth)
s != -1 &&
%(depth)
s != kD))
{
PyErr_Format(PyExc_ValueError,
"BaseCorr3dMM: computed kernel shape
%%
lldx
%%
lldx
%%
lld "
"does not match given shape
%%
lldx
%%
lldx
%%
lld",
(long long)kH, (long long)kW, (long long)kD,
(long long)
%(height)
s, (long long)
%(width)
s, (long long)
%(depth)
s);
%(fail)
s
}
}
// Implicit dilated kernel size
...
...
@@ -398,20 +387,9 @@ class BaseCorr3dMM(gof.OpenMPOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[4] = (npy_intp)((dD != 1) ?
%(depth)
s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
if ((
%(height)
s != -1 &&
%(height)
s != out_dim[2]) ||
(
%(width)
s != -1 &&
%(width)
s != out_dim[3]) ||
(
%(depth)
s != -1 &&
%(depth)
s != out_dim[4]))
{
PyErr_Format(PyExc_ValueError,
"BaseCorr3dMM: computed output shape
%%
lldx
%%
lldx
%%
lld "
"does not match given shape
%%
lldx
%%
lldx
%%
lld",
(long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4],
(long long)
%(height)
s, (long long)
%(width)
s, (long long)
%(depth)
s);
%(fail)
s
}
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[4] = (npy_intp)((
%(depth)
s != -1) ?
%(depth)
s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2
\\
n");
...
...
theano/tensor/nnet/corr3d_gemm.c
浏览文件 @
30dd8bdc
...
...
@@ -188,9 +188,17 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
int
dil_kD
=
(
kD
-
1
)
*
dilD
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const
int
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_kH
)
/
dH
+
1
;
const
int
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_kW
)
/
dW
+
1
;
const
int
topDepth
=
(
bottomDepth
+
2
*
padD
-
dil_kD
)
/
dD
+
1
;
const
int
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
int
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
const
int
topDepthNoDD
=
(
bottomDepth
+
2
*
padD
-
dil_kD
);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
const
int
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
int
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
const
int
topDepth
=
_CONV_FLOORDIV_X
(
topDepthNoDD
,
dD
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
...
...
theano/tensor/nnet/corr_gemm.c
浏览文件 @
30dd8bdc
...
...
@@ -164,8 +164,15 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth)
const
int
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_kH
)
/
dH
+
1
;
const
int
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_kW
)
/
dW
+
1
;
const
int
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
int
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
const
int
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
int
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论