Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9117f696
提交
9117f696
authored
6月 02, 2009
作者:
bergstra@ip05.m
浏览文件
操作
浏览文件
下载
差异文件
merge
上级
d9ee20e6
a128912a
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
192 行增加
和
433 行删除
+192
-433
conv.py
theano/sandbox/conv.py
+51
-291
test_conv.py
theano/sandbox/test_conv.py
+141
-142
没有找到文件。
theano/sandbox/conv.py
浏览文件 @
9117f696
...
@@ -16,9 +16,6 @@ class ConvOp(Op):
...
@@ -16,9 +16,6 @@ class ConvOp(Op):
In development.
In development.
"""
"""
__attrnames
=
[
'imshp'
,
'kshp'
,
'nkern'
,
'bsize'
,
'dx'
,
'dy'
,
'out_mode'
]
"""These attributes uniquely identify the behaviour of this op for given inputs"""
def
__init__
(
self
,
imshp
,
kshp
,
nkern
,
bsize
,
dx
,
dy
,
output_mode
=
'valid'
,
unroll_batch
=
0
,
unroll_kern
=
0
):
def
__init__
(
self
,
imshp
,
kshp
,
nkern
,
bsize
,
dx
,
dy
,
output_mode
=
'valid'
,
unroll_batch
=
0
,
unroll_kern
=
0
):
"""
"""
unroll_batch. If >0 will use a version that will unroll the batch loop by the value of the option. By default don't use this version of the code.
unroll_batch. If >0 will use a version that will unroll the batch loop by the value of the option. By default don't use this version of the code.
...
@@ -40,7 +37,7 @@ class ConvOp(Op):
...
@@ -40,7 +37,7 @@ class ConvOp(Op):
self
.
unroll_batch
=
unroll_batch
self
.
unroll_batch
=
unroll_batch
self
.
unroll_kern
=
unroll_kern
self
.
unroll_kern
=
unroll_kern
assert
not
(
unroll_batch
>
0
and
unroll_kern
>
0
)
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
raise
Exception
(
"unroll_batch(
%
s) should be 0 or a multiple of bsize(
%
s)"
%
(
str
(
self
.
unroll_batch
),
str
(
self
.
bsize
)))
raise
Exception
(
"unroll_batch(
%
s) should be 0 or a multiple of bsize(
%
s)"
%
(
str
(
self
.
unroll_batch
),
str
(
self
.
bsize
)))
if
self
.
unroll_kern
>
0
and
self
.
nkern
%
unroll_kern
!=
0
:
if
self
.
unroll_kern
>
0
and
self
.
nkern
%
unroll_kern
!=
0
:
...
@@ -54,24 +51,12 @@ class ConvOp(Op):
...
@@ -54,24 +51,12 @@ class ConvOp(Op):
raise
Exception
(
"Mode
%
s not implemented"
%
self
.
out_mode
)
raise
Exception
(
"Mode
%
s not implemented"
%
self
.
out_mode
)
assert
(
self
.
outshp
>=
0
)
.
all
()
assert
(
self
.
outshp
>=
0
)
.
all
()
hashval
=
hash
(
type
(
self
))
for
a
in
self
.
__attrnames
:
hashval
=
hashval
^
hash
(
getattr
(
self
,
a
))
self
.
__hashval
=
hashval
def
__eq__
(
self
,
other
):
# def __eq__(self, other):
if
type
(
self
)
!=
type
(
other
):
# raise Error("Not implemented")
return
False
for
a
in
self
.
__attrnames
:
if
getattr
(
self
,
a
)
!=
getattr
(
other
,
a
):
return
False
return
True
def
__hash__
(
self
):
# def __hash__(self):
return
self
.
__hashval
# raise Error("Not implemented")
def
__str__
(
self
):
return
"ConvOp{"
+
","
.
join
(
str
((
a
,
getattr
(
self
,
a
)))
for
a
in
self
.
__attrnames
)
+
"}"
def
make_node
(
self
,
inputs
,
kerns
):
def
make_node
(
self
,
inputs
,
kerns
):
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
...
@@ -190,12 +175,10 @@ using namespace std;
...
@@ -190,12 +175,10 @@ using namespace std;
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
d
[
"type"
]
=
"float"
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
d
[
"type"
]
=
"float"
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"type"
]
=
"double"
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"type"
]
=
"double"
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
if
self
.
unroll_kern
>
0
:
if
self
.
unroll_kern
>
0
and
self
.
unroll_batch
>
0
:
print
"return unrolled kern code by"
,
self
.
unroll_kern
print
"return unrolled batch and kern code by"
,
self
.
unroll_batch
,
self
.
unroll_kern
return
gen_conv_code_unroll_kern
(
d
,
self
.
unroll_kern
)
return
gen_conv_code_unroll_batch_kern
(
d
,
self
.
unroll_batch
,
if
self
.
unroll_batch
>
0
:
self
.
unroll_kern
)
print
"return unrolled batch code by"
,
self
.
unroll_batch
return
gen_conv_code_unroll_batch
(
d
,
self
.
unroll_batch
)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5? under 10?
#TODO: should we choose the unroll size automatically with the bigger divisor under 5? under 10?
if
self
.
out_mode
==
'valid'
:
if
self
.
out_mode
==
'valid'
:
...
@@ -648,17 +631,29 @@ free(kbuf);
...
@@ -648,17 +631,29 @@ free(kbuf);
Py_XDECREF(img2d);
Py_XDECREF(img2d);
"""
"""
def
gen_conv_code_unroll_batch_kern
(
d
,
unroll_bsize
=
1
,
unroll_ksize
=
1
):
def
gen_conv_code_unroll_batch
(
d
,
unloop_size
=
1
):
""" c_code for ConvOp that unroll the batch size loop
""" c_code for ConvOp that unroll the batch size loop
"""
"""
d
[
"unloop_size"
]
=
unloop_size
assert
unroll_bsize
>
0
and
unroll_ksize
>
0
def
my_dup
(
st
):
d
[
"unroll_bsize"
]
=
unroll_bsize
d
[
"unroll_ksize"
]
=
unroll_ksize
def
my_dup
(
st
,
size
):
s
=
""
s
=
""
for
i
in
range
(
unloop_
size
):
for
i
in
range
(
size
):
d
[
"un
loop
_iter"
]
=
i
d
[
"un
roll
_iter"
]
=
i
s
+=
st
%
d
s
+=
st
%
d
return
s
return
s
+
"
\n
"
def
my_dup2
(
st
):
s
=
""
iter
=
0
for
i
in
range
(
unroll_bsize
):
d
[
"unroll_biter"
]
=
i
for
j
in
range
(
unroll_ksize
):
d
[
"unroll_kiter"
]
=
j
d
[
"unroll_iter"
]
=
iter
iter
+=
1
s
+=
st
%
d
return
s
+
"
\n
"
ret
=
"""
ret
=
"""
int mode=-1,typenum=0, typenum_f=0;
int mode=-1,typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
...
@@ -775,8 +770,8 @@ if ((!%(z)s)
...
@@ -775,8 +770,8 @@ if ((!%(z)s)
int Os[2];
int Os[2];
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;}
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;}
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;}
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;}
for(int b=0;b<
%(self_bsize)
s ;b+=
%(un
loop_
size)
s){
for(int b=0;b<
%(self_bsize)
s ;b+=
%(un
roll_b
size)
s){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern+
+
){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern+
=
%(unroll_ksize)
s
){
//assertions
//assertions
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
...
@@ -784,14 +779,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
...
@@ -784,14 +779,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != sizeof(
%(type)
s))
%(fail)
s;
"""
%
d
"""
%
d
ret
+=
my_dup
(
"
%(type)
s * __restrict__ out
%(unloop_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(
%(z)
s,b+
%(unloop_iter)
s,n_kern));
\n
"
)
ret
+=
my_dup
2
(
"
%(type)
s * __restrict__ out
%(unroll_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(
%(z)
s,b+
%(unroll_biter)
s,n_kern+
%(unroll_kiter)
s));
"
)
ret
+=
my_dup
(
"for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out
%(un
loop_iter)
s[i] = 0;"
)
ret
+=
my_dup
(
"for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out
%(un
roll_iter)
s[i] = 0;"
,
unroll_bsize
*
unroll_ksize
)
ret
+=
"""
ret
+=
"""
for(int stack_size=0;stack_size<
%(self_imshp0)
s;stack_size++){
for(int stack_size=0;stack_size<
%(self_imshp0)
s;stack_size++){
"""
%
d
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * __restrict__ in
%(unloop_iter)
d=(
%(type)
s *)(PyArray_GETPTR2(img2d,b+
%(unloop_iter)
s,stack_size));
\n
"
)
ret
+=
my_dup
(
"const
%(type)
s * __restrict__ in
%(unroll_iter)
d=(
%(type)
s *)(PyArray_GETPTR2(img2d,b+
%(unroll_iter)
s,stack_size));"
,
unroll_bsize
)
ret
+=
my_dup
(
"const
%(type)
s * __restrict__ hvals
%(unroll_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(filtersflipped,n_kern+
%(unroll_iter)
s,stack_size));"
,
unroll_ksize
)
ret
+=
"""
ret
+=
"""
const
%(type)
s * __restrict__ hvals=(
%(type)
s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
int new_m;
int new_m;
...
@@ -802,7 +797,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
...
@@ -802,7 +797,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
for (int n=0; n < Os[1]; n++) { // loop over columns
for (int n=0; n < Os[1]; n++) { // loop over columns
"""
%
d
"""
%
d
ret
+=
my_dup
(
"
%(type)
s sum
%(un
loop_iter)
s=0;
\n
"
)
ret
+=
my_dup
(
"
%(type)
s sum
%(un
roll_iter)
s=0;"
,
unroll_bsize
*
unroll_ksize
)
ret
+=
"""
ret
+=
"""
// Sum over kernel, if index into image is out of bounds
// Sum over kernel, if index into image is out of bounds
...
@@ -811,12 +806,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
...
@@ -811,12 +806,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
int ind0 = (new_m-j);
int ind0 = (new_m-j);
if(mode==FULL){
if(mode==FULL){
const
%(type)
s * idx_hvals=&hvals[j*dim_ker[1]];
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * idx_hvals
%(unroll_iter)
s=&hvals
%(unroll_iter)
s[j*dim_ker[1]];"
,
unroll_ksize
)
ret
+=
"""
if(ind0 < 0 || ind0 >= dim_im[0]){
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
for (int k=0; k < dim_ker[1]; k++) {
"""
%
d
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals[k] * fill_value;
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s += idx_hvals
%(unroll_kiter)
s[k] * fill_value;
"
)
ret
+=
"""
ret
+=
"""
}
}
}else{
}else{
...
@@ -827,7 +824,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
...
@@ -827,7 +824,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
for(k=0;k<max_k;k++){
for(k=0;k<max_k;k++){
"""
%
d
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals[k] * fill_value;
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s += idx_hvals
%(unroll_kiter)
s[k] * fill_value;
"
)
ret
+=
"""
ret
+=
"""
}
}
}else {k=max_k;}
}else {k=max_k;}
...
@@ -835,285 +832,48 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
...
@@ -835,285 +832,48 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
//do the part where the kernel is on the img
//do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]);
max_k=min(n+1,(int)dim_ker[1]);
"""
%
d
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * idx_in
%(un
loop_iter)
s=&in
%(unloop_iter)
s[ind0*dim_im[1]];
\n
"
)
ret
+=
my_dup
(
"const
%(type)
s * idx_in
%(un
roll_iter)
s=&in
%(unroll_iter)
s[ind0*dim_im[1]];"
,
unroll_bsize
)
ret
+=
"""
ret
+=
"""
for (int ind1=n-k; k<max_k; k++,ind1--) {
for (int ind1=n-k; k<max_k; k++,ind1--) {
"""
%
d
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals[k] * idx_in
%(unloop_iter)
s[ind1];
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s+= idx_hvals
%(unroll_kiter)
s[k] * idx_in
%(unroll_biter)
s[ind1];
"
)
ret
+=
"""
ret
+=
"""
}
}
//do the part to the left of the img
//do the part to the left of the img
if(fill_value!=0)
if(fill_value!=0)
for(;k<dim_ker[1];k++){
for(;k<dim_ker[1];k++){
"""
%
d
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals[k] * fill_value;
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s += idx_hvals
%(unroll_kiter)
s[k] * fill_value;
"
)
ret
+=
"""
ret
+=
"""
}
}
}
}
}else{
}else{
//valid mode
"""
%
d
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s* idx_in
%(unloop_iter)
s=&in
%(unloop_iter)
s[ind0*dim_im[1]];
\n
"
)
ret
+=
my_dup
(
"const
%(type)
s* idx_in
%(unroll_iter)
s=&in
%(unroll_iter)
s[ind0*dim_im[1]];"
,
unroll_bsize
)
ret
+=
my_dup
(
"const
%(type)
s* idx_hvals
%(unroll_iter)
s=&hvals
%(unroll_iter)
s[j*dim_ker[1]];"
,
unroll_ksize
)
ret
+=
"""
ret
+=
"""
const
%(type)
s* idx_hvals=&hvals[j*dim_ker[1]];
int new_n = (n+dim_ker[1]-1);
int new_n = (n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
"""
%
d
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+=idx_hvals[k]*idx_in
%(unloop_iter)
s[last];
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s+=idx_hvals
%(unroll_kiter)
s[k]*idx_in
%(unroll_biter)
s[last];
"
)
ret
+=
"""
ret
+=
"""
}
}
}
}
}//for j
}//for j
"""
%
d
"""
%
d
ret
+=
my_dup
(
"out
%(unloop_iter)
s[m*dim_zz[1]+n]
%(affectation)
s sum
%(unloop_iter)
s;
\n
"
)
# ret+=my_dup("out%(unroll_iter)s[m*dim_zz[1]+n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize)
# ret+=my_dup("cout<<sum%(unloop_iter)s<<endl;")
ret
+=
my_dup
(
"out
%(unroll_iter)
s[m*dim_zz[1]+n]
%(affectation)
s sum
%(unroll_iter)
s;"
,
unroll_bsize
*
unroll_ksize
)
# ret+=my_dup("cout<<sum%(unroll_iter)s<<endl;",unroll_bsize)
ret
+=
"""
ret
+=
"""
}//for n
}//for n
}//for m
}//for m
}//for stack_size
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out0[i];
std::cout << "
\\
n";
}
}//for n_kern
}//for n_kern
}//for b
}//for b
Py_XDECREF(img2d);
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
Py_XDECREF(filtersflipped);
"""
"""
return
ret
return
ret
def
gen_conv_code_unroll_kern
(
d
,
unloop_size
=
1
):
""" c_code for ConvOp that unroll the batch size loop
"""
d
[
"unloop_size"
]
=
unloop_size
def
my_dup
(
st
):
s
=
""
for
i
in
range
(
unloop_size
):
d
[
"unloop_iter"
]
=
i
s
+=
st
%
d
return
s
ret
=
"""
int mode=-1,typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const
%(type)
s fill_value = 0;
int type_im=PyArray_TYPE(
%(img2d)
s);
int type_ker=PyArray_TYPE(
%(filtersflipped)
s);
npy_intp dim_zz[2]={
%(self_outshp0)
s,
%(self_outshp1)
s};
npy_intp dim_im[2]={
%(self_imshp1)
s,
%(self_imshp2)
s};
npy_intp dim_ker[2]={
%(self_kshp0)
s,
%(self_kshp1)
s};
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
string s="
%(self_out_mode)
s";
if(
%(img2d)
s->nd==2){
img2d_dim[3]=
%(img2d)
s->dimensions[1];
img2d_dim[2]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==3){
img2d_dim[3]=
%(img2d)
s->dimensions[2];
img2d_dim[2]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==4){
img2d_dim[3]=
%(img2d)
s->dimensions[3];
img2d_dim[2]=
%(img2d)
s->dimensions[2];
img2d_dim[1]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)
s;
}
if(
%(filtersflipped)
s->nd==3){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else if(
%(filtersflipped)
s->nd==4){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[3];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[1]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else{
PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape");
%(fail)
s;
}
img2d = PyArray_Newshape(
%(img2d)
s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(
%(type)
s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)
s;
}
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(
%(filtersflipped)
s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(
%(type)
s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)
s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(s=="valid") mode=0;
else if(s=="full") mode=2;
else {PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported");
%(fail)
s;};
typenum = PyArray_ObjectType((PyObject*)
%(img2d)
s, 0);
typenum_f = PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;}
if (!img2d)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
if ((!
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=4
||(
%(z)
s->dimensions[0] !=
%(self_bsize)
s)
||(
%(z)
s->dimensions[1] !=
%(self_nkern)
s)
||(
%(z)
s->dimensions[2] != dim_zz[0])
|| (
%(z)
s->dimensions[3] != dim_zz[1])
)
{
if (
%(z)
s) Py_DECREF(
%(z)
s);
npy_intp dims[4] = {0,0,0,0};
if(!dims)
%(fail)
s;
dims[0]=
%(self_bsize)
s;
dims[1]=
%(self_nkern)
s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)
s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
//PyArray_FILLWBYTE((PyObject*)
%(z)
s,0);
}
int Os[2];
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;}
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;}
for(int b=0;b<
%(self_bsize)
s;b++){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern+=
%(unloop_size)
s){
//assertions
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[1] !=
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != sizeof(
%(type)
s))
%(fail)
s;
"""
%
d
ret
+=
my_dup
(
"
%(type)
s * __restrict__ out
%(unloop_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(
%(z)
s,b,n_kern+
%(unloop_iter)
s));"
)
ret
+=
my_dup
(
"for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out
%(unloop_iter)
s[i] = 0;"
)
ret
+=
"""
for(int stack_size=0;stack_size<
%(self_imshp0)
s;stack_size++){
const
%(type)
s * __restrict__ in=(
%(type)
s *)(PyArray_GETPTR2(img2d,b,stack_size));
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * __restrict__ hvals
%(unloop_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(filtersflipped,n_kern+
%(unloop_iter)
s,stack_size));"
)
ret
+=
"""
int new_m;
for (int m=0; m < Os[0]; m++) {
// Reposition index into input image based on requested output size
if (mode == FULL) new_m = m ;
else new_m = (m+dim_ker[0]-1);
for (int n=0; n < Os[1]; n++) { // loop over columns
"""
%
d
ret
+=
my_dup
(
"
%(type)
s sum
%(unloop_iter)
s=0;"
)
ret
+=
"""
// Sum over kernel, if index into image is out of bounds
// fill with the value
for (int j=0; j < dim_ker[0]; j++) {
int ind0 = (new_m-j);
if(mode==FULL){
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * idx_hvals
%(unloop_iter)
s=&hvals
%(unloop_iter)
s[j*dim_ker[1]];"
)
ret
+=
"""
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s += idx_hvals
%(unloop_iter)
s[k] * fill_value;"
)
ret
+=
"""
}
}else{
//do the part where kernel is to the right of the img
int k=0,max_k=max((int)(n-dim_im[1])+1,0);
if(fill_value!=0){
for(k=0;k<max_k;k++){
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s += idx_hvals
%(unloop_iter)
s[k]*fill_value;"
)
ret
+=
"""
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]);
const
%(type)
s * idx_in=&in[ind0*dim_im[1]];
for (int ind1=n-k; k<max_k; k++,ind1--) {
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s += idx_hvals
%(unloop_iter)
s[k] * idx_in[ind1];"
)
ret
+=
"""
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker[1];k++){
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals
%(unloop_iter)
s[k]*fill_value;"
)
ret
+=
"""
}
}
}else{
const
%(type)
s* idx_in=&in[ind0*dim_im[1]];
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s* idx_hvals
%(unloop_iter)
s=&hvals
%(unloop_iter)
s[j*dim_ker[1]];"
)
ret
+=
"""
int new_n = (n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s += idx_hvals
%(unloop_iter)
s[k]*idx_in[last];"
)
ret
+=
"""
}
}
}//for j
"""
%
d
ret
+=
my_dup
(
"out
%(unloop_iter)
s[m*dim_zz[1]+n]
%(affectation)
s sum
%(unloop_iter)
s;"
)
ret
+=
"""
}//for n
}//for m
}//for stack_size
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
%
d
return
ret
theano/sandbox/test_conv.py
浏览文件 @
9117f696
...
@@ -50,10 +50,10 @@ class TestConvOp(unittest.TestCase):
...
@@ -50,10 +50,10 @@ class TestConvOp(unittest.TestCase):
nkern
=
5
# nb kernel
nkern
=
5
# nb kernel
ssizes
=
((
1
,
1
),(
2
,
2
),(
3
,
3
),(
4
,
4
))
#step size
ssizes
=
((
1
,
1
),(
2
,
2
),(
3
,
3
),(
4
,
4
))
#step size
convmodes
=
(
'full'
,
'valid'
)
convmodes
=
(
'full'
,
'valid'
)
elif
1
:
elif
0
:
# fixed parameters
like NORB JOB
# fixed parameters
bsize
=
4
# batch size
bsize
=
10
# batch size
imshp
=
(
96
,
96
)
# image shape
imshp
=
(
50
,
50
)
# image shape
print
>>
sys
.
stderr
,
"WARNING: only square shape tested"
print
>>
sys
.
stderr
,
"WARNING: only square shape tested"
kshps
=
[(
12
,
12
),
(
12
,
12
)]
kshps
=
[(
12
,
12
),
(
12
,
12
)]
nkern
=
20
# nb kernel
nkern
=
20
# nb kernel
...
@@ -63,7 +63,6 @@ class TestConvOp(unittest.TestCase):
...
@@ -63,7 +63,6 @@ class TestConvOp(unittest.TestCase):
# fixed parameters
# fixed parameters
bsize
=
7
# batch size
bsize
=
7
# batch size
imshp
=
(
5
,
4
)
# image shape
imshp
=
(
5
,
4
)
# image shape
print
>>
sys
.
stderr
,
"WARNING: only square shape tested"
kshps
=
[(
2
,
3
)]
kshps
=
[(
2
,
3
)]
nkern
=
6
# nb kernel
nkern
=
6
# nb kernel
ssizes
=
[(
1
,
1
)]
#step size
ssizes
=
[(
1
,
1
)]
#step size
...
@@ -72,7 +71,6 @@ class TestConvOp(unittest.TestCase):
...
@@ -72,7 +71,6 @@ class TestConvOp(unittest.TestCase):
# fixed parameters
# fixed parameters
bsize
=
7
# batch size
bsize
=
7
# batch size
imshp
=
(
5
,
4
)
# image shape
imshp
=
(
5
,
4
)
# image shape
print
>>
sys
.
stderr
,
"WARNING: only square shape tested"
kshps
=
[(
2
,
3
)]
kshps
=
[(
2
,
3
)]
nkern
=
6
# nb kernel
nkern
=
6
# nb kernel
ssizes
=
[(
1
,
1
)]
#step size
ssizes
=
[(
1
,
1
)]
#step size
...
@@ -102,8 +100,6 @@ class TestConvOp(unittest.TestCase):
...
@@ -102,8 +100,6 @@ class TestConvOp(unittest.TestCase):
#profmode = wraplinker.ProfileMode(OpWiseCLinker(), 'fast_run')
#profmode = wraplinker.ProfileMode(OpWiseCLinker(), 'fast_run')
tconvop
,
tscipy
,
tconv2
=
[],
[],
[]
tconvop
,
tscipy
,
tconv2
=
[],
[],
[]
tconvop_kern
,
tconvop_batch
=
[],
[]
tconvop_gemm
=
[]
for
conv_mode
in
convmodes
:
for
conv_mode
in
convmodes
:
for
kshp
in
kshps
:
for
kshp
in
kshps
:
...
@@ -114,7 +110,7 @@ class TestConvOp(unittest.TestCase):
...
@@ -114,7 +110,7 @@ class TestConvOp(unittest.TestCase):
# now test with real values
# now test with real values
img2d
=
1
+
N
.
arange
(
bsize
*
N
.
prod
(
imshp
))
.
reshape
((
bsize
,)
+
imshp
)
img2d
=
1
+
N
.
arange
(
bsize
*
N
.
prod
(
imshp
))
.
reshape
((
bsize
,)
+
imshp
)
#
print 'img2d', img2d
#
print 'img2d', img2d
img1d
=
img2d
.
reshape
(
bsize
,
-
1
)
img1d
=
img2d
.
reshape
(
bsize
,
-
1
)
# create filters (need to be flipped to use convolve2d)
# create filters (need to be flipped to use convolve2d)
...
@@ -123,100 +119,37 @@ class TestConvOp(unittest.TestCase):
...
@@ -123,100 +119,37 @@ class TestConvOp(unittest.TestCase):
# compute with new convolve2 (no timing info)
# compute with new convolve2 (no timing info)
output4
,
outshp4
=
convolve2
(
kerns
,
kshp
,
nkern
,
input
,
\
output4
,
outshp4
=
convolve2
(
kerns
,
kshp
,
nkern
,
input
,
\
imshp
,
bsize
,
(
1
,
1
),
bias
=
bias
,
mode
=
conv_mode
)
imshp
,
bsize
,
(
1
,
1
),
bias
=
bias
,
mode
=
conv_mode
)
#
print 'output4', output4
#
print 'output4', output4
ttime1
=
time
.
time
()
ttime1
=
time
.
time
()
f
=
function
([
kerns
,
bias
,
input
],
output4
)
f
=
function
([
kerns
,
bias
,
input
],
output4
)
out4
=
f
(
filtersflipped
.
reshape
(
nkern
,
-
1
),
biasvals
,
img1d
)
out4
=
f
(
filtersflipped
.
reshape
(
nkern
,
-
1
),
biasvals
,
img1d
)
#
print 'out4', out4, img1d, filtersflipped
#
print 'out4', out4, img1d, filtersflipped
tconv2
+=
[
time
.
time
()
-
ttime1
]
tconv2
+=
[
time
.
time
()
-
ttime1
]
out4
=
out4
.
reshape
(
bsize
,
nkern
,
outshp4
[
1
],
outshp4
[
2
])
out4
=
out4
.
reshape
(
bsize
,
nkern
,
outshp4
[
1
],
outshp4
[
2
])
out4
=
out4
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out4
=
out4
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out4
=
out4
.
reshape
(
bsize
,
-
1
)
out4
=
out4
.
reshape
(
bsize
,
-
1
)
if
1
:
# compute with ConvOp (code_a)
# compute with ConvOp
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
inputs
=
dmatrix3
()
inputs
=
dmatrix3
()
kerns3
=
dmatrix3
()
kerns3
=
dmatrix3
()
bia
=
T
.
dscalar
()
bia
=
T
.
dscalar
()
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
use_gemm
=
False
)(
inputs
,
kerns3
)
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
)(
inputs
,
kerns3
)
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
ttime1
=
time
.
time
()
ttime1
=
time
.
time
()
out2_
=
f2
(
img2d
,
filtersflipped
)
out2_
=
f2
(
img2d
,
filtersflipped
)
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tconvop
+=
[
time
.
time
()
-
ttime1
]
tconvop
+=
[
time
.
time
()
-
ttime1
]
out2___
=
out2__
.
copy
()
out2___
=
out2__
.
copy
()
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3___
=
out3__
.
copy
()
out3___
=
out3__
.
copy
()
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
if
1
:
# compute with ConvOp with gemm if possible
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
inputs
=
dmatrix3
()
kerns3
=
dmatrix3
()
bia
=
T
.
dscalar
()
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
use_gemm
=
True
)(
inputs
,
kerns3
)
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
ttime1
=
time
.
time
()
out2_
=
f2
(
img2d
,
filtersflipped
)
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tconvop_gemm
+=
[
time
.
time
()
-
ttime1
]
out2___
=
out2__
.
copy
()
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3___
=
out3__
.
copy
()
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
if
1
:
# compute with ConvOp with unroll_batch
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
inputs
=
dmatrix3
()
kerns3
=
dmatrix3
()
bia
=
T
.
dscalar
()
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
use_gemm
=
False
,
unroll_batch
=
bsize
)(
inputs
,
kerns3
)
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
ttime1
=
time
.
time
()
out2_
=
f2
(
img2d
,
filtersflipped
)
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tconvop_batch
+=
[
time
.
time
()
-
ttime1
]
out2___
=
out2__
.
copy
()
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3___
=
out3__
.
copy
()
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
if
1
:
# compute with ConvOp with unroll_kern
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
inputs
=
dmatrix3
()
kerns3
=
dmatrix3
()
bia
=
T
.
dscalar
()
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
use_gemm
=
False
,
unroll_kern
=
bsize
)(
inputs
,
kerns3
)
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
ttime1
=
time
.
time
()
out2_
=
f2
(
img2d
,
filtersflipped
)
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tconvop_kern
+=
[
time
.
time
()
-
ttime1
]
out2___
=
out2__
.
copy
()
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3___
=
out3__
.
copy
()
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
# REFERENCE IMPLEMENTATION: compute output with convolve2d
# REFERENCE IMPLEMENTATION: compute output with convolve2d
fulloutshp
=
N
.
array
(
imshp
)
-
N
.
array
(
kshp
)
+
1
if
conv_mode
==
'valid'
\
fulloutshp
=
N
.
array
(
imshp
)
-
N
.
array
(
kshp
)
+
1
if
conv_mode
==
'valid'
\
...
@@ -244,14 +177,12 @@ class TestConvOp(unittest.TestCase):
...
@@ -244,14 +177,12 @@ class TestConvOp(unittest.TestCase):
assert
(
temp
<
1e-5
)
.
all
()
assert
(
temp
<
1e-5
)
.
all
()
print
'**** Convolution Profiling Results ****'
print
'**** Convolution Profiling Results ****'
print
'Scipy convolve2d processing time:
%.3
fs'
%
sum
(
tscipy
)
#
,tscipy
print
'Scipy convolve2d processing time:
%.3
fs'
%
sum
(
tscipy
),
tscipy
print
'ConvOp processing time:
%.3
fs'
%
sum
(
tconvop
)
#
,tconvop
print
'ConvOp processing time:
%.3
fs'
%
sum
(
tconvop
),
tconvop
print
'convolve2 processing time:
%.3
fs'
%
sum
(
tconv2
)
#
,tconv2
print
'convolve2 processing time:
%.3
fs'
%
sum
(
tconv2
),
tconv2
print
'speed up ConvOp vs convolve2d:
%.3
f'
%
(
N
.
asarray
(
tscipy
)
/
tconvop
)
.
mean
()
d
=
N
.
asarray
(
tscipy
)
/
tconvop
print
'speed up use_gemm :
%.3
f'
%
(
N
.
asarray
(
tconvop
)
/
tconvop_gemm
)
.
mean
()
print
'speed up ConvOp vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
print
'speed up unroll_batch :
%.3
f'
%
(
N
.
asarray
(
tconvop
)
/
tconvop_batch
)
.
mean
()
print
'speed up unroll_kern :
%.3
f'
%
(
N
.
asarray
(
tconvop
)
/
tconvop_kern
)
.
mean
()
def
test_multilayer_conv
(
self
):
def
test_multilayer_conv
(
self
):
# causes an atexit problem
# causes an atexit problem
...
@@ -274,13 +205,13 @@ class TestConvOp(unittest.TestCase):
...
@@ -274,13 +205,13 @@ class TestConvOp(unittest.TestCase):
ssizes
=
[(
1
,
1
),(
2
,
2
)]
#2,2)]
ssizes
=
[(
1
,
1
),(
2
,
2
)]
#2,2)]
#test speed
#test speed
bsize
=
10
# batch size
#
bsize = 10 # batch size
imshp_start
=
(
1
,
50
,
49
)
# imshp_start = (1,50,49)#un square shape to test more corner case.
kshps
=
([
11
,
12
],[
12
,
11
])
# kshps = ([11,12],[12,11])#un square shape to test more corner case.
nkerns
=
[
20
,
20
]
# per output pixel
#
nkerns = [20,20] # per output pixel
ssizes
=
[(
1
,
1
),]
#(1,1)]#(2,2) bugged
#
ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes
=
[
'valid'
,
'full'
]
#
convmodes = ['valid','full']
do_theano
=
False
#
do_theano=False
N
.
set_printoptions
(
threshold
=
N
.
nan
)
N
.
set_printoptions
(
threshold
=
N
.
nan
)
...
@@ -288,23 +219,25 @@ class TestConvOp(unittest.TestCase):
...
@@ -288,23 +219,25 @@ class TestConvOp(unittest.TestCase):
kerns
=
[
T
.
matrix
(),
T
.
dmatrix
()]
kerns
=
[
T
.
matrix
(),
T
.
dmatrix
()]
img
=
T
.
dmatrix
()
img
=
T
.
dmatrix
()
rng
=
N
.
random
.
RandomState
(
3423489
)
rng
=
N
.
random
.
RandomState
(
3423489
)
tctot
,
tpytot
,
t2ctot
,
t2pytot
,
ntot
,
convtot
=
[],
[],
[],
[],
[],
[]
tctot
,
tpytot
,
ntot
=
[],
[],
[]
dmatrix4
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
,
False
))
dmatrix4
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
,
False
))
inputs4
=
dmatrix4
()
inputs4
=
dmatrix4
()
kerns4
=
dmatrix4
()
kerns4
=
dmatrix4
()
assert
len
(
kshps
)
==
len
(
nkerns
)
==
len
(
kerns
)
assert
len
(
kshps
)
==
len
(
nkerns
)
==
len
(
kerns
)
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
def
do_test
(
conv_mode
,
ss
,
unroll_batch
=
0
,
unroll_kern
=
0
,
img
=
img
,
validate
=
True
,
conv_op_py
=
False
):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
# build actual input images
# build actual input images
imgval
=
rng
.
rand
(
bsize
,
imshp_start
[
0
],
imshp_start
[
1
],
imshp_start
[
2
])
imgval
=
rng
.
rand
(
bsize
,
imshp_start
[
0
],
imshp_start
[
1
],
imshp_start
[
2
])
imshp
=
imshp_start
imshp
=
imshp_start
# for each layer
# for each layer
for
kshp
,
kern
,
nkern
,
n_layer
in
zip
(
kshps
,
kerns
,
nkerns
,
range
(
len
(
kerns
))):
ntot
=
0
tctot
=
0
tpytot
=
0
for
kshp
,
kern
,
nkern
,
n_layer
in
zip
(
kshps
,
kerns
,
nkerns
,
range
(
len
(
kerns
))):
print
'************* layer
%
i ***************'
%
n_layer
print
'************* layer
%
i ***************'
%
n_layer
print
conv_mode
,
ss
,
n_layer
,
kshp
,
nkern
print
conv_mode
,
ss
,
n_layer
,
kshp
,
nkern
...
@@ -326,14 +259,15 @@ class TestConvOp(unittest.TestCase):
...
@@ -326,14 +259,15 @@ class TestConvOp(unittest.TestCase):
time1
=
time
.
time
()
time1
=
time
.
time
()
outval
=
N
.
zeros
(
N
.
r_
[
bsize
,
outshp
])
outval
=
N
.
zeros
(
N
.
r_
[
bsize
,
outshp
])
val
=
_valfrommode
(
conv_mode
)
if
validate
:
bval
=
_bvalfromboundary
(
'fill'
)
val
=
_valfrommode
(
conv_mode
)
for
b
in
range
(
bsize
):
# loop over batches
bval
=
_bvalfromboundary
(
'fill'
)
for
n
in
range
(
nkern
):
# loop over filters
for
b
in
range
(
bsize
):
# loop over batches
for
i
in
range
(
imshp
[
0
]):
# loop over input feature maps
for
n
in
range
(
nkern
):
# loop over filters
outval
[
b
,
n
,
...
]
+=
_convolve2d
(
\
for
i
in
range
(
imshp
[
0
]):
# loop over input feature maps
imgval
[
b
,
i
,
...
],
w_flip
[
n
,
i
,
...
],
1
,
val
,
bval
,
0
)[
0
::
ss
[
0
],
0
::
ss
[
1
]]
outval
[
b
,
n
,
...
]
+=
_convolve2d
(
\
ntot
+=
[
time
.
time
()
-
time1
]
imgval
[
b
,
i
,
...
],
w_flip
[
n
,
i
,
...
],
1
,
val
,
bval
,
0
)[
0
::
ss
[
0
],
0
::
ss
[
1
]]
ntot
+=
time
.
time
()
-
time1
if
do_theano
:
if
do_theano
:
####### test with new sp.convolve2 function ######
####### test with new sp.convolve2 function ######
...
@@ -353,18 +287,16 @@ class TestConvOp(unittest.TestCase):
...
@@ -353,18 +287,16 @@ class TestConvOp(unittest.TestCase):
assert
(
N
.
abs
(
hidval
-
hidval1
)
<
1e-5
)
.
all
()
assert
(
N
.
abs
(
hidval
-
hidval1
)
<
1e-5
)
.
all
()
temp
=
N
.
abs
(
outval
.
reshape
(
bsize
,
-
1
)
-
hidval
)
temp
=
N
.
abs
(
outval
.
reshape
(
bsize
,
-
1
)
-
hidval
)
assert
(
temp
<
1e-5
)
.
all
()
if
validate
:
assert
(
temp
<
1e-5
)
.
all
()
else
:
else
:
hid
=
img
#we don't need it, but it make the flow easier flow
hid
=
img
#we don't need it, but it make the flow easier flow
convtot
+=
[
-
1
]
tctot
+=
[
-
1
]
tpytot
+=
[
-
1
]
hidval
=
outval
.
copy
()
#to keep the same memory
hidval
=
outval
.
copy
()
#to keep the same memory
hidval1
=
outval
.
copy
()
hidval1
=
outval
.
copy
()
# ConvOp
# ConvOp
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
unroll_
kern
=
10
)(
inputs4
,
kerns4
)
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
unroll_
batch
=
unroll_batch
,
unroll_kern
=
unroll_kern
)(
inputs4
,
kerns4
)
l1shp
=
N
.
hstack
((
nkern
,
l1shp
=
N
.
hstack
((
nkern
,
getFilterOutShp
(
imshp
,
kshp
,
ss
,
conv_mode
)))
getFilterOutShp
(
imshp
,
kshp
,
ss
,
conv_mode
)))
propup2
=
function
([
inputs4
,
kerns4
],
conv_op
)
propup2
=
function
([
inputs4
,
kerns4
],
conv_op
)
...
@@ -373,30 +305,90 @@ class TestConvOp(unittest.TestCase):
...
@@ -373,30 +305,90 @@ class TestConvOp(unittest.TestCase):
time1
=
time
.
time
()
time1
=
time
.
time
()
hidval2_
=
propup2
(
imgval
,
w_flip
)
hidval2_
=
propup2
(
imgval
,
w_flip
)
hidval2
=
hidval2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
hidval2
=
hidval2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
t
2ctot
+=
[
time
.
time
()
-
time1
]
t
ctot
+=
time
.
time
()
-
time1
time1
=
time
.
time
()
if
conv_op_py
:
# hidval3_ = propup3(imgval,w_flip)
time1
=
time
.
time
()
# hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
hidval3_
=
propup3
(
imgval
,
w_flip
)
t2pytot
+=
[
time
.
time
()
-
time1
]
hidval3
=
hidval3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
# assert (N.abs(hidval2-hidval3)<1e-5).all()
tpytot
+=
time
.
time
()
-
time1
assert
(
N
.
abs
(
hidval2
-
hidval3
)
<
1e-5
)
.
all
()
else
:
tpytot
+=
0
temp
=
N
.
abs
(
outval
-
hidval2
)
if
validate
:
assert
(
temp
<
1e-5
)
.
all
()
temp
=
N
.
abs
(
outval
-
hidval2
)
# temp = N.abs(outval - hidval3)
assert
(
temp
<
1e-5
)
.
all
()
# assert (temp < 1e-5).all()
if
validate
and
conv_op_py
:
temp
=
N
.
abs
(
outval
-
hidval3
)
assert
(
temp
<
1e-5
)
.
all
()
img
,
imshp
=
hid
,
tuple
(
outshp
)
img
,
imshp
=
hid
,
tuple
(
outshp
)
imgval
=
outval
.
reshape
(
bsize
,
outshp
[
0
],
outshp
[
1
],
outshp
[
2
])
imgval
=
outval
.
reshape
(
bsize
,
outshp
[
0
],
outshp
[
1
],
outshp
[
2
])
return
tctot
,
tpytot
,
ntot
if
False
:
# calculate the speed up of different combination of unroll
# put the paramter to the same you will try.
validate
=
False
# we don't validate the result to have it much faster!
unroll_batch
=
[
0
,
1
,
2
,
4
,
5
,
10
,
20
]
unroll_kern
=
[
0
,
2
,
4
,
5
,
10
,
20
]
# unroll_batch = [0,2,5]
# unroll_kern = [0,2,5]
bsize
=
20
# batch size
imshp_start
=
(
1
,
50
,
49
)
#un square shape to test more corner case.
kshps
=
([
11
,
12
],[
12
,
11
])
#un square shape to test more corner case.
nkerns
=
[
20
,
20
]
# per output pixel
ssizes
=
[(
1
,
1
),]
#(1,1)]#(2,2) bugged
convmodes
=
[
'valid'
,
'full'
]
do_theano
=
False
a
=
T
.
dmatrix
()
kerns
=
[
a
for
i
in
nkerns
]
assert
len
(
kshps
)
==
len
(
nkerns
)
==
len
(
kerns
)
timing
=
N
.
zeros
((
len
(
unroll_batch
),
len
(
unroll_kern
),
3
))
t_b_k
=
[]
for
unroll_b
,
n_b
in
zip
(
unroll_batch
,
range
(
len
(
unroll_batch
))):
for
unroll_k
,
n_k
in
zip
(
unroll_kern
,
range
(
len
(
unroll_kern
))):
t_b_k
+=
[
str
(
unroll_b
)
+
"/"
+
str
(
unroll_k
)]
tctot
,
tpytot
,
ntot
=
[],[],[]
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
do_test
(
conv_mode
,
ss
,
unroll_batch
=
unroll_b
,
unroll_kern
=
unroll_k
,
validate
=
validate
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
timing
[
n_b
,
n_k
]
=
[
sum
(
tctot
),
sum
(
tpytot
),
sum
(
ntot
)]
# print timing
t
=
timing
[:,:,
0
]
#We select only the c timing.
print
t_b_k
print
t
print
"max
%.3
fs"
%
t
.
max
(),
"max param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmax
()]
print
"min
%.3
fs"
%
t
.
min
(),
"min param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmin
()]
print
"speedup
%.3
fx"
%
(
t
.
max
()
/
t
.
min
())
return
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
do_test
(
conv_mode
,
ss
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
print
'**** Multilayer Convolution Profiling Results ****'
print
'**** Multilayer Convolution Profiling Results ****'
print
'Numpy convolve2d processing time:
%.3
fs'
%
sum
(
ntot
),
ntot
print
'Numpy convolve2d processing time:
%.3
fs'
%
sum
(
ntot
),
ntot
print
'c Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
t2ctot
),
t2ctot
print
'c Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
tctot
),
tctot
print
'py Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
t2pytot
),
t2pytot
print
'py Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
tpytot
),
tpytot
print
'convolve processing time:
%.3
fs'
%
sum
(
convtot
),
convtot
d
=
N
.
asarray
(
ntot
)
/
tctot
d
=
N
.
asarray
(
ntot
)
/
t2ctot
print
'speed up c theano(ConvOp) vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
print
'speed up c theano(ConvOp) vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
d
=
N
.
asarray
(
ntot
)
/
t
2
pytot
d
=
N
.
asarray
(
ntot
)
/
tpytot
print
'speed up py theano(ConvOp) vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
print
'speed up py theano(ConvOp) vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
...
@@ -417,7 +409,7 @@ class TestConvOp(unittest.TestCase):
...
@@ -417,7 +409,7 @@ class TestConvOp(unittest.TestCase):
visdim
=
1
if
len
(
imshp
)
!=
3
else
imshp
[
0
]
visdim
=
1
if
len
(
imshp
)
!=
3
else
imshp
[
0
]
for
kshp
in
kshps
:
for
kshp
in
kshps
:
imgvals
=
N
.
random
.
random
(
N
.
hstack
((
bsize
,
imshp
)))
imgvals
=
N
.
random
.
random
(
N
.
hstack
((
bsize
,
imshp
)))
#
print 'imgvals.shape = ', imgvals.shape
print
'imgvals.shape = '
,
imgvals
.
shape
imgvals
=
imgvals
.
reshape
(
bsize
,
-
1
)
imgvals
=
imgvals
.
reshape
(
bsize
,
-
1
)
if
visdim
==
1
:
if
visdim
==
1
:
...
@@ -460,3 +452,10 @@ class TestConvOp(unittest.TestCase):
...
@@ -460,3 +452,10 @@ class TestConvOp(unittest.TestCase):
kernvals
=
kernvals
.
reshape
(
nkern
,
-
1
)
kernvals
=
kernvals
.
reshape
(
nkern
,
-
1
)
utt
.
verify_grad
(
testf
,
[
imgvals
,
kernvals
])
utt
.
verify_grad
(
testf
,
[
imgvals
,
kernvals
])
if
__name__
==
'__main__'
:
t
=
TestConvOp
(
"test_convolution"
)
t
.
test_convolution
()
# t.test_multilayer_conv()
# from theano.tests import main
# main("test_sp")
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论