Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9117f696
提交
9117f696
authored
6月 02, 2009
作者:
bergstra@ip05.m
浏览文件
操作
浏览文件
下载
差异文件
merge
上级
d9ee20e6
a128912a
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
192 行增加
和
433 行删除
+192
-433
conv.py
theano/sandbox/conv.py
+51
-291
test_conv.py
theano/sandbox/test_conv.py
+141
-142
没有找到文件。
theano/sandbox/conv.py
浏览文件 @
9117f696
...
...
@@ -16,9 +16,6 @@ class ConvOp(Op):
In development.
"""
__attrnames
=
[
'imshp'
,
'kshp'
,
'nkern'
,
'bsize'
,
'dx'
,
'dy'
,
'out_mode'
]
"""These attributes uniquely identify the behaviour of this op for given inputs"""
def
__init__
(
self
,
imshp
,
kshp
,
nkern
,
bsize
,
dx
,
dy
,
output_mode
=
'valid'
,
unroll_batch
=
0
,
unroll_kern
=
0
):
"""
unroll_batch. If >0 will use a version that will unroll the batch loop by the value of the option. By default don't use this version of the code.
...
...
@@ -40,7 +37,7 @@ class ConvOp(Op):
self
.
unroll_batch
=
unroll_batch
self
.
unroll_kern
=
unroll_kern
assert
not
(
unroll_batch
>
0
and
unroll_kern
>
0
)
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
raise
Exception
(
"unroll_batch(
%
s) should be 0 or a multiple of bsize(
%
s)"
%
(
str
(
self
.
unroll_batch
),
str
(
self
.
bsize
)))
if
self
.
unroll_kern
>
0
and
self
.
nkern
%
unroll_kern
!=
0
:
...
...
@@ -54,24 +51,12 @@ class ConvOp(Op):
raise
Exception
(
"Mode
%
s not implemented"
%
self
.
out_mode
)
assert
(
self
.
outshp
>=
0
)
.
all
()
hashval
=
hash
(
type
(
self
))
for
a
in
self
.
__attrnames
:
hashval
=
hashval
^
hash
(
getattr
(
self
,
a
))
self
.
__hashval
=
hashval
def
__eq__
(
self
,
other
):
if
type
(
self
)
!=
type
(
other
):
return
False
for
a
in
self
.
__attrnames
:
if
getattr
(
self
,
a
)
!=
getattr
(
other
,
a
):
return
False
return
True
# def __eq__(self, other):
# raise Error("Not implemented")
def
__hash__
(
self
):
return
self
.
__hashval
def
__str__
(
self
):
return
"ConvOp{"
+
","
.
join
(
str
((
a
,
getattr
(
self
,
a
)))
for
a
in
self
.
__attrnames
)
+
"}"
# def __hash__(self):
# raise Error("Not implemented")
def
make_node
(
self
,
inputs
,
kerns
):
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
...
...
@@ -190,12 +175,10 @@ using namespace std;
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
d
[
"type"
]
=
"float"
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"type"
]
=
"double"
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
if
self
.
unroll_kern
>
0
:
print
"return unrolled kern code by"
,
self
.
unroll_kern
return
gen_conv_code_unroll_kern
(
d
,
self
.
unroll_kern
)
if
self
.
unroll_batch
>
0
:
print
"return unrolled batch code by"
,
self
.
unroll_batch
return
gen_conv_code_unroll_batch
(
d
,
self
.
unroll_batch
)
if
self
.
unroll_kern
>
0
and
self
.
unroll_batch
>
0
:
print
"return unrolled batch and kern code by"
,
self
.
unroll_batch
,
self
.
unroll_kern
return
gen_conv_code_unroll_batch_kern
(
d
,
self
.
unroll_batch
,
self
.
unroll_kern
)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5? under 10?
if
self
.
out_mode
==
'valid'
:
...
...
@@ -648,17 +631,29 @@ free(kbuf);
Py_XDECREF(img2d);
"""
def
gen_conv_code_unroll_batch
(
d
,
unloop_size
=
1
):
def
gen_conv_code_unroll_batch_kern
(
d
,
unroll_bsize
=
1
,
unroll_ksize
=
1
):
""" c_code for ConvOp that unroll the batch size loop
"""
d
[
"unloop_size"
]
=
unloop_size
def
my_dup
(
st
):
assert
unroll_bsize
>
0
and
unroll_ksize
>
0
d
[
"unroll_bsize"
]
=
unroll_bsize
d
[
"unroll_ksize"
]
=
unroll_ksize
def
my_dup
(
st
,
size
):
s
=
""
for
i
in
range
(
unloop_
size
):
d
[
"un
loop
_iter"
]
=
i
for
i
in
range
(
size
):
d
[
"un
roll
_iter"
]
=
i
s
+=
st
%
d
return
s
return
s
+
"
\n
"
def
my_dup2
(
st
):
s
=
""
iter
=
0
for
i
in
range
(
unroll_bsize
):
d
[
"unroll_biter"
]
=
i
for
j
in
range
(
unroll_ksize
):
d
[
"unroll_kiter"
]
=
j
d
[
"unroll_iter"
]
=
iter
iter
+=
1
s
+=
st
%
d
return
s
+
"
\n
"
ret
=
"""
int mode=-1,typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
...
...
@@ -775,8 +770,8 @@ if ((!%(z)s)
int Os[2];
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;}
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;}
for(int b=0;b<
%(self_bsize)
s ;b+=
%(un
loop_
size)
s){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern+
+
){
for(int b=0;b<
%(self_bsize)
s ;b+=
%(un
roll_b
size)
s){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern+
=
%(unroll_ksize)
s
){
//assertions
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
...
...
@@ -784,14 +779,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != sizeof(
%(type)
s))
%(fail)
s;
"""
%
d
ret
+=
my_dup
(
"
%(type)
s * __restrict__ out
%(unloop_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(
%(z)
s,b+
%(unloop_iter)
s,n_kern));
\n
"
)
ret
+=
my_dup
(
"for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out
%(un
loop_iter)
s[i] = 0;"
)
ret
+=
my_dup
2
(
"
%(type)
s * __restrict__ out
%(unroll_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(
%(z)
s,b+
%(unroll_biter)
s,n_kern+
%(unroll_kiter)
s));
"
)
ret
+=
my_dup
(
"for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out
%(un
roll_iter)
s[i] = 0;"
,
unroll_bsize
*
unroll_ksize
)
ret
+=
"""
for(int stack_size=0;stack_size<
%(self_imshp0)
s;stack_size++){
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * __restrict__ in
%(unloop_iter)
d=(
%(type)
s *)(PyArray_GETPTR2(img2d,b+
%(unloop_iter)
s,stack_size));
\n
"
)
ret
+=
my_dup
(
"const
%(type)
s * __restrict__ in
%(unroll_iter)
d=(
%(type)
s *)(PyArray_GETPTR2(img2d,b+
%(unroll_iter)
s,stack_size));"
,
unroll_bsize
)
ret
+=
my_dup
(
"const
%(type)
s * __restrict__ hvals
%(unroll_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(filtersflipped,n_kern+
%(unroll_iter)
s,stack_size));"
,
unroll_ksize
)
ret
+=
"""
const
%(type)
s * __restrict__ hvals=(
%(type)
s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
int new_m;
...
...
@@ -802,7 +797,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
for (int n=0; n < Os[1]; n++) { // loop over columns
"""
%
d
ret
+=
my_dup
(
"
%(type)
s sum
%(un
loop_iter)
s=0;
\n
"
)
ret
+=
my_dup
(
"
%(type)
s sum
%(un
roll_iter)
s=0;"
,
unroll_bsize
*
unroll_ksize
)
ret
+=
"""
// Sum over kernel, if index into image is out of bounds
...
...
@@ -811,12 +806,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
int ind0 = (new_m-j);
if(mode==FULL){
const
%(type)
s * idx_hvals=&hvals[j*dim_ker[1]];
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * idx_hvals
%(unroll_iter)
s=&hvals
%(unroll_iter)
s[j*dim_ker[1]];"
,
unroll_ksize
)
ret
+=
"""
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals[k] * fill_value;
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s += idx_hvals
%(unroll_kiter)
s[k] * fill_value;
"
)
ret
+=
"""
}
}else{
...
...
@@ -827,7 +824,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
for(k=0;k<max_k;k++){
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals[k] * fill_value;
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s += idx_hvals
%(unroll_kiter)
s[k] * fill_value;
"
)
ret
+=
"""
}
}else {k=max_k;}
...
...
@@ -835,285 +832,48 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unloop_size)s){
//do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]);
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * idx_in
%(un
loop_iter)
s=&in
%(unloop_iter)
s[ind0*dim_im[1]];
\n
"
)
ret
+=
my_dup
(
"const
%(type)
s * idx_in
%(un
roll_iter)
s=&in
%(unroll_iter)
s[ind0*dim_im[1]];"
,
unroll_bsize
)
ret
+=
"""
for (int ind1=n-k; k<max_k; k++,ind1--) {
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals[k] * idx_in
%(unloop_iter)
s[ind1];
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s+= idx_hvals
%(unroll_kiter)
s[k] * idx_in
%(unroll_biter)
s[ind1];
"
)
ret
+=
"""
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker[1];k++){
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals[k] * fill_value;
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s += idx_hvals
%(unroll_kiter)
s[k] * fill_value;
"
)
ret
+=
"""
}
}
}else{
}else{
//valid mode
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s* idx_in
%(unloop_iter)
s=&in
%(unloop_iter)
s[ind0*dim_im[1]];
\n
"
)
ret
+=
my_dup
(
"const
%(type)
s* idx_in
%(unroll_iter)
s=&in
%(unroll_iter)
s[ind0*dim_im[1]];"
,
unroll_bsize
)
ret
+=
my_dup
(
"const
%(type)
s* idx_hvals
%(unroll_iter)
s=&hvals
%(unroll_iter)
s[j*dim_ker[1]];"
,
unroll_ksize
)
ret
+=
"""
const
%(type)
s* idx_hvals=&hvals[j*dim_ker[1]];
int new_n = (n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+=idx_hvals[k]*idx_in
%(unloop_iter)
s[last];
\n
"
)
ret
+=
my_dup
2
(
"sum
%(unroll_iter)
s+=idx_hvals
%(unroll_kiter)
s[k]*idx_in
%(unroll_biter)
s[last];
"
)
ret
+=
"""
}
}
}//for j
"""
%
d
ret
+=
my_dup
(
"out
%(unloop_iter)
s[m*dim_zz[1]+n]
%(affectation)
s sum
%(unloop_iter)
s;
\n
"
)
# ret+=my_dup("cout<<sum%(unloop_iter)s<<endl;")
# ret+=my_dup("out%(unroll_iter)s[m*dim_zz[1]+n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize)
ret
+=
my_dup
(
"out
%(unroll_iter)
s[m*dim_zz[1]+n]
%(affectation)
s sum
%(unroll_iter)
s;"
,
unroll_bsize
*
unroll_ksize
)
# ret+=my_dup("cout<<sum%(unroll_iter)s<<endl;",unroll_bsize)
ret
+=
"""
}//for n
}//for m
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out0[i];
std::cout << "
\\
n";
}
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
return
ret
def
gen_conv_code_unroll_kern
(
d
,
unloop_size
=
1
):
""" c_code for ConvOp that unroll the batch size loop
"""
d
[
"unloop_size"
]
=
unloop_size
def
my_dup
(
st
):
s
=
""
for
i
in
range
(
unloop_size
):
d
[
"unloop_iter"
]
=
i
s
+=
st
%
d
return
s
ret
=
"""
int mode=-1,typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const
%(type)
s fill_value = 0;
int type_im=PyArray_TYPE(
%(img2d)
s);
int type_ker=PyArray_TYPE(
%(filtersflipped)
s);
npy_intp dim_zz[2]={
%(self_outshp0)
s,
%(self_outshp1)
s};
npy_intp dim_im[2]={
%(self_imshp1)
s,
%(self_imshp2)
s};
npy_intp dim_ker[2]={
%(self_kshp0)
s,
%(self_kshp1)
s};
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
string s="
%(self_out_mode)
s";
if(
%(img2d)
s->nd==2){
img2d_dim[3]=
%(img2d)
s->dimensions[1];
img2d_dim[2]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==3){
img2d_dim[3]=
%(img2d)
s->dimensions[2];
img2d_dim[2]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==4){
img2d_dim[3]=
%(img2d)
s->dimensions[3];
img2d_dim[2]=
%(img2d)
s->dimensions[2];
img2d_dim[1]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)
s;
}
if(
%(filtersflipped)
s->nd==3){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else if(
%(filtersflipped)
s->nd==4){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[3];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[1]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else{
PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape");
%(fail)
s;
}
img2d = PyArray_Newshape(
%(img2d)
s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(
%(type)
s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)
s;
}
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(
%(filtersflipped)
s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(
%(type)
s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)
s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(s=="valid") mode=0;
else if(s=="full") mode=2;
else {PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported");
%(fail)
s;};
typenum = PyArray_ObjectType((PyObject*)
%(img2d)
s, 0);
typenum_f = PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;}
if (!img2d)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
if ((!
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=4
||(
%(z)
s->dimensions[0] !=
%(self_bsize)
s)
||(
%(z)
s->dimensions[1] !=
%(self_nkern)
s)
||(
%(z)
s->dimensions[2] != dim_zz[0])
|| (
%(z)
s->dimensions[3] != dim_zz[1])
)
{
if (
%(z)
s) Py_DECREF(
%(z)
s);
npy_intp dims[4] = {0,0,0,0};
if(!dims)
%(fail)
s;
dims[0]=
%(self_bsize)
s;
dims[1]=
%(self_nkern)
s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)
s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
//PyArray_FILLWBYTE((PyObject*)
%(z)
s,0);
}
int Os[2];
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;}
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;}
for(int b=0;b<
%(self_bsize)
s;b++){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern+=
%(unloop_size)
s){
//assertions
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[1] !=
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != sizeof(
%(type)
s))
%(fail)
s;
"""
%
d
ret
+=
my_dup
(
"
%(type)
s * __restrict__ out
%(unloop_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(
%(z)
s,b,n_kern+
%(unloop_iter)
s));"
)
ret
+=
my_dup
(
"for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out
%(unloop_iter)
s[i] = 0;"
)
ret
+=
"""
for(int stack_size=0;stack_size<
%(self_imshp0)
s;stack_size++){
const
%(type)
s * __restrict__ in=(
%(type)
s *)(PyArray_GETPTR2(img2d,b,stack_size));
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * __restrict__ hvals
%(unloop_iter)
s=(
%(type)
s *)(PyArray_GETPTR2(filtersflipped,n_kern+
%(unloop_iter)
s,stack_size));"
)
ret
+=
"""
int new_m;
for (int m=0; m < Os[0]; m++) {
// Reposition index into input image based on requested output size
if (mode == FULL) new_m = m ;
else new_m = (m+dim_ker[0]-1);
for (int n=0; n < Os[1]; n++) { // loop over columns
"""
%
d
ret
+=
my_dup
(
"
%(type)
s sum
%(unloop_iter)
s=0;"
)
ret
+=
"""
// Sum over kernel, if index into image is out of bounds
// fill with the value
for (int j=0; j < dim_ker[0]; j++) {
int ind0 = (new_m-j);
if(mode==FULL){
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s * idx_hvals
%(unloop_iter)
s=&hvals
%(unloop_iter)
s[j*dim_ker[1]];"
)
ret
+=
"""
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s += idx_hvals
%(unloop_iter)
s[k] * fill_value;"
)
ret
+=
"""
}
}else{
//do the part where kernel is to the right of the img
int k=0,max_k=max((int)(n-dim_im[1])+1,0);
if(fill_value!=0){
for(k=0;k<max_k;k++){
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s += idx_hvals
%(unloop_iter)
s[k]*fill_value;"
)
ret
+=
"""
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]);
const
%(type)
s * idx_in=&in[ind0*dim_im[1]];
for (int ind1=n-k; k<max_k; k++,ind1--) {
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s += idx_hvals
%(unloop_iter)
s[k] * idx_in[ind1];"
)
ret
+=
"""
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker[1];k++){
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s+= idx_hvals
%(unloop_iter)
s[k]*fill_value;"
)
ret
+=
"""
}
}
}else{
const
%(type)
s* idx_in=&in[ind0*dim_im[1]];
"""
%
d
ret
+=
my_dup
(
"const
%(type)
s* idx_hvals
%(unloop_iter)
s=&hvals
%(unloop_iter)
s[j*dim_ker[1]];"
)
ret
+=
"""
int new_n = (n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
"""
%
d
ret
+=
my_dup
(
"sum
%(unloop_iter)
s += idx_hvals
%(unloop_iter)
s[k]*idx_in[last];"
)
ret
+=
"""
}
}
}//for j
"""
%
d
ret
+=
my_dup
(
"out
%(unloop_iter)
s[m*dim_zz[1]+n]
%(affectation)
s sum
%(unloop_iter)
s;"
)
ret
+=
"""
}//for n
}//for m
}//for stack_size
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
%
d
return
ret
theano/sandbox/test_conv.py
浏览文件 @
9117f696
...
...
@@ -50,10 +50,10 @@ class TestConvOp(unittest.TestCase):
nkern
=
5
# nb kernel
ssizes
=
((
1
,
1
),(
2
,
2
),(
3
,
3
),(
4
,
4
))
#step size
convmodes
=
(
'full'
,
'valid'
)
elif
1
:
# fixed parameters
like NORB JOB
bsize
=
4
# batch size
imshp
=
(
96
,
96
)
# image shape
elif
0
:
# fixed parameters
bsize
=
10
# batch size
imshp
=
(
50
,
50
)
# image shape
print
>>
sys
.
stderr
,
"WARNING: only square shape tested"
kshps
=
[(
12
,
12
),
(
12
,
12
)]
nkern
=
20
# nb kernel
...
...
@@ -63,7 +63,6 @@ class TestConvOp(unittest.TestCase):
# fixed parameters
bsize
=
7
# batch size
imshp
=
(
5
,
4
)
# image shape
print
>>
sys
.
stderr
,
"WARNING: only square shape tested"
kshps
=
[(
2
,
3
)]
nkern
=
6
# nb kernel
ssizes
=
[(
1
,
1
)]
#step size
...
...
@@ -72,7 +71,6 @@ class TestConvOp(unittest.TestCase):
# fixed parameters
bsize
=
7
# batch size
imshp
=
(
5
,
4
)
# image shape
print
>>
sys
.
stderr
,
"WARNING: only square shape tested"
kshps
=
[(
2
,
3
)]
nkern
=
6
# nb kernel
ssizes
=
[(
1
,
1
)]
#step size
...
...
@@ -102,8 +100,6 @@ class TestConvOp(unittest.TestCase):
#profmode = wraplinker.ProfileMode(OpWiseCLinker(), 'fast_run')
tconvop
,
tscipy
,
tconv2
=
[],
[],
[]
tconvop_kern
,
tconvop_batch
=
[],
[]
tconvop_gemm
=
[]
for
conv_mode
in
convmodes
:
for
kshp
in
kshps
:
...
...
@@ -114,7 +110,7 @@ class TestConvOp(unittest.TestCase):
# now test with real values
img2d
=
1
+
N
.
arange
(
bsize
*
N
.
prod
(
imshp
))
.
reshape
((
bsize
,)
+
imshp
)
#
print 'img2d', img2d
#
print 'img2d', img2d
img1d
=
img2d
.
reshape
(
bsize
,
-
1
)
# create filters (need to be flipped to use convolve2d)
...
...
@@ -123,100 +119,37 @@ class TestConvOp(unittest.TestCase):
# compute with new convolve2 (no timing info)
output4
,
outshp4
=
convolve2
(
kerns
,
kshp
,
nkern
,
input
,
\
imshp
,
bsize
,
(
1
,
1
),
bias
=
bias
,
mode
=
conv_mode
)
#
print 'output4', output4
#
print 'output4', output4
ttime1
=
time
.
time
()
f
=
function
([
kerns
,
bias
,
input
],
output4
)
out4
=
f
(
filtersflipped
.
reshape
(
nkern
,
-
1
),
biasvals
,
img1d
)
#
print 'out4', out4, img1d, filtersflipped
#
print 'out4', out4, img1d, filtersflipped
tconv2
+=
[
time
.
time
()
-
ttime1
]
out4
=
out4
.
reshape
(
bsize
,
nkern
,
outshp4
[
1
],
outshp4
[
2
])
out4
=
out4
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out4
=
out4
.
reshape
(
bsize
,
-
1
)
if
1
:
# compute with ConvOp (code_a)
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
inputs
=
dmatrix3
()
kerns3
=
dmatrix3
()
bia
=
T
.
dscalar
()
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
use_gemm
=
False
)(
inputs
,
kerns3
)
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
ttime1
=
time
.
time
()
out2_
=
f2
(
img2d
,
filtersflipped
)
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tconvop
+=
[
time
.
time
()
-
ttime1
]
out2___
=
out2__
.
copy
()
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3___
=
out3__
.
copy
()
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
if
1
:
# compute with ConvOp with gemm if possible
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
inputs
=
dmatrix3
()
kerns3
=
dmatrix3
()
bia
=
T
.
dscalar
()
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
use_gemm
=
True
)(
inputs
,
kerns3
)
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
ttime1
=
time
.
time
()
out2_
=
f2
(
img2d
,
filtersflipped
)
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tconvop_gemm
+=
[
time
.
time
()
-
ttime1
]
out2___
=
out2__
.
copy
()
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3___
=
out3__
.
copy
()
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
if
1
:
# compute with ConvOp with unroll_batch
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
inputs
=
dmatrix3
()
kerns3
=
dmatrix3
()
bia
=
T
.
dscalar
()
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
use_gemm
=
False
,
unroll_batch
=
bsize
)(
inputs
,
kerns3
)
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
ttime1
=
time
.
time
()
out2_
=
f2
(
img2d
,
filtersflipped
)
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tconvop_batch
+=
[
time
.
time
()
-
ttime1
]
out2___
=
out2__
.
copy
()
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3___
=
out3__
.
copy
()
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
if
1
:
# compute with ConvOp with unroll_kern
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
inputs
=
dmatrix3
()
kerns3
=
dmatrix3
()
bia
=
T
.
dscalar
()
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
use_gemm
=
False
,
unroll_kern
=
bsize
)(
inputs
,
kerns3
)
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
ttime1
=
time
.
time
()
out2_
=
f2
(
img2d
,
filtersflipped
)
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tconvop_kern
+=
[
time
.
time
()
-
ttime1
]
out2___
=
out2__
.
copy
()
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3___
=
out3__
.
copy
()
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
# compute with ConvOp
dmatrix3
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
))
inputs
=
dmatrix3
()
kerns3
=
dmatrix3
()
bia
=
T
.
dscalar
()
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
)(
inputs
,
kerns3
)
f2
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"c"
))
f3
=
function
([
inputs
,
kerns3
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
ttime1
=
time
.
time
()
out2_
=
f2
(
img2d
,
filtersflipped
)
out2__
=
out2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tconvop
+=
[
time
.
time
()
-
ttime1
]
out2___
=
out2__
.
copy
()
out2
=
out2___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
out3_
=
f3
(
img2d
,
filtersflipped
)
out3__
=
out3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
out3___
=
out3__
.
copy
()
out3
=
out3___
+
biasvals
.
reshape
(
1
,
nkern
,
1
,
1
)
assert
(
N
.
abs
(
out2_
-
out3_
)
<
1e-5
)
.
all
()
# REFERENCE IMPLEMENTATION: compute output with convolve2d
fulloutshp
=
N
.
array
(
imshp
)
-
N
.
array
(
kshp
)
+
1
if
conv_mode
==
'valid'
\
...
...
@@ -244,14 +177,12 @@ class TestConvOp(unittest.TestCase):
assert
(
temp
<
1e-5
)
.
all
()
print
'**** Convolution Profiling Results ****'
print
'Scipy convolve2d processing time:
%.3
fs'
%
sum
(
tscipy
)
#
,tscipy
print
'ConvOp processing time:
%.3
fs'
%
sum
(
tconvop
)
#
,tconvop
print
'convolve2 processing time:
%.3
fs'
%
sum
(
tconv2
)
#
,tconv2
print
'Scipy convolve2d processing time:
%.3
fs'
%
sum
(
tscipy
),
tscipy
print
'ConvOp processing time:
%.3
fs'
%
sum
(
tconvop
),
tconvop
print
'convolve2 processing time:
%.3
fs'
%
sum
(
tconv2
),
tconv2
print
'speed up ConvOp vs convolve2d:
%.3
f'
%
(
N
.
asarray
(
tscipy
)
/
tconvop
)
.
mean
()
print
'speed up use_gemm :
%.3
f'
%
(
N
.
asarray
(
tconvop
)
/
tconvop_gemm
)
.
mean
()
print
'speed up unroll_batch :
%.3
f'
%
(
N
.
asarray
(
tconvop
)
/
tconvop_batch
)
.
mean
()
print
'speed up unroll_kern :
%.3
f'
%
(
N
.
asarray
(
tconvop
)
/
tconvop_kern
)
.
mean
()
d
=
N
.
asarray
(
tscipy
)
/
tconvop
print
'speed up ConvOp vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
def
test_multilayer_conv
(
self
):
# causes an atexit problem
...
...
@@ -274,13 +205,13 @@ class TestConvOp(unittest.TestCase):
ssizes
=
[(
1
,
1
),(
2
,
2
)]
#2,2)]
#test speed
bsize
=
10
# batch size
imshp_start
=
(
1
,
50
,
49
)
kshps
=
([
11
,
12
],[
12
,
11
])
nkerns
=
[
20
,
20
]
# per output pixel
ssizes
=
[(
1
,
1
),]
#(1,1)]#(2,2) bugged
convmodes
=
[
'valid'
,
'full'
]
do_theano
=
False
#
bsize = 10 # batch size
# imshp_start = (1,50,49)#un square shape to test more corner case.
# kshps = ([11,12],[12,11])#un square shape to test more corner case.
#
nkerns = [20,20] # per output pixel
#
ssizes = [(1,1),]#(1,1)]#(2,2) bugged
#
convmodes = ['valid','full']
#
do_theano=False
N
.
set_printoptions
(
threshold
=
N
.
nan
)
...
...
@@ -288,23 +219,25 @@ class TestConvOp(unittest.TestCase):
kerns
=
[
T
.
matrix
(),
T
.
dmatrix
()]
img
=
T
.
dmatrix
()
rng
=
N
.
random
.
RandomState
(
3423489
)
tctot
,
tpytot
,
t2ctot
,
t2pytot
,
ntot
,
convtot
=
[],
[],
[],
[],
[],
[]
tctot
,
tpytot
,
ntot
=
[],
[],
[]
dmatrix4
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
,
False
))
inputs4
=
dmatrix4
()
kerns4
=
dmatrix4
()
assert
len
(
kshps
)
==
len
(
nkerns
)
==
len
(
kerns
)
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
def
do_test
(
conv_mode
,
ss
,
unroll_batch
=
0
,
unroll_kern
=
0
,
img
=
img
,
validate
=
True
,
conv_op_py
=
False
):
# build actual input images
imgval
=
rng
.
rand
(
bsize
,
imshp_start
[
0
],
imshp_start
[
1
],
imshp_start
[
2
])
imshp
=
imshp_start
# for each layer
for
kshp
,
kern
,
nkern
,
n_layer
in
zip
(
kshps
,
kerns
,
nkerns
,
range
(
len
(
kerns
))):
ntot
=
0
tctot
=
0
tpytot
=
0
for
kshp
,
kern
,
nkern
,
n_layer
in
zip
(
kshps
,
kerns
,
nkerns
,
range
(
len
(
kerns
))):
print
'************* layer
%
i ***************'
%
n_layer
print
conv_mode
,
ss
,
n_layer
,
kshp
,
nkern
...
...
@@ -326,14 +259,15 @@ class TestConvOp(unittest.TestCase):
time1
=
time
.
time
()
outval
=
N
.
zeros
(
N
.
r_
[
bsize
,
outshp
])
val
=
_valfrommode
(
conv_mode
)
bval
=
_bvalfromboundary
(
'fill'
)
for
b
in
range
(
bsize
):
# loop over batches
for
n
in
range
(
nkern
):
# loop over filters
for
i
in
range
(
imshp
[
0
]):
# loop over input feature maps
outval
[
b
,
n
,
...
]
+=
_convolve2d
(
\
imgval
[
b
,
i
,
...
],
w_flip
[
n
,
i
,
...
],
1
,
val
,
bval
,
0
)[
0
::
ss
[
0
],
0
::
ss
[
1
]]
ntot
+=
[
time
.
time
()
-
time1
]
if
validate
:
val
=
_valfrommode
(
conv_mode
)
bval
=
_bvalfromboundary
(
'fill'
)
for
b
in
range
(
bsize
):
# loop over batches
for
n
in
range
(
nkern
):
# loop over filters
for
i
in
range
(
imshp
[
0
]):
# loop over input feature maps
outval
[
b
,
n
,
...
]
+=
_convolve2d
(
\
imgval
[
b
,
i
,
...
],
w_flip
[
n
,
i
,
...
],
1
,
val
,
bval
,
0
)[
0
::
ss
[
0
],
0
::
ss
[
1
]]
ntot
+=
time
.
time
()
-
time1
if
do_theano
:
####### test with new sp.convolve2 function ######
...
...
@@ -353,18 +287,16 @@ class TestConvOp(unittest.TestCase):
assert
(
N
.
abs
(
hidval
-
hidval1
)
<
1e-5
)
.
all
()
temp
=
N
.
abs
(
outval
.
reshape
(
bsize
,
-
1
)
-
hidval
)
assert
(
temp
<
1e-5
)
.
all
()
if
validate
:
assert
(
temp
<
1e-5
)
.
all
()
else
:
hid
=
img
#we don't need it, but it make the flow easier flow
convtot
+=
[
-
1
]
tctot
+=
[
-
1
]
tpytot
+=
[
-
1
]
hidval
=
outval
.
copy
()
#to keep the same memory
hidval1
=
outval
.
copy
()
# ConvOp
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
unroll_
kern
=
10
)(
inputs4
,
kerns4
)
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
unroll_
batch
=
unroll_batch
,
unroll_kern
=
unroll_kern
)(
inputs4
,
kerns4
)
l1shp
=
N
.
hstack
((
nkern
,
getFilterOutShp
(
imshp
,
kshp
,
ss
,
conv_mode
)))
propup2
=
function
([
inputs4
,
kerns4
],
conv_op
)
...
...
@@ -373,30 +305,90 @@ class TestConvOp(unittest.TestCase):
time1
=
time
.
time
()
hidval2_
=
propup2
(
imgval
,
w_flip
)
hidval2
=
hidval2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
t
2ctot
+=
[
time
.
time
()
-
time1
]
t
ctot
+=
time
.
time
()
-
time1
time1
=
time
.
time
()
# hidval3_ = propup3(imgval,w_flip)
# hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
t2pytot
+=
[
time
.
time
()
-
time1
]
# assert (N.abs(hidval2-hidval3)<1e-5).all()
if
conv_op_py
:
time1
=
time
.
time
()
hidval3_
=
propup3
(
imgval
,
w_flip
)
hidval3
=
hidval3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tpytot
+=
time
.
time
()
-
time1
assert
(
N
.
abs
(
hidval2
-
hidval3
)
<
1e-5
)
.
all
()
else
:
tpytot
+=
0
temp
=
N
.
abs
(
outval
-
hidval2
)
assert
(
temp
<
1e-5
)
.
all
()
# temp = N.abs(outval - hidval3)
# assert (temp < 1e-5).all()
if
validate
:
temp
=
N
.
abs
(
outval
-
hidval2
)
assert
(
temp
<
1e-5
)
.
all
()
if
validate
and
conv_op_py
:
temp
=
N
.
abs
(
outval
-
hidval3
)
assert
(
temp
<
1e-5
)
.
all
()
img
,
imshp
=
hid
,
tuple
(
outshp
)
imgval
=
outval
.
reshape
(
bsize
,
outshp
[
0
],
outshp
[
1
],
outshp
[
2
])
return
tctot
,
tpytot
,
ntot
if
False
:
# calculate the speed up of different combination of unroll
# put the paramter to the same you will try.
validate
=
False
# we don't validate the result to have it much faster!
unroll_batch
=
[
0
,
1
,
2
,
4
,
5
,
10
,
20
]
unroll_kern
=
[
0
,
2
,
4
,
5
,
10
,
20
]
# unroll_batch = [0,2,5]
# unroll_kern = [0,2,5]
bsize
=
20
# batch size
imshp_start
=
(
1
,
50
,
49
)
#un square shape to test more corner case.
kshps
=
([
11
,
12
],[
12
,
11
])
#un square shape to test more corner case.
nkerns
=
[
20
,
20
]
# per output pixel
ssizes
=
[(
1
,
1
),]
#(1,1)]#(2,2) bugged
convmodes
=
[
'valid'
,
'full'
]
do_theano
=
False
a
=
T
.
dmatrix
()
kerns
=
[
a
for
i
in
nkerns
]
assert
len
(
kshps
)
==
len
(
nkerns
)
==
len
(
kerns
)
timing
=
N
.
zeros
((
len
(
unroll_batch
),
len
(
unroll_kern
),
3
))
t_b_k
=
[]
for
unroll_b
,
n_b
in
zip
(
unroll_batch
,
range
(
len
(
unroll_batch
))):
for
unroll_k
,
n_k
in
zip
(
unroll_kern
,
range
(
len
(
unroll_kern
))):
t_b_k
+=
[
str
(
unroll_b
)
+
"/"
+
str
(
unroll_k
)]
tctot
,
tpytot
,
ntot
=
[],[],[]
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
do_test
(
conv_mode
,
ss
,
unroll_batch
=
unroll_b
,
unroll_kern
=
unroll_k
,
validate
=
validate
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
timing
[
n_b
,
n_k
]
=
[
sum
(
tctot
),
sum
(
tpytot
),
sum
(
ntot
)]
# print timing
t
=
timing
[:,:,
0
]
#We select only the c timing.
print
t_b_k
print
t
print
"max
%.3
fs"
%
t
.
max
(),
"max param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmax
()]
print
"min
%.3
fs"
%
t
.
min
(),
"min param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmin
()]
print
"speedup
%.3
fx"
%
(
t
.
max
()
/
t
.
min
())
return
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
do_test
(
conv_mode
,
ss
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
print
'**** Multilayer Convolution Profiling Results ****'
print
'Numpy convolve2d processing time:
%.3
fs'
%
sum
(
ntot
),
ntot
print
'c Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
t2ctot
),
t2ctot
print
'py Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
t2pytot
),
t2pytot
print
'convolve processing time:
%.3
fs'
%
sum
(
convtot
),
convtot
d
=
N
.
asarray
(
ntot
)
/
t2ctot
print
'c Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
tctot
),
tctot
print
'py Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
tpytot
),
tpytot
d
=
N
.
asarray
(
ntot
)
/
tctot
print
'speed up c theano(ConvOp) vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
d
=
N
.
asarray
(
ntot
)
/
t
2
pytot
d
=
N
.
asarray
(
ntot
)
/
tpytot
print
'speed up py theano(ConvOp) vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
...
...
@@ -417,7 +409,7 @@ class TestConvOp(unittest.TestCase):
visdim
=
1
if
len
(
imshp
)
!=
3
else
imshp
[
0
]
for
kshp
in
kshps
:
imgvals
=
N
.
random
.
random
(
N
.
hstack
((
bsize
,
imshp
)))
#
print 'imgvals.shape = ', imgvals.shape
print
'imgvals.shape = '
,
imgvals
.
shape
imgvals
=
imgvals
.
reshape
(
bsize
,
-
1
)
if
visdim
==
1
:
...
...
@@ -460,3 +452,10 @@ class TestConvOp(unittest.TestCase):
kernvals
=
kernvals
.
reshape
(
nkern
,
-
1
)
utt
.
verify_grad
(
testf
,
[
imgvals
,
kernvals
])
if
__name__
==
'__main__'
:
t
=
TestConvOp
(
"test_convolution"
)
t
.
test_convolution
()
# t.test_multilayer_conv()
# from theano.tests import main
# main("test_sp")
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论