Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
dc1aa62a
提交
dc1aa62a
authored
1月 19, 2010
作者:
Frederic Bastien
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[mq]: unroll_patch
上级
3d8a5430
显示空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
377 行增加
和
26 行删除
+377
-26
conv.py
theano/sandbox/conv.py
+304
-3
test_conv.py
theano/sandbox/test_conv.py
+73
-23
没有找到文件。
theano/sandbox/conv.py
浏览文件 @
dc1aa62a
...
@@ -29,9 +29,10 @@ class ConvOp(Op):
...
@@ -29,9 +29,10 @@ class ConvOp(Op):
#TODO: make the stacksize its own parameter, and make imshp a pair
#TODO: make the stacksize its own parameter, and make imshp a pair
def
__init__
(
self
,
imshp
,
kshp
,
nkern
,
bsize
,
dx
,
dy
,
output_mode
=
'valid'
,
def
__init__
(
self
,
imshp
=
None
,
kshp
=
None
,
nkern
=
None
,
bsize
=
None
,
dx
=
None
,
dy
=
None
,
output_mode
=
'valid'
,
unroll_batch
=
4
,
unroll_batch
=
0
,
unroll_kern
=
4
,
unroll_kern
=
0
,
unroll_patch
=
False
,
imshp_logical
=
None
,
imshp_logical
=
None
,
kshp_logical
=
None
,
kshp_logical
=
None
,
kshp_logical_top_aligned
=
True
,
kshp_logical_top_aligned
=
True
,
...
@@ -47,6 +48,7 @@ class ConvOp(Op):
...
@@ -47,6 +48,7 @@ class ConvOp(Op):
dx - patch stride rows
dx - patch stride rows
dy - patch stride cols
dy - patch stride cols
out_mode - 'valid', 'full'
out_mode - 'valid', 'full'
unroll_patch - c code generation option
unroll_batch - c code generation option
unroll_batch - c code generation option
unroll_kern - c code generation option
unroll_kern - c code generation option
verbose - passed to GpuConv
verbose - passed to GpuConv
...
@@ -60,6 +62,7 @@ class ConvOp(Op):
...
@@ -60,6 +62,7 @@ class ConvOp(Op):
gradient on the filters.
gradient on the filters.
unroll_patch. If True, will use a version of the code that unrolls the patch loop; it is faster than the version without unrolling.
unroll_batch. If >0 will use a version that will unroll the batch loop by the value of the option. By default don't use this version of the code.
unroll_batch. If >0 will use a version that will unroll the batch loop by the value of the option. By default don't use this version of the code.
unroll_nkern. idem as unroll_batch but unroll the kernel loop.
unroll_nkern. idem as unroll_batch but unroll the kernel loop.
...
@@ -95,6 +98,7 @@ class ConvOp(Op):
...
@@ -95,6 +98,7 @@ class ConvOp(Op):
self
.
unroll_batch
=
unroll_batch
self
.
unroll_batch
=
unroll_batch
self
.
unroll_kern
=
unroll_kern
self
.
unroll_kern
=
unroll_kern
self
.
unroll_patch
=
unroll_patch
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
if
self
.
bsize
<=
self
.
unroll_batch
:
if
self
.
bsize
<=
self
.
unroll_batch
:
...
@@ -407,6 +411,7 @@ using namespace std;
...
@@ -407,6 +411,7 @@ using namespace std;
d
[
"self_imshp0"
]
=
self
.
imshp
[
0
]
d
[
"self_imshp0"
]
=
self
.
imshp
[
0
]
d
[
"self_imshp1"
]
=
self
.
imshp
[
1
]
d
[
"self_imshp1"
]
=
self
.
imshp
[
1
]
d
[
"self_imshp2"
]
=
self
.
imshp
[
2
]
d
[
"self_imshp2"
]
=
self
.
imshp
[
2
]
d
[
"mode"
]
=
self
.
out_mode
.
upper
()
d
[
"self_kshp0"
]
=
self
.
kshp
[
0
]
d
[
"self_kshp0"
]
=
self
.
kshp
[
0
]
d
[
"self_kshp1"
]
=
self
.
kshp
[
1
]
d
[
"self_kshp1"
]
=
self
.
kshp
[
1
]
d
[
"self_kshp_logical_r"
]
=
self
.
kshp_logical
[
0
]
d
[
"self_kshp_logical_r"
]
=
self
.
kshp_logical
[
0
]
...
@@ -439,8 +444,12 @@ using namespace std;
...
@@ -439,8 +444,12 @@ using namespace std;
#print self.out_mode, d["self_imshp_logical_stride_r"]
#print self.out_mode, d["self_imshp_logical_stride_r"]
if
self
.
imshp
!=
self
.
imshp_logical
or
self
.
kshp
!=
self
.
kshp_logical
:
if
self
.
imshp
!=
self
.
imshp_logical
or
self
.
kshp
!=
self
.
kshp_logical
:
# print "return imshp!=imshp_logical or self.kshp != self.kshp_logical shape version"
return
_conv_op_code_a
%
d
return
_conv_op_code_a
%
d
if
self
.
unroll_patch
:
# print "return unroll patch version",self.dx,self.dy
return
_conv_op_code_unroll_patch
%
d
if
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
:
if
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
:
if
self
.
unroll_batch
<=
0
:
self
.
unroll_batch
=
1
if
self
.
unroll_batch
<=
0
:
self
.
unroll_batch
=
1
if
self
.
unroll_kern
<=
0
:
self
.
unroll_kern
=
1
if
self
.
unroll_kern
<=
0
:
self
.
unroll_kern
=
1
...
@@ -1212,3 +1221,295 @@ Py_XDECREF(img2d);
...
@@ -1212,3 +1221,295 @@ Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
Py_XDECREF(filtersflipped);
"""
"""
return
ret
return
ret
_conv_op_code_unroll_patch
=
"""
const int mode=
%(mode)
s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const
%(type)
s fill_value = 0;
int type_im=PyArray_TYPE(
%(img2d)
s);
int type_ker=PyArray_TYPE(
%(filtersflipped)
s);
npy_intp dim_zz[2]={
%(self_outshp0)
s,
%(self_outshp1)
s};
npy_intp dim_im[2]={
%(self_imshp1)
s,
%(self_imshp2)
s};
npy_intp dim_ker[2]={
%(self_kshp0)
s,
%(self_kshp1)
s};
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(
%(img2d)
s->nd==2){
img2d_dim[3]=
%(img2d)
s->dimensions[1];
img2d_dim[2]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==3){
img2d_dim[3]=
%(img2d)
s->dimensions[2];
img2d_dim[2]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==4){
img2d_dim[3]=
%(img2d)
s->dimensions[3];
img2d_dim[2]=
%(img2d)
s->dimensions[2];
img2d_dim[1]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)
s;
}
if(
%(filtersflipped)
s->nd==3){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else if(
%(filtersflipped)
s->nd==4){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[3];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[1]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else{
std:stringstream temp;
temp << "nddim="<<
%(filtersflipped)
s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str());
%(fail)
s;
}
img2d = PyArray_Newshape(
%(img2d)
s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(
%(type)
s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)
s;
}
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(
%(filtersflipped)
s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(
%(type)
s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)
s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported");
%(fail)
s;
}
typenum = PyArray_ObjectType((PyObject*)
%(img2d)
s, 0);
typenum_f = PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;}
if (!img2d)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
if ((!
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=4
||(
%(z)
s->dimensions[0] !=
%(self_bsize)
s)
||(
%(z)
s->dimensions[1] !=
%(self_nkern)
s)
||(
%(z)
s->dimensions[2] != dim_zz[0])
|| (
%(z)
s->dimensions[3] != dim_zz[1])
)
{
if (
%(z)
s) Py_DECREF(
%(z)
s);
npy_intp dims[4] = {0,0,0,0};
if(!dims)
%(fail)
s;
dims[0]=
%(self_bsize)
s;
dims[1]=
%(self_nkern)
s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)
s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
//PyArray_FILLWBYTE((PyObject*)
%(z)
s,0);
}
int Os[2];
Os[0]=
%(self_outshp0)
s;
Os[1]=
%(self_outshp1)
s;
//I keep the formula to calculate Os in case we need it in the future.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(
%(self_dx)
s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(
%(self_dy)
s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(
%(self_dx)
s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(
%(self_dy)
s));}
for(int b=0;b<
%(self_bsize)
s;b++){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern++){
//assertions
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[1] !=
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != sizeof(
%(type)
s))
%(fail)
s;
%(type)
s * __restrict__ out=(
%(type)
s *)(PyArray_GETPTR2(
%(z)
s,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<
%(self_imshp0)
s;stack_size++){
const
%(type)
s * __restrict__ in=(
%(type)
s *)(PyArray_GETPTR2(img2d,b,stack_size));
const
%(type)
s * __restrict__ hvals=(
%(type)
s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
int new_m;
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
// Reposition index into input image based on requested output size
int pos_m = iter_m*
%(self_dx)
s;//The position of the patch in the image
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*
%(self_dy)
s;
%(type)
s sum=0;
%(type)
s sum2=0;
%(type)
s sum3=0;
%(type)
s sum4=0;
int nb_sum=0;
// Sum over kernel, if index into image is out of bounds
// fill with the value
for (int j=0; j < dim_ker[0]; j++) {
int ind0 = (new_m-j);
if(mode==FULL){
const
%(type)
s * idx_hvals=&hvals[j*dim_ker[1]];
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
sum+= idx_hvals[k] * fill_value;
}
}else{
//do the part where kernel is to the right of the img
//TODO: implement unroll patch for fill_value!=0
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
for(k=0;k<max_k;k++){
sum+= idx_hvals[k]*fill_value;
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(pos_n+1,(int)dim_ker[1]);
const
%(type)
s * idx_in=&in[ind0*dim_im[1]];
if(iter_n + 4*
%(self_dy)
s < Os[1]
&& iter_n>dim_ker[1]-1+3
&& iter_n<dim_im[1]-dim_ker[1]+1-3){
nb_sum=4;
//cout<<4<<endl;
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+
%(self_dy)
s];
sum3+=idx_hvals[k]*idx_in[ind1+2*
%(self_dy)
s];
sum4+=idx_hvals[k]*idx_in[ind1+3*
%(self_dy)
s];
}
}else if(iter_n + 2*
%(self_dy)
s < Os[1]
&& iter_n>dim_ker[1]-1
&& iter_n<dim_im[1]-dim_ker[1]+1){
//cout<<2<<endl;
nb_sum=2;
// if(iter_n==dim_ker[1]-1){//k-1<min(pos_n+
%(self_dy)
s,(int)dim_ker[1])){
// sum2+=idx_hvals[k-1]*idx_in[pos_n-k-
%(self_dy)
s];
// }
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+
%(self_dy)
s];
}
// sum2+=idx_hvals[k]*idx_in[pos_n-k+
%(self_dy)
s];
// sum+=idx_hvals[k]*idx_in[pos_n-k];
// k++;
}else{
//cout<<1<<endl;
nb_sum=1;
/*
%(type)
s sum_=0;
if((k-max_k) & 0x1 != 0){
sum+= idx_hvals[k] * idx_in[pos_n-k];
}
for (int ind1=pos_n-k; k<max_k; k+=2,ind1-=2) {
sum+= idx_hvals[k] * idx_in[ind1];
sum_+= idx_hvals[k+1] * idx_in[ind1-1];
}
sum+=sum_;
*/
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
}
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker[1];k++) sum+= idx_hvals[k]*fill_value;
}
}else{//valid mode
const
%(type)
s* idx_in=&in[ind0*dim_im[1]];
const
%(type)
s* idx_hvals=&hvals[j*dim_ker[1]];
if(iter_n + 4*
%(self_dy)
s < Os[1]){
nb_sum=4;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
sum2+=idx_hvals[k]*idx_in[im_idx+
%(self_dy)
s];
sum3+=idx_hvals[k]*idx_in[im_idx+2*
%(self_dy)
s];
sum4+=idx_hvals[k]*idx_in[im_idx+3*
%(self_dy)
s];
}
}else if(iter_n + 2*
%(self_dy)
s < Os[1]){
nb_sum=2;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
sum2+=idx_hvals[k]*idx_in[im_idx+
%(self_dy)
s];
}
}else{
nb_sum=1;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
}
}
}//else valid mode
}//for j
switch(nb_sum){
case 4: out[iter_m*dim_zz[1]+iter_n+3]
%(affectation)
s sum4;
case 3: out[iter_m*dim_zz[1]+iter_n+2]
%(affectation)
s sum3;
case 2: out[iter_m*dim_zz[1]+iter_n+1]
%(affectation)
s sum2;
case 1: out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum;
}
iter_n+=nb_sum-1;
/*
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum;
if(nb_sum>=2){
iter_n++;
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum2;
}
if(nb_sum>=3){
iter_n++;
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum3;
}
if(nb_sum>=4){
iter_n++;
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum4;
}
*/
}//for iter_n
}//for iter_m
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out[i];
std::cout << "
\\
n";
}
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
theano/sandbox/test_conv.py
浏览文件 @
dc1aa62a
...
@@ -41,7 +41,7 @@ def flip(kern, kshp):
...
@@ -41,7 +41,7 @@ def flip(kern, kshp):
global_rng
=
N
.
random
.
RandomState
(
3423489
)
global_rng
=
N
.
random
.
RandomState
(
3423489
)
dmatrix4
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
,
False
))
dmatrix4
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
,
False
))
def
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
img
=
T
.
dmatrix
(),
validate
=
True
,
conv_op_py
=
False
,
do_convolve2
=
False
,
do_print
=
True
,
repeat
=
1
):
def
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
img
=
T
.
dmatrix
(),
validate
=
True
,
conv_op_py
=
False
,
do_convolve2
=
False
,
do_print
=
True
,
repeat
=
1
,
unroll_patch
=
0
):
# build actual input images
# build actual input images
imgval
=
global_rng
.
rand
(
bsize
,
imshp
[
0
],
imshp
[
1
],
imshp
[
2
])
imgval
=
global_rng
.
rand
(
bsize
,
imshp
[
0
],
imshp
[
1
],
imshp
[
2
])
...
@@ -121,7 +121,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
...
@@ -121,7 +121,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
hidval1
=
outval
.
copy
()
hidval1
=
outval
.
copy
()
# ConvOp
# ConvOp
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
conv_mode
,
unroll_batch
=
unroll_batch
,
unroll_kern
=
unroll_kern
)(
inputs4
,
kerns4
)
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
conv_mode
,
unroll_batch
=
unroll_batch
,
unroll_kern
=
unroll_kern
,
unroll_patch
=
unroll_patch
)(
inputs4
,
kerns4
)
l1shp
=
N
.
hstack
((
nkern
,
l1shp
=
N
.
hstack
((
nkern
,
getFilterOutShp
(
imshp
,
kshp
,
ss
,
conv_mode
)))
getFilterOutShp
(
imshp
,
kshp
,
ss
,
conv_mode
)))
propup2
=
function
([
inputs4
,
kerns4
],
conv_op
)
propup2
=
function
([
inputs4
,
kerns4
],
conv_op
)
...
@@ -328,7 +328,7 @@ class TestConvOp(unittest.TestCase):
...
@@ -328,7 +328,7 @@ class TestConvOp(unittest.TestCase):
ssizess
=
[[(
1
,
1
),(
1
,
2
)],[(
1
,
1
),(
2
,
2
)]]
ssizess
=
[[(
1
,
1
),(
1
,
2
)],[(
1
,
1
),(
2
,
2
)]]
convmodes
=
[
'valid'
,
'full'
]
convmodes
=
[
'valid'
,
'full'
]
do_convolve2
=
True
do_convolve2
=
True
unroll
=
[(
0
,
0
),(
1
,
1
),(
2
,
2
),(
3
,
2
)]
#(batch,kern
)
unroll
=
[(
0
,
0
,
False
),(
0
,
0
,
True
),(
1
,
1
,
False
),(
2
,
2
,
False
),(
3
,
2
,
False
)]
#(batch,kern,patch
)
do_speed_test
=
False
do_speed_test
=
False
# TODO: this version show a bug that was fixed
# TODO: this version show a bug that was fixed
...
@@ -338,6 +338,11 @@ class TestConvOp(unittest.TestCase):
...
@@ -338,6 +338,11 @@ class TestConvOp(unittest.TestCase):
# nkerns = [2,2] # per output pixel
# nkerns = [2,2] # per output pixel
# ssizes = [(1,1),(2,2)]#2,2)]
# ssizes = [(1,1),(2,2)]#2,2)]
# bsizes = [1,1] # batch size
# imshp_starts = [(1,10,10),(1,5,6)]
# kshpss = ([[2,3],[3,2]],[[2,2],[2,2]])
# nkernss = [[1,1],[1,1]] # per output pixel
N
.
set_printoptions
(
threshold
=
N
.
nan
)
N
.
set_printoptions
(
threshold
=
N
.
nan
)
# symbolic stuff
# symbolic stuff
...
@@ -356,8 +361,8 @@ class TestConvOp(unittest.TestCase):
...
@@ -356,8 +361,8 @@ class TestConvOp(unittest.TestCase):
unroll_batch
=
[
1
,
2
,
4
,
5
,
10
,
20
]
unroll_batch
=
[
1
,
2
,
4
,
5
,
10
,
20
]
unroll_kern
=
[
1
,
2
,
4
,
5
,
10
,
20
]
unroll_kern
=
[
1
,
2
,
4
,
5
,
10
,
20
]
unroll_batch
=
[
1
,
2
,
5
]
unroll_batch
=
[
1
,
4
,
5
]
unroll_kern
=
[
1
,
2
,
5
]
unroll_kern
=
[
1
,
4
,
5
]
bsize
=
20
# batch size
bsize
=
20
# batch size
imshp_start
=
(
1
,
48
,
48
)
#un square shape to test more corner case.
imshp_start
=
(
1
,
48
,
48
)
#un square shape to test more corner case.
...
@@ -374,9 +379,17 @@ class TestConvOp(unittest.TestCase):
...
@@ -374,9 +379,17 @@ class TestConvOp(unittest.TestCase):
timing
=
N
.
zeros
((
len
(
unroll_batch
),
len
(
unroll_kern
),
3
))
timing
=
N
.
zeros
((
len
(
unroll_batch
),
len
(
unroll_kern
),
3
))
t_b_k
=
[]
t_b_k
=
[]
#calculate the timing with unrolling
#calculate the timing with unrolling
t_
=
[[
7.60572791
,
3.95069814
,
3.74271464
],
[
4.05631089
,
2.90384555
,
2.93613672
],
[
3.90551591
,
2.92595196
,
3.00102282
]]
best
=
[]
worst
=
[]
best
=
[
0.52690219879150391
,
2.4266397953033447
]
worst
=
[
0.92042708396911621
,
6.8822150230407715
]
t_
=
[]
for
unroll_b
,
n_b
in
zip
(
unroll_batch
,
range
(
len
(
unroll_batch
))):
for
unroll_b
,
n_b
in
zip
(
unroll_batch
,
range
(
len
(
unroll_batch
))):
for
unroll_k
,
n_k
in
zip
(
unroll_kern
,
range
(
len
(
unroll_kern
))):
for
unroll_k
,
n_k
in
zip
(
unroll_kern
,
range
(
len
(
unroll_kern
))):
t_b_k
.
append
(
str
(
unroll_b
)
+
"/"
+
str
(
unroll_k
))
t_b_k
.
append
(
str
(
unroll_b
)
+
"/"
+
str
(
unroll_k
))
if
not
t_
:
tctot
,
tpytot
,
ntot
=
[],[],[]
tctot
,
tpytot
,
ntot
=
[],[],[]
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
...
@@ -384,36 +397,68 @@ class TestConvOp(unittest.TestCase):
...
@@ -384,36 +397,68 @@ class TestConvOp(unittest.TestCase):
tctot
+=
[
tctot_
]
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
ntot
+=
[
ntot_
]
if
unroll_b
==
4
and
unroll_k
==
4
:
print
"unroll 4/4"
,
tctot
best
=
tctot
if
unroll_b
==
1
and
unroll_k
==
1
:
print
"unroll 1/1"
,
tctot
worst
=
tctot
timing
[
n_b
,
n_k
]
=
[
sum
(
tctot
),
sum
(
tpytot
),
sum
(
ntot
)]
timing
[
n_b
,
n_k
]
=
[
sum
(
tctot
),
sum
(
tpytot
),
sum
(
ntot
)]
if
not
t_
:
t
=
timing
[:,:,
0
]
#We select only the c timing.
else
:
t
=
t_
t
=
N
.
asarray
(
t
)
#calculate the old timing
#calculate the old timing
tctot
,
tpytot
,
ntot
=
0
,
0
,
0
tctot_
=
[
0.52555489540100098
,
6.6634182929992676
]
# tctot_=[]
tctot
,
tpytot
,
ntot
=
[],[],[]
if
not
tctot_
:
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp_start
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
validate
=
validate
)
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp_start
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
validate
=
validate
)
tctot
+=
tctot_
tctot
+=
[
tctot_
]
tpytot
+=
tpytot_
tpytot
+=
[
tpytot_
]
ntot
+=
ntot_
ntot
+=
[
ntot_
]
print
"old code timing
%.3
fs"
%
tctot
else
:
tctot
=
N
.
asarray
(
tctot_
)
print
"old code timing
%.3
fs"
%
sum
(
tctot
),
tctot
# print timing
best
=
N
.
asarray
(
best
)
t
=
timing
[:,:,
0
]
#We select only the c timing.
worst
=
N
.
asarray
(
worst
)
print
"timing for unrolled version"
print
"timing for unrolled version"
print
t_b_k
print
t_b_k
print
t
print
t
print
"max
%.3
fs"
%
t
.
max
(),
"max param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmax
()]
print
"max
%.3
fs"
%
t
.
max
(),
"max param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmax
()]
print
"min
%.3
fs"
%
t
.
min
(),
"min param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmin
()]
print
"min
%.3
fs"
%
t
.
min
(),
"min param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmin
()]
print
"speedup vs (1/1)
%.3
fx, vs old
%.3
fx"
%
(
t
.
max
()
/
t
.
min
(),
tctot
/
t
.
min
())
print
"speedup vs (1/1)
%.3
fx, vs old
%.3
fx"
%
(
t
.
max
()
/
t
.
min
(),
sum
(
tctot
)
/
t
.
min
())
print
worst
/
best
,
tctot
/
best
tctot_patch
=
[]
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp_start
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
validate
=
validate
,
unroll_patch
=
2
)
tctot_patch
+=
[
tctot_
]
t_patch
=
sum
(
tctot_patch
)
print
"unroll_patch time"
,
tctot_patch
print
"speedup vs (1/1)
%.3
fx, vs old
%.3
fx"
%
(
t
.
max
()
/
t_patch
,
sum
(
tctot
)
/
t_patch
)
print
best
/
tctot_patch
,
worst
/
tctot_patch
print
best
print
worst
print
tctot
print
tctot_patch
return
return
for
i
in
range
(
len
(
kshpss
)):
for
i
in
range
(
len
(
kshpss
)):
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizess
[
i
],
range
(
len
(
ssizess
[
i
]))):
for
ss
,
n_ss
in
zip
(
ssizess
[
i
],
range
(
len
(
ssizess
[
i
]))):
for
un_b
,
un_k
in
unroll
:
for
un_b
,
un_k
,
un_p
in
unroll
:
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsizes
[
i
],
imshp_starts
[
i
],
conv_mode
,
ss
,
bsizes
[
i
],
imshp_starts
[
i
],
kshpss
[
i
],
nkernss
[
i
],
kshpss
[
i
],
nkernss
[
i
],
img
=
img
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
img
=
img
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
,
validate
=
True
)
validate
=
True
)
tctot
+=
[
tctot_
]
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
tpytot
+=
[
tpytot_
]
...
@@ -428,6 +473,11 @@ class TestConvOp(unittest.TestCase):
...
@@ -428,6 +473,11 @@ class TestConvOp(unittest.TestCase):
d
=
N
.
asarray
(
ntot
)
/
tpytot
d
=
N
.
asarray
(
ntot
)
/
tpytot
print
'speed up py theano(ConvOp) vs convolve2d:
%.3
fx'
%
d
.
mean
(),
d
print
'speed up py theano(ConvOp) vs convolve2d:
%.3
fx'
%
d
.
mean
(),
d
def
init_data
(
self
,
shape
):
return
N
.
ones
(
shape
)
return
N
.
random
.
random
(
shape
)
def
test_ConvOpGrad
(
self
):
def
test_ConvOpGrad
(
self
):
"""
"""
test the gradient in float and double
test the gradient in float and double
...
@@ -442,7 +492,7 @@ class TestConvOp(unittest.TestCase):
...
@@ -442,7 +492,7 @@ class TestConvOp(unittest.TestCase):
kshps
=
[(
2
,
3
)]
kshps
=
[(
2
,
3
)]
imshps
=
[(
2
,
3
,
4
)]
imshps
=
[(
2
,
3
,
4
)]
modes
=
[
'valid'
,
'full'
]
modes
=
[
'valid'
,
'full'
]
unroll
=
[(
0
,
0
),(
1
,
1
),(
2
,
3
)]
unroll
=
[(
0
,
0
,
True
),(
1
,
1
,
False
),(
2
,
3
,
False
),(
1
,
1
,
False
),(
0
,
0
,
False
)]
#(batch,kern,patch)
ssizes
=
[(
1
,
1
),(
2
,
2
)]
ssizes
=
[(
1
,
1
),(
2
,
2
)]
for
typ
in
types
:
for
typ
in
types
:
...
@@ -457,12 +507,12 @@ class TestConvOp(unittest.TestCase):
...
@@ -457,12 +507,12 @@ class TestConvOp(unittest.TestCase):
imgvals
=
N
.
array
(
N
.
random
.
random
(
N
.
hstack
((
bsize
,
imshp
))),
dtype
=
imgs
.
dtype
)
imgvals
=
N
.
array
(
N
.
random
.
random
(
N
.
hstack
((
bsize
,
imshp
))),
dtype
=
imgs
.
dtype
)
for
kshp
in
kshps
:
for
kshp
in
kshps
:
t
=
numpy
.
array
([
imshp
[
1
]
-
kshp
[
0
],
imshp
[
2
]
-
kshp
[
1
]])
t
=
numpy
.
array
([
imshp
[
1
]
-
kshp
[
0
],
imshp
[
2
]
-
kshp
[
1
]])
kernvals
=
N
.
array
(
N
.
random
.
rand
(
nkern
,
visdim
,
kshp
[
0
],
kernvals
=
N
.
array
(
self
.
init_data
(
(
nkern
,
visdim
,
kshp
[
0
],
kshp
[
1
]
),
dtype
=
kerns
.
dtype
)
kshp
[
1
])
),
dtype
=
kerns
.
dtype
)
# 'full' mode should support kernels bigger than the input
# 'full' mode should support kernels bigger than the input
if
mode
==
'valid'
and
(
t
<
0
)
.
any
():
if
mode
==
'valid'
and
(
t
<
0
)
.
any
():
continue
continue
for
un_b
,
un_k
in
unroll
:
for
un_b
,
un_k
,
un_p
in
unroll
:
for
ss
in
ssizes
:
for
ss
in
ssizes
:
print
'test_ConvOpGrad'
print
'test_ConvOpGrad'
print
'mode type:'
,
mode
,
typ
print
'mode type:'
,
mode
,
typ
...
@@ -476,14 +526,14 @@ class TestConvOp(unittest.TestCase):
...
@@ -476,14 +526,14 @@ class TestConvOp(unittest.TestCase):
def
test_i
(
imgs
):
def
test_i
(
imgs
):
convop
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
convop
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
)
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
)
return
convop
(
imgs
,
kernvals
)
return
convop
(
imgs
,
kernvals
)
def
test_k
(
kerns
):
def
test_k
(
kerns
):
convop
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
convop
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
)
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
)
return
convop
(
imgvals
,
kerns
)
return
convop
(
imgvals
,
kerns
)
print
mode
,
imshp
,
kshp
,
un_b
,
un_k
,
ss
#TODO the tolerance needed to pass is very high for float32(0.17). Is this acceptable? Expected?
#TODO the tolerance needed to pass is very high for float32(0.17). Is this acceptable? Expected?
tol
=
None
tol
=
None
if
typ
==
"float32"
:
if
typ
==
"float32"
:
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论