Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
dc1aa62a
提交
dc1aa62a
authored
1月 19, 2010
作者:
Frederic Bastien
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[mq]: unroll_patch
上级
3d8a5430
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
389 行增加
和
38 行删除
+389
-38
conv.py
theano/sandbox/conv.py
+304
-3
test_conv.py
theano/sandbox/test_conv.py
+85
-35
没有找到文件。
theano/sandbox/conv.py
浏览文件 @
dc1aa62a
...
...
@@ -29,9 +29,10 @@ class ConvOp(Op):
#TODO: make the stacksize its own parameter, and make imshp a pair
def
__init__
(
self
,
imshp
,
kshp
,
nkern
,
bsize
,
dx
,
dy
,
output_mode
=
'valid'
,
unroll_batch
=
4
,
unroll_kern
=
4
,
def
__init__
(
self
,
imshp
=
None
,
kshp
=
None
,
nkern
=
None
,
bsize
=
None
,
dx
=
None
,
dy
=
None
,
output_mode
=
'valid'
,
unroll_batch
=
0
,
unroll_kern
=
0
,
unroll_patch
=
False
,
imshp_logical
=
None
,
kshp_logical
=
None
,
kshp_logical_top_aligned
=
True
,
...
...
@@ -47,6 +48,7 @@ class ConvOp(Op):
dx - patch stride rows
dy - patch stride cols
out_mode - 'valid', 'full'
unroll_patch - c code generation option
unroll_batch - c code generation option
unroll_kern - c code generation option
verbose - passed to GpuConv
...
...
@@ -60,6 +62,7 @@ class ConvOp(Op):
gradient on the filters.
unroll_patch. If True, use a version of the code that unrolls the patch loop; it is faster than the version without unrolling.
unroll_batch. If >0 will use a version that will unroll the batch loop by the value of the option. By default don't use this version of the code.
unroll_kern. Same as unroll_batch, but unrolls the kernel loop.
...
...
@@ -95,6 +98,7 @@ class ConvOp(Op):
self
.
unroll_batch
=
unroll_batch
self
.
unroll_kern
=
unroll_kern
self
.
unroll_patch
=
unroll_patch
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
if
self
.
bsize
<=
self
.
unroll_batch
:
...
...
@@ -407,6 +411,7 @@ using namespace std;
d
[
"self_imshp0"
]
=
self
.
imshp
[
0
]
d
[
"self_imshp1"
]
=
self
.
imshp
[
1
]
d
[
"self_imshp2"
]
=
self
.
imshp
[
2
]
d
[
"mode"
]
=
self
.
out_mode
.
upper
()
d
[
"self_kshp0"
]
=
self
.
kshp
[
0
]
d
[
"self_kshp1"
]
=
self
.
kshp
[
1
]
d
[
"self_kshp_logical_r"
]
=
self
.
kshp_logical
[
0
]
...
...
@@ -439,8 +444,12 @@ using namespace std;
#print self.out_mode, d["self_imshp_logical_stride_r"]
if
self
.
imshp
!=
self
.
imshp_logical
or
self
.
kshp
!=
self
.
kshp_logical
:
# print "return imshp!=imshp_logical or self.kshp != self.kshp_logical shape version"
return
_conv_op_code_a
%
d
if
self
.
unroll_patch
:
# print "return unroll patch version",self.dx,self.dy
return
_conv_op_code_unroll_patch
%
d
if
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
:
if
self
.
unroll_batch
<=
0
:
self
.
unroll_batch
=
1
if
self
.
unroll_kern
<=
0
:
self
.
unroll_kern
=
1
...
...
@@ -1212,3 +1221,295 @@ Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
return
ret
_conv_op_code_unroll_patch
=
"""
const int mode=
%(mode)
s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const
%(type)
s fill_value = 0;
int type_im=PyArray_TYPE(
%(img2d)
s);
int type_ker=PyArray_TYPE(
%(filtersflipped)
s);
npy_intp dim_zz[2]={
%(self_outshp0)
s,
%(self_outshp1)
s};
npy_intp dim_im[2]={
%(self_imshp1)
s,
%(self_imshp2)
s};
npy_intp dim_ker[2]={
%(self_kshp0)
s,
%(self_kshp1)
s};
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(
%(img2d)
s->nd==2){
img2d_dim[3]=
%(img2d)
s->dimensions[1];
img2d_dim[2]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==3){
img2d_dim[3]=
%(img2d)
s->dimensions[2];
img2d_dim[2]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==4){
img2d_dim[3]=
%(img2d)
s->dimensions[3];
img2d_dim[2]=
%(img2d)
s->dimensions[2];
img2d_dim[1]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)
s;
}
if(
%(filtersflipped)
s->nd==3){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else if(
%(filtersflipped)
s->nd==4){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[3];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[1]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else{
std:stringstream temp;
temp << "nddim="<<
%(filtersflipped)
s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str());
%(fail)
s;
}
img2d = PyArray_Newshape(
%(img2d)
s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(
%(type)
s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)
s;
}
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(
%(filtersflipped)
s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(
%(type)
s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)
s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported");
%(fail)
s;
}
typenum = PyArray_ObjectType((PyObject*)
%(img2d)
s, 0);
typenum_f = PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;}
if (!img2d)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
if ((!
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=4
||(
%(z)
s->dimensions[0] !=
%(self_bsize)
s)
||(
%(z)
s->dimensions[1] !=
%(self_nkern)
s)
||(
%(z)
s->dimensions[2] != dim_zz[0])
|| (
%(z)
s->dimensions[3] != dim_zz[1])
)
{
if (
%(z)
s) Py_DECREF(
%(z)
s);
npy_intp dims[4] = {0,0,0,0};
if(!dims)
%(fail)
s;
dims[0]=
%(self_bsize)
s;
dims[1]=
%(self_nkern)
s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)
s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
//PyArray_FILLWBYTE((PyObject*)
%(z)
s,0);
}
int Os[2];
Os[0]=
%(self_outshp0)
s;
Os[1]=
%(self_outshp1)
s;
//I keep the formula to calculte Os in case we need it in the futur.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(
%(self_dx)
s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(
%(self_dy)
s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(
%(self_dx)
s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(
%(self_dy)
s));}
for(int b=0;b<
%(self_bsize)
s;b++){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern++){
//assertions
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[1] !=
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != sizeof(
%(type)
s))
%(fail)
s;
%(type)
s * __restrict__ out=(
%(type)
s *)(PyArray_GETPTR2(
%(z)
s,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<
%(self_imshp0)
s;stack_size++){
const
%(type)
s * __restrict__ in=(
%(type)
s *)(PyArray_GETPTR2(img2d,b,stack_size));
const
%(type)
s * __restrict__ hvals=(
%(type)
s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
int new_m;
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
// Reposition index into input image based on requested output size
int pos_m = iter_m*
%(self_dx)
s;//The position of the patch in the image
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*
%(self_dy)
s;
%(type)
s sum=0;
%(type)
s sum2=0;
%(type)
s sum3=0;
%(type)
s sum4=0;
int nb_sum=0;
// Sum over kernel, if index into image is out of bounds
// fill with the value
for (int j=0; j < dim_ker[0]; j++) {
int ind0 = (new_m-j);
if(mode==FULL){
const
%(type)
s * idx_hvals=&hvals[j*dim_ker[1]];
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
sum+= idx_hvals[k] * fill_value;
}
}else{
//do the part where kernel is to the right of the img
//TODO: implement unroll patch for fill_value!=0
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
for(k=0;k<max_k;k++){
sum+= idx_hvals[k]*fill_value;
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(pos_n+1,(int)dim_ker[1]);
const
%(type)
s * idx_in=&in[ind0*dim_im[1]];
if(iter_n + 4*
%(self_dy)
s < Os[1]
&& iter_n>dim_ker[1]-1+3
&& iter_n<dim_im[1]-dim_ker[1]+1-3){
nb_sum=4;
//cout<<4<<endl;
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+
%(self_dy)
s];
sum3+=idx_hvals[k]*idx_in[ind1+2*
%(self_dy)
s];
sum4+=idx_hvals[k]*idx_in[ind1+3*
%(self_dy)
s];
}
}else if(iter_n + 2*
%(self_dy)
s < Os[1]
&& iter_n>dim_ker[1]-1
&& iter_n<dim_im[1]-dim_ker[1]+1){
//cout<<2<<endl;
nb_sum=2;
// if(iter_n==dim_ker[1]-1){//k-1<min(pos_n+
%(self_dy)
s,(int)dim_ker[1])){
// sum2+=idx_hvals[k-1]*idx_in[pos_n-k-
%(self_dy)
s];
// }
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+
%(self_dy)
s];
}
// sum2+=idx_hvals[k]*idx_in[pos_n-k+
%(self_dy)
s];
// sum+=idx_hvals[k]*idx_in[pos_n-k];
// k++;
}else{
//cout<<1<<endl;
nb_sum=1;
/*
%(type)
s sum_=0;
if((k-max_k) & 0x1 != 0){
sum+= idx_hvals[k] * idx_in[pos_n-k];
}
for (int ind1=pos_n-k; k<max_k; k+=2,ind1-=2) {
sum+= idx_hvals[k] * idx_in[ind1];
sum_+= idx_hvals[k+1] * idx_in[ind1-1];
}
sum+=sum_;
*/
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
}
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker[1];k++) sum+= idx_hvals[k]*fill_value;
}
}else{//valid mode
const
%(type)
s* idx_in=&in[ind0*dim_im[1]];
const
%(type)
s* idx_hvals=&hvals[j*dim_ker[1]];
if(iter_n + 4*
%(self_dy)
s < Os[1]){
nb_sum=4;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
sum2+=idx_hvals[k]*idx_in[im_idx+
%(self_dy)
s];
sum3+=idx_hvals[k]*idx_in[im_idx+2*
%(self_dy)
s];
sum4+=idx_hvals[k]*idx_in[im_idx+3*
%(self_dy)
s];
}
}else if(iter_n + 2*
%(self_dy)
s < Os[1]){
nb_sum=2;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
sum2+=idx_hvals[k]*idx_in[im_idx+
%(self_dy)
s];
}
}else{
nb_sum=1;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
}
}
}//else valid mode
}//for j
switch(nb_sum){
case 4: out[iter_m*dim_zz[1]+iter_n+3]
%(affectation)
s sum4;
case 3: out[iter_m*dim_zz[1]+iter_n+2]
%(affectation)
s sum3;
case 2: out[iter_m*dim_zz[1]+iter_n+1]
%(affectation)
s sum2;
case 1: out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum;
}
iter_n+=nb_sum-1;
/*
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum;
if(nb_sum>=2){
iter_n++;
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum2;
}
if(nb_sum>=3){
iter_n++;
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum3;
}
if(nb_sum>=4){
iter_n++;
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum4;
}
*/
}//for iter_n
}//for iter_m
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out[i];
std::cout << "
\\
n";
}
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
theano/sandbox/test_conv.py
浏览文件 @
dc1aa62a
...
...
@@ -41,7 +41,7 @@ def flip(kern, kshp):
global_rng
=
N
.
random
.
RandomState
(
3423489
)
dmatrix4
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
,
False
))
def
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
img
=
T
.
dmatrix
(),
validate
=
True
,
conv_op_py
=
False
,
do_convolve2
=
False
,
do_print
=
True
,
repeat
=
1
):
def
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
img
=
T
.
dmatrix
(),
validate
=
True
,
conv_op_py
=
False
,
do_convolve2
=
False
,
do_print
=
True
,
repeat
=
1
,
unroll_patch
=
0
):
# build actual input images
imgval
=
global_rng
.
rand
(
bsize
,
imshp
[
0
],
imshp
[
1
],
imshp
[
2
])
...
...
@@ -121,7 +121,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
hidval1
=
outval
.
copy
()
# ConvOp
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
conv_mode
,
unroll_batch
=
unroll_batch
,
unroll_kern
=
unroll_kern
)(
inputs4
,
kerns4
)
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
conv_mode
,
unroll_batch
=
unroll_batch
,
unroll_kern
=
unroll_kern
,
unroll_patch
=
unroll_patch
)(
inputs4
,
kerns4
)
l1shp
=
N
.
hstack
((
nkern
,
getFilterOutShp
(
imshp
,
kshp
,
ss
,
conv_mode
)))
propup2
=
function
([
inputs4
,
kerns4
],
conv_op
)
...
...
@@ -328,7 +328,7 @@ class TestConvOp(unittest.TestCase):
ssizess
=
[[(
1
,
1
),(
1
,
2
)],[(
1
,
1
),(
2
,
2
)]]
convmodes
=
[
'valid'
,
'full'
]
do_convolve2
=
True
unroll
=
[(
0
,
0
),(
1
,
1
),(
2
,
2
),(
3
,
2
)]
#(batch,kern
)
unroll
=
[(
0
,
0
,
False
),(
0
,
0
,
True
),(
1
,
1
,
False
),(
2
,
2
,
False
),(
3
,
2
,
False
)]
#(batch,kern,patch
)
do_speed_test
=
False
# TODO: this version show a bug that was fixed
...
...
@@ -338,6 +338,11 @@ class TestConvOp(unittest.TestCase):
# nkerns = [2,2] # per output pixel
# ssizes = [(1,1),(2,2)]#2,2)]
# bsizes = [1,1] # batch size
# imshp_starts = [(1,10,10),(1,5,6)]
# kshpss = ([[2,3],[3,2]],[[2,2],[2,2]])
# nkernss = [[1,1],[1,1]] # per output pixel
N
.
set_printoptions
(
threshold
=
N
.
nan
)
# symbolic stuff
...
...
@@ -356,8 +361,8 @@ class TestConvOp(unittest.TestCase):
unroll_batch
=
[
1
,
2
,
4
,
5
,
10
,
20
]
unroll_kern
=
[
1
,
2
,
4
,
5
,
10
,
20
]
unroll_batch
=
[
1
,
2
,
5
]
unroll_kern
=
[
1
,
2
,
5
]
unroll_batch
=
[
1
,
4
,
5
]
unroll_kern
=
[
1
,
4
,
5
]
bsize
=
20
# batch size
imshp_start
=
(
1
,
48
,
48
)
#un square shape to test more corner case.
...
...
@@ -374,46 +379,86 @@ class TestConvOp(unittest.TestCase):
timing
=
N
.
zeros
((
len
(
unroll_batch
),
len
(
unroll_kern
),
3
))
t_b_k
=
[]
#calculate the timing with unrolling
t_
=
[[
7.60572791
,
3.95069814
,
3.74271464
],
[
4.05631089
,
2.90384555
,
2.93613672
],
[
3.90551591
,
2.92595196
,
3.00102282
]]
best
=
[]
worst
=
[]
best
=
[
0.52690219879150391
,
2.4266397953033447
]
worst
=
[
0.92042708396911621
,
6.8822150230407715
]
t_
=
[]
for
unroll_b
,
n_b
in
zip
(
unroll_batch
,
range
(
len
(
unroll_batch
))):
for
unroll_k
,
n_k
in
zip
(
unroll_kern
,
range
(
len
(
unroll_kern
))):
t_b_k
.
append
(
str
(
unroll_b
)
+
"/"
+
str
(
unroll_k
))
tctot
,
tpytot
,
ntot
=
[],[],[]
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp_start
,
kshps
,
nkerns
,
unroll_batch
=
unroll_b
,
unroll_kern
=
unroll_k
,
validate
=
validate
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
timing
[
n_b
,
n_k
]
=
[
sum
(
tctot
),
sum
(
tpytot
),
sum
(
ntot
)]
if
not
t_
:
tctot
,
tpytot
,
ntot
=
[],[],[]
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp_start
,
kshps
,
nkerns
,
unroll_batch
=
unroll_b
,
unroll_kern
=
unroll_k
,
validate
=
validate
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
if
unroll_b
==
4
and
unroll_k
==
4
:
print
"unroll 4/4"
,
tctot
best
=
tctot
if
unroll_b
==
1
and
unroll_k
==
1
:
print
"unroll 1/1"
,
tctot
worst
=
tctot
timing
[
n_b
,
n_k
]
=
[
sum
(
tctot
),
sum
(
tpytot
),
sum
(
ntot
)]
if
not
t_
:
t
=
timing
[:,:,
0
]
#We select only the c timing.
else
:
t
=
t_
t
=
N
.
asarray
(
t
)
#calculate the old timing
tctot
,
tpytot
,
ntot
=
0
,
0
,
0
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp_start
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
validate
=
validate
)
tctot
+=
tctot_
tpytot
+=
tpytot_
ntot
+=
ntot_
print
"old code timing
%.3
fs"
%
tctot
# print timing
t
=
timing
[:,:,
0
]
#We select only the c timing.
tctot_
=
[
0.52555489540100098
,
6.6634182929992676
]
# tctot_=[]
tctot
,
tpytot
,
ntot
=
[],[],[]
if
not
tctot_
:
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp_start
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
validate
=
validate
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
else
:
tctot
=
N
.
asarray
(
tctot_
)
print
"old code timing
%.3
fs"
%
sum
(
tctot
),
tctot
best
=
N
.
asarray
(
best
)
worst
=
N
.
asarray
(
worst
)
print
"timing for unrolled version"
print
t_b_k
print
t
print
"max
%.3
fs"
%
t
.
max
(),
"max param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmax
()]
print
"min
%.3
fs"
%
t
.
min
(),
"min param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmin
()]
print
"speedup vs (1/1)
%.3
fx, vs old
%.3
fx"
%
(
t
.
max
()
/
t
.
min
(),
tctot
/
t
.
min
())
print
"speedup vs (1/1)
%.3
fx, vs old
%.3
fx"
%
(
t
.
max
()
/
t
.
min
(),
sum
(
tctot
)
/
t
.
min
())
print
worst
/
best
,
tctot
/
best
tctot_patch
=
[]
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsize
,
imshp_start
,
kshps
,
nkerns
,
unroll_batch
=
0
,
unroll_kern
=
0
,
validate
=
validate
,
unroll_patch
=
2
)
tctot_patch
+=
[
tctot_
]
t_patch
=
sum
(
tctot_patch
)
print
"unroll_patch time"
,
tctot_patch
print
"speedup vs (1/1)
%.3
fx, vs old
%.3
fx"
%
(
t
.
max
()
/
t_patch
,
sum
(
tctot
)
/
t_patch
)
print
best
/
tctot_patch
,
worst
/
tctot_patch
print
best
print
worst
print
tctot
print
tctot_patch
return
for
i
in
range
(
len
(
kshpss
)):
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizess
[
i
],
range
(
len
(
ssizess
[
i
]))):
for
un_b
,
un_k
in
unroll
:
for
un_b
,
un_k
,
un_p
in
unroll
:
tctot_
,
tpytot_
,
ntot_
=
exec_multilayer_conv_nnet
(
conv_mode
,
ss
,
bsizes
[
i
],
imshp_starts
[
i
],
kshpss
[
i
],
nkernss
[
i
],
img
=
img
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
,
validate
=
True
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
...
...
@@ -428,6 +473,11 @@ class TestConvOp(unittest.TestCase):
d
=
N
.
asarray
(
ntot
)
/
tpytot
print
'speed up py theano(ConvOp) vs convolve2d:
%.3
fx'
%
d
.
mean
(),
d
def
init_data
(
self
,
shape
):
return
N
.
ones
(
shape
)
return
N
.
random
.
random
(
shape
)
def
test_ConvOpGrad
(
self
):
"""
test the gradient in float and double
...
...
@@ -442,9 +492,9 @@ class TestConvOp(unittest.TestCase):
kshps
=
[(
2
,
3
)]
imshps
=
[(
2
,
3
,
4
)]
modes
=
[
'valid'
,
'full'
]
unroll
=
[(
0
,
0
),(
1
,
1
),(
2
,
3
)]
unroll
=
[(
0
,
0
,
True
),(
1
,
1
,
False
),(
2
,
3
,
False
),(
1
,
1
,
False
),(
0
,
0
,
False
)]
#(batch,kern,patch)
ssizes
=
[(
1
,
1
),(
2
,
2
)]
for
typ
in
types
:
imgs
=
T
.
TensorType
(
typ
,
(
False
,
False
,
False
,
False
),
'imgs'
)
kerns
=
T
.
TensorType
(
typ
,
(
False
,
False
,
False
,
False
),
'kerns'
)
...
...
@@ -457,12 +507,12 @@ class TestConvOp(unittest.TestCase):
imgvals
=
N
.
array
(
N
.
random
.
random
(
N
.
hstack
((
bsize
,
imshp
))),
dtype
=
imgs
.
dtype
)
for
kshp
in
kshps
:
t
=
numpy
.
array
([
imshp
[
1
]
-
kshp
[
0
],
imshp
[
2
]
-
kshp
[
1
]])
kernvals
=
N
.
array
(
N
.
random
.
rand
(
nkern
,
visdim
,
kshp
[
0
],
kshp
[
1
]
),
dtype
=
kerns
.
dtype
)
kernvals
=
N
.
array
(
self
.
init_data
(
(
nkern
,
visdim
,
kshp
[
0
],
kshp
[
1
])
),
dtype
=
kerns
.
dtype
)
# 'full' mode should support kernels bigger than the input
if
mode
==
'valid'
and
(
t
<
0
)
.
any
():
continue
for
un_b
,
un_k
in
unroll
:
for
un_b
,
un_k
,
un_p
in
unroll
:
for
ss
in
ssizes
:
print
'test_ConvOpGrad'
print
'mode type:'
,
mode
,
typ
...
...
@@ -476,14 +526,14 @@ class TestConvOp(unittest.TestCase):
def
test_i
(
imgs
):
convop
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
)
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
)
return
convop
(
imgs
,
kernvals
)
def
test_k
(
kerns
):
convop
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
ss
[
0
],
ss
[
1
],
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
)
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
)
return
convop
(
imgvals
,
kerns
)
print
mode
,
imshp
,
kshp
,
un_b
,
un_k
,
ss
#TODO the tolerance needed to pass is very high for float32(0.17). Is this acceptable? Expected?
tol
=
None
if
typ
==
"float32"
:
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论