Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
4bded893
提交
4bded893
authored
6月 02, 2009
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
差异文件
merge
上级
86e61713
5a503ffe
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
388 行增加
和
53 行删除
+388
-53
conv.py
theano/sandbox/conv.py
+274
-11
test_conv.py
theano/sandbox/test_conv.py
+108
-42
basic.py
theano/tensor/basic.py
+6
-0
没有找到文件。
theano/sandbox/conv.py
浏览文件 @
4bded893
...
...
@@ -16,8 +16,11 @@ class ConvOp(Op):
In development.
"""
def
__init__
(
self
,
imshp
,
kshp
,
nkern
,
bsize
,
dx
,
dy
,
output_mode
=
'valid'
):
def
__init__
(
self
,
imshp
,
kshp
,
nkern
,
bsize
,
dx
,
dy
,
output_mode
=
'valid'
,
unroll_batch
=
0
,
unroll_kern
=
0
):
"""
unroll_batch: if > 0, use the version of the C code that unrolls the batch loop by this factor; bsize must then be a multiple of unroll_batch. Defaults to 0 (the unrolled version is not used).
unroll_kern: same as unroll_batch, but unrolls the kernel loop; nkern must then be a multiple of unroll_kern.
"""
imshp
=
tuple
(
imshp
)
if
len
(
imshp
)
==
2
:
self
.
imshp
=
(
1
,)
+
imshp
...
...
@@ -31,6 +34,14 @@ class ConvOp(Op):
self
.
bsize
=
bsize
self
.
dx
=
dx
self
.
dy
=
dy
self
.
unroll_batch
=
unroll_batch
self
.
unroll_kern
=
unroll_kern
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
raise
Exception
(
"unroll_batch(
%
s) should be 0 or a multiple of bsize(
%
s)"
%
(
str
(
self
.
unroll_batch
),
str
(
self
.
bsize
)))
if
self
.
unroll_kern
>
0
and
self
.
nkern
%
unroll_kern
!=
0
:
raise
Exception
(
"unroll_kern(
%
s) should be 0 or a multiple of nkern(
%
s)"
%
(
str
(
self
.
unroll_kern
),
str
(
self
.
nkern
)))
if
self
.
dx
!=
1
or
self
.
dy
!=
1
:
print
"Warning, dx!=1 or dy!=1 only supported in python mode!"
raise
NotImplementedError
()
...
...
@@ -164,7 +175,12 @@ using namespace std;
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
d
[
"type"
]
=
"float"
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"type"
]
=
"double"
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
if
self
.
unroll_kern
>
0
and
self
.
unroll_batch
>
0
:
print
"return unrolled batch and kern code by"
,
self
.
unroll_batch
,
self
.
unroll_kern
return
gen_conv_code_unroll_batch_kern
(
d
,
self
.
unroll_batch
,
self
.
unroll_kern
)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5? under 10?
if
self
.
out_mode
==
'valid'
:
return
_conv_op_code_valid_gemm
%
d
else
:
...
...
@@ -344,11 +360,11 @@ for(int b=0;b< %(self_bsize)s;b++){
int ind0 = (new_m-j);
if(mode==FULL){
const
%(type)
s * idx
2
=&hvals[j*dim_ker[1]];
const
%(type)
s * idx
_hvals
=&hvals[j*dim_ker[1]];
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
sum+= idx
2
[k] * fill_value;
sum+= idx
_hvals
[k] * fill_value;
}
}else{
//do the part where kernel is to the right of the img
...
...
@@ -357,27 +373,27 @@ for(int b=0;b< %(self_bsize)s;b++){
if(fill_value!=0){
for(k=0;k<max_k;k++){
sum+= idx
2
[k]*fill_value;
sum+= idx
_hvals
[k]*fill_value;
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]);
const
%(type)
s * idx
1
=&in[ind0*dim_im[1]];
const
%(type)
s * idx
_in
=&in[ind0*dim_im[1]];
for (int ind1=n-k; k<max_k; k++,ind1--) {
sum+= idx
2[k] * idx1
[ind1];
sum+= idx
_hvals[k] * idx_in
[ind1];
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker[1];k++) sum+= idx
2
[k]*fill_value;
for(;k<dim_ker[1];k++) sum+= idx
_hvals
[k]*fill_value;
}
}else{
const
%(type)
s* idx
1
=&in[ind0*dim_im[1]]; //JB: should be dim_im[1] right? (was dim_im[0])
const
%(type)
s* idx
2
=&hvals[j*dim_ker[1]];
const
%(type)
s* idx
_in
=&in[ind0*dim_im[1]]; //JB: should be dim_im[1] right? (was dim_im[0])
const
%(type)
s* idx
_hvals
=&hvals[j*dim_ker[1]];
int new_n = (n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
sum+=idx
2[k]*idx1
[last];
sum+=idx
_hvals[k]*idx_in
[last];
}
}
}//for j
...
...
@@ -614,3 +630,250 @@ free(kbuf);
}
Py_XDECREF(img2d);
"""
def gen_conv_code_unroll_batch_kern(d, unroll_bsize=1, unroll_ksize=1):
    """Generate the C code for ConvOp with the batch and kernel loops unrolled.

    The generated code steps the batch loop by ``unroll_bsize`` and the
    kernel loop by ``unroll_ksize`` and, inside the loop body, repeats every
    per-image / per-kernel statement once per unrolled iteration.

    :param d: mapping of template substitutions.  The templates below read the
        keys ``type``, ``img2d``, ``filtersflipped``, ``z``, ``fail``,
        ``affectation``, ``self_bsize``, ``self_nkern``, ``self_out_mode``,
        ``self_outshp0``/``1``, ``self_imshp0``/``1``/``2`` and
        ``self_kshp0``/``1``.  The mapping is NOT modified: the function works
        on a private copy.
    :param unroll_bsize: unroll factor of the batch loop (must be > 0).
    :param unroll_ksize: unroll factor of the kernel loop (must be > 0).
    :return: the C source as a single string.
    :raises AssertionError: if one of the unroll factors is not > 0.
    """
    assert unroll_bsize > 0 and unroll_ksize > 0

    # Work on a copy so that the helper keys written below (unroll_*) never
    # leak into the caller's dictionary.
    env = dict(d)
    env["unroll_bsize"] = unroll_bsize
    env["unroll_ksize"] = unroll_ksize

    def my_dup(st, size):
        """Return `st` formatted `size` times, with unroll_iter = 0..size-1."""
        pieces = []
        for i in range(size):
            env["unroll_iter"] = i
            pieces.append(st % env)
        return "".join(pieces) + "\n"

    def my_dup2(st):
        """Return `st` formatted once per (batch, kernel) unrolled pair.

        unroll_biter / unroll_kiter are the batch / kernel offsets and
        unroll_iter is the flattened index (biter * unroll_ksize + kiter).
        """
        pieces = []
        flat = 0
        for i in range(unroll_bsize):
            env["unroll_biter"] = i
            for j in range(unroll_ksize):
                env["unroll_kiter"] = j
                env["unroll_iter"] = flat
                flat += 1
                pieces.append(st % env)
        return "".join(pieces) + "\n"

    n_unrolled = unroll_bsize * unroll_ksize
    fill_line = ("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k]"
                 " * fill_value;")

    # NOTE(review): in the template below `*PyArray_DIMS(z)!=4` compares the
    # size of the first dimension with 4; it looks like the number of
    # dimensions was intended -- confirm before changing the emitted code.
    # NOTE(review): `if(!dims)` tests a stack array and can never be true.
    # NOTE(review): the %(fail)s exits do not appear to release img2d /
    # filtersflipped -- verify against what %(fail)s expands to.
    parts = ["""
int mode=-1,typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const %(type)s fill_value = 0;

int type_im=PyArray_TYPE(%(img2d)s);
int type_ker=PyArray_TYPE(%(filtersflipped)s);

npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s};

PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;

PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
string s="%(self_out_mode)s";

if(%(img2d)s->nd==2){
  img2d_dim[3]=%(img2d)s->dimensions[1];
  img2d_dim[2]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==3){
  img2d_dim[3]=%(img2d)s->dimensions[2];
  img2d_dim[2]=%(img2d)s->dimensions[1];
  img2d_dim[0]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==4){
  img2d_dim[3]=%(img2d)s->dimensions[3];
  img2d_dim[2]=%(img2d)s->dimensions[2];
  img2d_dim[1]=%(img2d)s->dimensions[1];
  img2d_dim[0]=%(img2d)s->dimensions[0];
}else {
    PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
    %(fail)s;
}

if(%(filtersflipped)s->nd==3){
  kerns_dim[3]=%(filtersflipped)s->dimensions[2];
  kerns_dim[2]=%(filtersflipped)s->dimensions[1];
  kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else if(%(filtersflipped)s->nd==4){
  kerns_dim[3]=%(filtersflipped)s->dimensions[3];
  kerns_dim[2]=%(filtersflipped)s->dimensions[2];
  kerns_dim[1]=%(filtersflipped)s->dimensions[1];
  kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{
    PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape");
    %(fail)s;
}

img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s))
     || (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(%(type)s))){
    contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
    Py_DECREF(img2d);
    img2d = contig;
    if (!PyArray_ISCONTIGUOUS(img2d)){
        PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
        %(fail)s;
    }
}
img2d_arr = (PyArrayObject*)img2d;

filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s))
     || (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(%(type)s))){
    contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
    Py_DECREF(filtersflipped);
    filtersflipped = contig;
    if (!PyArray_ISCONTIGUOUS(filtersflipped)){
        PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
        %(fail)s;
    }
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;

if(s=="valid") mode=0;
else if(s=="full") mode=2;
else {PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;};
typenum = PyArray_ObjectType((PyObject*)%(img2d)s, 0);
typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type"); %(fail)s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match"); %(fail)s;}

if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s;
if ((!%(z)s)
  || *PyArray_DIMS(%(z)s)!=4
  ||(%(z)s->dimensions[0] != %(self_bsize)s)
  ||(%(z)s->dimensions[1] != %(self_nkern)s)
  ||(%(z)s->dimensions[2] != dim_zz[0])
  || (%(z)s->dimensions[3] != dim_zz[1])
  )
{
  if (%(z)s) Py_DECREF(%(z)s);
  npy_intp dims[4] = {0,0,0,0};
  if(!dims) %(fail)s;
  dims[0]=%(self_bsize)s;
  dims[1]=%(self_nkern)s;
  dims[2]=dim_zz[0];
  dims[3]=dim_zz[1];
  %(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
  //PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}

int Os[2];
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;}
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;}

for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
  for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){

    //assertions
    if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
    if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
    if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
    if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s;
""" % env]

    # One output pointer per (batch, kernel) pair, zero-initialised.
    parts.append(my_dup2(
        "%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)"
        "(PyArray_GETPTR2(%(z)s,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));"))
    parts.append(my_dup(
        "for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;",
        n_unrolled))
    parts.append("""
    for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
""" % env)

    # One input pointer per unrolled image, one filter pointer per unrolled
    # kernel.
    parts.append(my_dup(
        "const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)"
        "(PyArray_GETPTR2(img2d,b+%(unroll_iter)s,stack_size));",
        unroll_bsize))
    parts.append(my_dup(
        "const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)"
        "(PyArray_GETPTR2(filtersflipped,n_kern+%(unroll_iter)s,stack_size));",
        unroll_ksize))
    parts.append("""
      int new_m;

      for (int m=0; m < Os[0]; m++) {
        // Reposition index into input image based on requested output size
        if (mode == FULL) new_m = m ;
        else new_m = (m+dim_ker[0]-1);

        for (int n=0; n < Os[1]; n++) {  // loop over columns
""" % env)

    # One accumulator per (batch, kernel) pair.
    parts.append(my_dup("%(type)s sum%(unroll_iter)s=0;", n_unrolled))
    parts.append("""
          // Sum over kernel, if index into image is out of bounds
          // fill with the value
          for (int j=0; j < dim_ker[0]; j++) {
            int ind0 = (new_m-j);

            if(mode==FULL){
""" % env)
    parts.append(my_dup(
        "const %(type)s * idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",
        unroll_ksize))
    parts.append("""
              if(ind0 < 0 || ind0 >= dim_im[0]){
                if(fill_value!=0)
                  for (int k=0; k < dim_ker[1]; k++) {
""" % env)
    parts.append(my_dup2(fill_line))
    parts.append("""
                  }
              }else{
                //do the part where kernel is to the right of the img
                int k=0,max_k=max((int)(n-dim_im[1])+1,0);
                if(fill_value!=0){

                  for(k=0;k<max_k;k++){
""" % env)
    parts.append(my_dup2(fill_line))
    parts.append("""
                  }
                }else {k=max_k;}

                //do the part where the kernel is on the img
                max_k=min(n+1,(int)dim_ker[1]);
""" % env)
    parts.append(my_dup(
        "const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];",
        unroll_bsize))
    parts.append("""
                for (int ind1=n-k; k<max_k; k++,ind1--) {
""" % env)
    parts.append(my_dup2(
        "sum%(unroll_iter)s+= idx_hvals%(unroll_kiter)s[k]"
        " * idx_in%(unroll_biter)s[ind1];"))
    parts.append("""
                }
                //do the part to the left of the img
                if(fill_value!=0)
                  for(;k<dim_ker[1];k++){
""" % env)
    parts.append(my_dup2(fill_line))
    parts.append("""
                  }
              }
            }else{//valid mode
""" % env)
    parts.append(my_dup(
        "const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];",
        unroll_bsize))
    parts.append(my_dup(
        "const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",
        unroll_ksize))
    parts.append("""
              int new_n = (n+dim_ker[1]-1);

              for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
""" % env)
    parts.append(my_dup2(
        "sum%(unroll_iter)s+=idx_hvals%(unroll_kiter)s[k]"
        "*idx_in%(unroll_biter)s[last];"))
    parts.append("""
              }
            }

          }//for j
""" % env)

    # Store every accumulator into its own output plane.
    parts.append(my_dup(
        "out%(unroll_iter)s[m*dim_zz[1]+n] %(affectation)s sum%(unroll_iter)s;",
        n_unrolled))
    parts.append("""
        }//for n
      }//for m
    }//for stack_size
  }//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
""")
    return "".join(parts)
theano/sandbox/test_conv.py
浏览文件 @
4bded893
...
...
@@ -63,7 +63,6 @@ class TestConvOp(unittest.TestCase):
# fixed parameters
bsize
=
7
# batch size
imshp
=
(
5
,
4
)
# image shape
print
>>
sys
.
stderr
,
"WARNING: only square shape tested"
kshps
=
[(
2
,
3
)]
nkern
=
6
# nb kernel
ssizes
=
[(
1
,
1
)]
#step size
...
...
@@ -72,7 +71,6 @@ class TestConvOp(unittest.TestCase):
# fixed parameters
bsize
=
7
# batch size
imshp
=
(
5
,
4
)
# image shape
print
>>
sys
.
stderr
,
"WARNING: only square shape tested"
kshps
=
[(
2
,
3
)]
nkern
=
6
# nb kernel
ssizes
=
[(
1
,
1
)]
#step size
...
...
@@ -112,7 +110,7 @@ class TestConvOp(unittest.TestCase):
# now test with real values
img2d
=
1
+
N
.
arange
(
bsize
*
N
.
prod
(
imshp
))
.
reshape
((
bsize
,)
+
imshp
)
print
'img2d'
,
img2d
#
print 'img2d', img2d
img1d
=
img2d
.
reshape
(
bsize
,
-
1
)
# create filters (need to be flipped to use convolve2d)
...
...
@@ -121,12 +119,12 @@ class TestConvOp(unittest.TestCase):
# compute with new convolve2 (no timing info)
output4
,
outshp4
=
convolve2
(
kerns
,
kshp
,
nkern
,
input
,
\
imshp
,
bsize
,
(
1
,
1
),
bias
=
bias
,
mode
=
conv_mode
)
print
'output4'
,
output4
#
print 'output4', output4
ttime1
=
time
.
time
()
f
=
function
([
kerns
,
bias
,
input
],
output4
)
out4
=
f
(
filtersflipped
.
reshape
(
nkern
,
-
1
),
biasvals
,
img1d
)
print
'out4'
,
out4
,
img1d
,
filtersflipped
#
print 'out4', out4, img1d, filtersflipped
tconv2
+=
[
time
.
time
()
-
ttime1
]
out4
=
out4
.
reshape
(
bsize
,
nkern
,
outshp4
[
1
],
outshp4
[
2
])
out4
=
out4
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
...
...
@@ -208,12 +206,12 @@ class TestConvOp(unittest.TestCase):
#test speed
# bsize = 10 # batch size
# imshp_start = (1,50,
50)
# kshps = ([1
2,12],[12,12])
# imshp_start = (1,50,
49)#un square shape to test more corner case.
# kshps = ([1
1,12],[12,11])#un square shape to test more corner case.
# nkerns = [20,20] # per output pixel
# ssizes = [(1,1),(1,1)]#(2,2) bugged
# ssizes = [(1,1),
]#
(1,1)]#(2,2) bugged
# convmodes = ['valid','full']
# do_theano=
Tru
e
# do_theano=
Fals
e
N
.
set_printoptions
(
threshold
=
N
.
nan
)
...
...
@@ -221,23 +219,25 @@ class TestConvOp(unittest.TestCase):
kerns
=
[
T
.
matrix
(),
T
.
dmatrix
()]
img
=
T
.
dmatrix
()
rng
=
N
.
random
.
RandomState
(
3423489
)
tctot
,
tpytot
,
t2ctot
,
t2pytot
,
ntot
,
convtot
=
[],
[],
[],
[],
[],
[]
tctot
,
tpytot
,
ntot
=
[],
[],
[]
dmatrix4
=
T
.
TensorType
(
'float64'
,
(
False
,
False
,
False
,
False
))
inputs4
=
dmatrix4
()
kerns4
=
dmatrix4
()
assert
len
(
kshps
)
==
len
(
nkerns
)
==
len
(
kerns
)
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
def
do_test
(
conv_mode
,
ss
,
unroll_batch
=
0
,
unroll_kern
=
0
,
img
=
img
,
validate
=
True
,
conv_op_py
=
False
):
# build actual input images
imgval
=
rng
.
rand
(
bsize
,
imshp_start
[
0
],
imshp_start
[
1
],
imshp_start
[
2
])
imshp
=
imshp_start
# for each layer
for
kshp
,
kern
,
nkern
,
n_layer
in
zip
(
kshps
,
kerns
,
nkerns
,
range
(
len
(
kerns
))):
ntot
=
0
tctot
=
0
tpytot
=
0
for
kshp
,
kern
,
nkern
,
n_layer
in
zip
(
kshps
,
kerns
,
nkerns
,
range
(
len
(
kerns
))):
print
'************* layer
%
i ***************'
%
n_layer
print
conv_mode
,
ss
,
n_layer
,
kshp
,
nkern
...
...
@@ -259,14 +259,15 @@ class TestConvOp(unittest.TestCase):
time1
=
time
.
time
()
outval
=
N
.
zeros
(
N
.
r_
[
bsize
,
outshp
])
val
=
_valfrommode
(
conv_mode
)
bval
=
_bvalfromboundary
(
'fill'
)
for
b
in
range
(
bsize
):
# loop over batches
for
n
in
range
(
nkern
):
# loop over filters
for
i
in
range
(
imshp
[
0
]):
# loop over input feature maps
outval
[
b
,
n
,
...
]
+=
_convolve2d
(
\
imgval
[
b
,
i
,
...
],
w_flip
[
n
,
i
,
...
],
1
,
val
,
bval
,
0
)[
0
::
ss
[
0
],
0
::
ss
[
1
]]
ntot
+=
[
time
.
time
()
-
time1
]
if
validate
:
val
=
_valfrommode
(
conv_mode
)
bval
=
_bvalfromboundary
(
'fill'
)
for
b
in
range
(
bsize
):
# loop over batches
for
n
in
range
(
nkern
):
# loop over filters
for
i
in
range
(
imshp
[
0
]):
# loop over input feature maps
outval
[
b
,
n
,
...
]
+=
_convolve2d
(
\
imgval
[
b
,
i
,
...
],
w_flip
[
n
,
i
,
...
],
1
,
val
,
bval
,
0
)[
0
::
ss
[
0
],
0
::
ss
[
1
]]
ntot
+=
time
.
time
()
-
time1
if
do_theano
:
####### test with new sp.convolve2 function ######
...
...
@@ -286,18 +287,16 @@ class TestConvOp(unittest.TestCase):
assert
(
N
.
abs
(
hidval
-
hidval1
)
<
1e-5
)
.
all
()
temp
=
N
.
abs
(
outval
.
reshape
(
bsize
,
-
1
)
-
hidval
)
assert
(
temp
<
1e-5
)
.
all
()
if
validate
:
assert
(
temp
<
1e-5
)
.
all
()
else
:
hid
=
img
#we don't need it, but it make the flow easier flow
convtot
+=
[
-
1
]
tctot
+=
[
-
1
]
tpytot
+=
[
-
1
]
hidval
=
outval
.
copy
()
#to keep the same memory
hidval1
=
outval
.
copy
()
# ConvOp
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
)(
inputs4
,
kerns4
)
conv_op
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
conv_mode
,
unroll_batch
=
unroll_batch
,
unroll_kern
=
unroll_kern
)(
inputs4
,
kerns4
)
l1shp
=
N
.
hstack
((
nkern
,
getFilterOutShp
(
imshp
,
kshp
,
ss
,
conv_mode
)))
propup2
=
function
([
inputs4
,
kerns4
],
conv_op
)
...
...
@@ -306,30 +305,90 @@ class TestConvOp(unittest.TestCase):
time1
=
time
.
time
()
hidval2_
=
propup2
(
imgval
,
w_flip
)
hidval2
=
hidval2_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
t
2ctot
+=
[
time
.
time
()
-
time1
]
t
ctot
+=
time
.
time
()
-
time1
time1
=
time
.
time
()
hidval3_
=
propup3
(
imgval
,
w_flip
)
hidval3
=
hidval3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
t2pytot
+=
[
time
.
time
()
-
time1
]
assert
(
N
.
abs
(
hidval2
-
hidval3
)
<
1e-5
)
.
all
()
if
conv_op_py
:
time1
=
time
.
time
()
hidval3_
=
propup3
(
imgval
,
w_flip
)
hidval3
=
hidval3_
[:,:,
0
::
ss
[
0
],
0
::
ss
[
1
]]
tpytot
+=
time
.
time
()
-
time1
assert
(
N
.
abs
(
hidval2
-
hidval3
)
<
1e-5
)
.
all
()
else
:
tpytot
+=
0
temp
=
N
.
abs
(
outval
-
hidval2
)
assert
(
temp
<
1e-5
)
.
all
()
temp
=
N
.
abs
(
outval
-
hidval3
)
assert
(
temp
<
1e-5
)
.
all
()
if
validate
:
temp
=
N
.
abs
(
outval
-
hidval2
)
assert
(
temp
<
1e-5
)
.
all
()
if
validate
and
conv_op_py
:
temp
=
N
.
abs
(
outval
-
hidval3
)
assert
(
temp
<
1e-5
)
.
all
()
img
,
imshp
=
hid
,
tuple
(
outshp
)
imgval
=
outval
.
reshape
(
bsize
,
outshp
[
0
],
outshp
[
1
],
outshp
[
2
])
return
tctot
,
tpytot
,
ntot
if
False
:
# calculate the speed-up of the different combinations of unroll factors
# set the parameters below to the same values you will try.
validate
=
False
# we don't validate the result to have it much faster!
unroll_batch
=
[
0
,
1
,
2
,
4
,
5
,
10
,
20
]
unroll_kern
=
[
0
,
2
,
4
,
5
,
10
,
20
]
# unroll_batch = [0,2,5]
# unroll_kern = [0,2,5]
bsize
=
20
# batch size
imshp_start
=
(
1
,
50
,
49
)
#un square shape to test more corner case.
kshps
=
([
11
,
12
],[
12
,
11
])
#un square shape to test more corner case.
nkerns
=
[
20
,
20
]
# per output pixel
ssizes
=
[(
1
,
1
),]
#(1,1)]#(2,2) bugged
convmodes
=
[
'valid'
,
'full'
]
do_theano
=
False
a
=
T
.
dmatrix
()
kerns
=
[
a
for
i
in
nkerns
]
assert
len
(
kshps
)
==
len
(
nkerns
)
==
len
(
kerns
)
timing
=
N
.
zeros
((
len
(
unroll_batch
),
len
(
unroll_kern
),
3
))
t_b_k
=
[]
for
unroll_b
,
n_b
in
zip
(
unroll_batch
,
range
(
len
(
unroll_batch
))):
for
unroll_k
,
n_k
in
zip
(
unroll_kern
,
range
(
len
(
unroll_kern
))):
t_b_k
+=
[
str
(
unroll_b
)
+
"/"
+
str
(
unroll_k
)]
tctot
,
tpytot
,
ntot
=
[],[],[]
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
do_test
(
conv_mode
,
ss
,
unroll_batch
=
unroll_b
,
unroll_kern
=
unroll_k
,
validate
=
validate
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
timing
[
n_b
,
n_k
]
=
[
sum
(
tctot
),
sum
(
tpytot
),
sum
(
ntot
)]
# print timing
t
=
timing
[:,:,
0
]
#We select only the c timing.
print
t_b_k
print
t
print
"max
%.3
fs"
%
t
.
max
(),
"max param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmax
()]
print
"min
%.3
fs"
%
t
.
min
(),
"min param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmin
()]
print
"speedup
%.3
fx"
%
(
t
.
max
()
/
t
.
min
())
return
for
conv_mode
,
n_mode
in
zip
(
convmodes
,
range
(
len
(
convmodes
))):
for
ss
,
n_ss
in
zip
(
ssizes
,
range
(
len
(
ssizes
))):
tctot_
,
tpytot_
,
ntot_
=
do_test
(
conv_mode
,
ss
)
tctot
+=
[
tctot_
]
tpytot
+=
[
tpytot_
]
ntot
+=
[
ntot_
]
print
'**** Multilayer Convolution Profiling Results ****'
print
'Numpy convolve2d processing time:
%.3
fs'
%
sum
(
ntot
),
ntot
print
'c Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
t2ctot
),
t2ctot
print
'py Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
t2pytot
),
t2pytot
print
'convolve processing time:
%.3
fs'
%
sum
(
convtot
),
convtot
d
=
N
.
asarray
(
ntot
)
/
t2ctot
print
'c Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
tctot
),
tctot
print
'py Theano(ConvOp) processing time:
%.3
fs'
%
sum
(
tpytot
),
tpytot
d
=
N
.
asarray
(
ntot
)
/
tctot
print
'speed up c theano(ConvOp) vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
d
=
N
.
asarray
(
ntot
)
/
t
2
pytot
d
=
N
.
asarray
(
ntot
)
/
tpytot
print
'speed up py theano(ConvOp) vs convolve2d:
%.3
f'
%
d
.
mean
(),
d
...
...
@@ -393,3 +452,10 @@ class TestConvOp(unittest.TestCase):
kernvals
=
kernvals
.
reshape
(
nkern
,
-
1
)
utt
.
verify_grad
(
testf
,
[
imgvals
,
kernvals
])
if
__name__
==
'__main__'
:
t
=
TestConvOp
(
"test_convolution"
)
t
.
test_convolution
()
# t.test_multilayer_conv()
# from theano.tests import main
# main("test_sp")
theano/tensor/basic.py
浏览文件 @
4bded893
...
...
@@ -1956,6 +1956,12 @@ def horizontal_stack(*args):
L{TensorType}s must have the same shape along all dimensions but the
second.
"""
# Note: 'horizontal_stack' and 'vertical_stack' do not behave exactly like
# Numpy's hstack and vstack functions. This is intended, because Numpy's
# functions have potentially confusing/incoherent behavior (try them on 1D
# arrays). If this is fixed in a future version of Numpy, it may be worth
# trying to get closer to Numpy's way of doing things. In the meantime,
# better keep different names to emphasize the implementation divergences.
assert
len
(
args
)
>=
2
for
arg
in
args
:
assert
arg
.
type
.
ndim
==
2
return
concatenate
(
args
,
axis
=
1
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论