Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
f20f6106
提交
f20f6106
authored
5月 31, 2009
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
added blas implementation for conv
上级
96f2ff78
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
300 行增加
和
99 行删除
+300
-99
conv.py
theano/sandbox/conv.py
+300
-99
没有找到文件。
theano/sandbox/conv.py
浏览文件 @
f20f6106
...
...
@@ -131,12 +131,6 @@ class ConvOp(Op):
def
c_headers
(
self
):
return
[
'"Python.h"'
,
'"numpy/noprefix.h"'
]
def
c_code_cleanup
(
self
,
node
,
name
,
input_names
,
output_names
,
sub
):
"""
TODO: implement from c_code()???
"""
return
""
def
c_support_code
(
self
):
return
"""
#define STRIDES(arr) ((arr)->strides)
...
...
@@ -145,13 +139,68 @@ class ConvOp(Op):
#define VALID 0
#include <iostream>
using namespace std;
"""
"""
+
tensor
.
blas
.
blas_header_text
()
def
c_libraries
(
self
):
return
tensor
.
blas
.
ldflags
()
def
c_code
(
self
,
node
,
name
,
(
img2d
,
filtersflipped
),
(
z
,
),
sub
):
if
node
.
inputs
[
0
]
.
type
.
dtype
!=
node
.
inputs
[
1
]
.
type
.
dtype
:
raise
NotImplementedError
()
code
=
"""
int mode=-1,typenum;
PyArrayObject *ain1=NULL, *ain2=NULL, *aout=NULL;
d
=
locals
()
d
.
update
(
sub
)
d
[
"self_out_mode"
]
=
self
.
out_mode
d
[
"self_bsize"
]
=
self
.
bsize
d
[
"self_nkern"
]
=
self
.
nkern
d
[
"self_dx"
]
=
self
.
dx
d
[
"self_dy"
]
=
self
.
dy
d
[
"self_outshp0"
]
=
self
.
outshp
[
0
]
d
[
"self_outshp1"
]
=
self
.
outshp
[
1
]
d
[
"self_imshp0"
]
=
self
.
imshp
[
0
]
d
[
"self_imshp1"
]
=
self
.
imshp
[
1
]
d
[
"self_imshp2"
]
=
self
.
imshp
[
2
]
d
[
"self_kshp0"
]
=
self
.
kshp
[
0
]
d
[
"self_kshp1"
]
=
self
.
kshp
[
1
]
d
[
"affectation"
]
=
"="
if
self
.
imshp
[
0
]
==
1
else
"+="
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
d
[
"type"
]
=
"float"
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"type"
]
=
"double"
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
if
self
.
out_mode
==
'valid'
:
return
_conv_op_code_valid_gemm
%
d
else
:
return
_conv_op_code_a
%
d
def
convolve2
(
kerns
,
kshp
,
nkern
,
images
,
imshp
,
bsize
,
step
=
(
1
,
1
),
bias
=
None
,
mode
=
'valid'
):
# if imshp, is a tuple, images contains one input dimension
nvis_dim
=
1
if
len
(
imshp
)
!=
3
else
imshp
[
0
]
# all these reshapes should happen in place
imrshp
=
tensor
.
as_tensor
([
bsize
]
+
list
(
imshp
))
imtensor
=
tensor
.
reshape
(
images
,
imrshp
)
kernrshp
=
tensor
.
as_tensor
([
nkern
,
nvis_dim
]
+
list
(
kshp
))
kerntensor
=
tensor
.
reshape
(
kerns
,
kernrshp
)
print
'*** convolve2 ***'
print
'imshp = '
,
imshp
print
'kshp = '
,
kshp
print
'nkern = '
,
nkern
print
'bsize = '
,
bsize
convop
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
mode
)
convout
=
convop
(
imtensor
,
kerntensor
)
if
bias
:
biastensor
=
tensor
.
DimShuffle
((
False
,),
(
'x'
,
0
,
'x'
,
'x'
),
inplace
=
True
)(
bias
)
convout
=
convout
+
biastensor
rval
=
tensor
.
flatten
(
convout
,
2
)
return
rval
,
N
.
hstack
((
nkern
,
convop
.
outshp
))
_conv_op_code_a
=
"""
int mode=-1,typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const
%(type)
s fill_value = 0;
int type_im=PyArray_TYPE(
%(img2d)
s);
...
...
@@ -170,7 +219,7 @@ PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d
, *contig, *filtersflipped
;
PyObject *img2d
=NULL, *contig, *filtersflipped=NULL
;
string s="
%(self_out_mode)
s";
if(
%(img2d)
s->nd==2){
...
...
@@ -205,33 +254,40 @@ if(%(filtersflipped)s->nd==3){
}
img2d = PyArray_Newshape(
%(img2d)
s,&img2d_shape, PyArray_CORDER);
if (!PyArray_ISCONTIGUOUS(img2d)){
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(
%(type)
s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)
s;
}
}
if (!PyArray_ISCONTIGUOUS(img2d)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)
s;
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(
%(filtersflipped)
s,&kerns_shape, PyArray_CORDER);
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(
%(type)
s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)
s;
}
}
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)
s;
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(s=="valid") mode=0;
else if(s=="full") mode=2;
else {PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported");
%(fail)
s;};
typenum = PyArray_ObjectType((PyObject*)
%(img2d)
s, 0);
typenum = PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
typenum
_f
= PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;}
if (!img2d)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
...
...
@@ -244,7 +300,7 @@ if ((!%(z)s)
)
{
if (
%(z)
s) Py_DECREF(
%(z)
s);
npy_intp dims[4] = {0,0,0,0};
//(npy_intp *)malloc(4*sizeof(
%(type)
s));
npy_intp dims[4] = {0,0,0,0};
if(!dims)
%(fail)
s;
dims[0]=
%(self_bsize)
s;
dims[1]=
%(self_nkern)
s;
...
...
@@ -268,33 +324,13 @@ for(int b=0;b< %(self_bsize)s;b++){
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != sizeof(
%(type)
s))
%(fail)
s;
aout = (PyArrayObject *)PyArray_SimpleNewFromData(2,dim_zz,
typenum,PyArray_GETPTR2(
%(z)
s,b,n_kern));
if (aout == NULL)
%(fail)
s;
%(type)
s *out=(
%(type)
s *)(aout->data);
%(type)
s * __restrict__ out=(
%(type)
s *)(PyArray_GETPTR2(
%(z)
s,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<
%(self_imshp0)
s;stack_size++){
ain1 = (PyArrayObject *)PyArray_SimpleNewFromData(2,dim_im,
type_im,PyArray_GETPTR2(img2d,b,stack_size));
ain2 = (PyArrayObject *)PyArray_SimpleNewFromData(2,dim_ker,
type_ker,PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
if (ain1 == NULL)
%(fail)
s;
if (ain2 == NULL)
%(fail)
s;
if (dim_im[0] != ((PyArrayObject*)img2d)->dimensions[2])
%(fail)
s;
if (dim_im[1] != ((PyArrayObject*)img2d)->dimensions[3])
%(fail)
s;
if (dim_ker[0] !=((PyArrayObject*)filtersflipped)->dimensions[2])
%(fail)
s;
if (dim_ker[1] !=((PyArrayObject*)filtersflipped)->dimensions[3])
%(fail)
s;
if (ain1->strides[0] != ain1->dimensions[1] * sizeof(
%(type)
s))
%(fail)
s;
if (ain2->strides[0] != ain2->dimensions[1] * sizeof(
%(type)
s))
%(fail)
s;
if (aout->strides[0] != aout->dimensions[1] * sizeof(
%(type)
s))
%(fail)
s;
if (ain1->strides[1] != sizeof(
%(type)
s))
%(fail)
s;
if (ain2->strides[1] != sizeof(
%(type)
s))
%(fail)
s;
if (aout->strides[1] != sizeof(
%(type)
s))
%(fail)
s;
%(type)
s *in=(
%(type)
s *)(ain1->data);
%(type)
s *hvals=(
%(type)
s *)(ain2->data);
const
%(type)
s * __restrict__ in=(
%(type)
s *)(PyArray_GETPTR2(img2d,b,stack_size));
const
%(type)
s * __restrict__ hvals=(
%(type)
s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
int new_m;
...
...
@@ -312,7 +348,7 @@ for(int b=0;b< %(self_bsize)s;b++){
int ind0 = (new_m-j);
if(mode==FULL){
%(type)
s * idx2=&hvals[j*dim_ker[1]];
const
%(type)
s * idx2=&hvals[j*dim_ker[1]];
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
...
...
@@ -331,7 +367,7 @@ for(int b=0;b< %(self_bsize)s;b++){
//do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]);
%(type)
s * idx1=&in[ind0*dim_im[1]];
const
%(type)
s * idx1=&in[ind0*dim_im[1]];
for (int ind1=n-k; k<max_k; k++,ind1--) {
sum+= idx2[k] * idx1[ind1];
}
...
...
@@ -340,8 +376,8 @@ for(int b=0;b< %(self_bsize)s;b++){
for(;k<dim_ker[1];k++) sum+= idx2[k]*fill_value;
}
}else{
%(type)
s* idx1=&in[ind0*dim_im[1]]; //JB: should be dim_im[1] right? (was dim_im[0])
%(type)
s* idx2=&hvals[j*dim_ker[1]];
const
%(type)
s* idx1=&in[ind0*dim_im[1]]; //JB: should be dim_im[1] right? (was dim_im[0])
const
%(type)
s* idx2=&hvals[j*dim_ker[1]];
int new_n = (n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
...
...
@@ -352,68 +388,233 @@ for(int b=0;b< %(self_bsize)s;b++){
out[m*dim_zz[1]+n]
%(affectation)
s sum;
}//for n
}//for m
Py_DECREF(ain1);
Py_DECREF(ain2);
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out[i];
std::cout << "
\\
n";
}
Py_DECREF(aout);
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
fail:
"""
d
=
locals
()
d
.
update
(
sub
)
d
[
"self_out_mode"
]
=
self
.
out_mode
d
[
"self_bsize"
]
=
self
.
bsize
d
[
"self_nkern"
]
=
self
.
nkern
d
[
"self_dx"
]
=
self
.
dx
d
[
"self_dy"
]
=
self
.
dy
d
[
"self_outshp0"
]
=
self
.
outshp
[
0
]
d
[
"self_outshp1"
]
=
self
.
outshp
[
1
]
d
[
"self_imshp0"
]
=
self
.
imshp
[
0
]
d
[
"self_imshp1"
]
=
self
.
imshp
[
1
]
d
[
"self_imshp2"
]
=
self
.
imshp
[
2
]
d
[
"self_kshp0"
]
=
self
.
kshp
[
0
]
d
[
"self_kshp1"
]
=
self
.
kshp
[
1
]
d
[
"affectation"
]
=
"="
if
self
.
imshp
[
0
]
==
1
else
"+="
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
d
[
"type"
]
=
"float"
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"type"
]
=
"double"
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
return
code
%
d
#########
######### ConvOp c_code for valid mode (uses gemm)
#########
def
convolve2
(
kerns
,
kshp
,
nkern
,
images
,
imshp
,
bsize
,
step
=
(
1
,
1
),
bias
=
None
,
mode
=
'valid'
):
_conv_op_code_valid_gemm
=
"""
int mode=-1,typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *img2d_arr=NULL;
const int NKERN =
%(self_nkern)
s;
# if imshp, is a tuple, images contains one input dimension
nvis_dim
=
1
if
len
(
imshp
)
!=
3
else
imshp
[
0
]
int type_im=PyArray_TYPE(
%(img2d)
s);
int type_ker=PyArray_TYPE(
%(filtersflipped)
s);
# all these reshapes should happen in place
imrshp
=
tensor
.
as_tensor
([
bsize
]
+
list
(
imshp
))
imtensor
=
tensor
.
reshape
(
images
,
imrshp
)
npy_intp dim_zz[2]={
%(self_outshp0)
s,
%(self_outshp1)
s};
npy_intp dim_im[2]={
%(self_imshp1)
s,
%(self_imshp2)
s};
npy_intp dim_ker[2]={
%(self_kshp0)
s,
%(self_kshp1)
s};
kernrshp
=
tensor
.
as_tensor
([
nkern
,
nvis_dim
]
+
list
(
kshp
))
kerntensor
=
tensor
.
reshape
(
kerns
,
kernrshp
)
print
'*** convolve2 ***'
print
'imshp = '
,
imshp
print
'kshp = '
,
kshp
print
'nkern = '
,
nkern
print
'bsize = '
,
bsize
convop
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
mode
)
convout
=
convop
(
imtensor
,
kerntensor
)
if
bias
:
biastensor
=
tensor
.
DimShuffle
((
False
,),
(
'x'
,
0
,
'x'
,
'x'
),
inplace
=
True
)(
bias
)
convout
=
convout
+
biastensor
rval
=
tensor
.
flatten
(
convout
,
2
)
return
rval
,
N
.
hstack
((
nkern
,
convop
.
outshp
))
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig;
if(
%(img2d)
s->nd==2){
img2d_dim[3]=
%(img2d)
s->dimensions[1];
img2d_dim[2]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==3){
img2d_dim[3]=
%(img2d)
s->dimensions[2];
img2d_dim[2]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else if(
%(img2d)
s->nd==4){
img2d_dim[3]=
%(img2d)
s->dimensions[3];
img2d_dim[2]=
%(img2d)
s->dimensions[2];
img2d_dim[1]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)
s;
}
if(
%(filtersflipped)
s->nd==3){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else if(
%(filtersflipped)
s->nd==4){
kerns_dim[3]=
%(filtersflipped)
s->dimensions[3];
kerns_dim[2]=
%(filtersflipped)
s->dimensions[2];
kerns_dim[1]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else{
PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape");
%(fail)
s;
}
if (NKERN != kerns_dim[0])
{
PyErr_SetString(PyExc_NotImplementedError, "nonsense nkern");
%(fail)
s;
}
img2d = PyArray_Newshape(
%(img2d)
s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(
%(type)
s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(
%(type)
s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)
s;
}
}
img2d_arr = (PyArrayObject*)img2d;
typenum = PyArray_ObjectType((PyObject*)
%(img2d)
s, 0);
typenum_f = PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;}
if (!img2d) {
PyErr_SetString(PyExc_ValueError, "Null argument img2d");
%(fail)
s;
}
if ((!
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=4
||(
%(z)
s->dimensions[0] !=
%(self_bsize)
s)
||(
%(z)
s->dimensions[1] !=
%(self_nkern)
s)
||(
%(z)
s->dimensions[2] != dim_zz[0])
|| (
%(z)
s->dimensions[3] != dim_zz[1])
)
{
if (
%(z)
s) Py_DECREF(
%(z)
s);
npy_intp dims[4] = {0,0,0,0};
dims[0]=
%(self_bsize)
s;
dims[1]=
%(self_nkern)
s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)
s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
PyArray_FILLWBYTE((PyObject*)
%(z)
s,0);
}
int Os[2];
Os[0] = dim_im[0]-dim_ker[0]+1;
Os[1] = dim_im[1]-dim_ker[1]+1;
// allocate a temporary buffer for storing the inner product of each nth kernel row
// with each row of an image
{
%(type)
s * kbuf = (
%(type)
s *)malloc((Os[0] * NKERN + PyArray_Size((PyObject*)
%(filtersflipped)
s))* sizeof(
%(type)
s));
int kbufstride = NKERN;
%(type)
s * myfilters = kbuf + Os[0] * NKERN;
//copy out filtersflipped into filters un-flipped format
//std::cerr << "__filling myfilters__
\\
n";
for(int i=0;i < kerns_dim[0];++i){
for(int j=0;j < kerns_dim[1];++j){
for(int k=0;k < kerns_dim[2];++k){
for(int l=0;l < kerns_dim[3];++l){
%(type)
s * ff = ((
%(filtersflipped)
s)->nd == 3)
? (
%(type)
s *)PyArray_GETPTR3(
%(filtersflipped)
s, i, kerns_dim[2]-1-k, kerns_dim[3]-1-l)
: (
%(type)
s *)PyArray_GETPTR4(
%(filtersflipped)
s, i, j, kerns_dim[2]-1-k, kerns_dim[3]-1-l);
myfilters[i * (kerns_dim[1]*kerns_dim[2]*kerns_dim[3])
+ j * (kerns_dim[2]*kerns_dim[3])
+ k * (kerns_dim[3])
+ l] = ff[0];
//std::cerr << " " << ff[0];
}
//std::cerr << "
\\
n";
}
//std::cerr << "(end of stack/batch " <<j << "/" << i << " )
\\
n";
}
}
//std::cerr << "-----new loop ----
\\
n";
for(int b=0;b<
%(self_bsize)
s;b++){
for (int img_col = 0; img_col < Os[1]; ++img_col){
for (int filter_row = 0; filter_row < kerns_dim[2]; ++filter_row){
for (int stackidx = 0; stackidx <
%(self_imshp0)
s; ++stackidx){
%(type)
s * img_colview =
(
%(type)
s *)(PyArray_GETPTR4(img2d, b, stackidx, filter_row, img_col));
%(type)
s * filter_rows = myfilters + stackidx * (kerns_dim[2]*kerns_dim[3]) +
filter_row * kerns_dim[3];
//std::cerr << "filterview offset: " << filter_rows - myfilters << "
\\
n";
char N = 'N'; char T = 'T';
int Nz0 = Os[0];
int Nz1 = NKERN;
int K = kerns_dim[3];
double alpha = 1.0;
double beta = stackidx ? 1.0 : 0.0;
int imgview_stride = dim_im[1];
int filter_rows_stride =kerns_dim[1]*kerns_dim[2]*kerns_dim[3];
//remember, Fortran wants a column-major interpretation
assert(img2d->strides[3] == sizeof(double));
if (0){
std::cerr << "b " << b << " img_col " << img_col << " filterrow " << filter_row << " stackidx " <<stackidx << "
\\
n";
std::cerr << "colview (physical layout) stride: " << imgview_stride << "
\\
n";
for (int ii = 0; ii < Nz0; ++ii){
for (int jj = 0; jj < K; ++jj){
std::cerr << " " << img_colview[ii * imgview_stride + jj];
}
std::cerr << "
\\
n";
}
std::cerr << "filterview ("<<filter_row<<"'th rows) stride: " << filter_rows_stride << "
\\
n";
for (int ii = 0; ii < Nz1; ++ii){
for (int jj = 0; jj < K; ++jj){
std::cerr << " " << filter_rows[ii * filter_rows_stride + jj];
}
std::cerr << "
\\
n";
}
std::cerr << Nz1 << " " << Nz0 << " " << K << "
\\
n" ;
}
dgemm_(&T, &N,
&Nz1, &Nz0, &K,
&alpha,
filter_rows, &filter_rows_stride,
img_colview, &imgview_stride,
&beta, kbuf, &kbufstride);
if (0){
std::cerr << "z (logical layout) beta" << beta << "
\\
n";
for (int ii = 0; ii < Nz0; ++ii){
for (int jj = 0; jj < Nz1; ++jj){
std::cerr << " " << kbuf[ii * kbufstride + jj];
}
std::cerr << "
\\
n";
}
}
}
// now kbuf the sum over the stack, put it into the outbuf
for (int img_row = 0; img_row < Os[0]; ++img_row) {
for (int kernel_idx = 0; kernel_idx < NKERN; ++kernel_idx) {
%(type)
s * z_p = (
%(type)
s *)PyArray_GETPTR4(
%(z)
s, b, kernel_idx, img_row, img_col);
if (0)
{
if (b >=
%(z)
s->dimensions[0])
%(fail)
s;
if (kernel_idx >=
%(z)
s->dimensions[1])
%(fail)
s;
if (img_row >=
%(z)
s->dimensions[2])
%(fail)
s;
if (img_col >=
%(z)
s->dimensions[3])
%(fail)
s;
}
z_p[0] += kbuf[img_row * kbufstride + kernel_idx];
}
}
}
}
}
free(kbuf);
}
Py_XDECREF(img2d);
"""
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论