提交 aaea1d32 authored 作者: Frederic Bastien's avatar Frederic Bastien

fix white space.

上级 a04be875
......@@ -26,7 +26,7 @@ try:
imported_scipy_signal = True
except ImportError:
pass
_logger=logging.getLogger("theano.signal.conv")
def _debug(*msg):
......@@ -51,13 +51,13 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
:param border_mode:
'valid'-- only apply filter to complete patches of the image. Generates
output of shape: image_shape - filter_shape + 1
output of shape: image_shape - filter_shape + 1
'full' -- zero-pads image to multiple of filter shape to generate output of
shape: image_shape + filter_shape - 1
:type subsample: tuple of len 2
:param subsample: factor by which to subsample the output
:type image_shape: tuple of len 4 of int or Constant variable
:param image_shape: (batch size, stack size, nb row, nb col)
Optional, used for optimization.
......@@ -68,7 +68,7 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
:param kwargs: kwargs are passed onto ConvOp. Can be used to set the following:
unroll_batch, unroll_kern, unroll_patch (see ConvOp doc)
:rtype: symbolic 4D tensor
:return: set of feature maps generated by convolutional layer. Tensor is of shape
:return: set of feature maps generated by convolutional layer. Tensor is of shape
(batch size, nb filters, output row, output col)
"""
......@@ -89,11 +89,11 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
filter_shape[i] = int(filter_shape[i])
if image_shape and filter_shape:
try:
assert image_shape[1]==filter_shape[1]
except:
print 'image ', image_shape, ' filters ', filter_shape
raise
try:
assert image_shape[1]==filter_shape[1]
except:
print 'image ', image_shape, ' filters ', filter_shape
raise
if filter_shape is not None:
nkern = filter_shape[0]
......@@ -117,25 +117,25 @@ class ConvOp(Op):
"""
This Op serves a dual purpose: it can implement a vanilla 2D convolution
(as taught in any signal processing class) or implement the
convolutional layers found in Convolutional Neural Networks.
convolutional layers found in Convolutional Neural Networks.
In this setting, a set of 3D images is convolved with a set of 3D kernels,
with the particularity that their leading dimensions are of equal length.
Vanilla 2D convolution is treated as a special case of this.
The input parameter represents a mini-batch of multiple images. Its shape is:
batch size x num. input feature maps x image height x image width
The kernel parameter represents a set of 3D kernels. Its shape is:
number of filters x num. input images x filter height x filter width
number of filters x num. input images x filter height x filter width
The output of ConvOp is a 4D tensor, generated as follows:
output[b,k,:,:] = \sum_i input[b,i,:,:] * filter[k,i,:,:] \forall b,k
where b is the mini-batch index, k the filter index and * is the convolution
operator.
operator.
"""
__attrnames = ['imshp', 'kshp', 'nkern', 'bsize', 'dx', 'dy', 'out_mode',
__attrnames = ['imshp', 'kshp', 'nkern', 'bsize', 'dx', 'dy', 'out_mode',
'unroll_batch', 'unroll_kern', 'unroll_patch',
'imshp_logical', 'kshp_logical', 'kshp_logical_top_aligned']
"""These attributes uniquely identify the behaviour of this op for given inputs"""
......@@ -203,7 +203,7 @@ class ConvOp(Op):
speed_unroll_patch_noshape=[2.0109100341796875, 5.8175678253173828]
#valid time, full time
speed_unroll_patch_shape=[1.2967290878295898, 5.5283889770507812]
def c_compile_args(self):
#when kshp==(1,1), gcc 4.3.0 segfaults during compilation with -O3.
#This doesn't happen at -O2
......@@ -223,7 +223,7 @@ class ConvOp(Op):
"""
Computes the output dimensions of convolving an image of shape "inshp"
with kernels of shape "kshp".
:param inshp: (rows,cols) of input image
:param kshp: (rows,cols) of filters
:param mode: 'valid' or 'full' (see 'border_mode' in conv2d's doc)
......@@ -236,7 +236,7 @@ class ConvOp(Op):
numpy.array([dx,dy], dtype='float')))
def __init__(self, imshp=None, kshp=None, nkern=None, bsize=None,
def __init__(self, imshp=None, kshp=None, nkern=None, bsize=None,
dx=None, dy=None,
output_mode='valid',
......@@ -269,7 +269,7 @@ class ConvOp(Op):
For optimizing other architectures, see:
Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance
Matrix Multiplication, (mr x nr). ACM Transactions on Mathematical
Software, May 2008.
Software, May 2008.
Figure 12: (mr x nr). For x86 use 2x4, itanium 8x8, etc.
:type output_mode: string
......@@ -325,7 +325,7 @@ class ConvOp(Op):
if (unroll_batch>0 or unroll_kern>0) and not all_shape:
raise Exception("In ConvOp, when using unroll_batch and unroll_nkern, all shape are needed")
if not all_shape:
unroll_patch = True
......@@ -419,7 +419,7 @@ class ConvOp(Op):
if not self.out_mode in ["valid", "full"]:
raise Exception("Mode %s not implemented"%self.out_mode)
if all_shape and not (self.outshp > 0).all():
raise Exception(("Bad size for the output shape. Verify that [post-"\
"supersampling] input shape (%s) and kern shape(%s) are ok. "\
......@@ -501,8 +501,8 @@ class ConvOp(Op):
for out_col in range(self.outshp[0]):#loop over output col
for row in range(self.kshp[0]):#loop over kern row
if (row+out_row-self.kshp[0]+1<0 or
row+out_row-self.kshp[0]+1>=self.imshp[1]):
if (row+out_row-self.kshp[0]+1<0 or
row+out_row-self.kshp[0]+1>=self.imshp[1]):
continue
col=0
......@@ -516,9 +516,9 @@ class ConvOp(Op):
while col < max_col: #loop over kern col
self.flops+=2
col+=1
self.flops*=self.imshp[0]*self.nkern*self.bsize#for all outputs images#n_stack==self.imshp[0]
assert self.flops == self.bsize * self.nkern * self.imshp[0] * \
self.kshp[0] * self.kshp[1] * self.imshp[1] * self.imshp[2] * 2
......@@ -545,7 +545,7 @@ class ConvOp(Op):
bcastable23 = [False, False]
output = tensor.tensor(dtype=_inputs.type.dtype,
broadcastable=[_inputs.broadcastable[0],
_kerns.broadcastable[0]]+bcastable23);
_kerns.broadcastable[0]]+bcastable23);
return Apply(self, [_inputs, _kerns], [output])
......@@ -568,7 +568,7 @@ class ConvOp(Op):
except TypeError:
raise NotImplementedError()
outshp = (batch_size,fmo) + tuple(fmshp)
return [outshp]
return [outshp]
else:
# Haven't implemented this case. imshp and kshp may be symbolic
# and ConvOp.getOutputShape doesn't handle this. In this case
......@@ -583,7 +583,7 @@ class ConvOp(Op):
raise theano.gof.utils.MethodNotDefined(
"c_headers", type(self), self.__class__.__name__,
"Need the python package for scipy.signal to be installed for the python implementation. You can use the C implementation instead.")
# TODO: move these back out to global scope when they no longer cause an atexit error
imshp = self.imshp
if imshp is None or any([x is None for x in imshp]):
......@@ -597,7 +597,7 @@ class ConvOp(Op):
nkern = self.nkern
if nkern is None:
nkern = filtersflipped.shape[0]
imshp_logical = self.imshp_logical
if imshp_logical is None:
imshp_logical = imshp
......@@ -704,13 +704,13 @@ class ConvOp(Op):
if not all_shape and (self.dx!=1 or self.dy!=1):
raise Exception("ConvOp.grad when dx!=1 or dy!=1 we must have all "\
"the optional shape information")
####### Determine gradient on kernels ########
assert inputs.ndim==4 and kerns.ndim==4
newin = inputs.dimshuffle((1,0,2,3))
newgz = gz.dimshuffle((1,0,2,3))
(bsize, nkern) = None, None
imshp = None
kshp = None
......@@ -742,7 +742,7 @@ class ConvOp(Op):
raise NotImplementedError('Only [full,valid] modes are currently supported.')
filters = filters[:,:,::-1,::-1] #flip them
if 0: #find good value for the unroll
if all_shape and un_b!=0 and bsize%un_b!=0:
......@@ -793,7 +793,7 @@ class ConvOp(Op):
####### Determine gradient on inputs ########
mode = 'valid'
if not self.out_mode == 'full':
if not self.out_mode == 'full':
mode = 'full'
filters = kerns.dimshuffle((1,0,2,3))
......@@ -809,7 +809,7 @@ class ConvOp(Op):
imshp_logical=(self.nkern, self.fulloutshp[0], self.fulloutshp[1])
if 0: # hard-code c generation parameters
din = ConvOp(imshp, self.kshp, nkern, self.bsize,
din = ConvOp(imshp, self.kshp, nkern, self.bsize,
1,1, output_mode=mode,
unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p,
imshp_logical=imshp_logical,
......@@ -817,7 +817,7 @@ class ConvOp(Op):
version=-1,#we we change the mode, we don't forward the version.
verbose=self.verbose)
else: # let __init__ figure out the unrolling / patch sizes
din = ConvOp(imshp, self.kshp, nkern, self.bsize,
din = ConvOp(imshp, self.kshp, nkern, self.bsize,
1,1, output_mode=mode,
unroll_batch=None, unroll_kern=None, unroll_patch=None,
imshp_logical=imshp_logical,
......@@ -840,7 +840,7 @@ class ConvOp(Op):
def c_code_cache_version(self):
return (4)
def c_support_code(self):
return """
#define STRIDES(arr) ((arr)->strides)
......@@ -878,12 +878,12 @@ using namespace std;
if self.use_blas():
return tensor.blas.ldflags(libs=False, libs_dir=True)
return []
def c_header_dirs(self):
if self.use_blas():
return tensor.blas.ldflags(libs=False, include_dir=True)
return []
def c_code(self, node, name, (img2d, filtersflipped), (z, ), sub):
if node.inputs[0].type.dtype != node.inputs[1].type.dtype:
raise NotImplementedError()
......@@ -953,7 +953,7 @@ using namespace std;
d["self_kshp_logical_offset_r"] = (self.kshp_logical[0] - (self.kshp[0]*rstride) - 1+rstride) % rstride
d["self_kshp_logical_offset_c"] = (self.kshp_logical[1] - (self.kshp[1]*cstride) - 1+cstride) % cstride
del rstride, cstride
if node.inputs[0].type.dtype=="float32": d["type"]="float"
elif node.inputs[0].type.dtype=="float64": d["type"]="double"
else: raise Exception("Type %s not implemented"%node.inputs[0].type.dtype)
......@@ -978,7 +978,7 @@ using namespace std;
return gen_conv_code_unroll_batch_kern(d, self.unroll_batch,
self.unroll_kern)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5?
#TODO: should we choose the unroll size automatically with the bigger divisor under 5?
if self.out_mode == 'valid' and self.dx==0 and self.dy==0:
if self.verbose:
_debug("return gemm version")
......@@ -1067,7 +1067,7 @@ img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
......@@ -1213,7 +1213,7 @@ for(int b=0;b< %(self_bsize)s;b++){
}//for m
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out[i];
std::cout << "\\n";
}
......@@ -1224,7 +1224,7 @@ Py_XDECREF(filtersflipped);
"""
#########
#########
######### ConvOp c_code for valid mode (uses gemm)
#########
......@@ -1293,7 +1293,7 @@ if (NKERN != kerns_dim[0])
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
......@@ -1337,7 +1337,7 @@ int Os[2];
Os[0] = dim_im[0]-dim_ker[0]+1;
Os[1] = dim_im[1]-dim_ker[1]+1;
// allocate a temporary buffer for storing the inner product of each nth kernel row
// allocate a temporary buffer for storing the inner product of each nth kernel row
// with each row of an image
{
%(type)s * kbuf = (%(type)s *)malloc((Os[0] * NKERN + PyArray_Size((PyObject*)%(filtersflipped)s))* (npy_intp)sizeof(%(type)s));
......@@ -1353,7 +1353,7 @@ for(int i=0;i < kerns_dim[0];++i){
%(type)s * ff = ((%(filtersflipped)s)->nd == 3)
? (%(type)s *)PyArray_GETPTR3(%(filtersflipped)s, i, kerns_dim[2]-1-k, kerns_dim[3]-1-l)
: (%(type)s *)PyArray_GETPTR4(%(filtersflipped)s, i, j, kerns_dim[2]-1-k, kerns_dim[3]-1-l);
myfilters[i * (kerns_dim[1]*kerns_dim[2]*kerns_dim[3])
myfilters[i * (kerns_dim[1]*kerns_dim[2]*kerns_dim[3])
+ j * (kerns_dim[2]*kerns_dim[3])
+ k * (kerns_dim[3])
+ l] = ff[0];
......@@ -1370,14 +1370,14 @@ for(int b=0;b< %(self_bsize)s;b++){
for (int img_col = 0; img_col < Os[1]; ++img_col){
for (int filter_row = 0; filter_row < kerns_dim[2]; ++filter_row){
for (int stackidx = 0; stackidx < %(self_imshp0)s; ++stackidx){
%(type)s * img_colview =
%(type)s * img_colview =
(%(type)s *)(PyArray_GETPTR4(img2d, b, stackidx, filter_row, img_col));
%(type)s * filter_rows = myfilters + stackidx * (kerns_dim[2]*kerns_dim[3]) +
filter_row * kerns_dim[3];
//std::cerr << "filterview offset: " << filter_rows - myfilters << "\\n";
char N = 'N'; char T = 'T';
int Nz0 = Os[0];
int Nz0 = Os[0];
int Nz1 = NKERN;
int K = kerns_dim[3];
%(type)s alpha = 1.0;
......@@ -1407,11 +1407,11 @@ for(int b=0;b< %(self_bsize)s;b++){
std::cerr << Nz1 << " " << Nz0 << " " << K << "\\n" ;
}
%(gemm)s(&T, &N,
%(gemm)s(&T, &N,
&Nz1, &Nz0, &K,
&alpha,
&alpha,
filter_rows, &filter_rows_stride,
img_colview, &imgview_stride,
img_colview, &imgview_stride,
&beta, kbuf, &kbufstride);
if (0){
......@@ -1453,7 +1453,7 @@ def gen_conv_code_unroll_batch_kern(d,unroll_bsize=1, unroll_ksize=1):
if d.has_key("unroll_bsize") or d.has_key("unroll_ksize") or d.has_key("unroll_iter") or d.has_key("unroll_biter") or d.has_key("unroll_kiter"):
raise Exception("We can't use this dictionnary as we will overwrite some of its containt")
d=d.copy()
d["unroll_bsize"]=unroll_bsize
d["unroll_ksize"]=unroll_ksize
def my_dup(st,size):
......@@ -1547,7 +1547,7 @@ if(kerns_dim[0] %% %(self_nkern)s!=0){
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
......@@ -1561,7 +1561,7 @@ img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
......@@ -1632,7 +1632,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s;
"""%d
ret+=my_dup("%(type)s sum%(unroll_iter)s=0;", unroll_bsize*unroll_ksize)
......@@ -1658,15 +1658,15 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
//do the part where kernel is to the right of the img
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
if(fill_value!=0){
for(k=0;k<max_k;k++){
"""%d
ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
ret+="""
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(pos_n+1,(int)dim_ker[1]);
"""%d
......@@ -1787,7 +1787,7 @@ if(kerns_dim[0] != %(self_nkern)s){
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s))
if ((img2d_arr->strides[3] != sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
......@@ -1801,7 +1801,7 @@ img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s))
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
......@@ -1897,13 +1897,13 @@ for(int b=0;b< %(self_bsize)s;b++){
}else{
//do the part where kernel is to the right of the img
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
if(fill_value!=0){
for(k=0;k<max_k;k++){
sum+= idx_hvals[k]*fill_value;
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(pos_n+1,(int)dim_ker[1]);
const %(type)s * idx_in=&in[ind0*dim_im[1]];
......@@ -1918,7 +1918,7 @@ for(int b=0;b< %(self_bsize)s;b++){
sum3+=idx_hvals[k]*idx_in[ind1+2*%(self_dy)s];
sum4+=idx_hvals[k]*idx_in[ind1+3*%(self_dy)s];
}
}else if(iter_n + 2*%(self_dy)s < dim_zz[1]
}else if(iter_n + 2*%(self_dy)s < dim_zz[1]
&& iter_n>dim_ker[1]-1
&& iter_n<dim_im[1]-dim_ker[1]+1){
nb_sum=2;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论