提交 cf91f745 authored 作者: Frederic Bastien's avatar Frederic Bastien

A first optimized implementation of conv2d on the GPU with subsample. Works only for some shapes.

上级 899d98b6
...@@ -363,7 +363,7 @@ class GpuConv(Op): ...@@ -363,7 +363,7 @@ class GpuConv(Op):
return ['cuda_ndarray.cuh','<stdio.h>'] return ['cuda_ndarray.cuh','<stdio.h>']
def c_code_cache_version(self): def c_code_cache_version(self):
return (0,13) # raise this whenever modifying any of the support_code_files return (0,14) # raise this whenever modifying any of the support_code_files
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of these files # REMEMBER TO RAISE c_code_cache_version when changing any of these files
......
...@@ -163,8 +163,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -163,8 +163,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
cudaGetErrorString(sts)); cudaGetErrorString(sts));
} }
} }
if (!subsample && if (out_contiguous &&
out_contiguous &&
(version==1||version==3||version==11||version==12||version==-1) && (version==1||version==3||version==11||version==12||version==-1) &&
(version!=1 || out_size<512) &&//Maximum of 512 theads by block (version!=1 || out_size<512) &&//Maximum of 512 theads by block
out_wid<512 &&//Maximum of 512 theads by block out_wid<512 &&//Maximum of 512 theads by block
...@@ -187,36 +186,54 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -187,36 +186,54 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
int shared_size=(img_size + (preload_full_kernel?kern_size:kern_wid))*sizeof(float); int shared_size=(img_size + (preload_full_kernel?kern_size:kern_wid))*sizeof(float);
void (*f)(float*, float*, float*, void (*f)(float*, float*, float*,
int, int, int, int,
int, int, int, int, int, int, int, int,
int, int, int, int, int, int, int, int,
int, int, int, int, int, int, int, int,
int, int); int, int);
#define CONV_PATCH_STACK_SPECIAL(kern_wid) \ #define CONV_PATCH_STACK_SPECIAL(kern_wid) \
if(preload_full_kernel && nb_split==1 && img_contiguous_2d && kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,true,true,false,true>;} \ if(preload_full_kernel && nb_split==1 && img_contiguous_2d && kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,true,true,false,true,true>;} \
else if(preload_full_kernel && nb_split==1 && img_contiguous_2d && !kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,true,false,false,true>;} \ else if(preload_full_kernel && nb_split==1 && img_contiguous_2d && !kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,true,false,false,true,true>;} \
else if(preload_full_kernel && nb_split==1 && !img_contiguous_2d && kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,false,true,false,true>;}\ else if(preload_full_kernel && nb_split==1 && !img_contiguous_2d && kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,false,true,false,true,true>;}\
else if(preload_full_kernel && nb_split==1 && !img_contiguous_2d && !kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,false,false,false,true>;}\ else if(preload_full_kernel && nb_split==1 && !img_contiguous_2d && !kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,false,false,false,true,true>;}\
else if(preload_full_kernel && nb_split!=1 && img_contiguous_2d && kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,true,true,true,true>;}\ else if(preload_full_kernel && nb_split!=1 && img_contiguous_2d && kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,true,true,true,true,true>;}\
else if(preload_full_kernel && nb_split!=1 && img_contiguous_2d && !kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,true,false,true,true>;}\ else if(preload_full_kernel && nb_split!=1 && img_contiguous_2d && !kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,true,false,true,true,true>;}\
else if(preload_full_kernel && nb_split!=1 && !img_contiguous_2d && kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,false,true,true,true>;}\ else if(preload_full_kernel && nb_split!=1 && !img_contiguous_2d && kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,false,true,true,true,true>;}\
else if(preload_full_kernel && nb_split!=1 && !img_contiguous_2d && !kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,false,false,true,true>;}\ else if(preload_full_kernel && nb_split!=1 && !img_contiguous_2d && !kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,false,false,true,true,true>;}\
else if(!preload_full_kernel && nb_split==1 && img_contiguous_2d && kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,true,true,false,false>;}\ else if(!preload_full_kernel && nb_split==1 && img_contiguous_2d && kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,true,true,false,false,true>;}\
else if(!preload_full_kernel && nb_split==1 && img_contiguous_2d && !kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,true,false,false,false>;}\ else if(!preload_full_kernel && nb_split==1 && img_contiguous_2d && !kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,true,false,false,false,true>;}\
else if(!preload_full_kernel && nb_split==1 && !img_contiguous_2d && kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,false,true,false,false>;}\ else if(!preload_full_kernel && nb_split==1 && !img_contiguous_2d && kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,false,true,false,false,true>;}\
else if(!preload_full_kernel && nb_split==1 && !img_contiguous_2d && !kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,false,false,false,false>;}\ else if(!preload_full_kernel && nb_split==1 && !img_contiguous_2d && !kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,false,false,false,false,true>;}\
else if(!preload_full_kernel && nb_split!=1 && img_contiguous_2d && kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,true,true,true,false>;} \ else if(!preload_full_kernel && nb_split!=1 && img_contiguous_2d && kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,true,true,true,false,true>;} \
else if(!preload_full_kernel && nb_split!=1 && img_contiguous_2d && !kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,true,false,true,false>;} \ else if(!preload_full_kernel && nb_split!=1 && img_contiguous_2d && !kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,true,false,true,false,true>;} \
else if(!preload_full_kernel && nb_split!=1 && !img_contiguous_2d && kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,false,true,true,false>;} \ else if(!preload_full_kernel && nb_split!=1 && !img_contiguous_2d && kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,false,true,true,false,true>;} \
else if(!preload_full_kernel && nb_split!=1 && !img_contiguous_2d && !kern_contiguous_2d){ f=conv_patch_stack<true,false,kern_wid,false,false,true,false>;} else if(!preload_full_kernel && nb_split!=1 && !img_contiguous_2d && !kern_contiguous_2d && subsample){ f=conv_patch_stack<true,false,kern_wid,false,false,true,false,true>;} \
else if(preload_full_kernel && nb_split==1 && img_contiguous_2d && kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,true,true,false,true,false>;} \
else if(preload_full_kernel && nb_split==1 && img_contiguous_2d && !kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,true,false,false,true,false>;} \
else if(preload_full_kernel && nb_split==1 && !img_contiguous_2d && kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,false,true,false,true,false>;}\
else if(preload_full_kernel && nb_split==1 && !img_contiguous_2d && !kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,false,false,false,true,false>;}\
else if(preload_full_kernel && nb_split!=1 && img_contiguous_2d && kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,true,true,true,true,false>;}\
else if(preload_full_kernel && nb_split!=1 && img_contiguous_2d && !kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,true,false,true,true,false>;}\
else if(preload_full_kernel && nb_split!=1 && !img_contiguous_2d && kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,false,true,true,true,false>;}\
else if(preload_full_kernel && nb_split!=1 && !img_contiguous_2d && !kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,false,false,true,true,false>;}\
else if(!preload_full_kernel && nb_split==1 && img_contiguous_2d && kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,true,true,false,false,false>;}\
else if(!preload_full_kernel && nb_split==1 && img_contiguous_2d && !kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,true,false,false,false,false>;}\
else if(!preload_full_kernel && nb_split==1 && !img_contiguous_2d && kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,false,true,false,false,false>;}\
else if(!preload_full_kernel && nb_split==1 && !img_contiguous_2d && !kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,false,false,false,false,false>;}\
else if(!preload_full_kernel && nb_split!=1 && img_contiguous_2d && kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,true,true,true,false,false>;} \
else if(!preload_full_kernel && nb_split!=1 && img_contiguous_2d && !kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,true,false,true,false,false>;} \
else if(!preload_full_kernel && nb_split!=1 && !img_contiguous_2d && kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,false,true,true,false,false>;} \
else if(!preload_full_kernel && nb_split!=1 && !img_contiguous_2d && !kern_contiguous_2d && !subsample){ f=conv_patch_stack<true,false,kern_wid,false,false,true,false,false>;}
CONV_PATCH_STACK_SPECIAL(THEANO_KERN_WID); CONV_PATCH_STACK_SPECIAL(THEANO_KERN_WID);
f<<< grid, threads, shared_size>>> f<<< grid, threads, shared_size>>>
(img->devdata, kern->devdata, out->devdata, (img->devdata, kern->devdata, out->devdata,
img_len, img_wid, kern_len, kern_wid, nkern, nstack, img_len, img_wid, kern_len, kern_wid,
out_len, out_wid, nkern, nstack,
img_stride_col, img_stride_row, img_stride_stack, img_stride_col, img_stride_row, img_stride_stack,
img_stride_batch, kern_stride_col, kern_stride_row, img_stride_batch, kern_stride_col, kern_stride_row,
kern_stride_stack, kern_stride_nkern); kern_stride_stack, kern_stride_nkern, subsample_rows, subsample_cols);
CNDA_THREAD_SYNC; CNDA_THREAD_SYNC;
cudaError_t sts = cudaGetLastError(); cudaError_t sts = cudaGetLastError();
...@@ -226,13 +243,15 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -226,13 +243,15 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
fprintf(stderr, fprintf(stderr,
"threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i, shared_size=%i, nb_threads=%i," "threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i, shared_size=%i, nb_threads=%i,"
" kern_flipped=true, accumulate=false, kern_width=%i, img_c_contiguous_2d=%i," " kern_flipped=true, accumulate=false, kern_width=%i, img_c_contiguous_2d=%i,"
" kern_c_contiguous_2d=%i, nb_split=%i, preload_full_kernel=%i\n", " kern_c_contiguous_2d=%i, nb_split=%i, preload_full_kernel=%i,",
" subsample_rows=%i, subsample_cols=%i\n",
threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y, threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y,
THEANO_KERN_WID, img_contiguous_2d, kern_contiguous_2d, THEANO_KERN_WID, img_contiguous_2d, kern_contiguous_2d,
nb_split, preload_full_kernel); nb_split, preload_full_kernel, subsample_rows, subsample_cols);
if (verbose) fprintf(stderr, if (verbose) fprintf(stderr,
"INFO: used 'conv_patch_stack' version with nb_split=%i and preload_full_kernel=%i\n", "INFO: used 'conv_patch_stack' version with nb_split=%i and preload_full_kernel=%i,"
nb_split,preload_full_kernel); " subsample_rows=%i, subsample_cols=%i\n",
nb_split,preload_full_kernel, subsample_rows, subsample_cols);
work_complete = true; work_complete = true;
} }
else else
...@@ -240,10 +259,11 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -240,10 +259,11 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
if (verbose) if (verbose)
fprintf(stderr, "threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i, shared_size=%i, nb_threads=%i," fprintf(stderr, "threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i, shared_size=%i, nb_threads=%i,"
" kern_flipped=true, accumulate=false, kern_width=%i, img_c_contiguous_2d=%i," " kern_flipped=true, accumulate=false, kern_width=%i, img_c_contiguous_2d=%i,"
" kern_c_contiguous_2d=%i, nb_split=%i, preload_full_kernel=%i\n", " kern_c_contiguous_2d=%i, nb_split=%i, preload_full_kernel=%i,",
" subsample_rows=%i, subsample_cols=%i\n",
threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y, threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y,
THEANO_KERN_WID, img_contiguous_2d, kern_contiguous_2d, THEANO_KERN_WID, img_contiguous_2d, kern_contiguous_2d,
nb_split, preload_full_kernel); nb_split, preload_full_kernel, subsample_rows, subsample_cols);
if (verbose) fprintf(stderr, "INFO: impl 'conv_patch_stack' failed (%s), trying next implementation\n", if (verbose) fprintf(stderr, "INFO: impl 'conv_patch_stack' failed (%s), trying next implementation\n",
cudaGetErrorString(sts)); cudaGetErrorString(sts));
} }
......
...@@ -280,6 +280,8 @@ conv_patch( float* img, float* kern, float* out, ...@@ -280,6 +280,8 @@ conv_patch( float* img, float* kern, float* out,
* *
* nkern: the number of kernel, used to compute the output image to store the result * nkern: the number of kernel, used to compute the output image to store the result
* nstack: the size of the stack, used to compute the image to load. * nstack: the size of the stack, used to compute the image to load.
* dx: patch stride rows(1 for normal convolution)
* dy: patch stride cols(1 for normal convolution)
* template flipped_kern: if true, we "flip" the kernel as in a real convolution, else we don't * template flipped_kern: if true, we "flip" the kernel as in a real convolution, else we don't
* template accumulate: if true, we add the result, else we override the result * template accumulate: if true, we add the result, else we override the result
* template KERN_WIDTH: if 0, will work for any kern_wid, else it specialyse to this kern_wid as an optimization * template KERN_WIDTH: if 0, will work for any kern_wid, else it specialyse to this kern_wid as an optimization
...@@ -287,19 +289,19 @@ conv_patch( float* img, float* kern, float* out, ...@@ -287,19 +289,19 @@ conv_patch( float* img, float* kern, float* out,
* template kern_c_contiguous_2d: if true, the kernel have are collon and row contiguous * template kern_c_contiguous_2d: if true, the kernel have are collon and row contiguous
* template split: if true, each thread generate more then 1 output pixel, but use more registers. * template split: if true, each thread generate more then 1 output pixel, but use more registers.
* template preload_full_kern: if true, we load the full kernel in shared memory, else, we load 1 row at a time. * template preload_full_kern: if true, we load the full kernel in shared memory, else, we load 1 row at a time.
* template subsample: if false, remove some computation needed when dx or dy!=1.
*/ */
template<bool flipped_kern, bool accumulate, int KERN_WIDTH, bool img_c_contiguous_2d, bool kern_c_contiguous_2d, bool split, bool preload_full_kern> template<bool flipped_kern, bool accumulate, int KERN_WIDTH, bool img_c_contiguous_2d, bool kern_c_contiguous_2d, bool split, bool preload_full_kern, bool subsample>
__global__ void __global__ void
conv_patch_stack( float* img, float* kern, float* out, conv_patch_stack( float* img, float* kern, float* out,
int img_len, int img_wid, int kern_len, int kern_wid, int img_len, int img_wid, int kern_len, int kern_wid,
int out_len, int out_wid,
int nkern, int nstack, int img_stride_col,int img_stride_row, int nkern, int nstack, int img_stride_col,int img_stride_row,
int img_stride_stack, int img_stride_batch, int img_stride_stack, int img_stride_batch,
int kern_stride_col, int kern_stride_row, int kern_stride_col, int kern_stride_row,
int kern_stride_stack, int kern_stride_nkern) int kern_stride_stack, int kern_stride_nkern, int dx, int dy)
{ {
int __shared__ out_len, out_wid, nb_thread_id; int __shared__ nb_thread_id;
out_len = img_len - kern_len + 1;
out_wid = img_wid - kern_wid + 1;
nb_thread_id = blockDim.z*blockDim.y*blockDim.x; nb_thread_id = blockDim.z*blockDim.y*blockDim.x;
extern __shared__ float s_data[]; extern __shared__ float s_data[];
...@@ -346,7 +348,11 @@ conv_patch_stack( float* img, float* kern, float* out, ...@@ -346,7 +348,11 @@ conv_patch_stack( float* img, float* kern, float* out,
const float* idx_kern; const float* idx_kern;
if(preload_full_kern) idx_kern=&d_kern[row*kern_wid]; if(preload_full_kern) idx_kern=&d_kern[row*kern_wid];
else idx_kern=d_kern; else idx_kern=d_kern;
const float* idx_in=&d_img[(row+out_row)*img_wid+out_col]; const float* idx_in;
if(subsample)
idx_in=&d_img[(row+out_row*dx)*img_wid+out_col*dy];
else
idx_in=&d_img[(row+out_row)*img_wid+out_col];
convolutionRowNoFlip<KERN_WIDTH>(sum,idx_in,idx_kern,kern_wid); convolutionRowNoFlip<KERN_WIDTH>(sum,idx_in,idx_kern,kern_wid);
} }
...@@ -368,7 +374,7 @@ conv_patch_stack( float* img, float* kern, float* out, ...@@ -368,7 +374,7 @@ conv_patch_stack( float* img, float* kern, float* out,
//TODO: inverse the out_row and stack loop to don't load the date as frequently! //TODO: inverse the out_row and stack loop to don't load the date as frequently!
//TODO: do this happen elsewhere? //TODO: do this happen elsewhere?
for(int out_row=ty;out_row<out_len_max;out_row+=blockDim.y){ for(;out_row<out_len_max;out_row+=blockDim.y){
float sum = 0.0f; float sum = 0.0f;
for (int stack = 0;stack<nstack;stack++){ for (int stack = 0;stack<nstack;stack++){
//TODO: load only the part of the image needed or put the partial result in shared memory //TODO: load only the part of the image needed or put the partial result in shared memory
...@@ -397,7 +403,11 @@ conv_patch_stack( float* img, float* kern, float* out, ...@@ -397,7 +403,11 @@ conv_patch_stack( float* img, float* kern, float* out,
const float* idx_kern; const float* idx_kern;
if(preload_full_kern) idx_kern=&d_kern[row*kern_wid]; if(preload_full_kern) idx_kern=&d_kern[row*kern_wid];
else idx_kern=d_kern; else idx_kern=d_kern;
const float* idx_in=&d_img[(row+out_row)*img_wid+out_col]; const float* idx_in;
if(subsample)
idx_in=&d_img[(row+out_row*dx)*img_wid+out_col*dy];
else
idx_in=&d_img[(row+out_row)*img_wid+out_col];
//if needed as on Fermi as reading out of bound index from shared memory generate an error. //if needed as on Fermi as reading out of bound index from shared memory generate an error.
//Not needed on generation before as they worked anyway. Removing the if generate the good code //Not needed on generation before as they worked anyway. Removing the if generate the good code
......
...@@ -282,8 +282,7 @@ def get_valid_shapes(): ...@@ -282,8 +282,7 @@ def get_valid_shapes():
shapes += get_shapes2(scales_img=(2,2),img_stride=(-1,-1)) shapes += get_shapes2(scales_img=(2,2),img_stride=(-1,-1))
shapes += get_shapes2(scales_img=(2,2),kern_stride=(-1,-1)) shapes += get_shapes2(scales_img=(2,2),kern_stride=(-1,-1))
#test subsample #test subsample done in a separate fct
shapes += get_shapes2(scales_img=(2,2),subsample=(2,2))
shapes += [ shapes += [
#other test #other test
...@@ -502,8 +501,7 @@ def test_full(): ...@@ -502,8 +501,7 @@ def test_full():
shapes += get_shapes2(scales_img=(2,2),img_stride=(-1,-1)) shapes += get_shapes2(scales_img=(2,2),img_stride=(-1,-1))
shapes += get_shapes2(scales_img=(2,2),kern_stride=(-1,-1)) shapes += get_shapes2(scales_img=(2,2),kern_stride=(-1,-1))
#test subsample #test subsample done in a separate fct
shapes += get_shapes2(scales_img=(2,2),subsample=(2,2))
shapes += [ shapes += [
#other test #other test
...@@ -552,22 +550,32 @@ def test_full(): ...@@ -552,22 +550,32 @@ def test_full():
def test_subsample(): def test_subsample():
# implement when # implement when
shapes = [ shapes = [
((1, 1, 1, 1), (1, 1, 1, 1), (1,1)) ((1, 1, 1, 1), (1, 1, 1, 1), (1,1), (1,1), (1,1))
, ((1, 1, 1, 1), (1, 1, 1, 1), (2,2)) , ((1, 1, 1, 1), (1, 1, 1, 1), (2,2), (1,1), (1,1))
, ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3)) , ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1,1), (1,1))
, ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3)) , ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3), (1,1), (1,1))
, ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1)) , ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1), (1,1), (1,1))
] ]
all_good = True shapes += get_shapes2(scales_img=(2,2),subsample=(1,1))
shapes += get_shapes2(scales_img=(2,2),subsample=(1,2))
_params_allgood_header() shapes += get_shapes2(scales_img=(2,2),subsample=(2,1))
for ishape, kshape, ds in shapes: shapes += get_shapes2(scales_img=(2,2),subsample=(2,2))
if not _params_allgood(ishape, kshape, 'full', subsample=ds):
all_good = False #We put only the version that implement the subsample to make the test faster.
if not _params_allgood(ishape, kshape, 'valid', subsample=ds): version_valid = [-2,-1,1,3,11,12]
all_good = False version_full = [-2,-1]
assert all_good verbose = 0
random = True
print_ = False
ones = False
if ones:
random = False
#test
random = False
exec_conv(version_valid, shapes, verbose, random, 'valid', print_=print_, ones=ones)
exec_conv(version_full, shapes, verbose, random, 'full', print_=print_, ones=ones)
## See #616 ## See #616
#def test_logical_shapes(): #def test_logical_shapes():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论