提交 925a4eb6 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Merged

...@@ -421,7 +421,7 @@ class GpuDownsampleFactorMax(Op): ...@@ -421,7 +421,7 @@ class GpuDownsampleFactorMax(Op):
#def perform(self, node, input_storage, output_storage): #def perform(self, node, input_storage, output_storage):
#raise NotImplementedError('only C is implemented') #raise NotImplementedError('only C is implemented')
def c_code_cache_version(self): def c_code_cache_version(self):
return () return (1)
def c_code(self, node, nodename, (x,), (z,), sub): def c_code(self, node, nodename, (x,), (z,), sub):
fail = sub['fail'] fail = sub['fail']
ds0, ds1 = self.ds ds0, ds1 = self.ds
......
...@@ -521,6 +521,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -521,6 +521,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
} }
if (1 && (version==6||version==-1) && if (1 && (version==6||version==-1) &&
kern_len<=320 &&
!work_complete) //conv_valid_row_reduce !work_complete) //conv_valid_row_reduce
{ {
int outsize = CudaNdarray_SIZE(out); int outsize = CudaNdarray_SIZE(out);
......
import sys, time import sys, time
import numpy import numpy
from nose.plugins.skip import SkipTest
imported_scipy_convolve2d = False
try:
from scipy.signal import convolve2d
imported_scipy_convolve2d = True
except ImportError:
pass
import theano import theano
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available == False: if cuda_ndarray.cuda_available == False:
raise SkipTest('Optional package cuda disabled') raise SkipTest('Optional package cuda disabled')
...@@ -38,9 +46,23 @@ def py_conv_full_numpy(img, kern): ...@@ -38,9 +46,23 @@ def py_conv_full_numpy(img, kern):
pad_cols = 2*(kern.shape[3]-1) + img.shape[3] pad_cols = 2*(kern.shape[3]-1) + img.shape[3]
padded_img = numpy.zeros((img.shape[0], img.shape[1], pad_rows, pad_cols), dtype=img.dtype) padded_img = numpy.zeros((img.shape[0], img.shape[1], pad_rows, pad_cols), dtype=img.dtype)
padded_img[:,:,kern.shape[2]-1:kern.shape[2]-1+img.shape[2],kern.shape[3]-1:kern.shape[3]-1+img.shape[3]] = img padded_img[:,:,kern.shape[2]-1:kern.shape[2]-1+img.shape[2],kern.shape[3]-1:kern.shape[3]-1+img.shape[3]] = img
return py_conv_valid(padded_img, kern) return py_conv_valid_numpy(padded_img, kern)
def py_conv(img, kern, mode, subsample):
    """
    Dispatch a 2D convolution to scipy when available, else pure numpy.

    The scipy-backed implementation is faster, so it is preferred whenever
    ``scipy.signal.convolve2d`` was successfully imported. Otherwise, fall
    back to the numpy reference implementations and apply the requested
    subsampling to their full-resolution output.
    """
    if imported_scipy_convolve2d:
        return py_conv_scipy(img, kern, mode, subsample)
    # numpy fallback: compute at full resolution, then subsample rows/cols.
    if mode == 'valid':
        out = py_conv_valid_numpy(img, kern)
    elif mode == 'full':
        out = py_conv_full_numpy(img, kern)
    else:
        raise Exception("Can't execute this kernel.")
    return out[:, :, ::subsample[0], ::subsample[1]]
def py_conv_scipy(img, kern, mode, subsample): def py_conv_scipy(img, kern, mode, subsample):
from scipy.signal import convolve2d
assert img.shape[1] == kern.shape[1] assert img.shape[1] == kern.shape[1]
if mode == 'valid': if mode == 'valid':
outshp = (img.shape[0], kern.shape[0], outshp = (img.shape[0], kern.shape[0],
...@@ -89,7 +111,7 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), ker ...@@ -89,7 +111,7 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), ker
rval = True rval = True
try: try:
t0 = time.time() t0 = time.time()
cpuval = py_conv_scipy(npy_img, npy_kern, mode, subsample) cpuval = py_conv(npy_img, npy_kern, mode, subsample)
t1 = time.time() t1 = time.time()
i = cuda_tensor4() i = cuda_tensor4()
k = cuda_tensor4() k = cuda_tensor4()
...@@ -550,7 +572,7 @@ def _test_dummy(): ...@@ -550,7 +572,7 @@ def _test_dummy():
rval = True rval = True
t0 = time.time() t0 = time.time()
cpuval = py_conv_scipy(npy_img, npy_kern, mode, subsample) cpuval = py_conv(npy_img, npy_kern, mode, subsample)
t1 = time.time() t1 = time.time()
gpuval = cuda_ndarray.conv(img, kern, mode, subsample) gpuval = cuda_ndarray.conv(img, kern, mode, subsample)
t2 = time.time() t2 = time.time()
......
...@@ -252,13 +252,13 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -252,13 +252,13 @@ class GpuImages2Neibs(Images2Neibs):
dtype=ten4.type.dtype)()]) dtype=ten4.type.dtype)()])
def c_code_cache_version(self): def c_code_cache_version(self):
return () return (6,)
return (2,)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
if self.mode=="valid": mode = self.mode
return """ return """
static __global__ void k_multi_warp_%(nodename)s( //a version that use less register but don't work in all case.
static __global__ void k_multi_warp_less_%(nodename)s(
const int nb_batch, const int nb_batch,
const int nb_stack, const int nb_stack,
const int height, const int height,
...@@ -274,8 +274,10 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -274,8 +274,10 @@ class GpuImages2Neibs(Images2Neibs):
float * global_out float * global_out
) )
{ {
const int wrap_centered_idx_shift_x = c/2;
const int wrap_centered_idx_shift_y = d/2;
for(int tblock = blockIdx.x;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x){ for(int tblock = blockIdx.x*blockDim.z+threadIdx.z;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x*blockDim.z){
const int b = tblock%%grid_d; const int b = tblock%%grid_d;
int left = tblock/grid_d; int left = tblock/grid_d;
const int a = left%%grid_c; const int a = left%%grid_c;
...@@ -289,12 +291,23 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -289,12 +291,23 @@ class GpuImages2Neibs(Images2Neibs):
if(a>grid_c)continue; if(a>grid_c)continue;
if(b>grid_d)continue; if(b>grid_d)continue;
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n)); int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i = 0; i < c; i++) // loop over c int i = threadIdx.y; // loop over c
{ {
int ten4_2 = i + a * step_x; int ten4_2 = i + a * step_x;
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d if("%(mode)s"=="wrap_centered"){
ten4_2 -= wrap_centered_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height;
}
int j = threadIdx.x; // loop over d
{ {
int ten4_3 = j + b * step_y; int ten4_3 = j + b * step_y;
if("%(mode)s"=="wrap_centered"){
ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
}
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n)); //int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n)); //int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n; int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
...@@ -307,9 +320,6 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -307,9 +320,6 @@ class GpuImages2Neibs(Images2Neibs):
} }
} }
""" % locals()
if self.mode=="wrap_centered":
return """
static __global__ void k_multi_warp_%(nodename)s( static __global__ void k_multi_warp_%(nodename)s(
const int nb_batch, const int nb_batch,
const int nb_stack, const int nb_stack,
...@@ -329,7 +339,7 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -329,7 +339,7 @@ class GpuImages2Neibs(Images2Neibs):
const int wrap_centered_idx_shift_x = c/2; const int wrap_centered_idx_shift_x = c/2;
const int wrap_centered_idx_shift_y = d/2; const int wrap_centered_idx_shift_y = d/2;
for(int tblock = blockIdx.x;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x){ for(int tblock = blockIdx.x*blockDim.z+threadIdx.z;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x*blockDim.z){
const int b = tblock%%grid_d; const int b = tblock%%grid_d;
int left = tblock/grid_d; int left = tblock/grid_d;
const int a = left%%grid_c; const int a = left%%grid_c;
...@@ -343,19 +353,23 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -343,19 +353,23 @@ class GpuImages2Neibs(Images2Neibs):
if(a>grid_c)continue; if(a>grid_c)continue;
if(b>grid_d)continue; if(b>grid_d)continue;
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n)); int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i = 0; i < c; i++) // loop over c for (int i = threadIdx.y; i < c; i+=blockDim.y) // loop over c
{ {
int ten4_2 = i + a * step_x; int ten4_2 = i + a * step_x;
if("%(mode)s"=="wrap_centered"){
ten4_2 -= wrap_centered_idx_shift_x; ten4_2 -= wrap_centered_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height; if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height; else if (ten4_2 >= height) ten4_2 -= height;
}
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
{ {
int ten4_3 = j + b * step_y; int ten4_3 = j + b * step_y;
if("%(mode)s"=="wrap_centered"){
ten4_3 -= wrap_centered_idx_shift_y; ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width; if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width; else if (ten4_3 >= width) ten4_3 -= width;
}
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n)); //int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n)); //int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n; int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
...@@ -370,7 +384,6 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -370,7 +384,6 @@ class GpuImages2Neibs(Images2Neibs):
""" % locals() """ % locals()
def c_code(self, node, name, (ten4, neib_shape, neib_step), (z,), sub): def c_code(self, node, name, (ten4, neib_shape, neib_step), (z,), sub):
fail = sub['fail'] fail = sub['fail']
mode = self.mode mode = self.mode
...@@ -473,17 +486,36 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -473,17 +486,36 @@ class GpuImages2Neibs(Images2Neibs):
const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0); const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1); const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
dim3 n_threads(d,c,1);
//Their is a max of 512 threads per blocks
while(n_threads.x*n_threads.y>512 && n_threads.y>1)n_threads.y--;
while(n_threads.x*n_threads.y>512 && n_threads.x>1)n_threads.x--;
//Make bigger block to have better memory access pattern and a higher core utilisation.
//for smaller patch size
while(c*d*(n_threads.z+1) < 128 && n_threads.z<64 && n_threads.z<CudaNdarray_HOST_DIMS(%(z)s)[0]){
n_threads.z++;
}
int nb_block; int nb_block;
if (nb_batch %% 32 == 0) if (CudaNdarray_HOST_DIMS(%(z)s)[0] %% n_threads.z == 0)
nb_block = nb_batch/32; nb_block = CudaNdarray_HOST_DIMS(%(z)s)[0] / n_threads.z;
else else
nb_block = (int)((float)nb_batch/32. + 1.); nb_block = (CudaNdarray_HOST_DIMS(%(z)s)[0] / n_threads.z) + 1;
dim3 n_blocks(std::min(32*1024,nb_block));
dim3 n_blocks(std::min(32*1024,CudaNdarray_HOST_DIMS(%(z)s)[0]),1,1);
dim3 n_threads(32,1,1);
int n_shared = 0; int n_shared = 0;
k_multi_warp_%(name)s<<<n_blocks, n_threads, n_shared>>>( void (*f)(int, int, int ,int,
int, int, int ,int,
int, int,
int, int, int, int,
float*, float*);
if(n_threads.x==d && n_threads.y==c){
f = k_multi_warp_less_%(name)s;
}else{
f = k_multi_warp_%(name)s;
}
f<<<n_blocks, n_threads, n_shared>>>(
nb_batch, nb_batch,
nb_stack, nb_stack,
height, width, height, width,
......
...@@ -278,10 +278,14 @@ def test_neibs_wrap_centered_step_manual(): ...@@ -278,10 +278,14 @@ def test_neibs_wrap_centered_step_manual():
def test_neibs_gpu(): def test_neibs_gpu():
if cuda.cuda_available == False: if cuda.cuda_available == False:
raise SkipTest('Optional package cuda disabled') raise SkipTest('Optional package cuda disabled')
for shape, pshape in [((100,40,18,18),(2,2)),
((100,40,6,18),(3,2)),
((10,40,66,66),(33,33)),
((10,40,68,66),(34,33))
]:
shape = (100,40,18,18)
images = shared(numpy.arange(numpy.prod(shape), dtype='float32').reshape(shape)) images = shared(numpy.arange(numpy.prod(shape), dtype='float32').reshape(shape))
neib_shape = T.as_tensor_variable((2,2))#(array((2,2), dtype='float32')) neib_shape = T.as_tensor_variable(pshape)
from theano.sandbox.cuda.basic_ops import gpu_from_host from theano.sandbox.cuda.basic_ops import gpu_from_host
......
...@@ -12,7 +12,7 @@ import numpy, theano ...@@ -12,7 +12,7 @@ import numpy, theano
#from copy import copy as python_copy #from copy import copy as python_copy
from theano import gof, shared from theano import gof, shared
from theano.gof import Variable, Op, utils, Type, Constant, Value from theano.gof import Variable, Op, Type, Constant, Value
from theano.tensor.tsor_apply import Apply from theano.tensor.tsor_apply import Apply
from theano import gradient from theano import gradient
...@@ -21,7 +21,7 @@ import elemwise ...@@ -21,7 +21,7 @@ import elemwise
from theano import scalar as scal from theano import scalar as scal
from theano.gof.python25 import partial, any, all from theano.gof.python25 import partial, any, all
from theano import compile, printing from theano import compile, printing
from theano.printing import pprint, Print from theano.printing import pprint
### set up the external interface ### set up the external interface
from elemwise import Elemwise, DimShuffle, CAReduce, Sum from elemwise import Elemwise, DimShuffle, CAReduce, Sum
......
...@@ -18,6 +18,16 @@ from theano import gof, Op, tensor, config ...@@ -18,6 +18,16 @@ from theano import gof, Op, tensor, config
from theano.tensor.tsor_apply import Apply from theano.tensor.tsor_apply import Apply
from theano.gof.python25 import any from theano.gof.python25 import any
imported_scipy_signal = False
try:
# TODO: move these back out to global scope when they no longer cause an atexit error
from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
from scipy.signal.sigtools import _convolve2d
imported_scipy_signal = True
except ImportError:
pass
_logger=logging.getLogger("theano.signal.conv") _logger=logging.getLogger("theano.signal.conv")
def _debug(*msg): def _debug(*msg):
_logger.debug(' '.join([ str(x) for x in msg])) _logger.debug(' '.join([ str(x) for x in msg]))
...@@ -547,9 +557,12 @@ class ConvOp(Op): ...@@ -547,9 +557,12 @@ class ConvOp(Op):
""" """
By default if len(img2d.shape)==3, we By default if len(img2d.shape)==3, we
""" """
if not imported_scipy_signal:
raise theano.gof.utils.MethodNotDefined(
"c_headers", type(self), self.__class__.__name__,
"Need the python package for scipy.signal to be installed for the python implementation. You can use the C implementation instead.")
# TODO: move these back out to global scope when they no longer cause an atexit error # TODO: move these back out to global scope when they no longer cause an atexit error
from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
from scipy.signal.sigtools import _convolve2d
imshp = self.imshp imshp = self.imshp
if imshp is None or any([x is None for x in imshp]): if imshp is None or any([x is None for x in imshp]):
imshp = tuple(img2d.shape[1:]) imshp = tuple(img2d.shape[1:])
...@@ -584,8 +597,6 @@ class ConvOp(Op): ...@@ -584,8 +597,6 @@ class ConvOp(Op):
z[0] = numpy.zeros((bsize,)+(nkern,)+fulloutshp, z[0] = numpy.zeros((bsize,)+(nkern,)+fulloutshp,
dtype=img2d.dtype) dtype=img2d.dtype)
zz=z[0] zz=z[0]
val = _valfrommode(self.out_mode)
bval = _bvalfromboundary('fill')
stacklen = imshp[0] stacklen = imshp[0]
...@@ -616,6 +627,9 @@ class ConvOp(Op): ...@@ -616,6 +627,9 @@ class ConvOp(Op):
filtersflipped = buf filtersflipped = buf
del buf, rstride, cstride del buf, rstride, cstride
val = _valfrommode(self.out_mode)
bval = _bvalfromboundary('fill')
for b in range(bsize): for b in range(bsize):
for n in range(nkern): for n in range(nkern):
zz[b,n,...].fill(0) zz[b,n,...].fill(0)
...@@ -623,6 +637,25 @@ class ConvOp(Op): ...@@ -623,6 +637,25 @@ class ConvOp(Op):
zz[b,n,...] += _convolve2d(\ zz[b,n,...] += _convolve2d(\
img2d[b,im0,...], filtersflipped[n,im0,...],1,val, bval, 0) img2d[b,im0,...], filtersflipped[n,im0,...],1,val, bval, 0)
if False:
if False and self.out_mode=="full":
img2d2 = numpy.zeros((bsize,stacklen,
imshp[1]+2*kshp[0]-2,
imshp[2]+2*kshp[1]-2))
img2d2[:,:,kshp[0]-1:kshp[0]-1+imshp[1],
kshp[1]-1:kshp[1]-1+imshp[2]] = img2d
img2d = img2d2
#N_image_shape = image_data.shape
for b in range(bsize):
for n in range(nkern):
zz[b,n,...].fill(0)
for im0 in range(stacklen):
for row in range(0,zz.shape[2],self.dx):
for col in range(0,zz.shape[3],self.dy):
zz[b,n,row,col] += (img2d[b,im0,row:row+kshp[0],col:col+kshp[1]]*\
filtersflipped[n,im0,::-1,::-1]).sum()
#We copy it to remove the Stride mismatch warning from DEBUG_MODE. #We copy it to remove the Stride mismatch warning from DEBUG_MODE.
#The copy make that we return an object with the same stride as the c version. #The copy make that we return an object with the same stride as the c version.
#The copy don't affect the performence during our experience as in that case we #The copy don't affect the performence during our experience as in that case we
......
import sys, time, unittest import sys, time, unittest
import numpy import numpy
from scipy import signal
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -60,6 +59,7 @@ class TestConv2D(unittest.TestCase): ...@@ -60,6 +59,7 @@ class TestConv2D(unittest.TestCase):
############# REFERENCE IMPLEMENTATION ############ ############# REFERENCE IMPLEMENTATION ############
s = 1. s = 1.
orig_image_data = image_data
if border_mode is not 'full': s = -1. if border_mode is not 'full': s = -1.
out_shape2d = numpy.array(N_image_shape[-2:]) +\ out_shape2d = numpy.array(N_image_shape[-2:]) +\
s*numpy.array(N_filter_shape[-2:]) - s s*numpy.array(N_filter_shape[-2:]) - s
...@@ -68,26 +68,41 @@ class TestConv2D(unittest.TestCase): ...@@ -68,26 +68,41 @@ class TestConv2D(unittest.TestCase):
ref_output = numpy.zeros(out_shape) ref_output = numpy.zeros(out_shape)
# loop over output feature maps # loop over output feature maps
for k in range(N_filter_shape[0]): ref_output.fill(0)
# loop over input feature maps if border_mode=='full':
for l in range(N_filter_shape[1]): image_data2 = numpy.zeros((N_image_shape[0],N_image_shape[1],
N_image_shape[2]+2*N_filter_shape[2]-2,
filter2d = filter_data[k,l,:,:] N_image_shape[3]+2*N_filter_shape[3]-2))
image_data2[:,:,N_filter_shape[2]-1:N_filter_shape[2]-1+N_image_shape[2],
# loop over mini-batches N_filter_shape[3]-1:N_filter_shape[3]-1+N_image_shape[3]] = image_data
for b in range(N_image_shape[0]): image_data = image_data2
image2d = image_data[b,l,:,:] N_image_shape = image_data.shape
output2d = signal.convolve2d(image2d, filter2d, border_mode) for bb in range(N_image_shape[0]):
for nn in range(N_filter_shape[0]):
ref_output[b,k,:,:] +=\ for im0 in range(N_image_shape[1]):
output2d[::subsample[0],::subsample[1]] filter2d = filter_data[nn,im0,:,:]
image2d = image_data[bb,im0,:,:]
for row in range(ref_output.shape[2]):
irow = row * subsample[0]#image row
for col in range(ref_output.shape[3]):
icol = col * subsample[1]#image col
ref_output[bb,nn,row,col] += (image2d[irow:irow+N_filter_shape[2],
icol:icol+N_filter_shape[3]]*filter2d[::-1,::-1]
).sum()
self.failUnless(_allclose(theano_output, ref_output)) self.failUnless(_allclose(theano_output, ref_output))
############# TEST GRADIENT ############ ############# TEST GRADIENT ############
if verify_grad: if verify_grad:
utt.verify_grad(sym_conv2d, [image_data, filter_data]) utt.verify_grad(sym_conv2d, [orig_image_data, filter_data])
def test_basic1(self):
    """
    Test a single basic 'valid' convolution case: a 2x2 filter applied to a
    3x3 image with 2 batches and 2 input channels, skipping the (slow)
    gradient verification.
    """
    self.validate((2,2,3,3), (2,2,2,2), 'valid', verify_grad=False)
def test_basic(self): def test_basic(self):
""" """
......
import sys, time, unittest import sys, time, unittest
import numpy import numpy
from scipy import signal
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -59,7 +58,13 @@ class TestSignalConv2D(unittest.TestCase): ...@@ -59,7 +58,13 @@ class TestSignalConv2D(unittest.TestCase):
image2d = image_data3d[b,:,:] image2d = image_data3d[b,:,:]
filter2d = filter_data3d[k,:,:] filter2d = filter_data3d[k,:,:]
output2d = signal.convolve2d(image2d, filter2d, 'valid') output2d = numpy.zeros(ref_output.shape)
for row in range(ref_output.shape[0]):
for col in range(ref_output.shape[1]):
output2d[row,col] += (image2d[row:row+filter2d.shape[0],
col:col+filter2d.shape[1]]*filter2d[::-1,::-1]
).sum()
self.failUnless(_allclose(theano_output4d[b,k,:,:], output2d)) self.failUnless(_allclose(theano_output4d[b,k,:,:], output2d))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论