Commit c649d668 authored by Arjun Jain

- fixed a bug in the caffe conv (values of _M, _N, _K)

- added a test that checks a variety of image and kernel shapes and sizes

- removed the flip from local_conv_gemm in cuda/opt.py, as the code never reached there for me

- added the flip to the kernel in the test code itself (see the flip sketch below)
Parent f18c8498
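For context on the flip that moved out of local_conv_gemm and into the test: scipy's convolve2d flips the kernel, while the GEMM-based GpuConvMM path appears to compute a plain correlation, so rotating the kernel by 180 degrees before the GPU call makes the two agree. A minimal standalone sketch of that identity (not part of this commit; shapes and names are illustrative only):

import numpy
from scipy.signal import convolve2d, correlate2d

# Illustrative shapes only: one 7x7 image and one 3x3 kernel.
img = numpy.random.rand(7, 7).astype('float32')
kern = numpy.random.rand(3, 3).astype('float32')

# Convolution flips the kernel; correlation does not. Rotating the kernel
# by 180 degrees (numpy.rot90(kern, 2)) turns one into the other, which is
# what the updated test does to npy_kern before calling GpuConvMM.
conv = convolve2d(img, kern, mode='valid')
corr_flipped = correlate2d(img, numpy.rot90(kern, 2), mode='valid')
assert numpy.allclose(conv, corr_flipped, rtol=1e-4)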
...
@@ -165,7 +165,7 @@ CudaNdarray* validMM(const CudaNdarray *input,
     float alpha = 1.0f; float beta = 0.0f;
     int m = CudaNdarray_HOST_DIMS(columns)[1];
-    int n = CudaNdarray_HOST_DIMS(weight)[1];
+    int n = CudaNdarray_HOST_DIMS(weight)[0];
     int k = CudaNdarray_HOST_DIMS(columns)[0];
     status = cublasSgemm(handle,
...
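A rough NumPy sketch of the shape bookkeeping behind the m/n/k fix above, assuming a Caffe-style im2col layout (weight flattened to (nFilters, channels*kH*kW), columns as (channels*kH*kW, outH*outW)); this illustrates the dimension choice only, not the actual cuBLAS call, which also involves column-major/transpose details:

import numpy

# Illustrative sizes; the names (nFilters, outH, ...) are not from the commit.
nFilters, channels, kH, kW = 2, 3, 3, 3
outH, outW = 5, 5

weight = numpy.random.rand(nFilters, channels * kH * kW)       # flattened filters
columns = numpy.random.rand(channels * kH * kW, outH * outW)   # im2col buffer

# Mirrors the fixed indices in validMM:
#   m = columns dims[1]  (number of output pixels)
#   n = weight  dims[0]  (number of filters)  <- changed from dims[1]
#   k = columns dims[0]  (reduction length, channels*kH*kW)
m, n, k = columns.shape[1], weight.shape[0], columns.shape[0]

out = weight.dot(columns)   # GEMM: (n, k) x (k, m) -> (n, m)
assert out.shape == (n, m)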
...
@@ -1291,7 +1291,8 @@ def local_conv_gemm(node):
         print "WARNING, YOU ARE USING BUGGED CODE!"
         img, kern = node.inputs
         img = gpu_contiguous(img)
-        kern = gpu_contiguous(kern[:, :, ::-1, ::-1])
+        #kern = kern[:, :, ::-1, ::-1]
+        kern = gpu_contiguous(kern)
         return [GpuConvMM(node.op.border_mode)(img, kern)]
 gpu_optimizer.register("conv_gemm", local_conv_gemm)
...
...
@@ -115,8 +115,8 @@ def py_conv_scipy(img, kern, mode, subsample):
         for k in xrange(out.shape[1]):
             for s in xrange(img.shape[1]):
                 out[b, k, :, :] += convolve2d(img[b, s, :, :],
-                                              numpy.rot90(kern[k, s, :, :], 2),
+                                              kern[k, s, :, :],
                                               mode)
     return out[:, :, ::subsample[0], ::subsample[1]]
...
@@ -819,37 +819,53 @@ class TestConv2DGPU(unittest.TestCase):
 def _test_dummy():
-    ishape = (1, 1, 7, 7)
-    kshape = (1, 1, 3, 3)
-    mode = 'valid'
-    subsample = (1, 1)
-    npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
-    npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')
-    i = cuda_tensor4()
-    k = cuda_tensor4()
-    #print >> sys.stdout, '_params_allgood trying ', ishape, kshape, mode
-    t2 = None
-    rval = True
-    t0 = time.time()
-    cpuval = py_conv(npy_img, npy_kern, mode, subsample)
-    t1 = time.time()
-    op = theano.sandbox.cuda.blas.GpuConvMM(border_mode=mode)(i, k)
-    f = theano.function([i, k], op, mode=theano_mode)
-    gpuval = f(npy_img, npy_kern)
-    t2 = time.time()
-    gpuval = numpy.asarray(gpuval)
-    print gpuval
-    print '-------------------'
-    print cpuval
+    """
+    input: (batch size, channels, rows, columns)
+    filters: (number of filters, channels, rows, columns)
+    """
+    for bs in range(1, 5):
+        for ch in range(1, 4):
+            for nf in range(1, 4):
+                for rImg in range(5, 9):
+                    for rFlt in range(2, 3):
+                        ishape = (bs, ch, rImg, rImg)
+                        kshape = (nf, ch, rFlt, rFlt)
+                        print "ishape: ", ishape
+                        print "kshape: ", kshape
+                        # ishape = (2, 1, 5, 5)
+                        # kshape = (2, 1, 3, 3)
+                        mode = 'valid'
+                        subsample = (1, 1)
+                        npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
+                        npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')
+                        i = cuda_tensor4()
+                        k = cuda_tensor4()
+                        t2 = None
+                        t0 = time.time()
+                        cpuval = py_conv(npy_img, npy_kern, mode, subsample)
+                        t1 = time.time()
+                        op = theano.sandbox.cuda.blas.GpuConvMM(border_mode=mode)(i, k)
+                        f = theano.function([i, k], op, mode=theano_mode)
+                        for k in range(npy_kern.shape[0]):
+                            for s in range(npy_kern.shape[1]):
+                                npy_kern[k, s, :, :] = numpy.rot90(npy_kern[k, s, :, :], 2)
+                        gpuval = f(npy_img, npy_kern)
+                        t2 = time.time()
+                        gpuval = numpy.asarray(gpuval)
+                        rval = numpy.allclose(cpuval, gpuval, rtol=1e-4)
+                        assert (rval == True)
+                        print 'Test Passed'
 
 def benchmark():
...