提交 1e3de2ce 作者：Arjun Jain

Changes suggested by Fred

上级 74ea01ac
...@@ -76,7 +76,7 @@ void im2col(const float* data_im, const int channels, ...@@ -76,7 +76,7 @@ void im2col(const float* data_im, const int channels,
// Author: Arjun Jain // Author: Arjun Jain
CudaNdarray* validMM(const CudaNdarray *input, CudaNdarray* corrMM(const CudaNdarray *input,
CudaNdarray *weight, CudaNdarray *weight,
CudaNdarray *output, CudaNdarray *output,
int padding = 0) int padding = 0)
......
...@@ -830,46 +830,47 @@ def test_gemm(): ...@@ -830,46 +830,47 @@ def test_gemm():
input: (batch size, channels, rows, columns) input: (batch size, channels, rows, columns)
filters: (number of filters, channels, rows, columns) filters: (number of filters, channels, rows, columns)
""" """
for bs in range(1, 5): for mode in ['valid', 'full']:
for ch in range(1,4): print 'Testing mode: ' + mode
for nf in range(1,4): for bs in range(1, 5):
for rImg in range(5, 9): for ch in range(1,4):
for rFlt in range(2, 4): for nf in range(1,4):
ishape = (bs, ch, rImg, rImg) for rImg in range(5, 9):
kshape = (nf, ch, rFlt, rFlt) for rFlt in range(2, 4):
print "ishape: ", ishape ishape = (bs, ch, rImg, rImg)
print "kshape: ", kshape kshape = (nf, ch, rFlt, rFlt)
mode = 'full' print "ishape: ", ishape
subsample = (1, 1) print "kshape: ", kshape
subsample = (1, 1)
npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')
i = cuda_tensor4()
k = cuda_tensor4()
t2 = None
t0 = time.time()
cpuval = py_conv(npy_img, npy_kern, mode, subsample)
t1 = time.time()
op = theano.sandbox.cuda.blas.GpuConvMM(border_mode=mode)(i, k) npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
f = theano.function([i, k], op, mode=theano_mode) npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')
for k in range(npy_kern.shape[0]):
for s in range(npy_kern.shape[1]):
npy_kern[k,s,:,:] = numpy.rot90(npy_kern[k,s,:,:], 2)
gpuval = f(npy_img, npy_kern) i = cuda_tensor4()
k = cuda_tensor4()
t2 = time.time() t2 = None
gpuval = numpy.asarray(gpuval) t0 = time.time()
rval = numpy.allclose(cpuval, gpuval, rtol=1e-4) cpuval = py_conv(npy_img, npy_kern, mode, subsample)
assert (rval == True)
print 'Test Passed' t1 = time.time()
op = theano.sandbox.cuda.blas.GpuConvMM(border_mode=mode)(i, k)
f = theano.function([i, k], op, mode=theano_mode)
for k in range(npy_kern.shape[0]):
for s in range(npy_kern.shape[1]):
npy_kern[k,s,:,:] = numpy.rot90(npy_kern[k,s,:,:], 2)
gpuval = f(npy_img, npy_kern)
t2 = time.time()
gpuval = numpy.asarray(gpuval)
rval = numpy.allclose(cpuval, gpuval, rtol=1e-4)
assert (rval == True)
print 'Test Passed'
def benchmark(): def benchmark():
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论