提交 f44feaaa authored 作者: lamblin's avatar lamblin

Merge pull request #616 from nouiz/gpu_conv_faster

Gpu conv faster
...@@ -309,8 +309,15 @@ def use(device, ...@@ -309,8 +309,15 @@ def use(device,
device = 0 device = 0
try: try:
if device != 'gpu': if device != 'gpu':
assert isinstance(device, int)
gpu_init(device) gpu_init(device)
use.device_number = device use.device_number = device
else:
# This means we let the driver select the GPU.
# By default it is always number 0.
# If the driver is in exclusive mode, it will always show
# device 0 even if it uses something else.
use.device_number = 0
if test_driver: if test_driver:
import theano.sandbox.cuda.tests.test_driver import theano.sandbox.cuda.tests.test_driver
theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1() theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()
......
...@@ -718,10 +718,11 @@ class GpuConv(GpuOp): ...@@ -718,10 +718,11 @@ class GpuConv(GpuOp):
node_ = copy.copy(node) node_ = copy.copy(node)
assert node.op is node_.op assert node.op is node_.op
if node_.op.max_threads_dim0 is None: if node_.op.max_threads_dim0 is None:
op = copy.copy(node_.op) cuda = theano.sandbox.cuda
device_id = theano.sandbox.cuda.use.device_number[3:] device_id = cuda.use.device_number
if device_id == '': if device_id is None:
device_id = 0 cuda.use("gpu", False, False, False, False, True)
device_id = cuda.use.device_number
cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
prop = cuda_ndarray.device_properties(device_id) prop = cuda_ndarray.device_properties(device_id)
node_.op.max_threads_dim0 = prop['maxThreadsDim0'] node_.op.max_threads_dim0 = prop['maxThreadsDim0']
......
...@@ -35,9 +35,9 @@ device_id = theano.sandbox.cuda.use.device_number ...@@ -35,9 +35,9 @@ device_id = theano.sandbox.cuda.use.device_number
if device_id is None: if device_id is None:
cuda_ndarray.shared_constructor(numpy.zeros(2, dtype='float32')) cuda_ndarray.shared_constructor(numpy.zeros(2, dtype='float32'))
device_id = theano.sandbox.cuda.use.device_number device_id = theano.sandbox.cuda.use.device_number
device_id = device_id[3:] if device_id is None:
if device_id == '': cuda.use("gpu", False, False, False, False, True)
device_id = 0 device_id = theano.sandbox.cuda.use.device_number
cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
device_prop = cuda_ndarray.device_properties(device_id) device_prop = cuda_ndarray.device_properties(device_id)
...@@ -55,7 +55,7 @@ def py_conv_valid_numpy(img, kern): ...@@ -55,7 +55,7 @@ def py_conv_valid_numpy(img, kern):
#rr, cc is the upper-left corner of img patches #rr, cc is the upper-left corner of img patches
imgpatch = img[b, :, rr:rr + kern.shape[2], imgpatch = img[b, :, rr:rr + kern.shape[2],
cc:cc + kern.shape[3]] cc:cc + kern.shape[3]]
#print img.shape, kern.shape, imgpatch.shape, rr+kern.shape[2]-1, rr-1, -1
innerprod = (imgpatch[:, ::-1, ::-1] * innerprod = (imgpatch[:, ::-1, ::-1] *
kern[k, :, :, :]).sum() kern[k, :, :, :]).sum()
out[b, k, rr, cc] = innerprod out[b, k, rr, cc] = innerprod
...@@ -127,9 +127,10 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1), ...@@ -127,9 +127,10 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
# See `test_example` (above) for an example of how to use this directly. # See `test_example` (above) for an example of how to use this directly.
# #
# :param kshape: (4d)The shape of the kernel at run time. # :param kshape: (4d)The shape of the kernel at run time.
# :param compile_kshp: (2d) hardcode the shape of the kernel in the generated code # :param compile_kshp: (2d) hardcode the shape of the kernel in
# This is supposed to be faster, but we need to check # the generated code This is supposed to be
# That we raise an error if the input have the wrong shape. # faster, but we need to check That we raise
# an error if the input have the wrong shape.
# #
if ones: if ones:
assert not random assert not random
...@@ -195,7 +196,8 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1), ...@@ -195,7 +196,8 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
if mode == 'valid': if mode == 'valid':
approx_fp = cpuval.size * ishape[1] * kshape[2] * kshape[3] * 2 approx_fp = cpuval.size * ishape[1] * kshape[2] * kshape[3] * 2
else: else:
approx_fp = ishape[0] * kshape[0] * kshape[1] * kshape[2] * kshape[3] * ishape[2] * ishape[3] * 2 approx_fp = (ishape[0] * kshape[0] * kshape[1] * kshape[2] *
kshape[3] * ishape[2] * ishape[3] * 2)
approx_fp /= 1e6 approx_fp /= 1e6
cpu_mflops = approx_fp / (t1 - t0) cpu_mflops = approx_fp / (t1 - t0)
gpu_mflops = approx_fp / (t2 - t1) gpu_mflops = approx_fp / (t2 - t1)
...@@ -204,14 +206,21 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1), ...@@ -204,14 +206,21 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
print >> sys.stdout, '%12.5f %7.2f %7.2f %7.1f' % (approx_fp, print >> sys.stdout, '%12.5f %7.2f %7.2f %7.1f' % (approx_fp,
cpu_mflops, gpu_mflops, (t1 - t0) / (t2 - t1)) cpu_mflops, gpu_mflops, (t1 - t0) / (t2 - t1))
if not rval: if not rval:
print >> sys.stdout, 'test_'+mode+' id='+str(id)+' FAILED for ishape, kshape, mode, subsample, img_stride, kern_stride, version', ishape, kshape, mode, subsample, img_stride, kern_stride, version print >> sys.stdout, ('test_' + mode + ' id=' + str(id) +
' FAILED for ishape, kshape, mode, subsample,' +
' img_stride, kern_stride, version', ishape,
kshape, mode, subsample, img_stride, kern_stride,
version)
diff = cpuval - gpuval diff = cpuval - gpuval
diffabs = numpy.absolute(diff) diffabs = numpy.absolute(diff)
pr_diff = diffabs / numpy.absolute(cpuval) pr_diff = diffabs / numpy.absolute(cpuval)
nb_close = (diffabs <= (atol + rtol * numpy.absolute(gpuval))).sum() nb_close = (diffabs <= (atol + rtol * numpy.absolute(gpuval))).sum()
print "max absolute diff:",diffabs.max(),"avg abs diff:",numpy.average(diffabs) print "max absolute diff:", (diffabs.max(), "avg abs diff:",
print "median abs diff:", numpy.median(diffabs), "nb close:",nb_close, "/", diff.size numpy.average(diffabs))
print "max relatif diff:",pr_diff.max(), "avg rel diff:", numpy.average(pr_diff) print "median abs diff:", (numpy.median(diffabs), "nb close:",
nb_close, "/", diff.size)
print "max relatif diff:", (pr_diff.max(), "avg rel diff:",
numpy.average(pr_diff))
if not rval and print_ != False: if not rval and print_ != False:
if npy_img.shape[0] > 5: if npy_img.shape[0] > 5:
print "img", npy_img[0] print "img", npy_img[0]
...@@ -275,17 +284,16 @@ def exec_conv(version, shapes, verbose, random, mode, ...@@ -275,17 +284,16 @@ def exec_conv(version, shapes, verbose, random, mode,
def get_basic_shapes(): def get_basic_shapes():
return [
#basic test of image and kernel shape #basic test of image and kernel shape
((1, 1, 1, 1), (1, 1, 1, 1), (1,1), (1,1), (1,1)) return [((1, 1, 1, 1), (1, 1, 1, 1), (1, 1), (1, 1), (1, 1)),
, ((1, 1, 2, 2), (1, 1, 2, 2), (1,1), (1,1), (1,1)) ((1, 1, 2, 2), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
, ((1, 1, 3, 3), (1, 1, 2, 2), (1,1), (1,1), (1,1)) ((1, 1, 3, 3), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
#basic test for unsquare kernel and image #basic test for unsquare kernel and image
, ((1, 1, 2, 4), (1, 1, 2, 2), (1,1), (1,1), (1,1)) ((1, 1, 2, 4), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
, ((1, 1, 3, 4), (1, 1, 2, 2), (1,1), (1,1), (1,1)) ((1, 1, 3, 4), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
, ((1, 1, 4, 3), (1, 1, 2, 2), (1,1), (1,1), (1,1)) ((1, 1, 4, 3), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
, ((1, 1, 4, 4), (1, 1, 3, 2), (1,1), (1,1), (1,1)) ((1, 1, 4, 4), (1, 1, 3, 2), (1, 1), (1, 1), (1, 1)),
, ((1, 1, 4, 4), (1, 1, 2, 3), (1,1), (1,1), (1,1))] ((1, 1, 4, 4), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1))]
def get_shapes(imshp=(1, 1), kshp=(1, 1), subsample=(1, 1), def get_shapes(imshp=(1, 1), kshp=(1, 1), subsample=(1, 1),
...@@ -293,40 +301,56 @@ def get_shapes(imshp=(1, 1), kshp=(1, 1), subsample=(1, 1), ...@@ -293,40 +301,56 @@ def get_shapes(imshp=(1, 1), kshp=(1, 1), subsample=(1, 1),
""" all possible case if we one or more of stack size, batch size, """ all possible case if we one or more of stack size, batch size,
nkern. We use the gived image shape, kernel shape and subsmaple nkern. We use the gived image shape, kernel shape and subsmaple
shape.""" shape."""
return [ ((1, 2)+imshp, (1, 2)+kshp,subsample, img_stride, kern_stride)#stack only return [
, ((3, 1)+imshp, (1, 1)+kshp,subsample, img_stride, kern_stride)#batch only #stack only
, ((1, 1)+imshp, (2, 1)+kshp,subsample, img_stride, kern_stride)#nkern only ((1, 2) + imshp, (1, 2) + kshp, subsample, img_stride, kern_stride),
, ((3, 1)+imshp, (2, 1)+kshp,subsample, img_stride, kern_stride)#batch and nkern #batch only
, ((3, 2)+imshp, (1, 2)+kshp,subsample, img_stride, kern_stride)#batch and stack ((3, 1) + imshp, (1, 1) + kshp, subsample, img_stride, kern_stride),
, ((1, 2)+imshp, (2, 2)+kshp,subsample, img_stride, kern_stride)#stack and nkern #nkern only
, ((2, 2)+imshp, (2, 2)+kshp,subsample, img_stride, kern_stride)#batch, nkern and stack ((1, 1) + imshp, (2, 1) + kshp, subsample, img_stride, kern_stride),
, ((3, 2)+imshp, (4, 2)+kshp,subsample, img_stride, kern_stride)#batch, nkern and stack #batch and nkern
] ((3, 1) + imshp, (2, 1) + kshp, subsample, img_stride, kern_stride),
#batch and stack
((3, 2) + imshp, (1, 2) + kshp, subsample, img_stride, kern_stride),
#stack and nkern
((1, 2) + imshp, (2, 2) + kshp, subsample, img_stride, kern_stride),
#batch, nkern and stack
((2, 2) + imshp, (2, 2) + kshp, subsample, img_stride, kern_stride),
#batch, nkern and stack
((3, 2) + imshp, (4, 2) + kshp, subsample, img_stride, kern_stride)
]
def get_shapes2(scales_img=(1, 1), scales_kern=(1, 1), subsample=(1, 1), def get_shapes2(scales_img=(1, 1), scales_kern=(1, 1), subsample=(1, 1),
img_stride=(1, 1), kern_stride=(1, 1)): img_stride=(1, 1), kern_stride=(1, 1)):
#basic test of stack, batch and nkern paramter #basic test of stack, batch and nkern paramter
shapes =get_shapes((1*scales_img[0],1*scales_img[1]), shapes = get_shapes((1 * scales_img[0], 1 * scales_img[1]),
(1*scales_kern[0],1*scales_kern[1]),subsample, img_stride, kern_stride) (1 * scales_kern[0], 1 * scales_kern[1]),
subsample, img_stride, kern_stride)
#basic test of stack, batch and nkern paramter with image and kernel shape #basic test of stack, batch and nkern paramter with image and kernel shape
shapes +=get_shapes((2*scales_img[0],2*scales_img[1]), shapes += get_shapes((2 * scales_img[0], 2 * scales_img[1]),
(2*scales_kern[0],2*scales_kern[1]),subsample, img_stride, kern_stride) (2 * scales_kern[0], 2 * scales_kern[1]),
subsample, img_stride, kern_stride)
#basic test of stack, batch and nkern paramter with image and kernel shape #basic test of stack, batch and nkern paramter with image and kernel shape
shapes +=get_shapes((3*scales_img[0],3*scales_img[1]), shapes += get_shapes((3 * scales_img[0], 3 * scales_img[1]),
(2*scales_kern[0],2*scales_kern[1]),subsample, img_stride, kern_stride) (2 * scales_kern[0], 2 * scales_kern[1]),
subsample, img_stride, kern_stride)
#basic test of stack, batch and nkern paramter with not square image. #basic test of stack, batch and nkern paramter with not square image.
shapes +=get_shapes((4*scales_img[0],3*scales_img[1]), shapes += get_shapes((4 * scales_img[0], 3 * scales_img[1]),
(2*scales_kern[0],2*scales_kern[1]),subsample, img_stride, kern_stride) (2 * scales_kern[0], 2 * scales_kern[1]),
subsample, img_stride, kern_stride)
#basic test of stack, batch and nkern paramter with not square image. #basic test of stack, batch and nkern paramter with not square image.
shapes +=get_shapes((3*scales_img[0],4*scales_img[1]), shapes += get_shapes((3 * scales_img[0], 4 * scales_img[1]),
(2*scales_kern[0],2*scales_kern[1]),subsample, img_stride, kern_stride) (2 * scales_kern[0], 2 * scales_kern[1]),
subsample, img_stride, kern_stride)
#basic test of stack, batch and nkern paramter with not square kernel. #basic test of stack, batch and nkern paramter with not square kernel.
shapes +=get_shapes((4*scales_img[0],4*scales_img[1]), shapes += get_shapes((4 * scales_img[0], 4 * scales_img[1]),
(3*scales_kern[0],2*scales_kern[1]),subsample, img_stride, kern_stride) (3 * scales_kern[0], 2 * scales_kern[1]),
subsample, img_stride, kern_stride)
#basic test of stack, batch and nkern paramter with not square kernel. #basic test of stack, batch and nkern paramter with not square kernel.
shapes +=get_shapes((4*scales_img[0],4*scales_img[1]), shapes += get_shapes((4 * scales_img[0], 4 * scales_img[1]),
(2*scales_kern[0],3*scales_kern[1]),subsample, img_stride, kern_stride) (2 * scales_kern[0], 3 * scales_kern[1]),
subsample, img_stride, kern_stride)
return shapes return shapes
...@@ -348,34 +372,37 @@ def get_valid_shapes(): ...@@ -348,34 +372,37 @@ def get_valid_shapes():
shapes += [ shapes += [
#other test #other test
((2, 1, 2, 2), (1, 1, 2, 2), (1,1), (1,1), (1,1)) ((2, 1, 2, 2), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1))
, ((3, 2, 4, 4), (4, 2, 4, 4), (1,1), (1,1), (1,1)) , ((3, 2, 4, 4), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1))
, ((4, 1, 10, 10), (1, 1, 2, 2), (1,1), (1,1), (1,1)) , ((4, 1, 10, 10), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1))
, ((1, 1, 4, 4), (1, 1, 2, 3), (1,1), (1,1), (1,1)) , ((1, 1, 4, 4), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1))
, ((4, 1, 10, 10), (1, 1, 2, 3), (1,1), (1,1), (1,1)) , ((4, 1, 10, 10), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1))
, ((4, 1, 10, 10), (1, 1, 2, 10), (1,1), (1,1), (1,1)) , ((4, 1, 10, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1))
, ((4, 1, 20, 10), (1, 1, 2, 10), (1,1), (1,1), (1,1)) , ((4, 1, 20, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1))
, ((3, 2, 8, 8), (4, 2, 4, 4), (1,1), (1,1), (1,1)) #stack, nkern, bsize , ((3, 2, 8, 8), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize
, ((3, 2, 8, 6), (4, 2, 4, 4), (1,1), (1,1), (1,1)) #stack, nkern, bsize, non-square image , ((3, 2, 8, 6), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize, non-square image
, ((3, 2, 8, 6), (4, 2, 4, 3), (1,1), (1,1), (1,1)) #stack, nkern, bsize, non-square image, non-square kern , ((3, 2, 8, 6), (4, 2, 4, 3), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize, non-square image, non-square kern
, ((3, 2, 8, 6), (4, 2, 4, 6), (1,1), (1,1), (1,1)) #stack, nkern, bsize ,non-square image, non-square kern, kernsize==imgsize on one dim , ((3, 2, 8, 6), (4, 2, 4, 6), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize ,non-square image, non-square kern, kernsize==imgsize on one dim
, ((16, 5, 64, 64), (8, 5, 8, 8), (1,1), (1,1), (1,1)) # a big one , ((16, 5, 64, 64), (8, 5, 8, 8), (1, 1), (1, 1), (1, 1)) # a big one
, ((16, 1, 28, 28), (20, 1, 5, 5), (1,1), (1,1), (1,1)) # MNIST LeNET layer 1 , ((16, 1, 28, 28), (20, 1, 5, 5), (1, 1), (1, 1), (1, 1)) # MNIST LeNET layer 1
, ((20, 16, 32, 32), (1, 16, 28, 28), (1,1), (1,1), (1,1)) # layer 1 backprop to weights , ((20, 16, 32, 32), (1, 16, 28, 28), (1, 1), (1, 1), (1, 1)) # layer 1 backprop to weights
, ((60,20,28,28), (10,20,5,5), (1,1), (2,2), (1,1))#added a test case that fail from test_nnet.py.test_conv_nnet2 , ((60,20,28,28), (10,20,5,5), (1, 1), (2,2), (1, 1))#added a test case that fail from test_nnet.py.test_conv_nnet2
, ((10,5,28,28), (10,5,5,5), (1,1), (2,2), (1,1))#test precedent but reduced that triger the error , ((10,5,28,28), (10,5,5,5), (1, 1), (2,2), (1, 1))#test precedent but reduced that triger the error
#Test more than maxThreadsDim0
, ((2,4,13,1050), (3,4,10, 11), (1, 1), (1, 1), (1, 1))
, ((2,4,1050,13), (3,4,10, 11), (1, 1), (1, 1), (1, 1))
] ]
shapes += [ ((60,1,28,28),(20,1,5,5), (1,1), (1,1), (1,1))#test_lenet_28 1 layers shapes += [ ((60,1,28,28),(20,1,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 1 layers
, ((60,20,12,12),(30,20,5,5), (1,1), (1,1), (1,1))#test_lenet_28 2 layers , ((60,20,12,12),(30,20,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 2 layers
, ((60,30,8,8),(20,30,5,5), (1,1), (1,1), (1,1))#test_lenet_28 bprop 1 full , ((60,30,8,8),(20,30,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 1 full
, ((20,60,12,12),(30,60,8,8), (1,1), (1,1), (1,1))#test_lenet_28 bprop 2 valid , ((20,60,12,12),(30,60,8,8), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 2 valid
# , ((1,60,28,28),(20,60,24,24), (1,1), (1,1), (1,1))#test_lenet_28 bprop 2 valid # , ((1,60,28,28),(20,60,24,24), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 2 valid
, ((10,1,64,64),(20,1,7,7), (1,1), (1,1), (1,1))#test_lenet_64 1 layers , ((10,1,64,64),(20,1,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 1 layers
, ((10,20,29,29),(30,20,7,7), (1,1), (1,1), (1,1))#test_lenet_64 2 layers , ((10,20,29,29),(30,20,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 2 layers
, ((10,30,23,23),(20,30,7,7), (1,1), (1,1), (1,1))#test_lenet_64 full , ((10,30,23,23),(20,30,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 full
# , ((20,10,29,29),(30,10,23,23), (1,1), (1,1), (1,1))#test_lenet_64 bprop 1 # , ((20,10,29,29),(30,10,23,23), (1, 1), (1, 1), (1, 1))#test_lenet_64 bprop 1
# , ((1,10,64,64),(20,10,58,58), (1,1), (1,1), (1,1))#test_lenet_64 bprop 2 # , ((1,10,64,64),(20,10,58,58), (1, 1), (1, 1), (1, 1))#test_lenet_64 bprop 2
] ]
return shapes return shapes
...@@ -483,7 +510,7 @@ def test_valid_5(): ...@@ -483,7 +510,7 @@ def test_valid_5():
random = False random = False
shapes2 = [] shapes2 = []
print len(shapes) # print len(shapes)
for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes): for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) - oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
numpy.asarray(kshape[2:]) + numpy.asarray(kshape[2:]) +
...@@ -496,7 +523,7 @@ def test_valid_5(): ...@@ -496,7 +523,7 @@ def test_valid_5():
if subshape == (1, 1): if subshape == (1, 1):
shapes2.append((ishape, kshape, subshape, istride, kstride)) shapes2.append((ishape, kshape, subshape, istride, kstride))
shapes = shapes2 shapes = shapes2
print len(shapes2) # print len(shapes2)
exec_conv(version, shapes, verbose, random, 'valid', exec_conv(version, shapes, verbose, random, 'valid',
print_=print_, ones=ones, rtol=1.1e-5) print_=print_, ones=ones, rtol=1.1e-5)
...@@ -517,7 +544,7 @@ def test_valid_7_8_13(): ...@@ -517,7 +544,7 @@ def test_valid_7_8_13():
random = False random = False
shapes2 = [] shapes2 = []
print len(shapes) # print len(shapes)
for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes): for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) - oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
numpy.asarray(kshape[2:]) + numpy.asarray(kshape[2:]) +
...@@ -530,7 +557,7 @@ def test_valid_7_8_13(): ...@@ -530,7 +557,7 @@ def test_valid_7_8_13():
if subshape == (1, 1): if subshape == (1, 1):
shapes2.append((ishape, kshape, subshape, istride, kstride)) shapes2.append((ishape, kshape, subshape, istride, kstride))
shapes = shapes2 shapes = shapes2
print len(shapes2) # print len(shapes2)
exec_conv(version, shapes, verbose, random, 'valid', exec_conv(version, shapes, verbose, random, 'valid',
print_=print_, ones=ones, rtol=1.1e-5) print_=print_, ones=ones, rtol=1.1e-5)
...@@ -548,7 +575,7 @@ def test_valid_9_10(): ...@@ -548,7 +575,7 @@ def test_valid_9_10():
random = False random = False
shapes2 = [] shapes2 = []
print len(shapes) # print len(shapes)
for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes): for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) - oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
numpy.asarray(kshape[2:]) + numpy.asarray(kshape[2:]) +
...@@ -560,7 +587,7 @@ def test_valid_9_10(): ...@@ -560,7 +587,7 @@ def test_valid_9_10():
if subshape == (1, 1): if subshape == (1, 1):
shapes2.append((ishape, kshape, subshape, istride, kstride)) shapes2.append((ishape, kshape, subshape, istride, kstride))
shapes = shapes2 shapes = shapes2
print len(shapes2) # print len(shapes2)
exec_conv(version, shapes, verbose, random, 'valid', exec_conv(version, shapes, verbose, random, 'valid',
print_=print_, ones=ones, rtol=1.1e-5) print_=print_, ones=ones, rtol=1.1e-5)
...@@ -600,39 +627,42 @@ def test_full(): ...@@ -600,39 +627,42 @@ def test_full():
shapes += [ shapes += [
#other test #other test
((2, 1, 2, 2), (1, 1, 2, 2), (1,1), (1,1), (1,1)) ((2, 1, 2, 2), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1))
, ((3, 2, 4, 4), (4, 2, 4, 4), (1,1), (1,1), (1,1)) , ((3, 2, 4, 4), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1))
, ((4, 1, 10, 10), (1, 1, 2, 2), (1,1), (1,1), (1,1)) , ((4, 1, 10, 10), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1))
, ((1, 1, 4, 4), (1, 1, 2, 3), (1,1), (1,1), (1,1)) , ((1, 1, 4, 4), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1))
, ((4, 1, 10, 10), (1, 1, 2, 3), (1,1), (1,1), (1,1)) , ((4, 1, 10, 10), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1))
, ((4, 1, 10, 10), (1, 1, 2, 10), (1,1), (1,1), (1,1)) , ((4, 1, 10, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1))
, ((4, 1, 20, 10), (1, 1, 2, 10), (1,1), (1,1), (1,1)) , ((4, 1, 20, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1))
, ((3, 2, 8, 8), (4, 2, 4, 4), (1,1), (1,1), (1,1)) #stack, nkern, bsize , ((3, 2, 8, 8), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize
, ((3, 2, 8, 6), (4, 2, 4, 4), (1,1), (1,1), (1,1)) #stack, nkern, bsize, non-square image , ((3, 2, 8, 6), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize, non-square image
, ((3, 2, 8, 6), (4, 2, 4, 3), (1,1), (1,1), (1,1)) #stack, nkern, bsize, non-square image, non-square kern , ((3, 2, 8, 6), (4, 2, 4, 3), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize, non-square image, non-square kern
, ((3, 2, 8, 6), (4, 2, 4, 6), (1,1), (1,1), (1,1)) #stack, nkern, bsize ,non-square image, non-square kern, kernsize==imgsize on one dim , ((3, 2, 8, 6), (4, 2, 4, 6), (1, 1), (1, 1), (1, 1)) #stack, nkern, bsize ,non-square image, non-square kern, kernsize==imgsize on one dim
, ((16, 5, 64, 64), (8, 5, 8, 8), (1,1), (1,1), (1,1)) # a big one , ((16, 5, 64, 64), (8, 5, 8, 8), (1, 1), (1, 1), (1, 1)) # a big one
, ((16, 1, 28, 28), (20, 1, 5, 5), (1,1), (1,1), (1,1)) # MNIST LeNET layer 1 , ((16, 1, 28, 28), (20, 1, 5, 5), (1, 1), (1, 1), (1, 1)) # MNIST LeNET layer 1
, ((20, 16, 32, 32), (1, 16, 28, 28), (1,1), (1,1), (1,1)) # layer 1 backprop to weights , ((20, 16, 32, 32), (1, 16, 28, 28), (1, 1), (1, 1), (1, 1)) # layer 1 backprop to weights
#other test #other test
, ((3, 1, 1, 1), (2, 1, 5, 3), (1,1), (1,1), (1,1))#kernel bigger then image , ((3, 1, 1, 1), (2, 1, 5, 3), (1, 1), (1, 1), (1, 1))#kernel bigger then image
, ((3, 2, 1, 1), (4, 2, 1, 1), (1,1), (1,1), (1,1)) , ((3, 2, 1, 1), (4, 2, 1, 1), (1, 1), (1, 1), (1, 1))
, ((3, 2, 4, 4), (4, 2, 2, 6), (1,1), (1,1), (1,1)) , ((3, 2, 4, 4), (4, 2, 2, 6), (1, 1), (1, 1), (1, 1))
, ((3, 2, 4, 4), (4, 2, 8, 6), (1,1), (1,1), (1,1))#kernel bigger then image , ((3, 2, 4, 4), (4, 2, 8, 6), (1, 1), (1, 1), (1, 1))#kernel bigger then image
, ((4, 2, 10, 10), (3, 2, 2, 12), (1,1), (1,1), (1,1)) , ((4, 2, 10, 10), (3, 2, 2, 12), (1, 1), (1, 1), (1, 1))
] ]
shapes += [ shapes += [
# ((60,1,28,28),(20,1,5,5), (1,1), (1,1), (1,1))#test_lenet_28 1 layers # ((60,1,28,28),(20,1,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 1 layers
# , ((60,20,12,12),(30,20,5,5), (1,1), (1,1), (1,1))#test_lenet_28 2 layers # , ((60,20,12,12),(30,20,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 2 layers
((60,30,8,8),(20,30,5,5), (1,1), (1,1), (1,1))#test_lenet_28 bprop 1 full ((60,30,8,8),(20,30,5,5), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 1 full
# , ((20,60,12,12),(30,60,8,8), (1,1), (1,1), (1,1))#test_lenet_28 bprop 2 valid # , ((20,60,12,12),(30,60,8,8), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 2 valid
# , ((1,60,28,28),(20,60,24,24), (1,1), (1,1), (1,1))#test_lenet_28 bprop 2 valid # , ((1,60,28,28),(20,60,24,24), (1, 1), (1, 1), (1, 1))#test_lenet_28 bprop 2 valid
# , ((10,1,64,64),(20,1,7,7), (1,1), (1,1), (1,1))#test_lenet_64 1 layers # , ((10,1,64,64),(20,1,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 1 layers
# , ((10,20,29,29),(30,20,7,7), (1,1), (1,1), (1,1))#test_lenet_64 2 layers # , ((10,20,29,29),(30,20,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 2 layers
, ((10,30,23,23),(20,30,7,7), (1,1), (1,1), (1,1))#test_lenet_64 full , ((10,30,23,23),(20,30,7,7), (1, 1), (1, 1), (1, 1))#test_lenet_64 full
# , ((20,10,29,29),(30,10,23,23), (1,1), (1,1), (1,1))#test_lenet_64 bprop 1 # , ((20,10,29,29),(30,10,23,23), (1, 1), (1, 1), (1, 1))#test_lenet_64 bprop 1
# , ((1,10,64,64),(20,10,58,58), (1,1), (1,1), (1,1))#test_lenet_64 bprop 2 # , ((1,10,64,64),(20,10,58,58), (1, 1), (1, 1), (1, 1))#test_lenet_64 bprop 2
#Test more than maxThreadsDim0
, ((2,4,13,1050), (3,4,10, 11), (1, 1), (1, 1), (1, 1))
, ((2,4,1050,13), (3,4,10, 11), (1, 1), (1, 1), (1, 1))
] ]
# shapes=shapes[:277] # shapes=shapes[:277]
...@@ -646,13 +676,12 @@ def test_full(): ...@@ -646,13 +676,12 @@ def test_full():
def test_subsample(): def test_subsample():
# implement when # implement when
shapes = [ shapes = [((1, 1, 1, 1), (1, 1, 1, 1), (1, 1), (1, 1), (1, 1)),
((1, 1, 1, 1), (1, 1, 1, 1), (1,1), (1,1), (1,1)) ((1, 1, 1, 1), (1, 1, 1, 1), (2, 2), (1, 1), (1, 1)),
, ((1, 1, 1, 1), (1, 1, 1, 1), (2,2), (1,1), (1,1)) ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1, 1), (1, 1)),
, ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1,1), (1,1)) ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3), (1, 1), (1, 1)),
, ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3), (1,1), (1,1)) ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1), (1, 1), (1, 1))
, ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1), (1,1), (1,1)) ]
]
shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 1)) shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 1))
shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 2)) shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 2))
shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1)) shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1))
...@@ -676,7 +705,8 @@ def test_subsample(): ...@@ -676,7 +705,8 @@ def test_subsample():
## See #616 ## See #616
#def test_logical_shapes(): #def test_logical_shapes():
# # implement when # # implement when
# print >> sys.stderr, "WARNING TODO: test_logical_shapes not implemented (i.e. imshp_logical, kshp_logical, kshp_logical_top_aligned)" # print >> sys.stderr, ("WARNING TODO: test_logical_shapes not implemented"
# " (i.e. imshp_logical, kshp_logical, kshp_logical_top_aligned)")
class TestConv2DGPU(unittest.TestCase): class TestConv2DGPU(unittest.TestCase):
...@@ -745,43 +775,43 @@ def benchmark(): ...@@ -745,43 +775,43 @@ def benchmark():
shapes_valid = [ shapes_valid = [
#test_lenet_28 shape #test_lenet_28 shape
((20, 60,12,12), (30,60,8,8), (1,1), (1,1), (1,1))#valid ((20, 60,12,12), (30,60,8,8), (1, 1), (1, 1), (1, 1))#valid
,((60, 20,12,12), (30,20,5,5), (1,1), (1,1), (1,1))#valid ,((60, 20,12,12), (30,20,5,5), (1, 1), (1, 1), (1, 1))#valid
,((60, 1,28,28), (20,1,5,5), (1,1), (1,1), (1,1))#valid ,((60, 1,28,28), (20,1,5,5), (1, 1), (1, 1), (1, 1))#valid
,((1, 60,28,28), (20,60,24,24), (1,1), (1,1), (1,1))#valid ,((1, 60,28,28), (20,60,24,24), (1, 1), (1, 1), (1, 1))#valid
#test_lenet_32 shape #test_lenet_32 shape
,((20, 60,14,14), (30,60,10,10), (1,1), (1,1), (1,1))#valid ,((20, 60,14,14), (30,60,10,10), (1, 1), (1, 1), (1, 1))#valid
,((60, 20,14,14), (30,20,5,5), (1,1), (1,1), (1,1))#valid ,((60, 20,14,14), (30,20,5,5), (1, 1), (1, 1), (1, 1))#valid
,((60, 1,32,32), (20,1,5,5), (1,1), (1,1), (1,1))#valid ,((60, 1,32,32), (20,1,5,5), (1, 1), (1, 1), (1, 1))#valid
,((1, 60,32,32), (20,60,28,28), (1,1), (1,1), (1,1))#valid ,((1, 60,32,32), (20,60,28,28), (1, 1), (1, 1), (1, 1))#valid
#test_lenet_64 shape #test_lenet_64 shape
,((10, 20,29,29), (30,20,7,7), (1,1), (1,1), (1,1))#valid ,((10, 20,29,29), (30,20,7,7), (1, 1), (1, 1), (1, 1))#valid
,((20, 10,29,29), (30,10,23,23), (1,1), (1,1), (1,1))#valid ,((20, 10,29,29), (30,10,23,23), (1, 1), (1, 1), (1, 1))#valid
,((10, 1,64,64), (20,1,7,7), (1,1), (1,1), (1,1))#valid ,((10, 1,64,64), (20,1,7,7), (1, 1), (1, 1), (1, 1))#valid
,((1, 10,64,64), (20,10,58,58), (1,1), (1,1), (1,1))#valid ,((1, 10,64,64), (20,10,58,58), (1, 1), (1, 1), (1, 1))#valid
#test_lenet_108 shape #test_lenet_108 shape
,((10, 20,51,51), (30,20,7,7), (1,1), (1,1), (1,1))#valid ,((10, 20,51,51), (30,20,7,7), (1, 1), (1, 1), (1, 1))#valid
,((20, 10,51,51), (30,10,45,45), (1,1), (1,1), (1,1))#valid ,((20, 10,51,51), (30,10,45,45), (1, 1), (1, 1), (1, 1))#valid
,((10, 1,108,108), (20,1,7,7), (1,1), (1,1), (1,1))#valid ,((10, 1,108,108), (20,1,7,7), (1, 1), (1, 1), (1, 1))#valid
,((1, 10,108,108), (20,10,102,102), (1,1), (1,1), (1,1))#valid ,((1, 10,108,108), (20,10,102,102), (1, 1), (1, 1), (1, 1))#valid
#test_lenet_256 shape #test_lenet_256 shape
,((2, 20,124,124), (30,20,9,9), (1,1), (1,1), (1,1))#valid ,((2, 20,124,124), (30,20,9,9), (1, 1), (1, 1), (1, 1))#valid
,((20, 2,124,124), (30,2,116,116), (1,1), (1,1), (1,1))#valid ,((20, 2,124,124), (30,2,116,116), (1, 1), (1, 1), (1, 1))#valid
,((2, 1,256,256), (20,1,9,9), (1,1), (1,1), (1,1))#valid ,((2, 1,256,256), (20,1,9,9), (1, 1), (1, 1), (1, 1))#valid
,((1, 2,256,256), (20,2,248,248), (1,1), (1,1), (1,1))#valid ,((1, 2,256,256), (20,2,248,248), (1, 1), (1, 1), (1, 1))#valid
] ]
shapes_full = [ shapes_full = [
#test_lenet_28 shape #test_lenet_28 shape
((60, 30,8,8), (20, 30, 5, 5), (1,1), (1,1), (1,1))#full ((60, 30,8,8), (20, 30, 5, 5), (1, 1), (1, 1), (1, 1))#full
#test_lenet_32 shape #test_lenet_32 shape
,((60, 30,10,10), (20, 30, 5, 5), (1,1), (1,1), (1,1))#full conv_full_patch_stack_padded' N=1 ,((60, 30,10,10), (20, 30, 5, 5), (1, 1), (1, 1), (1, 1))#full conv_full_patch_stack_padded' N=1
#test_lenet_64 shape #test_lenet_64 shape
,((10, 30,23,23), (20, 30, 7, 7), (1,1), (1,1), (1,1))#full conv_full_patch_stack_padded' N=3 ,((10, 30,23,23), (20, 30, 7, 7), (1, 1), (1, 1), (1, 1))#full conv_full_patch_stack_padded' N=3
#test_lenet_108 shape #test_lenet_108 shape
,((10, 30,45,45), (20, 30, 7, 7), (1,1), (1,1), (1,1))#full 'conv_full_patch_stack_padded' N=9 ,((10, 30,45,45), (20, 30, 7, 7), (1, 1), (1, 1), (1, 1))#full 'conv_full_patch_stack_padded' N=9
#test_lenet_256 shape #test_lenet_256 shape
,((2, 30,116,116), (20, 30, 9,9), (1,1), (1,1), (1,1))#full conv_reference_full ,((2, 30,116,116), (20, 30, 9,9), (1, 1), (1, 1), (1, 1))#full conv_reference_full
] ]
# shapes_valid=shapes_valid[-1:] # shapes_valid=shapes_valid[-1:]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论