提交 7444e2b3 authored 作者: Frederic's avatar Frederic

Manually disable the parallel conv when at most 1 thread could possibly be running.

上级 aa71b758
...@@ -854,7 +854,7 @@ class ConvOp(Op): ...@@ -854,7 +854,7 @@ class ConvOp(Op):
return ['<numpy/noprefix.h>', '<iostream>', '<sstream>', '<omp.h>' ] return ['<numpy/noprefix.h>', '<iostream>', '<sstream>', '<omp.h>' ]
def c_code_cache_version(self): def c_code_cache_version(self):
return (7, self.openmp) return (8, self.openmp)
def c_support_code(self): def c_support_code(self):
return """ return """
...@@ -1947,7 +1947,9 @@ if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%( ...@@ -1947,7 +1947,9 @@ if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%(
if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s; if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s; if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s;
#pragma omp parallel for schedule(static) //The if clause on the number of loop iterations gives a speed-up for small arrays
//with g++ 4.5.1. The compiler should be smart enough to do this itself!
#pragma omp parallel for schedule(static) if(%(self_bsize)s * %(self_nkern)s > 1)
// We merge the 2 loop into one to make it easier to parallelize on both // We merge the 2 loop into one to make it easier to parallelize on both
// This is the equivalent of those 2 lines. // This is the equivalent of those 2 lines.
//for(int b=0;b< %(self_bsize)s;b++){ //for(int b=0;b< %(self_bsize)s;b++){
......
...@@ -25,8 +25,7 @@ class TestConv2D(unittest.TestCase): ...@@ -25,8 +25,7 @@ class TestConv2D(unittest.TestCase):
N_image_shape=None, N_filter_shape=None, N_image_shape=None, N_filter_shape=None,
input=None, filters=None, input=None, filters=None,
unroll_batch=None, unroll_kern=None, unroll_patch=None, unroll_batch=None, unroll_kern=None, unroll_patch=None,
verify_grad=True, should_raise=False, verify_grad=True, should_raise=False):
speed_only=False):
if N_image_shape is None: if N_image_shape is None:
N_image_shape = [T.get_constant_value(T. N_image_shape = [T.get_constant_value(T.
...@@ -65,8 +64,6 @@ class TestConv2D(unittest.TestCase): ...@@ -65,8 +64,6 @@ class TestConv2D(unittest.TestCase):
if should_raise: if should_raise:
raise Exception( raise Exception(
"ConvOp should have generated an error") "ConvOp should have generated an error")
if speed_only:
return
############# REFERENCE IMPLEMENTATION ############ ############# REFERENCE IMPLEMENTATION ############
s = 1. s = 1.
...@@ -374,15 +371,23 @@ class TestConv2D(unittest.TestCase): ...@@ -374,15 +371,23 @@ class TestConv2D(unittest.TestCase):
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False) self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)
def speed(self): def speed(self):
for filter_shape in [(1, 5, 4, 4), (5, 5, 4, 4)]: n_calls = 20000
print filter_shape print "n_calls", n_calls
for image_shape in [(1, 5, 6, 6), for border_mode in ['valid', 'full']:
#(10, 10, 10, 10), print
print border_mode
for openmp in [False, True]:
print "OpenMP", openmp
image_shapes = [(1, 5, 6, 6),
(10, 5, 6, 6),
#(10, 10, 16, 16), #(10, 10, 16, 16),
#(10, 10, 32, 32) #(10, 10, 32, 32)
]: ]
print image_shape print "image_shape", image_shapes
for border_mode in ['valid', 'full']: for image_shape in image_shapes:
filter_shapes = [(1, 5, 4, 4), (2, 5, 4, 4), (5, 5, 4, 4)]
print "filter_shapes", filter_shapes
for filter_shape in filter_shapes:
input = theano.shared(numpy.random.random(image_shape)) input = theano.shared(numpy.random.random(image_shape))
filters = theano.shared(numpy.random.random(filter_shape)) filters = theano.shared(numpy.random.random(filter_shape))
...@@ -390,12 +395,14 @@ class TestConv2D(unittest.TestCase): ...@@ -390,12 +395,14 @@ class TestConv2D(unittest.TestCase):
output = conv.conv2d(input, filters, output = conv.conv2d(input, filters,
image_shape, filter_shape, image_shape, filter_shape,
border_mode, border_mode,
unroll_patch=True) unroll_patch=True,
openmp=openmp)
mode = theano.Mode(linker=theano.gof.vm.VM_Linker( mode = theano.Mode(linker=theano.gof.vm.VM_Linker(
allow_gc=False, allow_gc=False,
use_cloop=True)) use_cloop=True))
theano_conv = theano.function([], output, mode=mode) theano_conv = theano.function([], output, mode=mode)
t1 = time.time() t1 = time.time()
theano_conv.fn(n_calls=500) theano_conv.fn(n_calls=n_calls)
t2 = time.time() t2 = time.time()
print border_mode, t2 - t1, 100 print t2 - t1,
print
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论