提交 7444e2b3 authored 作者: Frederic's avatar Frederic

Manually disable the parallel convolution when at most one thread could possibly be running.

上级 aa71b758
......@@ -854,7 +854,7 @@ class ConvOp(Op):
return ['<numpy/noprefix.h>', '<iostream>', '<sstream>', '<omp.h>' ]
def c_code_cache_version(self):
return (7, self.openmp)
return (8, self.openmp)
def c_support_code(self):
return """
......@@ -1947,7 +1947,9 @@ if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%(
if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s;
#pragma omp parallel for schedule(static)
//The if clause on the number of loop iterations gives a speed-up for small arrays
//with g++ 4.5.1. The compiler should be smart enough to do this itself!
#pragma omp parallel for schedule(static) if(%(self_bsize)s * %(self_nkern)s > 1)
// We merge the 2 loops into one to make it easier to parallelize over both.
// This is equivalent to the following 2 lines.
//for(int b=0;b< %(self_bsize)s;b++){
......
......@@ -25,8 +25,7 @@ class TestConv2D(unittest.TestCase):
N_image_shape=None, N_filter_shape=None,
input=None, filters=None,
unroll_batch=None, unroll_kern=None, unroll_patch=None,
verify_grad=True, should_raise=False,
speed_only=False):
verify_grad=True, should_raise=False):
if N_image_shape is None:
N_image_shape = [T.get_constant_value(T.
......@@ -65,8 +64,6 @@ class TestConv2D(unittest.TestCase):
if should_raise:
raise Exception(
"ConvOp should have generated an error")
if speed_only:
return
############# REFERENCE IMPLEMENTATION ############
s = 1.
......@@ -374,28 +371,38 @@ class TestConv2D(unittest.TestCase):
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)
def speed(self):
for filter_shape in [(1, 5, 4, 4), (5, 5, 4, 4)]:
print filter_shape
for image_shape in [(1, 5, 6, 6),
#(10, 10, 10, 10),
n_calls = 20000
print "n_calls", n_calls
for border_mode in ['valid', 'full']:
print
print border_mode
for openmp in [False, True]:
print "OpenMP", openmp
image_shapes = [(1, 5, 6, 6),
(10, 5, 6, 6),
#(10, 10, 16, 16),
#(10, 10, 32, 32)
]:
print image_shape
for border_mode in ['valid', 'full']:
input = theano.shared(numpy.random.random(image_shape))
filters = theano.shared(numpy.random.random(filter_shape))
output = conv.conv2d(input, filters,
image_shape, filter_shape,
border_mode,
unroll_patch=True)
mode = theano.Mode(linker=theano.gof.vm.VM_Linker(
allow_gc=False,
use_cloop=True))
theano_conv = theano.function([], output, mode=mode)
t1 = time.time()
theano_conv.fn(n_calls=500)
t2 = time.time()
print border_mode, t2 - t1, 100
]
print "image_shape", image_shapes
for image_shape in image_shapes:
filter_shapes = [(1, 5, 4, 4), (2, 5, 4, 4), (5, 5, 4, 4)]
print "filter_shapes", filter_shapes
for filter_shape in filter_shapes:
input = theano.shared(numpy.random.random(image_shape))
filters = theano.shared(numpy.random.random(filter_shape))
output = conv.conv2d(input, filters,
image_shape, filter_shape,
border_mode,
unroll_patch=True,
openmp=openmp)
mode = theano.Mode(linker=theano.gof.vm.VM_Linker(
allow_gc=False,
use_cloop=True))
theano_conv = theano.function([], output, mode=mode)
t1 = time.time()
theano_conv.fn(n_calls=n_calls)
t2 = time.time()
print t2 - t1,
print
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论