提交 61980334 authored 作者: notoraptor's avatar notoraptor

Added another optimization for [sd]gemm_,

just by skipping the alpha*matrix multiplication when alpha == 1.0. All tests succeed (with blas.ldflags empty) for: * test_abstract_conv.py in theano/tensor/nnet/tests/ * test_blas.py and test_blas_scipy.py in theano/tensor/tests/ I have modified theano/tensor/tests/test_blas_c.py to skip all tests that involve either gemv or ger functions. * Before the modifications, this file executed 44 tests and 34 were skipped. * After the modifications, this file executes 44 tests and 29 are skipped. # $ theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests --verbose theano/tensor/tests/test_blas_c.py PS: I also tried to execute test_corr.py in theano/tensor/nnet/tests/ after removing the ldflags check, but I get many errors in many of the tests (Theano outputs do not match reference outputs). So for the moment I have left this file as-is and will continue investigating tomorrow.
上级 9ca9474b
...@@ -85,11 +85,14 @@ void dgemm_(char* TRANSA, char* TRANSB, ...@@ -85,11 +85,14 @@ void dgemm_(char* TRANSA, char* TRANSB,
PyObject* op_B = alt_op(TRANSB, (PyArrayObject*)matrix_B); PyObject* op_B = alt_op(TRANSB, (PyArrayObject*)matrix_B);
if(*BETA == 0) { if(*BETA == 0) {
PyArray_MatrixProduct2(op_A, op_B, (PyArrayObject*)matrix_C); PyArray_MatrixProduct2(op_A, op_B, (PyArrayObject*)matrix_C);
alt_numpy_double_scalar_matrix_product_in_place(*ALPHA, (PyArrayObject*)matrix_C); if(*ALPHA != 1.0)
alt_numpy_double_scalar_matrix_product_in_place(*ALPHA, (PyArrayObject*)matrix_C);
} else { } else {
PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B); PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B);
alt_numpy_double_scalar_matrix_product_in_place(*ALPHA, op_A_times_op_B); if(*ALPHA != 1.0)
alt_numpy_double_scalar_matrix_product_in_place(*BETA, (PyArrayObject*)matrix_C); alt_numpy_double_scalar_matrix_product_in_place(*ALPHA, op_A_times_op_B);
if(*BETA != 1.0)
alt_numpy_double_scalar_matrix_product_in_place(*BETA, (PyArrayObject*)matrix_C);
alt_numpy_double_matrix_sum(op_A_times_op_B, (PyArrayObject*)matrix_C, (PyArrayObject*)matrix_C); alt_numpy_double_matrix_sum(op_A_times_op_B, (PyArrayObject*)matrix_C, (PyArrayObject*)matrix_C);
Py_XDECREF(op_A_times_op_B); Py_XDECREF(op_A_times_op_B);
} }
......
...@@ -114,11 +114,14 @@ void sgemm_(char* TRANSA, char* TRANSB, ...@@ -114,11 +114,14 @@ void sgemm_(char* TRANSA, char* TRANSB,
PyObject* op_B = alt_op(TRANSB, (PyArrayObject*)matrix_B); PyObject* op_B = alt_op(TRANSB, (PyArrayObject*)matrix_B);
if(*BETA == 0) { if(*BETA == 0) {
PyArray_MatrixProduct2(op_A, op_B, (PyArrayObject*)matrix_C); PyArray_MatrixProduct2(op_A, op_B, (PyArrayObject*)matrix_C);
alt_numpy_scalar_matrix_product_in_place(*ALPHA, (PyArrayObject*)matrix_C); if(*ALPHA != 1.0)
alt_numpy_scalar_matrix_product_in_place(*ALPHA, (PyArrayObject*)matrix_C);
} else { } else {
PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B); PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B);
alt_numpy_scalar_matrix_product_in_place(*ALPHA, op_A_times_op_B); if(*ALPHA != 1.0)
alt_numpy_scalar_matrix_product_in_place(*BETA, (PyArrayObject*)matrix_C); alt_numpy_scalar_matrix_product_in_place(*ALPHA, op_A_times_op_B);
if(*BETA != 1.0)
alt_numpy_scalar_matrix_product_in_place(*BETA, (PyArrayObject*)matrix_C);
alt_numpy_matrix_sum(op_A_times_op_B, (PyArrayObject*)matrix_C, (PyArrayObject*)matrix_C); alt_numpy_matrix_sum(op_A_times_op_B, (PyArrayObject*)matrix_C, (PyArrayObject*)matrix_C);
Py_XDECREF(op_A_times_op_B); Py_XDECREF(op_A_times_op_B);
} }
......
...@@ -26,13 +26,20 @@ mode_blas_opt = theano.compile.get_default_mode().including( ...@@ -26,13 +26,20 @@ mode_blas_opt = theano.compile.get_default_mode().including(
'BlasOpt', 'specialize', 'InplaceBlasOpt', 'c_blas') 'BlasOpt', 'specialize', 'InplaceBlasOpt', 'c_blas')
def skip_if_blas_ldflags_empty(*functions_detected):
    """Skip the current test when Theano is not linked against BLAS.

    Raises SkipTest if ``theano.config.blas.ldflags`` is the empty string.
    ``functions_detected`` optionally names the BLAS functions the calling
    test relies on; when given, they are appended to the skip message.
    """
    if theano.config.blas.ldflags != "":
        # BLAS is available: nothing to do, let the test run.
        return
    functions_string = ""
    if functions_detected:
        functions_string = " (at least %s)" % ", ".join(functions_detected)
    raise SkipTest(
        "This test is useful only when Theano can access to BLAS functions"
        + functions_string + ".")
class TestCGer(TestCase, TestOptimizationMixin): class TestCGer(TestCase, TestOptimizationMixin):
def setUp(self, dtype='float64'): def setUp(self, dtype='float64'):
if theano.config.blas.ldflags == "": # if theano.config.blas.ldflags == "":
raise SkipTest("This test is useful only when Theano" # raise SkipTest("This test is useful only when Theano"
" is directly linked to blas.") # " is directly linked to blas.")
self.dtype = dtype self.dtype = dtype
self.mode = theano.compile.get_default_mode().including('fast_run') self.mode = theano.compile.get_default_mode().including('fast_run')
self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False)) self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False))
...@@ -76,11 +83,13 @@ class TestCGer(TestCase, TestOptimizationMixin): ...@@ -76,11 +83,13 @@ class TestCGer(TestCase, TestOptimizationMixin):
self.assertTrue(hash(CGer(False)) != hash(CGer(True))) self.assertTrue(hash(CGer(False)) != hash(CGer(True)))
def test_optimization_pipeline(self): def test_optimization_pipeline(self):
skip_if_blas_ldflags_empty('dger_')
f = self.function([self.x, self.y], tensor.outer(self.x, self.y)) f = self.function([self.x, self.y], tensor.outer(self.x, self.y))
self.assertFunctionContains(f, CGer(destructive=True)) self.assertFunctionContains(f, CGer(destructive=True))
f(self.xval, self.yval) # DebugMode tests correctness f(self.xval, self.yval) # DebugMode tests correctness
def test_optimization_pipeline_float(self): def test_optimization_pipeline_float(self):
skip_if_blas_ldflags_empty('sger_')
self.setUp('float32') self.setUp('float32')
f = self.function([self.x, self.y], tensor.outer(self.x, self.y)) f = self.function([self.x, self.y], tensor.outer(self.x, self.y))
self.assertFunctionContains(f, CGer(destructive=True)) self.assertFunctionContains(f, CGer(destructive=True))
...@@ -93,12 +102,14 @@ class TestCGer(TestCase, TestOptimizationMixin): ...@@ -93,12 +102,14 @@ class TestCGer(TestCase, TestOptimizationMixin):
self.assertFunctionContains0(f, CGer(destructive=False)) self.assertFunctionContains0(f, CGer(destructive=False))
def test_A_plus_outer(self): def test_A_plus_outer(self):
skip_if_blas_ldflags_empty('sger_', 'dger_')
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
self.A + tensor.outer(self.x, self.y)) self.A + tensor.outer(self.x, self.y))
self.assertFunctionContains(f, CGer(destructive=False)) self.assertFunctionContains(f, CGer(destructive=False))
self.run_f(f) # DebugMode tests correctness self.run_f(f) # DebugMode tests correctness
def test_A_plus_scaled_outer(self): def test_A_plus_scaled_outer(self):
skip_if_blas_ldflags_empty('sger_', 'dger_')
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
self.A + 0.1 * tensor.outer(self.x, self.y)) self.A + 0.1 * tensor.outer(self.x, self.y))
self.assertFunctionContains(f, CGer(destructive=False)) self.assertFunctionContains(f, CGer(destructive=False))
...@@ -113,9 +124,9 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -113,9 +124,9 @@ class TestCGemv(TestCase, TestOptimizationMixin):
""" """
def setUp(self, dtype='float64'): def setUp(self, dtype='float64'):
if theano.config.blas.ldflags == "": # if theano.config.blas.ldflags == "":
raise SkipTest("This test is useful only when Theano" # raise SkipTest("This test is useful only when Theano"
" is directly linked to blas.") # " is directly linked to blas.")
self.dtype = dtype self.dtype = dtype
self.mode = theano.compile.get_default_mode().including('fast_run') self.mode = theano.compile.get_default_mode().including('fast_run')
# matrix # matrix
...@@ -144,6 +155,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -144,6 +155,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
assert not numpy.isnan(zval).any() assert not numpy.isnan(zval).any()
def test_optimizations_vm(self): def test_optimizations_vm(self):
skip_if_blas_ldflags_empty('sdot_')
''' Test vector dot matrix ''' ''' Test vector dot matrix '''
f = theano.function([self.x, self.A], f = theano.function([self.x, self.A],
theano.dot(self.x, self.A), theano.dot(self.x, self.A),
...@@ -165,6 +177,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -165,6 +177,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
numpy.dot(self.xval, self.Aval[::-1, ::-1])) numpy.dot(self.xval, self.Aval[::-1, ::-1]))
def test_optimizations_mv(self): def test_optimizations_mv(self):
skip_if_blas_ldflags_empty('sdot_')
''' Test matrix dot vector ''' ''' Test matrix dot vector '''
f = theano.function([self.A, self.y], f = theano.function([self.A, self.y],
theano.dot(self.A, self.y), theano.dot(self.A, self.y),
...@@ -235,6 +248,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -235,6 +248,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
numpy.dot(m.get_value(), v1.get_value()) + v2_orig) numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
def test_gemv1(self): def test_gemv1(self):
skip_if_blas_ldflags_empty('sdot_')
self.t_gemv1((3, 2)) self.t_gemv1((3, 2))
self.t_gemv1((1, 2)) self.t_gemv1((1, 2))
self.t_gemv1((0, 2)) self.t_gemv1((0, 2))
...@@ -269,6 +283,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -269,6 +283,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
self.assertRaises(ValueError, f, A_val, ones_4, ones_6) self.assertRaises(ValueError, f, A_val, ones_4, ones_6)
def test_multiple_inplace(self): def test_multiple_inplace(self):
skip_if_blas_ldflags_empty('sdot_')
x = tensor.dmatrix('x') x = tensor.dmatrix('x')
y = tensor.dvector('y') y = tensor.dvector('y')
z = tensor.dvector('z') z = tensor.dvector('z')
...@@ -292,9 +307,7 @@ class TestCGemvFloat32(TestCase, BaseGemv, TestOptimizationMixin): ...@@ -292,9 +307,7 @@ class TestCGemvFloat32(TestCase, BaseGemv, TestOptimizationMixin):
gemv_inplace = CGemv(inplace=True) gemv_inplace = CGemv(inplace=True)
def setUp(self): def setUp(self):
if theano.config.blas.ldflags == "": skip_if_blas_ldflags_empty('sdot_')
raise SkipTest("This test is useful only when Theano"
" is directly linked to blas.")
class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin): class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin):
...@@ -304,10 +317,11 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin): ...@@ -304,10 +317,11 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin):
gemv_inplace = CGemv(inplace=True) gemv_inplace = CGemv(inplace=True)
def setUp(self): def setUp(self):
if theano.config.blas.ldflags == "": skip_if_blas_ldflags_empty('sdot_')
raise SkipTest("This test is useful only when Theano"
" is directly linked to blas.")
class TestBlasStridesC(TestBlasStrides): class TestBlasStridesC(TestBlasStrides):
mode = mode_blas_opt mode = mode_blas_opt
def test_ger_strides(self):
    # Override of the base-class test: run the same strided ger checks,
    # but skip first when Theano has no BLAS link (this module's helper
    # raises SkipTest when blas.ldflags is empty; 'dger_' names the BLAS
    # function the test needs).
    skip_if_blas_ldflags_empty('dger_')
    super(TestBlasStridesC, self).test_ger_strides()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论