提交 61980334 authored 作者: notoraptor's avatar notoraptor

Added another optimization for [sd]gemm_,

just by skipping the alpha*matrix multiplication when alpha == 1.0. All tests succeed (with blas.ldflags empty) for: * test_abstract_conv.py in theano/tensor/nnet/tests/ * test_blas.py and test_blas_scipy.py in theano/tensor/tests/ I have modified theano/tensor/tests/test_blas_c.py to skip all tests that involve either gemv or ger functions. * Before the modifications, this file executed 44 tests and 34 were skipped. * After the modifications, this file executes 44 tests and 29 are skipped. # $ theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests --verbose theano/tensor/tests/test_blas_c.py PS: I also tried to execute test_corr.py in theano/tensor/nnet/tests/ after removing the ldflags check, but I get many errors in many of the tests (Theano outputs do not match reference outputs). So for the moment I have left this file as-is and will continue investigating tomorrow.
上级 9ca9474b
...@@ -85,11 +85,14 @@ void dgemm_(char* TRANSA, char* TRANSB, ...@@ -85,11 +85,14 @@ void dgemm_(char* TRANSA, char* TRANSB,
PyObject* op_B = alt_op(TRANSB, (PyArrayObject*)matrix_B); PyObject* op_B = alt_op(TRANSB, (PyArrayObject*)matrix_B);
if(*BETA == 0) { if(*BETA == 0) {
PyArray_MatrixProduct2(op_A, op_B, (PyArrayObject*)matrix_C); PyArray_MatrixProduct2(op_A, op_B, (PyArrayObject*)matrix_C);
alt_numpy_double_scalar_matrix_product_in_place(*ALPHA, (PyArrayObject*)matrix_C); if(*ALPHA != 1.0)
alt_numpy_double_scalar_matrix_product_in_place(*ALPHA, (PyArrayObject*)matrix_C);
} else { } else {
PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B); PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B);
alt_numpy_double_scalar_matrix_product_in_place(*ALPHA, op_A_times_op_B); if(*ALPHA != 1.0)
alt_numpy_double_scalar_matrix_product_in_place(*BETA, (PyArrayObject*)matrix_C); alt_numpy_double_scalar_matrix_product_in_place(*ALPHA, op_A_times_op_B);
if(*BETA != 1.0)
alt_numpy_double_scalar_matrix_product_in_place(*BETA, (PyArrayObject*)matrix_C);
alt_numpy_double_matrix_sum(op_A_times_op_B, (PyArrayObject*)matrix_C, (PyArrayObject*)matrix_C); alt_numpy_double_matrix_sum(op_A_times_op_B, (PyArrayObject*)matrix_C, (PyArrayObject*)matrix_C);
Py_XDECREF(op_A_times_op_B); Py_XDECREF(op_A_times_op_B);
} }
......
...@@ -114,11 +114,14 @@ void sgemm_(char* TRANSA, char* TRANSB, ...@@ -114,11 +114,14 @@ void sgemm_(char* TRANSA, char* TRANSB,
PyObject* op_B = alt_op(TRANSB, (PyArrayObject*)matrix_B); PyObject* op_B = alt_op(TRANSB, (PyArrayObject*)matrix_B);
if(*BETA == 0) { if(*BETA == 0) {
PyArray_MatrixProduct2(op_A, op_B, (PyArrayObject*)matrix_C); PyArray_MatrixProduct2(op_A, op_B, (PyArrayObject*)matrix_C);
alt_numpy_scalar_matrix_product_in_place(*ALPHA, (PyArrayObject*)matrix_C); if(*ALPHA != 1.0)
alt_numpy_scalar_matrix_product_in_place(*ALPHA, (PyArrayObject*)matrix_C);
} else { } else {
PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B); PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B);
alt_numpy_scalar_matrix_product_in_place(*ALPHA, op_A_times_op_B); if(*ALPHA != 1.0)
alt_numpy_scalar_matrix_product_in_place(*BETA, (PyArrayObject*)matrix_C); alt_numpy_scalar_matrix_product_in_place(*ALPHA, op_A_times_op_B);
if(*BETA != 1.0)
alt_numpy_scalar_matrix_product_in_place(*BETA, (PyArrayObject*)matrix_C);
alt_numpy_matrix_sum(op_A_times_op_B, (PyArrayObject*)matrix_C, (PyArrayObject*)matrix_C); alt_numpy_matrix_sum(op_A_times_op_B, (PyArrayObject*)matrix_C, (PyArrayObject*)matrix_C);
Py_XDECREF(op_A_times_op_B); Py_XDECREF(op_A_times_op_B);
} }
......
...@@ -26,13 +26,20 @@ mode_blas_opt = theano.compile.get_default_mode().including( ...@@ -26,13 +26,20 @@ mode_blas_opt = theano.compile.get_default_mode().including(
'BlasOpt', 'specialize', 'InplaceBlasOpt', 'c_blas') 'BlasOpt', 'specialize', 'InplaceBlasOpt', 'c_blas')
def skip_if_blas_ldflags_empty(*functions_detected):
    """Skip the current test when Theano is not linked against BLAS.

    Raises SkipTest if ``theano.config.blas.ldflags`` is the empty string.
    ``functions_detected`` optionally names the BLAS functions the calling
    test relies on; when given, they are appended to the skip message.
    """
    if theano.config.blas.ldflags != "":
        # BLAS is available: nothing to do, let the test run.
        return
    functions_string = ""
    if functions_detected:
        functions_string = " (at least %s)" % ", ".join(functions_detected)
    raise SkipTest(
        "This test is useful only when Theano can access to BLAS functions"
        + functions_string + ".")
class TestCGer(TestCase, TestOptimizationMixin): class TestCGer(TestCase, TestOptimizationMixin):
def setUp(self, dtype='float64'): def setUp(self, dtype='float64'):
if theano.config.blas.ldflags == "": # if theano.config.blas.ldflags == "":
raise SkipTest("This test is useful only when Theano" # raise SkipTest("This test is useful only when Theano"
" is directly linked to blas.") # " is directly linked to blas.")
self.dtype = dtype self.dtype = dtype
self.mode = theano.compile.get_default_mode().including('fast_run') self.mode = theano.compile.get_default_mode().including('fast_run')
self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False)) self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False))
...@@ -76,11 +83,13 @@ class TestCGer(TestCase, TestOptimizationMixin): ...@@ -76,11 +83,13 @@ class TestCGer(TestCase, TestOptimizationMixin):
self.assertTrue(hash(CGer(False)) != hash(CGer(True))) self.assertTrue(hash(CGer(False)) != hash(CGer(True)))
def test_optimization_pipeline(self): def test_optimization_pipeline(self):
skip_if_blas_ldflags_empty('dger_')
f = self.function([self.x, self.y], tensor.outer(self.x, self.y)) f = self.function([self.x, self.y], tensor.outer(self.x, self.y))
self.assertFunctionContains(f, CGer(destructive=True)) self.assertFunctionContains(f, CGer(destructive=True))
f(self.xval, self.yval) # DebugMode tests correctness f(self.xval, self.yval) # DebugMode tests correctness
def test_optimization_pipeline_float(self): def test_optimization_pipeline_float(self):
skip_if_blas_ldflags_empty('sger_')
self.setUp('float32') self.setUp('float32')
f = self.function([self.x, self.y], tensor.outer(self.x, self.y)) f = self.function([self.x, self.y], tensor.outer(self.x, self.y))
self.assertFunctionContains(f, CGer(destructive=True)) self.assertFunctionContains(f, CGer(destructive=True))
...@@ -93,12 +102,14 @@ class TestCGer(TestCase, TestOptimizationMixin): ...@@ -93,12 +102,14 @@ class TestCGer(TestCase, TestOptimizationMixin):
self.assertFunctionContains0(f, CGer(destructive=False)) self.assertFunctionContains0(f, CGer(destructive=False))
def test_A_plus_outer(self): def test_A_plus_outer(self):
skip_if_blas_ldflags_empty('sger_', 'dger_')
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
self.A + tensor.outer(self.x, self.y)) self.A + tensor.outer(self.x, self.y))
self.assertFunctionContains(f, CGer(destructive=False)) self.assertFunctionContains(f, CGer(destructive=False))
self.run_f(f) # DebugMode tests correctness self.run_f(f) # DebugMode tests correctness
def test_A_plus_scaled_outer(self): def test_A_plus_scaled_outer(self):
skip_if_blas_ldflags_empty('sger_', 'dger_')
f = self.function([self.A, self.x, self.y], f = self.function([self.A, self.x, self.y],
self.A + 0.1 * tensor.outer(self.x, self.y)) self.A + 0.1 * tensor.outer(self.x, self.y))
self.assertFunctionContains(f, CGer(destructive=False)) self.assertFunctionContains(f, CGer(destructive=False))
...@@ -113,9 +124,9 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -113,9 +124,9 @@ class TestCGemv(TestCase, TestOptimizationMixin):
""" """
def setUp(self, dtype='float64'): def setUp(self, dtype='float64'):
if theano.config.blas.ldflags == "": # if theano.config.blas.ldflags == "":
raise SkipTest("This test is useful only when Theano" # raise SkipTest("This test is useful only when Theano"
" is directly linked to blas.") # " is directly linked to blas.")
self.dtype = dtype self.dtype = dtype
self.mode = theano.compile.get_default_mode().including('fast_run') self.mode = theano.compile.get_default_mode().including('fast_run')
# matrix # matrix
...@@ -144,6 +155,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -144,6 +155,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
assert not numpy.isnan(zval).any() assert not numpy.isnan(zval).any()
def test_optimizations_vm(self): def test_optimizations_vm(self):
skip_if_blas_ldflags_empty('sdot_')
''' Test vector dot matrix ''' ''' Test vector dot matrix '''
f = theano.function([self.x, self.A], f = theano.function([self.x, self.A],
theano.dot(self.x, self.A), theano.dot(self.x, self.A),
...@@ -165,6 +177,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -165,6 +177,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
numpy.dot(self.xval, self.Aval[::-1, ::-1])) numpy.dot(self.xval, self.Aval[::-1, ::-1]))
def test_optimizations_mv(self): def test_optimizations_mv(self):
skip_if_blas_ldflags_empty('sdot_')
''' Test matrix dot vector ''' ''' Test matrix dot vector '''
f = theano.function([self.A, self.y], f = theano.function([self.A, self.y],
theano.dot(self.A, self.y), theano.dot(self.A, self.y),
...@@ -235,6 +248,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -235,6 +248,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
numpy.dot(m.get_value(), v1.get_value()) + v2_orig) numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
def test_gemv1(self): def test_gemv1(self):
skip_if_blas_ldflags_empty('sdot_')
self.t_gemv1((3, 2)) self.t_gemv1((3, 2))
self.t_gemv1((1, 2)) self.t_gemv1((1, 2))
self.t_gemv1((0, 2)) self.t_gemv1((0, 2))
...@@ -269,6 +283,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -269,6 +283,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
self.assertRaises(ValueError, f, A_val, ones_4, ones_6) self.assertRaises(ValueError, f, A_val, ones_4, ones_6)
def test_multiple_inplace(self): def test_multiple_inplace(self):
skip_if_blas_ldflags_empty('sdot_')
x = tensor.dmatrix('x') x = tensor.dmatrix('x')
y = tensor.dvector('y') y = tensor.dvector('y')
z = tensor.dvector('z') z = tensor.dvector('z')
...@@ -292,9 +307,7 @@ class TestCGemvFloat32(TestCase, BaseGemv, TestOptimizationMixin): ...@@ -292,9 +307,7 @@ class TestCGemvFloat32(TestCase, BaseGemv, TestOptimizationMixin):
gemv_inplace = CGemv(inplace=True) gemv_inplace = CGemv(inplace=True)
def setUp(self): def setUp(self):
if theano.config.blas.ldflags == "": skip_if_blas_ldflags_empty('sdot_')
raise SkipTest("This test is useful only when Theano"
" is directly linked to blas.")
class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin): class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin):
...@@ -304,10 +317,11 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin): ...@@ -304,10 +317,11 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin):
gemv_inplace = CGemv(inplace=True) gemv_inplace = CGemv(inplace=True)
def setUp(self): def setUp(self):
if theano.config.blas.ldflags == "": skip_if_blas_ldflags_empty('sdot_')
raise SkipTest("This test is useful only when Theano"
" is directly linked to blas.")
class TestBlasStridesC(TestBlasStrides): class TestBlasStridesC(TestBlasStrides):
mode = mode_blas_opt mode = mode_blas_opt
def test_ger_strides(self):
    # Override of the base-class test: run the same strided ger checks,
    # but skip first when Theano has no BLAS link (this module's helper
    # raises SkipTest when blas.ldflags is empty; 'dger_' names the BLAS
    # function the test needs).
    skip_if_blas_ldflags_empty('dger_')
    super(TestBlasStridesC, self).test_ger_strides()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论