Commit 243857c5, authored by notoraptor

Add tests for fallback and update code to fix them.

Parent commit: 67341d9e
...@@ -444,7 +444,7 @@ def gemv_c_code(y, A, x, z, alpha, beta, fail, ...@@ -444,7 +444,7 @@ def gemv_c_code(y, A, x, z, alpha, beta, fail,
dtype_%(x)s* x_data = (dtype_%(x)s*) PyArray_DATA(%(x)s); dtype_%(x)s* x_data = (dtype_%(x)s*) PyArray_DATA(%(x)s);
dtype_%(z)s* z_data = (dtype_%(z)s*) PyArray_DATA(%(z)s); dtype_%(z)s* z_data = (dtype_%(z)s*) PyArray_DATA(%(z)s);
// gemv expects pointers to the beginning of memory arrays, // gemv expects pointers to the beginning of memory arrays,
// but numpy provides provides a pointer to the first element, // but numpy provides a pointer to the first element,
// so when the stride is negative, we need to get the last one. // so when the stride is negative, we need to get the last one.
if (Sx < 0) if (Sx < 0)
x_data += (NA1 - 1) * Sx; x_data += (NA1 - 1) * Sx;
......
...@@ -1033,7 +1033,7 @@ def openblas_threads_text(): ...@@ -1033,7 +1033,7 @@ def openblas_threads_text():
def blas_header_version(): def blas_header_version():
# Version for the base header # Version for the base header
version = (4,) version = (5,)
if detect_macos_sdot_bug(): if detect_macos_sdot_bug():
if detect_macos_sdot_bug.fix_works: if detect_macos_sdot_bug.fix_works:
# Version with fix # Version with fix
......
...@@ -203,27 +203,40 @@ void %(precision)sgemv_( ...@@ -203,27 +203,40 @@ void %(precision)sgemv_(
x, y are vectors x, y are vectors
ALPHA, BETA are scalars ALPHA, BETA are scalars
**/ **/
if (*M < 0 || *N < 0 || *LDA < 0 || *incx < 0 || *incy < 0)
alt_fatal_error("The integer arguments passed to %(precision)sgemv_ must all be at least 0."); // If alpha == 0 and beta == 1, we have nothing to do, as alpha*A*x + beta*y == y.
if (*ALPHA == 0 && *BETA == 1)
return;
if (*M < 0 || *N < 0 || *LDA < 0)
alt_fatal_error("NumPy %(precision)sgemv_ implementation: M, N and LDA must be at least 0.");
if (*incx == 0 || *incy == 0)
alt_fatal_error("NumPy %(precision)sgemv_ implementation: incx and incy must not be 0.");
int transpose = alt_trans_to_bool(TRANS); int transpose = alt_trans_to_bool(TRANS);
int size_x = 0, size_y = 0;
if (transpose) {
size_x = *M;
size_y = *N;
} else {
size_x = *N;
size_y = *M;
}
if (*M == 0 || *N == 0) { if (*M == 0 || *N == 0) {
/* A contains M * N == 0 values. y should be empty too, and we have nothing to do. */ /* A contains M * N == 0 values. y should be empty too, and we have nothing to do. */
if ((transpose && *N != 0) || (!transpose && *M != 0)) if (size_y != 0)
alt_fatal_error("NumPy %(precision)sgemv_ implementation: the output vector should be empty."); alt_fatal_error("NumPy %(precision)sgemv_ implementation: the output vector should be empty.");
return; return;
} }
/* Vector pointers points to the begining of memory (see function `theano.tensor.blas_c.gemv_c_code`).
* NumPy seems to expect that pointers points to the first element of the array. */
if (*incx < 0)
x += (size_x - 1) * (-*incx);
if (*incy < 0)
y += (size_y - 1) * (-*incy);
PyObject* matrixA = alt_op_%(float_type)s(transpose, A, *M, *N, *LDA, 0); PyObject* matrixA = alt_op_%(float_type)s(transpose, A, *M, *N, *LDA, 0);
PyObject* matrixX = NULL; PyObject* matrixX = alt_op_%(float_type)s(1, x, 1, size_x, *incx, 0);
PyObject* matrixY = NULL; PyObject* matrixY = alt_op_%(float_type)s(1, y, 1, size_y, *incy, NPY_ARRAY_WRITEABLE);
if (transpose) {
matrixX = alt_op_%(float_type)s(1, x, 1, *M, *incx, 0);
matrixY = alt_op_%(float_type)s(1, y, 1, *N, *incy, NPY_ARRAY_WRITEABLE);
} else {
matrixX = alt_op_%(float_type)s(1, x, 1, *N, *incx, 0);
matrixY = alt_op_%(float_type)s(1, y, 1, *M, *incy, NPY_ARRAY_WRITEABLE);
};
if (matrixA == NULL || matrixX == NULL || matrixY == NULL) if (matrixA == NULL || matrixX == NULL || matrixY == NULL)
alt_fatal_error("NumPy %(precision)sgemv_: unable to wrap A, x or y arrays.") alt_fatal_error("NumPy %(precision)sgemv_ implementation: unable to wrap A, x or y arrays.")
if (*ALPHA == 0) { if (*ALPHA == 0) {
// Just BETA * y // Just BETA * y
alt_numpy_scale_matrix_inplace_%(float_type)s(BETA, (PyArrayObject*)matrixY); alt_numpy_scale_matrix_inplace_%(float_type)s(BETA, (PyArrayObject*)matrixY);
...@@ -268,22 +281,33 @@ void %(precision)sgemv_( ...@@ -268,22 +281,33 @@ void %(precision)sgemv_(
%(float_type)s *SY, %(float_type)s *SY,
const int *INCY const int *INCY
) { ) {
if (*N < 0 || *INCX < 0 || *INCY < 0) if (*N < 0)
alt_fatal_error("The integer arguments passed to %(precision)sdot_ must all be at least 0."); alt_fatal_error("NumPy %(precision)sdot_ implementation: N must be at least 0.");
if (*INCX == 0 || *INCY == 0)
alt_fatal_error("NumPy %(precision)sdot_ implementation: INCX and INCY must not be 0.");
%(float_type)s result = 0;
int one = 1;
/* Vector pointers points to the begining of memory (see function `theano.tensor.blas_c.gemv_c_code`).
* NumPy seems to expect that pointers points to the first element of the array. */
if (*INCX < 0)
SX += (*N - 1) * (-*INCX);
if (*INCY < 0)
SY += (*N - 1) * (-*INCY);
// Create vector_x with shape (1, N) // Create vector_x with shape (1, N)
PyObject* vector_x = alt_op_%(float_type)s(0, SX, 1, *N, *INCX, 0); PyObject* vector_x = alt_op_%(float_type)s(0, SX, 1, *N, *INCX, 0);
// Create vector_y with shape (N, 1) // Create vector_y with shape (N, 1)
PyObject* vector_y = alt_op_%(float_type)s(1, SY, 1, *N, *INCY, 0); PyObject* vector_y = alt_op_%(float_type)s(1, SY, 1, *N, *INCY, 0);
// Create output scalar z with shape (1, 1) to wrap `result`.
PyArrayObject* dot_product = (PyArrayObject*)alt_wrap_fortran_writeable_matrix_%(float_type)s(&result, &one, &one, &one);
if (vector_x == NULL || vector_y == NULL) if (vector_x == NULL || vector_y == NULL || dot_product == NULL)
alt_fatal_error("NumPy %(precision)sdot_: unable to wrap x and y arrays."); alt_fatal_error("NumPy %(precision)sdot_ implementation: unable to wrap x, y or output arrays.");
// Make matrix product: (1, N) * (N, 1) => (1, 1) // Compute matrix product: (1, N) * (N, 1) => (1, 1)
PyObject* dot_product = PyArray_MatrixProduct(vector_x, vector_y); PyArray_MatrixProduct2(vector_x, vector_y, dot_product);
if (dot_product == NULL) if (PyErr_Occurred())
alt_fatal_error("NumPy %(precision)sdot_: unable to compute dot."); alt_fatal_error("NumPy %(precision)sdot_ implementation: unable to compute dot.");
// Get result. // Get result.
%(float_type)s result = *(%(float_type)s*)PyArray_DATA((PyArrayObject*)dot_product);
Py_XDECREF(dot_product); Py_XDECREF(dot_product);
Py_XDECREF(vector_y); Py_XDECREF(vector_y);
Py_XDECREF(vector_x); Py_XDECREF(vector_x);
......
...@@ -316,5 +316,92 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin): ...@@ -316,5 +316,92 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin):
skip_if_blas_ldflags_empty() skip_if_blas_ldflags_empty()
class TestCGemvNoFlags(object):
    """Exercise the CGemv Op with empty ``blas.ldflags`` (the NumPy fallback).

    Builds small gemv graphs covering every combination of dtype,
    alpha/beta value, transposition of A, and negative-step slicing,
    then checks the compiled output against a NumPy reference.
    """

    mode = mode_blas_opt
    gemv = CGemv(inplace=False)
    M = 4
    N = 5
    slice_step = 3

    def setUp(self):
        # Seed the RNG so failing cases are reproducible.
        unittest_tools.seed_rng()

    def get_function(self, dtype, transpose_A=False, slice_tensors=False):
        """Compile ``gemv(y, alpha, op(A), x, beta)`` as a Theano function."""
        alpha = theano.tensor.scalar(dtype=dtype)
        beta = theano.tensor.scalar(dtype=dtype)
        A = theano.tensor.matrix(dtype=dtype)
        x = theano.tensor.vector(dtype=dtype)
        y = theano.tensor.vector(dtype=dtype)
        op_A = A.T if transpose_A else A
        if slice_tensors:
            # Negative-step slices force reversed, non-contiguous strides.
            op_A = op_A[::-self.slice_step]
            op_x = x[::-self.slice_step]
            op_y = y[::-self.slice_step]
        else:
            op_x = x
            op_y = y
        out = self.gemv(op_y, alpha, op_A, op_x, beta)
        return theano.function([alpha, A, x, beta, y], out)

    def get_data(self, dtype, alpha, beta, transpose_A=False, slice_tensors=False):
        """Generate random inputs matching the graph built by ``get_function``."""
        # When slicing, vectors and the sliced axis of A are enlarged so that
        # the step-``slice_step`` slice yields the nominal (M, N) problem.
        scale = self.slice_step if slice_tensors else 1
        if transpose_A:
            A_shape = (self.N, self.M * scale)
        else:
            A_shape = (self.M * scale, self.N)
        A = np.random.random(A_shape).astype(dtype)
        x = np.random.random((self.N * scale,)).astype(dtype)
        y = np.random.random((self.M * scale,)).astype(dtype)
        return (alpha, A, x, beta, y)

    def compute_ref(self, alpha, A, x, beta, y, transpose_A, slice_tensors):
        """NumPy reference: ``alpha * dot(op(A), x)`` plus ``beta * y`` if beta != 0."""
        if transpose_A:
            A = A.T
        if slice_tensors:
            A = A[::-self.slice_step]
            x = x[::-self.slice_step]
            y = y[::-self.slice_step]
        ref_val = alpha * np.dot(A, x)
        if beta != 0:
            ref_val += beta * y
        return ref_val

    @theano.change_flags({'blas.ldflags': ''})
    def run_cgemv(self, dtype, ALPHA, BETA, transpose_A, slice_tensors):
        f = self.get_function(dtype,
                              transpose_A=transpose_A,
                              slice_tensors=slice_tensors)
        values = self.get_data(dtype, ALPHA, BETA,
                               transpose_A=transpose_A,
                               slice_tensors=slice_tensors)
        # The optimized graph must still contain a CGemv node, otherwise
        # the fallback code path is not being exercised at all.
        assert any(isinstance(node.op, CGemv)
                   for node in f.maker.fgraph.apply_nodes)
        z_val = f(*values)
        assert z_val.dtype == dtype
        assert z_val.ndim == 1
        assert z_val.shape[0] == self.M
        ref_val = self.compute_ref(*(values + (transpose_A, slice_tensors)))
        unittest_tools.assert_allclose(ref_val, z_val)

    def test_cgemv(self):
        # nose-style generator test: yields one case per parameter combination.
        for dtype in ('float32', 'float64'):
            for alpha in (0, 1, -2):
                for beta in (0, 1, -2):
                    for transpose_A in (False, True):
                        for slice_tensors in (False, True):
                            yield (self.run_cgemv, dtype, alpha, beta,
                                   transpose_A, slice_tensors)
class TestSdotNoFlags(TestCGemvNoFlags):
    """Same fallback tests with M == 1, so gemv degenerates into a dot product."""

    M = 1
class TestBlasStridesC(TestBlasStrides): class TestBlasStridesC(TestBlasStrides):
mode = mode_blas_opt mode = mode_blas_opt
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论