Commit 243857c5, authored by notoraptor

Add tests for fallback and update code to fix them.

Parent commit: 67341d9e
...@@ -444,7 +444,7 @@ def gemv_c_code(y, A, x, z, alpha, beta, fail, ...@@ -444,7 +444,7 @@ def gemv_c_code(y, A, x, z, alpha, beta, fail,
dtype_%(x)s* x_data = (dtype_%(x)s*) PyArray_DATA(%(x)s); dtype_%(x)s* x_data = (dtype_%(x)s*) PyArray_DATA(%(x)s);
dtype_%(z)s* z_data = (dtype_%(z)s*) PyArray_DATA(%(z)s); dtype_%(z)s* z_data = (dtype_%(z)s*) PyArray_DATA(%(z)s);
// gemv expects pointers to the beginning of memory arrays, // gemv expects pointers to the beginning of memory arrays,
// but numpy provides provides a pointer to the first element, // but numpy provides a pointer to the first element,
// so when the stride is negative, we need to get the last one. // so when the stride is negative, we need to get the last one.
if (Sx < 0) if (Sx < 0)
x_data += (NA1 - 1) * Sx; x_data += (NA1 - 1) * Sx;
......
...@@ -1033,7 +1033,7 @@ def openblas_threads_text(): ...@@ -1033,7 +1033,7 @@ def openblas_threads_text():
def blas_header_version(): def blas_header_version():
# Version for the base header # Version for the base header
version = (4,) version = (5,)
if detect_macos_sdot_bug(): if detect_macos_sdot_bug():
if detect_macos_sdot_bug.fix_works: if detect_macos_sdot_bug.fix_works:
# Version with fix # Version with fix
......
...@@ -203,27 +203,40 @@ void %(precision)sgemv_( ...@@ -203,27 +203,40 @@ void %(precision)sgemv_(
x, y are vectors x, y are vectors
ALPHA, BETA are scalars ALPHA, BETA are scalars
**/ **/
if (*M < 0 || *N < 0 || *LDA < 0 || *incx < 0 || *incy < 0)
alt_fatal_error("The integer arguments passed to %(precision)sgemv_ must all be at least 0."); // If alpha == 0 and beta == 1, we have nothing to do, as alpha*A*x + beta*y == y.
if (*ALPHA == 0 && *BETA == 1)
return;
if (*M < 0 || *N < 0 || *LDA < 0)
alt_fatal_error("NumPy %(precision)sgemv_ implementation: M, N and LDA must be at least 0.");
if (*incx == 0 || *incy == 0)
alt_fatal_error("NumPy %(precision)sgemv_ implementation: incx and incy must not be 0.");
int transpose = alt_trans_to_bool(TRANS); int transpose = alt_trans_to_bool(TRANS);
int size_x = 0, size_y = 0;
if (transpose) {
size_x = *M;
size_y = *N;
} else {
size_x = *N;
size_y = *M;
}
if (*M == 0 || *N == 0) { if (*M == 0 || *N == 0) {
/* A contains M * N == 0 values. y should be empty too, and we have nothing to do. */ /* A contains M * N == 0 values. y should be empty too, and we have nothing to do. */
if ((transpose && *N != 0) || (!transpose && *M != 0)) if (size_y != 0)
alt_fatal_error("NumPy %(precision)sgemv_ implementation: the output vector should be empty."); alt_fatal_error("NumPy %(precision)sgemv_ implementation: the output vector should be empty.");
return; return;
} }
/* Vector pointers points to the begining of memory (see function `theano.tensor.blas_c.gemv_c_code`).
* NumPy seems to expect that pointers points to the first element of the array. */
if (*incx < 0)
x += (size_x - 1) * (-*incx);
if (*incy < 0)
y += (size_y - 1) * (-*incy);
PyObject* matrixA = alt_op_%(float_type)s(transpose, A, *M, *N, *LDA, 0); PyObject* matrixA = alt_op_%(float_type)s(transpose, A, *M, *N, *LDA, 0);
PyObject* matrixX = NULL; PyObject* matrixX = alt_op_%(float_type)s(1, x, 1, size_x, *incx, 0);
PyObject* matrixY = NULL; PyObject* matrixY = alt_op_%(float_type)s(1, y, 1, size_y, *incy, NPY_ARRAY_WRITEABLE);
if (transpose) {
matrixX = alt_op_%(float_type)s(1, x, 1, *M, *incx, 0);
matrixY = alt_op_%(float_type)s(1, y, 1, *N, *incy, NPY_ARRAY_WRITEABLE);
} else {
matrixX = alt_op_%(float_type)s(1, x, 1, *N, *incx, 0);
matrixY = alt_op_%(float_type)s(1, y, 1, *M, *incy, NPY_ARRAY_WRITEABLE);
};
if (matrixA == NULL || matrixX == NULL || matrixY == NULL) if (matrixA == NULL || matrixX == NULL || matrixY == NULL)
alt_fatal_error("NumPy %(precision)sgemv_: unable to wrap A, x or y arrays.") alt_fatal_error("NumPy %(precision)sgemv_ implementation: unable to wrap A, x or y arrays.")
if (*ALPHA == 0) { if (*ALPHA == 0) {
// Just BETA * y // Just BETA * y
alt_numpy_scale_matrix_inplace_%(float_type)s(BETA, (PyArrayObject*)matrixY); alt_numpy_scale_matrix_inplace_%(float_type)s(BETA, (PyArrayObject*)matrixY);
...@@ -268,22 +281,33 @@ void %(precision)sgemv_( ...@@ -268,22 +281,33 @@ void %(precision)sgemv_(
%(float_type)s *SY, %(float_type)s *SY,
const int *INCY const int *INCY
) { ) {
if (*N < 0 || *INCX < 0 || *INCY < 0) if (*N < 0)
alt_fatal_error("The integer arguments passed to %(precision)sdot_ must all be at least 0."); alt_fatal_error("NumPy %(precision)sdot_ implementation: N must be at least 0.");
if (*INCX == 0 || *INCY == 0)
alt_fatal_error("NumPy %(precision)sdot_ implementation: INCX and INCY must not be 0.");
%(float_type)s result = 0;
int one = 1;
/* Vector pointers points to the begining of memory (see function `theano.tensor.blas_c.gemv_c_code`).
* NumPy seems to expect that pointers points to the first element of the array. */
if (*INCX < 0)
SX += (*N - 1) * (-*INCX);
if (*INCY < 0)
SY += (*N - 1) * (-*INCY);
// Create vector_x with shape (1, N) // Create vector_x with shape (1, N)
PyObject* vector_x = alt_op_%(float_type)s(0, SX, 1, *N, *INCX, 0); PyObject* vector_x = alt_op_%(float_type)s(0, SX, 1, *N, *INCX, 0);
// Create vector_y with shape (N, 1) // Create vector_y with shape (N, 1)
PyObject* vector_y = alt_op_%(float_type)s(1, SY, 1, *N, *INCY, 0); PyObject* vector_y = alt_op_%(float_type)s(1, SY, 1, *N, *INCY, 0);
// Create output scalar z with shape (1, 1) to wrap `result`.
PyArrayObject* dot_product = (PyArrayObject*)alt_wrap_fortran_writeable_matrix_%(float_type)s(&result, &one, &one, &one);
if (vector_x == NULL || vector_y == NULL) if (vector_x == NULL || vector_y == NULL || dot_product == NULL)
alt_fatal_error("NumPy %(precision)sdot_: unable to wrap x and y arrays."); alt_fatal_error("NumPy %(precision)sdot_ implementation: unable to wrap x, y or output arrays.");
// Make matrix product: (1, N) * (N, 1) => (1, 1) // Compute matrix product: (1, N) * (N, 1) => (1, 1)
PyObject* dot_product = PyArray_MatrixProduct(vector_x, vector_y); PyArray_MatrixProduct2(vector_x, vector_y, dot_product);
if (dot_product == NULL) if (PyErr_Occurred())
alt_fatal_error("NumPy %(precision)sdot_: unable to compute dot."); alt_fatal_error("NumPy %(precision)sdot_ implementation: unable to compute dot.");
// Get result. // Get result.
%(float_type)s result = *(%(float_type)s*)PyArray_DATA((PyArrayObject*)dot_product);
Py_XDECREF(dot_product); Py_XDECREF(dot_product);
Py_XDECREF(vector_y); Py_XDECREF(vector_y);
Py_XDECREF(vector_x); Py_XDECREF(vector_x);
......
...@@ -316,5 +316,92 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin): ...@@ -316,5 +316,92 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin):
skip_if_blas_ldflags_empty() skip_if_blas_ldflags_empty()
class TestCGemvNoFlags(object):
    """Exercise the CGemv Op with empty ``blas.ldflags`` (the NumPy fallback).

    Builds small gemv graphs covering every combination of dtype,
    alpha/beta value, transposition of A, and negative-step slicing,
    then checks the compiled output against a NumPy reference.
    """

    mode = mode_blas_opt
    gemv = CGemv(inplace=False)
    M = 4
    N = 5
    slice_step = 3

    def setUp(self):
        # Seed the RNG so failing cases are reproducible.
        unittest_tools.seed_rng()

    def get_function(self, dtype, transpose_A=False, slice_tensors=False):
        """Compile ``gemv(y, alpha, op(A), x, beta)`` as a Theano function."""
        alpha = theano.tensor.scalar(dtype=dtype)
        beta = theano.tensor.scalar(dtype=dtype)
        A = theano.tensor.matrix(dtype=dtype)
        x = theano.tensor.vector(dtype=dtype)
        y = theano.tensor.vector(dtype=dtype)
        op_A = A.T if transpose_A else A
        if slice_tensors:
            # Negative-step slices force reversed, non-contiguous strides.
            op_A = op_A[::-self.slice_step]
            op_x = x[::-self.slice_step]
            op_y = y[::-self.slice_step]
        else:
            op_x = x
            op_y = y
        out = self.gemv(op_y, alpha, op_A, op_x, beta)
        return theano.function([alpha, A, x, beta, y], out)

    def get_data(self, dtype, alpha, beta, transpose_A=False, slice_tensors=False):
        """Generate random inputs matching the graph built by ``get_function``."""
        # When slicing, vectors and the sliced axis of A are enlarged so that
        # the step-``slice_step`` slice yields the nominal (M, N) problem.
        scale = self.slice_step if slice_tensors else 1
        if transpose_A:
            A_shape = (self.N, self.M * scale)
        else:
            A_shape = (self.M * scale, self.N)
        A = np.random.random(A_shape).astype(dtype)
        x = np.random.random((self.N * scale,)).astype(dtype)
        y = np.random.random((self.M * scale,)).astype(dtype)
        return (alpha, A, x, beta, y)

    def compute_ref(self, alpha, A, x, beta, y, transpose_A, slice_tensors):
        """NumPy reference: ``alpha * dot(op(A), x)`` plus ``beta * y`` if beta != 0."""
        if transpose_A:
            A = A.T
        if slice_tensors:
            A = A[::-self.slice_step]
            x = x[::-self.slice_step]
            y = y[::-self.slice_step]
        ref_val = alpha * np.dot(A, x)
        if beta != 0:
            ref_val += beta * y
        return ref_val

    @theano.change_flags({'blas.ldflags': ''})
    def run_cgemv(self, dtype, ALPHA, BETA, transpose_A, slice_tensors):
        f = self.get_function(dtype,
                              transpose_A=transpose_A,
                              slice_tensors=slice_tensors)
        values = self.get_data(dtype, ALPHA, BETA,
                               transpose_A=transpose_A,
                               slice_tensors=slice_tensors)
        # The optimized graph must still contain a CGemv node, otherwise
        # the fallback code path is not being exercised at all.
        assert any(isinstance(node.op, CGemv)
                   for node in f.maker.fgraph.apply_nodes)
        z_val = f(*values)
        assert z_val.dtype == dtype
        assert z_val.ndim == 1
        assert z_val.shape[0] == self.M
        ref_val = self.compute_ref(*(values + (transpose_A, slice_tensors)))
        unittest_tools.assert_allclose(ref_val, z_val)

    def test_cgemv(self):
        # nose-style generator test: yields one case per parameter combination.
        for dtype in ('float32', 'float64'):
            for alpha in (0, 1, -2):
                for beta in (0, 1, -2):
                    for transpose_A in (False, True):
                        for slice_tensors in (False, True):
                            yield (self.run_cgemv, dtype, alpha, beta,
                                   transpose_A, slice_tensors)
class TestSdotNoFlags(TestCGemvNoFlags):
    """Same fallback tests with M == 1, so gemv degenerates into a dot product."""

    M = 1
class TestBlasStridesC(TestBlasStrides): class TestBlasStridesC(TestBlasStrides):
mode = mode_blas_opt mode = mode_blas_opt
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论