提交 2ecf9f1d authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #6367 from notoraptor/fallback-gemv

Add fallback implementation for BLAS [sd]gemv_.
......@@ -444,7 +444,7 @@ def gemv_c_code(y, A, x, z, alpha, beta, fail,
dtype_%(x)s* x_data = (dtype_%(x)s*) PyArray_DATA(%(x)s);
dtype_%(z)s* z_data = (dtype_%(z)s*) PyArray_DATA(%(z)s);
// gemv expects pointers to the beginning of memory arrays,
// but numpy provides provides a pointer to the first element,
// but numpy provides a pointer to the first element,
// so when the stride is negative, we need to get the last one.
if (Sx < 0)
x_data += (NA1 - 1) * Sx;
......
......@@ -731,29 +731,26 @@ def cblas_header_text():
def blas_header_text():
"""C header for the fortran blas interface"""
gemm_code = ""
const = "const"
blas_code = ""
if not config.blas.ldflags:
# Include the Numpy version implementation of [sd]gemm_.
current_filedir = dirname(__file__)
gemm_common_filepath = os.path.join(current_filedir, 'c_code', 'alt_gemm_common.c')
gemm_template_filepath = os.path.join(current_filedir, 'c_code', 'alt_gemm_template.c')
blas_common_filepath = os.path.join(current_filedir, 'c_code', 'alt_blas_common.h')
blas_template_filepath = os.path.join(current_filedir, 'c_code', 'alt_blas_template.c')
common_code = ""
sgemm_code = ""
dgemm_code = ""
with open(gemm_common_filepath) as code:
sblas_code = ""
dblas_code = ""
with open(blas_common_filepath) as code:
common_code = code.read()
with open(gemm_template_filepath) as code:
with open(blas_template_filepath) as code:
template_code = code.read()
sgemm_code = template_code % {"float_type": "float", "float_size": 4, "npy_float": "NPY_FLOAT32", "name": "sgemm_"}
dgemm_code = template_code % {"float_type": "double", "float_size": 8, "npy_float": "NPY_FLOAT64", "name": "dgemm_"}
if not common_code or not sgemm_code:
raise IOError("Unable to load NumPy implementation of gemm code from C source files.")
else:
const = ""
gemm_code += common_code
gemm_code += sgemm_code
gemm_code += dgemm_code
sblas_code = template_code % {"float_type": "float", "float_size": 4, "npy_float": "NPY_FLOAT32", "precision": "s"}
dblas_code = template_code % {"float_type": "double", "float_size": 8, "npy_float": "NPY_FLOAT64", "precision": "d"}
if not common_code or not template_code:
raise IOError("Unable to load NumPy implementation of BLAS functions from C source files.")
blas_code += common_code
blas_code += sblas_code
blas_code += dblas_code
header = """
extern "C"
......@@ -916,7 +913,7 @@ def blas_header_text():
/* Single Precision */
void sgemm_(char*, char*, const int*, const int*, const int*, const float *, %(const)s float *, const int*, %(const)s float *, const int*, const float *, float *, const int*);
void sgemm_(char*, char*, const int*, const int*, const int*, const float *, const float *, const int*, const float *, const int*, const float *, float *, const int*);
void ssymm_(char*, char*, const int*, const int*, const float *, const float *, const int*, const float *, const int*, const float *, float *, const int*);
void ssyrk_(char*, char*, const int*, const int*, const float *, const float *, const int*, const float *, float *, const int*);
void ssyr2k_(char*, char*, const int*, const int*, const float *, const float *, const int*, const float *, const int*, const float *, float *, const int*);
......@@ -925,7 +922,7 @@ def blas_header_text():
/* Double Precision */
void dgemm_(char*, char*, const int*, const int*, const int*, const double *, %(const)s double *, const int*, %(const)s double *, const int*, const double *, double *, const int*);
void dgemm_(char*, char*, const int*, const int*, const int*, const double *, const double *, const int*, const double *, const int*, const double *, double *, const int*);
void dsymm_(char*, char*, const int*, const int*, const double *, const double *, const int*, const double *, const int*, const double *, double *, const int*);
void dsyrk_(char*, char*, const int*, const int*, const double *, const double *, const int*, const double *, double *, const int*);
void dsyr2k_(char*, char*, const int*, const int*, const double *, const double *, const int*, const double *, const int*, const double *, double *, const int*);
......@@ -984,7 +981,11 @@ def blas_header_text():
}
""")
return (header % {'const': const}) + gemm_code
return header + blas_code
if not config.blas.ldflags:
_logger.warning('Using NumPy C-API based implementation for BLAS functions.')
def mkl_threads_text():
......@@ -1032,7 +1033,7 @@ def openblas_threads_text():
def blas_header_version():
# Version for the base header
version = (2,)
version = (5,)
if detect_macos_sdot_bug():
if detect_macos_sdot_bug.fix_works:
# Version with fix
......
/** C Implementation (with NumPy back-end) of BLAS functions used in Theano.
* Used instead of BLAS when Theano flag ``blas.ldflags`` is empty.
* This file contains some useful header code not templated.
* File alt_blas_template.c currently contains template code for:
* - [sd]gemm_
* - [sd]gemv_
* - [sd]dot_
**/
/* Fatal-error helper: prints the pending Python exception (if any), then
 * `message` to stderr when it is not NULL, and terminates with exit(-1).
 * NOTE(review): `message` is handed to fprintf as the format string itself,
 * so callers should only pass text without stray '%' conversions. */
#define alt_fatal_error(message) { if (PyErr_Occurred()) PyErr_Print(); if(message != NULL) fprintf(stderr, message); exit(-1); }
/* Evaluates to true unless the character pointed to by `trans` is 'N' or 'n'. */
#define alt_trans_to_bool(trans) (*trans != 'N' && *trans != 'n')
/**Template code for BLAS functions follows in file alt_blas_template.c
* (as Python string to be used with old formatting).
* PARAMETERS:
* float_type: "float" or "double".
* float_size: 4 for float32 (single precision), 8 for float64 (double precision).
* npy_float: "NPY_FLOAT32" or "NPY_FLOAT64".
* precision: "s" for single, "d" for double.
* See blas_headers.py for current use.**/
/** C Implementation of [sd]gemm_ based on NumPy
* Used instead of blas when Theano config flag blas.ldflags is empty.
* This file contains the common code for [sd]gemm_.
* File alt_gemm_template.c contains template code for [sd]gemm_. **/
#define alt_fatal_error(message) { if(message != NULL) fprintf(stderr, message); exit(-1); }
#define alt_trans_to_bool(trans) (*trans != 'N' && *trans != 'n')
/**Template code for [sd]gemm_ follows in file alt_gemm_template.c
* (as Python string to be used with old formatting).
* PARAMETERS:
* float_type: "float" for sgemm_, "double" for dgemm_.
* float_size: 4 for float32 (sgemm_), 8 for float64 (dgemm_).
* npy_float: "NPY_FLOAT32" for sgemm_, "NPY_FLOAT64" for dgemm_.
* name: "sgemm_" for sgemm_, "dgemm_" for dgemm_.
* See blas_headers.py for current use.**/
......@@ -316,5 +316,92 @@ class TestCGemvFloat64(TestCase, BaseGemv, TestOptimizationMixin):
skip_if_blas_ldflags_empty()
class TestCGemvNoFlags(object):
    """Compare CGemv against a NumPy reference with ``blas.ldflags`` empty.

    Every combination of dtype, alpha, beta, transposed ``A`` and
    negatively-strided (reverse-sliced) operands is generated by
    ``test_cgemv`` and executed by ``run_cgemv``.
    """

    # NOTE(review): ``mode`` is declared but never handed to
    # ``theano.function`` in ``get_function`` -- confirm whether that is
    # intentional.
    mode = mode_blas_opt
    gemv = CGemv(inplace=False)
    # Shape of the operands actually fed to gemv: A is (M, N), x is (N,)
    # and y is (M,).
    M = 4
    N = 5
    # Magnitude of the negative step used when the operands are sliced.
    slice_step = 3

    def setUp(self):
        unittest_tools.seed_rng()

    def get_function(self, dtype, transpose_A=False, slice_tensors=False):
        """Build a compiled function evaluating ``gemv(y, alpha, A, x, beta)``.

        The symbolic ``A`` is optionally transposed, then ``A``, ``x`` and
        ``y`` are optionally reverse-sliced with step ``-slice_step`` before
        being given to the op. The compiled function always takes the
        unmodified ``[alpha, A, x, beta, y]`` as inputs.
        """
        tensor = theano.tensor
        alpha = tensor.scalar(dtype=dtype)
        beta = tensor.scalar(dtype=dtype)
        A = tensor.matrix(dtype=dtype)
        x = tensor.vector(dtype=dtype)
        y = tensor.vector(dtype=dtype)

        matrix = A.T if transpose_A else A
        vec_x, vec_y = x, y
        if slice_tensors:
            reverse = slice(None, None, -self.slice_step)
            matrix, vec_x, vec_y = matrix[reverse], x[reverse], y[reverse]

        output = self.gemv(vec_y, alpha, matrix, vec_x, beta)
        return theano.function([alpha, A, x, beta, y], output)

    def get_data(self, dtype, alpha, beta, transpose_A=False, slice_tensors=False):
        """Return ``(alpha, A, x, beta, y)`` with random ``A``, ``x``, ``y``.

        When ``slice_tensors`` is set, the sliced dimensions are enlarged by
        ``slice_step`` so that the reverse slicing yields the nominal
        ``M``/``N`` sizes. When ``transpose_A`` is set, ``A`` is generated
        with its two dimensions swapped.
        """
        factor = self.slice_step if slice_tensors else 1
        n_rows = self.M * factor
        if transpose_A:
            A_shape = (self.N, n_rows)
        else:
            A_shape = (n_rows, self.N)
        x_shape = (self.N * factor,)
        y_shape = (self.M * factor,)

        def draw(shape):
            return np.random.random(shape).astype(dtype)

        # Draw order (A, then x, then y) is kept so seeded runs stay
        # reproducible.
        A = draw(A_shape)
        x = draw(x_shape)
        y = draw(y_shape)
        return (alpha, A, x, beta, y)

    def compute_ref(self, alpha, A, x, beta, y, transpose_A, slice_tensors):
        """NumPy reference: ``alpha * dot(A, x)``, plus ``beta * y`` if beta != 0.

        The same transposition and reverse slicing as in ``get_function`` are
        applied to the concrete arrays first.
        """
        if transpose_A:
            A = A.T
        if slice_tensors:
            reverse = slice(None, None, -self.slice_step)
            A, x, y = A[reverse], x[reverse], y[reverse]
        expected = alpha * np.dot(A, x)
        if beta == 0:
            return expected
        expected += beta * y
        return expected

    @theano.change_flags({'blas.ldflags': ''})
    def run_cgemv(self, dtype, ALPHA, BETA, transpose_A, slice_tensors):
        """Run one configuration and compare against the NumPy reference."""
        layout = dict(transpose_A=transpose_A, slice_tensors=slice_tensors)
        f = self.get_function(dtype, **layout)
        values = self.get_data(dtype, ALPHA, BETA, **layout)

        # The compiled graph must still contain a CGemv node.
        graph_ops = (node.op for node in f.maker.fgraph.apply_nodes)
        assert any(isinstance(op, CGemv) for op in graph_ops)

        z_val = f(*values)
        assert z_val.dtype == dtype
        assert z_val.ndim == 1
        assert z_val.shape[0] == self.M

        ref_val = self.compute_ref(*values, **layout)
        unittest_tools.assert_allclose(ref_val, z_val)

    def test_cgemv(self):
        """Yield one ``run_cgemv`` case per parameter combination."""
        scalars = (0, 1, -2)
        switches = (False, True)
        combos = ((dtype, alpha, beta, transpose_A, slice_tensors)
                  for dtype in ('float32', 'float64')
                  for alpha in scalars
                  for beta in scalars
                  for transpose_A in switches
                  for slice_tensors in switches)
        for combo in combos:
            yield (self.run_cgemv,) + combo
class TestSdotNoFlags(TestCGemvNoFlags):
    """Re-run the inherited CGemv checks with a single-row matrix.

    NOTE(review): judging by the class name, M = 1 is presumably meant to
    drive the fallback through its [sd]dot_ code path -- confirm against
    the C template.
    """

    # Only the row count differs from the parent class.
    M = 1
class TestBlasStridesC(TestBlasStrides):
mode = mode_blas_opt
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论