提交 bb25e997 authored 作者: notoraptor's avatar notoraptor

New update.

Commented-out code related to empty ldflags has been removed and replaced with regular comments. Some changes were made to alt_gemm_template.c following the latest recommendations.
上级 70896325
/** %(name)s **/ /** %(name)s **/
void alt_numpy_scalar_matrix_product_in_place_%(float_type)s(%(float_type)s scalar, PyArrayObject* matrix) { void alt_numpy_scale_matrix_inplace_%(float_type)s(%(float_type)s scalar, PyArrayObject* matrix) {
NpyIter* iterator = NpyIter_New(matrix, NpyIter* iterator = NpyIter_New(matrix,
NPY_ITER_READWRITE | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK, NPY_ITER_READWRITE | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK,
NPY_KEEPORDER, NPY_NO_CASTING, NULL); NPY_KEEPORDER, NPY_NO_CASTING, NULL);
...@@ -25,7 +25,7 @@ void alt_numpy_scalar_matrix_product_in_place_%(float_type)s(%(float_type)s scal ...@@ -25,7 +25,7 @@ void alt_numpy_scalar_matrix_product_in_place_%(float_type)s(%(float_type)s scal
/*Matrix+Matrix function. Compute (coeffA * matrixA) + (coeffB * matrixB) /*Matrix+Matrix function. Compute (coeffA * matrixA) + (coeffB * matrixB)
* Remark: This function actually sums a C-contiguous matrix (alpha*op(A)*op(B)) with a F-contiguous matrix (beta*C) * Remark: This function actually sums a C-contiguous matrix (alpha*op(A)*op(B)) with a F-contiguous matrix (beta*C)
* (see gemm implementation at next function for more details) */ * (see gemm implementation at next function for more details) */
void alt_numpy_matrix_extended_sum_in_place_%(float_type)s( void alt_numpy_matrix_extended_sum_inplace_%(float_type)s(
const %(float_type)s* ALPHA, PyArrayObject* A, const %(float_type)s* ALPHA, PyArrayObject* A,
const %(float_type)s* BETA, PyArrayObject* B const %(float_type)s* BETA, PyArrayObject* B
) { ) {
...@@ -46,15 +46,15 @@ void alt_numpy_matrix_extended_sum_in_place_%(float_type)s( ...@@ -46,15 +46,15 @@ void alt_numpy_matrix_extended_sum_in_place_%(float_type)s(
} while(get_next(iterators)); } while(get_next(iterators));
NpyIter_Deallocate(iterators); NpyIter_Deallocate(iterators);
} }
PyObject* alt_op_without_copy_%(float_type)s(int transposable, %(float_type)s* M, int nrow, int ncol, int LDM) { PyObject* alt_op_without_copy_%(float_type)s(int to_transpose, %(float_type)s* M, int nrow, int ncol, int LDM) {
// By default, M is considered as a nrow*ncol F-contiguous matrix with LDM as stride indicator for the columns. // By default, M is considered as a nrow*ncol F-contiguous matrix with LDM as stride indicator for the columns.
npy_intp dims[2]; npy_intp dims[2];
npy_intp strides[2]; npy_intp strides[2];
int flags; int flags;
if(transposable) { if(to_transpose) {
dims[0] = ncol; dims[0] = ncol;
dims[1] = nrow; dims[1] = nrow;
strides[0] = dims[1] * %(float_size)d; strides[0] = LDM * %(float_size)d;
strides[1] = %(float_size)d; strides[1] = %(float_size)d;
flags = NPY_ARRAY_C_CONTIGUOUS; flags = NPY_ARRAY_C_CONTIGUOUS;
} else { } else {
...@@ -73,12 +73,14 @@ void %(name)s( ...@@ -73,12 +73,14 @@ void %(name)s(
const %(float_type)s* ALPHA, %(float_type)s* A, const int* LDA, const %(float_type)s* ALPHA, %(float_type)s* A, const int* LDA,
%(float_type)s* B, const int* LDB, const %(float_type)s* BETA, %(float_type)s* B, const int* LDB, const %(float_type)s* BETA,
%(float_type)s* C, const int* LDC) { %(float_type)s* C, const int* LDC) {
if(*M < 0 || *N < 0 || *K < 0 || *LDA < 0 || *LDB < 0 || *LDC < 0)
alt_fatal_error("The integer arguments passed to %(name)s must all be at least 0.");
/* NB: it seems that matrix+matrix and scalar*matrix functions /* NB: it seems that matrix+matrix and scalar*matrix functions
* defined above do not allocate iterator for a matrix with 0 * defined above do not allocate iterator for a matrix with 0
* content, that is a matrix whose nrow*ncol == 0. As these * content, that is a matrix whose nrow*ncol == 0. As these
* functions actually work with M*N matrices (op(A)*op(B) and/or C), * functions actually work with M*N matrices (op(A)*op(B) and/or C),
* I think that we could just return if M or N is null. */ * I think that we could just return if M or N is null. */
if(*M < 1 || *N < 1 || *K < 0 || *LDA < 0 || *LDB < 0 || *LDC < 0) if(*M == 0 || *N == 0)
return; return;
int nrowa, ncola, nrowb, ncolb; int nrowa, ncola, nrowb, ncolb;
int is_A_transposable = alt_trans_to_bool(TRANSA); int is_A_transposable = alt_trans_to_bool(TRANSA);
...@@ -111,7 +113,7 @@ void %(name)s( ...@@ -111,7 +113,7 @@ void %(name)s(
PyObject* op_B_transposed = alt_op_without_copy_%(float_type)s(!is_B_transposable, B, nrowb, ncolb, *LDB); PyObject* op_B_transposed = alt_op_without_copy_%(float_type)s(!is_B_transposable, B, nrowb, ncolb, *LDB);
PyArray_MatrixProduct2(op_B_transposed, op_A_transposed, (PyArrayObject*)matrix_C); PyArray_MatrixProduct2(op_B_transposed, op_A_transposed, (PyArrayObject*)matrix_C);
if(*ALPHA != 1.0) if(*ALPHA != 1.0)
alt_numpy_scalar_matrix_product_in_place_%(float_type)s(*ALPHA, (PyArrayObject*)matrix_C); alt_numpy_scale_matrix_inplace_%(float_type)s(*ALPHA, (PyArrayObject*)matrix_C);
Py_XDECREF(op_B_transposed); Py_XDECREF(op_B_transposed);
Py_XDECREF(op_A_transposed); Py_XDECREF(op_A_transposed);
Py_XDECREF(matrix_C); Py_XDECREF(matrix_C);
...@@ -123,7 +125,7 @@ void %(name)s( ...@@ -123,7 +125,7 @@ void %(name)s(
PyObject* op_A = alt_op_without_copy_%(float_type)s(is_A_transposable, A, nrowa, ncola, *LDA); PyObject* op_A = alt_op_without_copy_%(float_type)s(is_A_transposable, A, nrowa, ncola, *LDA);
PyObject* op_B = alt_op_without_copy_%(float_type)s(is_B_transposable, B, nrowb, ncolb, *LDB); PyObject* op_B = alt_op_without_copy_%(float_type)s(is_B_transposable, B, nrowb, ncolb, *LDB);
PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B); PyArrayObject* op_A_times_op_B = (PyArrayObject*)PyArray_MatrixProduct(op_A, op_B);
alt_numpy_matrix_extended_sum_in_place_%(float_type)s(ALPHA, op_A_times_op_B, BETA, (PyArrayObject*)matrix_C); alt_numpy_matrix_extended_sum_inplace_%(float_type)s(ALPHA, op_A_times_op_B, BETA, (PyArrayObject*)matrix_C);
/*C is already F-contiguous, thus no conversion needed for output.*/ /*C is already F-contiguous, thus no conversion needed for output.*/
Py_XDECREF(op_A_times_op_B); Py_XDECREF(op_A_times_op_B);
Py_XDECREF(op_B); Py_XDECREF(op_B);
......
...@@ -1037,7 +1037,6 @@ class Gemm(GemmRelated): ...@@ -1037,7 +1037,6 @@ class Gemm(GemmRelated):
if node.inputs[0].type.dtype.startswith('complex'): if node.inputs[0].type.dtype.startswith('complex'):
raise utils.MethodNotDefined('%s.c_code' raise utils.MethodNotDefined('%s.c_code'
% self.__class__.__name__) % self.__class__.__name__)
# if not config.blas.ldflags: # return super(Gemm, self).c_code(node, name, (_z, _a, _x, _y, _b), (_zout, ), sub)
full_code = self.build_gemm_call() % dict(locals(), **sub) full_code = self.build_gemm_call() % dict(locals(), **sub)
return full_code return full_code
...@@ -2151,8 +2150,6 @@ class BatchedDot(Op): ...@@ -2151,8 +2150,6 @@ class BatchedDot(Op):
_z, = out _z, = out
fail = sub["fail"] fail = sub["fail"]
# if not config.blas.ldflags: # return super(BatchedDot, self).c_code(node, name, inp, out, sub)
# generate contiguity condition # generate contiguity condition
def contiguous(var, ndim): def contiguous(var, ndim):
strides = "PyArray_STRIDES(%s)" % var strides = "PyArray_STRIDES(%s)" % var
......
...@@ -751,7 +751,6 @@ def blas_header_text(): ...@@ -751,7 +751,6 @@ def blas_header_text():
raise IOError("Unable to load NumPy implementation of gemm code from C source files.") raise IOError("Unable to load NumPy implementation of gemm code from C source files.")
else: else:
const = "" const = ""
# _logger.info("Numpy implementation of gemm code loaded (config.blas.ldflags is empty)")
gemm_code += common_code gemm_code += common_code
gemm_code += sgemm_code gemm_code += sgemm_code
gemm_code += dgemm_code gemm_code += dgemm_code
......
...@@ -63,7 +63,7 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -63,7 +63,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
self.filter_dilation = tuple(filter_dilation) self.filter_dilation = tuple(filter_dilation)
if not theano.config.blas.ldflags: if not theano.config.blas.ldflags:
# raise NotImplementedError("C code for corrMM* classes need a blas library.") # Theano will use a NumPy C implementation of [sd]gemm_ instead.
self.blas_type = '' self.blas_type = ''
else: else:
if 'openblas' in theano.config.blas.ldflags: if 'openblas' in theano.config.blas.ldflags:
......
...@@ -98,7 +98,7 @@ def local_abstractconv_gemm(node): ...@@ -98,7 +98,7 @@ def local_abstractconv_gemm(node):
def local_abstractconv3d_gemm(node): def local_abstractconv3d_gemm(node):
# If theano.config.blas.ldflags is empty, Theano will use # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_. # a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "": # or not theano.config.blas.ldflags: if theano.config.cxx == "":
return return
if not isinstance(node.op, AbstractConv3d): if not isinstance(node.op, AbstractConv3d):
return None return None
...@@ -120,7 +120,9 @@ def local_abstractconv3d_gemm(node): ...@@ -120,7 +120,9 @@ def local_abstractconv3d_gemm(node):
@local_optimizer([AbstractConv2d_gradWeights]) @local_optimizer([AbstractConv2d_gradWeights])
def local_abstractconv_gradweight_gemm(node): def local_abstractconv_gradweight_gemm(node):
if theano.config.cxx == "": # or not theano.config.blas.ldflags: # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "":
return return
if not isinstance(node.op, AbstractConv2d_gradWeights): if not isinstance(node.op, AbstractConv2d_gradWeights):
return None return None
...@@ -145,7 +147,9 @@ def local_abstractconv_gradweight_gemm(node): ...@@ -145,7 +147,9 @@ def local_abstractconv_gradweight_gemm(node):
@local_optimizer([AbstractConv3d_gradWeights]) @local_optimizer([AbstractConv3d_gradWeights])
def local_abstractconv3d_gradweight_gemm(node): def local_abstractconv3d_gradweight_gemm(node):
if theano.config.cxx == "": # or not theano.config.blas.ldflags: # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "":
return return
if not isinstance(node.op, AbstractConv3d_gradWeights): if not isinstance(node.op, AbstractConv3d_gradWeights):
return None return None
...@@ -170,7 +174,9 @@ def local_abstractconv3d_gradweight_gemm(node): ...@@ -170,7 +174,9 @@ def local_abstractconv3d_gradweight_gemm(node):
@local_optimizer([AbstractConv2d_gradInputs]) @local_optimizer([AbstractConv2d_gradInputs])
def local_abstractconv_gradinputs_gemm(node): def local_abstractconv_gradinputs_gemm(node):
if theano.config.cxx == "": # or not theano.config.blas.ldflags: # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "":
return return
if not isinstance(node.op, AbstractConv2d_gradInputs): if not isinstance(node.op, AbstractConv2d_gradInputs):
return None return None
...@@ -193,7 +199,9 @@ def local_abstractconv_gradinputs_gemm(node): ...@@ -193,7 +199,9 @@ def local_abstractconv_gradinputs_gemm(node):
@local_optimizer([AbstractConv3d_gradInputs]) @local_optimizer([AbstractConv3d_gradInputs])
def local_abstractconv3d_gradinputs_gemm(node): def local_abstractconv3d_gradinputs_gemm(node):
if theano.config.cxx == "": # or not theano.config.blas.ldflags: # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "":
return return
if not isinstance(node.op, AbstractConv3d_gradInputs): if not isinstance(node.op, AbstractConv3d_gradInputs):
return None return None
......
...@@ -363,8 +363,7 @@ class BaseTestConv(object): ...@@ -363,8 +363,7 @@ class BaseTestConv(object):
class BaseTestConv2d(BaseTestConv): class BaseTestConv2d(BaseTestConv):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
# if theano.config.blas.ldflags == '': # These tests can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("BLAS required for reference")
cls.inputs_shapes = [(8, 1, 6, 6), (8, 1, 8, 8), (2, 1, 7, 7), cls.inputs_shapes = [(8, 1, 6, 6), (8, 1, 8, 8), (2, 1, 7, 7),
(6, 1, 10, 11), (2, 1, 6, 5), (1, 5, 9, 9)] (6, 1, 10, 11), (2, 1, 6, 5), (1, 5, 9, 9)]
cls.filters_shapes = [(5, 1, 2, 2), (4, 1, 3, 3), (2, 1, 3, 3), cls.filters_shapes = [(5, 1, 2, 2), (4, 1, 3, 3), (2, 1, 3, 3),
...@@ -414,12 +413,12 @@ class BaseTestConv2d(BaseTestConv): ...@@ -414,12 +413,12 @@ class BaseTestConv2d(BaseTestConv):
class TestCorrConv2d(BaseTestConv2d): class TestCorrConv2d(BaseTestConv2d):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
# if theano.config.blas.ldflags == "": raise SkipTest() # These tests can run even when theano.config.blas.ldflags is empty.
BaseTestConv2d.setup_class() BaseTestConv2d.setup_class()
def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1)): def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
o = self.get_output_shape(i, f, s, b, fd) o = self.get_output_shape(i, f, s, b, fd)
# if (not theano.config.blas.ldflags or # This test can run even when theano.config.blas.ldflags is empty.
if (not theano.config.cxx or if (not theano.config.cxx or
theano.config.mode == "FAST_COMPILE"): theano.config.mode == "FAST_COMPILE"):
raise SkipTest("Need blas to test conv2d") raise SkipTest("Need blas to test conv2d")
...@@ -443,8 +442,7 @@ class TestCorrConv2d(BaseTestConv2d): ...@@ -443,8 +442,7 @@ class TestCorrConv2d(BaseTestConv2d):
class TestAbstractConvNoOptim(BaseTestConv2d): class TestAbstractConvNoOptim(BaseTestConv2d):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
# if theano.config.blas.ldflags == "": # These tests can run even when theano.config.blas.ldflags is empty.
# raise SkipTest()
BaseTestConv2d.setup_class() BaseTestConv2d.setup_class()
cls.inputs_shapes = [(8, 1, 6, 6)] cls.inputs_shapes = [(8, 1, 6, 6)]
cls.filters_shapes = [(5, 1, 2, 2)] cls.filters_shapes = [(5, 1, 2, 2)]
...@@ -517,8 +515,7 @@ class TestCpuConv2d(BaseTestConv2d): ...@@ -517,8 +515,7 @@ class TestCpuConv2d(BaseTestConv2d):
gradinput_OK = False gradinput_OK = False
if fwd_OK: if fwd_OK:
# if not theano.config.blas.ldflags: # This test can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("Need blas to test conv2d")
self.run_fwd(inputs_shape=i, filters_shape=f, self.run_fwd(inputs_shape=i, filters_shape=f,
subsample=s, verify_grad=(gradweight_OK and gradinput_OK), subsample=s, verify_grad=(gradweight_OK and gradinput_OK),
mode=mode, provide_shape=provide_shape, mode=mode, provide_shape=provide_shape,
...@@ -540,8 +537,7 @@ class TestCpuConv2d(BaseTestConv2d): ...@@ -540,8 +537,7 @@ class TestCpuConv2d(BaseTestConv2d):
filter_dilation=fd) filter_dilation=fd)
if gradweight_OK: if gradweight_OK:
# if not theano.config.blas.ldflags: # This test can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("Need blas to test conv2d")
self.run_gradweight(inputs_shape=i, filters_shape=f, self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=False, mode=mode, verify_grad=False, mode=mode,
...@@ -566,8 +562,7 @@ class TestCpuConv2d(BaseTestConv2d): ...@@ -566,8 +562,7 @@ class TestCpuConv2d(BaseTestConv2d):
filter_dilation=fd) filter_dilation=fd)
if gradinput_OK: if gradinput_OK:
# if not theano.config.blas.ldflags: # This test can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("Need blas to test conv2d")
self.run_gradinput(inputs_shape=i, filters_shape=f, self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=False, mode=mode, verify_grad=False, mode=mode,
...@@ -595,8 +590,7 @@ class TestCpuConv2d(BaseTestConv2d): ...@@ -595,8 +590,7 @@ class TestCpuConv2d(BaseTestConv2d):
class BaseTestConv3d(BaseTestConv): class BaseTestConv3d(BaseTestConv):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
# if theano.config.blas.ldflags == '': # These tests can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("BLAS required for reference")
cls.inputs_shapes = [(2, 1, 5, 5, 5), (1, 2, 7, 5, 6)] cls.inputs_shapes = [(2, 1, 5, 5, 5), (1, 2, 7, 5, 6)]
cls.filters_shapes = [(2, 1, 2, 2, 2), (1, 2, 2, 1, 3)] cls.filters_shapes = [(2, 1, 2, 2, 2), (1, 2, 2, 1, 3)]
cls.subsamples = [(1, 1, 1), (2, 2, 2), (1, 2, 3)] cls.subsamples = [(1, 1, 1), (2, 2, 2), (1, 2, 3)]
...@@ -644,13 +638,12 @@ class BaseTestConv3d(BaseTestConv): ...@@ -644,13 +638,12 @@ class BaseTestConv3d(BaseTestConv):
class TestCorrConv3d(BaseTestConv3d): class TestCorrConv3d(BaseTestConv3d):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
# if theano.config.blas.ldflags == "": # These tests can run even when theano.config.blas.ldflags is empty.
# raise SkipTest()
BaseTestConv3d.setup_class() BaseTestConv3d.setup_class()
def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)): def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)):
o = self.get_output_shape(i, f, s, b, fd) o = self.get_output_shape(i, f, s, b, fd)
# if (not theano.config.blas.ldflags or # This test can run even when theano.config.blas.ldflags is empty.
if (not theano.config.cxx or if (not theano.config.cxx or
theano.config.mode == "FAST_COMPILE"): theano.config.mode == "FAST_COMPILE"):
raise SkipTest("Need blas to test conv3d") raise SkipTest("Need blas to test conv3d")
...@@ -698,8 +691,7 @@ class TestCpuConv3d(BaseTestConv3d): ...@@ -698,8 +691,7 @@ class TestCpuConv3d(BaseTestConv3d):
gradinput_OK = False gradinput_OK = False
if fwd_OK: if fwd_OK:
# if not theano.config.blas.ldflags: # This test can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("Need blas to test conv3d")
self.run_fwd(inputs_shape=i, filters_shape=f, self.run_fwd(inputs_shape=i, filters_shape=f,
subsample=s, verify_grad=(gradweight_OK and gradinput_OK), subsample=s, verify_grad=(gradweight_OK and gradinput_OK),
mode=mode, provide_shape=provide_shape, mode=mode, provide_shape=provide_shape,
...@@ -721,8 +713,7 @@ class TestCpuConv3d(BaseTestConv3d): ...@@ -721,8 +713,7 @@ class TestCpuConv3d(BaseTestConv3d):
filter_dilation=fd) filter_dilation=fd)
if gradweight_OK: if gradweight_OK:
# if not theano.config.blas.ldflags: # This test can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("Need blas to test conv3d")
self.run_gradweight(inputs_shape=i, filters_shape=f, self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=False, mode=mode, verify_grad=False, mode=mode,
...@@ -747,8 +738,7 @@ class TestCpuConv3d(BaseTestConv3d): ...@@ -747,8 +738,7 @@ class TestCpuConv3d(BaseTestConv3d):
filter_dilation=fd) filter_dilation=fd)
if gradinput_OK: if gradinput_OK:
# if not theano.config.blas.ldflags: # This test can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("Need blas to test conv3d")
self.run_gradinput(inputs_shape=i, filters_shape=f, self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=False, mode=mode, verify_grad=False, mode=mode,
...@@ -912,13 +902,13 @@ class TestConvTypes(unittest.TestCase): ...@@ -912,13 +902,13 @@ class TestConvTypes(unittest.TestCase):
class TestBilinearUpsampling(unittest.TestCase): class TestBilinearUpsampling(unittest.TestCase):
# If BLAS is not available on CPU, then we accept the fallback to the # If theano.config.blas.ldflags is empty, Theano will use
# slow Python implementation for that test. # a NumPy C implementation of [sd]gemm_.
compile_mode = theano.compile.mode.get_default_mode() compile_mode = theano.compile.mode.get_default_mode()
if theano.config.mode == "FAST_COMPILE": if theano.config.mode == "FAST_COMPILE":
compile_mode = compile_mode.excluding("conv_gemm") compile_mode = compile_mode.excluding("conv_gemm")
compile_mode = compile_mode.excluding('AbstractConvCheck') compile_mode = compile_mode.excluding('AbstractConvCheck')
elif not theano.config.cxx: # not theano.config.blas.ldflags or elif not theano.config.cxx:
compile_mode = compile_mode.excluding('AbstractConvCheck') compile_mode = compile_mode.excluding('AbstractConvCheck')
def numerical_kernel_1D(self, ratio): def numerical_kernel_1D(self, ratio):
......
...@@ -27,8 +27,7 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -27,8 +27,7 @@ class TestCorr2D(utt.InferShapeTester):
self.filters.name = 'default_filters' self.filters.name = 'default_filters'
if not conv.imported_scipy_signal and theano.config.cxx == "": if not conv.imported_scipy_signal and theano.config.cxx == "":
raise SkipTest("CorrMM tests need SciPy or a c++ compiler") raise SkipTest("CorrMM tests need SciPy or a c++ compiler")
# if not theano.config.blas.ldflags: # These tests can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("CorrMM tests need a BLAS")
def validate(self, image_shape, filter_shape, def validate(self, image_shape, filter_shape,
border_mode='valid', subsample=(1, 1), border_mode='valid', subsample=(1, 1),
......
...@@ -27,8 +27,7 @@ class TestCorr3D(utt.InferShapeTester): ...@@ -27,8 +27,7 @@ class TestCorr3D(utt.InferShapeTester):
self.filters.name = 'default_filters' self.filters.name = 'default_filters'
if not conv.imported_scipy_signal and theano.config.cxx == "": if not conv.imported_scipy_signal and theano.config.cxx == "":
raise SkipTest("Corr3dMM tests need SciPy or a c++ compiler") raise SkipTest("Corr3dMM tests need SciPy or a c++ compiler")
# if not theano.config.blas.ldflags: # These tests can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("Corr3dMM tests need a BLAS")
def validate(self, image_shape, filter_shape, def validate(self, image_shape, filter_shape,
border_mode='valid', subsample=(1, 1, 1), border_mode='valid', subsample=(1, 1, 1),
......
...@@ -95,11 +95,11 @@ class t_gemm(TestCase): ...@@ -95,11 +95,11 @@ class t_gemm(TestCase):
cmp_linker(copy(z), a, x, y, b, 'c|py') cmp_linker(copy(z), a, x, y, b, 'c|py')
cmp_linker(copy(z), a, x, y, b, 'py') cmp_linker(copy(z), a, x, y, b, 'py')
# if (config.blas.ldflags and not dtype.startswith("complex")
if (not dtype.startswith("complex") if (not dtype.startswith("complex")
and theano.config.cxx): and theano.config.cxx):
# If blas.ldflags is equal to '', the C code will not # If theano.config.blas.ldflags is empty, Theano will use
# be generated # a NumPy C implementation of [sd]gemm_.
cmp_linker(copy(z), a, x, y, b, 'c') cmp_linker(copy(z), a, x, y, b, 'c')
def test0a(self): def test0a(self):
......
...@@ -37,9 +37,7 @@ def skip_if_blas_ldflags_empty(*functions_detected): ...@@ -37,9 +37,7 @@ def skip_if_blas_ldflags_empty(*functions_detected):
class TestCGer(TestCase, TestOptimizationMixin): class TestCGer(TestCase, TestOptimizationMixin):
def setUp(self, dtype='float64'): def setUp(self, dtype='float64'):
# if theano.config.blas.ldflags == "": # These tests can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("This test is useful only when Theano"
# " is directly linked to blas.")
self.dtype = dtype self.dtype = dtype
self.mode = theano.compile.get_default_mode().including('fast_run') self.mode = theano.compile.get_default_mode().including('fast_run')
self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False)) self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False))
...@@ -124,9 +122,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -124,9 +122,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
""" """
def setUp(self, dtype='float64'): def setUp(self, dtype='float64'):
# if theano.config.blas.ldflags == "": # These tests can run even when theano.config.blas.ldflags is empty.
# raise SkipTest("This test is useful only when Theano"
# " is directly linked to blas.")
self.dtype = dtype self.dtype = dtype
self.mode = theano.compile.get_default_mode().including('fast_run') self.mode = theano.compile.get_default_mode().including('fast_run')
# matrix # matrix
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论