提交 3227d08e authored 作者: notoraptor's avatar notoraptor

New update.

Use "PyArray_Transpose" in gemm implementation Add a test to test_blas.py:TestBlasStrides to check is gemm works well with non-contiguous matrices (A,B,C are all passed as non-contiguous). All tests passed ! Recall: these are the tests executed: theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests theano/tensor/nnet/tests/test_abstract_conv.py:TestAbstractConvNoOptim theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests theano/tensor/nnet/tests/test_abstract_conv.py:TestBilinearUpsampling theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests theano/tensor/nnet/tests/test_abstract_conv.py:TestCorrConv2d theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests theano/tensor/nnet/tests/test_abstract_conv.py:TestCorrConv3d theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests theano/tensor/nnet/tests/test_abstract_conv.py:TestCpuConv2d theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests theano/tensor/nnet/tests/test_abstract_conv.py:TestCpuConv3d theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests theano/tensor/tests/test_blas_c.py theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests theano/tensor/tests/test_blas.py theano-cache purge && THEANO_FLAGS=blas.ldflags= nosetests theano/tensor/tests/test_blas_scipy.py theano-cache purge && THEANO_FLAGS=optdb.max_use_ratio=10,blas.ldflags= nosetests theano/tensor/nnet/tests/test_corr3d.py theano-cache purge && THEANO_FLAGS=optdb.max_use_ratio=7,blas.ldflags= nosetests theano/tensor/nnet/tests/test_corr.py
上级 da835094
......@@ -48,11 +48,11 @@ void alt_numpy_matrix_extended_sum_inplace_%(float_type)s(
} while(get_next(iterators));
NpyIter_Deallocate(iterators);
}
/* NumPy Wrapping function. Wraps a data into a Numpy's PyArrayObject.
/* NumPy Wrapping function. Wraps a data into a NumPy's PyArrayObject.
* By default, data is considered as Fortran-style array (column by column).
* If to_transpose, data will be considered as C-style array (row by row)
* with dimensions inversed. */
PyObject* alt_op_without_copy_%(float_type)s(int to_transpose, %(float_type)s* M, int nrow, int ncol, int LDM) {
* with dimensions reversed. */
PyObject* alt_op_%(float_type)s(int to_transpose, %(float_type)s* M, int nrow, int ncol, int LDM) {
npy_intp dims[2];
npy_intp strides[2];
if(to_transpose) {
......@@ -140,8 +140,8 @@ void %(name)s(
* consider the buffer as a F-contiguous M*N matrix, so that
* it will get the transposed of op_B_transposed * op_A_transposed,
* that is op_A * op_B (M*N matrix) as expected. */
PyObject* opA_transposed = alt_op_without_copy_%(float_type)s(!to_transpose_A, A, nrowa, ncola, *LDA);
PyObject* opB_transposed = alt_op_without_copy_%(float_type)s(!to_transpose_B, B, nrowb, ncolb, *LDB);
PyObject* opA_transposed = alt_op_%(float_type)s(!to_transpose_A, A, nrowa, ncola, *LDA);
PyObject* opB_transposed = alt_op_%(float_type)s(!to_transpose_B, B, nrowb, ncolb, *LDB);
PyObject* opB_trans_dot_opA_trans = PyArray_New(&PyArray_Type, 2, computation_dims, %(npy_float)s, computation_strides, computation_pointer, 0, computation_flags, NULL);
PyArray_MatrixProduct2(opB_transposed, opA_transposed, (PyArrayObject*)opB_trans_dot_opA_trans);
if(*BETA == 0) {
......@@ -151,16 +151,16 @@ void %(name)s(
/* A buffer has been created to compute ALPHA*op(A)*op(B),
* so we must copy it to the real output, that is C. */
PyObject* matrix_C = alt_wrap_fortran_writeable_matrix_%(float_type)s(C, M, N, LDC);
PyObject* alpha_opA_dot_opB = alt_op_without_copy_%(float_type)s(0, (%(float_type)s*)PyArray_DATA((PyArrayObject*)opB_trans_dot_opA_trans), *M, *N, *M);
PyObject* alpha_opA_dot_opB = PyArray_Transpose((PyArrayObject*)opB_trans_dot_opA_trans, NULL);
if(0 != PyArray_CopyInto((PyArrayObject*)matrix_C, (PyArrayObject*)alpha_opA_dot_opB))
alt_fatal_error("Numpy %(name)s implementation: unable to copy ALPHA*op(A)*op(B) to C when BETA == 0.");
alt_fatal_error("NumPy %(name)s implementation: unable to copy ALPHA*op(A)*op(B) into C when BETA == 0.");
Py_XDECREF(alpha_opA_dot_opB);
Py_XDECREF(matrix_C);
}
} else {
/* C is read, so we must consider it as Fortran-style matrix. */
PyObject* matrix_C = alt_wrap_fortran_writeable_matrix_%(float_type)s(C, M, N, LDC);
PyObject* opA_dot_opB = alt_op_without_copy_%(float_type)s(0, (%(float_type)s*)PyArray_DATA((PyArrayObject*)opB_trans_dot_opA_trans), *M, *N, *M);
PyObject* opA_dot_opB = PyArray_Transpose((PyArrayObject*)opB_trans_dot_opA_trans, NULL);
alt_numpy_matrix_extended_sum_inplace_%(float_type)s(ALPHA, (PyArrayObject*)opA_dot_opB, BETA, (PyArrayObject*)matrix_C);
Py_XDECREF(opA_dot_opB);
Py_XDECREF(matrix_C);
......
......@@ -2161,6 +2161,23 @@ class TestBlasStrides(TestCase):
self.cmp_ger((1, 0), 1, 0)
self.cmp_ger((0, 0), 0, 0)
def test_gemm_non_contiguous(self):
"""test_gemm_non_contiguous: Test if GEMM works well with non-contiguous matrices."""
aval = numpy.ones((6, 2))
bval = numpy.ones((2, 7))
cval = numpy.arange(7) + numpy.arange(0, .6, .1)[:, numpy.newaxis]
a = theano.shared(aval[:3], borrow=True)
b = theano.shared(bval[:, :5], borrow=True)
c = theano.shared(cval[:3, :5], borrow=True)
s = theano.tensor.scalar()
upd_c = s * c + theano.tensor.dot(a, b)
f = theano.function([s], [], updates={c: upd_c})
f(0)
ref_output = numpy.ones((3, 5)) * 2
unittest_tools.assert_allclose(c.get_value(), ref_output)
class test_infer_shape(unittest_tools.InferShapeTester):
def test_dot22(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论