提交 44726397 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5778 from aam-at/magma_gpu

Add magma gpu svd and matrix inverse
......@@ -679,6 +679,29 @@ import theano and print the config variable, as in:
``'guess_once'``, ``'guess_on_shape_change'``, ``'time_once'``,
``'time_on_shape_change'``.
.. attribute:: config.magma.enabled
String value: ``'True'``, ``'False'``
Default: ``'False'``
If ``'True'``, use `magma <http://icl.cs.utk.edu/magma/>`_ for matrix
computations.
If ``'False'``, disable magma.
.. attribute:: config.magma.include_path
Default: ``''``
Location of the magma headers.
.. attribute:: config.magma.library_path
Default: ``''``
Location of the magma library.
.. attribute:: config.gcc.cxxflags
Default: ``""``
......
......@@ -361,6 +361,22 @@ AddConfigVar('dnn.enabled',
EnumStr("auto", "True", "False", "no_check"),
in_c_key=False)
# Configuration flags for the optional magma backend
# (http://icl.cs.utk.edu/magma/), used by the GPU linear-algebra ops below.
AddConfigVar('magma.include_path',
             "Location of the magma header",
             StrParam(''),
             in_c_key=False)

AddConfigVar('magma.library_path',
             "Location of the magma library",
             StrParam(''),
             in_c_key=False)

# Disabled by default: magma is an optional dependency and must be
# turned on explicitly by the user.
AddConfigVar('magma.enabled',
             " If True, use magma for matrix computation."
             " If False, disable magma",
             BoolParam(False),
             in_c_key=False)
# This flag determines whether or not to raise error/warning message if
# there is a CPU Op in the computational graph.
AddConfigVar(
......
from __future__ import absolute_import, division, print_function
import pkg_resources
import theano
import os
import warnings
from theano import Op
from theano.gpuarray import basic_ops, GpuArrayType
import pkg_resources
import numpy as np
from numpy.linalg.linalg import LinAlgError
import theano
from theano import Op, config, tensor
from theano.gof import COp
from theano.gpuarray import GpuArrayType
from .basic_ops import as_gpuarray_variable, gpu_contiguous, infer_context_name
from .type import gpu_context_type
try:
import pygpu
from pygpu.basic import triu, tril
......@@ -94,13 +99,13 @@ class GpuCusolverSolve(Op):
'GpuCusolverSolve Op can not be constructed.')
if skcuda.__version__ <= '0.5.1':
warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
context_name = basic_ops.infer_context_name(inp1, inp2)
context_name = infer_context_name(inp1, inp2)
inp1 = basic_ops.as_gpuarray_variable(inp1, context_name)
inp2 = basic_ops.as_gpuarray_variable(inp2, context_name)
inp1 = as_gpuarray_variable(inp1, context_name)
inp2 = as_gpuarray_variable(inp2, context_name)
inp1 = basic_ops.gpu_contiguous(inp1)
inp2 = basic_ops.gpu_contiguous(inp2)
inp1 = gpu_contiguous(inp1)
inp2 = gpu_contiguous(inp2)
# this op can only operate on float32 matrices
assert inp1.ndim == 2
......@@ -260,11 +265,11 @@ class GpuCholesky(Op):
if not pygpu_available:
raise RuntimeError('Missing pygpu or triu/tril functions.'
'Install or update libgpuarray.')
context_name = basic_ops.infer_context_name(inp)
context_name = infer_context_name(inp)
inp = basic_ops.as_gpuarray_variable(inp, context_name)
inp = as_gpuarray_variable(inp, context_name)
inp = basic_ops.gpu_contiguous(inp)
inp = gpu_contiguous(inp)
# this op can only operate on float32 matrices
# because of current implementation of triu/tril.
......@@ -341,3 +346,160 @@ class GpuCholesky(Op):
def gpu_cholesky(A, lower=True):
    """Return the Cholesky factor of matrix ``A`` computed on the GPU.

    Parameters
    ----------
    A : matrix
        Symmetric positive-definite input matrix.
    lower : bool
        If True, return the lower-triangular factor, else the upper one.
    """
    return GpuCholesky(lower)(A)
class GpuMagmaSVD(COp):
    """Computes the svd of a matrix :math:`A` using the magma library.

    The C implementation lives in ``magma_svd.c`` (``magma_sgesdd``);
    that code only supports float32 matrices.

    Parameters
    ----------
    full_matrices : bool
        If True, produce full-sized U and VT factors.
    compute_uv : bool
        If True the op returns (U, S, VT); otherwise only S.
    """
    __props__ = ('full_matrices', 'compute_uv')
    # The C code receives the GPU context object as its `params`.
    params_type = gpu_context_type

    def __init__(self, full_matrices=True, compute_uv=True):
        self.full_matrices = full_matrices
        self.compute_uv = compute_uv
        # Load the C implementation from the external source file.
        COp.__init__(self, ['magma_svd.c'], 'APPLY_SPECIFIC(magma_svd)')

    def c_headers(self):
        return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h',
                'gpuarray_helper.h', 'magma.h']

    def c_header_dirs(self):
        dirs = [os.path.dirname(__file__), pygpu.get_include()]
        # Let users point at a non-standard magma installation.
        if config.magma.include_path:
            dirs.append(config.magma.include_path)
        return dirs

    def c_libraries(self):
        return ['magma']

    def c_lib_dirs(self):
        if config.magma.library_path:
            return [config.magma.library_path]
        return []

    def make_node(self, A):
        ctx_name = infer_context_name(A)
        A = as_gpuarray_variable(A, ctx_name)
        if A.ndim != 2:
            raise LinAlgError("Matrix rank error")
        if self.compute_uv:
            # Outputs: U (matrix), S (vector of singular values), VT (matrix).
            return theano.Apply(self, [A],
                                [A.type(),
                                 GpuArrayType(A.dtype, broadcastable=[False],
                                              context_name=ctx_name)(),
                                 A.type()])
        else:
            return theano.Apply(self, [A],
                                [GpuArrayType(A.dtype, broadcastable=[False],
                                              context_name=ctx_name)()])

    def get_params(self, node):
        return node.inputs[0].type.context

    def get_op_params(self):
        # Compile-time #defines consumed by magma_svd.c.
        params = []
        if self.compute_uv:
            params.append(('COMPUTE_UV', '1'))
        if self.full_matrices:
            params.append(('FULL_MATRICES', '1'))
        return params

    def infer_shape(self, node, shapes):
        # Shapes follow the numpy.linalg.svd convention: for an (M, N)
        # input, S has length K = min(M, N); U is (M, M) or (M, K) and
        # VT is (N, N) or (K, N), depending on full_matrices.
        x_shape, = shapes
        M, N = x_shape
        K = tensor.minimum(M, N)
        s_shape = (K, )
        if self.compute_uv:
            u_shape = (M, M) if self.full_matrices else (M, K)
            vt_shape = (N, N) if self.full_matrices else (K, N)
            return [u_shape, s_shape, vt_shape]
        else:
            return [s_shape]
def gpu_svd(a, full_matrices=1, compute_uv=1):
    """
    This function performs the SVD on GPU.

    Parameters
    ----------
    a : matrix
        The (M, N) matrix to decompose.
    full_matrices : bool, optional
        If True (default), u and v have the shapes (M, M) and (N, N),
        respectively.
        Otherwise, the shapes are (M, K) and (K, N), respectively,
        where K = min(M, N).
    compute_uv : bool, optional
        Whether or not to compute u and v in addition to s.
        True by default.

    Returns
    -------
    U, S, VT : matrices
        The factors of the decomposition ``a = U * diag(S) * VT``.
        When ``compute_uv`` is False, only the vector of singular
        values ``S`` is returned.
    """
    return GpuMagmaSVD(full_matrices, compute_uv)(a)
class GpuMagmaMatrixInverse(COp):
    """Computes the inverse of a matrix :math:`A` using the magma library.

    The C implementation lives in ``magma_inv.c``
    (``magma_sgetrf_gpu`` + ``magma_sgetri_gpu``); that code only
    supports square float32 matrices.

    Parameters
    ----------
    inplace : bool
        If True, the op overwrites its input buffer with the result.
    """
    __props__ = ('inplace', )
    # The C code receives the GPU context object as its `params`.
    params_type = gpu_context_type

    def __init__(self, inplace=False):
        COp.__init__(self, ['magma_inv.c'], 'APPLY_SPECIFIC(magma_inv)')
        self.inplace = inplace
        if self.inplace:
            # Declare that output 0 destroys input 0.
            self.destroy_map = {0: [0]}

    def c_headers(self):
        return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h',
                'gpuarray_helper.h', 'magma.h']

    def c_header_dirs(self):
        dirs = [os.path.dirname(__file__), pygpu.get_include()]
        # Let users point at a non-standard magma installation.
        if config.magma.include_path:
            dirs.append(config.magma.include_path)
        return dirs

    def c_libraries(self):
        return ['magma']

    def c_lib_dirs(self):
        if config.magma.library_path:
            return [config.magma.library_path]
        return []

    def clone_inplace(self):
        # Used by the inplace-optimization pass to swap in the
        # destructive variant of this op.
        return self.__class__(inplace=True)

    def make_node(self, x):
        ctx_name = infer_context_name(x)
        x = as_gpuarray_variable(x, ctx_name)
        if x.ndim != 2:
            raise LinAlgError("Matrix rank error")
        return theano.Apply(self, [x], [x.type()])

    def get_params(self, node):
        return node.inputs[0].type.context

    def get_op_params(self):
        # Compile-time #define consumed by magma_inv.c.
        if self.inplace:
            return [('INPLACE', '1')]
        else:
            return []

    def infer_shape(self, node, shapes):
        # The inverse has the same shape as the input.
        return shapes
def gpu_matrix_inverse(a):
    """
    This function performs the matrix inverse on GPU.

    Parameters
    ----------
    a : matrix
        The square matrix to invert.

    Returns
    -------
    a_inv: matrix
        The inverse of ``a``.
    """
    return GpuMagmaMatrixInverse()(a)
#section init_code

setup_ext_cuda();

#section support_code_struct

// Invert the square float32 matrix A on the GPU using MAGMA's LU
// factorization (magma_sgetrf_gpu) followed by magma_sgetri_gpu.
// When compiled with INPLACE, A is overwritten; otherwise the result
// is written into a copy. Returns 0 on success, non-zero on failure
// with a Python exception set.
int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **A_inv,
                              PyGpuContextObject *c) {
  const size_t *dims;
  magma_int_t N, ldwork, info;
  magma_int_t *piv = NULL;
  gpudata *dwork = NULL;
  int res = -1;

  if (A->ga.typecode != GA_FLOAT) {
    PyErr_SetString(PyExc_TypeError,
                    "GpuMagmaMatrixInverse: Unsupported data type");
    return -1;
  }

  // This is early to match the exit() in the fail label.
  cuda_enter(c->ctx);
  magma_init();

  if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
    PyErr_SetString(PyExc_ValueError,
                    "GpuMagmaMatrixInverse: requires data to be C-contiguous");
    // FIX: was `return 1;`, which skipped magma_finalize()/cuda_exit()
    // and leaked the entered context.
    goto fail;
  }

  if (PyGpuArray_NDIM(A) != 2) {
    PyErr_SetString(PyExc_ValueError,
                    "GpuMagmaMatrixInverse: matrix rank error");
    goto fail;
  }

  dims = PyGpuArray_DIMS(A);
  if (dims[0] != dims[1]) {
    PyErr_SetString(PyExc_ValueError,
                    "GpuMagmaMatrixInverse: matrix is not square");
    goto fail;
  }

#ifdef INPLACE
  // Destructive mode: the output aliases the input.
  Py_XDECREF(*A_inv);
  *A_inv = A;
  Py_INCREF(*A_inv);
#else
  *A_inv = theano_try_copy(*A_inv, A);
  if (*A_inv == NULL) {
    PyErr_SetString(
        PyExc_RuntimeError,
        "GpuMagmaMatrixInverse: failed to allocate memory for the output");
    goto fail;
  }
#endif

  // magma matrix inverse
  N = dims[0];

  // Workspace size recommended by magma for sgetri.
  ldwork = N * magma_get_sgetri_nb(N);
  dwork = gpudata_alloc(c->ctx, ldwork * sizeof(float), NULL, 0, NULL);
  if (dwork == NULL) {
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaMatrixInverse: failed to allocate working memory");
    goto fail;
  }
  if (magma_imalloc_cpu(&piv, N)) {
    PyErr_SetString(
        PyExc_RuntimeError,
        "GpuMagmaMatrixInverse: failed to allocate memory for the pivot array");
    goto fail;
  }

  // LU factorization, then inversion from the factors.
  magma_sgetrf_gpu(N, N, (float *)PyGpuArray_DEV_DATA(*A_inv), N, piv, &info);
  if (info != 0) {
    PyErr_Format(
        PyExc_RuntimeError,
        "GpuMagmaMatrixInverse: magma_sgetrf_gpu returned error %d: %s.", info,
        magma_strerror(info));
    goto fail;
  }
  magma_sgetri_gpu(N, (float *)PyGpuArray_DEV_DATA(*A_inv), N, piv,
                   *(float **)dwork, ldwork, &info);
  if (info != 0) {
    PyErr_Format(
        PyExc_RuntimeError,
        "GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d: %s.", info,
        magma_strerror(info));
    goto fail;
  }
  res = 0;
fail:
  if (piv != NULL)
    // FIX: piv was allocated with magma_imalloc_cpu (host memory), so it
    // must be released with magma_free_cpu, not magma_free (device free).
    magma_free_cpu(piv);
  if (dwork != NULL)
    gpudata_release(dwork);
  magma_finalize();
  cuda_exit(c->ctx);
  return res;
}
#section init_code

setup_ext_cuda();

#section support_code_struct

// Compute the singular value decomposition of the float32 matrix A
// with MAGMA's divide-and-conquer driver (magma_sgesdd). U and VT are
// only produced when compiled with COMPUTE_UV. Because magma works on
// column-major data while theano arrays are C-contiguous, the matrix
// is passed transposed and the U/VT buffers are swapped on output so
// the results match numpy.linalg.svd. Returns 0 on success, non-zero
// on failure with a Python exception set.
int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
#ifdef COMPUTE_UV
                              PyGpuArrayObject **U,
#endif
                              PyGpuArrayObject **S,
#ifdef COMPUTE_UV
                              PyGpuArrayObject **VT,
#endif
                              PyGpuContextObject *c) {
  magma_int_t *iwork = NULL, iunused[1];
  magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info;
  magma_vec_t jobz;
  size_t s_dims[1], u_dims[2], vt_dims[2];
  float *a_data = NULL, *s_data = NULL, *u_data = NULL, *vt_data = NULL,
        *work = NULL;
  float dummy[1];
  int res = -1, lwork;

  if (A->ga.typecode != GA_FLOAT) {
    // FIX: error messages in this file said "GpuMagmaMatrixInverse"
    // (copy-paste from magma_inv.c); this op is GpuMagmaSVD.
    PyErr_SetString(PyExc_TypeError,
                    "GpuMagmaSVD: Unsupported data type");
    return -1;
  }

  // This is early to match the exit() in the fail label.
  cuda_enter(c->ctx);
  magma_init();

  if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
    PyErr_SetString(PyExc_ValueError,
                    "GpuMagmaSVD: requires data to be C-contiguous");
    // FIX: was `return 1;`, which skipped magma_finalize()/cuda_exit()
    // and leaked the entered context.
    goto fail;
  }
  if (PyGpuArray_NDIM(A) != 2) {
    PyErr_SetString(PyExc_ValueError,
                    "GpuMagmaSVD: matrix rank error");
    goto fail;
  }

  // magma matrix svd
  // reverse dimensions because MAGMA expects column-major matrices:
  M = PyGpuArray_DIM(A, 1);
  N = PyGpuArray_DIM(A, 0);
  K = std::min(M, N);

  if (MAGMA_SUCCESS != magma_smalloc_pinned(&a_data, M * N)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaSVD: failed to allocate memory");
    goto fail;
  }
  // NOTE(review): a_data is pinned *host* memory, yet the direction flag
  // is cudaMemcpyDeviceToDevice; this relies on CUDA unified addressing
  // resolving the actual direction -- confirm on the supported toolkits.
  cudaMemcpy(a_data, PyGpuArray_DEV_DATA(A), M * N * sizeof(float),
             cudaMemcpyDeviceToDevice);

  if (MAGMA_SUCCESS != magma_smalloc_pinned(&s_data, K)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaSVD: failed to allocate memory");
    goto fail;
  }

#ifdef COMPUTE_UV
#ifdef FULL_MATRICES
  jobz = MagmaAllVec;
#else
  jobz = MagmaSomeVec;
#endif
  M_U  = (jobz == MagmaAllVec ? M : K);
  N_VT = (jobz == MagmaAllVec ? N : K);
  ldu = M;
  ldv = N_VT;

  if (MAGMA_SUCCESS != magma_smalloc_pinned(&u_data, M_U * M)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaSVD: failed to allocate memory");
    goto fail;
  }
  if (MAGMA_SUCCESS != magma_smalloc_pinned(&vt_data, N * N_VT)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaSVD: failed to allocate memory");
    goto fail;
  }
#else
  jobz = MagmaNoVec;
  ldu = M;
  ldv = N;
#endif

  // query for workspace size (lwork = -1 convention)
  magma_sgesdd(jobz, M, N, NULL, M, NULL, NULL, ldu, NULL, ldv,
               dummy, -1, iunused, &info);

  lwork = (magma_int_t) MAGMA_S_REAL(dummy[0]);
  if (MAGMA_SUCCESS != magma_smalloc_pinned(&work, lwork)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaSVD: failed to allocate working memory");
    goto fail;
  }
  // Integer workspace of size 8*min(M,N), per the sgesdd interface.
  if (MAGMA_SUCCESS != magma_imalloc_cpu(&iwork, 8*K)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaSVD: failed to allocate working memory");
    goto fail;
  }

  // compute svd
  magma_sgesdd(jobz, M, N, a_data, M, s_data,
               u_data, ldu, vt_data, ldv, work, lwork, iwork, &info);
  if (info > 0) {
    PyErr_Format(
        PyExc_RuntimeError,
        "GpuMagmaSVD: the updating process of SBDSDC did not converge (error: %d)",
        info);
    goto fail;
  } else if (info < 0) {
    // FIX: the message named magma_sgesdd_gpu, but the call above is
    // magma_sgesdd.
    PyErr_Format(
        PyExc_RuntimeError,
        "GpuMagmaSVD: magma_sgesdd argument %d has an illegal value", -info);
    goto fail;
  }

  s_dims[0] = K;
  if (theano_prep_output(S, 1, s_dims, A->ga.typecode, GA_C_ORDER, c) != 0){
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaSVD: failed to allocate memory");
    goto fail;
  }
  cudaMemcpy(PyGpuArray_DEV_DATA(*S), s_data, K * sizeof(float),
             cudaMemcpyDeviceToDevice);

#ifdef COMPUTE_UV
  u_dims[0] = N; u_dims[1] = N_VT;
  if (theano_prep_output(U, 2, u_dims, A->ga.typecode, GA_C_ORDER, c) != 0){
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaSVD: failed to allocate memory");
    goto fail;
  }
  // magma expects column-major matrices. Exchange u_data -> VT and vt_data -> U
  // to match numpy.linalg.svd output
  cudaMemcpy(PyGpuArray_DEV_DATA(*U), vt_data, N * N_VT * sizeof(float),
             cudaMemcpyDeviceToDevice);

  vt_dims[0] = M_U; vt_dims[1] = M;
  if (theano_prep_output(VT, 2, vt_dims, A->ga.typecode, GA_C_ORDER, c) != 0){
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaSVD: failed to allocate memory");
    goto fail;
  }
  // magma expects column-major matrices. Exchange u_data -> VT and vt_data -> U
  // to match numpy.linalg.svd output
  cudaMemcpy(PyGpuArray_DEV_DATA(*VT), u_data, M_U * M * sizeof(float),
             cudaMemcpyDeviceToDevice);
#endif
  res = 0;
fail:
  if (a_data != NULL)
    magma_free_pinned(a_data);
  if (s_data != NULL)
    magma_free_pinned(s_data);
  if (u_data != NULL)
    magma_free_pinned(u_data);
  if (vt_data != NULL)
    magma_free_pinned(vt_data);
  if (work != NULL)
    magma_free_pinned(work);
  if (iwork != NULL)
    magma_free_cpu(iwork);
  magma_finalize();
  cuda_exit(c->ctx);
  return res;
}
......@@ -32,6 +32,7 @@ from theano.tensor.nnet.abstract_conv import (BaseAbstractConv,
AbstractConv3d,
AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs)
import theano.tensor.nlinalg as nlinalg
import theano.tensor.signal.pool as pool
import theano.tensor.slinalg as slinalg
......@@ -71,7 +72,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax
from .linalg import (GpuCusolverSolve, MATRIX_STRUCTURES_SOLVE, GpuCholesky,
cusolver_available)
cusolver_available, GpuMagmaMatrixInverse, GpuMagmaSVD)
_logger = logging.getLogger("theano.gpuarray.opt")
......@@ -2004,6 +2005,34 @@ def local_inplace_cholesky(node):
if isinstance(node.op, GpuCholesky) and not node.op.inplace:
return [GpuCholesky(lower=node.op.lower, inplace=True)(*node.inputs)]
@register_opt('magma', 'fast_compile')
@op_lifter([nlinalg.MatrixInverse])
@register_opt2([theano.tensor.nlinalg.MatrixInverse], 'magma', 'fast_compile')
def local_gpu_matrix_inverse(op, context_name, inputs, outputs):
    # Lift the CPU MatrixInverse op to its magma GPU counterpart, but
    # only when the user opted in via the `magma.enabled` config flag.
    if not config.magma.enabled:
        return
    return GpuMagmaMatrixInverse()
@register_inplace()
@local_optimizer([GpuMagmaMatrixInverse])
def local_inplace_matrix_inverse_inplace(node):
    # Swap a non-destructive GpuMagmaMatrixInverse for its inplace
    # variant so the inversion can reuse the input buffer.
    if isinstance(node.op, GpuMagmaMatrixInverse):
        if not node.op.inplace:
            return [node.op.clone_inplace()(*node.inputs)]
@register_opt('magma', 'fast_compile')
@op_lifter([nlinalg.SVD])
@register_opt2([theano.tensor.nlinalg.SVD], 'magma', 'fast_compile')
def local_gpu_svd(op, context_name, inputs, outputs):
    # Lift the CPU SVD op to the magma GPU op, preserving its
    # full_matrices/compute_uv settings; only when magma is enabled.
    if not config.magma.enabled:
        return
    return GpuMagmaSVD(full_matrices=op.full_matrices,
                       compute_uv=op.compute_uv)
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
optdb.register('gpua_scanOp_make_inplace',
......
from __future__ import absolute_import, division, print_function
import unittest
import numpy as np
import theano
from numpy.linalg.linalg import LinAlgError
import theano
from theano import config
from theano.gpuarray.linalg import (GpuCholesky, GpuMagmaMatrixInverse,
cusolver_available, gpu_matrix_inverse,
gpu_solve, gpu_svd)
from theano.tensor.nlinalg import matrix_inverse
from theano.tests import unittest_tools as utt
from .config import mode_with_gpu
from numpy.linalg.linalg import LinAlgError
from .. import gpuarray_shared_constructor
from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand
# Skip tests if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
from theano.gpuarray.linalg import (cusolver_available, gpu_solve, GpuCholesky)
if not cusolver_available:
raise SkipTest('Optional package scikits.cuda.cusolver not available')
class TestCusolver(unittest.TestCase):
def setUp(self):
if not cusolver_available:
self.skipTest('Optional package scikits.cuda.cusolver not available')
class TestCusolver(unittest.TestCase):
def run_gpu_solve(self, A_val, x_val, A_struct=None):
b_val = np.dot(A_val, x_val)
b_val_trans = np.dot(A_val.T, x_val)
......@@ -117,6 +123,8 @@ class TestCusolver(unittest.TestCase):
class TestGpuCholesky(unittest.TestCase):
def setUp(self):
    # Skip at test time (not import time) when scikit-cuda's cusolver
    # bindings are unavailable, then seed the RNG for reproducibility.
    if not cusolver_available:
        self.skipTest('Optional package scikits.cuda.cusolver not available')
    utt.seed_rng()
def get_gpu_cholesky_func(self, lower=True, inplace=False):
......@@ -191,3 +199,97 @@ class TestGpuCholesky(unittest.TestCase):
A_val = -M_val.dot(M_val.T)
fn = self.get_gpu_cholesky_func(True, False)
self.assertRaises(LinAlgError, fn, A_val)
class TestMagma(unittest.TestCase):
    """End-to-end tests for the magma-backed GPU ops
    (GpuMagmaMatrixInverse and GpuMagmaSVD).

    Every test is skipped unless the `magma.enabled` config flag is set.
    """

    def setUp(self):
        if not config.magma.enabled:
            self.skipTest('Magma is not enabled, skipping test')

    def test_gpu_matrix_inverse(self):
        # inv(A) . A should be close to the identity.
        A = theano.tensor.fmatrix("A")

        fn = theano.function([A], gpu_matrix_inverse(A), mode=mode_with_gpu)
        N = 1000
        # Loose atol: float32 inversion of a random 1000x1000 matrix.
        A_val = rand(N, N).astype('float32')
        A_val_inv = fn(A_val)
        utt.assert_allclose(np.dot(A_val_inv, A_val), np.eye(N), atol=1e-3)

    def test_gpu_matrix_inverse_inplace(self):
        # After running the inplace op, the shared variable itself holds
        # the inverse; multiplying by the saved copy should give identity.
        N = 1000
        A_val_gpu = gpuarray_shared_constructor(rand(N, N).astype('float32'))
        A_val_copy = A_val_gpu.get_value()
        fn = theano.function([], GpuMagmaMatrixInverse(inplace=True)(A_val_gpu),
                             mode=mode_with_gpu, accept_inplace=True)
        fn()
        utt.assert_allclose(np.dot(A_val_gpu.get_value(), A_val_copy), np.eye(N), atol=1e-3)

    def test_gpu_matrix_inverse_inplace_opt(self):
        # The inplace-optimization pass should rewrite matrix_inverse
        # into an inplace GpuMagmaMatrixInverse node.
        A = theano.tensor.fmatrix("A")
        fn = theano.function([A], matrix_inverse(A), mode=mode_with_gpu)
        assert any([
            node.op.inplace
            for node in fn.maker.fgraph.toposort() if
            isinstance(node.op, GpuMagmaMatrixInverse)
        ])

    def run_gpu_svd(self, A_val, full_matrices=True, compute_uv=True):
        # Helper: compile and run gpu_svd on a concrete value.
        A = theano.tensor.fmatrix("A")
        f = theano.function(
            [A], gpu_svd(A, full_matrices=full_matrices, compute_uv=compute_uv),
            mode=mode_with_gpu)
        return f(A_val)

    def assert_column_orthonormal(self, Ot):
        # Ot^T . Ot == I means the columns of Ot are orthonormal.
        utt.assert_allclose(np.dot(Ot.T, Ot), np.eye(Ot.shape[1]))

    def check_svd(self, A, U, S, VT, rtol=None, atol=None):
        # Reassemble U . diag(S) . VT and compare against A.
        S_m = np.zeros_like(A)
        np.fill_diagonal(S_m, S)
        utt.assert_allclose(
            np.dot(np.dot(U, S_m), VT), A, rtol=rtol, atol=atol)

    def test_gpu_svd_wide(self):
        # More rows than columns (M > N).
        A = rand(100, 50).astype('float32')
        M, N = A.shape

        U, S, VT = self.run_gpu_svd(A)
        self.assert_column_orthonormal(U)
        self.assert_column_orthonormal(VT.T)
        self.check_svd(A, U, S, VT)

        # Reduced factors: U is (M, K) and VT is (K, N) with K = min(M, N).
        U, S, VT = self.run_gpu_svd(A, full_matrices=False)
        self.assertEqual(U.shape[1], min(M, N))
        self.assert_column_orthonormal(U)
        self.assertEqual(VT.shape[0], min(M, N))
        self.assert_column_orthonormal(VT.T)

    def test_gpu_svd_tall(self):
        # More columns than rows (M < N).
        A = rand(50, 100).astype('float32')
        M, N = A.shape

        U, S, VT = self.run_gpu_svd(A)
        self.assert_column_orthonormal(U)
        self.assert_column_orthonormal(VT.T)
        self.check_svd(A, U, S, VT)

        U, S, VT = self.run_gpu_svd(A, full_matrices=False)
        self.assertEqual(U.shape[1], min(M, N))
        self.assert_column_orthonormal(U)
        self.assertEqual(VT.shape[0], min(M, N))
        self.assert_column_orthonormal(VT.T)

    def test_gpu_singular_values(self):
        # compute_uv=False: GPU singular values must match the CPU op's.
        A = theano.tensor.fmatrix("A")
        f_cpu = theano.function(
            [A], theano.tensor.nlinalg.svd(A, compute_uv=False),
            mode=mode_without_gpu)
        f_gpu = theano.function(
            [A], gpu_svd(A, compute_uv=False), mode=mode_with_gpu)

        A_val = rand(50, 100).astype('float32')
        utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))

        A_val = rand(100, 50).astype('float32')
        utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
......@@ -616,18 +616,37 @@ class SVD(Op):
def make_node(self, x):
    """Build the Apply node for the SVD of matrix ``x``.

    Returns (U, S, VT) when ``self.compute_uv`` is True, otherwise
    only the vector of singular values S.
    """
    # NOTE: the scraped diff left the pre-change body (w/u/v outputs and
    # an early `return Apply(...)`) above this code, which made the new
    # compute_uv-aware branch unreachable; the residue is removed here.
    x = as_tensor_variable(x)
    assert x.ndim == 2, "The input of svd function should be a matrix."

    s = theano.tensor.vector(dtype=x.dtype)
    if self.compute_uv:
        u = theano.tensor.matrix(dtype=x.dtype)
        vt = theano.tensor.matrix(dtype=x.dtype)
        return Apply(self, [x], [u, s, vt])
    else:
        return Apply(self, [x], [s])
def perform(self, node, inputs, outputs):
    """Run numpy's SVD (``self._numop``) and fill the output storage.

    Writes (U, S, VT) when ``self.compute_uv`` is True, otherwise
    only S.
    """
    # NOTE: the scraped diff left the pre-change lines
    # `(w, u, v) = outputs` and `w[0], u[0], v[0] = self._numop(...)`
    # interleaved with the new branch below; the residue is removed here.
    (x,) = inputs
    assert x.ndim == 2, "The input of svd function should be a matrix."
    if self.compute_uv:
        u, s, vt = outputs
        u[0], s[0], vt[0] = self._numop(x,
                                        self.full_matrices,
                                        self.compute_uv)
    else:
        s, = outputs
        s[0] = self._numop(x, self.full_matrices, self.compute_uv)
def infer_shape(self, node, shapes):
    """Symbolic output shapes for the SVD of an (M, N) matrix.

    S always has length K = min(M, N). With compute_uv, U is (M, M)
    or (M, K) and VT is (N, N) or (K, N), depending on full_matrices.
    """
    (in_shape,) = shapes
    n_rows, n_cols = in_shape
    k = tensor.minimum(n_rows, n_cols)
    s_shape = (k, )
    if not self.compute_uv:
        return [s_shape]
    if self.full_matrices:
        u_shape = (n_rows, n_rows)
        vt_shape = (n_cols, n_cols)
    else:
        u_shape = (n_rows, k)
        vt_shape = (k, n_cols)
    return [u_shape, s_shape, vt_shape]
def svd(a, full_matrices=1, compute_uv=1):
......
from __future__ import absolute_import, print_function, division
import unittest
import itertools
import numpy as np
import numpy.linalg
from numpy.testing import assert_array_almost_equal
......@@ -20,7 +21,7 @@ from theano.tensor.nlinalg import (
AllocDiag, alloc_diag, ExtractDiag, extract_diag, diag,
trace, Det, det, Eig, eig, Eigh, EighGrad, eigh,
matrix_dot, _zero_disconnected, qr, matrix_power,
norm, svd, TensorInv, tensorinv, tensorsolve)
norm, svd, SVD, TensorInv, tensorinv, tensorsolve)
from nose.plugins.attrib import attr
from nose.plugins.skip import SkipTest
......@@ -128,18 +129,46 @@ def test_qr_modes():
assert "name 'complete' is not defined" in str(e)
# NOTE: the scraped diff interleaved the removed module-level `test_svd`
# function (and three of its trailing asserts, which referenced names
# undefined at class scope) with the new class; the class below is the
# reconstructed post-change version.
class test_SVD(utt.InferShapeTester):
    """Tests for the SVD Op: values against numpy.linalg.svd and
    infer_shape for all full_matrices/compute_uv combinations."""
    op_class = SVD
    dtype = 'float32'

    def setUp(self):
        super(test_SVD, self).setUp()
        self.rng = np.random.RandomState(utt.fetch_seed())
        self.A = theano.tensor.matrix(dtype=self.dtype)
        self.op = svd

    def test_svd(self):
        A = tensor.matrix("A", dtype=self.dtype)
        U, S, VT = svd(A)
        fn = function([A], [U, S, VT])
        a = self.rng.rand(4, 4).astype(self.dtype)
        n_u, n_s, n_vt = np.linalg.svd(a)
        t_u, t_s, t_vt = fn(a)

        assert _allclose(n_u, t_u)
        assert _allclose(n_s, t_s)
        assert _allclose(n_vt, t_vt)

        # compute_uv=False returns only the singular values.
        fn = function([A], svd(A, compute_uv=False))
        t_s = fn(a)
        assert _allclose(n_s, t_s)

    def test_svd_infer_shape(self):
        self.validate_shape((4, 4), full_matrices=True, compute_uv=True)
        self.validate_shape((4, 4), full_matrices=False, compute_uv=True)
        self.validate_shape((2, 4), full_matrices=False, compute_uv=True)
        self.validate_shape((4, 2), full_matrices=False, compute_uv=True)
        self.validate_shape((4, 4), compute_uv=False)

    def validate_shape(self, shape, compute_uv=True, full_matrices=True):
        # Compile and check that inferred shapes match the computed ones.
        A = self.A
        A_v = self.rng.rand(*shape).astype(self.dtype)
        outputs = self.op(A, full_matrices=full_matrices, compute_uv=compute_uv)
        if not compute_uv:
            outputs = [outputs]
        self._compile_and_check([A], outputs, [A_v], self.op_class, warn=False)
def test_tensorsolve():
......@@ -403,8 +432,7 @@ class test_Eig(utt.InferShapeTester):
super(test_Eig, self).setUp()
self.rng = np.random.RandomState(utt.fetch_seed())
self.A = theano.tensor.matrix(dtype=self.dtype)
self.X = np.asarray(self.rng.rand(5, 5),
dtype=self.dtype)
self.X = np.asarray(self.rng.rand(5, 5), dtype=self.dtype)
self.S = self.X.dot(self.X.T)
def test_infer_shape(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论