Commit 66716e23 authored by Frédéric Bastien

Merge pull request #1885 from Tanjay94/svd_op

Svd op
...@@ -5,10 +5,12 @@ import sys ...@@ -5,10 +5,12 @@ import sys
import numpy import numpy
import theano import theano
from theano import gof, Type, Apply from theano import gof, Type, Apply
from theano import tensor, scalar, config from theano import tensor, scalar, config
from theano.compat.six import StringIO from theano.compat.six import StringIO
from theano.scalar import Scalar from theano.scalar import Scalar
scal = scalar # somewhere scalar gets reassigned to be a function scal = scalar # somewhere scalar gets reassigned to be a function
from theano.gof.python25 import all, any from theano.gof.python25 import all, any
...@@ -3534,7 +3536,7 @@ __global__ void kEye(float* a, int n, int m) { ...@@ -3534,7 +3536,7 @@ __global__ void kEye(float* a, int n, int m) {
cudaGetErrorString(sts), cudaGetErrorString(sts),
dims[0], dims[1]); dims[0], dims[1]);
%(fail)s; %(fail)s;
} }
""" % locals() """ % locals()
return s return s
......
...@@ -489,7 +489,8 @@ class MatrixPinv(Op): ...@@ -489,7 +489,8 @@ class MatrixPinv(Op):
:math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then :math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
:math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`. :math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.
Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity matrix. Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity
matrix.
This method is not faster then `matrix_inverse`. Its strength comes from This method is not faster then `matrix_inverse`. Its strength comes from
that it works for non-square matrices. that it works for non-square matrices.
If you have a square matrix though, `matrix_inverse` can be both more If you have a square matrix though, `matrix_inverse` can be both more
...@@ -519,14 +520,10 @@ class MatrixPinv(Op): ...@@ -519,14 +520,10 @@ class MatrixPinv(Op):
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z, )): def perform(self, node, (x,), (z, )):
try: if imported_scipy:
if imported_scipy: z[0] = scipy.linalg.pinv(x).astype(x.dtype)
z[0] = scipy.linalg.pinv(x).astype(x.dtype) else:
else: z[0] = numpy.linalg.pinv(x).astype(x.dtype)
z[0] = numpy.linalg.pinv(x).astype(x.dtype)
except numpy.linalg.LinAlgError:
logger.debug('Failed to invert %s' % str(node.inputs[0]))
raise
def __str__(self): def __str__(self):
return "MatrixPseudoInverse" return "MatrixPseudoInverse"
...@@ -857,6 +854,7 @@ def spectral_radius_bound(X, log2_exponent): ...@@ -857,6 +854,7 @@ def spectral_radius_bound(X, log2_exponent):
if log2_exponent <= 0: if log2_exponent <= 0:
raise ValueError('spectral_radius_bound requires a strictly positive ' raise ValueError('spectral_radius_bound requires a strictly positive '
'exponent', log2_exponent) 'exponent', log2_exponent)
XX = X XX = X
for i in xrange(log2_exponent): for i in xrange(log2_exponent):
XX = tensor.dot(XX, XX) XX = tensor.dot(XX, XX)
...@@ -865,41 +863,6 @@ def spectral_radius_bound(X, log2_exponent): ...@@ -865,41 +863,6 @@ def spectral_radius_bound(X, log2_exponent):
2 ** (-log2_exponent)) 2 ** (-log2_exponent))
class A_Xinv_b(Op):
    """Product of the form ``a * inv(X) * b``.

    ``X`` is inverted through a Cholesky factorization, so it is assumed
    to be symmetric positive-definite (scipy raises otherwise).
    """
    def make_node(self, a, X, b):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the A_Xinv_b op")
        a = as_tensor_variable(a)
        X = as_tensor_variable(X)
        b = as_tensor_variable(b)
        assert a.ndim == 2
        assert X.ndim == 2
        assert b.ndim == 2
        # BUG FIX: was `x.dtype` -- `x` is undefined here (NameError);
        # the output dtype comes from the matrix being inverted.
        o = theano.tensor.matrix(dtype=X.dtype)
        return Apply(self, [a, X, b], [o])

    def perform(self, node, inputs, outstor):
        # BUG FIX: parameter was misspelled `ndoe`; also removed the dead
        # `if 1: ... else: raise NotImplementedError(self.X_structure)`
        # scaffold (the else branch was unreachable).
        a, X, b = inputs
        # Factor X once, then solve X xb = b and X xa = a.T, giving
        # a inv(X) b = (inv(X) a.T).T dot (inv(X) b).
        L_factor = scipy.linalg.cho_factor(X)
        xb = scipy.linalg.cho_solve(L_factor, b)
        xa = scipy.linalg.cho_solve(L_factor, a.T)
        outstor[0][0] = numpy.dot(xa.T, xb)

    def grad(self, inputs, g_outputs):
        gz, = g_outputs
        a, X, b = inputs
        iX = matrix_inverse(X)
        ga = matrix_dot(gz, b.T, iX.T)
        gX = -matrix_dot(iX.T, a, gz, b.T, iX.T)
        # BUG FIX: was `ix.T` -- undefined name (NameError at runtime).
        gb = matrix_dot(iX.T, a.T, gz)
        return [ga, gX, gb]
class Eig(Op): class Eig(Op):
"""Compute the eigenvalues and right eigenvectors of a square array. """Compute the eigenvalues and right eigenvectors of a square array.
...@@ -928,12 +891,7 @@ class Eig(Op): ...@@ -928,12 +891,7 @@ class Eig(Op):
return Apply(self, [x], [w, v]) return Apply(self, [x], [w, v])
def perform(self, node, (x,), (w, v)): def perform(self, node, (x,), (w, v)):
try: w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
except numpy.linalg.LinAlgError:
logger.debug('Failed to find %s of %s' % (self._numop.__name__,
node.inputs[0]))
raise
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
n = shapes[0][0] n = shapes[0][0]
...@@ -945,6 +903,201 @@ class Eig(Op): ...@@ -945,6 +903,201 @@ class Eig(Op):
eig = Eig() eig = Eig()
class SVD(Op):
# See doc in the docstring of the function just after this class.
_numop = staticmethod(numpy.linalg.svd)
def __init__(self, full_matrices=True, compute_uv=True):
"""
inputs :
--------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
"""
self.full_matrices = full_matrices
self.compute_uv = compute_uv
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def props(self):
return self.full_matrices, self.compute_uv,
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of svd function should be a matrix."
w = theano.tensor.matrix(dtype=x.dtype)
u = theano.tensor.matrix(dtype=x.dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, u, v])
def perform(self, node, (x,), (w, u, v)):
assert x.ndim == 2, "The input of svd function should be a matrix."
w[0], u[0], v[0] = self._numop(x,
self.full_matrices,
self.compute_uv)
def __str__(self):
return self._numop.__name__.capitalize()
def svd(a, full_matrices=1, compute_uv=1):
    """
    This function performs the SVD on CPU.

    Parameters :
    ------------
    a : matrix (2-d tensor variable)
        The matrix to be decomposed.
    full_matrices : bool, optional
        If True (default), u and v have the shapes (M, M) and (N, N),
        respectively.
        Otherwise, the shapes are (M, K) and (K, N), respectively,
        where K = min(M, N).
    compute_uv : bool, optional
        Whether or not to compute u and v in addition to s.
        True by default.

    Returns :
    -------
    The three outputs of numpy.linalg.svd, in numpy's order:
    u, the singular values s, and v^H.
    """
    return SVD(full_matrices, compute_uv)(a)
class QRFull(Op):
"""
Full QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr, where q is orthonormal
and r is upper-triangular.
"""
_numop = staticmethod(numpy.linalg.qr)
def __init__(self, mode):
self.mode = mode
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of qr function should be a matrix."
q = theano.tensor.matrix(dtype=x.dtype)
r = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [q, r])
def props(self):
return self.mode
def perform(self, node, (x,), (q, r)):
assert x.ndim == 2, "The input of qr function should be a matrix."
q[0], r[0] = self._numop(x,
self.mode)
def __str__(self):
return self._numop.__class__.__name__
class QRIncomplete(Op):
"""
Incomplete QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr and return a single matrix.
"""
_numop = staticmethod(numpy.linalg.qr)
def __init__(self, mode):
self.mode = mode
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def props(self):
return self.mode
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of qr function should be a matrix."
q = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [q])
def perform(self, node, (x,), (q,)):
assert x.ndim == 2, "The input of qr function should be a matrix."
q[0] = self._numop(x,
self.mode)
def __str__(self):
return self._numop.__class__.__name__
def qr(a, mode="full"):
    """
    Computes the QR decomposition of a matrix.
    Factor the matrix a as qr, where q
    is orthonormal and r is upper-triangular.

    Parameters :
    ------------
    a : array_like, shape (M, N)
        Matrix to be factored.
    mode : {'reduced', 'complete', 'r', 'raw', 'full', 'economic'}, optional
        If K = min(M, N), then
        'reduced' : returns q, r with dimensions (M, K), (K, N) (default)
        'complete' : returns q, r with dimensions (M, M), (M, N)
        'r' : returns r only with dimensions (K, N)
        'raw' : returns h, tau with dimensions (N, M), (K,)
        'full' : alias of 'reduced', deprecated
        'economic' : returns h from 'raw', deprecated. The options 'reduced',
        'complete', and 'raw' are new in numpy 1.8, see the notes for more
        information. The default is 'reduced' and to maintain backward
        compatibility with earlier versions of numpy both it and the old
        default 'full' can be omitted. Note that array h returned in 'raw'
        mode is transposed for calling Fortran. The 'economic' mode is
        deprecated. The modes 'full' and 'economic' may be passed using only
        the first letter for backwards compatibility, but all others
        must be spelled out.
        Default mode is 'full' which is also default for numpy 1.6.1.

        Note: Default mode was left to full as full and reduced are both doing
        the same thing in the new numpy version but only full works on the old
        previous numpy version.

    Returns :
    ---------
    q : matrix of float or complex, optional
        A matrix with orthonormal columns. When mode = 'complete'
        the result is an orthogonal/unitary matrix depending on whether
        or not a is real/complex. The determinant may be either +/- 1 in
        that case.
    r : matrix of float or complex, optional
        The upper-triangular matrix.
    """
    # Probe the installed numpy with a tiny fixed matrix to find out, at
    # graph-construction time, whether this `mode` makes numpy.linalg.qr
    # return a (q, r) tuple or a single array -- the answer depends on
    # the numpy version, so it cannot be hard-coded per mode name.
    x = [[2, 1], [3, 4]]
    if isinstance(numpy.linalg.qr(x,mode), tuple):
        # Two outputs: q and r.
        return QRFull(mode)(a)
    else:
        # Single output (e.g. mode 'r' or 'economic').
        return QRIncomplete(mode)(a)
def _zero_disconnected(outputs, grads): def _zero_disconnected(outputs, grads):
l = [] l = []
for o, g in zip(outputs, grads): for o, g in zip(outputs, grads):
......
...@@ -26,6 +26,8 @@ from theano.sandbox.linalg.ops import (cholesky, ...@@ -26,6 +26,8 @@ from theano.sandbox.linalg.ops import (cholesky,
AllocDiag, AllocDiag,
alloc_diag, alloc_diag,
det, det,
svd,
qr,
#PSD_hint, #PSD_hint,
trace, trace,
matrix_dot, matrix_dot,
...@@ -39,6 +41,7 @@ from theano.sandbox.linalg.ops import (cholesky, ...@@ -39,6 +41,7 @@ from theano.sandbox.linalg.ops import (cholesky,
from theano.sandbox.linalg import eig, eigh, eigvalsh from theano.sandbox.linalg import eig, eigh, eigvalsh
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from nose.plugins.attrib import attr from nose.plugins.attrib import attr
from nose.tools import assert_raises
def check_lower_triangular(pd, ch_f): def check_lower_triangular(pd, ch_f):
...@@ -177,6 +180,50 @@ def test_matrix_dot(): ...@@ -177,6 +180,50 @@ def test_matrix_dot():
assert _allclose(numpy_sol, theano_sol) assert _allclose(numpy_sol, theano_sol)
def test_qr_modes():
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
a = rng.rand(4, 4).astype(theano.config.floatX)
f = function([A], qr(A))
t_qr = f(a)
n_qr = numpy.linalg.qr(a)
assert _allclose(n_qr, t_qr)
for mode in ["reduced", "r", "raw", "full", "economic"]:
f = function([A], qr(A, mode))
t_qr = f(a)
n_qr = numpy.linalg.qr(a, mode)
if isinstance(n_qr, (list, tuple)):
assert _allclose(n_qr[0], t_qr[0])
assert _allclose(n_qr[1], t_qr[1])
else:
assert _allclose(n_qr, t_qr)
try:
n_qr = numpy.linalg.qr(a, "complete")
f = function([A], qr(A, "complete"))
t_qr = f(a)
assert _allclose(n_qr, t_qr)
except TypeError, e:
assert "name 'complete' is not defined" in str(e)
def test_svd():
    """Check theano's svd against numpy.linalg.svd on a random matrix."""
    rng = numpy.random.RandomState(utt.fetch_seed())

    A = tensor.matrix("A", dtype=theano.config.floatX)
    U, V, T = svd(A)
    fn = function([A], [U, V, T])

    a = rng.rand(4, 4).astype(theano.config.floatX)
    # Outputs come back in numpy's order: u, singular values, v^H.
    for n_out, t_out in zip(numpy.linalg.svd(a), fn(a)):
        assert _allclose(n_out, t_out)
def test_inverse_singular(): def test_inverse_singular():
singular = numpy.array([[1, 0, 0]] + [[0, 1, 0]] * 2, singular = numpy.array([[1, 0, 0]] + [[0, 1, 0]] * 2,
dtype=theano.config.floatX) dtype=theano.config.floatX)
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment