Commit 92ad39a1 authored by Frédéric Bastien

Merge pull request #1908 from Tanjay94/linalg

Linalg
@@ -25,3 +25,5 @@ They are grouped into the following sections:
utils
extra_ops
io
slinalg
nlinalg
.. ../../../../theano/tensor/nlinalg.py
.. _libdoc_tensor_nlinalg:
===================================================================
:mod:`tensor.nlinalg` -- Linear Algebra Ops Using Numpy
===================================================================
.. module:: tensor.nlinalg
:platform: Unix, Windows
:synopsis: Linear Algebra Ops Using Numpy
.. moduleauthor:: LISA
API
===
.. automodule:: theano.tensor.nlinalg
:members:
.. ../../../../theano/tensor/slinalg.py
.. _libdoc_tensor_slinalg:
===================================================================
:mod:`tensor.slinalg` -- Linear Algebra Ops Using Scipy
===================================================================
.. module:: tensor.slinalg
:platform: Unix, Windows
:synopsis: Linear Algebra Ops Using Scipy
.. moduleauthor:: LISA
API
===
.. automodule:: theano.tensor.slinalg
:members:
@@ -197,7 +197,6 @@ else:
# This cannot be done in tensor/__init__.py due to a circular dependency -- randomstreams
# depends on raw_random which depends on tensor. As a work-around, we import RandomStreams
# here and inject an instance in tensor.
from theano import tensor
from theano.tensor.randomstreams import RandomStreams
# Imitate the numpy.random symbol with a tensor.random one
tensor.random = RandomStreams(seed=0xBAD5EED, no_warn=True)
@@ -42,7 +42,7 @@ from theano.sandbox.cuda.elemwise import erfinv_gpu
from theano.sandbox.cuda.var import CudaNdarrayConstant
from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.blas import _is_real_vector, _is_real_matrix
-linalg = None
+from theano.tensor import nlinalg
#optdb.print_summary() # shows what is currently registered
@@ -1643,31 +1643,26 @@ def tensor_to_cuda(x):
@register_opt()
-@local_optimizer(None) # XXX: linalg is in sandbox, so don't import it globally
+@local_optimizer([nlinalg.ExtractDiag])
def local_gpu_extract_diagonal(node):
"""
extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal)
gpu_from_host(extract_diagonal) -> extract_diagonal(gpu_from_host)
"""
-    global linalg
-    if linalg is None:
-        from theano.sandbox import linalg
-        linalg = theano.sandbox.linalg
-    if (isinstance(node.op, linalg.ops.ExtractDiag) and
+    if (isinstance(node.op, nlinalg.ExtractDiag) and
isinstance(node.inputs[0].type,
theano.tensor.TensorType)):
inp = node.inputs[0]
if inp.owner and isinstance(inp.owner.op, HostFromGpu):
-            return [host_from_gpu(linalg.extract_diag(gpu_from_host(inp)))]
+            return [host_from_gpu(nlinalg.extract_diag(gpu_from_host(inp)))]
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
if (host_input.owner and
-                isinstance(host_input.owner.op, linalg.ops.ExtractDiag) and
+                isinstance(host_input.owner.op, nlinalg.ExtractDiag) and
isinstance(host_input.owner.inputs[0].type,
theano.tensor.TensorType)):
diag_node = host_input.owner
-            return [linalg.extract_diag(
+            return [nlinalg.extract_diag(
gpu_from_host(diag_node.inputs[0]))]
return False
......
from theano import tensor
from theano.tensor.slinalg import kron
import warnings
def kron(a, b):
""" Kronecker product
Same as scipy.linalg.kron(a, b).
:note: numpy.kron(a, b) != scipy.linalg.kron(a, b)!
    They do not have the same shape and order when
    a.ndim != 2 or b.ndim != 2.
:param a: array_like
:param b: array_like
:return: array_like with a.ndim + b.ndim - 2 dimensions.
"""
a = tensor.as_tensor_variable(a)
b = tensor.as_tensor_variable(b)
if (a.ndim + b.ndim <= 2):
raise TypeError('kron: inputs dimensions must sum to 3 or more. '
'You passed %d and %d.' % (a.ndim, b.ndim))
o = tensor.outer(a, b)
o = o.reshape(tensor.concatenate((a.shape, b.shape)),
a.ndim + b.ndim)
shf = o.dimshuffle(0, 2, 1, * range(3, o.ndim))
if shf.ndim == 3:
shf = o.dimshuffle(1, 0, 2)
o = shf.flatten()
else:
o = shf.reshape((o.shape[0] * o.shape[2],
o.shape[1] * o.shape[3]) +
tuple([o.shape[i] for i in range(4, o.ndim)]))
return o
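As a quick check of the note above, a numpy/scipy sketch (illustrative only, not part of the commit): the two kron implementations agree for 2-D operands, and the result has a.ndim + b.ndim - 2 dimensions.

import numpy
import scipy.linalg

a = numpy.arange(6.).reshape(2, 3)
b = numpy.arange(4.).reshape(2, 2)
# For 2-D operands numpy.kron and scipy.linalg.kron coincide.
assert numpy.allclose(numpy.kron(a, b), scipy.linalg.kron(a, b))
# a.ndim + b.ndim - 2 == 2 dimensions, of shape (2*2, 3*2).
assert numpy.kron(a, b).shape == (4, 6)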
warnings.warn(
"theano modules are deprecated and will be removed in release 0.7",
stacklevel=3)
\ No newline at end of file
@@ -15,6 +15,42 @@ from theano.gof import local_optimizer
from theano.gof.opt import Optimizer
from theano.gradient import DisconnectedType
from theano.tensor.nlinalg import ( MatrixInverse,
matrix_inverse,
MatrixPinv,
pinv,
AllocDiag,
alloc_diag,
ExtractDiag,
extract_diag,
diag,
trace,
Det,
det,
Eig,
eig,
Eigh,
EighGrad,
eigh,
matrix_dot,
_zero_disconnected,
qr,
svd,
lstsq,
matrix_power,
norm
)
from theano.tensor.slinalg import ( Cholesky,
cholesky,
CholeskyGrad,
Solve,
solve,
Eigvalsh,
EigvalshGrad,
eigvalsh
)
try:
import scipy.linalg
imported_scipy = True
@@ -317,523 +353,6 @@ def local_log_pow(node):
return [exponent * tensor.log(base)]
def matrix_dot(*args):
""" Shorthand for product between several dots
Given :math:`N` matrices :math:`A_0, A_1, .., A_N`, ``matrix_dot`` will
generate the matrix product between all in the given order, namely
:math:`A_0 \cdot A_1 \cdot A_2 \cdot .. \cdot A_N`.
"""
rval = args[0]
for a in args[1:]:
rval = theano.tensor.dot(rval, a)
return rval
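A small numpy sketch of the same left-to-right chaining (illustrative only; matrix_dot builds this fold symbolically over theano.tensor.dot):

import numpy
from functools import reduce

A0 = 2.0 * numpy.eye(3)
A1 = numpy.arange(9.).reshape(3, 3)
A2 = numpy.ones((3, 3))
# Folding dot left-to-right is the product A0 . A1 . A2.
assert numpy.allclose(reduce(numpy.dot, [A0, A1, A2]),
                      numpy.dot(numpy.dot(A0, A1), A2))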
MATRIX_STRUCTURES = (
'general',
'symmetric',
'lower_triangular',
'upper_triangular',
'hermitian',
'banded',
'diagonal',
'toeplitz',
)
class Cholesky(Op):
"""
Return a triangular matrix square root of positive semi-definite `x`
L = cholesky(X, lower=True) implies dot(L, L.T) == X
"""
#TODO: inplace
#TODO: for specific dtypes
#TODO: LAPACK wrapper with in-place behavior, for solve also
def __init__(self, lower=True):
self.lower = lower
self.destructive = False
def props(self):
return (self.lower,
self.destructive)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def infer_shape(self, node, shapes):
return [shapes[0]]
def __str__(self):
if self.lower:
lu = 'lower'
else:
lu = 'upper'
if self.destructive:
destr = 'destructive'
else:
destr = 'non-destructive'
return 'Cholesky{%s,%s}' % (lu, destr)
def make_node(self, x):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the Cholesky op")
x = as_tensor_variable(x)
assert x.ndim == 2
return Apply(self, [x], [x.type()])
def perform(self, node, inputs, outputs):
x = inputs[0]
z = outputs[0]
z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
def grad(self, inputs, gradients):
return [CholeskyGrad(self.lower)(inputs[0], self(inputs[0]),
gradients[0])]
cholesky = Cholesky()
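A scipy sketch of the contract stated in the docstring (illustrative only): with lower=True, dot(L, L.T) reconstructs the input; with lower=False, dot(U.T, U) does.

import numpy
import scipy.linalg

rng = numpy.random.RandomState(0)
r = rng.randn(4, 4)
X = numpy.dot(r, r.T) + 4 * numpy.eye(4)  # positive definite
L = scipy.linalg.cholesky(X, lower=True)
U = scipy.linalg.cholesky(X, lower=False)
assert numpy.allclose(numpy.dot(L, L.T), X)
assert numpy.allclose(numpy.dot(U.T, U), X)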
class CholeskyGrad(Op):
"""
"""
def __init__(self, lower=True):
self.lower = lower
self.destructive = False
def props(self):
return (self.lower,
self.destructive)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def __str__(self):
if self.lower:
lu = 'lower'
else:
lu = 'upper'
if self.destructive:
destr = 'destructive'
else:
destr = 'non-destructive'
return 'CholeskyGrad{%s,%s}' % (lu, destr)
def make_node(self, x, l, dz):
x = as_tensor_variable(x)
l = as_tensor_variable(l)
dz = as_tensor_variable(dz)
assert x.ndim == 2
assert l.ndim == 2
assert dz.ndim == 2
assert l.owner.op.lower == self.lower, (
"lower/upper mismatch between Cholesky op and CholeskyGrad op"
)
return Apply(self, [x, l, dz], [x.type()])
def perform(self, node, inputs, outputs):
"""Implements the "reverse-mode" gradient [1]_ for the
Cholesky factorization of a positive-definite matrix.
.. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
Journal of Computational and Graphical Statistics,
Vol. 4, No. 2 (Jun.,1995), pp. 134-147
http://www.jstor.org/stable/1390762
"""
x = inputs[0]
L = inputs[1]
dz = inputs[2]
dx = outputs[0]
N = x.shape[0]
if self.lower:
F = numpy.tril(dz)
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
F[i, k] -= F[i, j] * L[j, k]
F[j, k] -= F[i, j] * L[i, k]
for j in xrange(k + 1, N):
F[j, k] /= L[k, k]
F[k, k] -= L[j, k] * F[j, k]
F[k, k] /= (2 * L[k, k])
else:
F = numpy.triu(dz)
M = N - 1
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
F[k, i] -= F[j, i] * L[k, j]
F[k, j] -= F[j, i] * L[k, i]
for j in xrange(k + 1, N):
F[k, j] /= L[k, k]
F[k, k] -= L[k, j] * F[k, j]
F[k, k] /= (2 * L[k, k])
dx[0] = F
def infer_shape(self, node, shapes):
return [shapes[0]]
class MatrixPinv(Op):
"""Computes the pseudo-inverse of a matrix :math:`A`.
The pseudo-inverse of a matrix A, denoted :math:`A^+`, is
defined as: "the matrix that 'solves' [the least-squares problem]
:math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
:math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.
Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity
matrix.
This method is not faster than `matrix_inverse`. Its strength is
that it works for non-square matrices.
If you have a square matrix, though, `matrix_inverse` can be both more
exact and faster to compute. Also, this op does not get optimized into a
solve op.
"""
def __init__(self):
pass
def props(self):
"""Function exposing different properties of each instance of the
op.
For the ``MatrixPinv`` op, there are no properties to be exposed.
"""
return ()
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
return Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z, )):
if imported_scipy:
z[0] = scipy.linalg.pinv(x).astype(x.dtype)
else:
z[0] = numpy.linalg.pinv(x).astype(x.dtype)
def __str__(self):
return "MatrixPseudoInverse"
pinv = MatrixPinv()
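A numpy sketch of the least-squares property described above (illustrative only): pinv(A).dot(b) matches the lstsq solution for a non-square A.

import numpy

rng = numpy.random.RandomState(2)
A = rng.randn(5, 3)  # non-square: matrix_inverse would not apply
b = rng.randn(5)
x_bar = numpy.linalg.pinv(A).dot(b)
x_lstsq = numpy.linalg.lstsq(A, b, -1)[0]  # rcond=-1 is the old default
assert numpy.allclose(x_bar, x_lstsq)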
class MatrixInverse(Op):
"""Computes the inverse of a matrix :math:`A`.
Given a square matrix :math:`A`, ``matrix_inverse`` returns a square
matrix :math:`A_{inv}` such that the dot product :math:`A \cdot A_{inv}`
and :math:`A_{inv} \cdot A` equals the identity matrix :math:`I`.
:note: When possible, the call to this op will be optimized to the call
of ``solve``.
"""
def __init__(self):
pass
def props(self):
"""Function exposing different properties of each instance of the
op.
For the ``MatrixInverse`` op, there are no properties to be exposed.
"""
return ()
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
return Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z, )):
try:
z[0] = numpy.linalg.inv(x).astype(x.dtype)
except numpy.linalg.LinAlgError:
logger.debug('Failed to invert %s' % str(node.inputs[0]))
raise
def grad(self, inputs, g_outputs):
r"""The gradient function should return
.. math:: V\frac{\partial X^{-1}}{\partial X},
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
one can deduce that the relation corresponds to
.. math:: (X^{-1} \cdot V^{T} \cdot X^{-1})^T.
"""
x, = inputs
xi = self(x)
gz, = g_outputs
#TT.dot(gz.T,xi)
return [-matrix_dot(xi, gz.T, xi).T]
def R_op(self, inputs, eval_points):
r"""The gradient function should return
.. math:: \frac{\partial X^{-1}}{\partial X}V,
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
one can deduce that the relation corresponds to
.. math:: X^{-1} \cdot V \cdot X^{-1}.
"""
x, = inputs
xi = self(x)
ev, = eval_points
if ev is None:
return [None]
return [-matrix_dot(xi, ev, xi)]
def __str__(self):
return "MatrixInverse"
matrix_inverse = MatrixInverse()
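A finite-difference check of the gradient formula used in ``grad`` above (illustrative only): for f(X) = sum(V * inv(X)), the gradient is -(inv(X) . V.T . inv(X)).T.

import numpy

rng = numpy.random.RandomState(3)
X = rng.randn(3, 3) + 3 * numpy.eye(3)  # comfortably invertible
V = rng.randn(3, 3)
xi = numpy.linalg.inv(X)
analytic = -(xi.dot(V.T).dot(xi)).T
f = lambda M: (V * numpy.linalg.inv(M)).sum()
eps = 1e-6
numeric = numpy.zeros_like(X)
for i in range(3):
    for j in range(3):
        E = numpy.zeros_like(X)
        E[i, j] = eps
        numeric[i, j] = (f(X + E) - f(X)) / eps
assert numpy.allclose(analytic, numeric, atol=1e-4)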
class Solve(Op):
"""Solve a system of linear equations"""
def __init__(self,
A_structure='general',
lower=False,
overwrite_A=False,
overwrite_b=False):
if A_structure not in MATRIX_STRUCTURES:
raise ValueError('Invalid matrix structure argument', A_structure)
self.A_structure = A_structure
self.lower = lower
self.overwrite_A = overwrite_A
self.overwrite_b = overwrite_b
def props(self):
return (self.A_structure,
self.lower,
self.overwrite_A,
self.overwrite_b)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return type(self) == type(other) and self.props() == other.props()
def __repr__(self):
return 'Solve{%s}' % str(self.props())
def make_node(self, A, b):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the Solve op")
A = as_tensor_variable(A)
b = as_tensor_variable(b)
assert A.ndim == 2
assert b.ndim in [1, 2]
otype = tensor.tensor(
broadcastable=b.broadcastable,
dtype=(A * b).dtype)
return Apply(self, [A, b], [otype])
def perform(self, node, inputs, output_storage):
A, b = inputs
#TODO: use the A_structure to go faster
output_storage[0][0] = scipy.linalg.solve(A, b)
# computes shape of x where x = inv(A) * b
def infer_shape(self, node, shapes):
Ashape, Bshape = shapes
rows = Ashape[1]
if len(Bshape) == 1: # b is a Vector
return [(rows,)]
else:
cols = Bshape[1] # b is a Matrix
return [(rows, cols)]
solve = Solve() # general solve
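A scipy sketch of what Solve.perform computes and of the shape rule encoded in infer_shape (illustrative only): the solution has b's shape.

import numpy
import scipy.linalg

rng = numpy.random.RandomState(4)
A = rng.randn(3, 3) + 3 * numpy.eye(3)
b_vec = rng.randn(3)
b_mat = rng.randn(3, 2)
assert scipy.linalg.solve(A, b_vec).shape == (3,)
assert scipy.linalg.solve(A, b_mat).shape == (3, 2)
assert numpy.allclose(A.dot(scipy.linalg.solve(A, b_mat)), b_mat)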
#TODO : SolveTriangular
#TODO: Optimizations to replace multiplication by matrix inverse
# with solve() Op (still unwritten)
class ExtractDiag(Op):
""" Return the diagonal of a matrix.
:note: works on the GPU.
"""
def __init__(self, view=False):
self.view = view
if self.view:
self.view_map = {0: [0]}
def __eq__(self, other):
return type(self) == type(other) and self.view == other.view
def __hash__(self):
return hash(type(self)) ^ hash(self.view)
def make_node(self, _x):
if not isinstance(_x, theano.Variable):
x = as_tensor_variable(_x)
else:
x = _x
if x.type.ndim != 2:
raise TypeError('ExtractDiag only works on matrices', _x)
return Apply(self, [x], [x.type.__class__(broadcastable=(False,),
dtype=x.type.dtype)()])
def perform(self, node, ins, outs):
""" For some reason numpy.diag(x) is really slow, so we
implemented our own. """
x, = ins
z, = outs
# zero-dimensional matrices ...
if x.shape[0] == 0 or x.shape[1] == 0:
z[0] = node.outputs[0].type.value_zeros((0,))
return
if x.shape[0] < x.shape[1]:
rval = x[:, 0]
else:
rval = x[0]
rval.strides = (x.strides[0] + x.strides[1],)
if self.view:
z[0] = rval
else:
z[0] = rval.copy()
def __str__(self):
return 'ExtractDiag{view=%s}' % self.view
def grad(self, inputs, g_outputs):
x = tensor.zeros_like(inputs[0])
xdiag = alloc_diag(g_outputs[0])
return [tensor.set_subtensor(
x[:xdiag.shape[0], :xdiag.shape[1]],
xdiag)]
def infer_shape(self, node, shapes):
x_s, = shapes
shp = tensor.min(node.inputs[0].shape)
return [(shp,)]
extract_diag = ExtractDiag()
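The perform method above walks the diagonal by giving a row (or column) view a single stride of x.strides[0] + x.strides[1]. A numpy sketch of the same trick, using as_strided instead of assigning to .strides (illustrative only):

import numpy
from numpy.lib.stride_tricks import as_strided

x = numpy.arange(12.).reshape(3, 4)
n = min(x.shape)
diag_view = as_strided(x, shape=(n,),
                       strides=(x.strides[0] + x.strides[1],))
assert numpy.allclose(diag_view, numpy.diag(x))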
#TODO: optimization to insert ExtractDiag with view=True
class AllocDiag(Op):
"""
Allocates a square matrix with the given vector as its diagonal.
"""
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def make_node(self, _x):
x = as_tensor_variable(_x)
if x.type.ndim != 1:
raise TypeError('AllocDiag only works on vectors', _x)
return Apply(self, [x], [tensor.matrix(dtype=x.type.dtype)])
def grad(self, inputs, g_outputs):
return [extract_diag(g_outputs[0])]
def perform(self, node, (x,), (z,)):
if x.ndim != 1:
raise TypeError(x)
z[0] = numpy.diag(x)
def infer_shape(self, node, shapes):
x_s, = shapes
return [(x_s[0], x_s[0])]
alloc_diag = AllocDiag()
def diag(x):
"""
Numpy-compatibility method
If `x` is a matrix, return its diagonal.
If `x` is a vector, return a matrix with it as its diagonal.
* This method does not support the `k` argument that numpy supports.
"""
xx = as_tensor_variable(x)
if xx.type.ndim == 1:
return alloc_diag(xx)
elif xx.type.ndim == 2:
return extract_diag(xx)
else:
raise TypeError('diag requires vector or matrix argument', x)
class Det(Op):
"""Matrix determinant
Input should be a square matrix
"""
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
o = theano.tensor.scalar(dtype=x.dtype)
return Apply(self, [x], [o])
def perform(self, node, (x,), (z, )):
try:
z[0] = numpy.asarray(numpy.linalg.det(x), dtype=x.dtype)
except Exception:
print 'Failed to compute determinant', x
raise
def grad(self, inputs, g_outputs):
gz, = g_outputs
x, = inputs
return [gz * self(x) * matrix_inverse(x).T]
def infer_shape(self, node, shapes):
return [()]
def __str__(self):
return "Det"
det = Det()
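The grad method implements Jacobi's formula, d det(X) / dX = det(X) * inv(X).T. A finite-difference check (illustrative only; det is linear in any single entry, so forward differences are essentially exact):

import numpy

rng = numpy.random.RandomState(5)
X = rng.randn(3, 3) + 3 * numpy.eye(3)
analytic = numpy.linalg.det(X) * numpy.linalg.inv(X).T
eps = 1e-6
numeric = numpy.zeros_like(X)
for i in range(3):
    for j in range(3):
        E = numpy.zeros_like(X)
        E[i, j] = eps
        numeric[i, j] = (numpy.linalg.det(X + E) - numpy.linalg.det(X)) / eps
assert numpy.allclose(analytic, numeric, atol=1e-3)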
def trace(X):
"""
Returns the sum of diagonal elements of matrix X.
:note: works on the GPU since 0.6rc4.
"""
return extract_diag(X).sum()
def spectral_radius_bound(X, log2_exponent):
"""
Returns an upper bound on the largest eigenvalue of the square symmetric matrix X.
@@ -861,579 +380,3 @@ def spectral_radius_bound(X, log2_exponent):
return tensor.pow(
trace(XX),
2 ** (-log2_exponent))
class Eig(Op):
"""Compute the eigenvalues and right eigenvectors of a square array.
"""
_numop = staticmethod(numpy.linalg.eig)
def props(self):
"""Function exposing different properties of each instance of the
op.
For the ``Eig`` op, there are no properties to be exposed.
"""
return ()
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
w = theano.tensor.vector(dtype=x.dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, v])
def perform(self, node, (x,), (w, v)):
w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
def infer_shape(self, node, shapes):
n = shapes[0][0]
return [(n,), (n, n)]
def __str__(self):
return self._numop.__name__.capitalize()
eig = Eig()
class SVD(Op):
# See doc in the docstring of the function just after this class.
_numop = staticmethod(numpy.linalg.svd)
def __init__(self, full_matrices=True, compute_uv=True):
"""
Parameters
----------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
"""
self.full_matrices = full_matrices
self.compute_uv = compute_uv
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def props(self):
return self.full_matrices, self.compute_uv,
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of svd function should be a matrix."
w = theano.tensor.matrix(dtype=x.dtype)
u = theano.tensor.matrix(dtype=x.dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, u, v])
def perform(self, node, (x,), (w, u, v)):
assert x.ndim == 2, "The input of svd function should be a matrix."
w[0], u[0], v[0] = self._numop(x,
self.full_matrices,
self.compute_uv)
def __str__(self):
return self._numop.__name__.capitalize()
def svd(a, full_matrices=1, compute_uv=1):
"""
This function performs the SVD on CPU.
Parameters
----------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
Returns
-------
U, V and D matrices.
"""
return SVD(full_matrices, compute_uv)(a)
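A numpy sketch of the full_matrices shapes documented above, plus the reconstruction identity (illustrative only):

import numpy

rng = numpy.random.RandomState(6)
a = rng.randn(5, 3)
u, s, vh = numpy.linalg.svd(a, full_matrices=True)
assert (u.shape, s.shape, vh.shape) == ((5, 5), (3,), (3, 3))
u, s, vh = numpy.linalg.svd(a, full_matrices=False)
assert (u.shape, s.shape, vh.shape) == ((5, 3), (3,), (3, 3))
assert numpy.allclose(u.dot(numpy.diag(s)).dot(vh), a)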
class QRFull(Op):
"""
Full QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr, where q is orthonormal
and r is upper-triangular.
"""
_numop = staticmethod(numpy.linalg.qr)
def __init__(self, mode):
self.mode = mode
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of qr function should be a matrix."
q = theano.tensor.matrix(dtype=x.dtype)
r = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [q, r])
def props(self):
return self.mode
def perform(self, node, (x,), (q, r)):
assert x.ndim == 2, "The input of qr function should be a matrix."
q[0], r[0] = self._numop(x,
self.mode)
def __str__(self):
return self._numop.__name__.capitalize()
class QRIncomplete(Op):
"""
Incomplete QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr and return a single matrix.
"""
_numop = staticmethod(numpy.linalg.qr)
def __init__(self, mode):
self.mode = mode
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def props(self):
return self.mode
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of qr function should be a matrix."
q = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [q])
def perform(self, node, (x,), (q,)):
assert x.ndim == 2, "The input of qr function should be a matrix."
q[0] = self._numop(x,
self.mode)
def __str__(self):
return self._numop.__name__.capitalize()
def qr(a, mode="full"):
"""
Computes the QR decomposition of a matrix.
Factor the matrix a as qr, where q
is orthonormal and r is upper-triangular.
Parameters
----------
a : array_like, shape (M, N)
Matrix to be factored.
mode : {'reduced', 'complete', 'r', 'raw', 'full', 'economic'}, optional
If K = min(M, N), then
'reduced' : returns q, r with dimensions (M, K), (K, N) (default)
'complete' : returns q, r with dimensions (M, M), (M, N)
'r' : returns r only with dimensions (K, N)
'raw' : returns h, tau with dimensions (N, M), (K,)
'full' : alias of 'reduced', deprecated
'economic' : returns h from 'raw', deprecated.
The options 'reduced', 'complete', and 'raw' are new in numpy 1.8; see
the notes for more information. The default is 'reduced', and to
maintain backward compatibility with earlier versions of numpy both it
and the old default 'full' can be omitted. Note that the array h
returned in 'raw' mode is transposed for calling Fortran. The
'economic' mode is deprecated. The modes 'full' and 'economic' may be
passed using only the first letter for backwards compatibility, but all
others must be spelled out.
Default mode is 'full', which is also the default for numpy 1.6.1.
Note: the default mode was left as 'full' because 'full' and 'reduced'
do the same thing in newer numpy versions, while only 'full' works on
older numpy versions.
Returns
-------
q : matrix of float or complex, optional
A matrix with orthonormal columns. When mode = 'complete'
the result is an orthogonal/unitary matrix depending on whether
or not a is real/complex. The determinant may be either +/- 1 in that case.
r : matrix of float or complex, optional
The upper-triangular matrix.
"""
# Probe numpy.linalg.qr with a small constant matrix to find out
# whether this mode returns a (q, r) tuple or a single array, and
# dispatch to the matching Op.
x = [[2, 1], [3, 4]]
if isinstance(numpy.linalg.qr(x, mode), tuple):
    return QRFull(mode)(a)
else:
    return QRIncomplete(mode)(a)
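A numpy sketch of that dispatch rule (illustrative only; 'reduced' is the modern spelling of the deprecated 'full' alias used as the default here):

import numpy

a = numpy.array([[2., 1.], [3., 4.]])
q, r = numpy.linalg.qr(a, mode='reduced')  # tuple -> QRFull
assert numpy.allclose(numpy.dot(q, r), a)
assert numpy.allclose(numpy.triu(r), r)  # r is upper-triangular
r_only = numpy.linalg.qr(a, mode='r')  # single array -> QRIncomplete
assert numpy.allclose(numpy.triu(r_only), r_only)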
def _zero_disconnected(outputs, grads):
l = []
for o, g in zip(outputs, grads):
if isinstance(g.type, DisconnectedType):
l.append(o.zeros_like())
else:
l.append(g)
return l
class Eigh(Eig):
"""
Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
"""
_numop = staticmethod(numpy.linalg.eigh)
def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
def __str__(self):
return 'Eigh{%s}' % self.UPLO
def props(self):
return self.UPLO,
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
# Numpy's linalg.eigh may return either double or single
# precision eigenvalues depending on the installed version of
# LAPACK. Rather than trying to reproduce the (rather
# involved) logic, we just probe linalg.eigh with a trivial
# input.
w_dtype = self._numop([[numpy.dtype(x.dtype).type()]])[0].dtype.name
w = theano.tensor.vector(dtype=w_dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, v])
def perform(self, node, (x,), (w, v)):
try:
w[0], v[0] = self._numop(x, self.UPLO)
except numpy.linalg.LinAlgError:
logger.debug('Failed to find %s of %s' % (self._numop.__name__,
node.inputs[0]))
raise
def grad(self, inputs, g_outputs):
r"""The gradient function should return
.. math:: \sum_n\left(W_n\frac{\partial\,w_n}
{\partial a_{ij}} +
\sum_k V_{nk}\frac{\partial\,v_{nk}}
{\partial a_{ij}}\right),
where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
:math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.
Analytic formulae for eigensystem gradients are well-known in
perturbation theory:
.. math:: \frac{\partial\,w_n}
{\partial a_{ij}} = v_{in}\,v_{jn}
.. math:: \frac{\partial\,v_{kn}}
{\partial a_{ij}} =
\sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
"""
x, = inputs
w, v = self(x)
# Replace gradients wrt disconnected variables with
# zeros. This is a work-around for issue #1063.
gw, gv = _zero_disconnected([w, v], g_outputs)
return [EighGrad(self.UPLO)(x, w, v, gw, gv)]
def eigh(a, UPLO='L'):
return Eigh(UPLO)(a)
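A numerical check of the first perturbation-theory formula in Eigh.grad (illustrative only): moving a symmetric off-diagonal pair (i, j) by eps changes w_n by 2 * v_in * v_jn * eps.

import numpy

rng = numpy.random.RandomState(7)
r = rng.randn(4, 4)
a = r + r.T  # symmetric, with (generically) simple eigenvalues
w, v = numpy.linalg.eigh(a)
i, j, n = 1, 2, 0
eps = 1e-6
E = numpy.zeros_like(a)
E[i, j] = E[j, i] = eps  # keep the perturbed matrix symmetric
w2, _ = numpy.linalg.eigh(a + E)
numeric = (w2[n] - w[n]) / eps
analytic = 2 * v[i, n] * v[j, n]  # a_ij and a_ji move together
assert abs(numeric - analytic) < 1e-3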
class EighGrad(Op):
"""Gradient of an eigensystem of a Hermitian matrix.
"""
def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
if UPLO == 'L':
self.tri0 = numpy.tril
self.tri1 = lambda a: numpy.triu(a, 1)
else:
self.tri0 = numpy.triu
self.tri1 = lambda a: numpy.tril(a, -1)
def props(self):
return (self.UPLO,)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def __str__(self):
return 'EighGrad{%s}' % self.UPLO
def make_node(self, x, w, v, gw, gv):
x, w, v, gw, gv = map(as_tensor_variable, (x, w, v, gw, gv))
assert x.ndim == 2
assert w.ndim == 1
assert v.ndim == 2
assert gw.ndim == 1
assert gv.ndim == 2
out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
gw.dtype, gv.dtype)
out = theano.tensor.matrix(dtype=out_dtype)
return Apply(self, [x, w, v, gw, gv], [out])
def perform(self, node, inputs, outputs):
r"""
Implements the "reverse-mode" gradient for the eigensystem of
a square matrix.
"""
x, w, v, W, V = inputs
N = x.shape[0]
outer = numpy.outer
G = lambda n: sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
for m in xrange(N) if m != n)
g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
for n in xrange(N))
# Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
# (triu(a)) only. This means that partial derivative of
# eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
# for i < j (i > j). At the same time, non-zero components of
# the gradient must account for the fact that variation of the
# opposite triangle contributes to variation of two elements
# of Hermitian (symmetric) matrix. The following line
# implements the necessary logic.
out = self.tri0(g) + self.tri1(g).T
# The call to self.tri0 in perform upcasts from float32 to
# float64, or from int* to int64, in numpy 1.6.1 but not in
# 1.6.2. We do not want version-dependent dtypes in Theano,
# so we cast to the dtype of the output.
outputs[0][0] = numpy.asarray(out, dtype=node.outputs[0].dtype)
def infer_shape(self, node, shapes):
return [shapes[0]]
class Eigvalsh(Op):
"""Generalized eigenvalues of a Hermetian positive definite eigensystem
"""
def __init__(self, lower=True):
assert lower in [True, False]
self.lower = lower
def props(self):
return (self.lower,)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, a, b):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the Eigvalsh op")
a = as_tensor_variable(a)
assert a.ndim == 2
if not isinstance(b, (theano.Variable)):
if b is None:
b = theano.tensor.NoneConst
out_dtype = a.dtype
else:
b = as_tensor_variable(b)
out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
elif not isinstance(b.type, theano.tensor.NoneTypeT):
b = as_tensor_variable(b)
out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
assert b.ndim == 2
else:
out_dtype = a.dtype
w = theano.tensor.vector(dtype=out_dtype)
return Apply(self, [a, b], [w])
def perform(self, node, (a, b), (w,)):
w[0] = scipy.linalg.eigvalsh(a=a, b=b, lower=self.lower)
def grad(self, inputs, g_outputs):
a, b = inputs
gw, = g_outputs
return EigvalshGrad(self.lower)(a, b, gw)
def infer_shape(self, node, shapes):
n = shapes[0][0]
return [(n,)]
class EigvalshGrad(Op):
"""Gradient of generalized eigenvalues of a Hermetian positive definite
eigensystem
"""
# Note: This Op (EigvalshGrad), should be removed and replaced with a graph
# of theano ops that is constructed directly in Eigvalsh.grad.
# But this can only be done once scipy.linalg.eigh is available as an Op
# (currently the Eigh uses numpy.linalg.eigh, which doesn't let you
# pass the right-hand-side matrix for a generalized eigenproblem.) See the
# discussion on github at
# https://github.com/Theano/Theano/pull/1846#discussion-diff-12486764
def __init__(self, lower=True):
assert lower in [True, False]
self.lower = lower
if lower:
self.tri0 = numpy.tril
self.tri1 = lambda a: numpy.triu(a, 1)
else:
self.tri0 = numpy.triu
self.tri1 = lambda a: numpy.tril(a, -1)
def props(self):
return (self.lower,)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, a, b, gw):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the GEigvalsh op")
a, b, gw = map(as_tensor_variable, (a, b, gw))
assert a.ndim == 2
assert b.ndim == 2
assert gw.ndim == 1
out_dtype = theano.scalar.upcast(a.dtype, b.dtype, gw.dtype)
out1 = theano.tensor.matrix(dtype=out_dtype)
out2 = theano.tensor.matrix(dtype=out_dtype)
return Apply(self, [a, b, gw], [out1, out2])
def perform(self, node, (a, b, gw), outputs):
w, v = scipy.linalg.eigh(a, b, lower=self.lower)
gA = v.dot(numpy.diag(gw).dot(v.T))
gB = - v.dot(numpy.diag(gw*w).dot(v.T))
# See EighGrad comments for an explanation of these lines
out1 = self.tri0(gA) + self.tri1(gA).T
out2 = self.tri0(gB) + self.tri1(gB).T
outputs[0][0] = numpy.asarray(out1, dtype=node.outputs[0].dtype)
outputs[1][0] = numpy.asarray(out2, dtype=node.outputs[1].dtype)
def infer_shape(self, node, shapes):
return [shapes[0], shapes[1]]
def eigvalsh(a, b, lower=True):
return Eigvalsh(lower)(a, b)
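A scipy sketch of the generalized problem being solved (illustrative only): eigvalsh(a, b) returns w such that a v = w b v for the generalized eigenvectors v.

import numpy
import scipy.linalg

rng = numpy.random.RandomState(8)
a = rng.randn(4, 4)
a = a + a.T
b = 10 * numpy.eye(4) + rng.randn(4, 4)
b = (b + b.T) / 2.0  # symmetric, positive definite
w = scipy.linalg.eigvalsh(a, b)
_, v = scipy.linalg.eigh(a, b)
assert numpy.allclose(a.dot(v), b.dot(v) * w)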
def matrix_power(M, n):
result = 1
for i in xrange(n):
result = theano.dot(result, M)
return result
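The scalar 1 seeds the repeated-dot loop above (dot(1, M) is just M). A numpy sketch of the same expansion checked against numpy.linalg.matrix_power (illustrative only):

import numpy

M = numpy.array([[1., 1.], [0., 1.]])
result = 1
for _ in range(3):  # mirrors the xrange(n) loop above
    result = numpy.dot(result, M)
assert numpy.allclose(result, numpy.linalg.matrix_power(M, 3))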
def norm(x,ord):
x = as_tensor_variable(x)
ndim = x.ndim
if ndim == 0:
raise ValueError("'axis' entry is out of bounds.")
elif ndim == 1:
if ord == None:
return tensor.sum(x**2)**0.5
elif ord == 'inf':
return tensor.max(abs(x))
elif ord == '-inf':
return tensor.min(abs(x))
elif ord == 0:
return x[x.nonzero()].shape[0]
else:
try:
z = tensor.sum(abs(x**ord))**(1./ord)
except TypeError:
raise ValueError("Invalid norm order for vectors.")
return z
elif ndim == 2:
if ord == None or ord == 'fro':
return tensor.sum(abs(x**2))**(0.5)
elif ord == 'inf':
return tensor.max(tensor.sum(abs(x), 1))
elif ord == '-inf':
return tensor.min(tensor.sum(abs(x), 1))
elif ord == 1:
return tensor.max(tensor.sum(abs(x), 0))
elif ord == -1:
return tensor.min(tensor.sum(abs(x),0))
else:
raise ValueError()
elif ndim > 2:
raise NotImplementedError("We don't support norm with ndim > 2")
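A numpy cross-check of a few of the branches above against numpy.linalg.norm (illustrative only):

import numpy

v = numpy.array([3., -4.])
m = numpy.array([[1., 2.], [3., 4.]])
assert numpy.allclose((v ** 2).sum() ** 0.5, numpy.linalg.norm(v))  # ord=None
assert numpy.allclose(abs(v).max(), numpy.linalg.norm(v, numpy.inf))  # 'inf'
assert numpy.allclose(abs(m).sum(axis=0).max(), numpy.linalg.norm(m, 1))  # ord=1
assert numpy.allclose((m ** 2).sum() ** 0.5, numpy.linalg.norm(m, 'fro'))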
class lstsq(theano.Op):
    """Least-squares solver; wraps numpy.linalg.lstsq."""
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
def make_node(self, x, y, rcond):
x = theano.tensor.as_tensor_variable(x)
y = theano.tensor.as_tensor_variable(y)
rcond = theano.tensor.as_tensor_variable(rcond)
return theano.Apply(self, [x, y, rcond], [y.type(), theano.tensor.dvector(), theano.tensor.lscalar(), theano.tensor.dvector()])
def perform(self, node, inputs, outputs):
x = inputs[0]
y = inputs[1]
rcond = inputs[2]
zz = numpy.linalg.lstsq(inputs[0], inputs[1], inputs[2])
outputs[0][0] = zz[0]
outputs[1][0] = zz[1]
outputs[2][0] = zz[2]
outputs[3][0] = zz[3]
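A numpy sketch of the four outputs wired up in make_node: solution, residuals, rank, and singular values (illustrative only):

import numpy

x = numpy.array([[2., 1.], [3., 4.]])
y = numpy.array([[17., 20.], [43., 50.]])
sol, residuals, rank, sv = numpy.linalg.lstsq(x, y, -1)  # rcond=-1
assert numpy.allclose(numpy.dot(x, sol), y)
assert rank == 2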
@@ -44,209 +44,6 @@ from nose.plugins.attrib import attr
from nose.tools import assert_raises
def check_lower_triangular(pd, ch_f):
ch = ch_f(pd)
assert ch[0, pd.shape[1] - 1] == 0
assert ch[pd.shape[0] - 1, 0] != 0
assert numpy.allclose(numpy.dot(ch, ch.T), pd)
assert not numpy.allclose(numpy.dot(ch.T, ch), pd)
def check_upper_triangular(pd, ch_f):
ch = ch_f(pd)
assert ch[4, 0] == 0
assert ch[0, 4] != 0
assert numpy.allclose(numpy.dot(ch.T, ch), pd)
assert not numpy.allclose(numpy.dot(ch, ch.T), pd)
def test_cholesky():
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
pd = numpy.dot(r, r.T)
x = tensor.matrix()
chol = cholesky(x)
# Check the default.
ch_f = function([x], chol)
yield check_lower_triangular, pd, ch_f
# Explicit lower-triangular.
chol = Cholesky(lower=True)(x)
ch_f = function([x], chol)
yield check_lower_triangular, pd, ch_f
# Explicit upper-triangular.
chol = Cholesky(lower=False)(x)
ch_f = function([x], chol)
yield check_upper_triangular, pd, ch_f
def test_cholesky_grad():
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
pd = numpy.dot(r, r.T)
eps = None
if config.floatX == "float64":
eps = 2e-8
# Check the default.
yield (lambda: utt.verify_grad(cholesky, [pd], 3, rng, eps=eps))
# Explicit lower-triangular.
yield (lambda: utt.verify_grad(Cholesky(lower=True), [pd], 3,
rng, eps=eps))
# Explicit upper-triangular.
yield (lambda: utt.verify_grad(Cholesky(lower=False), [pd], 3,
rng, eps=eps))
@attr('slow')
def test_cholesky_and_cholesky_grad_shape():
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
rng = numpy.random.RandomState(utt.fetch_seed())
x = tensor.matrix()
for l in (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x)):
f_chol = theano.function([x], l.shape)
g = tensor.grad(l.sum(), x)
f_cholgrad = theano.function([x], g.shape)
topo_chol = f_chol.maker.fgraph.toposort()
topo_cholgrad = f_cholgrad.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == Cholesky
for node in topo_chol]) == 0
assert sum([node.op.__class__ == CholeskyGrad
for node in topo_cholgrad]) == 0
for shp in [2, 3, 5]:
m = numpy.cov(rng.randn(shp, shp + 10)).astype(config.floatX)
yield numpy.testing.assert_equal, f_chol(m), (shp, shp)
yield numpy.testing.assert_equal, f_cholgrad(m), (shp, shp)
def test_inverse_correctness():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(4, 4).astype(theano.config.floatX)
x = tensor.matrix()
xi = matrix_inverse(x)
ri = function([x], xi)(r)
assert ri.shape == r.shape
assert ri.dtype == r.dtype
rir = numpy.dot(ri, r)
rri = numpy.dot(r, ri)
assert _allclose(numpy.identity(4), rir), rir
assert _allclose(numpy.identity(4), rri), rri
def test_pseudoinverse_correctness():
rng = numpy.random.RandomState(utt.fetch_seed())
d1 = rng.randint(4) + 2
d2 = rng.randint(4) + 2
r = rng.randn(d1, d2).astype(theano.config.floatX)
x = tensor.matrix()
xi = pinv(x)
ri = function([x], xi)(r)
assert ri.shape[0] == r.shape[1]
assert ri.shape[1] == r.shape[0]
assert ri.dtype == r.dtype
# Note that the pseudoinverse can be quite imprecise, so we compare
# the result with what numpy.linalg returns
assert _allclose(ri, numpy.linalg.pinv(r))
def test_matrix_dot():
rng = numpy.random.RandomState(utt.fetch_seed())
n = rng.randint(4) + 2
rs = []
xs = []
for k in xrange(n):
rs += [rng.randn(4, 4).astype(theano.config.floatX)]
xs += [tensor.matrix()]
sol = matrix_dot(*xs)
theano_sol = function(xs, sol)(*rs)
numpy_sol = rs[0]
for r in rs[1:]:
numpy_sol = numpy.dot(numpy_sol, r)
assert _allclose(numpy_sol, theano_sol)
def test_qr_modes():
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
a = rng.rand(4, 4).astype(theano.config.floatX)
f = function([A], qr(A))
t_qr = f(a)
n_qr = numpy.linalg.qr(a)
assert _allclose(n_qr, t_qr)
for mode in ["reduced", "r", "raw", "full", "economic"]:
f = function([A], qr(A, mode))
t_qr = f(a)
n_qr = numpy.linalg.qr(a, mode)
if isinstance(n_qr, (list, tuple)):
assert _allclose(n_qr[0], t_qr[0])
assert _allclose(n_qr[1], t_qr[1])
else:
assert _allclose(n_qr, t_qr)
try:
n_qr = numpy.linalg.qr(a, "complete")
f = function([A], qr(A, "complete"))
t_qr = f(a)
assert _allclose(n_qr, t_qr)
except TypeError, e:
assert "name 'complete' is not defined" in str(e)
def test_svd():
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
U, V, T = svd(A)
fn = function([A], [U, V, T])
a = rng.rand(4, 4).astype(theano.config.floatX)
n_u, n_v, n_t = numpy.linalg.svd(a)
t_u, t_v, t_t = fn(a)
assert _allclose(n_u, t_u)
assert _allclose(n_v, t_v)
assert _allclose(n_t, t_t)
def test_inverse_singular():
singular = numpy.array([[1, 0, 0]] + [[0, 1, 0]] * 2,
dtype=theano.config.floatX)
a = tensor.matrix()
f = function([a], matrix_inverse(a))
try:
f(singular)
except numpy.linalg.LinAlgError:
return
assert False
def test_inverse_grad():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(4, 4)
tensor.verify_grad(matrix_inverse, [r], rng=numpy.random)
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(4, 4)
tensor.verify_grad(matrix_inverse, [r], rng=numpy.random)
def test_rop_lop():
mx = tensor.matrix('mx')
mv = tensor.matrix('mv')
@@ -295,189 +92,6 @@ def test_rop_lop():
assert _allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
def test_det():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
x = tensor.matrix()
f = theano.function([x], det(x))
assert numpy.allclose(numpy.linalg.det(r), f(r))
def test_det_grad():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
tensor.verify_grad(det, [r], rng=numpy.random)
def test_det_shape():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
x = tensor.matrix()
f = theano.function([x], det(x))
f_shape = theano.function([x], det(x).shape)
assert numpy.all(f(r).shape == f_shape(r))
class test_diag(unittest.TestCase):
"""
Test that linalg.diag has the same behavior as numpy.diag.
numpy.diag has two behaviors:
(1) when given a vector, it returns a matrix with that vector as the
diagonal.
(2) when given a matrix, returns a vector which is the diagonal of the
matrix.
(1) and (2) are tested by test_alloc_diag and test_extract_diag
respectively.
test_diag test makes sure that linalg.diag instantiates
the right op based on the dimension of the input.
"""
def __init__(self, name, mode=None, shared=tensor._shared,
floatX=None, type=tensor.TensorType):
self.mode = mode
self.shared = shared
if floatX is None:
floatX = config.floatX
self.floatX = floatX
self.type = type
super(test_diag, self).__init__(name)
def test_alloc_diag(self):
rng = numpy.random.RandomState(utt.fetch_seed())
x = theano.tensor.vector()
g = alloc_diag(x)
f = theano.function([x], g)
# test "normal" scenario (5x5 matrix) and special cases of 0x0 and 1x1
for shp in [5, 0, 1]:
m = rng.rand(shp).astype(self.floatX)
v = numpy.diag(m)
r = f(m)
# The right matrix is created
assert (r == v).all()
# Test we accept only vectors
xx = theano.tensor.matrix()
ok = False
try:
alloc_diag(xx)
except TypeError:
ok = True
assert ok
# Test infer_shape
f = theano.function([x], g.shape)
topo = f.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == AllocDiag for node in topo]) == 0
for shp in [5, 0, 1]:
m = rng.rand(shp).astype(self.floatX)
assert (f(m) == m.shape).all()
def test_alloc_diag_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
x = rng.rand(5)
tensor.verify_grad(alloc_diag, [x], rng=rng)
def test_diag(self):
# test that it builds a matrix with given diagonal when using
# vector inputs
x = theano.tensor.vector()
y = diag(x)
assert y.owner.op.__class__ == AllocDiag
# test that it extracts the diagonal when using matrix input
x = theano.tensor.matrix()
y = extract_diag(x)
assert y.owner.op.__class__ == ExtractDiag
# other types should raise error
x = theano.tensor.tensor3()
ok = False
try:
y = extract_diag(x)
except TypeError:
ok = True
assert ok
# not testing the view=True case since it is not used anywhere.
def test_extract_diag(self):
rng = numpy.random.RandomState(utt.fetch_seed())
m = rng.rand(2, 3).astype(self.floatX)
x = self.shared(m)
g = extract_diag(x)
f = theano.function([], g)
assert [isinstance(node.inputs[0].type, self.type)
for node in f.maker.fgraph.toposort()
if isinstance(node.op, ExtractDiag)] == [True]
for shp in [(2, 3), (3, 2), (3, 3), (1, 1), (0, 0)]:
m = rng.rand(*shp).astype(self.floatX)
x.set_value(m)
v = numpy.diag(m)
r = f()
# The right diagonal is extracted
assert (r == v).all()
# Test we accept only matrix
xx = theano.tensor.vector()
ok = False
try:
extract_diag(xx)
except TypeError:
ok = True
assert ok
# Test infer_shape
f = theano.function([], g.shape)
topo = f.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == ExtractDiag
for node in topo]) == 0
for shp in [(2, 3), (3, 2), (3, 3)]:
m = rng.rand(*shp).astype(self.floatX)
x.set_value(m)
assert f() == min(shp)
def test_extract_diag_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
x = rng.rand(5, 4).astype(self.floatX)
tensor.verify_grad(extract_diag, [x], rng=rng)
@attr('slow')
def test_extract_diag_empty(self):
c = self.shared(numpy.array([[], []], self.floatX))
f = theano.function([], extract_diag(c), mode=self.mode)
assert [isinstance(node.inputs[0].type, self.type)
for node in f.maker.fgraph.toposort()
if isinstance(node.op, ExtractDiag)] == [True]
def test_trace():
rng = numpy.random.RandomState(utt.fetch_seed())
x = theano.tensor.matrix()
g = trace(x)
f = theano.function([x], g)
for shp in [(2, 3), (3, 2), (3, 3)]:
m = rng.rand(*shp).astype(config.floatX)
v = numpy.trace(m)
assert v == f(m)
xx = theano.tensor.vector()
ok = False
try:
trace(xx)
except TypeError:
ok = True
assert ok
def test_spectral_radius_bound():
tol = 10 ** (-6)
rng = numpy.random.RandomState(utt.fetch_seed())
@@ -523,228 +137,3 @@ def test_spectral_radius_bound():
except ValueError:
ok = True
assert ok
class test_Solve(utt.InferShapeTester):
def setUp(self):
super(test_Solve, self).setUp()
self.op_class = Solve
self.op = Solve()
def test_infer_shape(self):
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
rng = numpy.random.RandomState(utt.fetch_seed())
A = theano.tensor.matrix()
b = theano.tensor.matrix()
self._compile_and_check([A, b], # theano.function inputs
[self.op(A, b)], # theano.function outputs
# A must be square
[numpy.asarray(rng.rand(5, 5),
dtype=config.floatX),
numpy.asarray(rng.rand(5, 1),
dtype=config.floatX)],
self.op_class,
warn=False)
rng = numpy.random.RandomState(utt.fetch_seed())
A = theano.tensor.matrix()
b = theano.tensor.vector()
self._compile_and_check([A, b], # theano.function inputs
[self.op(A, b)], # theano.function outputs
# A must be square
[numpy.asarray(rng.rand(5, 5),
dtype=config.floatX),
numpy.asarray(rng.rand(5),
dtype=config.floatX)],
self.op_class,
warn=False)
class test_Eig(utt.InferShapeTester):
op_class = Eig
op = eig
dtype = 'float64'
def setUp(self):
super(test_Eig, self).setUp()
self.rng = numpy.random.RandomState(utt.fetch_seed())
self.A = theano.tensor.matrix(dtype=self.dtype)
X = numpy.asarray(self.rng.rand(5, 5),
dtype=self.dtype)
self.S = X.dot(X.T)
def test_infer_shape(self):
A = self.A
S = self.S
self._compile_and_check([A], # theano.function inputs
self.op(A), # theano.function outputs
# S must be square
[S],
self.op_class,
warn=False)
def test_eval(self):
A = theano.tensor.matrix(dtype=self.dtype)
self.assertEquals([e.eval({A: [[1]]}) for e in self.op(A)],
[[1.0], [[1.0]]])
x = [[0, 1], [1, 0]]
w, v = [e.eval({A: x}) for e in self.op(A)]
assert_array_almost_equal(numpy.dot(x, v), w * v)
class test_Eigh(test_Eig):
op = staticmethod(eigh)
def test_uplo(self):
S = self.S
a = theano.tensor.matrix(dtype=self.dtype)
wu, vu = [out.eval({a: S}) for out in self.op(a, 'U')]
wl, vl = [out.eval({a: S}) for out in self.op(a, 'L')]
assert_array_almost_equal(wu, wl)
assert_array_almost_equal(vu * numpy.sign(vu[0, :]),
vl * numpy.sign(vl[0, :]))
def test_grad(self):
S = self.S
utt.verify_grad(lambda x: self.op(x)[0], [S], rng=self.rng)
utt.verify_grad(lambda x: self.op(x)[1], [S], rng=self.rng)
utt.verify_grad(lambda x: self.op(x, 'U')[0], [S], rng=self.rng)
utt.verify_grad(lambda x: self.op(x, 'U')[1], [S], rng=self.rng)
class test_Eigh_float32(test_Eigh):
dtype = 'float32'
def test_matrix_inverse_solve():
if not imported_scipy:
raise SkipTest("Scipy needed for the Solve op.")
A = theano.tensor.dmatrix('A')
b = theano.tensor.dmatrix('b')
node = matrix_inverse(A).dot(b).owner
[out] = inv_as_solve.transform(node)
assert isinstance(out.owner.op, Solve)
def test_eigvalsh():
if not imported_scipy:
raise SkipTest("Scipy needed for the geigvalsh op.")
import scipy.linalg
A = theano.tensor.dmatrix('a')
B = theano.tensor.dmatrix('b')
f = function([A, B], eigvalsh(A, B))
rng = numpy.random.RandomState(utt.fetch_seed())
a = rng.randn(5, 5)
a = a + a.T
for b in [10 * numpy.eye(5, 5) + rng.randn(5, 5)]:
w = f(a, b)
refw = scipy.linalg.eigvalsh(a, b)
numpy.testing.assert_array_almost_equal(w, refw)
# We need to test None separately, as otherwise DebugMode will
# complain, as this isn't a valid ndarray.
b = None
B = theano.tensor.NoneConst
f = function([A], eigvalsh(A, B))
w = f(a)
refw = scipy.linalg.eigvalsh(a, b)
numpy.testing.assert_array_almost_equal(w, refw)
def test_eigvalsh_grad():
rng = numpy.random.RandomState(utt.fetch_seed())
a = rng.randn(5, 5)
a = a + a.T
b = 10 * numpy.eye(5, 5) + rng.randn(5, 5)
tensor.verify_grad(lambda a, b: eigvalsh(a, b).dot([1, 2, 3, 4, 5]),
[a, b], rng=numpy.random)
class Matrix_power(unittest.TestCase):
def test_numpy_compare(self):
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
Q = matrix_power(A, 3)
fn = function([A], [Q])
a = rng.rand(4, 4).astype(theano.config.floatX)
n_p = numpy.linalg.matrix_power(a, 3)
t_p = fn(a)
assert numpy.allclose(n_p, t_p)
def test_non_square_matrix(self):
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
Q = matrix_power(A, 3)
f = function([A], [Q])
a = rng.rand(4, 3).astype(theano.config.floatX)
self.assertRaises(ValueError, f, a)
class T_NormTests(unittest.TestCase):
def test_wrong_type_of_ord_for_vector(self):
self.assertRaises(ValueError, norm, [2, 1], 'fro')
def test_wrong_type_of_ord_for_matrix(self):
self.assertRaises(ValueError, norm, [[2, 1], [3, 4]], 0)
def test_non_tensorial_input(self):
self.assertRaises(ValueError, norm, 3, None)
def test_tensor_input(self):
self.assertRaises(NotImplementedError, norm, numpy.random.rand(3, 4, 5), None)
def test_numpy_compare(self):
rng = numpy.random.RandomState(utt.fetch_seed())
M = tensor.matrix("A", dtype=theano.config.floatX)
V = tensor.vector("V", dtype=theano.config.floatX)
a = rng.rand(4, 4).astype(theano.config.floatX)
b = rng.rand(4).astype(theano.config.floatX)
A = ( [None, 'fro', 'inf', '-inf', 1, -1, None, 'inf', '-inf', 0, 1, -1, 2, -2],
[M, M, M, M, M, M, V, V, V, V, V, V, V, V],
[a, a, a, a, a, a, b, b, b, b, b, b, b, b],
[None, 'fro', inf, -inf, 1, -1, None, inf, -inf, 0, 1, -1, 2, -2])
for i in range(0, 14):
f = function([A[1][i]], norm(A[1][i], A[0][i]))
t_n = f(A[2][i])
n_n = numpy.linalg.norm(A[2][i], A[3][i])
assert _allclose(n_n, t_n)
class T_lstsq(unittest.TestCase):
def test_correct_solution(self):
x = tensor.lmatrix()
y = tensor.lmatrix()
z = tensor.lscalar()
b = theano.sandbox.linalg.ops.lstsq()(x, y, z)
f = function([x, y, z], b)
TestMatrix1 = numpy.asarray([[2, 1], [3, 4]])
TestMatrix2 = numpy.asarray([[17, 20], [43, 50]])
TestScalar = numpy.asarray(1)
f = function([x, y, z], b)
m = f(TestMatrix1, TestMatrix2, TestScalar)
self.assertTrue(numpy.allclose(TestMatrix2, numpy.dot(TestMatrix1, m[0])))
def test_wrong_coefficient_matrix(self):
x = tensor.vector()
y = tensor.vector()
z = tensor.scalar()
b = theano.sandbox.linalg.ops.lstsq()(x, y, z)
f = function([x, y, z], b)
self.assertRaises(numpy.linalg.linalg.LinAlgError, f, [2, 1], [2, 1], 1)
def test_wrong_rcond_dimension(self):
x = tensor.vector()
y = tensor.vector()
z = tensor.vector()
b = theano.sandbox.linalg.ops.lstsq()(x, y, z)
f = function([x, y, z], b)
self.assertRaises(numpy.linalg.LinAlgError, f, [2, 1], [2, 1], [2, 1])
@@ -38,6 +38,7 @@ from theano.tensor.sharedvar import tensor_constructor as _shared
from theano.tensor.io import *
def shared(*args, **kw):
"""
Backward-compatibility wrapper around `tensor._shared`.
@@ -4945,8 +4945,7 @@ class Diagonal(Op):
def diagonal(a, offset=0, axis1=0, axis2=1):
if (offset, axis1, axis2) == (0, 0, 1):
-        from theano.sandbox.linalg import extract_diag
-        return extract_diag(a)
+        return theano.tensor.nlinalg.extract_diag(a)
return Diagonal(offset, axis1, axis2)(a)
@@ -4,11 +4,11 @@ import numpy
import theano
from theano.tensor import basic
+from theano.tensor import nlinalg
from theano import gof, scalar
-tensor = basic
from theano.gradient import DisconnectedType
+tensor = basic
class CumsumOp(theano.Op):
# See function cumsum for docstring
@@ -727,8 +727,7 @@ class FillDiagonal(gof.Op):
self.__class__.__name__)
wr_a = fill_diagonal(grad, 0) # valid for any number of dimensions
# diag is only valid for matrices
-        import theano.sandbox.linalg
-        wr_val = theano.sandbox.linalg.ops.diag(grad).sum()
+        wr_val = theano.tensor.nlinalg.diag(grad).sum()
return [wr_a, wr_val]
fill_diagonal_ = FillDiagonal()
......
import logging
import theano
logger = logging.getLogger(__name__)
import numpy
from theano.gof import Op, Apply
from theano.tensor import as_tensor_variable, dot, DimShuffle, Dot
from theano.tensor.blas import Dot22
from theano.tensor.opt import (register_stabilize,
register_specialize, register_canonicalize)
from theano.gof import local_optimizer
from theano.gof.opt import Optimizer
from theano.gradient import DisconnectedType
from theano.tensor import basic as tensor
class MatrixPinv(Op):
"""Computes the pseudo-inverse of a matrix :math:`A`.
The pseudo-inverse of a matrix A, denoted :math:`A^+`, is
defined as: "the matrix that 'solves' [the least-squares problem]
:math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
:math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.
Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity matrix.
This method is not faster than `matrix_inverse`. Its strength is
that it works for non-square matrices.
If you have a square matrix, though, `matrix_inverse` can be both more
exact and faster to compute. Also, this op does not get optimized into a
solve op.
"""
def __init__(self):
pass
def props(self):
"""Function exposing different properties of each instance of the
op.
For the ``MatrixPinv`` op, there are no properties to be exposed.
"""
return ()
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
return Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z, )):
z[0] = numpy.linalg.pinv(x).astype(x.dtype)
def __str__(self):
return "MatrixPseudoInverse"
pinv = MatrixPinv()
class MatrixInverse(Op):
"""Computes the inverse of a matrix :math:`A`.
Given a square matrix :math:`A`, ``matrix_inverse`` returns a square
matrix :math:`A_{inv}` such that the dot product :math:`A \cdot A_{inv}`
and :math:`A_{inv} \cdot A` equals the identity matrix :math:`I`.
:note: When possible, the call to this op will be optimized to the call
of ``solve``.
"""
def __init__(self):
pass
def props(self):
"""Function exposing different properties of each instance of the
op.
For the ``MatrixInverse`` op, there are no properties to be exposed.
"""
return ()
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
return Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z, )):
z[0] = numpy.linalg.inv(x).astype(x.dtype)
def grad(self, inputs, g_outputs):
r"""The gradient function should return
.. math:: V\frac{\partial X^{-1}}{\partial X},
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
one can deduce that the relation corresponds to
.. math:: (X^{-1} \cdot V^{T} \cdot X^{-1})^T.
"""
x, = inputs
xi = self(x)
gz, = g_outputs
#TT.dot(gz.T,xi)
return [-matrix_dot(xi, gz.T, xi).T]
def R_op(self, inputs, eval_points):
r"""The gradient function should return
.. math:: \frac{\partial X^{-1}}{\partial X}V,
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
one can deduce that the relation corresponds to
.. math:: X^{-1} \cdot V \cdot X^{-1}.
"""
x, = inputs
xi = self(x)
ev, = eval_points
if ev is None:
return [None]
return [-matrix_dot(xi, ev, xi)]
def __str__(self):
return "MatrixInverse"
matrix_inverse = MatrixInverse()
def matrix_dot(*args):
""" Shorthand for product between several dots
Given :math:`N` matrices :math:`A_0, A_1, .., A_N`, ``matrix_dot`` will
generate the matrix product between all in the given order, namely
:math:`A_0 \cdot A_1 \cdot A_2 \cdot .. \cdot A_N`.
"""
rval = args[0]
for a in args[1:]:
rval = theano.tensor.dot(rval, a)
return rval
class AllocDiag(Op):
"""
Allocates a square matrix with the given vector as its diagonal.
"""
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def make_node(self, _x):
x = as_tensor_variable(_x)
if x.type.ndim != 1:
raise TypeError('AllocDiag only works on vectors', _x)
return Apply(self, [x], [theano.tensor.matrix(dtype=x.type.dtype)])
def grad(self, inputs, g_outputs):
return [extract_diag(g_outputs[0])]
def perform(self, node, (x,), (z,)):
if x.ndim != 1:
raise TypeError(x)
z[0] = numpy.diag(x)
def infer_shape(self, node, shapes):
x_s, = shapes
return [(x_s[0], x_s[0])]
alloc_diag = AllocDiag()
class ExtractDiag(Op):
""" Return the diagonal of a matrix.
:note: works on the GPU.
"""
def __init__(self, view=False):
self.view = view
if self.view:
self.view_map = {0: [0]}
def __eq__(self, other):
return type(self) == type(other) and self.view == other.view
def __hash__(self):
return hash(type(self)) ^ hash(self.view)
def make_node(self, _x):
if not isinstance(_x, theano.Variable):
x = as_tensor_variable(_x)
else:
x = _x
if x.type.ndim != 2:
raise TypeError('ExtractDiag only works on matrices', _x)
return Apply(self, [x], [x.type.__class__(broadcastable=(False,),
dtype=x.type.dtype)()])
def perform(self, node, ins, outs):
""" For some reason numpy.diag(x) is really slow, so we
implemented our own. """
x, = ins
z, = outs
# zero-dimensional matrices ...
if x.shape[0] == 0 or x.shape[1] == 0:
z[0] = node.outputs[0].type.value_zeros((0,))
return
if x.shape[0] < x.shape[1]:
rval = x[:, 0]
else:
rval = x[0]
rval.strides = (x.strides[0] + x.strides[1],)
if self.view:
z[0] = rval
else:
z[0] = rval.copy()
def __str__(self):
return 'ExtractDiag{view=%s}' % self.view
def grad(self, inputs, g_outputs):
x = theano.tensor.zeros_like(inputs[0])
xdiag = alloc_diag(g_outputs[0])
return [theano.tensor.set_subtensor(
x[:xdiag.shape[0], :xdiag.shape[1]],
xdiag)]
def infer_shape(self, node, shapes):
x_s, = shapes
shp = theano.tensor.min(node.inputs[0].shape)
return [(shp,)]
extract_diag = ExtractDiag()
#TODO: optimization to insert ExtractDiag with view=True
def diag(x):
"""
Numpy-compatibility method
If `x` is a matrix, return its diagonal.
If `x` is a vector return a matrix with it as its diagonal.
* This method does not support the `k` argument that numpy supports.
"""
xx = as_tensor_variable(x)
if xx.type.ndim == 1:
return alloc_diag(xx)
elif xx.type.ndim == 2:
return extract_diag(xx)
else:
raise TypeError('diag requires vector or matrix argument', x)
def trace(X):
"""
Returns the sum of diagonal elements of matrix X.
:note: works on the GPU since 0.6rc4.
"""
return extract_diag(X).sum()
class Det(Op):
"""Matrix determinant
Input should be a square matrix
"""
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
o = theano.tensor.scalar(dtype=x.dtype)
return Apply(self, [x], [o])
def perform(self, node, (x,), (z, )):
try:
z[0] = numpy.asarray(numpy.linalg.det(x), dtype=x.dtype)
except Exception:
print 'Failed to compute determinant', x
raise
def grad(self, inputs, g_outputs):
gz, = g_outputs
x, = inputs
return [gz * self(x) * matrix_inverse(x).T]
def infer_shape(self, node, shapes):
return [()]
def __str__(self):
return "Det"
det = Det()
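# The gradient above is Jacobi's formula, d det(X) / dX = det(X) * inv(X).T.
# A hedged numeric sketch (not part of the original source):
#
#   x = theano.tensor.dmatrix()
#   g = theano.tensor.grad(det(x), x)
#   # theano.function([x], g)(m) is close to
#   # numpy.linalg.det(m) * numpy.linalg.inv(m).T for invertible m.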
class Eig(Op):
"""Compute the eigenvalues and right eigenvectors of a square array.
"""
_numop = staticmethod(numpy.linalg.eig)
def props(self):
"""Function exposing different properties of each instance of the
op.
For the ``Eig`` op, there are no properties to be exposed.
"""
return ()
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
w = theano.tensor.vector(dtype=x.dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, v])
def perform(self, node, (x,), (w, v)):
w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
def infer_shape(self, node, shapes):
n = shapes[0][0]
return [(n,), (n, n)]
def __str__(self):
return self._numop.__name__.capitalize()
eig = Eig()
class Eigh(Eig):
"""
Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
"""
_numop = staticmethod(numpy.linalg.eigh)
def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
def __str__(self):
return 'Eigh{%s}' % self.UPLO
def props(self):
return self.UPLO,
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
        # Numpy's linalg.eigh may return either double or single
        # precision eigenvalues depending on the installed version of
        # LAPACK.  Rather than trying to reproduce the (rather
        # involved) logic, we just probe linalg.eigh with a trivial
        # input.
w_dtype = self._numop([[numpy.dtype(x.dtype).type()]])[0].dtype.name
w = theano.tensor.vector(dtype=w_dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, v])
def perform(self, node, (x,), (w, v)):
w[0], v[0] = self._numop(x, self.UPLO)
def grad(self, inputs, g_outputs):
r"""The gradient function should return
.. math:: \sum_n\left(W_n\frac{\partial\,w_n}
{\partial a_{ij}} +
\sum_k V_{nk}\frac{\partial\,v_{nk}}
{\partial a_{ij}}\right),
where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
:math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.
Analytic formulae for eigensystem gradients are well-known in
perturbation theory:
.. math:: \frac{\partial\,w_n}
{\partial a_{ij}} = v_{in}\,v_{jn}
.. math:: \frac{\partial\,v_{kn}}
{\partial a_{ij}} =
\sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
"""
x, = inputs
w, v = self(x)
# Replace gradients wrt disconnected variables with
# zeros. This is a work-around for issue #1063.
gw, gv = _zero_disconnected([w, v], g_outputs)
return [EighGrad(self.UPLO)(x, w, v, gw, gv)]
def _zero_disconnected(outputs, grads):
l = []
for o, g in zip(outputs, grads):
if isinstance(g.type, DisconnectedType):
l.append(o.zeros_like())
else:
l.append(g)
return l
class EighGrad(Op):
"""Gradient of an eigensystem of a Hermitian matrix.
"""
def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
if UPLO == 'L':
self.tri0 = numpy.tril
self.tri1 = lambda a: numpy.triu(a, 1)
else:
self.tri0 = numpy.triu
self.tri1 = lambda a: numpy.tril(a, -1)
def props(self):
return (self.UPLO,)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def __str__(self):
return 'EighGrad{%s}' % self.UPLO
def make_node(self, x, w, v, gw, gv):
x, w, v, gw, gv = map(as_tensor_variable, (x, w, v, gw, gv))
assert x.ndim == 2
assert w.ndim == 1
assert v.ndim == 2
assert gw.ndim == 1
assert gv.ndim == 2
out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
gw.dtype, gv.dtype)
out = theano.tensor.matrix(dtype=out_dtype)
return Apply(self, [x, w, v, gw, gv], [out])
def perform(self, node, inputs, outputs):
"""
Implements the "reverse-mode" gradient for the eigensystem of
a square matrix.
"""
x, w, v, W, V = inputs
N = x.shape[0]
outer = numpy.outer
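        # G(n) is the eigenvector contribution for eigenvalue n:
        # sum over m != n of v[:, m] * (gv[:, n] . v[:, m]) / (w[n] - w[m]),
        # i.e. the perturbation-theory formula from Eigh.grad; g then
        # accumulates the outer products over all eigenvalues.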
G = lambda n: sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
for m in xrange(N) if m != n)
g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
for n in xrange(N))
# Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
# (triu(a)) only. This means that partial derivative of
# eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
# for i < j (i > j). At the same time, non-zero components of
# the gradient must account for the fact that variation of the
# opposite triangle contributes to variation of two elements
# of Hermitian (symmetric) matrix. The following line
# implements the necessary logic.
out = self.tri0(g) + self.tri1(g).T
        # The call to self.tri0 in perform upcasts from float32 to
        # float64, or from int* to int64, in numpy 1.6.1 but not in
        # 1.6.2.  We do not want a version-dependent dtype in Theano,
        # so we cast to the output dtype.
outputs[0][0] = numpy.asarray(out, dtype=node.outputs[0].dtype)
def infer_shape(self, node, shapes):
return [shapes[0]]
def eigh(a, UPLO='L'):
return Eigh(UPLO)(a)
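# Illustrative usage sketch (not part of the original source):
#
#   a = theano.tensor.dmatrix('a')
#   w, v = eigh(a)
#   f = theano.function([a], [w, v])
#   # For a symmetric m, f(m) matches numpy.linalg.eigh(m).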
class QRFull(Op):
"""
Full QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr, where q is orthonormal
and r is upper-triangular.
"""
_numop = staticmethod(numpy.linalg.qr)
def __init__(self, mode):
self.mode = mode
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of qr function should be a matrix."
q = theano.tensor.matrix(dtype=x.dtype)
r = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [q, r])
def props(self):
return self.mode
def perform(self, node, (x,), (q, r)):
assert x.ndim == 2, "The input of qr function should be a matrix."
q[0], r[0] = self._numop(x,
self.mode)
def __str__(self):
        return 'QRFull{%s}' % self.mode
class QRIncomplete(Op):
"""
Incomplete QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr and return a single matrix.
"""
_numop = staticmethod(numpy.linalg.qr)
def __init__(self, mode):
self.mode = mode
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def props(self):
return self.mode
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of qr function should be a matrix."
q = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [q])
def perform(self, node, (x,), (q,)):
assert x.ndim == 2, "The input of qr function should be a matrix."
q[0] = self._numop(x,
self.mode)
def __str__(self):
        return 'QRIncomplete{%s}' % self.mode
def qr(a, mode="full"):
"""
Computes the QR decomposition of a matrix.
Factor the matrix a as qr, where q
is orthonormal and r is upper-triangular.
    Parameters
    ----------
a : array_like, shape (M, N)
Matrix to be factored.
mode : {'reduced', 'complete', 'r', 'raw', 'full', 'economic'}, optional
If K = min(M, N), then
'reduced' : returns q, r with dimensions (M, K), (K, N) (default)
'complete' : returns q, r with dimensions (M, M), (M, N)
'r' : returns r only with dimensions (K, N)
'raw' : returns h, tau with dimensions (N, M), (K,)
'full' : alias of 'reduced', deprecated
    'economic' : returns h from 'raw', deprecated
    The options 'reduced', 'complete', and 'raw' are new in numpy 1.8;
    see the numpy notes for more information.  In numpy the default is
    'reduced', and to maintain backward compatibility with earlier numpy
    versions both it and the old default 'full' can be omitted.  Note that
    the array h returned in 'raw' mode is transposed for calling Fortran.
    The modes 'full' and 'economic' (both deprecated) may be passed using
    only the first letter for backwards compatibility, but all others must
    be spelled out.
    Here the default mode is 'full', which is also the default for numpy
    1.6.1.
    :note: The default was left as 'full' because 'full' and 'reduced' do
        the same thing in newer numpy versions, while only 'full' works on
        older ones.
    Returns
    -------
q : matrix of float or complex, optional
A matrix with orthonormal columns. When mode = 'complete'
the result is an orthogonal/unitary matrix depending on whether
or not a is real/complex. The determinant may be either +/- 1 in that case.
r : matrix of float or complex, optional
The upper-triangular matrix.
"""
x = [[2, 1], [3, 4]]
    if isinstance(numpy.linalg.qr(x, mode), tuple):
return QRFull(mode)(a)
else:
return QRIncomplete(mode)(a)
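# Note on the dispatch above: rather than hard-coding which modes return a
# (q, r) pair on the installed numpy version, qr() probes numpy.linalg.qr
# on a tiny constant matrix and picks QRFull when a tuple comes back,
# QRIncomplete otherwise.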
class SVD(Op):
# See doc in the docstring of the function just after this class.
_numop = staticmethod(numpy.linalg.svd)
def __init__(self, full_matrices=True, compute_uv=True):
"""
        Parameters
        ----------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
"""
self.full_matrices = full_matrices
self.compute_uv = compute_uv
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def props(self):
return self.full_matrices, self.compute_uv,
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of svd function should be a matrix."
w = theano.tensor.matrix(dtype=x.dtype)
u = theano.tensor.matrix(dtype=x.dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, u, v])
    def perform(self, node, (x,), (u, s, v)):
        assert x.ndim == 2, "The input of the svd function should be a matrix."
        u[0], s[0], v[0] = self._numop(x,
                                       self.full_matrices,
                                       self.compute_uv)
def __str__(self):
return self._numop.__name__.capitalize()
def svd(a, full_matrices=1, compute_uv=1):
"""
This function performs the SVD on CPU.
    Parameters
    ----------
    full_matrices : bool, optional
        If True (default), u and v have the shapes (M, M) and (N, N),
        respectively.
        Otherwise, the shapes are (M, K) and (K, N), respectively,
        where K = min(M, N).
    compute_uv : bool, optional
        Whether or not to compute u and v in addition to s.
        True by default.
    Returns
    -------
    The matrices u and v and the vector of singular values s, in the same
    order as numpy.linalg.svd.
    """
return SVD(full_matrices, compute_uv)(a)
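# Illustrative usage sketch (not part of the original source):
#
#   a = theano.tensor.dmatrix('a')
#   u, s, v = svd(a)
#   f = theano.function([a], [u, s, v])
#   # f(m) matches numpy.linalg.svd(m) for a 2-d array m.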
def test_matrix_inverse_solve():
if not imported_scipy:
raise SkipTest("Scipy needed for the Solve op.")
A = theano.tensor.dmatrix('A')
b = theano.tensor.dmatrix('b')
node = matrix_inverse(A).dot(b).owner
[out] = inv_as_solve.transform(node)
assert isinstance(out.owner.op, Solve)
class lstsq(Op):
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
def make_node(self, x, y, rcond):
x = theano.tensor.as_tensor_variable(x)
y = theano.tensor.as_tensor_variable(y)
rcond = theano.tensor.as_tensor_variable(rcond)
        return theano.Apply(self, [x, y, rcond],
                            [y.type(), theano.tensor.dvector(),
                             theano.tensor.lscalar(),
                             theano.tensor.dvector()])
def perform(self, node, inputs, outputs):
        x, y, rcond = inputs
        zz = numpy.linalg.lstsq(x, y, rcond)
outputs[0][0] = zz[0]
outputs[1][0] = zz[1]
outputs[2][0] = zz[2]
outputs[3][0] = zz[3]
def matrix_power(M, n):
    """Raise a square matrix `M` to the positive integer power `n`
    by repeated matrix multiplication."""
result = 1
for i in xrange(n):
result = theano.dot(result, M)
return result
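# Illustrative usage sketch (not part of the original source): the initial
# result of 1 makes the first theano.dot act as a plain scaling.
#
#   M = theano.tensor.dmatrix('M')
#   f = theano.function([M], matrix_power(M, 3))
#   # f(m) matches numpy.linalg.matrix_power(m, 3).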
def norm(x, ord):
    """Matrix or vector norm, matching the behaviour of
    numpy.linalg.norm for 1-d and 2-d inputs."""
    x = as_tensor_variable(x)
    ndim = x.ndim
    if ndim == 0:
        raise ValueError("'x' must have at least one dimension.")
    elif ndim == 1:
        if ord is None:
            return tensor.sum(x ** 2) ** 0.5
        elif ord == 'inf':
            return tensor.max(abs(x))
        elif ord == '-inf':
            return tensor.min(abs(x))
        elif ord == 0:
            return x[x.nonzero()].shape[0]
        else:
            try:
                z = tensor.sum(abs(x) ** ord) ** (1. / ord)
            except TypeError:
                raise ValueError("Invalid norm order for vectors.")
            return z
    elif ndim == 2:
        if ord is None or ord == 'fro':
            return tensor.sum(abs(x) ** 2) ** 0.5
        elif ord == 'inf':
            return tensor.max(tensor.sum(abs(x), 1))
        elif ord == '-inf':
            return tensor.min(tensor.sum(abs(x), 1))
        elif ord == 1:
            return tensor.max(tensor.sum(abs(x), 0))
        elif ord == -1:
            return tensor.min(tensor.sum(abs(x), 0))
        else:
            raise ValueError("Invalid norm order for matrices.")
    elif ndim > 2:
        raise NotImplementedError("We don't support norm with ndim > 2")
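# Illustrative usage sketch (not part of the original source):
#
#   v = theano.tensor.dvector('v')
#   f = theano.function([v], norm(v, 2))
#   # f(x) matches numpy.linalg.norm(x, 2) for a 1-d array x.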
import logging
logger = logging.getLogger(__name__)
import numpy
from theano.gof import Op, Apply
from theano.tensor import as_tensor_variable, dot, DimShuffle, Dot
from theano.tensor.blas import Dot22
from theano import tensor
import theano.tensor
from theano.tensor.opt import (register_stabilize,
register_specialize, register_canonicalize)
from theano.gof import local_optimizer
from theano.gof.opt import Optimizer
from theano.gradient import DisconnectedType
try:
import scipy.linalg
imported_scipy = True
except ImportError:
# some ops (e.g. Cholesky, Solve, A_Xinv_b) won't work
imported_scipy = False
MATRIX_STRUCTURES = (
'general',
'symmetric',
'lower_triangular',
'upper_triangular',
'hermitian',
'banded',
'diagonal',
'toeplitz',
)
class Cholesky(Op):
"""
    Return a triangular matrix square root of positive semi-definite `x`.
    ``L = cholesky(X, lower=True)`` implies ``dot(L, L.T) == X``.
"""
#TODO: inplace
#TODO: for specific dtypes
#TODO: LAPACK wrapper with in-place behavior, for solve also
def __init__(self, lower=True):
self.lower = lower
self.destructive = False
def props(self):
return (self.lower,
self.destructive)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def infer_shape(self, node, shapes):
return [shapes[0]]
def __str__(self):
if self.lower:
lu = 'lower'
else:
lu = 'upper'
if self.destructive:
destr = 'destructive'
else:
destr = 'non-destructive'
return 'Cholesky{%s,%s}' % (lu, destr)
def make_node(self, x):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the Cholesky op")
x = as_tensor_variable(x)
assert x.ndim == 2
return Apply(self, [x], [x.type()])
def perform(self, node, inputs, outputs):
x = inputs[0]
z = outputs[0]
z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
def grad(self, inputs, gradients):
return [CholeskyGrad(self.lower)(inputs[0], self(inputs[0]),
gradients[0])]
cholesky = Cholesky()
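# Illustrative usage sketch (not part of the original source; assumes
# scipy is installed):
#
#   x = theano.tensor.dmatrix('x')
#   f = theano.function([x], cholesky(x))  # lower-triangular by default
#   # For a positive-definite m, numpy.dot(f(m), f(m).T) reconstructs m.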
class CholeskyGrad(Op):
"""
"""
def __init__(self, lower=True):
self.lower = lower
self.destructive = False
def props(self):
return (self.lower,
self.destructive)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def __str__(self):
if self.lower:
lu = 'lower'
else:
lu = 'upper'
if self.destructive:
destr = 'destructive'
else:
destr = 'non-destructive'
return 'CholeskyGrad{%s,%s}' % (lu, destr)
def make_node(self, x, l, dz):
x = as_tensor_variable(x)
l = as_tensor_variable(l)
dz = as_tensor_variable(dz)
assert x.ndim == 2
assert l.ndim == 2
assert dz.ndim == 2
assert l.owner.op.lower == self.lower, (
"lower/upper mismatch between Cholesky op and CholeskyGrad op"
)
return Apply(self, [x, l, dz], [x.type()])
def perform(self, node, inputs, outputs):
"""Implements the "reverse-mode" gradient [1]_ for the
Cholesky factorization of a positive-definite matrix.
.. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
Journal of Computational and Graphical Statistics,
Vol. 4, No. 2 (Jun.,1995), pp. 134-147
http://www.jstor.org/stable/1390762
"""
x = inputs[0]
L = inputs[1]
dz = inputs[2]
dx = outputs[0]
N = x.shape[0]
if self.lower:
F = numpy.tril(dz)
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
F[i, k] -= F[i, j] * L[j, k]
F[j, k] -= F[i, j] * L[i, k]
for j in xrange(k + 1, N):
F[j, k] /= L[k, k]
F[k, k] -= L[j, k] * F[j, k]
F[k, k] /= (2 * L[k, k])
else:
F = numpy.triu(dz)
M = N - 1
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
F[k, i] -= F[j, i] * L[k, j]
F[k, j] -= F[j, i] * L[k, i]
for j in xrange(k + 1, N):
F[k, j] /= L[k, k]
F[k, k] -= L[k, j] * F[k, j]
F[k, k] /= (2 * L[k, k])
dx[0] = F
def infer_shape(self, node, shapes):
return [shapes[0]]
class Solve(Op):
"""Solve a system of linear equations"""
def __init__(self,
A_structure='general',
lower=False,
overwrite_A=False,
overwrite_b=False):
if A_structure not in MATRIX_STRUCTURES:
raise ValueError('Invalid matrix structure argument', A_structure)
self.A_structure = A_structure
self.lower = lower
self.overwrite_A = overwrite_A
self.overwrite_b = overwrite_b
def props(self):
return (self.A_structure,
self.lower,
self.overwrite_A,
self.overwrite_b)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return type(self) == type(other) and self.props() == other.props()
def __repr__(self):
return 'Solve{%s}' % str(self.props())
def make_node(self, A, b):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the Solve op")
A = as_tensor_variable(A)
b = as_tensor_variable(b)
assert A.ndim == 2
assert b.ndim in [1, 2]
otype = tensor.tensor(
broadcastable=b.broadcastable,
dtype=(A * b).dtype)
return Apply(self, [A, b], [otype])
def perform(self, node, inputs, output_storage):
A, b = inputs
#TODO: use the A_structure to go faster
output_storage[0][0] = scipy.linalg.solve(A, b)
# computes shape of x where x = inv(A) * b
def infer_shape(self, node, shapes):
Ashape, Bshape = shapes
rows = Ashape[1]
if len(Bshape) == 1: # b is a Vector
return [(rows,)]
else:
cols = Bshape[1] # b is a Matrix
return [(rows, cols)]
solve = Solve() # general solve
#TODO : SolveTriangular
#TODO: Optimizations to replace multiplication by matrix inverse
# with solve() Op (still unwritten)
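# Illustrative usage sketch (not part of the original source; assumes
# scipy is installed):
#
#   A = theano.tensor.dmatrix('A')
#   b = theano.tensor.dvector('b')
#   f = theano.function([A, b], solve(A, b))
#   # f(a, y) matches scipy.linalg.solve(a, y).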
class Eigvalsh(Op):
"""Generalized eigenvalues of a Hermetian positive definite eigensystem
"""
def __init__(self, lower=True):
assert lower in [True, False]
self.lower = lower
def props(self):
return (self.lower,)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, a, b):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the Eigvalsh op")
if b == theano.tensor.NoneConst:
a = as_tensor_variable(a)
assert a.ndim == 2
out_dtype = theano.scalar.upcast(a.dtype)
w = theano.tensor.vector(dtype=out_dtype)
return Apply(self, [a], [w])
else:
a = as_tensor_variable(a)
b = as_tensor_variable(b)
assert a.ndim == 2
assert b.ndim == 2
out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
w = theano.tensor.vector(dtype=out_dtype)
return Apply(self, [a, b], [w])
def perform(self, node, inputs, (w,)):
if len(inputs) == 2:
w[0] = scipy.linalg.eigvalsh(a=inputs[0], b=inputs[1], lower=self.lower)
else:
w[0] = scipy.linalg.eigvalsh(a=inputs[0], b=None, lower=self.lower)
def grad(self, inputs, g_outputs):
a, b = inputs
gw, = g_outputs
return EigvalshGrad(self.lower)(a, b, gw)
def infer_shape(self, node, shapes):
n = shapes[0][0]
return [(n,)]
class EigvalshGrad(Op):
"""Gradient of generalized eigenvalues of a Hermetian positive definite
eigensystem
"""
# Note: This Op (EigvalshGrad), should be removed and replaced with a graph
# of theano ops that is constructed directly in Eigvalsh.grad.
# But this can only be done once scipy.linalg.eigh is available as an Op
# (currently the Eigh uses numpy.linalg.eigh, which doesn't let you
# pass the right-hand-side matrix for a generalized eigenproblem.) See the
# discussion on github at
# https://github.com/Theano/Theano/pull/1846#discussion-diff-12486764
def __init__(self, lower=True):
assert lower in [True, False]
self.lower = lower
if lower:
self.tri0 = numpy.tril
self.tri1 = lambda a: numpy.triu(a, 1)
else:
self.tri0 = numpy.triu
self.tri1 = lambda a: numpy.tril(a, -1)
def props(self):
return (self.lower,)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, a, b, gw):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the GEigvalsh op")
a = as_tensor_variable(a)
b = as_tensor_variable(b)
gw = as_tensor_variable(gw)
assert a.ndim == 2
assert b.ndim == 2
assert gw.ndim == 1
out_dtype = theano.scalar.upcast(a.dtype, b.dtype, gw.dtype)
out1 = theano.tensor.matrix(dtype=out_dtype)
out2 = theano.tensor.matrix(dtype=out_dtype)
return Apply(self, [a, b, gw], [out1, out2])
def perform(self, node, (a, b, gw), outputs):
w, v = scipy.linalg.eigh(a, b, lower=self.lower)
gA = v.dot(numpy.diag(gw).dot(v.T))
gB = - v.dot(numpy.diag(gw*w).dot(v.T))
# See EighGrad comments for an explanation of these lines
out1 = self.tri0(gA) + self.tri1(gA).T
out2 = self.tri0(gB) + self.tri1(gB).T
outputs[0][0] = numpy.asarray(out1, dtype=node.outputs[0].dtype)
outputs[1][0] = numpy.asarray(out2, dtype=node.outputs[1].dtype)
def infer_shape(self, node, shapes):
return [shapes[0], shapes[1]]
def eigvalsh(a, b, lower=True):
return Eigvalsh(lower)(a, b)
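# Illustrative usage sketch (not part of the original source; assumes
# scipy is installed):
#
#   a = theano.tensor.dmatrix('a')
#   b = theano.tensor.dmatrix('b')
#   f = theano.function([a, b], eigvalsh(a, b))
#   # f(m, n) matches scipy.linalg.eigvalsh(m, n) for symmetric m and
#   # positive-definite n.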
def kron(a, b):
""" Kronecker product
Same as scipy.linalg.kron(a, b).
    :note: numpy.kron(a, b) != scipy.linalg.kron(a, b)!
        They don't have the same shape and order when
        a.ndim != 2 or b.ndim != 2.
:param a: array_like
:param b: array_like
:return: array_like with a.ndim + b.ndim - 2 dimensions.
"""
a = tensor.as_tensor_variable(a)
b = tensor.as_tensor_variable(b)
if (a.ndim + b.ndim <= 2):
raise TypeError('kron: inputs dimensions must sum to 3 or more. '
'You passed %d and %d.' % (a.ndim, b.ndim))
o = tensor.outer(a, b)
o = o.reshape(tensor.concatenate((a.shape, b.shape)),
a.ndim + b.ndim)
shf = o.dimshuffle(0, 2, 1, * range(3, o.ndim))
if shf.ndim == 3:
shf = o.dimshuffle(1, 0, 2)
o = shf.flatten()
else:
o = shf.reshape((o.shape[0] * o.shape[2],
o.shape[1] * o.shape[3]) +
tuple([o.shape[i] for i in range(4, o.ndim)]))
return o
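# Illustrative usage sketch (not part of the original source; assumes
# scipy is installed for the comparison):
#
#   a = theano.tensor.dmatrix('a')
#   b = theano.tensor.dmatrix('b')
#   f = theano.function([a, b], kron(a, b))
#   # For 2-d inputs, f(m, n) matches scipy.linalg.kron(m, n).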
import unittest
import numpy
import numpy.linalg
from numpy.testing import assert_array_almost_equal
from numpy.testing import dec, assert_array_equal, assert_allclose
from numpy import inf
import theano
from theano import tensor, function
from theano.tensor.basic import _allclose
from theano.tests.test_rop import break_op
from theano.tests import unittest_tools as utt
from theano import config
from theano.tensor.nlinalg import ( MatrixInverse,
matrix_inverse,
MatrixPinv,
pinv,
AllocDiag,
alloc_diag,
ExtractDiag,
extract_diag,
diag,
trace,
Det,
det,
Eig,
eig,
Eigh,
EighGrad,
eigh,
matrix_dot,
_zero_disconnected,
qr,
matrix_power,
norm,
svd
)
from nose.plugins.skip import SkipTest
from nose.plugins.attrib import attr
from nose.tools import assert_raises
def test_pseudoinverse_correctness():
rng = numpy.random.RandomState(utt.fetch_seed())
d1 = rng.randint(4) + 2
d2 = rng.randint(4) + 2
r = rng.randn(d1, d2).astype(theano.config.floatX)
x = tensor.matrix()
xi = pinv(x)
ri = function([x], xi)(r)
assert ri.shape[0] == r.shape[1]
assert ri.shape[1] == r.shape[0]
assert ri.dtype == r.dtype
    # Note that the pseudoinverse can be quite imprecise, so I prefer to
    # compare the result with what numpy.linalg returns.
assert _allclose(ri, numpy.linalg.pinv(r))
def test_inverse_correctness():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(4, 4).astype(theano.config.floatX)
x = tensor.matrix()
xi = matrix_inverse(x)
ri = function([x], xi)(r)
assert ri.shape == r.shape
assert ri.dtype == r.dtype
rir = numpy.dot(ri, r)
rri = numpy.dot(r, ri)
assert _allclose(numpy.identity(4), rir), rir
assert _allclose(numpy.identity(4), rri), rri
def test_matrix_dot():
rng = numpy.random.RandomState(utt.fetch_seed())
n = rng.randint(4) + 2
rs = []
xs = []
for k in xrange(n):
rs += [rng.randn(4, 4).astype(theano.config.floatX)]
xs += [tensor.matrix()]
sol = matrix_dot(*xs)
theano_sol = function(xs, sol)(*rs)
numpy_sol = rs[0]
for r in rs[1:]:
numpy_sol = numpy.dot(numpy_sol, r)
assert _allclose(numpy_sol, theano_sol)
def test_qr_modes():
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
a = rng.rand(4, 4).astype(theano.config.floatX)
f = function([A], qr(A))
t_qr = f(a)
n_qr = numpy.linalg.qr(a)
assert _allclose(n_qr, t_qr)
for mode in ["reduced", "r", "raw", "full", "economic"]:
f = function([A], qr(A, mode))
t_qr = f(a)
n_qr = numpy.linalg.qr(a, mode)
if isinstance(n_qr, (list, tuple)):
assert _allclose(n_qr[0], t_qr[0])
assert _allclose(n_qr[1], t_qr[1])
else:
assert _allclose(n_qr, t_qr)
try:
n_qr = numpy.linalg.qr(a, "complete")
f = function([A], qr(A, "complete"))
t_qr = f(a)
assert _allclose(n_qr, t_qr)
except TypeError, e:
assert "name 'complete' is not defined" in str(e)
def test_svd():
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
U, V, T = svd(A)
fn = function([A], [U, V, T])
a = rng.rand(4, 4).astype(theano.config.floatX)
n_u, n_v, n_t = numpy.linalg.svd(a)
t_u, t_v, t_t = fn(a)
assert _allclose(n_u, t_u)
assert _allclose(n_v, t_v)
assert _allclose(n_t, t_t)
def test_inverse_singular():
singular = numpy.array([[1, 0, 0]] + [[0, 1, 0]] * 2,
dtype=theano.config.floatX)
a = tensor.matrix()
f = function([a], matrix_inverse(a))
try:
f(singular)
except numpy.linalg.LinAlgError:
return
assert False
def test_inverse_grad():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(4, 4)
tensor.verify_grad(matrix_inverse, [r], rng=numpy.random)
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(4, 4)
tensor.verify_grad(matrix_inverse, [r], rng=numpy.random)
def test_det():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
x = tensor.matrix()
f = theano.function([x], det(x))
assert numpy.allclose(numpy.linalg.det(r), f(r))
def test_det_grad():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
tensor.verify_grad(det, [r], rng=numpy.random)
def test_det_shape():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
x = tensor.matrix()
f = theano.function([x], det(x))
f_shape = theano.function([x], det(x).shape)
assert numpy.all(f(r).shape == f_shape(r))
class test_diag(unittest.TestCase):
"""
Test that linalg.diag has the same behavior as numpy.diag.
numpy.diag has two behaviors:
(1) when given a vector, it returns a matrix with that vector as the
diagonal.
(2) when given a matrix, returns a vector which is the diagonal of the
matrix.
(1) and (2) are tested by test_alloc_diag and test_extract_diag
respectively.
    The test_diag method makes sure that linalg.diag instantiates
the right op based on the dimension of the input.
"""
def __init__(self, name, mode=None, shared=tensor._shared,
floatX=None, type=tensor.TensorType):
self.mode = mode
self.shared = shared
if floatX is None:
floatX = config.floatX
self.floatX = floatX
self.type = type
super(test_diag, self).__init__(name)
def test_alloc_diag(self):
rng = numpy.random.RandomState(utt.fetch_seed())
x = theano.tensor.vector()
g = alloc_diag(x)
f = theano.function([x], g)
# test "normal" scenario (5x5 matrix) and special cases of 0x0 and 1x1
for shp in [5, 0, 1]:
m = rng.rand(shp).astype(self.floatX)
v = numpy.diag(m)
r = f(m)
# The right matrix is created
assert (r == v).all()
# Test we accept only vectors
xx = theano.tensor.matrix()
ok = False
try:
alloc_diag(xx)
except TypeError:
ok = True
assert ok
# Test infer_shape
f = theano.function([x], g.shape)
topo = f.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == AllocDiag for node in topo]) == 0
for shp in [5, 0, 1]:
m = rng.rand(shp).astype(self.floatX)
assert (f(m) == m.shape).all()
def test_alloc_diag_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
x = rng.rand(5)
tensor.verify_grad(alloc_diag, [x], rng=rng)
def test_diag(self):
# test that it builds a matrix with given diagonal when using
# vector inputs
x = theano.tensor.vector()
y = diag(x)
assert y.owner.op.__class__ == AllocDiag
# test that it extracts the diagonal when using matrix input
x = theano.tensor.matrix()
y = extract_diag(x)
assert y.owner.op.__class__ == ExtractDiag
# other types should raise error
x = theano.tensor.tensor3()
ok = False
try:
y = extract_diag(x)
except TypeError:
ok = True
assert ok
# not testing the view=True case since it is not used anywhere.
def test_extract_diag(self):
rng = numpy.random.RandomState(utt.fetch_seed())
m = rng.rand(2, 3).astype(self.floatX)
x = self.shared(m)
g = extract_diag(x)
f = theano.function([], g)
assert [isinstance(node.inputs[0].type, self.type)
for node in f.maker.fgraph.toposort()
if isinstance(node.op, ExtractDiag)] == [True]
for shp in [(2, 3), (3, 2), (3, 3), (1, 1), (0, 0)]:
m = rng.rand(*shp).astype(self.floatX)
x.set_value(m)
v = numpy.diag(m)
r = f()
# The right diagonal is extracted
assert (r == v).all()
# Test we accept only matrix
xx = theano.tensor.vector()
ok = False
try:
extract_diag(xx)
except TypeError:
ok = True
assert ok
# Test infer_shape
f = theano.function([], g.shape)
topo = f.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == ExtractDiag
for node in topo]) == 0
for shp in [(2, 3), (3, 2), (3, 3)]:
m = rng.rand(*shp).astype(self.floatX)
x.set_value(m)
assert f() == min(shp)
def test_extract_diag_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
x = rng.rand(5, 4).astype(self.floatX)
tensor.verify_grad(extract_diag, [x], rng=rng)
@attr('slow')
def test_extract_diag_empty(self):
c = self.shared(numpy.array([[], []], self.floatX))
f = theano.function([], extract_diag(c), mode=self.mode)
assert [isinstance(node.inputs[0].type, self.type)
for node in f.maker.fgraph.toposort()
if isinstance(node.op, ExtractDiag)] == [True]
def test_trace():
rng = numpy.random.RandomState(utt.fetch_seed())
x = theano.tensor.matrix()
g = trace(x)
f = theano.function([x], g)
for shp in [(2, 3), (3, 2), (3, 3)]:
m = rng.rand(*shp).astype(config.floatX)
v = numpy.trace(m)
assert v == f(m)
xx = theano.tensor.vector()
ok = False
try:
trace(xx)
except TypeError:
ok = True
assert ok
class test_Eig(utt.InferShapeTester):
op_class = Eig
op = eig
dtype = 'float64'
def setUp(self):
super(test_Eig, self).setUp()
self.rng = numpy.random.RandomState(utt.fetch_seed())
self.A = theano.tensor.matrix(dtype=self.dtype)
X = numpy.asarray(self.rng.rand(5, 5),
dtype=self.dtype)
self.S = X.dot(X.T)
def test_infer_shape(self):
A = self.A
S = self.S
self._compile_and_check([A], # theano.function inputs
self.op(A), # theano.function outputs
# S must be square
[S],
self.op_class,
warn=False)
def test_eval(self):
A = theano.tensor.matrix(dtype=self.dtype)
self.assertEquals([e.eval({A: [[1]]}) for e in self.op(A)],
[[1.0], [[1.0]]])
x = [[0, 1], [1, 0]]
w, v = [e.eval({A: x}) for e in self.op(A)]
assert_array_almost_equal(numpy.dot(x, v), w * v)
class test_Eigh(test_Eig):
op = staticmethod(eigh)
def test_uplo(self):
S = self.S
a = theano.tensor.matrix(dtype=self.dtype)
wu, vu = [out.eval({a: S}) for out in self.op(a, 'U')]
wl, vl = [out.eval({a: S}) for out in self.op(a, 'L')]
assert_array_almost_equal(wu, wl)
assert_array_almost_equal(vu * numpy.sign(vu[0, :]),
vl * numpy.sign(vl[0, :]))
def test_grad(self):
S = self.S
utt.verify_grad(lambda x: self.op(x)[0], [S], rng=self.rng)
utt.verify_grad(lambda x: self.op(x)[1], [S], rng=self.rng)
utt.verify_grad(lambda x: self.op(x, 'U')[0], [S], rng=self.rng)
utt.verify_grad(lambda x: self.op(x, 'U')[1], [S], rng=self.rng)
class test_Eigh_float32(test_Eigh):
dtype = 'float32'
class T_lstsq(unittest.TestCase):
def test_correct_solution(self):
x = tensor.lmatrix()
y = tensor.lmatrix()
z = tensor.lscalar()
b = theano.tensor.nlinalg.lstsq()(x, y, z)
f = function([x, y, z], b)
TestMatrix1 = numpy.asarray([[2, 1], [3, 4]])
TestMatrix2 = numpy.asarray([[17, 20], [43, 50]])
TestScalar = numpy.asarray(1)
f = function([x, y, z], b)
m = f(TestMatrix1, TestMatrix2, TestScalar)
self.assertTrue(numpy.allclose(TestMatrix2, numpy.dot(TestMatrix1, m[0])))
def test_wrong_coefficient_matrix(self):
x = tensor.vector()
y = tensor.vector()
z = tensor.scalar()
b = theano.tensor.nlinalg.lstsq()(x, y, z)
f = function([x, y, z], b)
self.assertRaises(numpy.linalg.linalg.LinAlgError, f, [2, 1], [2, 1], 1)
def test_wrong_rcond_dimension(self):
x = tensor.vector()
y = tensor.vector()
z = tensor.vector()
b = theano.tensor.nlinalg.lstsq()(x, y, z)
f = function([x, y, z], b)
self.assertRaises(numpy.linalg.LinAlgError, f, [2, 1], [2, 1], [2, 1])
class Matrix_power(unittest.TestCase):
def test_numpy_compare(self):
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
Q = matrix_power(A, 3)
fn = function([A], [Q])
a = rng.rand(4, 4).astype(theano.config.floatX)
n_p = numpy.linalg.matrix_power(a, 3)
t_p = fn(a)
assert numpy.allclose(n_p, t_p)
def test_non_square_matrix(self):
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
Q = matrix_power(A, 3)
f = function([A], [Q])
a = rng.rand(4, 3).astype(theano.config.floatX)
self.assertRaises(ValueError, f, a)
class T_NormTests(unittest.TestCase):
def test_wrong_type_of_ord_for_vector(self):
self.assertRaises(ValueError, norm, [2, 1], 'fro')
def test_wrong_type_of_ord_for_matrix(self):
self.assertRaises(ValueError, norm, [[2, 1], [3, 4]], 0)
def test_non_tensorial_input(self):
self.assertRaises(ValueError, norm, 3, None)
def test_tensor_input(self):
self.assertRaises(NotImplementedError, norm, numpy.random.rand(3, 4, 5), None)
def test_numpy_compare(self):
rng = numpy.random.RandomState(utt.fetch_seed())
M = tensor.matrix("A", dtype=theano.config.floatX)
V = tensor.vector("V", dtype=theano.config.floatX)
a = rng.rand(4, 4).astype(theano.config.floatX)
b = rng.rand(4).astype(theano.config.floatX)
A = ( [None, 'fro', 'inf', '-inf', 1, -1, None, 'inf', '-inf', 0, 1, -1, 2, -2],
[M, M, M, M, M, M, V, V, V, V, V, V, V, V],
[a, a, a, a, a, a, b, b, b, b, b, b, b, b],
[None, 'fro', inf, -inf, 1, -1, None, inf, -inf, 0, 1, -1, 2, -2])
for i in range(0, 14):
f = function([A[1][i]], norm(A[1][i], A[0][i]))
t_n = f(A[2][i])
n_n = numpy.linalg.norm(A[2][i], A[3][i])
assert _allclose(n_n, t_n)
\ No newline at end of file
import unittest
import numpy
import numpy.linalg
from numpy.testing import assert_array_almost_equal
from numpy.testing import dec, assert_array_equal, assert_allclose
from numpy import inf
import theano
from theano import tensor, function
from theano.tensor.basic import _allclose
from theano.tests.test_rop import break_op
from theano.tests import unittest_tools as utt
from theano import config
from theano.tensor.slinalg import ( Cholesky,
cholesky,
CholeskyGrad,
Solve,
solve,
Eigvalsh,
EigvalshGrad,
eigvalsh
)
from nose.plugins.skip import SkipTest
from nose.plugins.attrib import attr
from nose.tools import assert_raises
try:
import scipy.linalg
imported_scipy = True
except ImportError:
# some ops (e.g. Cholesky, Solve, A_Xinv_b) won't work
imported_scipy = False
def check_lower_triangular(pd, ch_f):
ch = ch_f(pd)
assert ch[0, pd.shape[1] - 1] == 0
assert ch[pd.shape[0] - 1, 0] != 0
assert numpy.allclose(numpy.dot(ch, ch.T), pd)
assert not numpy.allclose(numpy.dot(ch.T, ch), pd)
def check_upper_triangular(pd, ch_f):
ch = ch_f(pd)
assert ch[4, 0] == 0
assert ch[0, 4] != 0
assert numpy.allclose(numpy.dot(ch.T, ch), pd)
assert not numpy.allclose(numpy.dot(ch, ch.T), pd)
def test_cholesky():
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
pd = numpy.dot(r, r.T)
x = tensor.matrix()
chol = cholesky(x)
# Check the default.
ch_f = function([x], chol)
yield check_lower_triangular, pd, ch_f
# Explicit lower-triangular.
chol = Cholesky(lower=True)(x)
ch_f = function([x], chol)
yield check_lower_triangular, pd, ch_f
# Explicit upper-triangular.
chol = Cholesky(lower=False)(x)
ch_f = function([x], chol)
yield check_upper_triangular, pd, ch_f
def test_cholesky_grad():
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
pd = numpy.dot(r, r.T)
eps = None
if config.floatX == "float64":
eps = 2e-8
# Check the default.
yield (lambda: utt.verify_grad(cholesky, [pd], 3, rng, eps=eps))
# Explicit lower-triangular.
yield (lambda: utt.verify_grad(Cholesky(lower=True), [pd], 3,
rng, eps=eps))
# Explicit upper-triangular.
yield (lambda: utt.verify_grad(Cholesky(lower=False), [pd], 3,
rng, eps=eps))
@attr('slow')
def test_cholesky_and_cholesky_grad_shape():
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
rng = numpy.random.RandomState(utt.fetch_seed())
x = tensor.matrix()
for l in (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x)):
f_chol = theano.function([x], l.shape)
g = tensor.grad(l.sum(), x)
f_cholgrad = theano.function([x], g.shape)
topo_chol = f_chol.maker.fgraph.toposort()
topo_cholgrad = f_cholgrad.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == Cholesky
for node in topo_chol]) == 0
assert sum([node.op.__class__ == CholeskyGrad
for node in topo_cholgrad]) == 0
for shp in [2, 3, 5]:
m = numpy.cov(rng.randn(shp, shp + 10)).astype(config.floatX)
yield numpy.testing.assert_equal, f_chol(m), (shp, shp)
yield numpy.testing.assert_equal, f_cholgrad(m), (shp, shp)
def test_eigvalsh():
if not imported_scipy:
raise SkipTest("Scipy needed for the geigvalsh op.")
import scipy.linalg
A = theano.tensor.dmatrix('a')
B = theano.tensor.dmatrix('b')
f = function([A, B], eigvalsh(A, B))
rng = numpy.random.RandomState(utt.fetch_seed())
a = rng.randn(5, 5)
a = a + a.T
for b in [10 * numpy.eye(5, 5) + rng.randn(5, 5)]:
w = f(a, b)
refw = scipy.linalg.eigvalsh(a, b)
numpy.testing.assert_array_almost_equal(w, refw)
    # We need to test None separately, as otherwise DebugMode will
# complain, as this isn't a valid ndarray.
b = None
B = theano.tensor.NoneConst
f = function([A], eigvalsh(A, B))
w = f(a)
refw = scipy.linalg.eigvalsh(a, b)
numpy.testing.assert_array_almost_equal(w, refw)
def test_eigvalsh_grad():
if not imported_scipy:
raise SkipTest("Scipy needed for the geigvalsh op.")
import scipy.linalg
rng = numpy.random.RandomState(utt.fetch_seed())
a = rng.randn(5, 5)
a = a + a.T
b = 10 * numpy.eye(5, 5) + rng.randn(5, 5)
tensor.verify_grad(lambda a, b: eigvalsh(a, b).dot([1, 2, 3, 4, 5]),
[a, b], rng=numpy.random)
class test_Solve(utt.InferShapeTester):
def setUp(self):
super(test_Solve, self).setUp()
self.op_class = Solve
self.op = Solve()
def test_infer_shape(self):
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
rng = numpy.random.RandomState(utt.fetch_seed())
A = theano.tensor.matrix()
b = theano.tensor.matrix()
self._compile_and_check([A, b], # theano.function inputs
[self.op(A, b)], # theano.function outputs
# A must be square
[numpy.asarray(rng.rand(5, 5),
dtype=config.floatX),
numpy.asarray(rng.rand(5, 1),
dtype=config.floatX)],
self.op_class,
warn=False)
rng = numpy.random.RandomState(utt.fetch_seed())
A = theano.tensor.matrix()
b = theano.tensor.vector()
self._compile_and_check([A, b], # theano.function inputs
[self.op(A, b)], # theano.function outputs
# A must be square
[numpy.asarray(rng.rand(5, 5),
dtype=config.floatX),
numpy.asarray(rng.rand(5),
dtype=config.floatX)],
self.op_class,
warn=False)
......@@ -535,7 +535,7 @@ class _tensor_py_operators:
return theano.tensor.basic.round(self, mode)
def trace(self):
return theano.sandbox.linalg.trace(self)
return theano.tensor.nlinalg.trace(self)
# TO TRUMP NUMPY OPERATORS
__array_priority__ = 1000
......