提交 def19585 authored 作者: hantek's avatar hantek

Merge branch 'allocdiag' of https://github.com/hantek/Theano into allocdiag

...@@ -1182,3 +1182,32 @@ class GpuDiagonal(Subtensor): ...@@ -1182,3 +1182,32 @@ class GpuDiagonal(Subtensor):
diag_size = T.minimum(dim1, dim2) diag_size = T.minimum(dim1, dim2)
out_shape.append(diag_size) out_shape.append(diag_size)
return [tuple(out_shape)] return [tuple(out_shape)]
class GpuAllocDiag(Subtensor):
    """GPU op intended to build a matrix with the input vector on a diagonal.

    NOTE(review): this op looks like work in progress — `perform` still
    contains placeholder pseudo-code and cannot execute as written.
    """

    __props__ = ("offset",)

    def __init__(self, offset=0, axis1=0, axis2=1, view=False):
        # NOTE(review): axis1, axis2 and view are accepted but silently
        # discarded; only `offset` is stored (and `__props__` agrees).
        # Confirm whether the extra parameters should be supported or
        # dropped from the signature.
        self.offset = offset

    def make_node(self, _x):
        # Move the input into the appropriate GPU context and require a
        # 1-D input, mirroring numpy.diag's vector->matrix direction.
        ctx_name = infer_context_name(_x)
        x = as_gpuarray_variable(_x, ctx_name)
        if x.ndim != 1:
            raise ValueError('AllocDiag argument must be a vector!', x)
        # NOTE(review): the output type is constructed from dtype only;
        # x.type.__class__ may also need a broadcastable pattern and a
        # context — verify against the GpuArrayType constructor.
        return gof.Apply(self, [x], [x.type.__class__(dtype=x.dtype)()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        # NOTE(review): the two lines below are placeholder pseudo-code,
        # not valid Python — the intent appears to be: allocate a zeroed
        # GPU matrix, then write `x` into its `offset` diagonal.
        z = alloc zero mtx in cudandarray
        z = set subtensor

    def grad(self, inputs, gout):
        (input_x,) = inputs
        # Gradient is deliberately not implemented for this GPU op.
        return [grad_not_implemented(self, 0, input_x)]

    def infer_shape(self, node, shapes):
        # A length-n input vector yields an (n, n) output.
        # NOTE(review): this ignores `offset`; numpy.diag(v, k) returns an
        # (n + |k|, n + |k|) matrix — confirm intended behaviour.
        return [(shapes[0][0],) * 2]
...@@ -14,7 +14,8 @@ from ..subtensor import (GpuIncSubtensor, GpuSubtensor, ...@@ -14,7 +14,8 @@ from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor, GpuAdvancedSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1_dev20,
GpuDiagonal) GpuDiagonal,
GpuAllocDiag)
from ..type import gpuarray_shared_constructor from ..type import gpuarray_shared_constructor
from .config import mode_with_gpu from .config import mode_with_gpu
...@@ -195,3 +196,15 @@ class test_gpudiagonal(unittest.TestCase): ...@@ -195,3 +196,15 @@ class test_gpudiagonal(unittest.TestCase):
assert np.allclose( assert np.allclose(
GpuDiagonal(offset, axis1, axis2)(x).eval({x: np_x}), GpuDiagonal(offset, axis1, axis2)(x).eval({x: np_x}),
np_x.diagonal(offset, axis1, axis2)) np_x.diagonal(offset, axis1, axis2))
class test_gpuallocdiag(unittest.TestCase):
    """Check GpuAllocDiag against numpy.diag for several offsets."""

    def test_matrix(self):
        # GpuAllocDiag takes a *vector* and returns a matrix with that
        # vector on the requested diagonal, so the symbolic input must be
        # a vector: the original `tensor.matrix()` would have been rejected
        # by GpuAllocDiag.make_node and could not accept the 1-D test data.
        x = tensor.vector()
        np_x = numpy.arange(7).astype(theano.config.floatX)

        # Main diagonal (offset 0).
        fn = theano.function([x], GpuAllocDiag()(x), mode=mode_with_gpu)
        assert numpy.allclose(fn(np_x), numpy.diag(np_x))

        # Super-diagonal (offset 2).
        fn = theano.function([x], GpuAllocDiag(2)(x), mode=mode_with_gpu)
        assert numpy.allclose(fn(np_x), numpy.diag(np_x, 2))

        # Sub-diagonal (offset -3).
        fn = theano.function([x], GpuAllocDiag(-3)(x), mode=mode_with_gpu)
        assert numpy.allclose(fn(np_x), numpy.diag(np_x, -3))
...@@ -6172,9 +6172,41 @@ def diagonal(a, offset=0, axis1=0, axis2=1): ...@@ -6172,9 +6172,41 @@ def diagonal(a, offset=0, axis1=0, axis2=1):
return ExtractDiag(offset, axis1, axis2)(a) return ExtractDiag(offset, axis1, axis2)(a)
class Diag(Op): class AllocDiag(Op):
"""
An op that copies a vector to the diagonal of an empty matrix. It does the
inverse of ExtractDiag.
__props__ = () __props__ = ()
Usage: T.AllocDiag()(x)
`x` should be a tensor vector. The parenthesis in the front should indicate
which main diagonal the vector value goes into. By default it is set to (
`0`, which corresponds to setting the values of x to the main diagonal in
the returned matrix. Currently the gradient is valid only when `offset=0`.
Parameters
----------
offset : int
Indicates which diagonal to put `x` into. Defaults to `0`.
x: symbolic vector
A tensor vector consists of diagonal values.
Returns
-------
tensor : symbolic tenstor
A tensor with passed vector values at its corresponding diagonal.
"""
__props__ = ("offset", )
default_offset = 0
def __init__(self, offset=0):
if numpy_diagonal_return_view:
self.view_map = {0: [0]}
self.offset = offset
def make_node(self, diag): def make_node(self, diag):
diag = as_tensor_variable(diag) diag = as_tensor_variable(diag)
...@@ -6185,24 +6217,50 @@ class Diag(Op): ...@@ -6185,24 +6217,50 @@ class Diag(Op):
def perform(self, node, inputs, outputs): def perform(self, node, inputs, outputs):
(z,) = outputs (z,) = outputs
z[0] = numpy.diag(inputs[0]) z[0] = numpy.diag(inputs[0], self.offset)
def grad(self, inputs, gout): def grad(self, inputs, gout):
(gz,) = gout (gz,) = gout
return [diagonal(gz)] if self.has_default_props():
return [diagonal(gz)]
else:
return [grad_not_implemented(self, 0, inputs[0])]
def infer_shape(self, nodes, shapes): def infer_shape(self, nodes, shapes):
return [(shapes[0][0],) * 2] return [(shapes[0][0],) * 2]
def has_default_props(self):
return self.offset == self.default_offset
def diag(v, k=0):
    """
    A helper dispatching between theano.tensor.ExtractDiag and
    theano.tensor.AllocDiag, mirroring numpy.diag.  When the passed
    variable `v` has ``v.ndim >= 2`` it builds an ExtractDiag instance and
    returns a vector holding `v`'s k-th diagonal; when ``v.ndim == 1`` it
    builds an AllocDiag instance and returns a matrix with `v` on its main
    diagonal.

    Parameters
    ----------
    v : symbolic tensor
        Vector to place on a diagonal, or tensor (ndim >= 2) to extract a
        diagonal from.
    k : int
        Diagonal offset.  Only ``k == 0`` is supported for vector inputs:
        AllocDiag's shape inference reports a square (n, n) result
        regardless of offset, which only holds for the main diagonal, so
        other offsets are rejected rather than silently mis-shaped.

    Returns
    -------
    tensor : symbolic tensor
        Matrix with `v` on its diagonal, or the extracted diagonal vector.

    Raises
    ------
    ValueError
        If `v` is 0-dimensional.
    """
    if v.ndim == 1:
        # NOTE: AllocDiag does accept an offset, but until its infer_shape
        # accounts for it, only the main diagonal is exposed here.
        assert k == 0, "diagonals other than main are not implemented"
        return AllocDiag()(v)
    elif v.ndim >= 2:
        return diagonal(v, k)
    else:
        # Fixed wording (was the ungrammatical "Input must has v.dim >= 1.",
        # which also referenced a non-existent `v.dim` attribute).
        raise ValueError("Input must have v.ndim >= 1.")
def stacklists(arg): def stacklists(arg):
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import logging import logging
import warnings
import numpy import numpy
from six.moves import xrange from six.moves import xrange
...@@ -145,6 +145,10 @@ class AllocDiag(Op): ...@@ -145,6 +145,10 @@ class AllocDiag(Op):
__props__ = () __props__ = ()
def make_node(self, _x): def make_node(self, _x):
warnings.warn("DeprecationWarning: theano.tensor.nlinalg.AllocDiag"
"is deprecated, please use theano.tensor.AllocDiag"
"instead.",
category=DeprecationWarning)
x = as_tensor_variable(_x) x = as_tensor_variable(_x)
if x.type.ndim != 1: if x.type.ndim != 1:
raise TypeError('AllocDiag only works on vectors', _x) raise TypeError('AllocDiag only works on vectors', _x)
...@@ -184,6 +188,10 @@ class ExtractDiag(Op): ...@@ -184,6 +188,10 @@ class ExtractDiag(Op):
self.view_map = {0: [0]} self.view_map = {0: [0]}
def make_node(self, _x): def make_node(self, _x):
warnings.warn("DeprecationWarning: theano.tensor.nlinalg.ExtractDiag"
"is deprecated, please use theano.tensor.ExtractDiag"
"instead.",
category=DeprecationWarning)
if not isinstance(_x, theano.Variable): if not isinstance(_x, theano.Variable):
x = as_tensor_variable(_x) x = as_tensor_variable(_x)
else: else:
......
...@@ -32,7 +32,7 @@ from theano.tensor import (_shared, wvector, bvector, ...@@ -32,7 +32,7 @@ from theano.tensor import (_shared, wvector, bvector,
horizontal_stack, vertical_stack, argmax, get_vector_length, horizontal_stack, vertical_stack, argmax, get_vector_length,
fscalar, zeros_like, sum, tensor3, vector, add, addbroadcast, fscalar, zeros_like, sum, tensor3, vector, add, addbroadcast,
alloc, as_tensor_variable, tensor_from_scalar, ARange, alloc, as_tensor_variable, tensor_from_scalar, ARange,
clip, constant, default, dot, batched_dot, clip, constant, default, diag, diagonal, dot, batched_dot,
dmatrix, dscalar, dvector, eq, eye, fill, flatten, inverse_permutation, dmatrix, dscalar, dvector, eq, eye, fill, flatten, inverse_permutation,
tensor4, permute_row_elements, Flatten, fmatrix, fscalars, grad, tensor4, permute_row_elements, Flatten, fmatrix, fscalars, grad,
inplace, iscalar, matrix, minimum, matrices, maximum, mul, neq, inplace, iscalar, matrix, minimum, matrices, maximum, mul, neq,
...@@ -45,7 +45,7 @@ from theano.tensor import (_shared, wvector, bvector, ...@@ -45,7 +45,7 @@ from theano.tensor import (_shared, wvector, bvector,
tile, patternbroadcast, Eye, Shape, Dot, PermuteRowElements, tile, patternbroadcast, Eye, Shape, Dot, PermuteRowElements,
ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc, ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
dtensor3, SpecifyShape, Mean, dtensor3, SpecifyShape, Mean,
itensor3, Tile, switch, ExtractDiag, Diag, itensor3, Tile, switch, ExtractDiag, AllocDiag,
nonzero, flatnonzero, nonzero_values, nonzero, flatnonzero, nonzero_values,
stacklists, DimShuffle, hessian, ptp, power, stacklists, DimShuffle, hessian, ptp, power,
swapaxes, choose, Choose, NoneConst, AllocEmpty, swapaxes, choose, Choose, NoneConst, AllocEmpty,
...@@ -7332,6 +7332,98 @@ class test_size(unittest.TestCase): ...@@ -7332,6 +7332,98 @@ class test_size(unittest.TestCase):
assert y.size == function([], x.size)() assert y.size == function([], x.size)()
class test_diag(unittest.TestCase):
    """
    Check that tensor.diag mirrors numpy.diag.

    numpy.diag has two behaviours: given a vector it allocates a matrix
    with that vector on the diagonal, and given a matrix it extracts the
    diagonal as a vector.  The tests below confirm that diag dispatches to
    AllocDiag or ExtractDiag according to the input's dimensionality, that
    shape inference works without computing the result, and that gradients
    verify numerically.
    """

    def __init__(self, name, mode=None, shared=tensor._shared,
                 floatX=None, type=tensor.TensorType):
        self.mode = mode
        self.shared = shared
        self.floatX = config.floatX if floatX is None else floatX
        self.type = type
        super(test_diag, self).__init__(name)

    def test_diag(self):
        rng = numpy.random.RandomState(utt.fetch_seed())

        # A vector input must build an AllocDiag node.
        vec = theano.tensor.vector()
        out = diag(vec)
        assert isinstance(out.owner.op, AllocDiag)
        fn = theano.function([vec], out)
        for length in [5, 0, 1]:
            data = rng.rand(length).astype(self.floatX)
            # The produced matrix matches numpy.diag exactly.
            assert (fn(data) == numpy.diag(data)).all()

        # A matrix input must build an ExtractDiag node.
        shared_mat = self.shared(rng.rand(3, 5))
        out = diag(shared_mat)
        assert isinstance(out.owner.op, ExtractDiag)
        fn = theano.function([], out)
        for shape in [(5, 3), (3, 5), (5, 1), (1, 5), (5, 0), (0, 5),
                      (1, 0), (0, 1)]:
            data = rng.rand(*shape).astype(self.floatX)
            shared_mat.set_value(data)
            # The extracted diagonal matches numpy.diag exactly.
            assert (fn() == numpy.diag(data)).all()

        # A scalar input is rejected.
        scalar_in = theano.tensor.scalar()
        numpy.testing.assert_raises(ValueError, diag, scalar_in)

    def test_infer_shape(self):
        rng = numpy.random.RandomState(utt.fetch_seed())

        # The shape of diag(vector) is inferable without an AllocDiag node
        # surviving in the compiled graph.
        vec = theano.tensor.vector()
        shape_fn = theano.function([vec], diag(vec).shape)
        graph = shape_fn.maker.fgraph.toposort()
        if config.mode != 'FAST_COMPILE':
            assert not any(isinstance(node.op, AllocDiag) for node in graph)
        for length in [5, 0, 1]:
            data = rng.rand(length).astype(self.floatX)
            assert (shape_fn(data) == numpy.diag(data).shape).all()

        # Likewise, diag(matrix).shape needs no ExtractDiag node.
        mat = theano.tensor.matrix()
        shape_fn = theano.function([mat], diag(mat).shape)
        graph = shape_fn.maker.fgraph.toposort()
        if config.mode != 'FAST_COMPILE':
            assert not any(isinstance(node.op, ExtractDiag) for node in graph)
        for shape in [(5, 3), (3, 5), (5, 1), (1, 5), (5, 0), (0, 5),
                      (1, 0), (0, 1)]:
            data = rng.rand(*shape).astype(self.floatX)
            assert (shape_fn(data) == numpy.diag(data).shape).all()

    def test_diag_grad(self):
        rng = numpy.random.RandomState(utt.fetch_seed())
        # Numerically verify gradients for both directions of diag.
        tensor.verify_grad(diag, [rng.rand(5)], rng=rng)
        tensor.verify_grad(diag, [rng.rand(5, 3)], rng=rng)
class test_numpy_assumptions(unittest.TestCase): class test_numpy_assumptions(unittest.TestCase):
""" """
Verify that some assumptions Theano makes on Numpy's behavior still hold. Verify that some assumptions Theano makes on Numpy's behavior still hold.
...@@ -7590,11 +7682,11 @@ class TestInferShape(utt.InferShapeTester): ...@@ -7590,11 +7682,11 @@ class TestInferShape(utt.InferShapeTester):
self._compile_and_check([atens3], [atens3_diag], self._compile_and_check([atens3], [atens3_diag],
[atens3_val], ExtractDiag) [atens3_val], ExtractDiag)
# Diag # AllocDiag
advec = dvector() advec = dvector()
advec_val = rand(4) advec_val = rand(4)
self._compile_and_check([advec], [Diag()(advec)], self._compile_and_check([advec], [AllocDiag()(advec)],
[advec_val], Diag) [advec_val], AllocDiag)
# Shape # Shape
# 'opt.Makevector' precludes optimizer from disentangling # 'opt.Makevector' precludes optimizer from disentangling
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论