提交 09017085 authored 作者: Frederic's avatar Frederic

Make ExtractDiag work on the GPU.

上级 5b16f045
...@@ -1438,6 +1438,32 @@ def tensor_to_cuda(x): ...@@ -1438,6 +1438,32 @@ def tensor_to_cuda(x):
return x return x
@register_opt()
@local_optimizer([])
def local_gpu_extract_diagonal(node):
    """
    Move ExtractDiag to the GPU so the diagonal is computed there.

    Rewrites applied:
    extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal)
    gpu_from_host(extract_diagonal) -> extract_diagonal(gpu_from_host)

    Returns a one-element replacement list on success, False otherwise.
    """
    # Imported locally, presumably to avoid a circular import at module
    # load time -- TODO confirm.
    from theano.sandbox import linalg
    if (isinstance(node.op, linalg.ops.ExtractDiag) and
        isinstance(node.inputs[0].type,
                   theano.tensor.TensorType)):
        inp = node.inputs[0]
        if inp.owner and isinstance(inp.owner.op, HostFromGpu):
            # The matrix already lives on the GPU: extract the diagonal
            # there and transfer only the (smaller) vector to the host.
            return [host_from_gpu(linalg.extract_diag(gpu_from_host(inp)))]
    if node.op == gpu_from_host:
        host_input = node.inputs[0]
        if (host_input.owner and
            isinstance(host_input.owner.op, linalg.ops.ExtractDiag) and
            isinstance(host_input.owner.inputs[0].type,
                       theano.tensor.TensorType)):
            diag_node = host_input.owner
            # The result is wanted on the GPU: transfer the input matrix
            # instead and run ExtractDiag directly on the GPU.
            return [linalg.extract_diag(
                gpu_from_host(diag_node.inputs[0]))]
    return False
@register_opt('scan') @register_opt('scan')
@local_optimizer([]) @local_optimizer([])
def gpuScanOptimization(node): def gpuScanOptimization(node):
......
...@@ -4,9 +4,10 @@ import numpy ...@@ -4,9 +4,10 @@ import numpy
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import theano
from theano.compile.pfunc import pfunc from theano.compile.pfunc import pfunc
from theano import config, tensor from theano import config, tensor
import theano import theano.sandbox.linalg.tests
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
...@@ -381,6 +382,17 @@ def test_erfinvgpu(): ...@@ -381,6 +382,17 @@ def test_erfinvgpu():
assert numpy.allclose(f(xv),f2(xv)) assert numpy.allclose(f(xv),f2(xv))
class test_diag(theano.sandbox.linalg.tests.test_linalg.test_diag):
    """GPU variant of the CPU diag tests: rerun the inherited test
    methods with the GPU mode, the CUDA shared constructor, float32
    and CudaNdarrayType instead of the CPU defaults."""
    mode = mode_with_gpu
    shared = staticmethod(cuda.shared_constructor)
    floatX = 'float32'
    type = CudaNdarrayType

    def __init__(self, name):
        # NOTE: the PARENT class is passed to super() on purpose, so the
        # parent's __init__ is skipped -- it would overwrite the class
        # attributes above with its CPU defaults.  This dispatches
        # straight to unittest.TestCase.__init__(name).
        super(theano.sandbox.linalg.tests.test_linalg.test_diag,
              self).__init__(name)
if __name__ == '__main__': if __name__ == '__main__':
test_gpualloc() test_gpualloc()
test_opt_gpujoin_onlyajoin() test_opt_gpujoin_onlyajoin()
......
...@@ -684,7 +684,10 @@ solve = Solve() # general solve ...@@ -684,7 +684,10 @@ solve = Solve() # general solve
class ExtractDiag(Op): class ExtractDiag(Op):
""" Return the diagonal of a matrix. """ """ Return the diagonal of a matrix.
:note: work on the GPU.
"""
def __init__(self, view=False): def __init__(self, view=False):
self.view = view self.view = view
if self.view: if self.view:
...@@ -697,10 +700,15 @@ class ExtractDiag(Op): ...@@ -697,10 +700,15 @@ class ExtractDiag(Op):
return hash(type(self)) ^ hash(self.view) return hash(type(self)) ^ hash(self.view)
def make_node(self, _x): def make_node(self, _x):
if not isinstance(_x, theano.Variable):
x = as_tensor_variable(_x) x = as_tensor_variable(_x)
else:
x = _x
if x.type.ndim != 2: if x.type.ndim != 2:
raise TypeError('ExtractDiag only works on matrices', _x) raise TypeError('ExtractDiag only works on matrices', _x)
return Apply(self, [x], [tensor.vector(dtype=x.type.dtype)]) return Apply(self, [x], [x.type.__class__(broadcastable=(False,),
dtype=x.type.dtype)()])
def perform(self, node, ins, outs): def perform(self, node, ins, outs):
""" For some reason numpy.diag(x) is really slow, so we """ For some reason numpy.diag(x) is really slow, so we
......
import unittest
import numpy import numpy
import numpy.linalg import numpy.linalg
from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_almost_equal
...@@ -266,7 +268,32 @@ def test_det_shape(): ...@@ -266,7 +268,32 @@ def test_det_shape():
assert numpy.all(f(r).shape == f_shape(r)) assert numpy.all(f(r).shape == f_shape(r))
def test_alloc_diag(): class test_diag(unittest.TestCase):
"""
Test that linalg.diag has the same behavior as numpy.diag.
numpy.diag has two behaviors:
(1) when given a vector, it returns a matrix with that vector as the
diagonal.
(2) when given a matrix, returns a vector which is the diagonal of the
matrix.
(1) and (2) are tested by test_alloc_diag and test_extract_diag
respectively.
test_diag test makes sure that linalg.diag instantiates
the right op based on the dimension of the input.
"""
def __init__(self, name, mode=None, shared=tensor.shared,
             floatX=None, type=tensor.TensorType):
    """Store the test configuration so GPU subclasses can rerun the
    same test methods with a different mode, shared constructor,
    dtype and tensor type."""
    self.mode = mode
    self.shared = shared
    # Fall back to the configured default dtype when none is given.
    self.floatX = config.floatX if floatX is None else floatX
    self.type = type
    super(test_diag, self).__init__(name)
def test_alloc_diag(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x = theano.tensor.vector() x = theano.tensor.vector()
g = alloc_diag(x) g = alloc_diag(x)
...@@ -274,10 +301,10 @@ def test_alloc_diag(): ...@@ -274,10 +301,10 @@ def test_alloc_diag():
# test "normal" scenario (5x5 matrix) and special cases of 0x0 and 1x1 # test "normal" scenario (5x5 matrix) and special cases of 0x0 and 1x1
for shp in [5, 0, 1]: for shp in [5, 0, 1]:
m = rng.rand(shp).astype(config.floatX) m = rng.rand(shp).astype(self.floatX)
v = numpy.diag(m) v = numpy.diag(m)
r = f(m) r = f(m)
# The right diagonal is extracted # The right matrix is created
assert (r == v).all() assert (r == v).all()
# Test we accept only vectors # Test we accept only vectors
...@@ -295,31 +322,17 @@ def test_alloc_diag(): ...@@ -295,31 +322,17 @@ def test_alloc_diag():
if config.mode != 'FAST_COMPILE': if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == AllocDiag for node in topo]) == 0 assert sum([node.op.__class__ == AllocDiag for node in topo]) == 0
for shp in [5, 0, 1]: for shp in [5, 0, 1]:
m = rng.rand(shp).astype(config.floatX) m = rng.rand(shp).astype(self.floatX)
assert (f(m) == m.shape).all() assert (f(m) == m.shape).all()
def test_alloc_diag_grad(self):
def test_alloc_diag_grad():
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x = rng.rand(5) x = rng.rand(5)
tensor.verify_grad(alloc_diag, [x], rng=rng) tensor.verify_grad(alloc_diag, [x], rng=rng)
def test_diag(self):
def test_diag(): # test that it builds a matrix with given diagonal when using
""" # vector inputs
Test that linalg.diag has the same behavior as numpy.diag.
numpy.diag has two behaviors:
(1) when given a vector, it returns a matrix with that vector as the
diagonal.
(2) when given a matrix, returns a vector which is the diagonal of the
matrix.
(1) and (2) are tested by test_alloc_diag and test_extract_diag
respectively. This test makes sure that linalg.diag instantiates
the right op based on the dimension of the input.
"""
# test that it builds a matrix with given diagonal when using vector inputs
x = theano.tensor.vector() x = theano.tensor.vector()
y = diag(x) y = diag(x)
assert y.owner.op.__class__ == AllocDiag assert y.owner.op.__class__ == AllocDiag
...@@ -338,18 +351,22 @@ def test_diag(): ...@@ -338,18 +351,22 @@ def test_diag():
ok = True ok = True
assert ok assert ok
# not testing the view=True case since it is not used anywhere.
# not testing the view=True case since it is not used anywhere. def test_extract_diag(self):
def test_extract_diag():
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x = theano.tensor.matrix() m = rng.rand(2, 3).astype(self.floatX)
x = self.shared(m)
g = extract_diag(x) g = extract_diag(x)
f = theano.function([x], g) f = theano.function([], g)
assert [isinstance(node.inputs[0].type, self.type)
for node in f.maker.fgraph.toposort()
if isinstance(node.op, ExtractDiag)] == [True]
for shp in [(2, 3), (3, 2), (3, 3), (1, 1), (0, 0)]: for shp in [(2, 3), (3, 2), (3, 3), (1, 1), (0, 0)]:
m = rng.rand(*shp).astype(config.floatX) m = rng.rand(*shp).astype(self.floatX)
x.set_value(m)
v = numpy.diag(m) v = numpy.diag(m)
r = f(m) r = f()
# The right diagonal is extracted # The right diagonal is extracted
assert (r == v).all() assert (r == v).all()
...@@ -363,24 +380,28 @@ def test_extract_diag(): ...@@ -363,24 +380,28 @@ def test_extract_diag():
assert ok assert ok
# Test infer_shape # Test infer_shape
f = theano.function([x], g.shape) f = theano.function([], g.shape)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE': if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == ExtractDiag for node in topo]) == 0 assert sum([node.op.__class__ == ExtractDiag
for node in topo]) == 0
for shp in [(2, 3), (3, 2), (3, 3)]: for shp in [(2, 3), (3, 2), (3, 3)]:
m = rng.rand(*shp).astype(config.floatX) m = rng.rand(*shp).astype(self.floatX)
assert f(m) == min(shp) x.set_value(m)
assert f() == min(shp)
def test_extract_diag_grad(): def test_extract_diag_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x = rng.rand(5, 4) x = rng.rand(5, 4).astype(self.floatX)
tensor.verify_grad(extract_diag, [x], rng=rng) tensor.verify_grad(extract_diag, [x], rng=rng)
def test_extract_diag_empty(self):
c = self.shared(numpy.array([[], []], self.floatX))
f = theano.function([], extract_diag(c), mode=self.mode)
def test_extract_diag_empty(): assert [isinstance(node.inputs[0].type, self.type)
c = theano.tensor.constant(numpy.array([[], []], 'int32')) for node in f.maker.fgraph.toposort()
extract_diag(c).eval() if isinstance(node.op, ExtractDiag)] == [True]
def test_trace(): def test_trace():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论