提交 cd50d5ef authored 作者: lamblin's avatar lamblin

Merge pull request #1436 from nouiz/gpu_extract_diag

Gpu extract diag
...@@ -16,6 +16,9 @@ ...@@ -16,6 +16,9 @@
present in convolutional neural networks (where filters are 3D and pool present in convolutional neural networks (where filters are 3D and pool
over several input channels). over several input channels).
The project `TheanoConv3d2d <https://github.com/jaberg/TheanoConv3d2d>`_
is probably faster than the Conv3d documented here.
.. module:: conv .. module:: conv
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: ops for signal processing :synopsis: ops for signal processing
......
...@@ -2391,8 +2391,58 @@ CudaNdarray_get_strides(CudaNdarray *self, void *closure) ...@@ -2391,8 +2391,58 @@ CudaNdarray_get_strides(CudaNdarray *self, void *closure)
static int static int
CudaNdarray_set_strides(CudaNdarray *self, PyObject *value, void *closure) CudaNdarray_set_strides(CudaNdarray *self, PyObject *value, void *closure)
{ {
PyErr_SetString(PyExc_NotImplementedError, ""); //npy_intp newstrides_bytes[PyTuple_Size(value)];
return -1; if (PyTuple_Check(value)){
if (PyTuple_Size(value) != CudaNdarray_NDIM(self)){
PyErr_SetString(PyExc_ValueError,
"The new strides tuple must have the same length"
" as the number of dimensions");
return -1;
}
}else if (PyList_Check(value)){
if (PyList_Size(value) != CudaNdarray_NDIM(self)){
PyErr_SetString(PyExc_ValueError,
"The new strides list must have the same length"
" as the number of dimensions");
return -1;
}
}else{
PyErr_SetString(PyExc_ValueError,
"The new strides need to be encoded in a tuple or list");
return -1;
}
npy_intp newstrides[CudaNdarray_NDIM(self)];
if (PyTuple_Check(value)){
for(int i=0; i < CudaNdarray_NDIM(self); i++){
newstrides[i] = PyInt_AsLong(PyTuple_GetItem(value, Py_ssize_t(i)));
//newstrides_bytes[i] = newstrides[i] * 4;
}
}else if (PyList_Check(value)){
for(int i=0; i < CudaNdarray_NDIM(self); i++){
newstrides[i] = PyInt_AsLong(PyList_GetItem(value, Py_ssize_t(i)));
//newstrides_bytes[i] = newstrides[i] * 4;
}
}
/*
// Do not do this check, as ExtractDiag needs that, and NumPy does not seem
// to do it.
npy_intp dims[PyTuple_Size(value)];
for(int i=0; i < CudaNdarray_NDIM(self); i++){
dims[i] = CudaNdarray_HOST_DIMS(self)[i];
}
if (!PyArray_CheckStrides(4,
CudaNdarray_NDIM(self),
0, 0,
dims,
newstrides_bytes)){
PyErr_SetString(PyExc_ValueError, "bad new strides");
return -1;
}
*/
for(int i=0; i < CudaNdarray_NDIM(self); i++){
CudaNdarray_set_stride(self, i, newstrides[i]);
}
return 0;
} }
static PyObject * static PyObject *
......
...@@ -289,7 +289,7 @@ def local_gpu_dimshuffle_0(node): ...@@ -289,7 +289,7 @@ def local_gpu_dimshuffle_0(node):
def local_gpu_specifyShape_0(node): def local_gpu_specifyShape_0(node):
""" """
specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape) specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape)
gpu_from_host(specify_shape) -> specifyshape(gpu_from_host) gpu_from_host(specify_shape) -> specify_shape(gpu_from_host)
""" """
if isinstance(node.op, tensor.SpecifyShape): if isinstance(node.op, tensor.SpecifyShape):
input = node.inputs[0] input = node.inputs[0]
...@@ -1438,6 +1438,32 @@ def tensor_to_cuda(x): ...@@ -1438,6 +1438,32 @@ def tensor_to_cuda(x):
return x return x
@register_opt()
@local_optimizer([])
def local_gpu_extract_diagonal(node):
    """
    Move ExtractDiag across host/GPU transfer ops.

    extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal)
    gpu_from_host(extract_diagonal) -> extract_diagonal(gpu_from_host)
    """
    # Imported lazily so module import order does not create a cycle.
    from theano.sandbox import linalg

    # Case 1: the diagonal is taken from a value just copied to the
    # host; extract on the GPU instead and copy only the diagonal back.
    if (isinstance(node.op, linalg.ops.ExtractDiag)
            and isinstance(node.inputs[0].type, theano.tensor.TensorType)):
        x = node.inputs[0]
        if x.owner is not None and isinstance(x.owner.op, HostFromGpu):
            gpu_diag = linalg.extract_diag(gpu_from_host(x))
            return [host_from_gpu(gpu_diag)]

    # Case 2: a host-side diagonal is about to be shipped to the GPU;
    # ship the matrix instead and extract the diagonal there.
    if node.op == gpu_from_host:
        host_in = node.inputs[0]
        owner = host_in.owner
        if (owner is not None
                and isinstance(owner.op, linalg.ops.ExtractDiag)
                and isinstance(owner.inputs[0].type,
                               theano.tensor.TensorType)):
            return [linalg.extract_diag(gpu_from_host(owner.inputs[0]))]

    return False
@register_opt('scan') @register_opt('scan')
@local_optimizer([]) @local_optimizer([])
def gpuScanOptimization(node): def gpuScanOptimization(node):
......
...@@ -941,6 +941,33 @@ def test_base(): ...@@ -941,6 +941,33 @@ def test_base():
e = b.reshape((5,2,2,3)) e = b.reshape((5,2,2,3))
assert e.base is a assert e.base is a
def test_set_strides():
    """Assigning .strides from a tuple or list works; bad lengths raise."""
    arr = cuda_ndarray.CudaNdarray.zeros((5, 5))

    # Assigning a tuple must store exactly the strides given.
    swapped = (arr.strides[1], arr.strides[0])
    arr.strides = swapped
    assert arr.strides == swapped

    # Assigning a list must behave the same as a tuple.
    swapped = (arr.strides[1], arr.strides[0])
    arr.strides = [arr.strides[1], arr.strides[0]]
    assert arr.strides == swapped

    # A strides sequence whose length does not match the number of
    # dimensions (too short or too long) must raise ValueError.
    for bad in [(arr.strides[1],), (1, 1, 1)]:
        try:
            arr.strides = bad
        except ValueError:
            pass
        else:
            assert False
def test_is_c_contiguous(): def test_is_c_contiguous():
a = cuda_ndarray.CudaNdarray.zeros((3,4,5)) a = cuda_ndarray.CudaNdarray.zeros((3,4,5))
assert a.is_c_contiguous() assert a.is_c_contiguous()
......
...@@ -4,9 +4,10 @@ import numpy ...@@ -4,9 +4,10 @@ import numpy
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import theano
from theano.compile.pfunc import pfunc from theano.compile.pfunc import pfunc
from theano import config, tensor from theano import config, tensor
import theano import theano.sandbox.linalg.tests
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
...@@ -381,6 +382,17 @@ def test_erfinvgpu(): ...@@ -381,6 +382,17 @@ def test_erfinvgpu():
assert numpy.allclose(f(xv),f2(xv)) assert numpy.allclose(f(xv),f2(xv))
class test_diag(theano.sandbox.linalg.tests.test_linalg.test_diag):
    # Re-run the generic diag tests from test_linalg with GPU settings:
    # GPU compile mode, CUDA shared constructor, float32, CudaNdarrayType.
    mode = mode_with_gpu
    shared = staticmethod(cuda.shared_constructor)
    floatX = 'float32'
    type = CudaNdarrayType

    def __init__(self, name):
        # NOTE(review): super() is given the *parent* class on purpose, so
        # MRO lookup starts past it — the parent's __init__ (which would
        # overwrite mode/shared/floatX/type above with CPU defaults) is
        # skipped and TestCase.__init__ runs directly. Confirm this stays
        # in sync with test_linalg.test_diag.__init__'s signature.
        super(theano.sandbox.linalg.tests.test_linalg.test_diag,
              self).__init__(name)
if __name__ == '__main__': if __name__ == '__main__':
test_gpualloc() test_gpualloc()
test_opt_gpujoin_onlyajoin() test_opt_gpujoin_onlyajoin()
......
...@@ -684,7 +684,10 @@ solve = Solve() # general solve ...@@ -684,7 +684,10 @@ solve = Solve() # general solve
class ExtractDiag(Op): class ExtractDiag(Op):
""" Return the diagonal of a matrix. """ """ Return the diagonal of a matrix.
:note: works on the GPU.
"""
def __init__(self, view=False): def __init__(self, view=False):
self.view = view self.view = view
if self.view: if self.view:
...@@ -697,10 +700,15 @@ class ExtractDiag(Op): ...@@ -697,10 +700,15 @@ class ExtractDiag(Op):
return hash(type(self)) ^ hash(self.view) return hash(type(self)) ^ hash(self.view)
def make_node(self, _x): def make_node(self, _x):
x = as_tensor_variable(_x) if not isinstance(_x, theano.Variable):
x = as_tensor_variable(_x)
else:
x = _x
if x.type.ndim != 2: if x.type.ndim != 2:
raise TypeError('ExtractDiag only works on matrices', _x) raise TypeError('ExtractDiag only works on matrices', _x)
return Apply(self, [x], [tensor.vector(dtype=x.type.dtype)]) return Apply(self, [x], [x.type.__class__(broadcastable=(False,),
dtype=x.type.dtype)()])
def perform(self, node, ins, outs): def perform(self, node, ins, outs):
""" For some reason numpy.diag(x) is really slow, so we """ For some reason numpy.diag(x) is really slow, so we
......
import unittest
import numpy import numpy
import numpy.linalg import numpy.linalg
from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_almost_equal
...@@ -266,46 +268,7 @@ def test_det_shape(): ...@@ -266,46 +268,7 @@ def test_det_shape():
assert numpy.all(f(r).shape == f_shape(r)) assert numpy.all(f(r).shape == f_shape(r))
def test_alloc_diag(): class test_diag(unittest.TestCase):
rng = numpy.random.RandomState(utt.fetch_seed())
x = theano.tensor.vector()
g = alloc_diag(x)
f = theano.function([x], g)
# test "normal" scenario (5x5 matrix) and special cases of 0x0 and 1x1
for shp in [5, 0, 1]:
m = rng.rand(shp).astype(config.floatX)
v = numpy.diag(m)
r = f(m)
# The right diagonal is extracted
assert (r == v).all()
# Test we accept only vectors
xx = theano.tensor.matrix()
ok = False
try:
alloc_diag(xx)
except TypeError:
ok = True
assert ok
# Test infer_shape
f = theano.function([x], g.shape)
topo = f.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == AllocDiag for node in topo]) == 0
for shp in [5, 0, 1]:
m = rng.rand(shp).astype(config.floatX)
assert (f(m) == m.shape).all()
def test_alloc_diag_grad():
rng = numpy.random.RandomState(utt.fetch_seed())
x = rng.rand(5)
tensor.verify_grad(alloc_diag, [x], rng=rng)
def test_diag():
""" """
Test that linalg.diag has the same behavior as numpy.diag. Test that linalg.diag has the same behavior as numpy.diag.
numpy.diag has two behaviors: numpy.diag has two behaviors:
...@@ -315,72 +278,130 @@ def test_diag(): ...@@ -315,72 +278,130 @@ def test_diag():
matrix. matrix.
(1) and (2) are tested by test_alloc_diag and test_extract_diag (1) and (2) are tested by test_alloc_diag and test_extract_diag
respectively. This test makes sure that linalg.diag instantiates respectively.
test_diag test makes sure that linalg.diag instantiates
the right op based on the dimension of the input. the right op based on the dimension of the input.
""" """
def __init__(self, name, mode=None, shared=tensor.shared,
floatX=None, type=tensor.TensorType):
self.mode = mode
self.shared = shared
if floatX is None:
floatX = config.floatX
self.floatX = floatX
self.type = type
super(test_diag, self).__init__(name)
def test_alloc_diag(self):
rng = numpy.random.RandomState(utt.fetch_seed())
x = theano.tensor.vector()
g = alloc_diag(x)
f = theano.function([x], g)
# test "normal" scenario (5x5 matrix) and special cases of 0x0 and 1x1
for shp in [5, 0, 1]:
m = rng.rand(shp).astype(self.floatX)
v = numpy.diag(m)
r = f(m)
# The right matrix is created
assert (r == v).all()
# Test we accept only vectors
xx = theano.tensor.matrix()
ok = False
try:
alloc_diag(xx)
except TypeError:
ok = True
assert ok
# Test infer_shape
f = theano.function([x], g.shape)
topo = f.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == AllocDiag for node in topo]) == 0
for shp in [5, 0, 1]:
m = rng.rand(shp).astype(self.floatX)
assert (f(m) == m.shape).all()
# test that it builds a matrix with given diagonal when using vector inputs def test_alloc_diag_grad(self):
x = theano.tensor.vector() rng = numpy.random.RandomState(utt.fetch_seed())
y = diag(x) x = rng.rand(5)
assert y.owner.op.__class__ == AllocDiag tensor.verify_grad(alloc_diag, [x], rng=rng)
# test that it extracts the diagonal when using matrix input def test_diag(self):
x = theano.tensor.matrix() # test that it builds a matrix with given diagonal when using
y = extract_diag(x) # vector inputs
assert y.owner.op.__class__ == ExtractDiag x = theano.tensor.vector()
y = diag(x)
# other types should raise error assert y.owner.op.__class__ == AllocDiag
x = theano.tensor.tensor3()
ok = False # test that it extracts the diagonal when using matrix input
try: x = theano.tensor.matrix()
y = extract_diag(x) y = extract_diag(x)
except TypeError: assert y.owner.op.__class__ == ExtractDiag
ok = True
assert ok # other types should raise error
x = theano.tensor.tensor3()
ok = False
# not testing the view=True case since it is not used anywhere. try:
def test_extract_diag(): y = extract_diag(x)
rng = numpy.random.RandomState(utt.fetch_seed()) except TypeError:
x = theano.tensor.matrix() ok = True
g = extract_diag(x) assert ok
f = theano.function([x], g)
# not testing the view=True case since it is not used anywhere.
for shp in [(2, 3), (3, 2), (3, 3), (1, 1), (0, 0)]: def test_extract_diag(self):
m = rng.rand(*shp).astype(config.floatX) rng = numpy.random.RandomState(utt.fetch_seed())
v = numpy.diag(m) m = rng.rand(2, 3).astype(self.floatX)
r = f(m) x = self.shared(m)
# The right diagonal is extracted g = extract_diag(x)
assert (r == v).all() f = theano.function([], g)
assert [isinstance(node.inputs[0].type, self.type)
# Test we accept only matrix for node in f.maker.fgraph.toposort()
xx = theano.tensor.vector() if isinstance(node.op, ExtractDiag)] == [True]
ok = False
try: for shp in [(2, 3), (3, 2), (3, 3), (1, 1), (0, 0)]:
extract_diag(xx) m = rng.rand(*shp).astype(self.floatX)
except TypeError: x.set_value(m)
ok = True v = numpy.diag(m)
assert ok r = f()
# The right diagonal is extracted
# Test infer_shape assert (r == v).all()
f = theano.function([x], g.shape)
topo = f.maker.fgraph.toposort() # Test we accept only matrix
if config.mode != 'FAST_COMPILE': xx = theano.tensor.vector()
assert sum([node.op.__class__ == ExtractDiag for node in topo]) == 0 ok = False
for shp in [(2, 3), (3, 2), (3, 3)]: try:
m = rng.rand(*shp).astype(config.floatX) extract_diag(xx)
assert f(m) == min(shp) except TypeError:
ok = True
assert ok
def test_extract_diag_grad():
rng = numpy.random.RandomState(utt.fetch_seed()) # Test infer_shape
x = rng.rand(5, 4) f = theano.function([], g.shape)
tensor.verify_grad(extract_diag, [x], rng=rng) topo = f.maker.fgraph.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == ExtractDiag
for node in topo]) == 0
for shp in [(2, 3), (3, 2), (3, 3)]:
m = rng.rand(*shp).astype(self.floatX)
x.set_value(m)
assert f() == min(shp)
def test_extract_diag_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
x = rng.rand(5, 4).astype(self.floatX)
tensor.verify_grad(extract_diag, [x], rng=rng)
def test_extract_diag_empty(self):
c = self.shared(numpy.array([[], []], self.floatX))
f = theano.function([], extract_diag(c), mode=self.mode)
def test_extract_diag_empty(): assert [isinstance(node.inputs[0].type, self.type)
c = theano.tensor.constant(numpy.array([[], []], 'int32')) for node in f.maker.fgraph.toposort()
extract_diag(c).eval() if isinstance(node.op, ExtractDiag)] == [True]
def test_trace(): def test_trace():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论