提交 cd50d5ef authored 作者: lamblin's avatar lamblin

Merge pull request #1436 from nouiz/gpu_extract_diag

Gpu extract diag
......@@ -16,6 +16,9 @@
present in convolutional neural networks (where filters are 3D and pool
over several input channels).
The project `TheanoConv3d2d <https://github.com/jaberg/TheanoConv3d2d>`_
is probably faster than the Conv3d documented here.
.. module:: conv
:platform: Unix, Windows
:synopsis: ops for signal processing
......
......@@ -2391,8 +2391,58 @@ CudaNdarray_get_strides(CudaNdarray *self, void *closure)
// Setter for the `strides` attribute of a CudaNdarray.
//
// `value` must be a tuple or a list holding exactly one integer per
// dimension of `self`.  Returns 0 on success.  Returns -1 with a Python
// exception set on failure; every failure path returns before any stride
// of `self` has been modified.
static int
CudaNdarray_set_strides(CudaNdarray *self, PyObject *value, void *closure)
{
    // A setter is called with value == NULL for `del obj.strides`; the
    // type-check macros below would dereference that NULL pointer.
    if (value == NULL){
        PyErr_SetString(PyExc_AttributeError,
                        "Cannot delete the strides attribute");
        return -1;
    }

    const int nd = CudaNdarray_NDIM(self);
    const int is_tuple = PyTuple_Check(value);

    if (is_tuple){
        if (PyTuple_Size(value) != nd){
            PyErr_SetString(PyExc_ValueError,
                            "The new strides tuple must have the same length"
                            " as the number of dimensions");
            return -1;
        }
    }else if (PyList_Check(value)){
        if (PyList_Size(value) != nd){
            PyErr_SetString(PyExc_ValueError,
                            "The new strides list must have the same length"
                            " as the number of dimensions");
            return -1;
        }
    }else{
        PyErr_SetString(PyExc_ValueError,
                        "The new strides need to be encoded in a tuple or list");
        return -1;
    }

    // Convert every entry before touching `self`, so that a bad entry
    // cannot leave the array with partially updated strides.
    // At least one slot is reserved so a 0-d array does not request a
    // zero-length array.
    npy_intp newstrides[nd > 0 ? nd : 1];
    for(int i=0; i < nd; i++){
        // Both getters return borrowed references: no DECREF needed.
        PyObject *item = is_tuple
            ? PyTuple_GetItem(value, Py_ssize_t(i))
            : PyList_GetItem(value, Py_ssize_t(i));
        if (item == NULL){
            return -1;
        }
        newstrides[i] = PyInt_AsLong(item);
        // -1 is also a legitimate stride, so only treat it as a failure
        // when an exception is actually pending.
        if (newstrides[i] == -1 && PyErr_Occurred()){
            return -1;
        }
    }

    // The new strides are deliberately not validated with
    // PyArray_CheckStrides: ExtractDiag needs strides that would fail that
    // check, and NumPy does not seem to do it either.
    for(int i=0; i < nd; i++){
        CudaNdarray_set_stride(self, i, newstrides[i]);
    }
    return 0;
}
static PyObject *
......
......@@ -289,7 +289,7 @@ def local_gpu_dimshuffle_0(node):
def local_gpu_specifyShape_0(node):
"""
specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape)
gpu_from_host(specify_shape) -> specifyshape(gpu_from_host)
gpu_from_host(specify_shape) -> specify_shape(gpu_from_host)
"""
if isinstance(node.op, tensor.SpecifyShape):
input = node.inputs[0]
......@@ -1438,6 +1438,32 @@ def tensor_to_cuda(x):
return x
@register_opt()
@local_optimizer([])
def local_gpu_extract_diagonal(node):
    """
    Move ExtractDiag across host/GPU transfers.

    extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal)
    gpu_from_host(extract_diagonal) -> extract_diagonal(gpu_from_host)

    Returns a one-element list holding the replacement output, or False
    when neither pattern matches.
    """
    from theano.sandbox import linalg

    extract_cls = linalg.ops.ExtractDiag
    host_type = theano.tensor.TensorType

    # Pattern 1: ExtractDiag applied to a host tensor that comes from the GPU.
    if (isinstance(node.op, extract_cls) and
            isinstance(node.inputs[0].type, host_type)):
        matrix = node.inputs[0]
        producer = matrix.owner
        if producer and isinstance(producer.op, HostFromGpu):
            on_gpu = linalg.extract_diag(gpu_from_host(matrix))
            return [host_from_gpu(on_gpu)]

    # Pattern 2: a transfer to the GPU of the result of a host ExtractDiag.
    if node.op == gpu_from_host:
        producer = node.inputs[0].owner
        if (producer and
                isinstance(producer.op, extract_cls) and
                isinstance(producer.inputs[0].type, host_type)):
            moved = gpu_from_host(producer.inputs[0])
            return [linalg.extract_diag(moved)]

    return False
@register_opt('scan')
@local_optimizer([])
def gpuScanOptimization(node):
......
......@@ -941,6 +941,33 @@ def test_base():
e = b.reshape((5,2,2,3))
assert e.base is a
def test_set_strides():
    """Assigning `strides` accepts a tuple or a list of the right length
    and raises ValueError for any other length."""
    arr = cuda_ndarray.CudaNdarray.zeros((5, 5))

    # Tuple assignment: swap the two strides.
    swapped = (arr.strides[1], arr.strides[0])
    arr.strides = swapped
    assert arr.strides == swapped

    # List assignment: swap them back; the attribute still reads as a tuple.
    swapped = (arr.strides[1], arr.strides[0])
    arr.strides = list(swapped)
    assert arr.strides == swapped

    # Too few, then too many entries.
    for wrong_length in ((arr.strides[1],), (1, 1, 1)):
        try:
            arr.strides = wrong_length
        except ValueError:
            continue
        assert False
def test_is_c_contiguous():
a = cuda_ndarray.CudaNdarray.zeros((3,4,5))
assert a.is_c_contiguous()
......
......@@ -4,9 +4,10 @@ import numpy
# Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
import theano
from theano.compile.pfunc import pfunc
from theano import config, tensor
import theano
import theano.sandbox.linalg.tests
from theano.tests import unittest_tools as utt
......@@ -381,6 +382,17 @@ def test_erfinvgpu():
assert numpy.allclose(f(xv),f2(xv))
class test_diag(theano.sandbox.linalg.tests.test_linalg.test_diag):
    """Run the linalg diag tests with the GPU settings declared below."""
    floatX = 'float32'
    type = CudaNdarrayType
    shared = staticmethod(cuda.shared_constructor)
    mode = mode_with_gpu

    def __init__(self, name):
        # super() is anchored on the parent class itself, so the parent's
        # own __init__ is skipped and the next __init__ in the MRO runs.
        # NOTE(review): presumably done so the class attributes above are
        # not replaced by the parent's per-instance defaults -- confirm.
        parent = theano.sandbox.linalg.tests.test_linalg.test_diag
        super(parent, self).__init__(name)
if __name__ == '__main__':
test_gpualloc()
test_opt_gpujoin_onlyajoin()
......
......@@ -684,7 +684,10 @@ solve = Solve() # general solve
class ExtractDiag(Op):
""" Return the diagonal of a matrix. """
""" Return the diagonal of a matrix.
:note: works on the GPU.
"""
def __init__(self, view=False):
self.view = view
if self.view:
......@@ -697,10 +700,15 @@ class ExtractDiag(Op):
return hash(type(self)) ^ hash(self.view)
def make_node(self, _x):
    """Build the Apply node that extracts the diagonal of `_x`.

    :param _x: the matrix. A `theano.Variable` is used as is; anything
        else is converted with `as_tensor_variable`.
    :raises TypeError: if the input does not have exactly 2 dimensions.
    :return: an Apply node whose single output is a non-broadcastable
        vector built from the same type class and dtype as the input.
    """
    # Only convert non-Variables: an existing Variable keeps its own type.
    if isinstance(_x, theano.Variable):
        x = _x
    else:
        x = as_tensor_variable(_x)

    if x.type.ndim != 2:
        raise TypeError('ExtractDiag only works on matrices', _x)

    # Instantiate the output from the input's type class rather than from
    # tensor.vector, so the output type follows the input type.
    out_type = x.type.__class__(broadcastable=(False,),
                                dtype=x.type.dtype)
    return Apply(self, [x], [out_type()])
def perform(self, node, ins, outs):
""" For some reason numpy.diag(x) is really slow, so we
......
import unittest
import numpy
import numpy.linalg
from numpy.testing import assert_array_almost_equal
......@@ -266,46 +268,7 @@ def test_det_shape():
assert numpy.all(f(r).shape == f_shape(r))
def test_alloc_diag():
    """Check alloc_diag against numpy.diag, its input check and its
    shape inference."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    vec = theano.tensor.vector()
    out = alloc_diag(vec)
    fn = theano.function([vec], out)

    # A "normal" size plus the special cases giving 0x0 and 1x1 matrices.
    sizes = [5, 0, 1]
    for size in sizes:
        value = rng.rand(size).astype(config.floatX)
        expected = numpy.diag(value)
        # The right matrix is built.
        assert (fn(value) == expected).all()

    # Only vectors are accepted.
    mat = theano.tensor.matrix()
    try:
        alloc_diag(mat)
    except TypeError:
        rejected = True
    else:
        rejected = False
    assert rejected

    # infer_shape: outside FAST_COMPILE no AllocDiag node may remain in
    # the graph that only computes the shape.
    shape_fn = theano.function([vec], out.shape)
    nodes = shape_fn.maker.fgraph.toposort()
    if config.mode != 'FAST_COMPILE':
        assert not any(node.op.__class__ == AllocDiag for node in nodes)
    for size in sizes:
        value = rng.rand(size).astype(config.floatX)
        assert (shape_fn(value) == value.shape).all()
def test_alloc_diag_grad():
    """Verify the gradient of alloc_diag at a random 5-element vector."""
    seed = utt.fetch_seed()
    rng = numpy.random.RandomState(seed)
    point = rng.rand(5)
    tensor.verify_grad(alloc_diag, [point], rng=rng)
def test_diag():
class test_diag(unittest.TestCase):
"""
Test that linalg.diag has the same behavior as numpy.diag.
numpy.diag has two behaviors:
......@@ -315,72 +278,130 @@ def test_diag():
matrix.
(1) and (2) are tested by test_alloc_diag and test_extract_diag
respectively. This test makes sure that linalg.diag instantiates
respectively.
test_diag test makes sure that linalg.diag instantiates
the right op based on the dimension of the input.
"""
def __init__(self, name, mode=None, shared=tensor.shared,
             floatX=None, type=tensor.TensorType):
    """Store the test configuration, then initialise the base class.

    :param name: passed on to the base class initialiser.
    :param mode: stored as `self.mode`.
    :param shared: constructor used by the tests to build shared variables.
    :param floatX: dtype used for the test data; `config.floatX` when None.
    :param type: type class the tests expect as input of ExtractDiag nodes.
    """
    self.mode = mode
    self.shared = shared
    self.floatX = config.floatX if floatX is None else floatX
    self.type = type
    super(test_diag, self).__init__(name)
def test_alloc_diag(self):
    """Check alloc_diag against numpy.diag, its input check and its
    shape inference."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    vec = theano.tensor.vector()
    out = alloc_diag(vec)
    fn = theano.function([vec], out)

    # A "normal" size plus the special cases giving 0x0 and 1x1 matrices.
    sizes = [5, 0, 1]
    for size in sizes:
        value = rng.rand(size).astype(self.floatX)
        expected = numpy.diag(value)
        # The right matrix is created.
        assert (fn(value) == expected).all()

    # Only vectors are accepted.
    mat = theano.tensor.matrix()
    try:
        alloc_diag(mat)
    except TypeError:
        rejected = True
    else:
        rejected = False
    assert rejected

    # infer_shape: outside FAST_COMPILE no AllocDiag node may remain in
    # the graph that only computes the shape.
    shape_fn = theano.function([vec], out.shape)
    nodes = shape_fn.maker.fgraph.toposort()
    if config.mode != 'FAST_COMPILE':
        assert not any(node.op.__class__ == AllocDiag for node in nodes)
    for size in sizes:
        value = rng.rand(size).astype(self.floatX)
        assert (shape_fn(value) == value.shape).all()
# test that it builds a matrix with given diagonal when using vector inputs
x = theano.tensor.vector()
y = diag(x)
assert y.owner.op.__class__ == AllocDiag
# test that it extracts the diagonal when using matrix input
x = theano.tensor.matrix()
y = extract_diag(x)
assert y.owner.op.__class__ == ExtractDiag
# other types should raise error
x = theano.tensor.tensor3()
ok = False
try:
def test_alloc_diag_grad(self):
    """Verify the gradient of alloc_diag at a random 5-element vector."""
    seed = utt.fetch_seed()
    rng = numpy.random.RandomState(seed)
    point = rng.rand(5)
    tensor.verify_grad(alloc_diag, [point], rng=rng)
def test_diag(self):
    """Check which op is instantiated for vector, matrix and 3D inputs."""
    # A vector input must build a matrix with the given diagonal.
    x = theano.tensor.vector()
    y = diag(x)
    assert y.owner.op.__class__ == AllocDiag

    # A matrix input must have its diagonal extracted.
    # NOTE(review): this calls extract_diag directly rather than diag --
    # confirm whether diag(x) was intended here and below.
    x = theano.tensor.matrix()
    y = extract_diag(x)
    assert y.owner.op.__class__ == ExtractDiag

    # Other types should raise an error.
    x = theano.tensor.tensor3()
    ok = False
    try:
        y = extract_diag(x)
    except TypeError:
        ok = True
    assert ok

    # not testing the view=True case since it is not used anywhere.
def test_extract_diag(self):
    """Check extract_diag on a shared matrix: input type of the compiled
    node, values against numpy.diag, input check and shape inference."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    initial = rng.rand(2, 3).astype(self.floatX)
    mat = self.shared(initial)
    out = extract_diag(mat)
    fn = theano.function([], out)

    # Exactly one ExtractDiag node, and its input has the expected type.
    input_checks = [isinstance(node.inputs[0].type, self.type)
                    for node in fn.maker.fgraph.toposort()
                    if isinstance(node.op, ExtractDiag)]
    assert input_checks == [True]

    for shape in [(2, 3), (3, 2), (3, 3), (1, 1), (0, 0)]:
        value = rng.rand(*shape).astype(self.floatX)
        mat.set_value(value)
        expected = numpy.diag(value)
        # The right diagonal is extracted.
        assert (fn() == expected).all()

    # Only matrices are accepted.
    vec = theano.tensor.vector()
    try:
        extract_diag(vec)
    except TypeError:
        rejected = True
    else:
        rejected = False
    assert rejected

    # infer_shape: outside FAST_COMPILE no ExtractDiag node may remain in
    # the graph that only computes the shape.
    shape_fn = theano.function([], out.shape)
    nodes = shape_fn.maker.fgraph.toposort()
    if config.mode != 'FAST_COMPILE':
        assert not any(node.op.__class__ == ExtractDiag for node in nodes)
    for shape in [(2, 3), (3, 2), (3, 3)]:
        value = rng.rand(*shape).astype(self.floatX)
        mat.set_value(value)
        assert shape_fn() == min(shape)
def test_extract_diag_grad(self):
    """Verify the gradient of extract_diag at a random 5x4 matrix."""
    seed = utt.fetch_seed()
    rng = numpy.random.RandomState(seed)
    point = rng.rand(5, 4).astype(self.floatX)
    tensor.verify_grad(extract_diag, [point], rng=rng)
def test_extract_diag_empty(self):
    """Compile extract_diag on an empty (2, 0) shared matrix and check
    the input type of the resulting ExtractDiag node."""
    c = self.shared(numpy.array([[], []], self.floatX))
    f = theano.function([], extract_diag(c), mode=self.mode)

    # Exactly one ExtractDiag node, and its input has the expected type.
    assert [isinstance(node.inputs[0].type, self.type)
            for node in f.maker.fgraph.toposort()
            if isinstance(node.op, ExtractDiag)] == [True]
def test_trace():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论