提交 35e15192 作者: abergeron

Merge pull request #2171 from MarcCote/cumsum_3D

Support GpuCumsum on 3D array.
...@@ -16,9 +16,8 @@ else: ...@@ -16,9 +16,8 @@ else:
from theano import tensor as T from theano import tensor as T
import numpy as np import numpy as np
import theano import theano
from theano import config
from theano.tensor.extra_ops import cumsum, CumsumOp from theano.tensor.extra_ops import cumsum, CumsumOp
import itertools
class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
mode = mode_with_gpu mode = mode_with_gpu
...@@ -45,68 +44,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -45,68 +44,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
def test_Strides1D(self): def test_Strides1D(self):
x = T.fvector('x') x = T.fvector('x')
# Stepped strides for axis in [0, None]:
f = theano.function([x], cumsum(x[::2]), mode=self.mode) a = np.random.random((42,)).astype("float32")
assert [n for n in f.maker.fgraph.toposort() cumsum_function = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
if isinstance(n.op, GpuCumsum)]
a = np.random.randint(10, size=(42,)).astype("float32")
assert np.allclose(np.cumsum(a[::2]), f(a))
# Alternative stepped strides slicings = [slice(None, None, None), # Normal strides
f = theano.function([x], cumsum(x), mode=self.mode) slice(None, None, 2), # Stepped strides
assert [n for n in f.maker.fgraph.toposort() slice(None, None, -1), # Negative strides
if isinstance(n.op, GpuCumsum)] ]
a = np.random.randint(10, size=(42,)).astype("float32")
assert np.allclose(np.cumsum(a[::2]), f(a[::2]))
# Negative strides # Cartesian product of all slicings to test.
f = theano.function([x], cumsum(x[::-1]), mode=self.mode) for slicing in itertools.product(slicings, repeat=x.ndim):
assert [n for n in f.maker.fgraph.toposort() f = theano.function([x], cumsum(x[slicing], axis=axis), mode=self.mode)
if isinstance(n.op, GpuCumsum)] assert [n for n in f.maker.fgraph.toposort()
a = np.random.randint(10, size=(42,)).astype("float32") if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[::-1]), f(a)) assert np.allclose(np.cumsum(a[slicing], axis=axis), f(a))
assert np.allclose(np.cumsum(a[slicing], axis=axis), cumsum_function(a[slicing]))
def test_Strides2D(self): def test_Strides2D(self):
x = T.fmatrix('x') x = T.fmatrix('x')
for shape_axis, axis in zip([0, 1, 0], [0, 1, None]): for axis in [0, 1, None]:
a = np.random.random((42, 30)).astype("float32") a = np.random.random((42, 30)).astype("float32")
cumsum_function = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
slicings = [slice(None, None, None), # Normal strides
slice(None, None, 2), # Stepped strides
slice(None, None, -1), # Negative strides
]
# Cartesian product of all slicings to test.
for slicing in itertools.product(slicings, repeat=x.ndim):
f = theano.function([x], cumsum(x[slicing], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[slicing], axis=axis), f(a))
assert np.allclose(np.cumsum(a[slicing], axis=axis), cumsum_function(a[slicing]))
def test_Strides3D(self):
x = T.ftensor3('x')
# Stepped strides along axis=0 for axis in [0, 1, 2, None]:
f = theano.function([x], cumsum(x[::2], axis=axis), mode=self.mode) a = np.random.random((42, 30, 25)).astype("float32")
assert [n for n in f.maker.fgraph.toposort() cumsum_function = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[::2], axis=axis), f(a))
# Stepped strides along axis=1
f = theano.function([x], cumsum(x[:, ::2], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, ::2], axis=axis), f(a))
# Alternative stepped strides along axis=0
f = theano.function([x], cumsum(x), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[::2]), f(a[::2]))
# Alternative stepped strides along axis=1 slicings = [slice(None, None, None), # Normal strides
f = theano.function([x], cumsum(x), mode=self.mode) slice(None, None, 2), # Stepped strides
assert [n for n in f.maker.fgraph.toposort() slice(None, None, -1), # Negative strides
if isinstance(n.op, GpuCumsum)] ]
assert np.allclose(np.cumsum(a[:, ::2]), f(a[:, ::2]))
# Negative strides along axis=0 # Cartesian product of all slicings to test.
f = theano.function([x], cumsum(x[::-1], axis=axis), mode=self.mode) for slicing in itertools.product(slicings, repeat=x.ndim):
assert [n for n in f.maker.fgraph.toposort() f = theano.function([x], cumsum(x[slicing], axis=axis), mode=self.mode)
if isinstance(n.op, GpuCumsum)] assert [n for n in f.maker.fgraph.toposort()
assert np.allclose(np.cumsum(a[::-1], axis=axis), f(a)) if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[slicing], axis=axis), f(a))
assert np.allclose(np.cumsum(a[slicing], axis=axis), cumsum_function(a[slicing]))
# Negative strides along axis=1
f = theano.function([x], cumsum(x[:, ::-1], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, ::-1], axis=axis), f(a))
def test_GpuCumsum1D(self): def test_GpuCumsum1D(self):
block_max_size = self.max_threads_dim0 * 2 block_max_size = self.max_threads_dim0 * 2
...@@ -163,14 +157,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -163,14 +157,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
assert np.allclose(np.cumsum(a, axis=axis), f(a)) assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Use recursive cumsum # Use recursive cumsum
a_shape = [5, 3] a_shape = [3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2 a_shape[shape_axis] = block_max_size*(block_max_size+1)+2
a = np.ones(a_shape, dtype="float32") a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a)) assert np.allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum3D(self): def test_GpuCumsum3D(self):
# Should not use the GPU version. block_max_size = self.max_threads_dim0 * 2
x = T.ftensor3('x') x = T.ftensor3('x')
for shape_axis, axis in zip([0, 1, 2, 0], [0, 1, 2, None]):
f = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
# Extensive testing for the first 1025 sizes
a_shape = [5, 5, 5]
a_shape[shape_axis] = 1025
a = np.random.rand(*a_shape).astype("float32")
slices = [slice(None), slice(None), slice(None)]
for i in xrange(a.shape[shape_axis]):
slices[shape_axis] = slice(i)
fa = f(a[slices])
npa = np.cumsum(a[slices], axis=axis)
assert np.allclose(npa, fa)
# Use multiple GPU threadblocks (along accumulation axis)
a_shape = [2, 2, 2]
a_shape[shape_axis] = block_max_size+2
a = np.random.random(a_shape).astype("float32")
assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Use multiple GPU gridblocks (not along accumulation axis)
a_shape = [5, 5, 5]
a_shape[(shape_axis+1) % 3] = self.max_grid_size1+1
a = np.random.random(a_shape).astype("float32")
if axis is None:
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a))
a_shape = [5, 5, 5]
a_shape[(shape_axis+2) % 3] = self.max_grid_size1+1
a = np.random.random(a_shape).astype("float32")
if axis is None:
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Use recursive cumsum (along accumulation axis)
a_shape = [3, 3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2
a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum4D(self):
# Should not use the GPU version.
x = T.ftensor4('x')
f = theano.function([x], cumsum(x, axis=1), mode=self.mode) f = theano.function([x], cumsum(x, axis=1), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, CumsumOp)] if isinstance(n.op, CumsumOp)]
...@@ -62,7 +62,7 @@ class TestCumsumOp(utt.InferShapeTester): ...@@ -62,7 +62,7 @@ class TestCumsumOp(utt.InferShapeTester):
utt.verify_grad(self.op, [a]) # Test axis=None utt.verify_grad(self.op, [a]) # Test axis=None
for axis in range(len(a.shape)): for axis in range(len(a.shape)):
utt.verify_grad(self.op_class(axis=axis), [a]) utt.verify_grad(self.op_class(axis=axis), [a], eps=4e-4)
class TestCumprodOp(utt.InferShapeTester): class TestCumprodOp(utt.InferShapeTester):
...@@ -493,10 +493,10 @@ class TestFillDiagonalOffset(utt.InferShapeTester): ...@@ -493,10 +493,10 @@ class TestFillDiagonalOffset(utt.InferShapeTester):
# We can't use numpy.fill_diagonal as it is bugged. # We can't use numpy.fill_diagonal as it is bugged.
assert numpy.allclose(numpy.diag(out, test_offset), val) assert numpy.allclose(numpy.diag(out, test_offset), val)
if test_offset >= 0: if test_offset >= 0:
assert (out == val).sum() == min( min(a.shape), assert (out == val).sum() == min( min(a.shape),
a.shape[1]-test_offset ) a.shape[1]-test_offset )
else: else:
assert (out == val).sum() == min( min(a.shape), assert (out == val).sum() == min( min(a.shape),
a.shape[0]+test_offset ) a.shape[0]+test_offset )
def test_gradient(self): def test_gradient(self):
...@@ -505,13 +505,13 @@ class TestFillDiagonalOffset(utt.InferShapeTester): ...@@ -505,13 +505,13 @@ class TestFillDiagonalOffset(utt.InferShapeTester):
def fill_diagonal_with_fix_offset( a, val): def fill_diagonal_with_fix_offset( a, val):
return fill_diagonal_offset( a, val, test_offset) return fill_diagonal_offset( a, val, test_offset)
utt.verify_grad(fill_diagonal_with_fix_offset, utt.verify_grad(fill_diagonal_with_fix_offset,
[numpy.random.rand(5, 8), numpy.random.rand()], [numpy.random.rand(5, 8), numpy.random.rand()],
n_tests=1, rng=TestFillDiagonalOffset.rng) n_tests=1, rng=TestFillDiagonalOffset.rng)
utt.verify_grad(fill_diagonal_with_fix_offset, utt.verify_grad(fill_diagonal_with_fix_offset,
[numpy.random.rand(8, 5), numpy.random.rand()], [numpy.random.rand(8, 5), numpy.random.rand()],
n_tests=1, rng=TestFillDiagonalOffset.rng) n_tests=1, rng=TestFillDiagonalOffset.rng)
utt.verify_grad(fill_diagonal_with_fix_offset, utt.verify_grad(fill_diagonal_with_fix_offset,
[numpy.random.rand(5, 5), numpy.random.rand()], [numpy.random.rand(5, 5), numpy.random.rand()],
n_tests=1, rng=TestFillDiagonalOffset.rng) n_tests=1, rng=TestFillDiagonalOffset.rng)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论