提交 35e15192 作者: abergeron

Merge pull request #2171 from MarcCote/cumsum_3D

Support GpuCumsum on 3D array.
...@@ -16,9 +16,8 @@ else: ...@@ -16,9 +16,8 @@ else:
from theano import tensor as T from theano import tensor as T
import numpy as np import numpy as np
import theano import theano
from theano import config
from theano.tensor.extra_ops import cumsum, CumsumOp from theano.tensor.extra_ops import cumsum, CumsumOp
import itertools
class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
mode = mode_with_gpu mode = mode_with_gpu
...@@ -45,68 +44,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -45,68 +44,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
def test_Strides1D(self): def test_Strides1D(self):
x = T.fvector('x') x = T.fvector('x')
# Stepped strides for axis in [0, None]:
f = theano.function([x], cumsum(x[::2]), mode=self.mode) a = np.random.random((42,)).astype("float32")
assert [n for n in f.maker.fgraph.toposort() cumsum_function = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
if isinstance(n.op, GpuCumsum)]
a = np.random.randint(10, size=(42,)).astype("float32")
assert np.allclose(np.cumsum(a[::2]), f(a))
# Alternative stepped strides slicings = [slice(None, None, None), # Normal strides
f = theano.function([x], cumsum(x), mode=self.mode) slice(None, None, 2), # Stepped strides
assert [n for n in f.maker.fgraph.toposort() slice(None, None, -1), # Negative strides
if isinstance(n.op, GpuCumsum)] ]
a = np.random.randint(10, size=(42,)).astype("float32")
assert np.allclose(np.cumsum(a[::2]), f(a[::2]))
# Negative strides # Cartesian product of all slicings to test.
f = theano.function([x], cumsum(x[::-1]), mode=self.mode) for slicing in itertools.product(slicings, repeat=x.ndim):
f = theano.function([x], cumsum(x[slicing], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)] if isinstance(n.op, GpuCumsum)]
a = np.random.randint(10, size=(42,)).astype("float32") assert np.allclose(np.cumsum(a[slicing], axis=axis), f(a))
assert np.allclose(np.cumsum(a[::-1]), f(a)) assert np.allclose(np.cumsum(a[slicing], axis=axis), cumsum_function(a[slicing]))
def test_Strides2D(self): def test_Strides2D(self):
x = T.fmatrix('x') x = T.fmatrix('x')
for shape_axis, axis in zip([0, 1, 0], [0, 1, None]): for axis in [0, 1, None]:
a = np.random.random((42, 30)).astype("float32") a = np.random.random((42, 30)).astype("float32")
cumsum_function = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
# Stepped strides along axis=0 slicings = [slice(None, None, None), # Normal strides
f = theano.function([x], cumsum(x[::2], axis=axis), mode=self.mode) slice(None, None, 2), # Stepped strides
assert [n for n in f.maker.fgraph.toposort() slice(None, None, -1), # Negative strides
if isinstance(n.op, GpuCumsum)] ]
assert np.allclose(np.cumsum(a[::2], axis=axis), f(a))
# Stepped strides along axis=1 # Cartesian product of all slicings to test.
f = theano.function([x], cumsum(x[:, ::2], axis=axis), mode=self.mode) for slicing in itertools.product(slicings, repeat=x.ndim):
f = theano.function([x], cumsum(x[slicing], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)] if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, ::2], axis=axis), f(a)) assert np.allclose(np.cumsum(a[slicing], axis=axis), f(a))
assert np.allclose(np.cumsum(a[slicing], axis=axis), cumsum_function(a[slicing]))
# Alternative stepped strides along axis=0 def test_Strides3D(self):
f = theano.function([x], cumsum(x), mode=self.mode) x = T.ftensor3('x')
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[::2]), f(a[::2]))
# Alternative stepped strides along axis=1 for axis in [0, 1, 2, None]:
f = theano.function([x], cumsum(x), mode=self.mode) a = np.random.random((42, 30, 25)).astype("float32")
assert [n for n in f.maker.fgraph.toposort() cumsum_function = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, ::2]), f(a[:, ::2]))
# Negative strides along axis=0 slicings = [slice(None, None, None), # Normal strides
f = theano.function([x], cumsum(x[::-1], axis=axis), mode=self.mode) slice(None, None, 2), # Stepped strides
assert [n for n in f.maker.fgraph.toposort() slice(None, None, -1), # Negative strides
if isinstance(n.op, GpuCumsum)] ]
assert np.allclose(np.cumsum(a[::-1], axis=axis), f(a))
# Negative strides along axis=1 # Cartesian product of all slicings to test.
f = theano.function([x], cumsum(x[:, ::-1], axis=axis), mode=self.mode) for slicing in itertools.product(slicings, repeat=x.ndim):
f = theano.function([x], cumsum(x[slicing], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)] if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, ::-1], axis=axis), f(a)) assert np.allclose(np.cumsum(a[slicing], axis=axis), f(a))
assert np.allclose(np.cumsum(a[slicing], axis=axis), cumsum_function(a[slicing]))
def test_GpuCumsum1D(self): def test_GpuCumsum1D(self):
block_max_size = self.max_threads_dim0 * 2 block_max_size = self.max_threads_dim0 * 2
...@@ -163,14 +157,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -163,14 +157,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
assert np.allclose(np.cumsum(a, axis=axis), f(a)) assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Use recursive cumsum # Use recursive cumsum
a_shape = [5, 3] a_shape = [3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2 a_shape[shape_axis] = block_max_size*(block_max_size+1)+2
a = np.ones(a_shape, dtype="float32") a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a)) assert np.allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum3D(self): def test_GpuCumsum3D(self):
# Should not use the GPU version. block_max_size = self.max_threads_dim0 * 2
x = T.ftensor3('x') x = T.ftensor3('x')
for shape_axis, axis in zip([0, 1, 2, 0], [0, 1, 2, None]):
f = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
# Extensive testing for the first 1025 sizes
a_shape = [5, 5, 5]
a_shape[shape_axis] = 1025
a = np.random.rand(*a_shape).astype("float32")
slices = [slice(None), slice(None), slice(None)]
for i in xrange(a.shape[shape_axis]):
slices[shape_axis] = slice(i)
fa = f(a[slices])
npa = np.cumsum(a[slices], axis=axis)
assert np.allclose(npa, fa)
# Use multiple GPU threadblocks (along accumulation axis)
a_shape = [2, 2, 2]
a_shape[shape_axis] = block_max_size+2
a = np.random.random(a_shape).astype("float32")
assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Use multiple GPU gridblocks (not along accumulation axis)
a_shape = [5, 5, 5]
a_shape[(shape_axis+1) % 3] = self.max_grid_size1+1
a = np.random.random(a_shape).astype("float32")
if axis is None:
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a))
a_shape = [5, 5, 5]
a_shape[(shape_axis+2) % 3] = self.max_grid_size1+1
a = np.random.random(a_shape).astype("float32")
if axis is None:
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Use recursive cumsum (along accumulation axis)
a_shape = [3, 3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2
a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum4D(self):
# Should not use the GPU version.
x = T.ftensor4('x')
f = theano.function([x], cumsum(x, axis=1), mode=self.mode) f = theano.function([x], cumsum(x, axis=1), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, CumsumOp)] if isinstance(n.op, CumsumOp)]
...@@ -62,7 +62,7 @@ class TestCumsumOp(utt.InferShapeTester): ...@@ -62,7 +62,7 @@ class TestCumsumOp(utt.InferShapeTester):
utt.verify_grad(self.op, [a]) # Test axis=None utt.verify_grad(self.op, [a]) # Test axis=None
for axis in range(len(a.shape)): for axis in range(len(a.shape)):
utt.verify_grad(self.op_class(axis=axis), [a]) utt.verify_grad(self.op_class(axis=axis), [a], eps=4e-4)
class TestCumprodOp(utt.InferShapeTester): class TestCumprodOp(utt.InferShapeTester):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论