提交 f0649e3a authored 作者: Marc-Alexandre Cote's avatar Marc-Alexandre Cote

Support GpuCumsum on 3D array.

上级 ea9e3e54
...@@ -16,7 +16,6 @@ else: ...@@ -16,7 +16,6 @@ else:
from theano import tensor as T from theano import tensor as T
import numpy as np import numpy as np
import theano import theano
from theano import config
from theano.tensor.extra_ops import cumsum, CumsumOp from theano.tensor.extra_ops import cumsum, CumsumOp
...@@ -69,7 +68,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -69,7 +68,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
def test_Strides2D(self): def test_Strides2D(self):
x = T.fmatrix('x') x = T.fmatrix('x')
for shape_axis, axis in zip([0, 1, 0], [0, 1, None]): for axis in [0, 1, None]:
a = np.random.random((42, 30)).astype("float32") a = np.random.random((42, 30)).astype("float32")
# Stepped strides along axis=0 # Stepped strides along axis=0
...@@ -108,6 +107,66 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -108,6 +107,66 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
if isinstance(n.op, GpuCumsum)] if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, ::-1], axis=axis), f(a)) assert np.allclose(np.cumsum(a[:, ::-1], axis=axis), f(a))
def test_Strides3D(self):
x = T.ftensor3('x')
for axis in [0, 1, 2, None]:
a = np.random.random((42, 30, 25)).astype("float32")
# Stepped strides along axis=0
f = theano.function([x], cumsum(x[::2], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[::2], axis=axis), f(a))
# Stepped strides along axis=1
f = theano.function([x], cumsum(x[:, ::2], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, ::2], axis=axis), f(a))
# Stepped strides along axis=2
f = theano.function([x], cumsum(x[:, :, ::2], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, :, ::2], axis=axis), f(a))
# Alternative stepped strides along axis=0
f = theano.function([x], cumsum(x), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[::2]), f(a[::2]))
# Alternative stepped strides along axis=1
f = theano.function([x], cumsum(x), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, ::2]), f(a[:, ::2]))
# Alternative stepped strides along axis=2
f = theano.function([x], cumsum(x), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, :, ::2]), f(a[:, :, ::2]))
# Negative strides along axis=0
f = theano.function([x], cumsum(x[::-1], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[::-1], axis=axis), f(a))
# Negative strides along axis=1
f = theano.function([x], cumsum(x[:, ::-1], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, ::-1], axis=axis), f(a))
# Negative strides along axis=2
f = theano.function([x], cumsum(x[:, :, ::-1], axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
assert np.allclose(np.cumsum(a[:, :, ::-1], axis=axis), f(a))
def test_GpuCumsum1D(self): def test_GpuCumsum1D(self):
block_max_size = self.max_threads_dim0 * 2 block_max_size = self.max_threads_dim0 * 2
...@@ -163,14 +222,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -163,14 +222,63 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
assert np.allclose(np.cumsum(a, axis=axis), f(a)) assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Use recursive cumsum # Use recursive cumsum
a_shape = [5, 3] a_shape = [3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2 a_shape[shape_axis] = block_max_size*(block_max_size+1)+2
a = np.ones(a_shape, dtype="float32") a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a)) assert np.allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum3D(self): def test_GpuCumsum3D(self):
# Should not use the GPU version. block_max_size = self.max_threads_dim0 * 2
x = T.ftensor3('x') x = T.ftensor3('x')
for shape_axis, axis in zip([0, 1, 2, 0], [0, 1, 2, None]):
f = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, GpuCumsum)]
# Extensive testing for the first 1025 sizes
a_shape = [5, 5, 5]
a_shape[shape_axis] = 1025
a = np.random.rand(*a_shape).astype("float32")
slices = [slice(None), slice(None), slice(None)]
for i in xrange(a.shape[shape_axis]):
slices[shape_axis] = slice(i)
fa = f(a[slices])
npa = np.cumsum(a[slices], axis=axis)
assert np.allclose(npa, fa)
# Use multiple GPU threadblocks (along accumulation axis)
a_shape = [2, 2, 2]
a_shape[shape_axis] = block_max_size+2
a = np.random.random(a_shape).astype("float32")
assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Use multiple GPU gridblocks (not along accumulation axis)
a_shape = [5, 5, 5]
a_shape[(shape_axis+1) % 3] = self.max_grid_size1+1
a = np.random.random(a_shape).astype("float32")
if axis is None:
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a))
a_shape = [5, 5, 5]
a_shape[(shape_axis+2) % 3] = self.max_grid_size1+1
a = np.random.random(a_shape).astype("float32")
if axis is None:
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a))
# Use recursive cumsum (along accumulation axis)
a_shape = [3, 3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2
a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
assert np.allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum4D(self):
# Should not use the GPU version.
x = T.ftensor4('x')
f = theano.function([x], cumsum(x, axis=1), mode=self.mode) f = theano.function([x], cumsum(x, axis=1), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, CumsumOp)] if isinstance(n.op, CumsumOp)]
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论