提交 14713c9d authored 作者: Marc-Alexandre Cote's avatar Marc-Alexandre Cote

Add tests for different strides.

Fix one problem related to strides by copying the strides infos from the input to the output.
上级 3454ea2d
...@@ -75,7 +75,7 @@ class GpuCumsum(CumsumOp, GpuOp): ...@@ -75,7 +75,7 @@ class GpuCumsum(CumsumOp, GpuOp):
compute_map, no_recycling) compute_map, no_recycling)
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
return """ return """
...@@ -289,12 +289,21 @@ class GpuCumsum(CumsumOp, GpuOp): ...@@ -289,12 +289,21 @@ class GpuCumsum(CumsumOp, GpuOp):
if (CudaNdarray_HOST_DIMS(%(x)s)[i] == CudaNdarray_HOST_DIMS(%(z)s)[i]) { if (CudaNdarray_HOST_DIMS(%(x)s)[i] == CudaNdarray_HOST_DIMS(%(z)s)[i]) {
needAllocation = true; needAllocation = true;
} }
if (CudaNdarray_HOST_STRIDES(%(x)s)[i] == CudaNdarray_HOST_STRIDES(%(z)s)[i]) {
needAllocation = true;
}
} }
} }
if (needAllocation){ if (needAllocation){
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (CudaNdarray*) CudaNdarray_NewDims(CudaNdarray_NDIM(%(x)s), shape); %(z)s = (CudaNdarray*) CudaNdarray_NewDims(CudaNdarray_NDIM(%(x)s), shape);
// Copy strides information
for (int i= 0; i < CudaNdarray_NDIM(%(x)s); ++i) {
CudaNdarray_set_stride(%(z)s, i, CudaNdarray_HOST_STRIDES(%(x)s)[i]);
}
} }
if (!%(z)s) { if (!%(z)s) {
......
...@@ -44,6 +44,19 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -44,6 +44,19 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
self.max_threads_dim0 = prop['maxThreadsDim0'] self.max_threads_dim0 = prop['maxThreadsDim0']
self.max_grid_size1 = prop['maxGridSize1'] self.max_grid_size1 = prop['maxGridSize1']
def test_Strides1D(self):
    """Check that GPU cumsum handles non-contiguous 1D inputs.

    Covers two stride patterns: a stepped view (stride 2) and a
    reversed view (negative stride), comparing against NumPy's cumsum.
    """
    x = T.vector('x')

    # Same check for each non-contiguous slicing of the input vector.
    for slicing in (slice(None, None, 2),    # stepped strides
                    slice(None, None, -1)):  # negative strides
        fn = theano.function([x], cumsum(x[slicing]))
        data = np.random.randint(10, size=(42,)).astype(config.floatX)
        assert np.allclose(np.cumsum(data[slicing]), fn(data))
def test_GpuCumsum1D(self): def test_GpuCumsum1D(self):
block_max_size = self.max_threads_dim0 * 2 block_max_size = self.max_threads_dim0 * 2
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论