提交 d87cf8b4,作者:Kelvin Xu

pep8

上级 99cffe57
...@@ -43,7 +43,7 @@ class GpuCumsum(CumsumOp, GpuOp): ...@@ -43,7 +43,7 @@ class GpuCumsum(CumsumOp, GpuOp):
if x.ndim > GpuCumsum.SUPPORTED_NDIMS: if x.ndim > GpuCumsum.SUPPORTED_NDIMS:
raise NotImplementedError('Only cumsum on 1D, 2D and 3D array are supported right now!') raise NotImplementedError('Only cumsum on 1D, 2D and 3D array are supported right now!')
print(self.axis)
if self.axis >= x.ndim or self.axis < -x.ndim: if self.axis >= x.ndim or self.axis < -x.ndim:
raise ValueError('axis(={1}) out of bounds'.format(self.axis)) raise ValueError('axis(={1}) out of bounds'.format(self.axis))
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import theano
import numpy
import os import os
from theano import Op, Apply, config from theano import Apply
from theano.tensor.extra_ops import CumsumOp from theano.tensor.extra_ops import CumsumOp
try: try:
import pygpu
from pygpu import gpuarray from pygpu import gpuarray
except ImportError: except ImportError:
pass pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
infer_context_name, GpuFromHost, HideC) infer_context_name, GpuFromHost)
from .opt import register_opt as register_gpu_opt, op_lifter from .opt import register_opt as register_gpu_opt, op_lifter
from .type import GpuArrayType
class GpuCumsum(GpuKernelBase, HideC, CumsumOp): class GpuCumsum(GpuKernelBase):
""" """
Parameters Parameters
---------- ----------
...@@ -34,7 +30,7 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp): ...@@ -34,7 +30,7 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp):
return "%s{%s}" % (self.__class__.__name__, self.axis) return "%s{%s}" % (self.__class__.__name__, self.axis)
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
return None return (1,)
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>'] return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>']
...@@ -57,13 +53,6 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp): ...@@ -57,13 +53,6 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp):
raise ValueError('axis(={0}) out of bounds'.format(self.axis)) raise ValueError('axis(={0}) out of bounds'.format(self.axis))
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
# copied from neighbour.py
def perform(self, node, inp, out, ctx):
# Disable the perform method from the CPU version
Op.perform(self, node, inp, out, ctx)
def gpu_kernels(self, node, nodename): def gpu_kernels(self, node, nodename):
kernels = [] kernels = []
# cumadd # cumadd
...@@ -104,8 +93,8 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp): ...@@ -104,8 +93,8 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp):
params = [gpuarray.GpuArray, gpuarray.GpuArray, gpuarray.SIZE, params = [gpuarray.GpuArray, gpuarray.GpuArray, gpuarray.SIZE,
gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE,
gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE,
'int32', 'int32', gpuarray.GpuArray,] 'int32', 'int32', gpuarray.GpuArray, ]
code=""" code = """
// helper functions // helper functions
WITHIN_KERNEL WITHIN_KERNEL
void k_reductionPhase(float* partialCumSum) { void k_reductionPhase(float* partialCumSum) {
...@@ -226,12 +215,11 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp): ...@@ -226,12 +215,11 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp):
""" """
params = [gpuarray.GpuArray, gpuarray.GpuArray, gpuarray.SIZE, params = [gpuarray.GpuArray, gpuarray.GpuArray, gpuarray.SIZE,
gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE,
'int32', 'int32',] 'int32', 'int32', ]
kernels.append(Kernel(code=code, name=kname, params=params, kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var)) flags=flags, objvar=k_var))
return kernels return kernels
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda': if node.inputs[0].type.context.kind != 'cuda':
raise NotImplementedError("cuda only") raise NotImplementedError("cuda only")
......
...@@ -10,6 +10,7 @@ import theano ...@@ -10,6 +10,7 @@ import theano
import theano.tensor.tests.test_extra_ops import theano.tensor.tests.test_extra_ops
from theano.tensor.extra_ops import cumsum, CumsumOp from theano.tensor.extra_ops import cumsum, CumsumOp
from theano.tests.unittest_tools import SkipTest
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from .config import mode_with_gpu, test_ctx_name from .config import mode_with_gpu, test_ctx_name
...@@ -28,8 +29,6 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -28,8 +29,6 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
self.max_threads_dim0 = test_ctx.maxlsize0 self.max_threads_dim0 = test_ctx.maxlsize0
self.max_grid_size1 = test_ctx.maxgsize2 self.max_grid_size1 = test_ctx.maxgsize2
def test_Strides1D(self): def test_Strides1D(self):
x = T.fvector('x') x = T.fvector('x')
...@@ -113,11 +112,11 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -113,11 +112,11 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
utt.assert_allclose(np.cumsum(a[:i]), f(a[:i])) utt.assert_allclose(np.cumsum(a[:i]), f(a[:i]))
# Use multiple GPU threadblocks # Use multiple GPU threadblocks
a = np.random.random((block_max_size+2,)).astype("float32") a = np.random.random((block_max_size + 2, )).astype("float32")
utt.assert_allclose(np.cumsum(a), f(a)) utt.assert_allclose(np.cumsum(a), f(a))
# Use recursive cumsum # Use recursive cumsum
a = np.ones((block_max_size*(block_max_size+1)+2,), a = np.ones((block_max_size * (block_max_size + 1) + 2,),
dtype="float32") dtype="float32")
utt.assert_allclose(np.cumsum(a), f(a)) utt.assert_allclose(np.cumsum(a), f(a))
...@@ -143,21 +142,21 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -143,21 +142,21 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
# Use multiple GPU threadblocks # Use multiple GPU threadblocks
a_shape = [5, 5] a_shape = [5, 5]
a_shape[shape_axis] = block_max_size+2 a_shape[shape_axis] = block_max_size + 2
a = np.random.random(a_shape).astype("float32") a = np.random.random(a_shape).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a)) utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
# Use multiple GPU gridblocks # Use multiple GPU gridblocks
a_shape = [4, 4] a_shape = [4, 4]
a_shape[1-shape_axis] = self.max_grid_size1+1 a_shape[1 - shape_axis] = self.max_grid_size1 + 1
a = np.random.random(a_shape).astype("float32") a = np.random.random(a_shape).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a), rtol=5e-5) utt.assert_allclose(np.cumsum(a, axis=axis), f(a), rtol=5e-5)
# Use recursive cumsum # Use recursive cumsum
a_shape = [3, 3] a_shape = [3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2 a_shape[shape_axis] = block_max_size * (block_max_size + 1) + 2
a = np.random.random(a_shape).astype("float32") a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error a = np.sign(a - 0.5).astype("float32") # Avoid floating point error
utt.assert_allclose(np.cumsum(a, axis=axis), f(a)) utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum3D(self): def test_GpuCumsum3D(self):
...@@ -182,32 +181,32 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -182,32 +181,32 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
# Use multiple GPU threadblocks (along accumulation axis) # Use multiple GPU threadblocks (along accumulation axis)
a_shape = [2, 2, 2] a_shape = [2, 2, 2]
a_shape[shape_axis] = block_max_size+2 a_shape[shape_axis] = block_max_size + 2
a = np.random.random(a_shape).astype("float32") a = np.random.random(a_shape).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a)) utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
# Use multiple GPU gridblocks (not along accumulation axis) # Use multiple GPU gridblocks (not along accumulation axis)
a_shape = [5, 5, 5] a_shape = [5, 5, 5]
a_shape[(shape_axis+1) % 3] = self.max_grid_size1+1 a_shape[(shape_axis + 1) % 3] = self.max_grid_size1 + 1
a = np.random.random(a_shape).astype("float32") a = np.random.random(a_shape).astype("float32")
if axis is None: if axis is None:
# Avoid floating point error # Avoid floating point error
a = np.sign(a-0.5).astype("float32") a = np.sign(a - 0.5).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a)) utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
a_shape = [5, 5, 5] a_shape = [5, 5, 5]
a_shape[(shape_axis+2) % 3] = self.max_grid_size1+1 a_shape[(shape_axis + 2) % 3] = self.max_grid_size1 + 1
a = np.random.random(a_shape).astype("float32") a = np.random.random(a_shape).astype("float32")
if axis is None: if axis is None:
# Avoid floating point error # Avoid floating point error
a = np.sign(a-0.5).astype("float32") a = np.sign(a - 0.5).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a)) utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
# Use recursive cumsum (along accumulation axis) # Use recursive cumsum (along accumulation axis)
a_shape = [3, 3, 3] a_shape = [3, 3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2 a_shape[shape_axis] = block_max_size * (block_max_size + 1) + 2
a = np.random.random(a_shape).astype("float32") a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error a = np.sign(a - 0.5).astype("float32") # Avoid floating point error
utt.assert_allclose(np.cumsum(a, axis=axis), f(a)) utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum4D(self): def test_GpuCumsum4D(self):
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请注册或者登录后发表评论