提交 d87cf8b4 authored 作者: Kelvin Xu's avatar Kelvin Xu

pep8

上级 99cffe57
......@@ -43,7 +43,7 @@ class GpuCumsum(CumsumOp, GpuOp):
if x.ndim > GpuCumsum.SUPPORTED_NDIMS:
raise NotImplementedError('Only cumsum on 1D, 2D and 3D array are supported right now!')
print(self.axis)
if self.axis >= x.ndim or self.axis < -x.ndim:
raise ValueError('axis(={1}) out of bounds'.format(self.axis))
......
from __future__ import absolute_import, print_function, division
import theano
import numpy
import os
from theano import Op, Apply, config
from theano import Apply
from theano.tensor.extra_ops import CumsumOp
try:
import pygpu
from pygpu import gpuarray
except ImportError:
pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
infer_context_name, GpuFromHost, HideC)
infer_context_name, GpuFromHost)
from .opt import register_opt as register_gpu_opt, op_lifter
from .type import GpuArrayType
class GpuCumsum(GpuKernelBase, HideC, CumsumOp):
class GpuCumsum(GpuKernelBase):
"""
Parameters
----------
......@@ -34,7 +30,7 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp):
return "%s{%s}" % (self.__class__.__name__, self.axis)
def c_code_cache_version_apply(self, node):
    """Return the C-code cache version for this apply node.

    Returning a non-empty tuple lets Theano cache the compiled C code
    instead of recompiling on every use; bump the number whenever the
    generated kernel/C code changes.
    """
    # The original block contained an unreachable second `return (1,)`
    # after `return None` (diff residue); keep only the intended
    # versioned return so caching is enabled.
    return (1,)
def c_headers(self):
    """List the C header files the generated code must include."""
    headers = [
        '<numpy_compat.h>',
        '<gpuarray/types.h>',
        '<gpuarray_helper.h>',
    ]
    return headers
......@@ -57,13 +53,6 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp):
raise ValueError('axis(={0}) out of bounds'.format(self.axis))
return Apply(self, [x], [x.type()])
# copied from neighbour.py
def perform(self, node, inp, out, ctx):
    """Block the inherited CPU implementation of ``perform``.

    This class inherits from ``CumsumOp``, whose ``perform`` would run a
    CPU cumsum; delegating to the abstract base ``theano.Op.perform``
    instead forces Theano to use the C/GPU code path.
    ``Op.perform`` presumably raises for an unimplemented op — TODO
    confirm against the installed Theano version.
    """
    # Disable the perform method from the CPU version
    Op.perform(self, node, inp, out, ctx)
def gpu_kernels(self, node, nodename):
kernels = []
# cumadd
......@@ -104,8 +93,8 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp):
params = [gpuarray.GpuArray, gpuarray.GpuArray, gpuarray.SIZE,
gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE,
gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE,
'int32', 'int32', gpuarray.GpuArray,]
code="""
'int32', 'int32', gpuarray.GpuArray, ]
code = """
// helper functions
WITHIN_KERNEL
void k_reductionPhase(float* partialCumSum) {
......@@ -226,12 +215,11 @@ class GpuCumsum(GpuKernelBase, HideC, CumsumOp):
"""
params = [gpuarray.GpuArray, gpuarray.GpuArray, gpuarray.SIZE,
gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.SSIZE,
'int32', 'int32',]
'int32', 'int32', ]
kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var))
return kernels
def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda':
raise NotImplementedError("cuda only")
......
......@@ -10,6 +10,7 @@ import theano
import theano.tensor.tests.test_extra_ops
from theano.tensor.extra_ops import cumsum, CumsumOp
from theano.tests.unittest_tools import SkipTest
from theano.tests import unittest_tools as utt
from .config import mode_with_gpu, test_ctx_name
......@@ -28,8 +29,6 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
self.max_threads_dim0 = test_ctx.maxlsize0
self.max_grid_size1 = test_ctx.maxgsize2
def test_Strides1D(self):
x = T.fvector('x')
......@@ -113,11 +112,11 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
utt.assert_allclose(np.cumsum(a[:i]), f(a[:i]))
# Use multiple GPU threadblocks
a = np.random.random((block_max_size+2,)).astype("float32")
a = np.random.random((block_max_size + 2, )).astype("float32")
utt.assert_allclose(np.cumsum(a), f(a))
# Use recursive cumsum
a = np.ones((block_max_size*(block_max_size+1)+2,),
a = np.ones((block_max_size * (block_max_size + 1) + 2,),
dtype="float32")
utt.assert_allclose(np.cumsum(a), f(a))
......@@ -143,21 +142,21 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
# Use multiple GPU threadblocks
a_shape = [5, 5]
a_shape[shape_axis] = block_max_size+2
a_shape[shape_axis] = block_max_size + 2
a = np.random.random(a_shape).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
# Use multiple GPU gridblocks
a_shape = [4, 4]
a_shape[1-shape_axis] = self.max_grid_size1+1
a_shape[1 - shape_axis] = self.max_grid_size1 + 1
a = np.random.random(a_shape).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a), rtol=5e-5)
# Use recursive cumsum
a_shape = [3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2
a_shape[shape_axis] = block_max_size * (block_max_size + 1) + 2
a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
a = np.sign(a - 0.5).astype("float32") # Avoid floating point error
utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum3D(self):
......@@ -182,32 +181,32 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
# Use multiple GPU threadblocks (along accumulation axis)
a_shape = [2, 2, 2]
a_shape[shape_axis] = block_max_size+2
a_shape[shape_axis] = block_max_size + 2
a = np.random.random(a_shape).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
# Use multiple GPU gridblocks (not along accumulation axis)
a_shape = [5, 5, 5]
a_shape[(shape_axis+1) % 3] = self.max_grid_size1+1
a_shape[(shape_axis + 1) % 3] = self.max_grid_size1 + 1
a = np.random.random(a_shape).astype("float32")
if axis is None:
# Avoid floating point error
a = np.sign(a-0.5).astype("float32")
a = np.sign(a - 0.5).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
a_shape = [5, 5, 5]
a_shape[(shape_axis+2) % 3] = self.max_grid_size1+1
a_shape[(shape_axis + 2) % 3] = self.max_grid_size1 + 1
a = np.random.random(a_shape).astype("float32")
if axis is None:
# Avoid floating point error
a = np.sign(a-0.5).astype("float32")
a = np.sign(a - 0.5).astype("float32")
utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
# Use recursive cumsum (along accumulation axis)
a_shape = [3, 3, 3]
a_shape[shape_axis] = block_max_size*(block_max_size+1)+2
a_shape[shape_axis] = block_max_size * (block_max_size + 1) + 2
a = np.random.random(a_shape).astype("float32")
a = np.sign(a-0.5).astype("float32") # Avoid floating point error
a = np.sign(a - 0.5).astype("float32") # Avoid floating point error
utt.assert_allclose(np.cumsum(a, axis=axis), f(a))
def test_GpuCumsum4D(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论