提交 e1fae722 authored 作者: Marc-Alexandre Cote's avatar Marc-Alexandre Cote

Addressed comments from @abergeron and @nouiz

Added __eq__ and __hash__ methods. Fixed c_code_cache_version. Check if axis=None before bailing out.
上级 8ce2474e
...@@ -5,6 +5,7 @@ from theano.gof import local_optimizer ...@@ -5,6 +5,7 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available, GpuOp from theano.sandbox.cuda import cuda_available, GpuOp
from theano.tensor.extra_ops import CumsumOp from theano.tensor.extra_ops import CumsumOp
from theano.sandbox.cuda import GpuFlatten
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType
...@@ -15,12 +16,28 @@ if cuda_available: ...@@ -15,12 +16,28 @@ if cuda_available:
class GpuCumsum(CumsumOp, GpuOp): class GpuCumsum(CumsumOp, GpuOp):
SUPPORTED_NDIMS = 2 SUPPORTED_NDIMS = 2
def __eq__(self, other):
    """Two ops are equal when they have the same type, axis, and device limits.

    Must stay consistent with ``__hash__`` so the op can be used as a
    dictionary key (e.g. by Theano's op caching).
    """
    if type(self) != type(other):
        return False
    self_key = (self.axis, self.max_threads_dim0, self.max_grid_size1)
    other_key = (other.axis, other.max_threads_dim0, other.max_grid_size1)
    return self_key == other_key
def __hash__(self):
    """XOR-combine the hashes of the type and the identifying attributes.

    Consistent with ``__eq__``: equal ops hash equal.
    """
    result = 0
    for part in (type(self), self.axis,
                 self.max_threads_dim0, self.max_grid_size1):
        result ^= hash(part)
    return result
def __str__(self):
    """Render the op as ``<ClassName>{<axis>}``, e.g. ``GpuCumsum{1}``."""
    name = self.__class__.__name__
    return "%s{%s}" % (name, self.axis)
def __init__(self, axis):
    """
    Parameters
    ----------
    axis : int
        Axis along which the cumulative sum is computed. ``axis`` can not
        be None; if you want the array flattened, do it before.
    """
    self.axis = axis
    # Device limits are unknown until a device is selected; they are
    # filled in lazily (see make_thunk) from the CUDA device properties.
    self.max_threads_dim0 = self.max_grid_size1 = None
def make_node(self, x): def make_node(self, x):
assert x.dtype == 'float32' assert x.dtype == 'float32'
...@@ -38,7 +55,7 @@ class GpuCumsum(CumsumOp, GpuOp): ...@@ -38,7 +55,7 @@ class GpuCumsum(CumsumOp, GpuOp):
def make_thunk(self, node, storage_map, compute_map, no_recycling): def make_thunk(self, node, storage_map, compute_map, no_recycling):
node_ = copy.copy(node) node_ = copy.copy(node)
assert node.op is node_.op assert node.op is node_.op
if node_.op.max_threads_dim0 is None: if node_.op.max_threads_dim0 is None or node_.op.max_grid_size1 is None:
cuda = theano.sandbox.cuda cuda = theano.sandbox.cuda
device_id = cuda.use.device_number device_id = cuda.use.device_number
if device_id is None: if device_id is None:
...@@ -53,12 +70,12 @@ class GpuCumsum(CumsumOp, GpuOp): ...@@ -53,12 +70,12 @@ class GpuCumsum(CumsumOp, GpuOp):
prop = cuda_ndarray.device_properties(device_id) prop = cuda_ndarray.device_properties(device_id)
node_.op.max_threads_dim0 = prop['maxThreadsDim0'] node_.op.max_threads_dim0 = prop['maxThreadsDim0']
node_.op.max_grid_size1 = prop['maxGridSize1'] node_.op.max_grid_size1 = prop['maxGridSize1']
return super(GpuCumsum, node_.op).make_thunk(node_, storage_map, return super(GpuCumsum, node_.op).make_thunk(node_, storage_map,
compute_map, no_recycling) compute_map, no_recycling)
def c_code_cache_version(self):
    """Version tag for the generated C code cache.

    Bump this tuple whenever the C implementation changes so Theano
    recompiles instead of reusing a stale cached module.
    """
    version = (1,)
    return version
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
return """ return """
...@@ -301,16 +318,18 @@ class GpuCumsum(CumsumOp, GpuOp): ...@@ -301,16 +318,18 @@ class GpuCumsum(CumsumOp, GpuOp):
return code return code
from theano.sandbox.cuda import GpuFlatten
@local_optimizer([CumsumOp]) @local_optimizer([CumsumOp])
def use_gpu_cumsum(node): def use_gpu_cumsum(node):
if node.inputs[0].ndim > GpuCumsum.SUPPORTED_NDIMS:
return None
if type(node.op) is CumsumOp and node.inputs[0].dtype == 'float32': if type(node.op) is CumsumOp and node.inputs[0].dtype == 'float32':
x = gpu_from_host(node.inputs[0])
axis = node.op.axis axis = node.op.axis
x = node.inputs[0]
if axis is not None and x.ndim > GpuCumsum.SUPPORTED_NDIMS:
return None
x = gpu_from_host(x)
if axis is None and x.ndim > 1: if axis is None and x.ndim > 1:
x = GpuFlatten()(x) x = GpuFlatten()(x)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论