提交 30d2c4c3 authored 作者: Frederic's avatar Frederic

gpucareduce port continue.

上级 a482cc6f
import copy import copy
from itertools import izip from itertools import izip
from StringIO import StringIO
import numpy import numpy
from theano import Op, Apply, scalar, config from theano import Op, Apply, scalar, config
from theano.tensor.elemwise import Elemwise, DimShuffle, CAReduceDtype from theano import scalar as scal
from theano.scalar import Scalar
from theano.tensor.elemwise import (Elemwise, DimShuffle,
CAReduce, CAReduceDtype)
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
try: try:
...@@ -518,7 +522,7 @@ class GpuDimShuffle(HideC, DimShuffle): ...@@ -518,7 +522,7 @@ class GpuDimShuffle(HideC, DimShuffle):
return (3,) return (3,)
class GpuCAReduce(GpuOp): class GpuCAReduce(HideC, CAReduce):
"""GpuCAReduce is a Reduction along some dimensions by a scalar op. """GpuCAReduce is a Reduction along some dimensions by a scalar op.
The dimensions along which to reduce is specified by the The dimensions along which to reduce is specified by the
...@@ -552,36 +556,57 @@ class GpuCAReduce(GpuOp): ...@@ -552,36 +556,57 @@ class GpuCAReduce(GpuOp):
""" """
def __init__(self, scalar_op, axis=None, reduce_mask=None):
    """Build a GPU reduction of `scalar_op` along `axis`.

    :param scalar_op: the scalar reduction op; must expose an
        `identity` attribute (the reduction's neutral element).
    :param axis: axes to reduce over, forwarded to CAReduce.
    :param reduce_mask: optional per-dimension 0/1 pattern marking the
        reduced dimensions; normalized to a tuple when given.
    """
    self.reduce_mask = None if reduce_mask is None else tuple(reduce_mask)
    # Counter used to generate unique name arguments for successive
    # calls to the scalar op's code generator.
    self._n_scalar_op_calls = 0
    if not hasattr(scalar_op, 'identity'):
        raise ValueError("No identity on scalar op")
    CAReduce.__init__(self, scalar_op, axis=axis)
def __eq__(self, other):
    """Equal iff same concrete class, axis, reduce_mask and scalar op."""
    if type(self) != type(other):
        return False
    return (self.axis == other.axis
            and self.reduce_mask == other.reduce_mask
            and self.scalar_op == other.scalar_op)
def __hash__(self):
    """Hash consistent with __eq__ (the scalar op is hashed by type)."""
    h = hash(type(self))
    for part in (self.axis, self.reduce_mask, type(self.scalar_op)):
        h ^= hash(part)
    return h
def __str__(self):
    """Render e.g. ``GpuCAReduce{add}{0, 1}``; the axis suffix is
    omitted when ``self.axis`` is None (reduce over all dimensions)."""
    if self.axis is None:
        suffix = ''
    else:
        suffix = '{%s}' % (', '.join(str(x) for x in self.axis),)
    return "GpuCAReduce{%s}%s" % (str(self.scalar_op), suffix)
def make_node(self, x):
    """Build an Apply node reducing the GPU variable `x`.

    CAReduce.make_node resolves the effective axis for this input's
    rank; we then specialize a *copy* of this op (so the shared op
    instance is never mutated) with that axis and a matching
    reduce_mask before constructing the Apply.

    :raises TypeError: if `x`'s rank does not match an explicitly
        provided reduce_mask.
    """
    x = as_gpuarray_variable(x)
    assert x.dtype == "float32"
    ret = super(GpuCAReduce, self).make_node(x)
    # Work on a copy: the resolved axis/reduce_mask are input-specific.
    self = copy.copy(self)
    self.axis = ret.op.axis
    if self.reduce_mask is None:
        ndim = x.type.ndim
        if self.axis is None:
            # No axis given: reduce every dimension.
            mask = [1] * ndim
        else:
            mask = [0] * ndim
            for a in self.axis:
                assert mask[a] == 0
                mask[a] = 1
        self.reduce_mask = tuple(mask)
    if x.type.ndim != len(self.reduce_mask):
        raise TypeError("x must have rank %i" % len(self.reduce_mask))
    out_type = GpuArrayType(x.dtype, ret.outputs[0].type.broadcastable)
    return Apply(self, [x], [out_type()])
""" """
This method must be commented, because there's no way This method must be commented, because there's no way
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论