提交 4d530ed0 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add a Gpu version of the reduction op (only supports add and multiply for now).

上级 6e74a08b
......@@ -3,12 +3,13 @@ from itertools import izip
import numpy
from theano import Op, Apply, scalar, config
from theano.tensor.elemwise import Elemwise, DimShuffle
from theano.tensor.elemwise import Elemwise, DimShuffle, CAReduceDtype
try:
import pygpu
from pygpu.tools import ScalarArg, ArrayArg
from pygpu.elemwise import ElemwiseKernel
from pygpu.reduction import ReductionKernel
except ImportError:
pass
......@@ -283,3 +284,65 @@ class GpuDimShuffle(HideC, DimShuffle):
def c_code_cache_version(self):
return (2,)
class GpuCAReduce(HideC, CAReduceDtype):
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
if not hasattr(scalar_op, 'identity'):
raise ValueError("No identity on scalar op")
CAReduceDtype.__init__(self, scalar_op, axis=axis, dtype=dtype,
acc_dtype=acc_dtype)
def __str__(self):
ax = ''
if self.axis is not None:
ax = '{%s}' % (', '.join(str(x) for x in self.axis),)
return "GpuReduce{%s}%s" % (self.scalar_op, ax)
def make_node(self, input):
res = CAReduceDtype.make_node(self, input)
input = as_gpuarray_variable(input)
otype = GpuArrayType(dtype=res.outputs[0].dtype,
broadcastable=res.outputs[0].broadcastable)
if res.op.axis is not None:
redux = []
for i in range(len(input.type.broadcastable)):
redux.append(i in res.op.axis)
# since redux is just another way to describe what is in axis
# it doesn't need to be compared in __eq__ or __hash__
res.op.redux = redux
return Apply(res.op, [input], [otype()])
def generate_kernel(self, node, odtype, redux):
if isinstance(self.scalar_op, scalar.basic.Add):
reduce_expr = "a + b"
elif isinstance(self.scalar_op, scalar.basic.Mul):
reduce_expr = "a * b"
else:
raise NotImplementedError()
return ReductionKernel(pygpu.get_default_context(), odtype,
self.scalar_op.identity, reduce_expr, redux)
def perform(self, node, inp, out):
input, = inp
output, = out
if self.axis is None:
redux = [True] * input.ndim
else:
redux = self.redux
acc_dtype = getattr(self, 'acc_dtype', None)
if acc_dtype is None:
acc_dtype = node.output[0].type.dtype
if any(redux):
if not hasattr(node, '_cache_reduction_k'):
node._cache_reduction_k = self.generate_kernel(node, acc_dtype,
redux)
output[0] = node._cache_reduction_k(input)
else:
output[0] = pygpu.array(input, copy=True,
dtype=node.outputs[0].type.dtype)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论