Commit dcc8ea72 authored by Frederic

Allow reduction on maximum and minimum

Parent 3943db3c
...@@ -3,6 +3,8 @@ from itertools import izip ...@@ -3,6 +3,8 @@ from itertools import izip
from StringIO import StringIO from StringIO import StringIO
import numpy import numpy
import theano
from theano import Op, Apply, scalar, config from theano import Op, Apply, scalar, config
from theano import scalar as scal from theano import scalar as scal
from theano.scalar import Scalar from theano.scalar import Scalar
...@@ -583,8 +585,6 @@ class GpuCAReduceCuda(HideC, CAReduce): ...@@ -583,8 +585,6 @@ class GpuCAReduceCuda(HideC, CAReduce):
# used to make sure that calls to scalar op # used to make sure that calls to scalar op
# have unique name arguments # have unique name arguments
self._n_scalar_op_calls = 0 self._n_scalar_op_calls = 0
if not hasattr(scalar_op, 'identity'):
raise ValueError("No identity on scalar op")
CAReduce.__init__(self, scalar_op, axis=axis) CAReduce.__init__(self, scalar_op, axis=axis)
def __eq__(self, other): def __eq__(self, other):
...@@ -1056,7 +1056,8 @@ class GpuCAReduceCuda(HideC, CAReduce): ...@@ -1056,7 +1056,8 @@ class GpuCAReduceCuda(HideC, CAReduce):
{ {
int idx = threadNum - (threadCount >> 1) * 2;""" int idx = threadNum - (threadCount >> 1) * 2;"""
new_version += self._assign_reduce(node, name, 'buf[idx]','buf[threadNum]', sub) new_version += self._assign_reduce(node, name, 'buf[idx]',
'buf[threadNum]', sub)
new_version += """ new_version += """
} }
......
...@@ -306,10 +306,10 @@ def local_gpua_advanced_incsubtensor(node): ...@@ -306,10 +306,10 @@ def local_gpua_advanced_incsubtensor(node):
@register_opt() @register_opt()
@op_lifter([tensor.CAReduce, tensor.Sum]) @op_lifter([tensor.CAReduce, tensor.Sum, tensor.elemwise.Prod])
def local_gpua_careduce(node): def local_gpua_careduce(node):
if (isinstance(node.op.scalar_op, scalar.basic.Add) or if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
isinstance(node.op.scalar_op, scalar.basic.Mul)): scalar.Maximum, scalar.Minimum)):
x, = node.inputs x, = node.inputs
greduce = GpuCAReduceCuda(node.op.scalar_op, axis=node.op.axis) greduce = GpuCAReduceCuda(node.op.scalar_op, axis=node.op.axis)
if x.dtype != "float32": if x.dtype != "float32":
......
...@@ -140,7 +140,8 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY): ...@@ -140,7 +140,8 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
# ((5,4,3,10,11),[1,2]), # ((5,4,3,10,11),[1,2]),
] ]
op = GpuCAReduceCuda op = GpuCAReduceCuda
reds = [scalar.add, scalar.mul] reds = [scalar.add, scalar.mul,
scalar.maximum, scalar.minimum]
def test_perform(self): def test_perform(self):
return return
......
...@@ -46,16 +46,18 @@ def test_flatten(): ...@@ -46,16 +46,18 @@ def test_flatten():
for node in f.maker.fgraph.toposort()] for node in f.maker.fgraph.toposort()]
def test_sum_prod(): def test_reduce():
for method in ['sum']: for method in ['sum', 'prod', 'max', 'min']:
m = theano.tensor.fmatrix() m = theano.tensor.fmatrix()
f = theano.function([m], getattr(m, method)(), mode=mode_with_gpu) f = theano.function([m], getattr(m, method)(axis=0),
mode=mode_with_gpu)
val = numpy.random.rand(10, 11).astype("float32") val = numpy.random.rand(10, 11).astype("float32")
res = f(val) res = f(val)
utt.assert_allclose(res, val.sum()) utt.assert_allclose(res, getattr(val, method)(axis=0))
assert res.shape == () assert res.shape == (11,)
topo = f.maker.fgraph.toposort()
assert GpuCAReduceCuda in [type(node.op) assert GpuCAReduceCuda in [type(node.op)
for node in f.maker.fgraph.toposort()] for node in topo], topo
def test_local_gpualloc_memset_0(): def test_local_gpualloc_memset_0():
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment