Commit dcc8ea72 authored by Frederic

Allow reduction on maximum and minimum

Parent 3943db3c
......@@ -3,6 +3,8 @@ from itertools import izip
from StringIO import StringIO
import numpy
import theano
from theano import Op, Apply, scalar, config
from theano import scalar as scal
from theano.scalar import Scalar
......@@ -583,8 +585,6 @@ class GpuCAReduceCuda(HideC, CAReduce):
# used to make sure that calls to scalar op
# have unique name arguments
self._n_scalar_op_calls = 0
if not hasattr(scalar_op, 'identity'):
raise ValueError("No identity on scalar op")
CAReduce.__init__(self, scalar_op, axis=axis)
def __eq__(self, other):
......@@ -1056,7 +1056,8 @@ class GpuCAReduceCuda(HideC, CAReduce):
{
int idx = threadNum - (threadCount >> 1) * 2;"""
new_version += self._assign_reduce(node, name, 'buf[idx]','buf[threadNum]', sub)
new_version += self._assign_reduce(node, name, 'buf[idx]',
'buf[threadNum]', sub)
new_version += """
}
......
......@@ -306,10 +306,10 @@ def local_gpua_advanced_incsubtensor(node):
@register_opt()
@op_lifter([tensor.CAReduce, tensor.Sum])
@op_lifter([tensor.CAReduce, tensor.Sum, tensor.elemwise.Prod])
def local_gpua_careduce(node):
if (isinstance(node.op.scalar_op, scalar.basic.Add) or
isinstance(node.op.scalar_op, scalar.basic.Mul)):
if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
scalar.Maximum, scalar.Minimum)):
x, = node.inputs
greduce = GpuCAReduceCuda(node.op.scalar_op, axis=node.op.axis)
if x.dtype != "float32":
......
......@@ -140,7 +140,8 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
# ((5,4,3,10,11),[1,2]),
]
op = GpuCAReduceCuda
reds = [scalar.add, scalar.mul]
reds = [scalar.add, scalar.mul,
scalar.maximum, scalar.minimum]
def test_perform(self):
return
......
......@@ -46,16 +46,18 @@ def test_flatten():
for node in f.maker.fgraph.toposort()]
def test_reduce():
    """Check that sum/prod/max/min reductions over axis 0 of a float32
    matrix are lifted to a GpuCAReduceCuda node and produce the same
    values as the corresponding numpy reductions.

    NOTE(review): the scraped diff interleaved the pre- and post-commit
    lines of this function; this is the reconstructed post-commit version
    (formerly ``test_sum_prod``, which only covered 'sum' with no axis).
    """
    for method in ['sum', 'prod', 'max', 'min']:
        m = theano.tensor.fmatrix()
        f = theano.function([m], getattr(m, method)(axis=0),
                            mode=mode_with_gpu)
        val = numpy.random.rand(10, 11).astype("float32")
        res = f(val)
        # Compare against the matching numpy reduction on the same axis.
        utt.assert_allclose(res, getattr(val, method)(axis=0))
        assert res.shape == (11,)
        topo = f.maker.fgraph.toposort()
        # The reduction must have been moved onto the GPU.
        assert GpuCAReduceCuda in [type(node.op)
                                   for node in topo], topo
def test_local_gpualloc_memset_0():
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment