Commit dcc8ea72 authored by Frederic

Allow reduction on maximum and minimum

Parent 3943db3c
......@@ -3,6 +3,8 @@ from itertools import izip
from StringIO import StringIO
import numpy
import theano
from theano import Op, Apply, scalar, config
from theano import scalar as scal
from theano.scalar import Scalar
......@@ -583,8 +585,6 @@ class GpuCAReduceCuda(HideC, CAReduce):
# used to make sure that calls to scalar op
# have unique name arguments
self._n_scalar_op_calls = 0
if not hasattr(scalar_op, 'identity'):
raise ValueError("No identity on scalar op")
CAReduce.__init__(self, scalar_op, axis=axis)
def __eq__(self, other):
......@@ -1056,7 +1056,8 @@ class GpuCAReduceCuda(HideC, CAReduce):
{
int idx = threadNum - (threadCount >> 1) * 2;"""
new_version += self._assign_reduce(node, name, 'buf[idx]','buf[threadNum]', sub)
new_version += self._assign_reduce(node, name, 'buf[idx]',
'buf[threadNum]', sub)
new_version += """
}
......
......@@ -306,10 +306,10 @@ def local_gpua_advanced_incsubtensor(node):
@register_opt()
@op_lifter([tensor.CAReduce, tensor.Sum])
@op_lifter([tensor.CAReduce, tensor.Sum, tensor.elemwise.Prod])
def local_gpua_careduce(node):
if (isinstance(node.op.scalar_op, scalar.basic.Add) or
isinstance(node.op.scalar_op, scalar.basic.Mul)):
if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
scalar.Maximum, scalar.Minimum)):
x, = node.inputs
greduce = GpuCAReduceCuda(node.op.scalar_op, axis=node.op.axis)
if x.dtype != "float32":
......
......@@ -140,7 +140,8 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
# ((5,4,3,10,11),[1,2]),
]
op = GpuCAReduceCuda
reds = [scalar.add, scalar.mul]
reds = [scalar.add, scalar.mul,
scalar.maximum, scalar.minimum]
def test_perform(self):
return
......
......@@ -46,16 +46,18 @@ def test_flatten():
for node in f.maker.fgraph.toposort()]
def test_reduce():
    """Check that sum/prod/max/min reductions over axis 0 of a float32
    matrix are lifted to a GpuCAReduceCuda node and produce the same
    values as the corresponding numpy reductions.

    NOTE(review): the scraped diff interleaved the pre- and post-commit
    lines of this function; this is the reconstructed post-commit version
    (formerly ``test_sum_prod``, which only covered 'sum' with no axis).
    """
    for method in ['sum', 'prod', 'max', 'min']:
        m = theano.tensor.fmatrix()
        f = theano.function([m], getattr(m, method)(axis=0),
                            mode=mode_with_gpu)
        val = numpy.random.rand(10, 11).astype("float32")
        res = f(val)
        # Compare against the matching numpy reduction on the same axis.
        utt.assert_allclose(res, getattr(val, method)(axis=0))
        assert res.shape == (11,)
        topo = f.maker.fgraph.toposort()
        # The reduction must have been moved onto the GPU.
        assert GpuCAReduceCuda in [type(node.op)
                                   for node in topo], topo
def test_local_gpualloc_memset_0():
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment