提交 64a95964 authored 作者: Frederic Bastien's avatar Frederic Bastien

make T.max() return a CAReduce instance as this is faster.

Add tests for CAReduce{maximum,mul} and modify CAReduce to handle maximum correctly.
上级 31380660
......@@ -1406,11 +1406,17 @@ def max(x, axis=None):
Return maximum elements obtained by iterating over given axis
Default axis is the last one.
:note: we raise an error, as numpy does, when we reduce over a dimension with a shape of 0
"""
# In python (using MaxAndArgmax.perform()) this leads to an wasteful
# implementation that goes through the data twice instead of once
# but when Argmax.c_impl() is in place, it should be fine.
return max_and_argmax(x,axis)[0]
if isinstance(axis,int) or axis is None:
return CAReduce(scal.maximum,axis)(x)
#TODO: does CAReduce need axis to be constant?
try:
const = get_constant_value(axis)
return CAReduce(scal.maximum,list(const))(x)
except:
return max_and_argmax(x,axis)[0]
@constructor
def argmax(x, axis=None):
......
......@@ -782,6 +782,7 @@ class CAReduce(Op):
Examples:
CAReduce(add) -> sum
CAReduce(mul) -> product
CAReduce(maximum) -> max
CAReduce(_or) -> any # not lazy
CAReduce(_and) -> all # not lazy
......@@ -790,7 +791,7 @@ class CAReduce(Op):
iterates over the dimensions and the elements of the
array(s). Therefore, to ensure consistent variables, the scalar
operation represented by the reduction must be both commutative
and associative (eg add, multiply, binary or/and/xor - but not
and associative (eg add, multiply, maximum, binary or/and/xor - but not
subtract, divide or power).
"""
......@@ -927,10 +928,36 @@ class CAReduce(Op):
alloc += cgen.make_declare([range(nnested) + ['x'] * len(axis)], [odtype], dict(sub, lv0 = oname))
alloc += cgen.make_alloc([order1], odtype, sub)
alloc += cgen.make_checks([range(nnested) + ['x'] * len(axis)], [odtype], dict(sub, lv0 = oname))
if hasattr(self.scalar_op,'identity'):
identity = self.scalar_op.identity
elif self.scalar_op == scalar.maximum:
if input.type.dtype in ["float32","float64"]:
identity = "-__builtin_inf()"
else:
identity = "NPY_MIN_"+str(input.type.dtype).upper()
fail = sub["fail"]
pattern=[0]*len(node.inputs[0].broadcastable)
axis = self.axis
if axis == None: axis = range(len(pattern))
for i in axis:
pattern[i]=1
pattern_ = str(pattern)[1:-1]
decl +="""int tosum[]={%(pattern_)s};"""%locals()
alloc += """
for(int i=0;i<%(iname)s->nd;i++){
if(PyArray_DIMS(%(iname)s)[i]==0 && tosum[i]){
PyErr_Format(PyExc_ValueError, "Input of CAReduce{maximum} has zero-size on axis %%d",i);
%(fail)s;
}
}
"""%locals()
else:
raise Exception("The CAReduce.scalar_op must have an identity field.")
task0_decl = "%(dtype)s& %(name)s_i = *%(name)s_iter;\n%(name)s_i = %(identity)s;" % dict(dtype = odtype,
name = onames[0],
identity = self.scalar_op.identity)
identity = identity)
task1_decl = "%(dtype)s& %(name)s_i = *%(name)s_iter;\n" % dict(dtype = idtype, name = inames[0])
......
......@@ -154,7 +154,7 @@ class test_CAReduce(unittest.TestCase):
def setUp(self):
unittest_tools.seed_rng()
def with_linker(self, linker):
def with_linker(self, linker, scalar_op = add):
for xsh, tosum in [((5, 6), None),
((5, 6), (0, 1)),
((5, 6), (0, )),
......@@ -165,29 +165,70 @@ class test_CAReduce(unittest.TestCase):
((5, 0), (1, )),
((), ())]:
x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
e = CAReduce(add, axis = tosum)(x)
e = CAReduce(scalar_op, axis = tosum)(x)
if tosum is None: tosum = range(len(xsh))
f = copy(linker).accept(Env([x], [e])).make_function()
xv = numpy.asarray(numpy.random.rand(*xsh))
zv = xv
for axis in reversed(sorted(tosum)):
zv = numpy.add.reduce(zv, axis)
self.failUnless((numpy.abs(f(xv) - zv) < 1e-10).all())
numpy_raised = False
if scalar_op == add:
for axis in reversed(sorted(tosum)):
zv = numpy.add.reduce(zv, axis)
elif scalar_op == mul:
for axis in reversed(sorted(tosum)):
zv = numpy.multiply.reduce(zv, axis)
elif scalar_op == maximum:
try:
for axis in reversed(sorted(tosum)):
zv = numpy.maximum.reduce(zv, axis)
except ValueError:
numpy_raised=True
elif scalar_op == or_:
for axis in reversed(sorted(tosum)):
zv = numpy.any(zv, axis)
elif scalar_op == and_:
for axis in reversed(sorted(tosum)):
zv = numpy.all(zv, axis)
else:
raise Exception("Test for CAReduce with scalar_op %s not implemented"%str(scalar_op))
if scalar_op == maximum and numpy_raised:
try:
f(xv)
except ValueError:
pass
else:
self.fail()
else:
self.failUnless((numpy.abs(f(xv) - zv) < 1e-10).all())
#test CAReduce.infer_shape
#the Shape op doesn't implement c_code!
if isinstance(linker,gof.PerformLinker):
x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
e = CAReduce(add, axis = tosum)(x)
e = CAReduce(scalar_op, axis = tosum)(x)
if tosum is None: tosum = range(len(xsh))
f = copy(linker).accept(Env([x], [e.shape])).make_function()
assert all(f(xv)== zv.shape)
if not(scalar_op == maximum and ((xsh==() or numpy.prod(xsh)==0))):
assert all(f(xv)== zv.shape)
def test_perform(self):
self.with_linker(gof.PerformLinker())
self.with_linker(gof.PerformLinker(), add)
self.with_linker(gof.PerformLinker(), mul)
self.with_linker(gof.PerformLinker(), maximum)
#need other dtype than real
#self.with_linker(gof.PerformLinker(), or_)
#self.with_linker(gof.PerformLinker(), and_)
def test_c(self):
self.with_linker(gof.CLinker())
self.with_linker(gof.CLinker(), add)
self.with_linker(gof.CLinker(), mul)
self.with_linker(gof.CLinker(), maximum)
#need other dtype than real
#no c_code for or_, and_
#self.with_linker(gof.CLinker(), or_)
#self.with_linker(gof.CLinker(), and_)
if __name__ == '__main__':
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论