Merge pull request #1457 from nouiz/gpu_min_max

GPU min/max: more reduction patterns implemented

Merge pull request #1457 from nouiz/gpu_min_max
3d41cfaa · lamblin · ae91b627 · d7bad6de · 3d41cfaa · 3d41cfaa
--- a/NEWS_DEV.txt
+++ b/NEWS_DEV.txt
--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -602,7 +602,7 @@ def local_gpu_careduce(node):
        scalar_op = node.op.scalar_op
        # currently, only these two ops are supported at all,
        # and max does not support all combinations of axes
-        if node.op.scalar_op in [scal.add, scal.maximum, scal.minimum]:
+        if node.op.scalar_op in [scal.add, scal.mul, scal.maximum, scal.minimum]:
            x, = node.inputs
            if x.owner and x.owner.op == host_from_gpu:
                if node.op.axis is None:

--- a/theano/sandbox/cuda/tests/test_basic_ops.py
+++ b/theano/sandbox/cuda/tests/test_basic_ops.py
@@ -65,9 +65,16 @@ def test_careduce():
    TODO: test with broadcast
    """
    for scalar_op, careduce_op in [
+            (theano.scalar.mul, tensor.elemwise.CAReduceDtype),
            (theano.scalar.add, tensor.elemwise.CAReduceDtype),
            (theano.scalar.maximum, tensor.CAReduce),
-            (theano.scalar.minimum, tensor.CAReduce)]:
+            (theano.scalar.minimum, tensor.CAReduce)
+            #The following 2 cases could work if the scalar_op.c_code worked with float* dtypes.
+            #Currently they fail with this error:
+            #error: invalid operands of types 'npy_float32' and 'npy_float32' to binary 'operator&'
+            #(theano.scalar.and_, tensor.elemwise.CAReduce),
+            #(theano.scalar.or_, tensor.elemwise.CAReduce),
+    ]:
        for shape, pattern in [((1,1),(1,)),
                               ((1,0),(1,)),
                               ((0,1),(1,)),
@@ -124,11 +131,6 @@ def test_careduce():

            op = careduce_op(scalar_op, axis=pattern)
            pat = tensor_pattern_to_gpu_pattern(shape, pattern)
-            #GpuCAReduce{maximum/minimum} support only those patterns
-            if scalar_op in [theano.scalar.maximum,
-                             theano.scalar.minimum] and pat not in [
-                                 (0, 1), (0, 1, 1), (0, 1, 1), (1, 0)]:
-                continue

            a = tensor.TensorType('float32', (False,) * len(shape))()
            b = op(a)
@@ -139,15 +141,22 @@ def test_careduce():
            f = theano.function([a], b, mode=mode_with_gpu)
            f2 = theano.function([a], b, mode=mode_without_gpu)
            assert tcn.GpuCAReduce in [x.op.__class__
-                                       for x in f.maker.fgraph.toposort()]
+                                       for x in f.maker.fgraph.toposort()], (
+                                           scalar_op, shape, pattern)
            assert op.__class__ in [x.op.__class__
-                                    for x in f2.maker.fgraph.toposort()]
+                                    for x in f2.maker.fgraph.toposort()], (
+                                           scalar_op, shape, pattern)
            f_caused_value_error = False
            try:
                f_out = f(val)
            except ValueError, e:
                exc = e
                f_caused_value_error = True
+            except NotImplementedError:
+                if (numpy.prod(shape) == 0 and
+                    getattr(scalar_op, 'identity', None) != 0):
+                    continue
+                raise

            f2_caused_value_error = False
            try:
@@ -179,6 +188,7 @@ def test_careduce():
                theano.tensor.basic.float32_rtol = 2e-5
                assert _allclose(f_out, f2_out), ('shape', shape,
                                                    'pattern', pattern,
+                                                    scalar_op,
                                                    sum([shape[i] for i in pattern]),
                                                    f2(val), f(val), val)
            finally:
@@ -193,11 +203,6 @@ def test_careduce():
                               ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
            op = careduce_op(scalar_op, axis=pattern)
            pat = tensor_pattern_to_gpu_pattern(shape, pattern)
-            #GpuCAReduce{maximum/minimum} support only those patterns
-            if scalar_op in [theano.scalar.maximum,
-                             theano.scalar.minimum] and pat not in [
-                                 (0, 1), (0, 1, 1), (0, 1, 1), (1, 0)]:
-                continue

            a = tensor.TensorType('float32', (False,) * len(shape))()
            dim_pattern = range(len(shape))
@@ -212,11 +217,14 @@ def test_careduce():
            f = theano.function([a], b, mode=mode_with_gpu)
            f2 = theano.function([a], b, mode=mode_without_gpu)
            assert tcn.GpuCAReduce in [x.op.__class__
-                                       for x in f.maker.fgraph.toposort()]
+                                       for x in f.maker.fgraph.toposort()], (
+                                           scalar_op, shape, pattern)
            assert op.__class__ in [x.op.__class__
-                                    for x in f2.maker.fgraph.toposort()]
+                                    for x in f2.maker.fgraph.toposort()], (
+                                           scalar_op, shape, pattern)
            assert _allclose(f2(val), f(val)), ('shape', shape,
                                                'pattern', pattern,
+                                                scalar_op,
                                                sum([shape[i] for i in pattern]))

            #test with broadcast
@@ -227,11 +235,6 @@ def test_careduce():
                               ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
            op = careduce_op(scalar_op, axis=pattern)
            pat = tensor_pattern_to_gpu_pattern(shape, pattern)
-            #GpuCAReduce{maximum/minimum} support only those patterns
-            if scalar_op in [theano.scalar.maximum,
-                             theano.scalar.minimum] and pat not in [
-                                 (0, 1), (0, 1, 1), (0, 1, 1), (1, 0)]:
-                continue

            shape = numpy.asarray(shape) * 2
            a = tensor.TensorType('float32', (False,) * len(shape))()
@@ -258,9 +261,11 @@ def test_careduce():
            f = theano.function([a], b, mode=mode_without_gpu)
            f2 = theano.function([a2], b2, mode=mode_with_gpu)
            assert tcn.GpuCAReduce in [x.op.__class__
-                                       for x in f2.maker.fgraph.toposort()]
+                                       for x in f2.maker.fgraph.toposort()], (
+                                           scalar_op, shape, pattern)
            assert op.__class__ in [x.op.__class__
-                                    for x in f.maker.fgraph.toposort()]
+                                    for x in f.maker.fgraph.toposort()], (
+                                           scalar_op, shape, pattern)
            assert _allclose(f2(val2), f(val)), ('shape', shape,
                                                 'pattern', pattern,
                                                 sum([shape[i] for i in pattern]))

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -2721,7 +2721,7 @@ class TrueDotTester(utt.InferShapeTester):
                assert tested.format == format
                assert tested.dtype == expected.dtype
                tested = tested.toarray()
-                assert numpy.allclose(tested, expected)
+                utt.assert_allclose(tested, expected)

    def test_op_sd(self):
        for format in sparse.sparse_formats:
@@ -2743,7 +2743,7 @@ class TrueDotTester(utt.InferShapeTester):
                assert tested.format == format
                assert tested.dtype == expected.dtype
                tested = tested.toarray()
-                assert numpy.allclose(tested, expected)
+                utt.assert_allclose(tested, expected)

    def test_infer_shape(self):
        for format in sparse.sparse_formats: