Changed references to GpuSum to use GpuCAReduce instead

e5978249 · Ian Goodfellow · 2a1aae56 · e5978249 · e5978249 · e5978249
--- a/theano/sandbox/cuda/__init__.py
+++ b/theano/sandbox/cuda/__init__.py
@@ -270,7 +270,7 @@ if cuda_available:

    import basic_ops
    from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise,
-                           GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
+                           GpuDimShuffle, GpuCAReduce, GpuReshape, GpuContiguous,
                           GpuSubtensor, GpuIncSubtensor,
                           GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1,
                           GpuFlatten, GpuShape, GpuAlloc,

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -593,7 +593,7 @@ def local_gpu_sum(node):
                    for a in node.op.axis:
                        assert reduce_mask[a] == 0
                        reduce_mask[a] = 1
-                gsum = GpuSum(reduce_mask)
+                gsum = GpuCAReduce(reduce_mask, theano.scalar.basic.add)
                pattern = (''.join(str(i) for i in reduce_mask))
                if hasattr(gsum, 'c_code_reduce_%s' % pattern):
                    rval = host_from_gpu(gsum(gpu_from_host(x)))
@@ -625,7 +625,7 @@ def local_gpu_sum(node):
                            new_in_shp.append(x_shape[i])

                    pattern = (''.join(str(i) for i in new_mask))
-                    new_gsum = GpuSum(new_mask)
+                    new_gsum = GpuCAReduce(new_mask, theano.scalar.basic.add)
                    if hasattr(new_gsum, 'c_code_reduce_%s' % pattern):
                        reshaped_x = x.reshape(tensor.stack(*new_in_shp))
                        sum_reshaped_x = host_from_gpu(
@@ -644,7 +644,7 @@ def local_gpu_sum(node):
                            return None

                        raise Exception(
-                            "GpuSum don't have implemented the pattern",
+                            "GpuCAReduce don't have implemented the pattern",
                            pattern)
    return False


--- a/theano/sandbox/cuda/tests/test_basic_ops.py
+++ b/theano/sandbox/cuda/tests/test_basic_ops.py
@@ -108,7 +108,7 @@ def test_sum():
        val = theano._asarray(val, dtype='float32')
        f = theano.function([a], b, mode=mode_with_gpu)
        f2 = theano.function([a], b, mode=mode_without_gpu)
-        assert tcn.GpuSum in [x.op.__class__ for x in f.maker.fgraph.toposort()]
+        assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
        assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
        if val.size == 0:
            assert f2(val) == f(val), ('shape', shape, 'pattern', pattern)
@@ -145,7 +145,7 @@ def test_sum():
        val = theano._asarray(val, dtype='float32')
        f = theano.function([a], b, mode=mode_with_gpu)
        f2 = theano.function([a], b, mode=mode_without_gpu)
-        assert tcn.GpuSum in [x.op.__class__ for x in f.maker.fgraph.toposort()]
+        assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
        assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
        assert _allclose(f2(val), f(val)), ('shape', shape,
                                            'pattern', pattern,
@@ -181,7 +181,7 @@ def test_sum():
            val2 = val2[::2, ::2, ::2, ::2]
        f = theano.function([a], b, mode=mode_without_gpu)
        f2 = theano.function([a2], b2, mode=mode_with_gpu)
-        assert tcn.GpuSum in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
+        assert tcn.GpuCAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
        assert T.Sum in [x.op.__class__ for x in f.maker.fgraph.toposort()]
        assert _allclose(f2(val2), f(val)), ('shape', shape,
                                             'pattern', pattern,

--- a/theano/sandbox/cuda/tests/test_driver.py
+++ b/theano/sandbox/cuda/tests/test_driver.py
@@ -28,7 +28,7 @@ def test_nvidia_driver1():
                        profile=False)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
-    assert sum(isinstance(node.op, B.GpuSum) for node in topo) == 1
+    assert sum(isinstance(node.op, B.GpuCAReduce) for node in topo) == 1
    if not numpy.allclose(f(), a.sum()):
        raise Exception("The nvidia driver version installed with this OS "
                        "does not give good results for reduction."

--- a/theano/sandbox/cuda/tests/test_opt.py
+++ b/theano/sandbox/cuda/tests/test_opt.py
@@ -44,11 +44,11 @@ def test_int_pow():
    f = theano.function([a], (a*4).sum(), mode=mode_with_gpu)

    op_names = [n.op.__class__.__name__ for n in f.maker.fgraph.toposort()]
-    assert op_names == ['GpuSum', 'GpuElemwise', 'HostFromGpu']
+    assert op_names == ['GpuCAReduce', 'GpuElemwise', 'HostFromGpu']

    f = theano.function([a], tensor.pow(a,4).sum(), mode=mode_with_gpu)
    op_names = [n.op.__class__.__name__ for n in f.maker.fgraph.toposort()]
-    assert op_names == ['GpuElemwise', 'GpuSum', 'HostFromGpu']
+    assert op_names == ['GpuElemwise', 'GpuCAReduce', 'HostFromGpu']

    #theano.printing.debugprint(f)