提交 b4240f3f authored 作者: Frederic's avatar Frederic

[CRASH] Fix crash as the opt didn't return the same type of variable

上级 1e7b219a
...@@ -504,6 +504,24 @@ def test_pdbbreakpoint_op(): ...@@ -504,6 +504,24 @@ def test_pdbbreakpoint_op():
assert topo[-1].op == cuda.host_from_gpu assert topo[-1].op == cuda.host_from_gpu
def test_local_gpu_elemwise_careduce():
    """Check that the elemwise-into-careduce optimization fires.

    The sqr elemwise feeding a sum should be fused into a single GPU
    reduction node carrying ``pre_scalar_op == sqr``, both for the
    full reduction and for a reduction along axis 1, and the fused
    graph must still compute the right values.
    """
    x = theano.tensor.fmatrix()

    # Full reduction: (x * x).sum() — the graph should contain only
    # 3 nodes, with the reduction node holding sqr as pre_scalar_op
    # instead of a separate GpuElemwise sqr node.
    out = (x * x).sum()
    fn = theano.function([x], out, mode=mode_with_gpu)
    nodes = fn.maker.fgraph.toposort()
    assert len(nodes) == 3
    assert nodes[1].op.pre_scalar_op == theano.scalar.sqr
    data = numpy.random.rand(3, 4).astype('float32')
    utt.assert_allclose(fn(data), (data * data).sum())

    # Same pattern, but reducing along axis 1 only.
    out = (x * x).sum(axis=1)
    fn = theano.function([x], out, mode=mode_with_gpu)
    nodes = fn.maker.fgraph.toposort()
    assert len(nodes) == 3
    assert nodes[1].op.pre_scalar_op == theano.scalar.sqr
    utt.assert_allclose(fn(data), (data * data).sum(axis=1))
def test_huge_elemwise_fusion(): def test_huge_elemwise_fusion():
""" Test the the GpuElemwise fusion work correctly """ Test the the GpuElemwise fusion work correctly
We check that we fuse one node with part of its input We check that we fuse one node with part of its input
......
...@@ -867,12 +867,13 @@ def local_gpu_elemwise_careduce(node): ...@@ -867,12 +867,13 @@ def local_gpu_elemwise_careduce(node):
isinstance(node.inputs[0].owner.op, GpuElemwise) and isinstance(node.inputs[0].owner.op, GpuElemwise) and
# The Op support all scalar with 1 inputs. We don't # The Op support all scalar with 1 inputs. We don't
# automatically add more case, as some like trigonometic # automatically add more case, as some like trigonometic
# operation with some reduction pattern will probably result # operation with some reduction pattern will probably results
# to slow down. # in slow down.
isinstance(node.inputs[0].owner.op.scalar_op, scalar.basic.Sqr)): isinstance(node.inputs[0].owner.op.scalar_op, scalar.basic.Sqr)):
op = node.op op = node.op
inp = node.inputs[0].owner.inputs[0] inp = node.inputs[0].owner.inputs[0]
return [GpuCAReduceCuda(scalar_op=op.scalar_op, return [GpuCAReduceCuda(scalar_op=op.scalar_op,
axis=op.axis,
reduce_mask=op.reduce_mask, reduce_mask=op.reduce_mask,
pre_scalar_op=scalar.basic.sqr)(inp)] pre_scalar_op=scalar.basic.sqr)(inp)]
......
...@@ -212,7 +212,15 @@ def test_local_gpu_elemwise_careduce(): ...@@ -212,7 +212,15 @@ def test_local_gpu_elemwise_careduce():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 3 assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr assert topo[1].op.pre_scalar_op == theano.scalar.sqr
f(numpy.random.rand(3, 4).astype(theano.config.floatX)) data = numpy.random.rand(3, 4).astype(theano.config.floatX)
utt.assert_allclose(f(data), (data*data).sum())
o = (x * x).sum(axis=1)
f = theano.function([x], o, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr
utt.assert_allclose(f(data), (data*data).sum(axis=1))
def test_local_gpu_subtensor(): def test_local_gpu_subtensor():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论