make sure it work with non scalar tensor and test it.

c056820d · Frederic · a9e821f5 · c056820d · c056820d
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1345,7 +1345,8 @@ def local_gpualloc_memset_0(node):
    if isinstance(node.op, GpuAlloc) and not node.op.memset_0:
        inp = node.inputs[0]
        if (isinstance(inp, CudaNdarrayConstant) and
-            numpy.asarray(inp.data) == 0 and inp.data.size == 1):
+            inp.data.size == 1 and
+            (numpy.asarray(inp.data) == 0).all()):
            new_out = GpuAlloc(memset_0=True)(*node.inputs)
            return [new_out]

--- a/theano/sandbox/cuda/tests/test_opt.py
+++ b/theano/sandbox/cuda/tests/test_opt.py
@@ -73,6 +73,10 @@ def test_gpualloc():
 def test_alloc_memset_0():
    i = tensor.iscalar()
    z = numpy.zeros((1,), dtype='float32')
+    o = numpy.ones((1,), dtype='float32')
+    ones = numpy.ones((2,), dtype='float32')
+    # Test with 0
    a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(z)), i)
    f = theano.function([i], a, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
@@ -80,6 +84,24 @@ def test_alloc_memset_0():
    assert isinstance(topo[0].op, basic_ops.GpuAlloc) and topo[0].op.memset_0
    assert (numpy.asarray(f(6)) == 0).all()
+    # Test with 1
+    a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(o)), i)
+    f = theano.function([i], a, mode=mode_with_gpu)
+    topo = f.maker.fgraph.toposort()
+    assert len(topo) == 1
+    assert isinstance(topo[0].op, basic_ops.GpuAlloc)
+    assert not topo[0].op.memset_0
+    assert (numpy.asarray(f(6)) == 1).all()
+    # Test with 1, 1
+    a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(ones)), i)
+    f = theano.function([i], a, mode=mode_with_gpu)
+    topo = f.maker.fgraph.toposort()
+    assert len(topo) == 1
+    assert isinstance(topo[0].op, basic_ops.GpuAlloc)
+    assert not topo[0].op.memset_0
+    assert (numpy.asarray(f(2)) == 1).all()
 def test_gpuspecifyshape():
    x = cuda.shared_constructor(numpy.ones(3,dtype='float32'), 'x')