提交 9ad7030e authored 作者: Frederic's avatar Frederic

Do the constant folding on the new back-end gpualloc.

Add other gpualloc tests. They currently fail because ger isn't implemented on the new back-end.
上级 a82957e5
...@@ -694,6 +694,36 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -694,6 +694,36 @@ class GpuReshape(HideC, tensor.Reshape):
raise ValueError("total size of new array must be unchanged") raise ValueError("total size of new array must be unchanged")
out[0] = x.reshape(tuple(shp)) out[0] = x.reshape(tuple(shp))
def do_constant_folding(self, node):
    """Decide whether this node's output may be replaced by a constant.

    Returns False whenever folding would hurt: the value would be
    deep-copied on every function call (graph output), a client op
    would otherwise work in place on the allocated buffer (folding
    would force a copy of the constant and raise peak memory), or a
    transfer op still needs the moving optimizations to settle first.
    """
    # Ops that operate in place on their input at index 0; if the
    # alloc were folded, each of these would have to copy the
    # resulting constant, which is less efficient and can also
    # increase peak memory use, since the "constant" would always
    # be kept alive.
    # theano.tensor.subtensor.AdvancedIncSubtensor could belong
    # here too, and GpuGer once it is implemented on this back-end.
    inplace_on_first_input = (
        GpuIncSubtensor,
        GpuAdvancedIncSubtensor1,
        theano.sandbox.gpuarray.blas.GpuGemm,
        theano.sandbox.gpuarray.blas.GpuGemv,
    )
    for client, input_index in node.outputs[0].clients:
        # A folded constant used as a graph output gets deep-copied
        # on every call, so do not fold.
        if client == 'output':
            return False
        if input_index == 0 and isinstance(client.op,
                                           inplace_on_first_input):
            return False
        # A transfer back to the host: wait for the moving
        # optimizations to finish before deciding what to do.
        if isinstance(client.op, HostFromGpu):
            return False
    return True
class GpuEye(GpuKernelBase, Op): class GpuEye(GpuKernelBase, Op):
def __init__(self, dtype=None): def __init__(self, dtype=None):
......
...@@ -32,8 +32,10 @@ if not theano.sandbox.gpuarray.pygpu_activated: ...@@ -32,8 +32,10 @@ if not theano.sandbox.gpuarray.pygpu_activated:
from theano.sandbox.gpuarray.type import (GpuArrayType, from theano.sandbox.gpuarray.type import (GpuArrayType,
gpuarray_shared_constructor) gpuarray_shared_constructor)
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host, from theano.sandbox.gpuarray.basic_ops import (
gpu_alloc, gpu_from_cuda, host_from_gpu, gpu_from_host,
gpu_alloc, GpuAlloc,
gpu_from_cuda,
cuda_from_gpu, HostFromGpu, cuda_from_gpu, HostFromGpu,
GpuFromHost, GpuReshape, GpuFromHost, GpuReshape,
GpuEye) GpuEye)
...@@ -290,6 +292,13 @@ GpuAllocTester = makeTester( ...@@ -290,6 +292,13 @@ GpuAllocTester = makeTester(
) )
class TestAlloc(theano.tensor.tests.test_basic.TestAlloc):
    """Run the generic Alloc test suite against the gpuarray back-end."""
    # Use float32 data on the GPU, with gpuarray-backed shared variables.
    dtype = "float32"
    shared = staticmethod(gpuarray_shared_constructor)
    mode = mode_with_gpu
    # Alloc op classes the base tests expect to find in the compiled graphs.
    allocs = [GpuAlloc] * 2 + [T.Alloc]
def test_shape(): def test_shape():
x = GpuArrayType(dtype='float32', broadcastable=[False, False, False])() x = GpuArrayType(dtype='float32', broadcastable=[False, False, False])()
v = gpuarray.zeros((3, 4, 5), dtype='float32') v = gpuarray.zeros((3, 4, 5), dtype='float32')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论