提交 67448f81 authored 作者: Frederic's avatar Frederic

Make a gc test with the ifelse op.

上级 351efd7a
...@@ -5,6 +5,7 @@ import numpy as np
import theano
from theano import tensor
from theano.sandbox import cuda
from theano import ifelse
# Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
...@@ -112,3 +113,65 @@ def test_memory():
    del derp, variables, grad_derp
    print "After deleting shared variable and ref to it", freemem()
    assert mem1 == freemem(), (mem1, freemem())
def test_memory_lazy():
    """As test_memory, but exercising the lazy ifelse op.

    With the ifelse op, the [c]vm linkers create ops that are never
    executed in a given call (only one branch runs), which stresses the
    [c]vm garbage-collection implementation.  The test checks that free
    GPU memory returns to the expected baseline after each stage
    (allocation, compilation, evaluation of each branch, deletion).
    """
    shapes = (200, 100)
    # The expected number of extra allocations differs per dtype:
    # - float32: the computation is done on the GPU, and compilation
    #   inserts constants on the GPU, which raises the allocation count.
    # - float64: only the shared variable is on the GPU; it is
    #   transferred to the CPU for computation, so there is no extra
    #   allocation after compilation.
    # more_alloc1 is the expected extra-allocation count after
    # compilation.  NOTE(review): the original comment also described a
    # "more_alloc2" for a second compilation, but this test performs only
    # one compilation and no such variable exists.
    for dtype, more_alloc1 in [("float32", 3),
                               ("float64", 0)]:
        print dtype
        test_params = np.asarray(np.random.randn(np.prod(shapes)), dtype)
        some_vector = tensor.vector('some_vector', dtype=dtype)
        some_matrix = some_vector.reshape(shapes)
        branch_select = tensor.iscalar()
        # Baseline free memory before anything is allocated on the GPU.
        mem1 = freemem()
        print "Before shared variable", mem1
        variables = cuda.shared_constructor(np.ones((shapes[1],),
                                                    dtype='float32'))
        derp = tensor.sum(tensor.dot(some_matrix[:shapes[0]], variables))
        # Wrap the computation in an IfElse so that, per call, one branch
        # is present in the graph but never executed (the lazy case the
        # [c]vm gc must handle).
        derp = ifelse.IfElse(1)(branch_select,
                                derp, some_matrix[:shapes[0]].sum())
        derp += 1
        print "Shared took ", np.prod(variables.get_value(
            borrow=True,
            return_internal_type=True).shape) * 4 / 1024, "kB"
        # Free memory after the shared variable, before compilation.
        mem2 = freemem()
        print "Before compilation", mem2
        # Expected free memory after compilation, accounting for the
        # dtype-dependent extra allocations described above.
        mem2_1 = freemem(extra_alloc=more_alloc1)
        obj = theano.function([some_vector, branch_select], derp,
                              mode=mode_with_gpu)
        #theano.printing.debugprint(obj, print_type=True)
        mem3 = freemem()
        print "After function compilation 1", mem3
        assert mem2_1 == mem3, (mem2_1, mem3)
        # Evaluate both branches several times: with a working gc, free
        # memory must stay at the post-compilation level throughout.
        for i in range(3):
            obj(test_params, 1)
            print "After function evaluation branch true", freemem()
            assert mem2_1 == freemem(), (mem2_1, freemem())
            obj(test_params, 0)
            print "After function evaluation branch false", freemem()
            assert mem2_1 == freemem(), (mem2_1, freemem())
        # Deleting the compiled function should release its allocations.
        del obj
        print "After deleting function 1", freemem()
        assert mem2 == freemem(), (mem2, freemem())
        # Deleting the shared variable and the graph reference should
        # bring free memory back to the original baseline.
        del derp, variables
        print "After deleting shared variable and ref to it", freemem()
        assert mem1 == freemem(), (mem1, freemem())
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论