提交 bae2a093 authored 作者: Frederic's avatar Frederic

Fix the test in float32, and also check float64 at the same time.

上级 f948f053
......@@ -18,7 +18,7 @@ else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
def freemem(extra_alloc=0, extra_size=0):
    """
    Return a summary of GPU memory usage for comparison between test steps.

    :param extra_alloc: number of additional expected GPU mallocs to add to
        the reported malloc count (used to account for constants the
        compilation phase moves to the GPU).
    :param extra_size: additional expected allocation size in KB, added to
        the reported Theano-allocated byte count.  Previously this name was
        referenced without being defined, raising a NameError whenever the
        ``theano_allocated`` branch ran; it is now a proper parameter
        defaulting to 0 so existing callers are unaffected.
    :return: a tuple whose first element is a description string and whose
        remaining elements are the memory counters being compared.
    """
    # NOTE(review): the lines below up to the malloc count were hidden
    # between diff hunks; reconstructed from the surrounding code -- confirm
    # against the full file.  Collect cyclic garbage so CudaNdarrays freed
    # by Python are actually released before we count outstanding mallocs.
    gc.collect()
    gc.collect()
    gc.collect()
    n_mallocs = cuda.cuda_ndarray.cuda_ndarray.outstanding_mallocs()

    if hasattr(cuda.cuda_ndarray.cuda_ndarray, "theano_allocated"):
        theano_alloc = cuda.cuda_ndarray.cuda_ndarray.theano_allocated()
        return ("(n malloc/theano mem allocated in KB)",
                n_mallocs + extra_alloc,
                int(theano_alloc / 1024) + extra_size)
    return ("n malloc on the gpu", n_mallocs + extra_alloc)
    # I don't use the following by default as if there is other stuff running
    # on the GPU, this won't work.  Deliberately kept as dead code after the
    # early return above.
    mem_info = cuda.cuda_ndarray.cuda_ndarray.mem_info()
    gpu_used = (mem_info[1] - mem_info[0]) / 1024 ** 2
    mem_info_msg = "(n malloc/gpu mem used in MB)"
    return (mem_info_msg, n_mallocs, int(gpu_used))
def test_memory():
......@@ -52,49 +53,62 @@ def test_memory():
note::
This test can fail if there is other process running on the gpu.
"""
shapes = (6000, 5000)
test_params = np.asarray(np.random.randn(np.prod(shapes)), 'float32')
some_vector = tensor.vector('some_vector')
some_matrix = some_vector.reshape(shapes)
mem1 = freemem()
print "Before shared variable", mem1
variables = cuda.shared_constructor(np.ones((shapes[1],), dtype='float32'))
derp = tensor.sum(tensor.dot(some_matrix[:shapes[0]], variables))
print "Shared took ", np.prod(variables.get_value(
borrow=True,
return_internal_type=True).shape) * 4 / 1024, "kB"
mem2 = freemem()
print "Before compilation", mem2
obj = theano.function([some_vector], derp, mode=mode_with_gpu)
mem3 = freemem()
print "After function compilation 1", mem3
assert mem2 == mem3, (mem2, mem3)
grad_derp = tensor.grad(derp, some_vector)
grad = theano.function([some_vector], grad_derp, mode=mode_with_gpu)
mem4 = freemem()
print "After function compilation 2", mem4
assert mem2 == mem4, (mem2, mem4)
for i in range(3):
obj(test_params)
print "After function evaluation 1", freemem()
shapes = (200, 100)
# more_alloc1 and more_alloc2 are not the same for both dtypes.
# When dtype is float32, the computation is done on the gpu.
# This inserts constants on the gpu during compilation,
# which raises the number of allocations.
# When dtype is float64, only the shared variable is on the gpu and it is
# transferred to the cpu for computation. So there is no extra allocation
# after compilation.
# more_alloc1 applies after the first compilation, more_alloc2 after the
# second.
for dtype, more_alloc1, more_alloc2 in [("float32", 2, 9),
("float64", 0, 0)]:
print dtype
test_params = np.asarray(np.random.randn(np.prod(shapes)), dtype)
some_vector = tensor.vector('some_vector', dtype=dtype)
some_matrix = some_vector.reshape(shapes)
mem1 = freemem()
print "Before shared variable", mem1
variables = cuda.shared_constructor(np.ones((shapes[1],),
dtype='float32'))
derp = tensor.sum(tensor.dot(some_matrix[:shapes[0]], variables))
print "Shared took ", np.prod(variables.get_value(
borrow=True,
return_internal_type=True).shape) * 4 / 1024, "kB"
mem2 = freemem()
print "Before compilation", mem2
mem2_1 = freemem(extra_alloc=more_alloc1)
mem2_2 = freemem(extra_alloc=more_alloc2)
obj = theano.function([some_vector], derp, mode=mode_with_gpu)
mem3 = freemem()
print "After function compilation 1", mem3
assert mem2_1 == mem3, (mem2_1, mem3)
grad_derp = tensor.grad(derp, some_vector)
grad = theano.function([some_vector], grad_derp, mode=mode_with_gpu)
mem4 = freemem()
print "After function compilation 2", mem4
assert mem2_2 == mem4, (mem2_2, mem4)
for i in range(3):
obj(test_params)
print "After function evaluation 1", freemem()
assert mem2_2 == freemem(), (mem2_2, freemem())
grad(test_params)
print "After function evaluation 2", freemem()
assert mem2_2 == freemem(), (mem2_2, freemem())
del obj
#print "After deleting function 1", freemem()
#assert mem2 == freemem(), (mem2, freemem())
del grad
print "After deleting function 2", freemem()
assert mem2 == freemem(), (mem2, freemem())
grad(test_params)
print "After function evaluation 2", freemem()
assert mem2 == freemem(), (mem2, freemem())
del obj
print "After deleting function 1", freemem()
assert mem2 == freemem(), (mem2, freemem())
del grad
print "After deleting function 2", freemem()
assert mem2 == freemem(), (mem2, freemem())
del derp, variables, grad_derp
print "After deleting shared variable and ref to it", freemem()
assert mem1 == freemem(), (mem1, freemem())
del derp, variables, grad_derp
print "After deleting shared variable and ref to it", freemem()
assert mem1 == freemem(), (mem1, freemem())
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论