Commit bae2a093 authored by Frederic

Fix the test in float32, but also check float64 at the same time.

Parent f948f053
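In short, the test body is wrapped in a loop over both dtypes, with a per-dtype allowance for allocations that compilation is expected to add. A minimal sketch of that pattern (not the test itself; `freemem` is the helper changed in the first hunk below):

    # Sketch only: run the same checks for both dtypes, padding the
    # pre-compilation baseline by the expected number of extra mallocs.
    for dtype, more_alloc in [("float32", 2), ("float64", 0)]:
        baseline = freemem(extra_alloc=more_alloc)
        # ... compile a function, then: assert freemem() == baseline ...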
@@ -18,7 +18,7 @@ else:
     mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')


-def freemem():
+def freemem(extra_alloc=0, extra_size=0):
     """
     Return the free memory on the gpu in megabytes.
     """
@@ -30,15 +30,16 @@ def freemem():
     if hasattr(cuda.cuda_ndarray.cuda_ndarray, "theano_allocated"):
         theano_alloc = cuda.cuda_ndarray.cuda_ndarray.theano_allocated()
         return ("(n malloc/theano mem allocated in KB)",
-                n_mallocs, int(theano_alloc / 1024))
+                n_mallocs + extra_alloc,
+                int(theano_alloc / 1024) + extra_size)

-    return ("n malloc on the gpu", n_mallocs)
+    return ("n malloc on the gpu", n_mallocs + extra_alloc)

     # I don't use the following by default as if there is other stuff running
     # on the GPU, this won't work.
     mem_info = cuda.cuda_ndarray.cuda_ndarray.mem_info()
     gpu_used = (mem_info[1] - mem_info[0]) / 1024 ** 2
-    return ("(n malloc/gpu mem used in MB)", n_mallocs, int(gpu_used))
+    mem_info_msg = "(n malloc/gpu mem used in MB)"
+    return (mem_info_msg, n_mallocs, int(gpu_used))
@@ -52,49 +53,62 @@ def test_memory():
     note::
         This test can fail if there is other process running on the gpu.
     """
-    shapes = (6000, 5000)
-    test_params = np.asarray(np.random.randn(np.prod(shapes)), 'float32')
-
-    some_vector = tensor.vector('some_vector')
-    some_matrix = some_vector.reshape(shapes)
-
-    mem1 = freemem()
-    print "Before shared variable", mem1
-    variables = cuda.shared_constructor(np.ones((shapes[1],), dtype='float32'))
-    derp = tensor.sum(tensor.dot(some_matrix[:shapes[0]], variables))
-    print "Shared took ", np.prod(variables.get_value(
-        borrow=True,
-        return_internal_type=True).shape) * 4 / 1024, "kB"
-
-    mem2 = freemem()
-    print "Before compilation", mem2
-    obj = theano.function([some_vector], derp, mode=mode_with_gpu)
-    mem3 = freemem()
-    print "After function compilation 1", mem3
-    assert mem2 == mem3, (mem2, mem3)
-
-    grad_derp = tensor.grad(derp, some_vector)
-    grad = theano.function([some_vector], grad_derp, mode=mode_with_gpu)
-    mem4 = freemem()
-    print "After function compilation 2", mem4
-    assert mem2 == mem4, (mem2, mem4)
-
-    for i in range(3):
-        obj(test_params)
-        print "After function evaluation 1", freemem()
-        assert mem2 == freemem(), (mem2, freemem())
-        grad(test_params)
-        print "After function evaluation 2", freemem()
-        assert mem2 == freemem(), (mem2, freemem())
-
-    del obj
-    print "After deleting function 1", freemem()
-    assert mem2 == freemem(), (mem2, freemem())
-
-    del grad
-    print "After deleting function 2", freemem()
-    assert mem2 == freemem(), (mem2, freemem())
-
-    del derp, variables, grad_derp
-    print "After deleting shared variable and ref to it", freemem()
-    assert mem1 == freemem(), (mem1, freemem())
+    shapes = (200, 100)
+    # more_alloc1 and more_alloc2 differ between the two dtypes.
+    # When dtype is float32, the computation is done on the gpu. This
+    # inserts constants on the gpu during compilation, which raises the
+    # number of allocs.
+    # When dtype is float64, only the shared variable is on the gpu and
+    # it is transferred to the cpu for computation, so there is no extra
+    # alloc after compilation.
+    # more_alloc1 applies after the first compilation, more_alloc2 after
+    # the second.
+    for dtype, more_alloc1, more_alloc2 in [("float32", 2, 9),
+                                            ("float64", 0, 0)]:
+        print dtype
+        test_params = np.asarray(np.random.randn(np.prod(shapes)), dtype)
+
+        some_vector = tensor.vector('some_vector', dtype=dtype)
+        some_matrix = some_vector.reshape(shapes)
+
+        mem1 = freemem()
+        print "Before shared variable", mem1
+        variables = cuda.shared_constructor(np.ones((shapes[1],),
+                                                    dtype='float32'))
+        derp = tensor.sum(tensor.dot(some_matrix[:shapes[0]], variables))
+        print "Shared took ", np.prod(variables.get_value(
+            borrow=True,
+            return_internal_type=True).shape) * 4 / 1024, "kB"
+
+        mem2 = freemem()
+        print "Before compilation", mem2
+        mem2_1 = freemem(extra_alloc=more_alloc1)
+        mem2_2 = freemem(extra_alloc=more_alloc2)
+        obj = theano.function([some_vector], derp, mode=mode_with_gpu)
+        mem3 = freemem()
+        print "After function compilation 1", mem3
+        assert mem2_1 == mem3, (mem2_1, mem3)
+
+        grad_derp = tensor.grad(derp, some_vector)
+        grad = theano.function([some_vector], grad_derp, mode=mode_with_gpu)
+        mem4 = freemem()
+        print "After function compilation 2", mem4
+        assert mem2_2 == mem4, (mem2_2, mem4)
+
+        for i in range(3):
+            obj(test_params)
+            print "After function evaluation 1", freemem()
+            assert mem2_2 == freemem(), (mem2_2, freemem())
+            grad(test_params)
+            print "After function evaluation 2", freemem()
+            assert mem2_2 == freemem(), (mem2_2, freemem())
+
+        del obj
+        #print "After deleting function 1", freemem()
+        #assert mem2 == freemem(), (mem2, freemem())
+
+        del grad
+        print "After deleting function 2", freemem()
+        assert mem2 == freemem(), (mem2, freemem())
+
+        del derp, variables, grad_derp
+        print "After deleting shared variable and ref to it", freemem()
+        assert mem1 == freemem(), (mem1, freemem())
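To see the placement difference the new comments describe, one can print the compiled graph. A sketch, assuming a CUDA-enabled Theano and reusing the names from the hunk above:

    import theano
    f = theano.function([some_vector], derp, mode=mode_with_gpu)
    theano.printing.debugprint(f)
    # float32: expect Gpu* ops (the dot runs on the gpu, with constants
    # allocated there at compilation time).
    # float64: expect a HostFromGpu transfer of the shared variable
    # followed by cpu ops, hence no extra gpu allocs after compilation.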