提交 177f1296 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Merge pull request #296 from nouiz/fix_pycuda_test

Fix pycuda test
...@@ -75,6 +75,8 @@ Exercise 6 ...@@ -75,6 +75,8 @@ Exercise 6
- Modify and execute it to work for a matrix of 20 x 10 - Modify and execute it to work for a matrix of 20 x 10
.. _pyCUDA_theano:
Theano + PyCUDA Theano + PyCUDA
--------------- ---------------
......
...@@ -33,8 +33,10 @@ Compiling with PyCUDA ...@@ -33,8 +33,10 @@ Compiling with PyCUDA
--------------------- ---------------------
You can use PyCUDA to compile some CUDA function that work directly on You can use PyCUDA to compile some CUDA function that work directly on
CudaNdarray. There is an example in the function `test_pycuda_simple` in CudaNdarray. There is an example in the function `test_pycuda_theano`
the file `theano/misc/tests/test_pycuda_theano_simple.py`. in the file `theano/misc/tests/test_pycuda_theano_simple.py`. Also,
there is an example that shows how to make an op that calls a PyCUDA
function :ref:`here <pyCUDA_theano>`.
Theano op using PyCUDA function Theano op using PyCUDA function
------------------------------- -------------------------------
......
...@@ -15,7 +15,8 @@ import theano.misc.pycuda_init ...@@ -15,7 +15,8 @@ import theano.misc.pycuda_init
if not theano.misc.pycuda_init.pycuda_available: if not theano.misc.pycuda_init.pycuda_available:
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
raise SkipTest("Pycuda not installed. Skip test of theano op with pycuda code.") raise SkipTest("Pycuda not installed."
" We skip test of theano op with pycuda code.")
if cuda_ndarray.cuda_available == False: if cuda_ndarray.cuda_available == False:
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
...@@ -26,9 +27,8 @@ import pycuda.driver as drv ...@@ -26,9 +27,8 @@ import pycuda.driver as drv
import pycuda.gpuarray import pycuda.gpuarray
def test_pycuda_simple(): def test_pycuda_only():
x = cuda_ndarray.CudaNdarray.zeros((5,5)) """Run pycuda only example to test that pycuda work."""
from pycuda.compiler import SourceModule from pycuda.compiler import SourceModule
mod = SourceModule(""" mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b) __global__ void multiply_them(float *dest, float *a, float *b)
...@@ -40,36 +40,63 @@ __global__ void multiply_them(float *dest, float *a, float *b) ...@@ -40,36 +40,63 @@ __global__ void multiply_them(float *dest, float *a, float *b)
multiply_them = mod.get_function("multiply_them") multiply_them = mod.get_function("multiply_them")
# Test with pycuda in/out of numpy.ndarray
a = numpy.random.randn(100).astype(numpy.float32) a = numpy.random.randn(100).astype(numpy.float32)
b = numpy.random.randn(100).astype(numpy.float32) b = numpy.random.randn(100).astype(numpy.float32)
dest = numpy.zeros_like(a) dest = numpy.zeros_like(a)
multiply_them( multiply_them(
drv.Out(dest), drv.In(a), drv.In(b), drv.Out(dest), drv.In(a), drv.In(b),
block=(400,1,1), grid=(1,1)) block=(400, 1, 1), grid=(1, 1))
assert (dest==a*b).all() assert (dest == a * b).all()
def test_pycuda_theano():
    """Simple example with pycuda function and Theano CudaNdarray object.

    Compiles an element-wise multiply kernel with PyCUDA, launches it with
    ``CudaNdarray`` objects as inputs and output, and checks the result
    against the same product computed with numpy on the host arrays.
    """
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")

    multiply_them = mod.get_function("multiply_them")

    # The kernel indexes its buffers with threadIdx.x and has no bounds
    # check, so the block must contain exactly one thread per element.
    # Launching more threads than elements would access memory past the
    # end of the buffers.
    size = 100
    a = numpy.random.randn(size).astype(numpy.float32)
    b = numpy.random.randn(size).astype(numpy.float32)

    # Test with Theano object
    ga = cuda_ndarray.CudaNdarray(a)
    gb = cuda_ndarray.CudaNdarray(b)
    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
    multiply_them(dest, ga, gb,
                  block=(size, 1, 1), grid=(1, 1))
    assert (numpy.asarray(dest) == a * b).all()
def test_pycuda_memory_to_theano(): def test_pycuda_memory_to_theano():
#Test that we can use the GpuArray memory space in pycuda in a CudaNdarray #Test that we can use the GpuArray memory space in pycuda in a CudaNdarray
y = pycuda.gpuarray.zeros((3,4,5), 'float32') y = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
print numpy.asarray(y) print numpy.asarray(y)
print "gpuarray ref count before creating a CudaNdarray", sys.getrefcount(y) print "gpuarray ref count before creating a CudaNdarray",
assert sys.getrefcount(y)==2 print sys.getrefcount(y)
assert sys.getrefcount(y) == 2
rand = numpy.random.randn(*y.shape).astype(numpy.float32) rand = numpy.random.randn(*y.shape).astype(numpy.float32)
cuda_rand = cuda_ndarray.CudaNdarray(rand) cuda_rand = cuda_ndarray.CudaNdarray(rand)
strides = [1] strides = [1]
for i in y.shape[::-1][:-1]: for i in y.shape[::-1][:-1]:
strides.append(strides[-1]*i) strides.append(strides[-1] * i)
strides = tuple(strides[::-1]) strides = tuple(strides[::-1])
print 'strides', strides print 'strides', strides
assert cuda_rand._strides == strides, (cuda_rand._strides, strides) assert cuda_rand._strides == strides, (cuda_rand._strides, strides)
y_ptr = int(y.gpudata) # in pycuda trunk, y.ptr also works, which is a little cleaner # in pycuda trunk, y.ptr also works, which is a little cleaner
y_ptr = int(y.gpudata)
z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y) z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y)
print "gpuarray ref count after creating a CudaNdarray", sys.getrefcount(y) print "gpuarray ref count after creating a CudaNdarray", sys.getrefcount(y)
assert sys.getrefcount(y)==3 assert sys.getrefcount(y) == 3
assert (numpy.asarray(z) == 0).all() assert (numpy.asarray(z) == 0).all()
assert z.base is y assert z.base is y
...@@ -80,7 +107,8 @@ def test_pycuda_memory_to_theano(): ...@@ -80,7 +107,8 @@ def test_pycuda_memory_to_theano():
del zz del zz
assert sys.getrefcount(y) == 3 assert sys.getrefcount(y) == 3
cuda_ones = cuda_ndarray.CudaNdarray(numpy.asarray([[[1]]],dtype='float32')) cuda_ones = cuda_ndarray.CudaNdarray(numpy.asarray([[[1]]],
dtype='float32'))
z += cuda_ones z += cuda_ones
assert (numpy.asarray(z) == numpy.ones(y.shape)).all() assert (numpy.asarray(z) == numpy.ones(y.shape)).all()
assert (numpy.asarray(z) == 1).all() assert (numpy.asarray(z) == 1).all()
...@@ -89,9 +117,10 @@ def test_pycuda_memory_to_theano(): ...@@ -89,9 +117,10 @@ def test_pycuda_memory_to_theano():
assert cuda_rand._strides == z._strides, (cuda_rand._strides, z._strides) assert cuda_rand._strides == z._strides, (cuda_rand._strides, z._strides)
assert (numpy.asarray(cuda_rand) == rand).all() assert (numpy.asarray(cuda_rand) == rand).all()
z += cuda_rand z += cuda_rand
assert (numpy.asarray(z)==(rand+1)).all() assert (numpy.asarray(z) == (rand + 1)).all()
# Check that the ref count to the gpuarray is right. # Check that the ref count to the gpuarray is right.
del z del z
print "gpuarray ref count after deleting the CudaNdarray", sys.getrefcount(y) print "gpuarray ref count after deleting the CudaNdarray",
assert sys.getrefcount(y)==2 print sys.getrefcount(y)
assert sys.getrefcount(y) == 2
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论