提交 177f1296 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Merge pull request #296 from nouiz/fix_pycuda_test

Fix pycuda test
......@@ -75,6 +75,8 @@ Exercise 6
- Modify and execute it to work for a matrix of 20 x 10
.. _pyCUDA_theano:
Theano + PyCUDA
---------------
......
......@@ -33,8 +33,10 @@ Compiling with PyCUDA
---------------------
You can use PyCUDA to compile CUDA functions that work directly on
CudaNdarray. There is an example in the function `test_pycuda_simple` in
the file `theano/misc/tests/test_pycuda_theano_simple.py`.
CudaNdarray. There is an example in the function `test_pycuda_theano`
in the file `theano/misc/tests/test_pycuda_theano_simple.py`. Also,
there is an example that shows how to make an op that calls a PyCUDA
function :ref:`here <pyCUDA_theano>`.
Theano op using PyCUDA function
-------------------------------
......
......@@ -15,7 +15,8 @@ import theano.misc.pycuda_init
if not theano.misc.pycuda_init.pycuda_available:
from nose.plugins.skip import SkipTest
raise SkipTest("Pycuda not installed. Skip test of theano op with pycuda code.")
raise SkipTest("Pycuda not installed."
" We skip test of theano op with pycuda code.")
if cuda_ndarray.cuda_available == False:
from nose.plugins.skip import SkipTest
......@@ -26,9 +27,8 @@ import pycuda.driver as drv
import pycuda.gpuarray
def test_pycuda_simple():
x = cuda_ndarray.CudaNdarray.zeros((5,5))
def test_pycuda_only():
"""Run pycuda only example to test that pycuda work."""
from pycuda.compiler import SourceModule
mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
......@@ -40,36 +40,63 @@ __global__ void multiply_them(float *dest, float *a, float *b)
multiply_them = mod.get_function("multiply_them")
# Test with pycuda in/out of numpy.ndarray
a = numpy.random.randn(100).astype(numpy.float32)
b = numpy.random.randn(100).astype(numpy.float32)
dest = numpy.zeros_like(a)
multiply_them(
drv.Out(dest), drv.In(a), drv.In(b),
block=(400,1,1), grid=(1,1))
assert (dest==a*b).all()
block=(400, 1, 1), grid=(1, 1))
assert (dest == a * b).all()
def test_pycuda_theano():
    """Simple example with pycuda function and Theano CudaNdarray object.

    Compiles an element-wise multiply kernel with PyCUDA, runs it on
    CudaNdarray inputs and checks the result against the numpy product.
    """
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
const int i = threadIdx.x;
dest[i] = a[i] * b[i];
}
""")
    multiply_them = mod.get_function("multiply_them")

    # The kernel indexes the buffers with threadIdx.x only and performs no
    # bounds check, so the number of launched threads must equal the number
    # of elements; launching more would read/write past the end of the
    # device buffers.
    n_elems = 100
    a = numpy.random.randn(n_elems).astype(numpy.float32)
    b = numpy.random.randn(n_elems).astype(numpy.float32)

    # Test with Theano object
    ga = cuda_ndarray.CudaNdarray(a)
    gb = cuda_ndarray.CudaNdarray(b)
    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
    multiply_them(dest, ga, gb,
                  block=(n_elems, 1, 1), grid=(1, 1))
    assert (numpy.asarray(dest) == a * b).all()
def test_pycuda_memory_to_theano():
#Test that we can use the GpuArray memory space in pycuda in a CudaNdarray
y = pycuda.gpuarray.zeros((3,4,5), 'float32')
y = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
print numpy.asarray(y)
print "gpuarray ref count before creating a CudaNdarray", sys.getrefcount(y)
assert sys.getrefcount(y)==2
print "gpuarray ref count before creating a CudaNdarray",
print sys.getrefcount(y)
assert sys.getrefcount(y) == 2
rand = numpy.random.randn(*y.shape).astype(numpy.float32)
cuda_rand = cuda_ndarray.CudaNdarray(rand)
strides = [1]
for i in y.shape[::-1][:-1]:
strides.append(strides[-1]*i)
strides.append(strides[-1] * i)
strides = tuple(strides[::-1])
print 'strides', strides
assert cuda_rand._strides == strides, (cuda_rand._strides, strides)
y_ptr = int(y.gpudata) # in pycuda trunk, y.ptr also works, which is a little cleaner
# in pycuda trunk, y.ptr also works, which is a little cleaner
y_ptr = int(y.gpudata)
z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y)
print "gpuarray ref count after creating a CudaNdarray", sys.getrefcount(y)
assert sys.getrefcount(y)==3
assert sys.getrefcount(y) == 3
assert (numpy.asarray(z) == 0).all()
assert z.base is y
......@@ -80,7 +107,8 @@ def test_pycuda_memory_to_theano():
del zz
assert sys.getrefcount(y) == 3
cuda_ones = cuda_ndarray.CudaNdarray(numpy.asarray([[[1]]],dtype='float32'))
cuda_ones = cuda_ndarray.CudaNdarray(numpy.asarray([[[1]]],
dtype='float32'))
z += cuda_ones
assert (numpy.asarray(z) == numpy.ones(y.shape)).all()
assert (numpy.asarray(z) == 1).all()
......@@ -89,9 +117,10 @@ def test_pycuda_memory_to_theano():
assert cuda_rand._strides == z._strides, (cuda_rand._strides, z._strides)
assert (numpy.asarray(cuda_rand) == rand).all()
z += cuda_rand
assert (numpy.asarray(z)==(rand+1)).all()
assert (numpy.asarray(z) == (rand + 1)).all()
# Check that the ref count to the gpuarray is right.
del z
print "gpuarray ref count after deleting the CudaNdarray", sys.getrefcount(y)
assert sys.getrefcount(y)==2
print "gpuarray ref count after deleting the CudaNdarray",
print sys.getrefcount(y)
assert sys.getrefcount(y) == 2
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论