Merge pull request #296 from nouiz/fix_pycuda_test

Fix pycuda test

Merge pull request #296 from nouiz/fix_pycuda_test
177f1296 · Olivier Delalleau · 35baedd6 · d238fb07 · 177f1296 · 177f1296
--- a/doc/cifarSC2011/pyCUDA.txt
+++ b/doc/cifarSC2011/pyCUDA.txt
@@ -75,6 +75,8 @@ Exercise 6
 - Modify and execute it to work for a matrix of 20 x 10
+.. _pyCUDA_theano:
 Theano + PyCUDA
 ---------------

--- a/doc/tutorial/gpu_data_convert.txt
+++ b/doc/tutorial/gpu_data_convert.txt
@@ -33,8 +33,10 @@ Compiling with PyCUDA
 ---------------------
 You can use PyCUDA to compile some CUDA function that work directly on
-CudaNdarray. There is an example in the function `test_pycuda_simple` in
+CudaNdarray. There is an example in the function `test_pycuda_theano`
-the file `theano/misc/tests/test_pycuda_theano_simple.py`.
+in the file `theano/misc/tests/test_pycuda_theano_simple.py`. Also,
+there is an example that shows how to make an op that calls a pycuda
+function :ref:`here <pyCUDA_theano>`.
 Theano op using PyCUDA function
 -------------------------------

--- a/theano/misc/tests/test_pycuda_theano_simple.py
+++ b/theano/misc/tests/test_pycuda_theano_simple.py
@@ -15,7 +15,8 @@ import theano.misc.pycuda_init
 if not theano.misc.pycuda_init.pycuda_available:
    from nose.plugins.skip import SkipTest
-    raise SkipTest("Pycuda not installed. Skip test of theano op with pycuda code.")
+    raise SkipTest("Pycuda not installed."
+                   " We skip test of theano op with pycuda code.")
 if cuda_ndarray.cuda_available == False:
    from nose.plugins.skip import SkipTest
@@ -26,9 +27,8 @@ import pycuda.driver as drv
 import pycuda.gpuarray
-def test_pycuda_simple():
+def test_pycuda_only():
-    x = cuda_ndarray.CudaNdarray.zeros((5,5))
+    """Run pycuda only example to test that pycuda work."""
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
 __global__ void multiply_them(float *dest, float *a, float *b)
@@ -40,36 +40,63 @@ __global__ void multiply_them(float *dest, float *a, float *b)
    multiply_them = mod.get_function("multiply_them")
+    # Test with pycuda in/out of numpy.ndarray
    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)
    dest = numpy.zeros_like(a)
    multiply_them(
        drv.Out(dest), drv.In(a), drv.In(b),
-        block=(400,1,1), grid=(1,1))
+        block=(400, 1, 1), grid=(1, 1))
-    assert (dest==a*b).all()
+    assert (dest == a * b).all()
+def test_pycuda_theano():
+    """Simple example with pycuda function and Theano CudaNdarray object."""
+    from pycuda.compiler import SourceModule
+    mod = SourceModule("""
+__global__ void multiply_them(float *dest, float *a, float *b)
+{
+  const int i = threadIdx.x;
+  dest[i] = a[i] * b[i];
+}
+""")
+    multiply_them = mod.get_function("multiply_them")
+    a = numpy.random.randn(100).astype(numpy.float32)
+    b = numpy.random.randn(100).astype(numpy.float32)
+    # Test with Theano object.  The kernel has no bounds check, so launch
+    # exactly one thread per element (100), not more than the buffers hold.
+    ga = cuda_ndarray.CudaNdarray(a)
+    gb = cuda_ndarray.CudaNdarray(b)
+    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
+    multiply_them(dest, ga, gb, block=(100, 1, 1), grid=(1, 1))
+    assert (numpy.asarray(dest) == a * b).all()
 def test_pycuda_memory_to_theano():
    #Test that we can use the GpuArray memory space in pycuda in a CudaNdarray
-    y = pycuda.gpuarray.zeros((3,4,5), 'float32')
+    y = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    print numpy.asarray(y)
-    print "gpuarray ref count before creating a CudaNdarray", sys.getrefcount(y)
+    print "gpuarray ref count before creating a CudaNdarray",
-    assert sys.getrefcount(y)==2
+    print sys.getrefcount(y)
+    assert sys.getrefcount(y) == 2
    rand = numpy.random.randn(*y.shape).astype(numpy.float32)
    cuda_rand = cuda_ndarray.CudaNdarray(rand)
    strides = [1]
    for i in y.shape[::-1][:-1]:
-        strides.append(strides[-1]*i)
+        strides.append(strides[-1] * i)
    strides = tuple(strides[::-1])
    print 'strides', strides
    assert cuda_rand._strides == strides, (cuda_rand._strides, strides)
-    y_ptr = int(y.gpudata) # in pycuda trunk, y.ptr also works, which is a little cleaner
+    # in pycuda trunk, y.ptr also works, which is a little cleaner
+    y_ptr = int(y.gpudata)
    z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y)
    print "gpuarray ref count after creating a CudaNdarray", sys.getrefcount(y)
-    assert sys.getrefcount(y)==3
+    assert sys.getrefcount(y) == 3
    assert (numpy.asarray(z) == 0).all()
    assert z.base is y
@@ -80,7 +107,8 @@ def test_pycuda_memory_to_theano():
    del zz
    assert sys.getrefcount(y) == 3
-    cuda_ones = cuda_ndarray.CudaNdarray(numpy.asarray([[[1]]],dtype='float32'))
+    cuda_ones = cuda_ndarray.CudaNdarray(numpy.asarray([[[1]]],
+                                                       dtype='float32'))
    z += cuda_ones
    assert (numpy.asarray(z) == numpy.ones(y.shape)).all()
    assert (numpy.asarray(z) == 1).all()
@@ -89,9 +117,10 @@ def test_pycuda_memory_to_theano():
    assert cuda_rand._strides == z._strides, (cuda_rand._strides, z._strides)
    assert (numpy.asarray(cuda_rand) == rand).all()
    z += cuda_rand
-    assert (numpy.asarray(z)==(rand+1)).all()
+    assert (numpy.asarray(z) == (rand + 1)).all()
    # Check that the ref count to the gpuarray is right.
    del z
-    print "gpuarray ref count after deleting the CudaNdarray", sys.getrefcount(y)
+    print "gpuarray ref count after deleting the CudaNdarray",
-    assert sys.getrefcount(y)==2
+    print sys.getrefcount(y)
+    assert sys.getrefcount(y) == 2