Added function to create pycuda.gpuarray.GPUArray from CudaNdarray and the other way.

6a2ee199 · Frederic Bastien · 53105ba7 · 6a2ee199 · 6a2ee199
--- a/theano/misc/pycuda_utils.py
+++ b/theano/misc/pycuda_utils.py
+import numpy
+import pycuda.gpuarray
+import theano.sandbox.cuda as cuda
+if cuda.cuda_available == False:
+    raise ImportError('Optional theano package cuda disabled')
+def to_gpuarray(x, copyif=False):
+    """ take a CudaNdarray and return a pycuda.gpuarray.GPUArray
+    :type x: CudaNdarray
+    :param x: The array to transform to pycuda.gpuarray.GPUArray.
+    :type copyif: bool
+    :param copyif: If False, raise an error if x is not c contiguous.
+                   If it is c contiguous, we return a GPUArray that share
+                   the same memory region as x.
+                   If True, copy x if it is no c contiguous, so the return won't
+                   shape the same memory region. If c contiguous, the return
+                   will share the same memory region.
+                   We need to do this as GPUArray don't fully support strided memory.
+    :return type: pycuda.gpuarray.GPUArray
+    """
+    if not isinstance(x, cuda.CudaNdarray):
+        raise ValueError("We can transfer only CudaNdarray to pycuda.gpuarray.GPUArray")
+    else:
+        # Check if it is c contiguous
+        size = 1
+        c_contiguous = True
+        for i in range(x.ndim-1, -1, -1):
+            if x.shape[i] == 1:
+                continue
+            if x._strides[i] != size:
+                c_contiguous = False
+                break
+            size *= x.shape[i]
+        if not c_contiguous:
+            if copyif:
+                x = x.copy()
+            else:
+                raise ValueError("We where asked to don't copy memory, but the memory is not c contiguous.")
+        # Now x is always c contiguous
+        px = pycuda.gpuarray.GPUArray(x.shape, x.dtype, base=x, gpudata=x.gpudata)
+        return px
+def to_cudandarray(x):
+    """ take a pycuda.gpuarray.GPUArray and make a CudaNdarray that point to its memory
+    :note: CudaNdarray support only float32, so only float32 GPUArray are accepted
+    """
+    if not isinstance(x, pycuda.gpuarray.GPUArray):
+        raise ValueError("We can transfer only pycuda.gpuarray.GPUArray to CudaNdarray")
+    elif x.dtype != "float32":
+        raise ValueError("CudaNdarray support only float32")
+    else:
+        strides = [1]
+        for i in x.shape[::-1][:-1]:
+            strides.append(strides[-1]*i)
+        strides = tuple(strides[::-1])
+        ptr = int(x.gpudata) # in pycuda trunk, y.ptr also works, which is a little cleaner
+        z = cuda.from_gpu_pointer(ptr, x.shape, strides, x)
+        return z
--- a/theano/misc/tests/test_pycuda_utils.py
+++ b/theano/misc/tests/test_pycuda_utils.py
+import numpy
+import theano.sandbox.cuda as cuda
+import theano.misc.pycuda_init
+from theano.misc.pycuda_utils import to_gpuarray, to_cudandarray
+if not theano.misc.pycuda_init.pycuda_available:
+    from nose.plugins.skip import SkipTest
+    raise SkipTest("Pycuda not installed. Skip test of theano op with pycuda code.")
+if cuda.cuda_available == False:
+    from nose.plugins.skip import SkipTest
+    raise SkipTest('Optional theano package cuda disabled')
+import pycuda.gpuarray
+def test_to_gpuarray():
+    cx = cuda.CudaNdarray.zeros((5,4))
+    px = to_gpuarray(cx)
+    assert isinstance(px, pycuda.gpuarray.GPUArray)
+    cx[0,0] = numpy.asarray(1, dtype="float32")
+    # Check that they share the same memory space
+    assert px.gpudata == cx.gpudata
+    assert numpy.asarray(cx[0,0]) == 1
+    assert numpy.allclose(numpy.asarray(cx), px.get())
+    assert px.dtype == cx.dtype
+    assert px.shape == cx.shape
+    assert all(numpy.asarray(cx._strides) * 4 == px.strides)
+    # Test when the CudaNdarray is strided
+    cx = cx[::2,::]
+    px = to_gpuarray(cx, copyif=True)
+    assert isinstance(px, pycuda.gpuarray.GPUArray)
+    cx[0,0] = numpy.asarray(2, dtype="float32")
+    # Check that they do not share the same memory space
+    assert px.gpudata != cx.gpudata
+    assert numpy.asarray(cx[0,0]) == 2
+    assert not numpy.allclose(numpy.asarray(cx), px.get())
+    assert px.dtype == cx.dtype
+    assert px.shape == cx.shape
+    assert not all(numpy.asarray(cx._strides) * 4 == px.strides)
+    # Test that we return an error
+    try:
+        px = to_gpuarray(cx)
+        assert False
+    except ValueError:
+        pass
+def test_to_cudandarray():
+    px = pycuda.gpuarray.zeros((3,4,5), 'float32')
+    cx = to_cudandarray(px)
+    assert isinstance(cx, cuda.CudaNdarray)
+    assert numpy.allclose(px.get(),
+                          numpy.asarray(cx))
+    assert px.dtype == cx.dtype
+    assert px.shape == cx.shape
+    assert all(numpy.asarray(cx._strides) * 4 == px.strides)
+    try:
+        px = pycuda.gpuarray.zeros((3,4,5), 'float64')
+        to_cudandarray(px)
+        assert False
+    except ValueError:
+        pass
+    try:
+        to_cudandarray(numpy.zeros(4))
+        assert False
+    except ValueError:
+        pass