Added method to convert from/to cudamat gpu matrix and cudandarray.

715f193d · Frederic Bastien · 72abdcb7 · 715f193d · 715f193d
--- a/theano/misc/cudamat_utils.py
+++ b/theano/misc/cudamat_utils.py
+"""
+This code can only work if cudamat and theano are initialized on the
+same gpu.
+WARNING: The test of this file uses a transpose...
+         So there can be problems with shape and stride order...
+"""
+try:
+    import cudamat
+    cudamat_available = True
+    import theano.sandbox.cuda as cuda
+    if cuda.cuda_available == False:
+        raise ImportError('Optional theano package cuda disabled')
+    def cudandarray_to_cudamat(x, copyif=False):
+        """ take a CudaNdarray and return a cudamat.CUDAMatrix object.
+        :type x: CudaNdarray
+        :param x: The array to transform to cudamat.CUDAMatrix.
+        :type copyif: bool
+        :param copyif: If False, raise an error if x is not c contiguous.
+                       If it is c contiguous, we return a GPUArray that share
+                       the same memory region as x.
+                       If True, copy x if it is no c contiguous, so the return won't
+                       shape the same memory region. If c contiguous, the return
+                       will share the same memory region.
+                       We need to do this as GPUArray don't fully support strided memory.
+        :return type: cudamat.CUDAMatrix
+        """
+        if not isinstance(x, cuda.CudaNdarray):
+            raise ValueError("We can transfer only CudaNdarray to cudamat.CUDAMatrix")
+        elif x.ndim!=2:
+            raise TypeError("cudandarray_to_cudamat: input must be 2-d (has %s dims). That's "
+                            "because cudamat arrays are always 2-dimensional")
+        else:
+            # Check if it is c contiguous
+            size = 1
+            c_contiguous = True
+            for i in range(x.ndim-1, -1, -1):
+                if x.shape[i] == 1:
+                    continue
+                if x._strides[i] != size:
+                    c_contiguous = False
+                    break
+                size *= x.shape[i]
+            if not c_contiguous:
+                if copyif:
+                    x = x.copy()
+                else:
+                    raise ValueError("We where asked to don't copy memory, but the memory is not c contiguous.")
+            # Now x is always c contiguous.
+            # the next step is to create a CUDAMatrix object. We do so by first creating
+            # a cudamat object with no data_host.
+            cm_mat = cudamat.cudamat()
+            cm_mat.size[0] = x.shape[0]
+            cm_mat.size[1] = x.shape[1]
+            cm_mat.on_host = 0
+            cm_mat.on_device = 1
+            cm_mat.is_trans = 0
+            cm_mat.owns_data = 0 # <-- note: cm_mat dosen't owe the data; x does. So x will delete it.
+            # x.gpudata is a long. We need a pointer to a float. cast.
+            import ctypes
+            cm_mat.data_device = ctypes.cast(x.gpudata, ctypes.POINTER(ctypes.c_float))
+            px = cudamat.CUDAMatrix(cm_mat)
+            px._base = x # x won't be __del__'ed as long as px is around.
+            px.mat_on_host = False # let cudamat know that we don't have a numpy
+                                   # array attached.
+            return px
+    def cudamat_to_cudandarray(x):
+        """ take a cudamat.CUDAMatrix and make a CudaNdarray that point to its memory
+        """
+        if not isinstance(x, cudamat.CUDAMatrix):
+            raise ValueError("We can transfer only cudamat.CUDAMatrix to CudaNdarray")
+        # elif x.dtype != "float32":
+        #     raise ValueError("CudaNdarray support only float32")
+        # We don't need this, because cudamat is always float32.
+        else:
+            strides = [1]
+            for i in x.shape[::-1][:-1]:
+                strides.append(strides[-1]*i)
+            strides = tuple(strides[::-1])
+            import ctypes
+            ptr_long = long(ctypes.cast(x.mat.data_device, ctypes.c_void_p).value)
+            # seems legit.
+            z = cuda.from_gpu_pointer(ptr_long, x.shape, strides, x)
+            return z
+except (ImportError, OSError):
+    cudamat_available = False
--- a/theano/misc/tests/test_cudamat_utils.py
+++ b/theano/misc/tests/test_cudamat_utils.py
+import numpy
+import theano
+from theano.misc.cudamat_utils import cudamat_available
+if not cudamat_available:
+    from nose.plugins.skip import SkipTest
+    raise SkipTest("gnumpy not installed. Skip test of theano op with pycuda code.")
+from theano.misc.cudamat_utils import cudandarray_to_cudamat, cudamat_to_cudandarray
+def test(shape=(3,4)):
+    """
+Make sure that the cudamat conversion is exact.
+"""
+    gpu = theano.sandbox.cuda.basic_ops.gpu_from_host
+    U = gpu(theano.tensor.fmatrix('U'))
+    ii = theano.function([U], gpu(U+1))
+    A_cpu = numpy.asarray(numpy.random.rand(*shape), dtype="float32")
+    A_cnd = theano.sandbox.cuda.CudaNdarray(A_cpu)
+    A_cmat = cudandarray_to_cudamat(A_cnd)
+    B_cnd = cudamat_to_cudandarray(A_cmat)
+    B_cnd = ii(A_cnd)
+    u = A_cnd.copy()
+    u += theano.sandbox.cuda.CudaNdarray(numpy.asarray([[1]],dtype='float32'))
+    u = numpy.asarray(u)
+    v = numpy.asarray(B_cnd)
+    w = A_cmat.add(1).asarray()
+    assert abs(u-v).max() == 0
+    assert abs(u-w.T.reshape(u.shape)).max() == 0