Commit a81457e8 authored by Arnaud Bergeron

Get rid of transfers to/from the old backend.

Parent 0af21f1d
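For context, `GpuFromCuda` and `CudaFromGpu` were the ops that moved data between the old `CudaNdarray` backend and the new libgpuarray backend. Below is a minimal sketch of how they were used, mirroring the removed test further down; the import paths are assumed from this era of Theano and both backends must be installed and initialized for it to run.

import numpy
import theano
import theano.sandbox.cuda as cuda_ndarray                     # old backend
from theano.sandbox.gpuarray.type import GpuArrayType          # new backend (assumed path)
from theano.sandbox.gpuarray.basic_ops import gpu_from_cuda, cuda_from_gpu

# Symbolic variables, one per backend.
c = cuda_ndarray.CudaNdarrayType((False, False))('c')
g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')

# Cross-backend transfer graphs: these are exactly the ops removed by this commit.
old_to_new = theano.function([c], gpu_from_cuda(c))
new_to_old = theano.function([g], cuda_from_gpu(g))

av = numpy.random.rand(5, 4).astype('float32')
cv = cuda_ndarray.CudaNdarray(av)   # value on the old backend
gv = old_to_new(cv)                 # pygpu GpuArray viewing the same device memory
cv2 = new_to_old(gv)                # back to a CudaNdarray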
@@ -332,236 +332,6 @@ class GpuFromHost(Op):
gpu_from_host = GpuFromHost()
class GpuFromCuda(Op):
    """Transfer data from a CudaNdarray (old CUDA backend) into a
    GpuArray (new libgpuarray backend)."""
    view_map = {0: [0]}
    __props__ = ()

    def make_node(self, x):
        from theano.sandbox.cuda import CudaNdarrayType
        if not isinstance(x.type, CudaNdarrayType):
            raise TypeError(x)
        return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable,
                                              dtype=x.dtype)()])

    def perform(self, node, inp, out):
        x, = inp
        z, = out
        z[0] = gpuarray.array(numpy.asarray(x))

    def grad(self, inputs, grads):
        gz, = grads
        return [cuda_from_gpu(gz)]

    def R_op(self, inputs, eval_points):
        ev, = eval_points
        if isinstance(ev, GpuArrayType):
            return [cuda_from_gpu(ev)]
        else:
            return [ev]

    def infer_shape(self, node, xshp):
        return xshp

    def c_headers(self):
        return ['<cuda_ndarray.cuh>', '<gpuarray/extension.h>',
                '<gpuarray/types.h>', '<cuda.h>']

    def c_header_dirs(self):
        import cuda_ndarray
        ret = [os.path.dirname(cuda_ndarray.__file__)]
        cuda_root = config.cuda.root
        if cuda_root:
            ret.append(os.path.join(cuda_root, 'include'))
        return ret

    def c_lib_dirs(self):
        import cuda_ndarray
        ret = [os.path.dirname(cuda_ndarray.__file__)]
        cuda_root = config.cuda.root
        if cuda_root:
            ret.append(os.path.join(cuda_root, 'lib'))
        return ret

    def c_libraries(self):
        return ['cudart', 'cublas', 'cuda']

    def c_support_code(self):
        return """
        CUcontext (*cuda_get_ctx)(void *ctx);
        gpudata *(*cuda_make_buf)(void *c, CUdeviceptr p, size_t sz);
        """

    def c_init_code(self):
        return ['cuda_get_ctx = (CUcontext (*)(void *))gpuarray_get_extension("cuda_get_ctx");',
                'cuda_make_buf = (gpudata *(*)(void *, CUdeviceptr, size_t))gpuarray_get_extension("cuda_make_buf");']

    def c_code(self, node, name, inputs, outputs, sub):
        return """
        int %(name)serr;
        gpudata *%(name)sdata;
        CUcontext %(name)scur;
        size_t *%(name)sdims;
        ssize_t *%(name)sstr;

        cuCtxGetCurrent(&%(name)scur);
        if (%(name)scur != cuda_get_ctx(pygpu_default_context()->ctx)) {
            PyErr_SetString(PyExc_ValueError, "Ambient cuda context is not the same as output context.");
            %(fail)s
        }
        %(name)sdims = (size_t *)calloc(%(in)s->nd, sizeof(size_t));
        if (%(name)sdims == NULL) {
            PyErr_SetString(PyExc_MemoryError, "Can't allocate dimensions.");
            %(fail)s
        }
        %(name)sstr = (ssize_t *)calloc(%(in)s->nd, sizeof(ssize_t));
        if (%(name)sstr == NULL) {
            free(%(name)sdims);
            PyErr_SetString(PyExc_MemoryError, "Can't allocate strides.");
            %(fail)s
        }
        for (unsigned int i = 0; i < %(in)s->nd; i++) {
            %(name)sdims[i] = (size_t)CudaNdarray_HOST_DIMS(%(in)s)[i];
            %(name)sstr[i] = (ssize_t)CudaNdarray_HOST_STRIDES(%(in)s)[i]*4;
        }
        %(name)sdata = cuda_make_buf(pygpu_default_context()->ctx,
                                     (CUdeviceptr)%(in)s->devdata,
                                     ((size_t)%(in)s->data_allocated)*4);
        if (%(name)sdata == NULL) {
            Py_DECREF(%(out)s);
            free(%(name)sdims);
            free(%(name)sstr);
            PyErr_SetString(PyExc_MemoryError, "Could not allocate gpudata structure.");
            %(fail)s
        }
        Py_XDECREF(%(out)s);
        %(out)s = pygpu_fromgpudata(%(name)sdata, 0, GA_FLOAT, %(in)s->nd,
                                    %(name)sdims, %(name)sstr,
                                    pygpu_default_context(), 1,
                                    (PyObject *)%(in)s,
                                    (PyObject *)&PyGpuArrayType);
        pygpu_default_context()->ops->buffer_release(%(name)sdata);
        free(%(name)sdims);
        free(%(name)sstr);
        if (%(out)s == NULL) {
            %(fail)s
        }
        """ % {'name': name, 'in': inputs[0], 'out': outputs[0],
               'fail': sub['fail']}

    def c_code_cache_version(self):
        return (5,)


gpu_from_cuda = GpuFromCuda()
class CudaFromGpu(Op):
    """Transfer data from a GpuArray (new libgpuarray backend) back into a
    CudaNdarray (old CUDA backend)."""
    view_map = {0: [0]}
    __props__ = ()

    def make_node(self, x):
        from theano.sandbox.cuda import CudaNdarrayType
        if not isinstance(x.type, GpuArrayType):
            raise TypeError(x)
        if x.type.dtype != 'float32':
            raise TypeError(x)
        return Apply(self, [x], [CudaNdarrayType(broadcastable=x.broadcastable)()])

    def perform(self, node, inp, out):
        from theano.sandbox.cuda import filter as cuda_filter
        x, = inp
        z, = out
        z[0] = cuda_filter(theano._asarray(x, dtype='float32'),
                           tuple([0] * x.ndim), 0, z[0])

    def grad(self, inputs, grads):
        gz, = grads
        return [gpu_from_cuda(gz)]

    def R_op(self, inputs, eval_points):
        from theano.sandbox.cuda import CudaNdarrayType
        ev, = eval_points
        if isinstance(ev, CudaNdarrayType):
            return [gpu_from_cuda(ev)]
        else:
            return [ev]

    def infer_shape(self, node, shp):
        return shp

    def c_headers(self):
        return ['<cuda_ndarray.cuh>', '<gpuarray/extension.h>', '<cuda.h>']

    def c_header_dirs(self):
        import cuda_ndarray
        ret = [os.path.dirname(cuda_ndarray.__file__)]
        cuda_root = config.cuda.root
        if cuda_root:
            ret.append(os.path.join(cuda_root, 'include'))
        return ret

    def c_lib_dirs(self):
        import cuda_ndarray
        ret = [os.path.dirname(cuda_ndarray.__file__)]
        cuda_root = config.cuda.root
        if cuda_root:
            ret.append(os.path.join(cuda_root, 'lib'))
        return ret

    def c_libraries(self):
        return ['cudart', 'cublas', 'cuda']

    def c_support_code(self):
        return """
        CUcontext (*cuda_get_ctx)(void *ctx);
        CUdeviceptr (*cuda_get_ptr)(gpudata *g);
        """

    def c_init_code(self):
        return ['cuda_get_ctx = (CUcontext (*)(void *ctx))gpuarray_get_extension("cuda_get_ctx");',
                'cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))gpuarray_get_extension("cuda_get_ptr");']

    def c_code(self, node, name, inputs, outputs, sub):
        return """
        int %(name)serr = 0, %(name)si;
        CUcontext %(name)scur;

        cuCtxGetCurrent(&%(name)scur);
        if (%(name)scur != cuda_get_ctx(pygpu_default_context()->ctx)) {
            PyErr_SetString(PyExc_ValueError, "Ambient cuda context is not the same as output context.");
            %(fail)s
        }
        if (GpuArray_sync(&%(inp)s->ga) != GA_NO_ERROR) {
            PyErr_SetString(PyExc_RuntimeError, "Could not sync GpuArray");
            %(fail)s
        }
        Py_XDECREF(%(out)s);
        %(out)s = (CudaNdarray *)CudaNdarray_new_nd(%(inp)s->ga.nd);
        if (!%(out)s) {
            %(fail)s
        }
        for (%(name)si = 0; %(name)si < %(inp)s->ga.nd; %(name)si++) {
            CudaNdarray_set_dim(%(out)s, %(name)si, %(inp)s->ga.dimensions[%(name)si]);
            CudaNdarray_set_stride(%(out)s, %(name)si, %(inp)s->ga.strides[%(name)si]/4);
        }
        %(name)serr = CudaNdarray_set_device_data(%(out)s,
            (float *)(((char *)cuda_get_ptr(%(inp)s->ga.data))+%(inp)s->ga.offset),
            (PyObject *)%(inp)s);
        if (%(name)serr) {
            %(fail)s
        }
        """ % {'name': name, 'inp': inputs[0], 'out': outputs[0],
               'fail': sub['fail']}

    def c_code_cache_version(self):
        return (3,)


cuda_from_gpu = CudaFromGpu()
class GpuAlloc(HideC, Alloc):
    """
...
@@ -42,8 +42,7 @@ from ..basic_ops import (
     host_from_gpu, gpu_from_host,
     gpu_alloc, GpuAlloc,
     GpuAllocEmpty,
-    gpu_from_cuda,
-    cuda_from_gpu, HostFromGpu,
+    HostFromGpu,
     GpuContiguous,
     GpuFromHost, GpuReshape,
     gpu_join, GpuJoin, GpuSplit, GpuEye, gpu_contiguous)
@@ -250,36 +249,6 @@ def test_transfer_strided():
    assert numpy.all(fv == av)
@may_fail("Op fails if both contexts are not the same and it's rare "
          "that the tests will be run this way", ValueError)
def test_transfer_cuda_gpu():
    import theano.sandbox.cuda as cuda_ndarray
    if cuda_ndarray.cuda_available is False:
        raise SkipTest("Can't test interaction with cuda if cuda not present")
    g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
    c = cuda_ndarray.CudaNdarrayType((False, False))('c')

    av = theano._asarray(rng.rand(5, 4), dtype='float32')
    gv = gpuarray.array(av)
    cv = cuda_ndarray.CudaNdarray(av)
    gvs = gv[:, ::-2]
    cvs = cv[:, ::-2]

    f = theano.function([c], gpu_from_cuda(c))
    fv = f(cv)
    assert GpuArrayType.values_eq_approx(fv, gv)
    fvs = f(cvs)
    assert GpuArrayType.values_eq_approx(fvs, gvs)

    f = theano.function([g], cuda_from_gpu(g))
    fv = f(gv)
    assert cuda_ndarray.CudaNdarrayType.values_eq_approx(fv, cv)
    fvs = f(gvs)
    assert cuda_ndarray.CudaNdarrayType.values_eq_approx(fvs, cvs)
def gpu_alloc_expected(x, *shp):
    g = gpuarray.empty(shp, dtype=x.dtype)
    g[:] = x
...
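After this change, only host-to-device transfers against the new backend remain. A rough sketch of the surviving path, using the `gpu_from_host` / `host_from_gpu` ops that stay in the import list above (the module path is an assumption and not part of this diff):

import numpy
import theano
import theano.tensor as T
from theano.sandbox.gpuarray.basic_ops import gpu_from_host, host_from_gpu  # assumed path

x = T.fmatrix('x')
# Round-trip a host array through the libgpuarray backend.
f = theano.function([x], host_from_gpu(gpu_from_host(x)))
print(f(numpy.ones((2, 3), dtype='float32')))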