Commit a81457e8 authored by Arnaud Bergeron

Get rid of transfers to/from the old backend.

Parent 0af21f1d
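For context, `GpuFromCuda` and `CudaFromGpu` were the ops that moved data between the old `CudaNdarray` backend and the new libgpuarray backend. Below is a minimal sketch of how they were used, mirroring the removed test further down; the import paths are assumed from this era of Theano and both backends must be installed and initialized for it to run.

import numpy
import theano
import theano.sandbox.cuda as cuda_ndarray                     # old backend
from theano.sandbox.gpuarray.type import GpuArrayType          # new backend (assumed path)
from theano.sandbox.gpuarray.basic_ops import gpu_from_cuda, cuda_from_gpu

# Symbolic variables, one per backend.
c = cuda_ndarray.CudaNdarrayType((False, False))('c')
g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')

# Cross-backend transfer graphs: these are exactly the ops removed by this commit.
old_to_new = theano.function([c], gpu_from_cuda(c))
new_to_old = theano.function([g], cuda_from_gpu(g))

av = numpy.random.rand(5, 4).astype('float32')
cv = cuda_ndarray.CudaNdarray(av)   # value on the old backend
gv = old_to_new(cv)                 # pygpu GpuArray viewing the same device memory
cv2 = new_to_old(gv)                # back to a CudaNdarray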
@@ -332,236 +332,6 @@ class GpuFromHost(Op):
gpu_from_host = GpuFromHost()
class GpuFromCuda(Op):
    """Transfer data from a CudaNdarray (old CUDA backend) into a
    GpuArray (new libgpuarray backend)."""
    view_map = {0: [0]}
    __props__ = ()

    def make_node(self, x):
        from theano.sandbox.cuda import CudaNdarrayType
        if not isinstance(x.type, CudaNdarrayType):
            raise TypeError(x)
        return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable,
                                              dtype=x.dtype)()])

    def perform(self, node, inp, out):
        x, = inp
        z, = out
        z[0] = gpuarray.array(numpy.asarray(x))

    def grad(self, inputs, grads):
        gz, = grads
        return [cuda_from_gpu(gz)]

    def R_op(self, inputs, eval_points):
        ev, = eval_points
        if isinstance(ev, GpuArrayType):
            return [cuda_from_gpu(ev)]
        else:
            return [ev]

    def infer_shape(self, node, xshp):
        return xshp

    def c_headers(self):
        return ['<cuda_ndarray.cuh>', '<gpuarray/extension.h>',
                '<gpuarray/types.h>', '<cuda.h>']

    def c_header_dirs(self):
        import cuda_ndarray
        ret = [os.path.dirname(cuda_ndarray.__file__)]
        cuda_root = config.cuda.root
        if cuda_root:
            ret.append(os.path.join(cuda_root, 'include'))
        return ret

    def c_lib_dirs(self):
        import cuda_ndarray
        ret = [os.path.dirname(cuda_ndarray.__file__)]
        cuda_root = config.cuda.root
        if cuda_root:
            ret.append(os.path.join(cuda_root, 'lib'))
        return ret

    def c_libraries(self):
        return ['cudart', 'cublas', 'cuda']

    def c_support_code(self):
        return """
        CUcontext (*cuda_get_ctx)(void *ctx);
        gpudata *(*cuda_make_buf)(void *c, CUdeviceptr p, size_t sz);
        """

    def c_init_code(self):
        return ['cuda_get_ctx = (CUcontext (*)(void *))gpuarray_get_extension("cuda_get_ctx");',
                'cuda_make_buf = (gpudata *(*)(void *, CUdeviceptr, size_t))gpuarray_get_extension("cuda_make_buf");']

    def c_code(self, node, name, inputs, outputs, sub):
        return """
        int %(name)serr;
        gpudata *%(name)sdata;
        CUcontext %(name)scur;
        size_t *%(name)sdims;
        ssize_t *%(name)sstr;

        cuCtxGetCurrent(&%(name)scur);
        if (%(name)scur != cuda_get_ctx(pygpu_default_context()->ctx)) {
            PyErr_SetString(PyExc_ValueError, "Ambient cuda context is not the same as output context.");
            %(fail)s
        }
        %(name)sdims = (size_t *)calloc(%(in)s->nd, sizeof(size_t));
        if (%(name)sdims == NULL) {
            PyErr_SetString(PyExc_MemoryError, "Can't allocate dimensions.");
            %(fail)s
        }
        %(name)sstr = (ssize_t *)calloc(%(in)s->nd, sizeof(ssize_t));
        if (%(name)sstr == NULL) {
            free(%(name)sdims);
            PyErr_SetString(PyExc_MemoryError, "Can't allocate strides.");
            %(fail)s
        }
        for (unsigned int i = 0; i < %(in)s->nd; i++) {
            %(name)sdims[i] = (size_t)CudaNdarray_HOST_DIMS(%(in)s)[i];
            %(name)sstr[i] = (ssize_t)CudaNdarray_HOST_STRIDES(%(in)s)[i]*4;
        }
        %(name)sdata = cuda_make_buf(pygpu_default_context()->ctx,
                                     (CUdeviceptr)%(in)s->devdata,
                                     ((size_t)%(in)s->data_allocated)*4);
        if (%(name)sdata == NULL) {
            Py_DECREF(%(out)s);
            free(%(name)sdims);
            free(%(name)sstr);
            PyErr_SetString(PyExc_MemoryError, "Could not allocate gpudata structure.");
            %(fail)s
        }
        Py_XDECREF(%(out)s);
        %(out)s = pygpu_fromgpudata(%(name)sdata, 0, GA_FLOAT, %(in)s->nd,
                                    %(name)sdims, %(name)sstr,
                                    pygpu_default_context(), 1,
                                    (PyObject *)%(in)s,
                                    (PyObject *)&PyGpuArrayType);
        pygpu_default_context()->ops->buffer_release(%(name)sdata);
        free(%(name)sdims);
        free(%(name)sstr);
        if (%(out)s == NULL) {
            %(fail)s
        }
        """ % {'name': name, 'in': inputs[0], 'out': outputs[0],
               'fail': sub['fail']}

    def c_code_cache_version(self):
        return (5,)


gpu_from_cuda = GpuFromCuda()
class CudaFromGpu(Op):
    """Transfer data from a GpuArray (new libgpuarray backend) back into a
    CudaNdarray (old CUDA backend)."""
    view_map = {0: [0]}
    __props__ = ()

    def make_node(self, x):
        from theano.sandbox.cuda import CudaNdarrayType
        if not isinstance(x.type, GpuArrayType):
            raise TypeError(x)
        if x.type.dtype != 'float32':
            raise TypeError(x)
        return Apply(self, [x], [CudaNdarrayType(broadcastable=x.broadcastable)()])

    def perform(self, node, inp, out):
        from theano.sandbox.cuda import filter as cuda_filter
        x, = inp
        z, = out
        z[0] = cuda_filter(theano._asarray(x, dtype='float32'),
                           tuple([0] * x.ndim), 0, z[0])

    def grad(self, inputs, grads):
        gz, = grads
        return [gpu_from_cuda(gz)]

    def R_op(self, inputs, eval_points):
        from theano.sandbox.cuda import CudaNdarrayType
        ev, = eval_points
        if isinstance(ev, CudaNdarrayType):
            return [gpu_from_cuda(ev)]
        else:
            return [ev]

    def infer_shape(self, node, shp):
        return shp

    def c_headers(self):
        return ['<cuda_ndarray.cuh>', '<gpuarray/extension.h>', '<cuda.h>']

    def c_header_dirs(self):
        import cuda_ndarray
        ret = [os.path.dirname(cuda_ndarray.__file__)]
        cuda_root = config.cuda.root
        if cuda_root:
            ret.append(os.path.join(cuda_root, 'include'))
        return ret

    def c_lib_dirs(self):
        import cuda_ndarray
        ret = [os.path.dirname(cuda_ndarray.__file__)]
        cuda_root = config.cuda.root
        if cuda_root:
            ret.append(os.path.join(cuda_root, 'lib'))
        return ret

    def c_libraries(self):
        return ['cudart', 'cublas', 'cuda']

    def c_support_code(self):
        return """
        CUcontext (*cuda_get_ctx)(void *ctx);
        CUdeviceptr (*cuda_get_ptr)(gpudata *g);
        """

    def c_init_code(self):
        return ['cuda_get_ctx = (CUcontext (*)(void *ctx))gpuarray_get_extension("cuda_get_ctx");',
                'cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))gpuarray_get_extension("cuda_get_ptr");']

    def c_code(self, node, name, inputs, outputs, sub):
        return """
        int %(name)serr = 0, %(name)si;
        CUcontext %(name)scur;

        cuCtxGetCurrent(&%(name)scur);
        if (%(name)scur != cuda_get_ctx(pygpu_default_context()->ctx)) {
            PyErr_SetString(PyExc_ValueError, "Ambient cuda context is not the same as output context.");
            %(fail)s
        }
        if (GpuArray_sync(&%(inp)s->ga) != GA_NO_ERROR) {
            PyErr_SetString(PyExc_RuntimeError, "Could not sync GpuArray");
            %(fail)s
        }
        Py_XDECREF(%(out)s);
        %(out)s = (CudaNdarray *)CudaNdarray_new_nd(%(inp)s->ga.nd);
        if (!%(out)s) {
            %(fail)s
        }
        for (%(name)si = 0; %(name)si < %(inp)s->ga.nd; %(name)si++) {
            CudaNdarray_set_dim(%(out)s, %(name)si, %(inp)s->ga.dimensions[%(name)si]);
            CudaNdarray_set_stride(%(out)s, %(name)si, %(inp)s->ga.strides[%(name)si]/4);
        }
        %(name)serr = CudaNdarray_set_device_data(%(out)s,
            (float *)(((char *)cuda_get_ptr(%(inp)s->ga.data))+%(inp)s->ga.offset),
            (PyObject *)%(inp)s);
        if (%(name)serr) {
            %(fail)s
        }
        """ % {'name': name, 'inp': inputs[0], 'out': outputs[0],
               'fail': sub['fail']}

    def c_code_cache_version(self):
        return (3,)


cuda_from_gpu = CudaFromGpu()
class GpuAlloc(HideC, Alloc):
    """
...
@@ -42,8 +42,7 @@ from ..basic_ops import (
     host_from_gpu, gpu_from_host,
     gpu_alloc, GpuAlloc,
     GpuAllocEmpty,
-    gpu_from_cuda,
-    cuda_from_gpu, HostFromGpu,
+    HostFromGpu,
     GpuContiguous,
     GpuFromHost, GpuReshape,
     gpu_join, GpuJoin, GpuSplit, GpuEye, gpu_contiguous)
@@ -250,36 +249,6 @@ def test_transfer_strided():
    assert numpy.all(fv == av)
@may_fail("Op fails if both contexts are not the same and it's rare "
          "that the tests will be run this way", ValueError)
def test_transfer_cuda_gpu():
    import theano.sandbox.cuda as cuda_ndarray
    if cuda_ndarray.cuda_available is False:
        raise SkipTest("Can't test interaction with cuda if cuda not present")
    g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
    c = cuda_ndarray.CudaNdarrayType((False, False))('c')

    av = theano._asarray(rng.rand(5, 4), dtype='float32')
    gv = gpuarray.array(av)
    cv = cuda_ndarray.CudaNdarray(av)
    gvs = gv[:, ::-2]
    cvs = cv[:, ::-2]

    f = theano.function([c], gpu_from_cuda(c))
    fv = f(cv)
    assert GpuArrayType.values_eq_approx(fv, gv)
    fvs = f(cvs)
    assert GpuArrayType.values_eq_approx(fvs, gvs)

    f = theano.function([g], cuda_from_gpu(g))
    fv = f(gv)
    assert cuda_ndarray.CudaNdarrayType.values_eq_approx(fv, cv)
    fvs = f(gvs)
    assert cuda_ndarray.CudaNdarrayType.values_eq_approx(fvs, cvs)
def gpu_alloc_expected(x, *shp):
    g = gpuarray.empty(shp, dtype=x.dtype)
    g[:] = x
...
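After this change, only host-to-device transfers against the new backend remain. A rough sketch of the surviving path, using the `gpu_from_host` / `host_from_gpu` ops that stay in the import list above (the module path is an assumption and not part of this diff):

import numpy
import theano
import theano.tensor as T
from theano.sandbox.gpuarray.basic_ops import gpu_from_host, host_from_gpu  # assumed path

x = T.fmatrix('x')
# Round-trip a host array through the libgpuarray backend.
f = theano.function([x], host_from_gpu(gpu_from_host(x)))
print(f(numpy.ones((2, 3), dtype='float32')))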