提交 a48f8ac9 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Fix a host of problem in the cuda interoperation ops (since they were untested).

上级 03dfb4b9
import os
import numpy import numpy
import theano import theano
...@@ -166,7 +168,7 @@ class GpuFromHost(Op): ...@@ -166,7 +168,7 @@ class GpuFromHost(Op):
%(name)serr = GpuArray_empty(&%(out)s->ga, %(name)serr = GpuArray_empty(&%(out)s->ga,
GpuArray_default_context()->ops, GpuArray_default_context()->ops,
GpuArray_default_context()->ctx, GpuArray_default_context()->ctx,
get_typecode(PyArray_DESCR(%(name)s_tmp)), get_typecode((PyObject *)PyArray_DESCR(%(name)s_tmp)),
PyArray_NDIM(%(inp)s), PyArray_NDIM(%(inp)s),
(size_t *)PyArray_DIMS(%(inp)s), (size_t *)PyArray_DIMS(%(inp)s),
GA_C_ORDER); GA_C_ORDER);
...@@ -207,11 +209,11 @@ class GpuFromCuda(Op): ...@@ -207,11 +209,11 @@ class GpuFromCuda(Op):
return 'GpuFromCuda' return 'GpuFromCuda'
def make_node(self, x): def make_node(self, x):
from theano.sandbox.cuda import CudaNdArrayType from theano.sandbox.cuda import CudaNdarrayType
if not isinstance(x.type, CudaNdArrayType): if not isinstance(x.type, CudaNdarrayType):
raise TypeError(x) raise TypeError(x)
return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable, return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable,
dtype=x.dtype)]()) dtype=x.dtype)()])
def perform(self, node, inp, out): def perform(self, node, inp, out):
x, = inp x, = inp
...@@ -233,7 +235,8 @@ class GpuFromCuda(Op): ...@@ -233,7 +235,8 @@ class GpuFromCuda(Op):
return xshp return xshp
def c_headers(self): def c_headers(self):
return ['<cuda_ndarray.cuh>', '<compyte/extension.h>', '<cuda.h>'] return ['<cuda_ndarray.cuh>', '<compyte/extension.h>',
'<compyte/types.h>', '<cuda.h>']
def c_header_dirs(self): def c_header_dirs(self):
import cuda_ndarray import cuda_ndarray
...@@ -260,21 +263,25 @@ class GpuFromCuda(Op): ...@@ -260,21 +263,25 @@ class GpuFromCuda(Op):
def c_support_code(self): def c_support_code(self):
return """ return """
CUcontext (*cuda_get_ctx)(void *ctx) = compyte_get_extension('cuda_get_ctx'); CUcontext (*cuda_get_ctx)(void *ctx);
gpudata *(*cuda_make_buf)(void *c, CUdeviceptr p, size_t sz) = compyte_get_extension('cuda_make_buf'); gpudata *(*cuda_make_buf)(void *c, CUdeviceptr p, size_t sz);
""" """
def c_code(self, node, name, input, output, sub): def c_init_code(self):
return ['cuda_get_ctx = (CUcontext (*)(void *))compyte_get_extension("cuda_get_ctx");',
'cuda_make_buf = (gpudata *(*)(void *, CUdeviceptr, size_t))compyte_get_extension("cuda_make_buf");']
def c_code(self, node, name, inputs, outputs, sub):
return """ return """
int %(name)serr; int %(name)serr;
gpudata *%(name)sdata; gpudata *%(name)sdata;
CUcontext *%(name)scur; CUcontext %(name)scur;
size_t *%(name)sdims; size_t *%(name)sdims;
ssize_t *%(name)sstr; ssize_t *%(name)sstr;
cuCtxGetCurrent(&%(name)scur); cuCtxGetCurrent(&%(name)scur);
if (%(name)scur != cuda_get_ctx(GpuArray_default_context()->ctx)) { if (%(name)scur != cuda_get_ctx(GpuArray_default_context()->ctx)) {
PyErr_SetString(PyErr_ValueError, "Ambient cuda context is not the same as output context."); PyErr_SetString(PyExc_ValueError, "Ambient cuda context is not the same as output context.");
%(fail)s %(fail)s
} }
%(name)sdims = (size_t *)calloc(%(in)s->nd, sizeof(size_t)); %(name)sdims = (size_t *)calloc(%(in)s->nd, sizeof(size_t));
...@@ -290,12 +297,12 @@ class GpuFromCuda(Op): ...@@ -290,12 +297,12 @@ class GpuFromCuda(Op):
} }
for (unsigned int i = 0; i < %(in)s->nd; i++) { for (unsigned int i = 0; i < %(in)s->nd; i++) {
%(name)sdims[i] = (size_t)CudaNdArray_HOST_DIMS(%(in)s)[i]; %(name)sdims[i] = (size_t)CudaNdarray_HOST_DIMS(%(in)s)[i];
%(name)sstr[i] = (ssize_t)CudaNdArray_HOST_STRIDES(%(in)s)[i]; %(name)sstr[i] = (ssize_t)CudaNdarray_HOST_STRIDES(%(in)s)[i];
} }
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = new_GpuArray((PyObject *)&GpuArrayType); %(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context());
if (%(out)s == NULL) { if (%(out)s == NULL) {
free(%(name)sdims); free(%(name)sdims);
free(%(name)sstr); free(%(name)sstr);
...@@ -324,7 +331,7 @@ class GpuFromCuda(Op): ...@@ -324,7 +331,7 @@ class GpuFromCuda(Op):
%(fail)s %(fail)s
} }
Py_INCREF(%(in)s); Py_INCREF(%(in)s);
%(out)s->base = %(in)s; %(out)s->base = (PyObject *)%(in)s;
""" % {'name':name, 'in': inputs[0], 'out': outputs[0], """ % {'name':name, 'in': inputs[0], 'out': outputs[0],
'fail': sub['fail']} 'fail': sub['fail']}
...@@ -347,11 +354,12 @@ class CudaFromGpu(Op): ...@@ -347,11 +354,12 @@ class CudaFromGpu(Op):
return 'CudaFromGpu' return 'CudaFromGpu'
def make_node(self, x): def make_node(self, x):
from theano.sandbox.cuda import CudaNdArrayType from theano.sandbox.cuda import CudaNdarrayType
if not isinstance(x.type, GpuArrayType): if not isinstance(x.type, GpuArrayType):
raise TypeError(x) raise TypeError(x)
return Apply(self, [x], [CudaNdarrayType(broadcastable=x.broadcastable, if x.type.dtype != 'float32':
dtype=x.dtype)()]) raise TypeError(x)
return Apply(self, [x], [CudaNdarrayType(broadcastable=x.broadcastable)()])
def perform(self, node, inp, out): def perform(self, node, inp, out):
from theano.sandbox.cuda import filter as cuda_filter from theano.sandbox.cuda import filter as cuda_filter
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论