提交 0a162810 authored 作者: Arnaud Bergeron

Fix problems found in testing.

上级 dba4873c
......@@ -118,30 +118,38 @@ dnn_available.msg = None
def c_set_tensor4d(var, desc, err, fail):
return """
{
cudnnDataType_t dt;
size_t ds;
switch (%(var)s->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensor4d");
return -1;
}
ds = gpuarray_get_elsize(%(var)s->ga.typecode);
%(err)s = cudnnSetTensor4dDescriptorEx(
%(desc)s, CUDNN_DATA_FLOAT,
%(desc)s, dt,
PyGpuArray_DIMS(%(var)s)[0],
PyGpuArray_DIMS(%(var)s)[1],
PyGpuArray_DIMS(%(var)s)[2],
PyGpuArray_DIMS(%(var)s)[3],
PyGpuArray_STRIDES(%(var)s)[0],
PyGpuArray_STRIDES(%(var)s)[1],
PyGpuArray_STRIDES(%(var)s)[2],
PyGpuArray_STRIDES(%(var)s)[3]);
PyGpuArray_STRIDES(%(var)s)[0] / ds,
PyGpuArray_STRIDES(%(var)s)[1] / ds,
PyGpuArray_STRIDES(%(var)s)[2] / ds,
PyGpuArray_STRIDES(%(var)s)[3] / ds);
if (%(err)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"could not set tensor4d descriptor: %%s"
"shapes=%%d %%d %%d %%d strides=%%d %%d %%d %%d",
cudnnGetErrorString(%(err)s),
PyGpuArray_DIMS(%(var)s)[0],
PyGpuArray_DIMS(%(var)s)[1],
PyGpuArray_DIMS(%(var)s)[2],
PyGpuArray_DIMS(%(var)s)[3],
PyGpuArray_STRIDES(%(var)s)[0],
PyGpuArray_STRIDES(%(var)s)[1],
PyGpuArray_STRIDES(%(var)s)[2],
PyGpuArray_STRIDES(%(var)s)[3]);
"could not set tensor4d descriptor: %%s",
cudnnGetErrorString(%(err)s));
%(fail)s
}
}
......@@ -160,9 +168,9 @@ class DnnBase(COp):
COp.__init__(self, "dnn_base.c")
def c_headers(self):
    """Return the C headers required by the generated cuDNN code.

    Diff-extraction residue had left both the pre-commit and
    post-commit return statements in place, so the stale header
    list (without gpuarray_helper.h / gpuarray/util.h /
    numpy_compat.h) executed first.  Only the committed list is
    kept here.
    """
    return ['cudnn.h', 'cudnn_helper.h', 'gpuarray_helper.h',
            'gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/util.h',
            'gpuarray_api.h', 'numpy_compat.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include(),
......@@ -953,7 +961,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(out)s_dims[3] = (PyGpuArray_DIMS(%(input)s)[3] + (hpad*2) - wsY) / strideY + 1;
if (theano_prep_output(&%(out)s, 4, %(out)s_dims, %(input)s->ga.typecode,
GA_C_ORDER) != 0) {
GA_C_ORDER, pygpu_default_context()) != 0) {
%(fail)s
}
......@@ -1008,7 +1016,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [[1], [0]]
def c_code_cache_version(self):
    """Return the cache-version key for the generated C code.

    Diff-extraction residue had kept both the old ``(6, version())``
    and the new ``(7, version())`` returns; the stale one executed
    first.  The committed version bump (7) is kept — it forces
    recompilation after the stride/elsize fixes in this commit.
    ``version()`` is the cuDNN version helper defined elsewhere in
    this module.
    """
    return (7, version())
class GpuDnnPoolGrad(DnnBase):
......@@ -1125,7 +1133,7 @@ if (!GpuArray_IS_C_CONTIGUOUS(&%(input_grad)s->ga)) {
%(fail)s
}
if (!GpuArray_IS_C_CONTIGUOUS(%(output)s)) {
if (!GpuArray_IS_C_CONTIGUOUS(&%(output)s->ga)) {
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous outputs are supported.");
%(fail)s
......@@ -1134,8 +1142,8 @@ if (!GpuArray_IS_C_CONTIGUOUS(%(output)s)) {
%(set_in)s
if (theano_prep_output(&%(output_grad)s, PyGpuArray_NDIM(%(output)s),
PyGpuArray_DIMS(%(output)s, %(output)s->ga.typecode,
GA_C_ORDER)) != 0)
PyGpuArray_DIMS(%(output)s), %(output)s->ga.typecode,
GA_C_ORDER, pygpu_default_context()) != 0)
{
%(fail)s
}
......@@ -1168,29 +1176,8 @@ _handle,
#endif
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: error doing operation: %%s. "
"input.shape=(%%d, %%d, %%d, %%d) "
"input_grad.shape=(%%d, %%d, %%d, %%d) "
"output.shape=(%%d, %%d, %%d, %%d) "
"output_grad.shape=(%%d, %%d, %%d, %%d)",
cudnnGetErrorString(err%(name)s),
PyGpuArray_DIMS(%(input)s)[0],
PyGpuArray_DIMS(%(input)s)[1],
PyGpuArray_DIMS(%(input)s)[2],
PyGpuArray_DIMS(%(input)s)[3],
PyGpuArray_DIMS(%(input_grad)s)[0],
PyGpuArray_DIMS(%(input_grad)s)[1],
PyGpuArray_DIMS(%(input_grad)s)[2],
PyGpuArray_DIMS(%(input_grad)s)[3],
PyGpuArray_DIMS(%(output)s)[0],
PyGpuArray_DIMS(%(output)s)[1],
PyGpuArray_DIMS(%(output)s)[2],
PyGpuArray_DIMS(%(output)s)[3],
PyGpuArray_DIMS(%(output_grad)s)[0],
PyGpuArray_DIMS(%(output_grad)s)[1],
PyGpuArray_DIMS(%(output_grad)s)[2],
PyGpuArray_DIMS(%(output_grad)s)[3]
);
"GpuDnnPoolGrad: error doing operation: %%s.",
cudnnGetErrorString(err%(name)s));
%(fail)s
}
""" % dict(output_grad=out_grad, desc=desc,
......@@ -1363,7 +1350,7 @@ if (%(mode)d == 1)
result += """
if (theano_prep_output(&%(outs)s, PyGpuArray_NDIM(%(ins)s),
PyGpuArray_DIMS(%(ins)s), %(ins)s->ga.typecode,
GA_C_ORDER) != 0)
GA_C_ORDER, pygpu_default_context()) != 0)
{
%(fail)s
}
......
......@@ -4,6 +4,7 @@ static cudnnHandle_t _handle = NULL;
static int
c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
cudnnDataType_t dt;
size_t ds;
switch (var->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
......@@ -18,25 +19,17 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensor4d");
return -1;
}
ds = gpuarray_get_elsize(var->ga.typecode);
cudnnStatus_t err = cudnnSetTensor4dDescriptorEx(
desc, dt,
PyGpuArray_DIM(var, 0), PyGpuArray_DIM(var, 1),
PyGpuArray_DIM(var, 2), PyGpuArray_DIM(var, 3),
PyGpuArray_STRIDE(var, 0), PyGpuArray_STRIDE(var, 1),
PyGpuArray_STRIDE(var, 2), PyGpuArray_STRIDE(var, 3));
PyGpuArray_STRIDE(var, 0) / ds, PyGpuArray_STRIDE(var, 1) / ds,
PyGpuArray_STRIDE(var, 2) / ds, PyGpuArray_STRIDE(var, 3) / ds);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set tensor4d descriptor: %s"
"shapes=%d %d %d %d strides=%d %d %d %d",
cudnnGetErrorString(err),
PyGpuArray_DIMS(var)[0],
PyGpuArray_DIMS(var)[1],
PyGpuArray_DIMS(var)[2],
PyGpuArray_DIMS(var)[3],
PyGpuArray_STRIDES(var)[0],
PyGpuArray_STRIDES(var)[1],
PyGpuArray_STRIDES(var)[2],
PyGpuArray_STRIDES(var)[3]);
"Could not set tensor4d descriptor: %s",
cudnnGetErrorString(err));
return -1;
}
return 0;
......@@ -45,7 +38,7 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga))
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported.");
return -1;
......@@ -70,13 +63,8 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
PyGpuArray_DIMS(var)[2], PyGpuArray_DIMS(var)[3]);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s."
" dims= %d %d %d %d",
cudnnGetErrorString(err),
PyGpuArray_DIMS(var)[0],
PyGpuArray_DIMS(var)[1],
PyGpuArray_DIMS(var)[2],
PyGpuArray_DIMS(var)[3]);
"Could not set filter descriptor: %s.",
cudnnGetErrorString(err));
return -1;
}
return 0;
......
......@@ -42,7 +42,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
Py_INCREF(*output);
#else
if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om),
om->ga.typecode, GA_C_ORDER) != 0)
om->ga.typecode, GA_C_ORDER,
pygpu_default_context()) != 0)
return 1;
if (beta != 0.0 && pygpu_move(*output, om))
return 1;
......@@ -54,6 +55,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
{
size_t worksize;
gpudata *workspace;
PyGpuContextObject *c;
err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input),
......@@ -75,8 +77,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
* to place a nice get_work_mem() function in.
*/
if (worksize != 0) {
workspace = pygpu_default_context->ops->buffer_alloc(
pygpu_default_context->ctx, worksize, NULL, 0, NULL);
c = pygpu_default_context();
workspace = c->ops->buffer_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"Could not allocate working memory");
......@@ -93,9 +95,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
worksize == 0 ? NULL : *(void **)workspace, worksize,
beta_p,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));
}
pygpu_default_context->ops->buffer_release(workspace);
if (worksize != 0)
c->ops->buffer_release(workspace);
}
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s",
......
......@@ -21,7 +21,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
switch (input->ga.typecode) {
switch (im->ga.typecode) {
case GA_DOUBLE:
alpha_p = (void *)α
beta_p = (void *)β
......@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
Py_INCREF(*input);
#else
if (theano_prep_output(input, PyGpuArray_NDIM(im), PyGpuArray_DIMS(im),
im->ga.typecode, GA_C_ORDER) != 0)
im->ga.typecode, GA_C_ORDER,
pygpu_default_context()) != 0)
return 1;
if (beta != 0.0 && pygpu_move(*input, im))
return 1;
......
......@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
Py_INCREF(*kerns);
#else
if (theano_prep_output(kerns, PyGpuArray_NDIM(km), PyGpuArray_DIMS(km),
km->ga.typecode, GA_C_ORDER) != 0)
km->ga.typecode, GA_C_ORDER,
pygpu_default_context()) != 0)
return 1;
if (beta != 0.0 && pygpu_move(*kerns, km))
return 1;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论