提交 0a162810 — 作者: Arnaud Bergeron

Fix problems found in testing.

上级 dba4873c
...@@ -118,30 +118,38 @@ dnn_available.msg = None ...@@ -118,30 +118,38 @@ dnn_available.msg = None
def c_set_tensor4d(var, desc, err, fail): def c_set_tensor4d(var, desc, err, fail):
return """ return """
{ {
cudnnDataType_t dt;
size_t ds;
switch (%(var)s->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensor4d");
return -1;
}
ds = gpuarray_get_elsize(%(var)s->ga.typecode);
%(err)s = cudnnSetTensor4dDescriptorEx( %(err)s = cudnnSetTensor4dDescriptorEx(
%(desc)s, CUDNN_DATA_FLOAT, %(desc)s, dt,
PyGpuArray_DIMS(%(var)s)[0], PyGpuArray_DIMS(%(var)s)[0],
PyGpuArray_DIMS(%(var)s)[1], PyGpuArray_DIMS(%(var)s)[1],
PyGpuArray_DIMS(%(var)s)[2], PyGpuArray_DIMS(%(var)s)[2],
PyGpuArray_DIMS(%(var)s)[3], PyGpuArray_DIMS(%(var)s)[3],
PyGpuArray_STRIDES(%(var)s)[0], PyGpuArray_STRIDES(%(var)s)[0] / ds,
PyGpuArray_STRIDES(%(var)s)[1], PyGpuArray_STRIDES(%(var)s)[1] / ds,
PyGpuArray_STRIDES(%(var)s)[2], PyGpuArray_STRIDES(%(var)s)[2] / ds,
PyGpuArray_STRIDES(%(var)s)[3]); PyGpuArray_STRIDES(%(var)s)[3] / ds);
if (%(err)s != CUDNN_STATUS_SUCCESS) { if (%(err)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"could not set tensor4d descriptor: %%s" "could not set tensor4d descriptor: %%s",
"shapes=%%d %%d %%d %%d strides=%%d %%d %%d %%d", cudnnGetErrorString(%(err)s));
cudnnGetErrorString(%(err)s),
PyGpuArray_DIMS(%(var)s)[0],
PyGpuArray_DIMS(%(var)s)[1],
PyGpuArray_DIMS(%(var)s)[2],
PyGpuArray_DIMS(%(var)s)[3],
PyGpuArray_STRIDES(%(var)s)[0],
PyGpuArray_STRIDES(%(var)s)[1],
PyGpuArray_STRIDES(%(var)s)[2],
PyGpuArray_STRIDES(%(var)s)[3]);
%(fail)s %(fail)s
} }
} }
...@@ -160,9 +168,9 @@ class DnnBase(COp): ...@@ -160,9 +168,9 @@ class DnnBase(COp):
COp.__init__(self, "dnn_base.c") COp.__init__(self, "dnn_base.c")
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h', return ['cudnn.h', 'cudnn_helper.h', 'gpuarray_helper.h',
'gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/util.h',
'gpuarray_api.h'] 'gpuarray_api.h', 'numpy_compat.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include(), return [os.path.dirname(__file__), pygpu.get_include(),
...@@ -953,7 +961,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -953,7 +961,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(out)s_dims[3] = (PyGpuArray_DIMS(%(input)s)[3] + (hpad*2) - wsY) / strideY + 1; %(out)s_dims[3] = (PyGpuArray_DIMS(%(input)s)[3] + (hpad*2) - wsY) / strideY + 1;
if (theano_prep_output(&%(out)s, 4, %(out)s_dims, %(input)s->ga.typecode, if (theano_prep_output(&%(out)s, 4, %(out)s_dims, %(input)s->ga.typecode,
GA_C_ORDER) != 0) { GA_C_ORDER, pygpu_default_context()) != 0) {
%(fail)s %(fail)s
} }
...@@ -1008,7 +1016,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1008,7 +1016,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [[1], [0]] return [[1], [0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return (6, version()) return (7, version())
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
...@@ -1125,7 +1133,7 @@ if (!GpuArray_IS_C_CONTIGUOUS(&%(input_grad)s->ga)) { ...@@ -1125,7 +1133,7 @@ if (!GpuArray_IS_C_CONTIGUOUS(&%(input_grad)s->ga)) {
%(fail)s %(fail)s
} }
if (!GpuArray_IS_C_CONTIGUOUS(%(output)s)) { if (!GpuArray_IS_C_CONTIGUOUS(&%(output)s->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous outputs are supported."); "GpuDnnPoolGrad: Only contiguous outputs are supported.");
%(fail)s %(fail)s
...@@ -1134,8 +1142,8 @@ if (!GpuArray_IS_C_CONTIGUOUS(%(output)s)) { ...@@ -1134,8 +1142,8 @@ if (!GpuArray_IS_C_CONTIGUOUS(%(output)s)) {
%(set_in)s %(set_in)s
if (theano_prep_output(&%(output_grad)s, PyGpuArray_NDIM(%(output)s), if (theano_prep_output(&%(output_grad)s, PyGpuArray_NDIM(%(output)s),
PyGpuArray_DIMS(%(output)s, %(output)s->ga.typecode, PyGpuArray_DIMS(%(output)s), %(output)s->ga.typecode,
GA_C_ORDER)) != 0) GA_C_ORDER, pygpu_default_context()) != 0)
{ {
%(fail)s %(fail)s
} }
...@@ -1168,29 +1176,8 @@ _handle, ...@@ -1168,29 +1176,8 @@ _handle,
#endif #endif
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: error doing operation: %%s. " "GpuDnnPoolGrad: error doing operation: %%s.",
"input.shape=(%%d, %%d, %%d, %%d) " cudnnGetErrorString(err%(name)s));
"input_grad.shape=(%%d, %%d, %%d, %%d) "
"output.shape=(%%d, %%d, %%d, %%d) "
"output_grad.shape=(%%d, %%d, %%d, %%d)",
cudnnGetErrorString(err%(name)s),
PyGpuArray_DIMS(%(input)s)[0],
PyGpuArray_DIMS(%(input)s)[1],
PyGpuArray_DIMS(%(input)s)[2],
PyGpuArray_DIMS(%(input)s)[3],
PyGpuArray_DIMS(%(input_grad)s)[0],
PyGpuArray_DIMS(%(input_grad)s)[1],
PyGpuArray_DIMS(%(input_grad)s)[2],
PyGpuArray_DIMS(%(input_grad)s)[3],
PyGpuArray_DIMS(%(output)s)[0],
PyGpuArray_DIMS(%(output)s)[1],
PyGpuArray_DIMS(%(output)s)[2],
PyGpuArray_DIMS(%(output)s)[3],
PyGpuArray_DIMS(%(output_grad)s)[0],
PyGpuArray_DIMS(%(output_grad)s)[1],
PyGpuArray_DIMS(%(output_grad)s)[2],
PyGpuArray_DIMS(%(output_grad)s)[3]
);
%(fail)s %(fail)s
} }
""" % dict(output_grad=out_grad, desc=desc, """ % dict(output_grad=out_grad, desc=desc,
...@@ -1363,7 +1350,7 @@ if (%(mode)d == 1) ...@@ -1363,7 +1350,7 @@ if (%(mode)d == 1)
result += """ result += """
if (theano_prep_output(&%(outs)s, PyGpuArray_NDIM(%(ins)s), if (theano_prep_output(&%(outs)s, PyGpuArray_NDIM(%(ins)s),
PyGpuArray_DIMS(%(ins)s), %(ins)s->ga.typecode, PyGpuArray_DIMS(%(ins)s), %(ins)s->ga.typecode,
GA_C_ORDER) != 0) GA_C_ORDER, pygpu_default_context()) != 0)
{ {
%(fail)s %(fail)s
} }
......
...@@ -4,6 +4,7 @@ static cudnnHandle_t _handle = NULL; ...@@ -4,6 +4,7 @@ static cudnnHandle_t _handle = NULL;
static int static int
c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
cudnnDataType_t dt; cudnnDataType_t dt;
size_t ds;
switch (var->ga.typecode) { switch (var->ga.typecode) {
case GA_FLOAT: case GA_FLOAT:
dt = CUDNN_DATA_FLOAT; dt = CUDNN_DATA_FLOAT;
...@@ -18,25 +19,17 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -18,25 +19,17 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensor4d"); PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensor4d");
return -1; return -1;
} }
ds = gpuarray_get_elsize(var->ga.typecode);
cudnnStatus_t err = cudnnSetTensor4dDescriptorEx( cudnnStatus_t err = cudnnSetTensor4dDescriptorEx(
desc, dt, desc, dt,
PyGpuArray_DIM(var, 0), PyGpuArray_DIM(var, 1), PyGpuArray_DIM(var, 0), PyGpuArray_DIM(var, 1),
PyGpuArray_DIM(var, 2), PyGpuArray_DIM(var, 3), PyGpuArray_DIM(var, 2), PyGpuArray_DIM(var, 3),
PyGpuArray_STRIDE(var, 0), PyGpuArray_STRIDE(var, 1), PyGpuArray_STRIDE(var, 0) / ds, PyGpuArray_STRIDE(var, 1) / ds,
PyGpuArray_STRIDE(var, 2), PyGpuArray_STRIDE(var, 3)); PyGpuArray_STRIDE(var, 2) / ds, PyGpuArray_STRIDE(var, 3) / ds);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Could not set tensor4d descriptor: %s" "Could not set tensor4d descriptor: %s",
"shapes=%d %d %d %d strides=%d %d %d %d", cudnnGetErrorString(err));
cudnnGetErrorString(err),
PyGpuArray_DIMS(var)[0],
PyGpuArray_DIMS(var)[1],
PyGpuArray_DIMS(var)[2],
PyGpuArray_DIMS(var)[3],
PyGpuArray_STRIDES(var)[0],
PyGpuArray_STRIDES(var)[1],
PyGpuArray_STRIDES(var)[2],
PyGpuArray_STRIDES(var)[3]);
return -1; return -1;
} }
return 0; return 0;
...@@ -45,7 +38,7 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -45,7 +38,7 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static int static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt; cudnnDataType_t dt;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported."); "Only contiguous filters (kernels) are supported.");
return -1; return -1;
...@@ -70,13 +63,8 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { ...@@ -70,13 +63,8 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
PyGpuArray_DIMS(var)[2], PyGpuArray_DIMS(var)[3]); PyGpuArray_DIMS(var)[2], PyGpuArray_DIMS(var)[3]);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s." "Could not set filter descriptor: %s.",
" dims= %d %d %d %d", cudnnGetErrorString(err));
cudnnGetErrorString(err),
PyGpuArray_DIMS(var)[0],
PyGpuArray_DIMS(var)[1],
PyGpuArray_DIMS(var)[2],
PyGpuArray_DIMS(var)[3]);
return -1; return -1;
} }
return 0; return 0;
......
...@@ -42,7 +42,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -42,7 +42,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
Py_INCREF(*output); Py_INCREF(*output);
#else #else
if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om), if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om),
om->ga.typecode, GA_C_ORDER) != 0) om->ga.typecode, GA_C_ORDER,
pygpu_default_context()) != 0)
return 1; return 1;
if (beta != 0.0 && pygpu_move(*output, om)) if (beta != 0.0 && pygpu_move(*output, om))
return 1; return 1;
...@@ -54,6 +55,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -54,6 +55,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
{ {
size_t worksize; size_t worksize;
gpudata *workspace; gpudata *workspace;
PyGpuContextObject *c;
err = cudnnGetConvolutionForwardWorkspaceSize(_handle, err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input), APPLY_SPECIFIC(input),
...@@ -75,8 +77,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -75,8 +77,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
* to place a nice get_work_mem() function in. * to place a nice get_work_mem() function in.
*/ */
if (worksize != 0) { if (worksize != 0) {
workspace = pygpu_default_context->ops->buffer_alloc( c = pygpu_default_context();
pygpu_default_context->ctx, worksize, NULL, 0, NULL); workspace = c->ops->buffer_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) { if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"Could not allocate working memory"); "Could not allocate working memory");
...@@ -93,9 +95,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -93,9 +95,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
worksize == 0 ? NULL : *(void **)workspace, worksize, worksize == 0 ? NULL : *(void **)workspace, worksize,
beta_p, beta_p,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output)); APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));
}
pygpu_default_context->ops->buffer_release(workspace); if (worksize != 0)
c->ops->buffer_release(workspace);
}
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s", PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s",
......
...@@ -21,7 +21,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -21,7 +21,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
switch (input->ga.typecode) { switch (im->ga.typecode) {
case GA_DOUBLE: case GA_DOUBLE:
alpha_p = (void *)α alpha_p = (void *)α
beta_p = (void *)β beta_p = (void *)β
...@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
Py_INCREF(*input); Py_INCREF(*input);
#else #else
if (theano_prep_output(input, PyGpuArray_NDIM(im), PyGpuArray_DIMS(im), if (theano_prep_output(input, PyGpuArray_NDIM(im), PyGpuArray_DIMS(im),
im->ga.typecode, GA_C_ORDER) != 0) im->ga.typecode, GA_C_ORDER,
pygpu_default_context()) != 0)
return 1; return 1;
if (beta != 0.0 && pygpu_move(*input, im)) if (beta != 0.0 && pygpu_move(*input, im))
return 1; return 1;
......
...@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
Py_INCREF(*kerns); Py_INCREF(*kerns);
#else #else
if (theano_prep_output(kerns, PyGpuArray_NDIM(km), PyGpuArray_DIMS(km), if (theano_prep_output(kerns, PyGpuArray_NDIM(km), PyGpuArray_DIMS(km),
km->ga.typecode, GA_C_ORDER) != 0) km->ga.typecode, GA_C_ORDER,
pygpu_default_context()) != 0)
return 1; return 1;
if (beta != 0.0 && pygpu_move(*kerns, km)) if (beta != 0.0 && pygpu_move(*kerns, km))
return 1; return 1;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论