提交 1a593ff3 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add the cudnn error message to the error strings.

上级 a4c65bf1
#ifndef CUDNN_HELPER_H
#define CUDNN_HELPER_H
#include <cudnn.h>
static inline const char *cudnnGetErrorString(cudnnStatus_t err) {
switch (err) {
case CUDNN_STATUS_SUCCESS:
return "The operation completed successfully.";
case CUDNN_STATUS_NOT_INITIALIZED:
return "The handle was not initialized.";
case CUDNN_STATUS_ALLOC_FAILED:
return "Ressource allocation failed inside the library.";
case CUDNN_STATUS_BAD_PARAM:
return "An incorrect value was passed in.";
case CUDNN_STATUS_ARCH_MISMATCH:
return "The current GPU does not support the required features (only cc 3.0+ are supported).";
case CUDNN_STATUS_MAPPING_ERROR:
return "An access to GPU memory space failed (probably due to a failure to bind texture).";
case CUDNN_STATUS_EXECUTION_FAILED:
return "A kernel failed to execute.";
case CUDNN_STATUS_INTERNAL_ERROR:
return "An internal cuDNN operation failed.";
case CUDNN_STATUS_NOT_SUPPORTED:
return "The combination of parameters is not currently supported.";
default:
return "Unknown error code.";
}
}
#endif
...@@ -30,7 +30,10 @@ class GpuDnnConv(GpuOp): ...@@ -30,7 +30,10 @@ class GpuDnnConv(GpuOp):
return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()]) return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])
def c_headers(self): def c_headers(self):
return ['cudnn.h'] return ['cudnn.h', 'cudnn_helper.h']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
def c_libraries(self): def c_libraries(self):
return ['cudnn'] return ['cudnn']
...@@ -46,29 +49,30 @@ cudnnConvolutionDescriptor_t op%(id)d; ...@@ -46,29 +49,30 @@ cudnnConvolutionDescriptor_t op%(id)d;
def c_init_code_struct(self, node, struct_id, sub): def c_init_code_struct(self, node, struct_id, sub):
return """ return """
handle%(id)d = NULL; cudnnStatus_t err%(id)d;
input%(id)d = NULL; if ((err%(id)d = cudnnCreate(&handle%(id)d)) != CUDNN_STATUS_SUCCESS) {
output%(id)d = NULL; PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle: %%s",
kerns%(id)d = NULL; cudnnGetErrorString(err%(id)d));
op%(id)d = NULL;
if (cudnnCreate(&handle%(id)d) != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "could not create cudnn handle");
%(fail)s %(fail)s
} }
if (cudnnCreateTensor4dDescriptor(&input%(id)d) != CUDNN_STATUS_SUCCESS) { if ((err%(id)d = cudnnCreateTensor4dDescriptor(&input%(id)d)) != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_MemoryError, "could not allocate tensor4d descriptor (inp)"); PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(inp): %%s", cudnnGetErrorString(err%(id)d));
%(fail)s %(fail)s
} }
if (cudnnCreateTensor4dDescriptor(&output%(id)d) != CUDNN_STATUS_SUCCESS) { if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output%(id)d)) != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_MemoryError, "could not allocate tensor4d descriptor (out)"); PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(out): %%s", cudnnGetErrorString(err%(id)d));
%(fail)s %(fail)s
} }
if (cudnnCreateFilterDescriptor(&kerns%(id)d) != CUDNN_STATUS_SUCCESS) { if ((err%(id)d = cudnnCreateFilterDescriptor(&kerns%(id)d)) != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_MemoryError, "could not allocate filter descriptor"); PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor: %%s",
cudnnGetErrorString(err%(id)d));
%(fail)s %(fail)s
} }
if (cudnnCreateConvolutionDescriptor(&op%(id)d) != CUDNN_STATUS_SUCCESS) { if ((err%(id)d = cudnnCreateConvolutionDescriptor(&op%(id)d)) != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_MemoryError, "could not allocate convolution descriptor"); PyErr_Format(PyExc_MemoryError, "could not allocate convolution "
"descriptor: %%s", cudnnGetErrorString(err%(id)d));
%(fail)s %(fail)s
} }
""" % dict(id=struct_id, fail=sub['fail']) """ % dict(id=struct_id, fail=sub['fail'])
...@@ -108,10 +112,10 @@ CudaNdarray_HOST_STRIDES(%(img)s)[2], ...@@ -108,10 +112,10 @@ CudaNdarray_HOST_STRIDES(%(img)s)[2],
CudaNdarray_HOST_STRIDES(%(img)s)[3] CudaNdarray_HOST_STRIDES(%(img)s)[3]
); );
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "could not set tensor4d descriptor"); PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
// TODO: make sure the kernels are contiguous or ... BOOM!
err%(name)s = cudnnSetFilterDescriptor( err%(name)s = cudnnSetFilterDescriptor(
kerns%(id)d, CUDNN_DATA_FLOAT, kerns%(id)d, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(kerns)s)[0], CudaNdarray_HOST_DIMS(%(kerns)s)[0],
...@@ -120,7 +124,8 @@ CudaNdarray_HOST_DIMS(%(kerns)s)[2], ...@@ -120,7 +124,8 @@ CudaNdarray_HOST_DIMS(%(kerns)s)[2],
CudaNdarray_HOST_DIMS(%(kerns)s)[3] CudaNdarray_HOST_DIMS(%(kerns)s)[3]
); );
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "could not set filter descriptor"); PyErr_Format(PyExc_RuntimeError, "could not set filter descriptor: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
if (%(bmode)d == 1) { if (%(bmode)d == 1) {
...@@ -141,7 +146,8 @@ pad_w%(name)s, ...@@ -141,7 +146,8 @@ pad_w%(name)s,
CUDNN_CONVOLUTION CUDNN_CONVOLUTION
); );
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "could not set op descriptor"); PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
{ {
...@@ -152,7 +158,8 @@ op%(id)d, CUDNN_CONVOLUTION_FWD, ...@@ -152,7 +158,8 @@ op%(id)d, CUDNN_CONVOLUTION_FWD,
&out_dims[2], &out_dims[3] &out_dims[2], &out_dims[3]
); );
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "could not set op descriptor"); PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
if (CudaNdarray_prep_output(&%(out)s, 4, out_dims) != 0) { if (CudaNdarray_prep_output(&%(out)s, 4, out_dims) != 0) {
...@@ -171,7 +178,8 @@ CudaNdarray_HOST_STRIDES(%(out)s)[2], ...@@ -171,7 +178,8 @@ CudaNdarray_HOST_STRIDES(%(out)s)[2],
CudaNdarray_HOST_STRIDES(%(out)s)[3] CudaNdarray_HOST_STRIDES(%(out)s)[3]
); );
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "could not set out descriptor"); PyErr_Format(PyExc_RuntimeError, "could not set out descriptor: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
err%(name)s = cudnnConvolutionForward( err%(name)s = cudnnConvolutionForward(
...@@ -183,14 +191,15 @@ output%(id)d, CudaNdarray_DEV_DATA(%(out)s), ...@@ -183,14 +191,15 @@ output%(id)d, CudaNdarray_DEV_DATA(%(out)s),
CUDNN_RESULT_NO_ACCUMULATE CUDNN_RESULT_NO_ACCUMULATE
); );
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "error doing operation"); PyErr_Format(PyExc_RuntimeError, "error doing operation: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
""" % dict(img=img, kerns=kern, out=out, bmode=bmode, """ % dict(img=img, kerns=kern, out=out, bmode=bmode,
fail=sub['fail'], id=sub['struct_id'], name=name) fail=sub['fail'], id=sub['struct_id'], name=name)
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous, from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论