提交 76dce4d6 作者: Mathieu Germain

The test_dnn tests are passing. Fixed PoolingNdDescriptor.

上级 88599601
...@@ -1048,9 +1048,13 @@ class GpuDnnPoolDesc(Op): ...@@ -1048,9 +1048,13 @@ class GpuDnnPoolDesc(Op):
static const int win[%(nd)d] = {%(win)s}; static const int win[%(nd)d] = {%(win)s};
static const int pad[%(nd)d] = {%(pad)s}; static const int pad[%(nd)d] = {%(pad)s};
static const int str[%(nd)d] = {%(str)s}; static const int str[%(nd)d] = {%(str)s};
err = cudnnSetPoolingNdDescriptor(
%(desc)s, %(mode_flag)s, %(nd)d, #if CUDNN_VERSION >= 5000
win, pad, str); err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, CUDNN_PROPAGATE_NAN, %(nd)d, win, pad, str);
#else
err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, %(nd)d, win, pad, str);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s", PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
...@@ -1063,7 +1067,7 @@ class GpuDnnPoolDesc(Op): ...@@ -1063,7 +1067,7 @@ class GpuDnnPoolDesc(Op):
str=', '.join(map(str, self.stride))) str=', '.join(map(str, self.stride)))
def c_code_cache_version(self): def c_code_cache_version(self):
return (3, version()) return (4, version())
class GpuDnnPool(DnnBase): class GpuDnnPool(DnnBase):
......
...@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static int static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt; cudnnDataType_t dt;
cudnnStatus_t err;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported."); "Only contiguous filters (kernels) are supported.");
...@@ -86,10 +88,10 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { ...@@ -86,10 +88,10 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
dims[i] = PyGpuArray_DIM(var, i); dims[i] = PyGpuArray_DIM(var, i);
} }
#if CUDNN_VERSION > 5000 #if CUDNN_VERSION >= 5000
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims); err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
#else #else
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims); err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#endif #endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
......
...@@ -24,7 +24,7 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) ...@@ -24,7 +24,7 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output)))
} }
if ((APPLY_SPECIFIC(err) = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) { if ((APPLY_SPECIFIC(err) = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor" PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err))); "(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL; FAIL;
} }
...@@ -38,7 +38,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC ...@@ -38,7 +38,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
PyArrayObject *ws, PyArrayObject *ws,
PyArrayObject *stride, PyArrayObject *stride,
PyArrayObject *pad, PyArrayObject *pad,
PyGpuArrayObject **out, PyGpuArrayObject **out,
...@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
for(int i = 0; i < ndims; i++) { for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
} }
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s); err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err)); PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
Markdown 格式
0%
您正在将 0 人添加到此讨论。请谨慎操作。
请先完成此评论的编辑!
注册 或者 登录 后发表评论