提交 76dce4d6 作者: Mathieu Germain

test_dnn tests are passing. Fixed the cudnnSetPoolingNdDescriptor calls for cuDNN v5.

上级 88599601
...@@ -1048,9 +1048,13 @@ class GpuDnnPoolDesc(Op): ...@@ -1048,9 +1048,13 @@ class GpuDnnPoolDesc(Op):
static const int win[%(nd)d] = {%(win)s}; static const int win[%(nd)d] = {%(win)s};
static const int pad[%(nd)d] = {%(pad)s}; static const int pad[%(nd)d] = {%(pad)s};
static const int str[%(nd)d] = {%(str)s}; static const int str[%(nd)d] = {%(str)s};
err = cudnnSetPoolingNdDescriptor(
%(desc)s, %(mode_flag)s, %(nd)d, #if CUDNN_VERSION >= 5000
win, pad, str); err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, CUDNN_PROPAGATE_NAN, %(nd)d, win, pad, str);
#else
err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, %(nd)d, win, pad, str);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s", PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
...@@ -1063,7 +1067,7 @@ class GpuDnnPoolDesc(Op): ...@@ -1063,7 +1067,7 @@ class GpuDnnPoolDesc(Op):
str=', '.join(map(str, self.stride))) str=', '.join(map(str, self.stride)))
def c_code_cache_version(self): def c_code_cache_version(self):
return (3, version()) return (4, version())
class GpuDnnPool(DnnBase): class GpuDnnPool(DnnBase):
......
...@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static int static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt; cudnnDataType_t dt;
cudnnStatus_t err;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported."); "Only contiguous filters (kernels) are supported.");
...@@ -86,10 +88,10 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { ...@@ -86,10 +88,10 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
dims[i] = PyGpuArray_DIM(var, i); dims[i] = PyGpuArray_DIM(var, i);
} }
#if CUDNN_VERSION > 5000 #if CUDNN_VERSION >= 5000
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims); err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
#else #else
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims); err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#endif #endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
......
...@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
for(int i = 0; i < ndims; i++) { for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
} }
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s); err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err)); PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论