提交 ef4c9e9b 作者: Harm de Vries 提交者: Frederic Bastien

new gpu backend accepting tensor variables

上级 fe0b1477
...@@ -141,6 +141,7 @@ dnn_available.msg = None ...@@ -141,6 +141,7 @@ dnn_available.msg = None
class DnnBase(COp): class DnnBase(COp):
""" """
Creates a handle for cudnn and pulls in the cudnn libraries and headers. Creates a handle for cudnn and pulls in the cudnn libraries and headers.
...@@ -254,6 +255,7 @@ version.v = None ...@@ -254,6 +255,7 @@ version.v = None
class GpuDnnConvDesc(COp): class GpuDnnConvDesc(COp):
""" """
This Op builds a convolution descriptor for use in the other convolution This Op builds a convolution descriptor for use in the other convolution
operations. operations.
...@@ -387,6 +389,7 @@ def ensure_dt(val, default, name, dtype): ...@@ -387,6 +389,7 @@ def ensure_dt(val, default, name, dtype):
class GpuDnnConv(DnnBase): class GpuDnnConv(DnnBase):
""" """
The forward convolution. The forward convolution.
...@@ -554,6 +557,7 @@ class GpuDnnConv(DnnBase): ...@@ -554,6 +557,7 @@ class GpuDnnConv(DnnBase):
class GpuDnnConvGradW(DnnBase): class GpuDnnConvGradW(DnnBase):
""" """
The convolution gradient with respect to the weights. The convolution gradient with respect to the weights.
...@@ -674,6 +678,7 @@ class GpuDnnConvGradW(DnnBase): ...@@ -674,6 +678,7 @@ class GpuDnnConvGradW(DnnBase):
class GpuDnnConvGradI(DnnBase): class GpuDnnConvGradI(DnnBase):
""" """
The convolution gradient with respect to the inputs. The convolution gradient with respect to the inputs.
...@@ -942,6 +947,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', ...@@ -942,6 +947,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
class GpuDnnPoolDesc(Op): class GpuDnnPoolDesc(Op):
""" """
This Op builds a pooling descriptor for use in the other This Op builds a pooling descriptor for use in the other
pooling operations. pooling operations.
...@@ -1060,69 +1066,89 @@ class GpuDnnPoolDesc(Op): ...@@ -1060,69 +1066,89 @@ class GpuDnnPoolDesc(Op):
class GpuDnnPool(DnnBase): class GpuDnnPool(DnnBase):
"""
Pooling.
"""
Parameters Parameters
---------- ----------
img img
The image 4d tensor. The image 4d or 5d tensor.
desc Parameters
The pooling descriptor. ----------
ws : tensor variable
Window size.
stride : tensor variable
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
(padX, padY) or (padX, padY, padZ)
""" """
__props__ = () __props__ = ('mode',)
def __init__(self): def __init__(self, mode='max'):
DnnBase.__init__(self, ["dnn_pool.c"], "APPLY_SPECIFIC(dnn_pool)") DnnBase.__init__(self, ["dnn_pool.c"], "APPLY_SPECIFIC(dnn_pool)")
if mode == 'average':
mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
self.mode = mode
def make_node(self, img, desc): def get_op_params(self):
img = as_gpuarray_variable(img, infer_context_name(img)) if self.mode == 'max':
mode_flag = 'CUDNN_POOLING_MAX'
elif self.mode == "average_inc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if desc.owner is not None: return [('MODE_FLAG', mode_flag)]
e_ndim = desc.owner.op.get_ndim() + 2
if img.type.ndim != e_ndim: def make_node(self, img, ws, stride, pad):
raise TypeError('img must be %dD tensor' % (e_ndim,)) ctx_name = infer_context_name(img, ws, stride, pad)
img = as_gpuarray_variable(img, ctx_name)
if (not isinstance(desc.type, CDataType) or ws = tensor.as_tensor_variable(ws)
desc.type.ctype != 'cudnnPoolingDescriptor_t'): stride = tensor.as_tensor_variable(stride)
raise TypeError('desc must be cudnnPoolingDescriptor_t') pad = tensor.as_tensor_variable(pad)
assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
assert ws.type.ndim == 1
return Apply(self, [img, desc], [img.type()]) return Apply(self, [img, ws, stride, pad], [img.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
desc = node.inputs[1].owner.op w = node.inputs[1]
w = desc.ws s = node.inputs[2]
s = desc.stride p = node.inputs[3]
p = desc.pad
res = [shape[0][0], shape[0][1], res = [shape[0][0], shape[0][1],
(shape[0][2] + 2 * p[0] - w[0]) // s[0] + 1, (shape[0][2] + 2 * p[0] - w[0]) // s[0] + 1,
(shape[0][3] + 2 * p[1] - w[1]) // s[1] + 1 (shape[0][3] + 2 * p[1] - w[1]) // s[1] + 1
] ]
if len(w) > 2: if node.inputs[0].ndim == 5:
res.append((shape[0][4] + 2 * p[2] - w[2]) // s[2] + 1) res.append((shape[0][4] + 2 * p[2] - w[2]) // s[2] + 1)
return [res] return [res]
def grad(self, inp, grads): def grad(self, inp, grads):
img, desc = inp img, ws, stride, pad = inp
grad, = grads grad, = grads
grad = gpu_contiguous(grad) grad = gpu_contiguous(grad)
out = self(img, desc) out = self(img, ws, stride, pad)
g_out = GpuDnnPoolGrad()(img, out, grad, desc) g_out = GpuDnnPoolGrad(mode=self.mode)(img, out, grad, ws, stride, pad)
return g_out, theano.gradient.DisconnectedType()() return g_out, theano.gradient.DisconnectedType()(), theano.gradient.DisconnectedType()(), theano.gradient.DisconnectedType()()
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to parameters
return [[1], [0]] return [[1], [0], [0], [0]]
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
""" """
The pooling gradient. The pooling gradient.
...@@ -1134,40 +1160,56 @@ class GpuDnnPoolGrad(DnnBase): ...@@ -1134,40 +1160,56 @@ class GpuDnnPoolGrad(DnnBase):
The output of the pooling in the forward. The output of the pooling in the forward.
out_grad out_grad
Same size as out, but is the corresponding gradient information. Same size as out, but is the corresponding gradient information.
desc ws : tensor variable
The pooling descriptor. Window size.
stride : tensor variable
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
(padX, padY) or (padX, padY, padZ)
""" """
__props__ = () __props__ = ('mode',)
def __init__(self): def __init__(self, mode='max'):
DnnBase.__init__(self, ["dnn_pool_grad.c"], DnnBase.__init__(self, ["dnn_pool_grad.c"],
"APPLY_SPECIFIC(dnn_pool_grad)") "APPLY_SPECIFIC(dnn_pool_grad)")
if mode == 'average':
mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
self.mode = mode
def get_op_params(self):
if self.mode == 'max':
mode_flag = 'CUDNN_POOLING_MAX'
elif self.mode == "average_inc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
def make_node(self, inp, out, out_grad, desc): return [('MODE_FLAG', mode_flag)]
def make_node(self, inp, out, out_grad, ws, stride, pad):
ctx_name = infer_context_name(inp, out, out_grad) ctx_name = infer_context_name(inp, out, out_grad)
inp = as_gpuarray_variable(inp, ctx_name) inp = as_gpuarray_variable(inp, ctx_name)
assert (inp.ndim in [4, 5])
out_grad = as_gpuarray_variable(out_grad, ctx_name) out_grad = as_gpuarray_variable(out_grad, ctx_name)
assert (out_grad.ndim in [4, 5])
out = as_gpuarray_variable(out, ctx_name) out = as_gpuarray_variable(out, ctx_name)
assert(out.ndim in [4, 5])
if desc.owner is not None: assert (out_grad.ndim == inp.ndim)
nd = desc.owner.op.get_ndim() + 2 assert (inp.ndim == out.ndim)
if inp.type.ndim != nd:
raise TypeError('inp must be %dD tensor' % (nd,))
if out_grad.type.ndim != nd: ws = tensor.as_tensor_variable(ws)
raise TypeError('out_grad must be %dD tensor' % (nd,)) stride = tensor.as_tensor_variable(stride)
pad = tensor.as_tensor_variable(pad)
assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
assert ws.type.ndim == 1
if out.type.ndim != nd: return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
raise TypeError('out must be %dD tensor' % (nd,))
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnPoolingDescriptor_t'):
raise TypeError('desc must be cudnnPoolingDescriptor_t')
return Apply(self, [inp, out, out_grad, desc], [inp.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
...@@ -1205,11 +1247,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1205,11 +1247,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
desc = GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode, pad=pad)() return GpuDnnPool(mode=mode)(img, ws, stride, pad)
return GpuDnnPool()(img, desc)
class GpuDnnSoftmaxBase(DnnBase): class GpuDnnSoftmaxBase(DnnBase):
""" """
Op for the cuDNN Softmax. Op for the cuDNN Softmax.
...@@ -1262,6 +1304,7 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1262,6 +1304,7 @@ class GpuDnnSoftmaxBase(DnnBase):
class GpuDnnSoftmax(GpuDnnSoftmaxBase): class GpuDnnSoftmax(GpuDnnSoftmaxBase):
""" """
Op for the cuDNN Softmax. Op for the cuDNN Softmax.
...@@ -1295,6 +1338,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase): ...@@ -1295,6 +1338,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
""" """
Op for the cuDNN SoftmaxGrad. Op for the cuDNN SoftmaxGrad.
...@@ -1466,11 +1510,12 @@ def local_pool_dnn_grad_stride(node, ctx_name): ...@@ -1466,11 +1510,12 @@ def local_pool_dnn_grad_stride(node, ctx_name):
pad = node.op.padding pad = node.op.padding
mode = node.op.mode mode = node.op.mode
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)() return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
return GpuDnnPoolGrad()(gpu_contiguous(inp), gpu_contiguous(out),
gpu_contiguous(out), gpu_contiguous(out_grad),
gpu_contiguous(out_grad), ds,
desc) st,
pad)
@register_opt('cudnn') @register_opt('cudnn')
...@@ -1490,11 +1535,10 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name): ...@@ -1490,11 +1535,10 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
cg = gpu_contiguous(out_grad) cg = gpu_contiguous(out_grad)
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)()
# We reuse cg because CuDNN does not use the value of the `out` # We reuse cg because CuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This # argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know. # has been observed in v2 and v3 as far as I know.
return GpuDnnPoolGrad()(gpu_contiguous(inp), cg, cg, desc) return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), cg, cg, ds, st, pad)
@register_opt('cudnn') @register_opt('cudnn')
...@@ -1547,6 +1591,7 @@ def local_logsoftmax_to_dnn(node, ctx_name): ...@@ -1547,6 +1591,7 @@ def local_logsoftmax_to_dnn(node, ctx_name):
class NoCuDNNRaise(Optimizer): class NoCuDNNRaise(Optimizer):
def apply(self, fgraph): def apply(self, fgraph):
""" """
Raise an error if cudnn can't be used. Raise an error if cudnn can't be used.
......
...@@ -2,12 +2,15 @@ ...@@ -2,12 +2,15 @@
cudnnTensorDescriptor_t APPLY_SPECIFIC(input); cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
cudnnTensorDescriptor_t APPLY_SPECIFIC(output); cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
cudnnPoolingDescriptor_t APPLY_SPECIFIC(pool);
#section init_code_struct #section init_code_struct
cudnnStatus_t APPLY_SPECIFIC(err); cudnnStatus_t APPLY_SPECIFIC(err);
APPLY_SPECIFIC(input) = NULL; APPLY_SPECIFIC(input) = NULL;
APPLY_SPECIFIC(output) = NULL; APPLY_SPECIFIC(output) = NULL;
APPLY_SPECIFIC(pool) = NULL;
if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) != CUDNN_STATUS_SUCCESS) { if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
...@@ -19,16 +22,25 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) ...@@ -19,16 +22,25 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output)))
"(out): %s", cudnnGetErrorString(APPLY_SPECIFIC(err))); "(out): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL; FAIL;
} }
if ((APPLY_SPECIFIC(err) = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
#section cleanup_code_struct #section cleanup_code_struct
if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input)); } if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input)); }
if (APPLY_SPECIFIC(output) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output)); } if (APPLY_SPECIFIC(output) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output)); }
if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC(pool)); }
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
cudnnPoolingDescriptor_t desc, PyArrayObject *ws,
PyArrayObject *stride,
PyArrayObject *pad,
PyGpuArrayObject **out, PyGpuArrayObject **out,
PyGpuContextObject *c) { PyGpuContextObject *c) {
cudnnStatus_t err; cudnnStatus_t err;
...@@ -46,14 +58,21 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -46,14 +58,21 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
int w[3]; int w[3];
int p[3]; int p[3];
int s[3]; int s[3];
int ndims; int ndims = PyArray_DIM(ws, 0);//PyGpuArray_NDIM(img) - 2;
for(int i = 0; i < ndims; i++) {
w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
}
for(int i = 0; i < ndims; i++) {
p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
}
for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
}
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
err = cudnnGetPoolingNdDescriptor(desc, 3, &mode, &ndims, w, p, s);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
"error doing cudnnGetPoolingDescriptor operation: %s",
cudnnGetErrorString(err));
return 1;
} }
dims[0] = PyGpuArray_DIM(img, 0); dims[0] = PyGpuArray_DIM(img, 0);
...@@ -98,7 +117,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -98,7 +117,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
cuda_wait((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnPoolingForward( err = cudnnPoolingForward(
APPLY_SPECIFIC(_handle), desc, APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(pool),
alpha, alpha,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img),
beta, beta,
......
...@@ -4,6 +4,7 @@ cudnnTensorDescriptor_t APPLY_SPECIFIC(input); ...@@ -4,6 +4,7 @@ cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
cudnnTensorDescriptor_t APPLY_SPECIFIC(input_grad); cudnnTensorDescriptor_t APPLY_SPECIFIC(input_grad);
cudnnTensorDescriptor_t APPLY_SPECIFIC(output); cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
cudnnTensorDescriptor_t APPLY_SPECIFIC(output_grad); cudnnTensorDescriptor_t APPLY_SPECIFIC(output_grad);
cudnnPoolingDescriptor_t APPLY_SPECIFIC(pool);
#section init_code_struct #section init_code_struct
...@@ -11,6 +12,7 @@ APPLY_SPECIFIC(input) = NULL; ...@@ -11,6 +12,7 @@ APPLY_SPECIFIC(input) = NULL;
APPLY_SPECIFIC(input_grad) = NULL; APPLY_SPECIFIC(input_grad) = NULL;
APPLY_SPECIFIC(output) = NULL; APPLY_SPECIFIC(output) = NULL;
APPLY_SPECIFIC(output_grad) = NULL; APPLY_SPECIFIC(output_grad) = NULL;
APPLY_SPECIFIC(pool) = NULL;
{ {
cudnnStatus_t err; cudnnStatus_t err;
...@@ -38,6 +40,11 @@ APPLY_SPECIFIC(output_grad) = NULL; ...@@ -38,6 +40,11 @@ APPLY_SPECIFIC(output_grad) = NULL;
cudnnGetErrorString(err)); cudnnGetErrorString(err));
FAIL; FAIL;
} }
if ((err = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(err));
FAIL;
}
} }
#section cleanup_code_struct #section cleanup_code_struct
...@@ -46,13 +53,16 @@ if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC ...@@ -46,13 +53,16 @@ if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC
if (APPLY_SPECIFIC(input_grad) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input_grad)); } if (APPLY_SPECIFIC(input_grad) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input_grad)); }
if (APPLY_SPECIFIC(output) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output)); } if (APPLY_SPECIFIC(output) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output)); }
if (APPLY_SPECIFIC(output_grad) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output_grad)); } if (APPLY_SPECIFIC(output_grad) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output_grad)); }
if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC(pool)); }
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
PyGpuArrayObject *out, PyGpuArrayObject *out,
PyGpuArrayObject *out_grad, PyGpuArrayObject *out_grad,
cudnnPoolingDescriptor_t desc, PyArrayObject *ws,
PyArrayObject *stride,
PyArrayObject *pad,
PyGpuArrayObject **inp_grad, PyGpuArrayObject **inp_grad,
PyGpuContextObject *c) { PyGpuContextObject *c) {
cudnnStatus_t err; cudnnStatus_t err;
...@@ -85,6 +95,26 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -85,6 +95,26 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
return 1; return 1;
} }
int w[3];
int p[3];
int s[3];
int ndims = PyArray_DIM(ws, 0);//PyGpuArray_NDIM(img) - 2;
for(int i = 0; i < ndims; i++) {
w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
}
for(int i = 0; i < ndims; i++) {
p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
}
for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
}
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
}
if (c_set_tensorNd(*inp_grad, APPLY_SPECIFIC(input_grad)) != 0) if (c_set_tensorNd(*inp_grad, APPLY_SPECIFIC(input_grad)) != 0)
return 1; return 1;
...@@ -118,7 +148,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -118,7 +148,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
cuda_wait((*inp_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*inp_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnPoolingBackward( err = cudnnPoolingBackward(
APPLY_SPECIFIC(_handle), desc, APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(pool),
alpha, alpha,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(out), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(out),
APPLY_SPECIFIC(output_grad), PyGpuArray_DEV_DATA(out_grad), APPLY_SPECIFIC(output_grad), PyGpuArray_DEV_DATA(out_grad),
......
...@@ -339,6 +339,7 @@ def test_dnn_tag(): ...@@ -339,6 +339,7 @@ def test_dnn_tag():
class TestDnnInferShapes(utt.InferShapeTester): class TestDnnInferShapes(utt.InferShapeTester):
def setUp(self): def setUp(self):
super(TestDnnInferShapes, self).setUp() super(TestDnnInferShapes, self).setUp()
self.mode = mode_with_gpu self.mode = mode_with_gpu
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论