提交 268bc917 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron 提交者: --global

Extend the current pooling op to support 3d pooling.

This does not have any tests or optimizations for 3d pooling, but the 2d support still works at least.
上级 525c9c84
...@@ -197,6 +197,85 @@ cudnnConvolutionBackwardData_v2( ...@@ -197,6 +197,85 @@ cudnnConvolutionBackwardData_v2(
#define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2 #define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2
/* Shim for cuDNN R1: emulate the Nd pooling-descriptor API on top of the
 * 2d-only cudnnSetPoolingDescriptor.  Only 2d pooling with zero padding
 * can be expressed with the old API; anything else is reported as
 * CUDNN_STATUS_NOT_SUPPORTED.
 *
 * Fix: the last parameter was declared `const in strideA[]` (typo for
 * `int`), which does not compile. */
static inline cudnnStatus_t
cudnnSetPoolingNdDescriptor(
    cudnnPoolingDescriptor_t poolingDesc,
    const cudnnPoolingMode_t mode,
    int nbDims,
    const int windowDimA[],
    const int paddingA[],
    const int strideA[]) {
  if (nbDims != 2) return CUDNN_STATUS_NOT_SUPPORTED;
  if (paddingA[0] != 0 || paddingA[1] != 0) return CUDNN_STATUS_NOT_SUPPORTED;
  return cudnnSetPoolingDescriptor(poolingDesc, mode,
                                   windowDimA[0], windowDimA[1],
                                   strideA[0], strideA[1]);
}
/* Shim for cuDNN R1: emulate the Nd pooling-descriptor query on top of the
 * 2d-only cudnnGetPoolingDescriptor.  Always reports 2 dimensions and zero
 * padding (the old API has no padding).  The caller must provide room for
 * at least 2 entries in windowA/paddingA/strideA.
 *
 * Fix: the size check read `ndDimsRequested`, an undeclared identifier;
 * the parameter is named `nbDimsRequested`. */
static inline cudnnStatus_t
cudnnGetPoolingNdDescriptor(
    const cudnnPoolingDescriptor_t poolingDesc,
    const int nbDimsRequested,
    cudnnPoolingMode_t *mode,
    int *nbDims,
    int windowA[],
    int paddingA[],
    int strideA[]) {
  int win0, win1, str0, str1;
  cudnnStatus_t err;
  if (nbDimsRequested < 2) return CUDNN_STATUS_NOT_SUPPORTED;
  err = cudnnGetPoolingDescriptor(poolingDesc, mode, &win0, &win1,
                                  &str0, &str1);
  if (err != CUDNN_STATUS_SUCCESS) return err;
  *nbDims = 2;
  /* The v1 API cannot express padding, so it is always zero. */
  paddingA[0] = 0;
  paddingA[1] = 0;
  windowA[0] = win0;
  windowA[1] = win1;
  strideA[0] = str0;
  strideA[1] = str1;
  return CUDNN_STATUS_SUCCESS;
}
/* Shim for cuDNN R1: the v2 forward-pooling entry point takes alpha/beta
 * blending factors that the old API does not have.  Only the identity
 * combination (alpha == 1, beta == 0) can be forwarded; everything else
 * is rejected as CUDNN_STATUS_NOT_SUPPORTED.
 * NOTE(review): alpha/beta are assumed to point at floats — confirm the
 * callers never pass double scaling factors. */
static inline cudnnStatus_t
cudnnPoolingForward_v2(
    cudnnHandle_t handle,
    const cudnnPoolingDescriptor_t poolingDesc,
    const void *alpha,
    const cudnnTensorDescriptor_t srcDesc,
    const void *srcData,
    const void *beta,
    const cudnnTensorDescriptor_t destDesc,
    void *destData) {
  const float a = *(const float *)alpha;
  const float b = *(const float *)beta;
  if (a != 1.0f || b != 0.0f)
    return CUDNN_STATUS_NOT_SUPPORTED;
  return cudnnPoolingForward(handle, poolingDesc,
                             srcDesc, srcData,
                             destDesc, destData);
}
#define cudnnPoolingForward cudnnPoolingForward_v2
/* Shim for cuDNN R1: the v2 backward-pooling entry point takes alpha/beta
 * blending factors that the old API does not have.  Only the identity
 * combination (alpha == 1, beta == 0) can be forwarded; everything else
 * is rejected as CUDNN_STATUS_NOT_SUPPORTED.
 * NOTE(review): alpha/beta are assumed to point at floats — confirm the
 * callers never pass double scaling factors. */
static inline cudnnStatus_t
cudnnPoolingBackward_v2(
    cudnnHandle_t handle,
    const cudnnPoolingDescriptor_t poolingDesc,
    const void *alpha,
    const cudnnTensorDescriptor_t srcDesc,
    const void *srcData,
    const cudnnTensorDescriptor_t srcDiffDesc,
    const void *srcDiffData,
    const cudnnTensorDescriptor_t destDesc,
    const void *destData,
    const void *beta,
    const cudnnTensorDescriptor_t destDiffDesc,
    void *destDiffData) {
  const float a = *(const float *)alpha;
  const float b = *(const float *)beta;
  if (a != 1.0f || b != 0.0f)
    return CUDNN_STATUS_NOT_SUPPORTED;
  return cudnnPoolingBackward(handle, poolingDesc,
                              srcDesc, srcData,
                              srcDiffDesc, srcDiffData,
                              destDesc, destData,
                              destDiffDesc, destDiffData);
}
#define cudnnPoolingBackward cudnnPoolingBackward_v2
//Needed for R2 rc2 //Needed for R2 rc2
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING CUDNN_POOLING_AVERAGE # define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING CUDNN_POOLING_AVERAGE
#else #else
......
...@@ -1266,7 +1266,7 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1266,7 +1266,7 @@ class GpuDnnPoolDesc(GpuOp):
padX is the size of the left and right borders, padX is the size of the left and right borders,
padY is the size of the top and bottom borders. padY is the size of the top and bottom borders.
""" """
__props__ = ('ws', 'stride', 'mode', 'pad') __props__ = ('ws', 'stride', 'mode', 'pad', 'nd')
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -1283,19 +1283,23 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1283,19 +1283,23 @@ class GpuDnnPoolDesc(GpuOp):
def do_constant_folding(self, node): def do_constant_folding(self, node):
return False return False
def __init__(self, ws=(1, 1), stride=(1, 1), mode='max', pad=(0, 0)): def __init__(self, ws=(1, 1), stride=(1, 1), mode='max', pad=(0, 0), nd=2):
if mode == 'average': if mode == 'average':
mode = 'average_inc_pad' mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad') assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
self.mode = mode self.mode = mode
assert len(ws) == 2 assert len(ws) == nd
self.ws = ws self.ws = ws
assert len(stride) == 2 assert len(stride) == nd
self.stride = stride self.stride = stride
assert len(stride) == 2 assert len(stride) == nd
self.pad = pad self.pad = pad
if (pad[0] != 0 or pad[1] != 0) and version() == -1: if (pad[0] != 0 or pad[1] != 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2") raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
assert nd in (2, 3)
if nd == 3 and version() < (3000, 3000):
raise RuntimeError("3 pooling only supported on CuDNN v3")
self.nd = nd
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
...@@ -1333,22 +1337,14 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1333,22 +1337,14 @@ class GpuDnnPoolDesc(GpuOp):
"descriptor: %%s", cudnnGetErrorString(err)); "descriptor: %%s", cudnnGetErrorString(err));
%(fail)s %(fail)s
} }
#ifndef CUDNN_VERSION {
err = cudnnSetPoolingDescriptor( int win[%(nd)d] = {%(win)s};
%(desc)s, int pad[%(nd)d] = {%(pad)s};
%(mode_flag)s, int str[%(nd)d] = {%(str)s};
%(wsX)d, %(wsY)d, err = cudnnSetPoolingNdDescriptor(
%(stridex)d, %(stridey)d %(desc)s, %(mode_flag)s, %(nd)d,
); win, pad, str);
#else }
err = cudnnSetPooling2dDescriptor(
%(desc)s,
%(mode_flag)s,
%(wsX)d, %(wsY)d,
%(padX)d, %(padY)d,
%(stridex)d, %(stridey)d
);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s", PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
...@@ -1356,46 +1352,49 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1356,46 +1352,49 @@ class GpuDnnPoolDesc(GpuOp):
} }
} }
""" % dict(name=name, desc=desc, mode_flag=mode_flag, fail=sub['fail'], """ % dict(name=name, desc=desc, mode_flag=mode_flag, fail=sub['fail'],
wsX=self.ws[0], wsY=self.ws[1], nd=self.nd, win=', '.join(str(w) for w in self.ws),
stridex=self.stride[0], stridey=self.stride[1], pad=', '.join(str(p) for p in self.pad),
padX=self.pad[0], padY=self.pad[1]) str=', '.join(str(s) for s in self.stride))
def c_code_cache_version(self): def c_code_cache_version(self):
return (2, version()) return (3, version())
class GpuDnnPool(DnnBase): class GpuDnnPool(DnnBase):
""" """
Pooling. Pooling.
:param img: the image 4d tensor. :param img: the image 4d or 5d tensor.
:param desc: the pooling descriptor. :param desc: the pooling descriptor.
""" """
__props__ = () __props__ = ()
def make_node(self, img, desc): def make_node(self, img, desc):
img = as_cuda_ndarray_variable(img) img = as_cuda_ndarray_variable(img)
if img.type.ndim != 4:
raise TypeError('img must be 4D tensor')
if not isinstance(desc.type, CDataType) \ if not isinstance(desc.type, CDataType) \
or desc.type.ctype != 'cudnnPoolingDescriptor_t': or desc.type.ctype != 'cudnnPoolingDescriptor_t':
raise TypeError('desc must be cudnnPoolingDescriptor_t') raise TypeError('desc must be cudnnPoolingDescriptor_t')
return Apply(self, [img, desc], dop = desc.owner.op
[img.type()]) e_ndim = dop.nd + 2 # 4 or 5
if img.type.ndim != e_ndim:
raise TypeError('img must be %dD tensor' % e_ndim)
return Apply(self, [img, desc], [img.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
desc = node.inputs[1].owner.op desc = node.inputs[1].owner.op
kh, kw = desc.ws nd = desc.nd
sh, sw = desc.stride w = desc.ws
padh, padw = desc.pad s = desc.stride
return [( p = desc.pad
shape[0][0], ret = [shape[0][0], shape[0][1],
shape[0][1], (shape[0][2] + 2 * p[0] - w[0]) // s[0] + 1,
(shape[0][2] + 2*padh - kh)//sh + 1, (shape[0][3] + 2 * p[1] - w[1]) // s[1] + 1]
(shape[0][3] + 2*padw - kw)//sw + 1 if nd == 3:
)] ret.append((shape[0][4] + 2 * p[2] - w[2]) // s[2] + 1)
return [ret]
def c_support_code_struct(self, node, name): def c_support_code_struct(self, node, name):
return """ return """
...@@ -1409,12 +1408,12 @@ cudnnStatus_t err%(name)s; ...@@ -1409,12 +1408,12 @@ cudnnStatus_t err%(name)s;
input%(name)s = NULL; input%(name)s = NULL;
output%(name)s = NULL; output%(name)s = NULL;
if ((err%(name)s = cudnnCreateTensorDescriptor(&input%(name)s)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensorDescriptor(&input%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
"(inp): %%s", cudnnGetErrorString(err%(name)s)); "(inp): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
if ((err%(name)s = cudnnCreateTensorDescriptor(&output%(name)s)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensorDescriptor(&output%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
"(out): %%s", cudnnGetErrorString(err%(name)s)); "(out): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
...@@ -1430,65 +1429,51 @@ if (output%(name)s != NULL) { cudnnDestroyTensorDescriptor(output%(name)s); } ...@@ -1430,65 +1429,51 @@ if (output%(name)s != NULL) { cudnnDestroyTensorDescriptor(output%(name)s); }
desc = inputs[1] desc = inputs[1]
out, = outputs out, = outputs
set_in = c_set_tensor4d(inputs[0], "input" + str(name),
'err' + name, sub['fail'])
set_out = c_set_tensor4d(out, "output" + str(name),
'err' + name, sub['fail'])
return """ return """
cudnnStatus_t err%(name)s; cudnnStatus_t err%(name)s;
int %(out)s_dims[4]; int %(out)s_dims[5];
if (!CudaNdarray_is_c_contiguous(%(input)s)) { if (!CudaNdarray_is_c_contiguous(%(input)s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported."); PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
%(fail)s %(fail)s
} }
%(set_in)s if (c_set_tensorNd(%(input)s, %(input)s->nd, %(input_desc)s) != 0)
%(fail)s
cudnnPoolingMode_t mode; cudnnPoolingMode_t mode;
int wsX, wsY, vpad, hpad, strideX, strideY; int win[3];
#ifndef CUDNN_VERSION int pad[3];
err%(name)s = cudnnGetPoolingDescriptor( int str[3];
%(desc)s, &mode, int ndims;
&wsX, &wsY, err%(name)s = cudnnGetPoolingNdDescriptor(
&strideX, &strideY); %(desc)s, 3,
#else &mode, &ndims,
err%(name)s = cudnnGetPooling2dDescriptor( win, pad, str);
%(desc)s, &mode,
&wsX, &wsY,
&vpad, &hpad,
&strideX, &strideY);
#endif
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnGetPoolingDescriptor operation: %%s", "GpuDnnPool: error doing cudnnGetPoolingNdDescriptor operation: %%s",
cudnnGetErrorString(err%(name)s)); cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
%(out)s_dims[0] = CudaNdarray_HOST_DIMS(%(input)s)[0]; %(out)s_dims[0] = CudaNdarray_HOST_DIMS(%(input)s)[0];
%(out)s_dims[1] = CudaNdarray_HOST_DIMS(%(input)s)[1]; %(out)s_dims[1] = CudaNdarray_HOST_DIMS(%(input)s)[1];
%(out)s_dims[2] = (CudaNdarray_HOST_DIMS(%(input)s)[2] + (vpad*2) - wsX) / strideX + 1; %(out)s_dims[2] = (CudaNdarray_HOST_DIMS(%(input)s)[2] + (pad[0]*2) - win[0]) / str[0] + 1;
%(out)s_dims[3] = (CudaNdarray_HOST_DIMS(%(input)s)[3] + (hpad*2) - wsY) / strideY + 1; %(out)s_dims[3] = (CudaNdarray_HOST_DIMS(%(input)s)[3] + (pad[1]*2) - win[1]) / str[1] + 1;
if (ndims == 3)
%(out)s_dims[4] = (CudaNdarray_HOST_DIMS(%(input)s)[4] + (pad[2]*2) - win[2]) / str[2] + 1;
if (CudaNdarray_prep_output(&%(out)s, 4, %(out)s_dims) != 0) if (CudaNdarray_prep_output(&%(out)s, ndims+2, %(out)s_dims) != 0)
{ {
%(fail)s %(fail)s
} }
%(set_out)s if (c_set_tensorNd(%(out)s, %(out)s->nd, %(output_desc)s) != 0)
#ifndef CUDNN_VERSION %(fail)s
err%(name)s = cudnnPoolingForward(
_handle,
%(desc)s,
%(input_desc)s, CudaNdarray_DEV_DATA(%(input)s),
%(output_desc)s, CudaNdarray_DEV_DATA(%(out)s)
);
#else
{ {
const float alpha = 1; const float alpha = 1;
const float beta = 0; const float beta = 0;
...@@ -1501,7 +1486,6 @@ _handle, ...@@ -1501,7 +1486,6 @@ _handle,
%(output_desc)s, CudaNdarray_DEV_DATA(%(out)s) %(output_desc)s, CudaNdarray_DEV_DATA(%(out)s)
); );
} }
#endif
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnPoolingForward operation: %%s", "GpuDnnPool: error doing cudnnPoolingForward operation: %%s",
...@@ -1509,8 +1493,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1509,8 +1493,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(fail)s %(fail)s
} }
""" % dict(out=out, desc=desc, fail=sub['fail'], """ % dict(out=out, desc=desc, fail=sub['fail'],
name=name, set_in=set_in, name=name, input=inputs[0],
set_out=set_out, input=inputs[0],
input_desc="input"+name, input_desc="input"+name,
output_desc="output"+name) output_desc="output"+name)
...@@ -1531,7 +1514,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1531,7 +1514,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [[1], [0]] return [[1], [0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return (6, version()) return (7, version())
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
...@@ -1546,21 +1529,23 @@ class GpuDnnPoolGrad(DnnBase): ...@@ -1546,21 +1529,23 @@ class GpuDnnPoolGrad(DnnBase):
__props__ = () __props__ = ()
def make_node(self, inp, out, inp_grad, desc): def make_node(self, inp, out, inp_grad, desc):
if not isinstance(desc.type, CDataType) \
or desc.type.ctype != 'cudnnPoolingDescriptor_t':
raise TypeError('desc must be cudnnPoolingDescriptor_t')
nd = desc.owner.op.nd + 2 # 4 or 5
inp = as_cuda_ndarray_variable(inp) inp = as_cuda_ndarray_variable(inp)
if inp.type.ndim != 4: if inp.type.ndim != nd:
raise TypeError('inp must be 4D tensor') raise TypeError('inp must be %dD tensor' % (nd,))
inp_grad = as_cuda_ndarray_variable(inp_grad) inp_grad = as_cuda_ndarray_variable(inp_grad)
if inp_grad.type.ndim != 4: if inp_grad.type.ndim != nd:
raise TypeError('inp_grad must be 4D tensor') raise TypeError('inp_grad must be %dD tensor' % (nd,))
out = as_cuda_ndarray_variable(out) out = as_cuda_ndarray_variable(out)
if out.type.ndim != 4: if out.type.ndim != nd:
raise TypeError('out must be 4D tensor') raise TypeError('out must be %dD tensor' % (nd,))
if not isinstance(desc.type, CDataType) \
or desc.type.ctype != 'cudnnPoolingDescriptor_t':
raise TypeError('desc must be cudnnPoolingDescriptor_t')
return Apply(self, [inp, out, inp_grad, desc], return Apply(self, [inp, out, inp_grad, desc],
[inp.type()]) [inp.type()])
...@@ -1621,18 +1606,6 @@ if (output_grad%(name)s != NULL) { cudnnDestroyTensorDescriptor(output_grad%(nam ...@@ -1621,18 +1606,6 @@ if (output_grad%(name)s != NULL) { cudnnDestroyTensorDescriptor(output_grad%(nam
out, inp, inp_grad, desc = inputs out, inp, inp_grad, desc = inputs
out_grad, = outputs out_grad, = outputs
set_in = "\n".join([
c_set_tensor4d(inp, "input" + name,
'err' + name, sub['fail']),
c_set_tensor4d(inp_grad, "input_grad" + name,
'err' + name, sub['fail']),
c_set_tensor4d(out, "output" + name,
'err' + name, sub['fail'])
])
set_out = c_set_tensor4d(out, "output_grad" + name,
'err' + name, sub['fail'])
return """ return """
cudnnStatus_t err%(name)s; cudnnStatus_t err%(name)s;
...@@ -1654,25 +1627,23 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) { ...@@ -1654,25 +1627,23 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) {
%(fail)s %(fail)s
} }
%(set_in)s if (c_set_tensorNd(%(input)s, %(input)s->nd, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(input_grad)s, %(input_grad)s->nd, %(input_grad_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(output)s, %(output)s->nd, %(output_desc)s) != 0)
%(fail)s
if (CudaNdarray_prep_output(&%(output_grad)s, 4, if (CudaNdarray_prep_output(&%(output_grad)s,
%(output)s->nd,
CudaNdarray_HOST_DIMS(%(output)s)) != 0) CudaNdarray_HOST_DIMS(%(output)s)) != 0)
{ {
%(fail)s %(fail)s
} }
%(set_out)s if (c_set_tensorNd(%(output_grad)s, %(output_grad)s->nd, %(output_grad_desc)s) != 0)
#ifndef CUDNN_VERSION %(fail)s
err%(name)s = cudnnPoolingBackward(
_handle,
%(desc)s,
%(input_desc)s, CudaNdarray_DEV_DATA(%(input)s),
%(input_grad_desc)s, CudaNdarray_DEV_DATA(%(input_grad)s),
%(output_desc)s, CudaNdarray_DEV_DATA(%(output)s),
%(output_grad_desc)s, CudaNdarray_DEV_DATA(%(output_grad)s)
);
#else
{ {
const float alpha = 1; const float alpha = 1;
const float beta = 0; const float beta = 0;
...@@ -1687,51 +1658,28 @@ _handle, ...@@ -1687,51 +1658,28 @@ _handle,
%(output_grad_desc)s, CudaNdarray_DEV_DATA(%(output_grad)s) %(output_grad_desc)s, CudaNdarray_DEV_DATA(%(output_grad)s)
); );
} }
#endif
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: error doing operation: %%s. " "GpuDnnPoolGrad: error doing operation: %%s.",
"input.shape=(%%d, %%d, %%d, %%d) " cudnnGetErrorString(err%(name)s));
"input_grad.shape=(%%d, %%d, %%d, %%d) " %(fail)s
"output.shape=(%%d, %%d, %%d, %%d) "
"output_grad.shape=(%%d, %%d, %%d, %%d)",
cudnnGetErrorString(err%(name)s),
CudaNdarray_HOST_DIMS(%(input)s)[0],
CudaNdarray_HOST_DIMS(%(input)s)[1],
CudaNdarray_HOST_DIMS(%(input)s)[2],
CudaNdarray_HOST_DIMS(%(input)s)[3],
CudaNdarray_HOST_DIMS(%(input_grad)s)[0],
CudaNdarray_HOST_DIMS(%(input_grad)s)[1],
CudaNdarray_HOST_DIMS(%(input_grad)s)[2],
CudaNdarray_HOST_DIMS(%(input_grad)s)[3],
CudaNdarray_HOST_DIMS(%(output)s)[0],
CudaNdarray_HOST_DIMS(%(output)s)[1],
CudaNdarray_HOST_DIMS(%(output)s)[2],
CudaNdarray_HOST_DIMS(%(output)s)[3],
CudaNdarray_HOST_DIMS(%(output_grad)s)[0],
CudaNdarray_HOST_DIMS(%(output_grad)s)[1],
CudaNdarray_HOST_DIMS(%(output_grad)s)[2],
CudaNdarray_HOST_DIMS(%(output_grad)s)[3]
);
%(fail)s
} }
""" % dict(output_grad=out_grad, desc=desc, """ % dict(output_grad=out_grad, desc=desc,
fail=sub['fail'], fail=sub['fail'], name=name,
name=name, set_in=set_in, input=inp, input_grad=inp_grad, output=out,
set_out=set_out, input=inp, input_grad=inp_grad, output=out,
input_desc="input"+name, input_desc="input"+name,
input_grad_desc="input_grad"+name, input_grad_desc="input_grad"+name,
output_desc="output"+name, output_desc="output"+name,
output_grad_desc="output_grad"+name) output_grad_desc="output_grad"+name)
def c_code_cache_version(self): def c_code_cache_version(self):
return (5, version()) return (6, version())
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0), nd=2):
""" """
GPU pooling using cuDNN from NVIDIA. GPU pooling using cuDNN from NVIDIA.
...@@ -1746,6 +1694,9 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1746,6 +1694,9 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
:param pad: (padX, padY) padding information. :param pad: (padX, padY) padding information.
padX is the size of the left and right borders, padX is the size of the left and right borders,
padY is the size of the top and bottom borders. padY is the size of the top and bottom borders.
:param nd: dimensions of pooling, can be 2 or 3 for 2d or 3d pooling
If set to 3 all other params (except mode) must have an extra
dimension to match. 3 is only available for cudnn v3
:warning: The cuDNN library only works with GPU that have a compute :warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not capability of 3.0 or higer. This means that older GPU will not
...@@ -1753,7 +1704,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1753,7 +1704,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
:note: This Op implements the ignore_border=True of max_pool_2d. :note: This Op implements the ignore_border=True of max_pool_2d.
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
desc = GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode, pad=pad)() desc = GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode, pad=pad, nd=nd)()
return GpuDnnPool()(img, desc) return GpuDnnPool()(img, desc)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论