提交 7ddf071c authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4256 from harmdevries89/gpupool_newbackend

Gpupool newbackend
...@@ -364,28 +364,32 @@ def test_pooling_with_tensor_vars(): ...@@ -364,28 +364,32 @@ def test_pooling_with_tensor_vars():
cast_to_output_type=False, cast_to_output_type=False,
mode=mode_with_gpu) mode=mode_with_gpu)
out2 = pool_2d_i2n(x, ds=(2, 2), strides=(1, 1),
pad=(0, 0),
pool_function=T.max)
mode_without_gpu2 = mode_without_gpu.including() mode_without_gpu2 = mode_without_gpu.including()
mode_without_gpu2.check_isfinite = False mode_without_gpu2.check_isfinite = False
f1 = theano.function([x], fn(x), mode=mode_with_gpu) f_gpu = theano.function([x], fn(x), mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes]) for node in f_gpu.maker.fgraph.apply_nodes])
f2 = theano.function([x], out2, mode=mode_without_gpu2)
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool) i = 1
for node in f2.maker.fgraph.apply_nodes])
for shp in [(1, 10, 100, 100), for shp in [(1, 10, 100, 100),
(1, 3, 99, 99), (1, 3, 99, 99),
(32, 1, 147, 197), (32, 1, 147, 197)]:
]:
data = numpy.random.normal(0, 1, shp).astype("float32") data = numpy.random.normal(0, 1, shp).astype("float32")
a = f1(data).__array__() out = pool_2d_i2n(x, ds=(i, i), strides=(1, 1),
pad=(0, 0),
b = f2(data).__array__() pool_function=T.max)
f_cpu = theano.function([x], out, mode=mode_without_gpu2)
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f_cpu.maker.fgraph.apply_nodes])
# Change the window size dynamically for gpu op
ws.set_value(numpy.array([i, i]).astype('int32'))
a = f_gpu(data).__array__()
b = f_cpu(data).__array__()
utt.assert_allclose(a, b) utt.assert_allclose(a, b)
i += 1
def test_old_pool_interface(): def test_old_pool_interface():
...@@ -745,6 +749,7 @@ def test_dnn_tag(): ...@@ -745,6 +749,7 @@ def test_dnn_tag():
class TestDnnInferShapes(utt.InferShapeTester): class TestDnnInferShapes(utt.InferShapeTester):
def setUp(self): def setUp(self):
super(TestDnnInferShapes, self).setUp() super(TestDnnInferShapes, self).setUp()
self.mode = mode_with_gpu self.mode = mode_with_gpu
......
...@@ -142,6 +142,7 @@ dnn_available.msg = None ...@@ -142,6 +142,7 @@ dnn_available.msg = None
class DnnBase(COp): class DnnBase(COp):
""" """
Creates a handle for cudnn and pulls in the cudnn libraries and headers. Creates a handle for cudnn and pulls in the cudnn libraries and headers.
...@@ -255,6 +256,7 @@ version.v = None ...@@ -255,6 +256,7 @@ version.v = None
class GpuDnnConvDesc(COp): class GpuDnnConvDesc(COp):
""" """
This Op builds a convolution descriptor for use in the other convolution This Op builds a convolution descriptor for use in the other convolution
operations. operations.
...@@ -388,6 +390,7 @@ def ensure_dt(val, default, name, dtype): ...@@ -388,6 +390,7 @@ def ensure_dt(val, default, name, dtype):
class GpuDnnConv(DnnBase): class GpuDnnConv(DnnBase):
""" """
The forward convolution. The forward convolution.
...@@ -555,6 +558,7 @@ class GpuDnnConv(DnnBase): ...@@ -555,6 +558,7 @@ class GpuDnnConv(DnnBase):
class GpuDnnConvGradW(DnnBase): class GpuDnnConvGradW(DnnBase):
""" """
The convolution gradient with respect to the weights. The convolution gradient with respect to the weights.
...@@ -675,6 +679,7 @@ class GpuDnnConvGradW(DnnBase): ...@@ -675,6 +679,7 @@ class GpuDnnConvGradW(DnnBase):
class GpuDnnConvGradI(DnnBase): class GpuDnnConvGradI(DnnBase):
""" """
The convolution gradient with respect to the inputs. The convolution gradient with respect to the inputs.
...@@ -943,6 +948,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', ...@@ -943,6 +948,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
class GpuDnnPoolDesc(Op): class GpuDnnPoolDesc(Op):
""" """
This Op builds a pooling descriptor for use in the other This Op builds a pooling descriptor for use in the other
pooling operations. pooling operations.
...@@ -1061,69 +1067,87 @@ class GpuDnnPoolDesc(Op): ...@@ -1061,69 +1067,87 @@ class GpuDnnPoolDesc(Op):
class GpuDnnPool(DnnBase): class GpuDnnPool(DnnBase):
"""
Pooling.
"""
Parameters Parameters
---------- ----------
img img
The image 4d tensor. The image 4d or 5d tensor.
desc Parameters
The pooling descriptor. ----------
ws : tensor variable
Window size.
stride : tensor variable
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
(padX, padY) or (padX, padY, padZ)
""" """
__props__ = () __props__ = ('mode',)
def __init__(self): def __init__(self, mode='max'):
DnnBase.__init__(self, ["dnn_pool.c"], "APPLY_SPECIFIC(dnn_pool)") DnnBase.__init__(self, ["dnn_pool.c"], "APPLY_SPECIFIC(dnn_pool)")
if mode == 'average':
mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
self.mode = mode
def make_node(self, img, desc): def get_op_params(self):
img = as_gpuarray_variable(img, infer_context_name(img)) if self.mode == 'max':
mode_flag = 'CUDNN_POOLING_MAX'
elif self.mode == "average_inc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if desc.owner is not None: return [('MODE_FLAG', mode_flag)]
e_ndim = desc.owner.op.get_ndim() + 2
if img.type.ndim != e_ndim: def make_node(self, img, ws, stride, pad):
raise TypeError('img must be %dD tensor' % (e_ndim,)) ctx_name = infer_context_name(img)
img = as_gpuarray_variable(img, ctx_name)
if (not isinstance(desc.type, CDataType) or ws = tensor.as_tensor_variable(ws)
desc.type.ctype != 'cudnnPoolingDescriptor_t'): stride = tensor.as_tensor_variable(stride)
raise TypeError('desc must be cudnnPoolingDescriptor_t') pad = tensor.as_tensor_variable(pad)
assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
assert ws.type.ndim == 1
return Apply(self, [img, desc], [img.type()]) return Apply(self, [img, ws, stride, pad], [img.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
desc = node.inputs[1].owner.op w = node.inputs[1]
w = desc.ws s = node.inputs[2]
s = desc.stride p = node.inputs[3]
p = desc.pad
res = [shape[0][0], shape[0][1], res = [shape[0][0], shape[0][1],
(shape[0][2] + 2 * p[0] - w[0]) // s[0] + 1, (shape[0][2] + 2 * p[0] - w[0]) // s[0] + 1,
(shape[0][3] + 2 * p[1] - w[1]) // s[1] + 1 (shape[0][3] + 2 * p[1] - w[1]) // s[1] + 1
] ]
if len(w) > 2: if node.inputs[0].ndim == 5:
res.append((shape[0][4] + 2 * p[2] - w[2]) // s[2] + 1) res.append((shape[0][4] + 2 * p[2] - w[2]) // s[2] + 1)
return [res] return [res]
def grad(self, inp, grads): def grad(self, inp, grads):
img, desc = inp img, ws, stride, pad = inp
grad, = grads grad, = grads
grad = gpu_contiguous(grad) grad = gpu_contiguous(grad)
out = self(img, desc) out = self(img, ws, stride, pad)
g_out = GpuDnnPoolGrad()(img, out, grad, desc) g_out = GpuDnnPoolGrad(mode=self.mode)(img, out, grad, ws, stride, pad)
return g_out, theano.gradient.DisconnectedType()() return g_out, theano.gradient.DisconnectedType()(), theano.gradient.DisconnectedType()(), theano.gradient.DisconnectedType()()
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to parameters
return [[1], [0]] return [[1], [0], [0], [0]]
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
""" """
The pooling gradient. The pooling gradient.
...@@ -1135,40 +1159,56 @@ class GpuDnnPoolGrad(DnnBase): ...@@ -1135,40 +1159,56 @@ class GpuDnnPoolGrad(DnnBase):
The output of the pooling in the forward. The output of the pooling in the forward.
out_grad out_grad
Same size as out, but is the corresponding gradient information. Same size as out, but is the corresponding gradient information.
desc ws : tensor variable
The pooling descriptor. Window size.
stride : tensor variable
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
(padX, padY) or (padX, padY, padZ)
""" """
__props__ = () __props__ = ('mode',)
def __init__(self): def __init__(self, mode='max'):
DnnBase.__init__(self, ["dnn_pool_grad.c"], DnnBase.__init__(self, ["dnn_pool_grad.c"],
"APPLY_SPECIFIC(dnn_pool_grad)") "APPLY_SPECIFIC(dnn_pool_grad)")
if mode == 'average':
mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
self.mode = mode
def get_op_params(self):
if self.mode == 'max':
mode_flag = 'CUDNN_POOLING_MAX'
elif self.mode == "average_inc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
def make_node(self, inp, out, out_grad, desc): return [('MODE_FLAG', mode_flag)]
def make_node(self, inp, out, out_grad, ws, stride, pad):
ctx_name = infer_context_name(inp, out, out_grad) ctx_name = infer_context_name(inp, out, out_grad)
inp = as_gpuarray_variable(inp, ctx_name) inp = as_gpuarray_variable(inp, ctx_name)
assert (inp.ndim in [4, 5])
out_grad = as_gpuarray_variable(out_grad, ctx_name) out_grad = as_gpuarray_variable(out_grad, ctx_name)
assert (out_grad.ndim in [4, 5])
out = as_gpuarray_variable(out, ctx_name) out = as_gpuarray_variable(out, ctx_name)
assert(out.ndim in [4, 5])
if desc.owner is not None: assert (out_grad.ndim == inp.ndim)
nd = desc.owner.op.get_ndim() + 2 assert (inp.ndim == out.ndim)
if inp.type.ndim != nd:
raise TypeError('inp must be %dD tensor' % (nd,))
if out_grad.type.ndim != nd: ws = tensor.as_tensor_variable(ws)
raise TypeError('out_grad must be %dD tensor' % (nd,)) stride = tensor.as_tensor_variable(stride)
pad = tensor.as_tensor_variable(pad)
assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
assert ws.type.ndim == 1
if out.type.ndim != nd: return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
raise TypeError('out must be %dD tensor' % (nd,))
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnPoolingDescriptor_t'):
raise TypeError('desc must be cudnnPoolingDescriptor_t')
return Apply(self, [inp, out, out_grad, desc], [inp.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
...@@ -1206,11 +1246,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1206,11 +1246,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
desc = GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode, pad=pad)() return GpuDnnPool(mode=mode)(img, ws, stride, pad)
return GpuDnnPool()(img, desc)
class GpuDnnSoftmaxBase(DnnBase): class GpuDnnSoftmaxBase(DnnBase):
""" """
Op for the cuDNN Softmax. Op for the cuDNN Softmax.
...@@ -1263,6 +1303,7 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1263,6 +1303,7 @@ class GpuDnnSoftmaxBase(DnnBase):
class GpuDnnSoftmax(GpuDnnSoftmaxBase): class GpuDnnSoftmax(GpuDnnSoftmaxBase):
""" """
Op for the cuDNN Softmax. Op for the cuDNN Softmax.
...@@ -1296,6 +1337,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase): ...@@ -1296,6 +1337,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
""" """
Op for the cuDNN SoftmaxGrad. Op for the cuDNN SoftmaxGrad.
...@@ -1467,11 +1509,12 @@ def local_pool_dnn_grad_stride(node, ctx_name): ...@@ -1467,11 +1509,12 @@ def local_pool_dnn_grad_stride(node, ctx_name):
pad = node.op.padding pad = node.op.padding
mode = node.op.mode mode = node.op.mode
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)() return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
return GpuDnnPoolGrad()(gpu_contiguous(inp), gpu_contiguous(out),
gpu_contiguous(out), gpu_contiguous(out_grad),
gpu_contiguous(out_grad), ds,
desc) st,
pad)
@register_opt('cudnn') @register_opt('cudnn')
...@@ -1491,11 +1534,10 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name): ...@@ -1491,11 +1534,10 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
cg = gpu_contiguous(out_grad) cg = gpu_contiguous(out_grad)
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)()
# We reuse cg because CuDNN does not use the value of the `out` # We reuse cg because CuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This # argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know. # has been observed in v2 and v3 as far as I know.
return GpuDnnPoolGrad()(gpu_contiguous(inp), cg, cg, desc) return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), cg, cg, ds, st, pad)
@register_opt('cudnn') @register_opt('cudnn')
...@@ -1548,6 +1590,7 @@ def local_logsoftmax_to_dnn(node, ctx_name): ...@@ -1548,6 +1590,7 @@ def local_logsoftmax_to_dnn(node, ctx_name):
class NoCuDNNRaise(Optimizer): class NoCuDNNRaise(Optimizer):
def apply(self, fgraph): def apply(self, fgraph):
""" """
Raise a error if cudnn can't be used. Raise a error if cudnn can't be used.
......
...@@ -2,12 +2,15 @@ ...@@ -2,12 +2,15 @@
cudnnTensorDescriptor_t APPLY_SPECIFIC(input); cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
cudnnTensorDescriptor_t APPLY_SPECIFIC(output); cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
cudnnPoolingDescriptor_t APPLY_SPECIFIC(pool);
#section init_code_struct #section init_code_struct
cudnnStatus_t APPLY_SPECIFIC(err); cudnnStatus_t APPLY_SPECIFIC(err);
APPLY_SPECIFIC(input) = NULL; APPLY_SPECIFIC(input) = NULL;
APPLY_SPECIFIC(output) = NULL; APPLY_SPECIFIC(output) = NULL;
APPLY_SPECIFIC(pool) = NULL;
if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) != CUDNN_STATUS_SUCCESS) { if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
...@@ -19,16 +22,25 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) ...@@ -19,16 +22,25 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output)))
"(out): %s", cudnnGetErrorString(APPLY_SPECIFIC(err))); "(out): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL; FAIL;
} }
if ((APPLY_SPECIFIC(err) = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
#section cleanup_code_struct #section cleanup_code_struct
if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input)); } if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input)); }
if (APPLY_SPECIFIC(output) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output)); } if (APPLY_SPECIFIC(output) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output)); }
if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC(pool)); }
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
cudnnPoolingDescriptor_t desc, PyArrayObject *ws,
PyArrayObject *stride,
PyArrayObject *pad,
PyGpuArrayObject **out, PyGpuArrayObject **out,
PyGpuContextObject *c) { PyGpuContextObject *c) {
cudnnStatus_t err; cudnnStatus_t err;
...@@ -46,14 +58,21 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -46,14 +58,21 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
int w[3]; int w[3];
int p[3]; int p[3];
int s[3]; int s[3];
int ndims; int ndims = PyArray_DIM(ws, 0);//PyGpuArray_NDIM(img) - 2;
for(int i = 0; i < ndims; i++) {
w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
}
for(int i = 0; i < ndims; i++) {
p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
}
for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
}
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
err = cudnnGetPoolingNdDescriptor(desc, 3, &mode, &ndims, w, p, s);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
"error doing cudnnGetPoolingDescriptor operation: %s",
cudnnGetErrorString(err));
return 1;
} }
dims[0] = PyGpuArray_DIM(img, 0); dims[0] = PyGpuArray_DIM(img, 0);
...@@ -98,7 +117,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -98,7 +117,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
cuda_wait((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnPoolingForward( err = cudnnPoolingForward(
APPLY_SPECIFIC(_handle), desc, APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(pool),
alpha, alpha,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img),
beta, beta,
......
...@@ -4,6 +4,7 @@ cudnnTensorDescriptor_t APPLY_SPECIFIC(input); ...@@ -4,6 +4,7 @@ cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
cudnnTensorDescriptor_t APPLY_SPECIFIC(input_grad); cudnnTensorDescriptor_t APPLY_SPECIFIC(input_grad);
cudnnTensorDescriptor_t APPLY_SPECIFIC(output); cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
cudnnTensorDescriptor_t APPLY_SPECIFIC(output_grad); cudnnTensorDescriptor_t APPLY_SPECIFIC(output_grad);
cudnnPoolingDescriptor_t APPLY_SPECIFIC(pool);
#section init_code_struct #section init_code_struct
...@@ -11,6 +12,7 @@ APPLY_SPECIFIC(input) = NULL; ...@@ -11,6 +12,7 @@ APPLY_SPECIFIC(input) = NULL;
APPLY_SPECIFIC(input_grad) = NULL; APPLY_SPECIFIC(input_grad) = NULL;
APPLY_SPECIFIC(output) = NULL; APPLY_SPECIFIC(output) = NULL;
APPLY_SPECIFIC(output_grad) = NULL; APPLY_SPECIFIC(output_grad) = NULL;
APPLY_SPECIFIC(pool) = NULL;
{ {
cudnnStatus_t err; cudnnStatus_t err;
...@@ -38,6 +40,11 @@ APPLY_SPECIFIC(output_grad) = NULL; ...@@ -38,6 +40,11 @@ APPLY_SPECIFIC(output_grad) = NULL;
cudnnGetErrorString(err)); cudnnGetErrorString(err));
FAIL; FAIL;
} }
if ((err = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(err));
FAIL;
}
} }
#section cleanup_code_struct #section cleanup_code_struct
...@@ -46,13 +53,16 @@ if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC ...@@ -46,13 +53,16 @@ if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC
if (APPLY_SPECIFIC(input_grad) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input_grad)); } if (APPLY_SPECIFIC(input_grad) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input_grad)); }
if (APPLY_SPECIFIC(output) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output)); } if (APPLY_SPECIFIC(output) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output)); }
if (APPLY_SPECIFIC(output_grad) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output_grad)); } if (APPLY_SPECIFIC(output_grad) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output_grad)); }
if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC(pool)); }
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
PyGpuArrayObject *out, PyGpuArrayObject *out,
PyGpuArrayObject *out_grad, PyGpuArrayObject *out_grad,
cudnnPoolingDescriptor_t desc, PyArrayObject *ws,
PyArrayObject *stride,
PyArrayObject *pad,
PyGpuArrayObject **inp_grad, PyGpuArrayObject **inp_grad,
PyGpuContextObject *c) { PyGpuContextObject *c) {
cudnnStatus_t err; cudnnStatus_t err;
...@@ -85,6 +95,26 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -85,6 +95,26 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
return 1; return 1;
} }
int w[3];
int p[3];
int s[3];
int ndims = PyArray_DIM(ws, 0);//PyGpuArray_NDIM(img) - 2;
for(int i = 0; i < ndims; i++) {
w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
}
for(int i = 0; i < ndims; i++) {
p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
}
for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
}
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
}
if (c_set_tensorNd(*inp_grad, APPLY_SPECIFIC(input_grad)) != 0) if (c_set_tensorNd(*inp_grad, APPLY_SPECIFIC(input_grad)) != 0)
return 1; return 1;
...@@ -118,7 +148,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -118,7 +148,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
cuda_wait((*inp_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*inp_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnPoolingBackward( err = cudnnPoolingBackward(
APPLY_SPECIFIC(_handle), desc, APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(pool),
alpha, alpha,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(out), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(out),
APPLY_SPECIFIC(output_grad), PyGpuArray_DEV_DATA(out_grad), APPLY_SPECIFIC(output_grad), PyGpuArray_DEV_DATA(out_grad),
......
...@@ -275,6 +275,55 @@ def test_pooling(): ...@@ -275,6 +275,55 @@ def test_pooling():
utt.assert_allclose(c_out, g_out) utt.assert_allclose(c_out, g_out)
def test_pooling_with_tensor_vars():
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
x = T.ftensor4()
ws = theano.shared(numpy.array([2, 2], dtype='int32'))
st = theano.shared(numpy.array([1, 1], dtype='int32'))
pad = theano.shared(numpy.array([0, 0], dtype='int32'))
mode = 'max'
def fn(x):
dnn_op = dnn.dnn_pool(x,
ws=ws,
stride=st,
pad=pad,
mode=mode)
return dnn_op
for shp in [(1, 1, 2, 2),
(1, 1, 3, 3)]:
data = numpy.random.normal(0, 1, shp).astype("float32") * 10
theano.tests.unittest_tools.verify_grad(
fn, [data],
cast_to_output_type=False,
mode=mode_with_gpu)
out2 = pool_2d_i2n(x, ds=(2, 2), strides=(1, 1),
pad=(0, 0),
pool_function=T.max)
mode_without_gpu2 = mode_without_gpu.including()
mode_without_gpu2.check_isfinite = False
f1 = theano.function([x], fn(x), mode=mode_with_gpu)
assert any([isinstance(node.op, dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes])
f2 = theano.function([x], out2, mode=mode_without_gpu2)
assert not any([isinstance(node.op, dnn.GpuDnnPool)
for node in f2.maker.fgraph.apply_nodes])
for shp in [(1, 10, 100, 100),
(1, 3, 99, 99),
(32, 1, 147, 197),
]:
data = numpy.random.normal(0, 1, shp).astype("float32")
a = f1(data).__array__()
b = f2(data).__array__()
utt.assert_allclose(a, b)
def test_pooling_opt(): def test_pooling_opt():
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
...@@ -340,6 +389,7 @@ def test_dnn_tag(): ...@@ -340,6 +389,7 @@ def test_dnn_tag():
class TestDnnInferShapes(utt.InferShapeTester): class TestDnnInferShapes(utt.InferShapeTester):
def setUp(self): def setUp(self):
super(TestDnnInferShapes, self).setUp() super(TestDnnInferShapes, self).setUp()
self.mode = mode_with_gpu self.mode = mode_with_gpu
...@@ -525,14 +575,9 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -525,14 +575,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(1, 1), (2, 2), (3, 3)], [(1, 1), (2, 2), (3, 3)],
modes modes
): ):
desc = dnn.GpuDnnPoolDesc(
ws=params[0],
stride=params[1],
mode=params[2]
)()
self._compile_and_check( self._compile_and_check(
[img], [img],
[dnn.GpuDnnPool()(img, desc)], [dnn.GpuDnnPool(mode=params[2])(img, params[0], params[1], (0, 0))],
[img_val], [img_val],
dnn.GpuDnnPool dnn.GpuDnnPool
) )
...@@ -561,16 +606,13 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -561,16 +606,13 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(1, 1), (2, 2), (3, 3)], [(1, 1), (2, 2), (3, 3)],
['max', 'average_inc_pad'] ['max', 'average_inc_pad']
): ):
desc = dnn.GpuDnnPoolDesc( pool_grad = dnn.GpuDnnPoolGrad(mode=params[2])(
ws=params[0],
stride=params[1],
mode=params[2]
)()
pool_grad = dnn.GpuDnnPoolGrad()(
img, img,
out, out,
img_grad, img_grad,
desc params[0],
params[1],
(0, 0)
) )
self._compile_and_check( self._compile_and_check(
[img, img_grad, out], [img, img_grad, out],
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论