提交 b6907730 authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #5537 from lucasb-eyer/cudnn-pool-zerobatch

Allow for pooling of empty batch (like for conv in #3715).
...@@ -52,9 +52,6 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -52,9 +52,6 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
return 1; return 1;
} }
if (c_set_tensorNd(img, APPLY_SPECIFIC(input)) != 0)
return 1;
cudnnPoolingMode_t mode; cudnnPoolingMode_t mode;
int w[3]; int w[3];
int p[3]; int p[3];
...@@ -71,12 +68,6 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -71,12 +68,6 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
} }
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
}
dims[0] = PyGpuArray_DIM(img, 0); dims[0] = PyGpuArray_DIM(img, 0);
dims[1] = PyGpuArray_DIM(img, 1); dims[1] = PyGpuArray_DIM(img, 1);
dims[2] = (PyGpuArray_DIM(img, 2) + (p[0]*2) - w[0]) / s[0] + 1; dims[2] = (PyGpuArray_DIM(img, 2) + (p[0]*2) - w[0]) / s[0] + 1;
...@@ -88,9 +79,23 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -88,9 +79,23 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
GA_C_ORDER, c) != 0) GA_C_ORDER, c) != 0)
return 1; return 1;
// if input batch is empty, we return the empty output without calling cuDNN
// (which will fail on zero batch size).
if (PyGpuArray_DIM(*out, 0) == 0)
return 0;
if (c_set_tensorNd(img, APPLY_SPECIFIC(input)) != 0)
return 1;
if (c_set_tensorNd(*out, APPLY_SPECIFIC(output)) != 0) if (c_set_tensorNd(*out, APPLY_SPECIFIC(output)) != 0)
return 1; return 1;
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
}
{ {
const float alphaf = 1; const float alphaf = 1;
const float betaf = 0; const float betaf = 0;
......
...@@ -83,6 +83,17 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -83,6 +83,17 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
return 1; return 1;
} }
if (theano_prep_output(inp_grad, PyGpuArray_NDIM(inp),
PyGpuArray_DIMS(inp), inp->ga.typecode,
GA_C_ORDER, c) != 0) {
return 1;
}
// if input batch is empty, we return the empty output without calling cuDNN
// (which will fail on zero batch size).
if (PyGpuArray_DIM(*inp_grad, 0) == 0)
return 0;
if (c_set_tensorNd(inp, APPLY_SPECIFIC(input)) != 0) if (c_set_tensorNd(inp, APPLY_SPECIFIC(input)) != 0)
return 1; return 1;
if (c_set_tensorNd(out_grad, APPLY_SPECIFIC(output_grad)) != 0) if (c_set_tensorNd(out_grad, APPLY_SPECIFIC(output_grad)) != 0)
...@@ -90,12 +101,6 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -90,12 +101,6 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
if (c_set_tensorNd(out, APPLY_SPECIFIC(output)) != 0) if (c_set_tensorNd(out, APPLY_SPECIFIC(output)) != 0)
return 1; return 1;
if (theano_prep_output(inp_grad, PyGpuArray_NDIM(inp),
PyGpuArray_DIMS(inp), inp->ga.typecode,
GA_C_ORDER, c) != 0) {
return 1;
}
int w[3]; int w[3];
int p[3]; int p[3];
int s[3]; int s[3];
......
...@@ -510,6 +510,22 @@ def test_pooling_opt_arbitrary_dimensions(): ...@@ -510,6 +510,22 @@ def test_pooling_opt_arbitrary_dimensions():
utt.assert_allclose(res_gpu[1], res_cpu[1]) utt.assert_allclose(res_gpu[1], res_cpu[1])
def test_pooling_empty_batch():
img_shp = (0, 5, 6, 8)
img = T.ftensor4('img')
o = dnn.dnn_pool(img, (2, 2), (2, 2))
f = theano.function([img], o, mode=mode_with_gpu)
d = f(np.random.rand(*img_shp).astype('float32'))
assert d.shape == (0, 5, 3, 4)
g = T.grad(T.sum(o), wrt=img)
f = theano.function([img], g, mode=mode_with_gpu)
d = f(np.random.rand(*img_shp).astype('float32'))
# Not sure what to assert, it should just pass, that's all.
assert d.shape == (0, 5, 6, 8)
def test_dnn_tag(): def test_dnn_tag():
""" """
Test that if cudnn isn't avail we crash and that if it is avail, we use it. Test that if cudnn isn't avail we crash and that if it is avail, we use it.
......
...@@ -1673,9 +1673,6 @@ if (!CudaNdarray_is_c_contiguous(%(input)s)) { ...@@ -1673,9 +1673,6 @@ if (!CudaNdarray_is_c_contiguous(%(input)s)) {
%(fail)s %(fail)s
} }
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
int win[%(nd)d]; int win[%(nd)d];
int pad[%(nd)d]; int pad[%(nd)d];
int str[%(nd)d]; int str[%(nd)d];
...@@ -1711,6 +1708,15 @@ if (CudaNdarray_prep_output(&%(out)s, %(nd)s+2, %(out)s_dims) != 0) ...@@ -1711,6 +1708,15 @@ if (CudaNdarray_prep_output(&%(out)s, %(nd)s+2, %(out)s_dims) != 0)
%(fail)s %(fail)s
} }
// if input batch is empty, we return the empty output without calling cuDNN
// (which will fail on zero batch size).
// Ideally, "return success" here, but we don't have a %%(done)s, so just skip the call.
if (CudaNdarray_DIMS(%(input)s)[0] > 0) {
// Don't indent for keeping history
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(out)s, %(output_desc)s) != 0) if (c_set_tensorNd(%(out)s, %(output_desc)s) != 0)
%(fail)s %(fail)s
...@@ -1732,6 +1738,8 @@ if (err != CUDNN_STATUS_SUCCESS) { ...@@ -1732,6 +1738,8 @@ if (err != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err)); cudnnGetErrorString(err));
%(fail)s %(fail)s
} }
} // Closes the batchdim > 0 check.
""" % dict(out=out, fail=sub['fail'], """ % dict(out=out, fail=sub['fail'],
name=name, input=inputs[0], name=name, input=inputs[0],
ws=ws, pad=pad, str=stride, ws=ws, pad=pad, str=stride,
...@@ -1756,7 +1764,7 @@ if (err != CUDNN_STATUS_SUCCESS) { ...@@ -1756,7 +1764,7 @@ if (err != CUDNN_STATUS_SUCCESS) {
return [[1], [0], [0], [0]] return [[1], [0], [0], [0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return (8, version()) return (9, version())
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
...@@ -1938,13 +1946,6 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) { ...@@ -1938,13 +1946,6 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) {
%(fail)s %(fail)s
} }
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(input_grad)s, %(input_grad_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(output)s, %(output_desc)s) != 0)
%(fail)s
if (CudaNdarray_prep_output(&%(output_grad)s, if (CudaNdarray_prep_output(&%(output_grad)s,
%(output)s->nd, %(output)s->nd,
CudaNdarray_HOST_DIMS(%(output)s)) != 0) CudaNdarray_HOST_DIMS(%(output)s)) != 0)
...@@ -1952,6 +1953,18 @@ if (CudaNdarray_prep_output(&%(output_grad)s, ...@@ -1952,6 +1953,18 @@ if (CudaNdarray_prep_output(&%(output_grad)s,
%(fail)s %(fail)s
} }
// if input batch is empty, we return the empty output without calling cuDNN
// (which will fail on zero batch size).
// Ideally, "return success" here, but we don't have a %%(done)s, so just skip the call.
if (CudaNdarray_DIMS(%(input)s)[0] > 0) {
// Don't indent for keeping history
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(input_grad)s, %(input_grad_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(output)s, %(output_desc)s) != 0)
%(fail)s
int win[%(nd)d]; int win[%(nd)d];
int pad[%(nd)d]; int pad[%(nd)d];
...@@ -1999,6 +2012,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1999,6 +2012,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err%(name)s)); cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
} // Closes the batchdim > 0 check.
""" % dict(output_grad=out_grad, """ % dict(output_grad=out_grad,
fail=sub['fail'], name=name, fail=sub['fail'], name=name,
input=inp, input_grad=inp_grad, output=out, input=inp, input_grad=inp_grad, output=out,
...@@ -2010,7 +2025,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -2010,7 +2025,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
ws=ws, pad=pad, str=stride) ws=ws, pad=pad, str=stride)
def c_code_cache_version(self): def c_code_cache_version(self):
return (8, version()) return (9, version())
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
......
...@@ -546,6 +546,22 @@ def test_pooling_opt_arbitrary_dimensions(): ...@@ -546,6 +546,22 @@ def test_pooling_opt_arbitrary_dimensions():
utt.assert_allclose(res_gpu[1], res_cpu[1]) utt.assert_allclose(res_gpu[1], res_cpu[1])
def test_pooling_empty_batch():
img_shp = (0, 5, 6, 8)
img = T.ftensor4('img')
o = dnn.dnn_pool(img, (2, 2), (2, 2))
f = theano.function([img], o, mode=mode_with_gpu)
d = f(numpy.random.rand(*img_shp).astype('float32'))
assert d.shape == (0, 5, 3, 4)
g = T.grad(T.sum(o), wrt=img)
f = theano.function([img], g, mode=mode_with_gpu)
d = f(numpy.random.rand(*img_shp).astype('float32'))
# Not sure what to assert, it should just pass, that's all.
assert d.shape == (0, 5, 6, 8)
class test_DnnSoftMax(test_nnet.test_SoftMax): class test_DnnSoftMax(test_nnet.test_SoftMax):
gpu_op = dnn.GpuDnnSoftmax gpu_op = dnn.GpuDnnSoftmax
gpu_grad_op = dnn.GpuDnnSoftmaxGrad gpu_grad_op = dnn.GpuDnnSoftmaxGrad
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论