Commit 5921fd10, authored by lucasb-eyer, committed by Frederic Bastien

Allow for pooling of empty batch (like for conv in #3715).

Parent commit: 8757c2dc
...@@ -1673,9 +1673,6 @@ if (!CudaNdarray_is_c_contiguous(%(input)s)) { ...@@ -1673,9 +1673,6 @@ if (!CudaNdarray_is_c_contiguous(%(input)s)) {
%(fail)s %(fail)s
} }
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
int win[%(nd)d]; int win[%(nd)d];
int pad[%(nd)d]; int pad[%(nd)d];
int str[%(nd)d]; int str[%(nd)d];
...@@ -1711,6 +1708,24 @@ if (CudaNdarray_prep_output(&%(out)s, %(nd)s+2, %(out)s_dims) != 0) ...@@ -1711,6 +1708,24 @@ if (CudaNdarray_prep_output(&%(out)s, %(nd)s+2, %(out)s_dims) != 0)
%(fail)s %(fail)s
} }
// if input batch is empty, we return the empty output without calling cuDNN
// (which will fail on zero batch size)
if (CudaNdarray_DIMS(%(input)s)[0] == 0) {
cudaError_t err2 = cudaMemset((%(out)s)->devdata, 0,
CudaNdarray_SIZE(%(out)s) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv could not fill the output with zeros: %%s",
cudaGetErrorString(err2));
%(fail)s
}
// Ideally, "return success" here, but we don't have a %%(done)s
} else {
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(out)s, %(output_desc)s) != 0) if (c_set_tensorNd(%(out)s, %(output_desc)s) != 0)
%(fail)s %(fail)s
...@@ -1732,6 +1747,8 @@ if (err != CUDNN_STATUS_SUCCESS) { ...@@ -1732,6 +1747,8 @@ if (err != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err)); cudnnGetErrorString(err));
%(fail)s %(fail)s
} }
}
""" % dict(out=out, fail=sub['fail'], """ % dict(out=out, fail=sub['fail'],
name=name, input=inputs[0], name=name, input=inputs[0],
ws=ws, pad=pad, str=stride, ws=ws, pad=pad, str=stride,
...@@ -1756,7 +1773,7 @@ if (err != CUDNN_STATUS_SUCCESS) { ...@@ -1756,7 +1773,7 @@ if (err != CUDNN_STATUS_SUCCESS) {
return [[1], [0], [0], [0]] return [[1], [0], [0], [0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return (8, version()) return (9, version())
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
...@@ -1938,13 +1955,6 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) { ...@@ -1938,13 +1955,6 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) {
%(fail)s %(fail)s
} }
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(input_grad)s, %(input_grad_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(output)s, %(output_desc)s) != 0)
%(fail)s
if (CudaNdarray_prep_output(&%(output_grad)s, if (CudaNdarray_prep_output(&%(output_grad)s,
%(output)s->nd, %(output)s->nd,
CudaNdarray_HOST_DIMS(%(output)s)) != 0) CudaNdarray_HOST_DIMS(%(output)s)) != 0)
...@@ -1952,6 +1962,27 @@ if (CudaNdarray_prep_output(&%(output_grad)s, ...@@ -1952,6 +1962,27 @@ if (CudaNdarray_prep_output(&%(output_grad)s,
%(fail)s %(fail)s
} }
// if input batch is empty, we return the empty output without calling cuDNN
// (which will fail on zero batch size)
if (CudaNdarray_DIMS(%(input)s)[0] == 0) {
cudaError_t err2 = cudaMemset((%(output)s)->devdata, 0,
CudaNdarray_SIZE(%(output)s) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv could not fill the output with zeros: %%s",
cudaGetErrorString(err2));
%(fail)s
}
// Ideally, "return success" here, but we don't have a %%(done)s, so do else.
} else {
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(input_grad)s, %(input_grad_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(output)s, %(output_desc)s) != 0)
%(fail)s
int win[%(nd)d]; int win[%(nd)d];
int pad[%(nd)d]; int pad[%(nd)d];
...@@ -1999,6 +2030,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1999,6 +2030,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err%(name)s)); cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
}
""" % dict(output_grad=out_grad, """ % dict(output_grad=out_grad,
fail=sub['fail'], name=name, fail=sub['fail'], name=name,
input=inp, input_grad=inp_grad, output=out, input=inp, input_grad=inp_grad, output=out,
...@@ -2010,7 +2043,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -2010,7 +2043,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
ws=ws, pad=pad, str=stride) ws=ws, pad=pad, str=stride)
def c_code_cache_version(self): def c_code_cache_version(self):
return (8, version()) return (9, version())
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
......
...@@ -546,6 +546,22 @@ def test_pooling_opt_arbitrary_dimensions(): ...@@ -546,6 +546,22 @@ def test_pooling_opt_arbitrary_dimensions():
utt.assert_allclose(res_gpu[1], res_cpu[1]) utt.assert_allclose(res_gpu[1], res_cpu[1])
def test_pooling_empty_batch():
    # A zero-sized batch must not reach cuDNN (which rejects batch size 0);
    # the pooling op should simply produce an empty output of the right shape.
    empty_shape = (0, 5, 6, 8)
    img = T.ftensor4('img')
    pooled = dnn.dnn_pool(img, (2, 2), (2, 2))

    fwd = theano.function([img], pooled, mode=mode_with_gpu)
    out = fwd(numpy.random.rand(*empty_shape).astype('float32'))
    assert out.shape == (0, 5, 3, 4)

    # The gradient pass must likewise tolerate the empty batch; beyond not
    # raising, we can only check that the output shape matches the input.
    grad = T.grad(T.sum(pooled), wrt=img)
    bwd = theano.function([img], grad, mode=mode_with_gpu)
    out = bwd(numpy.random.rand(*empty_shape).astype('float32'))
    assert out.shape == (0, 5, 6, 8)
class test_DnnSoftMax(test_nnet.test_SoftMax): class test_DnnSoftMax(test_nnet.test_SoftMax):
gpu_op = dnn.GpuDnnSoftmax gpu_op = dnn.GpuDnnSoftmax
gpu_grad_op = dnn.GpuDnnSoftmaxGrad gpu_grad_op = dnn.GpuDnnSoftmaxGrad
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment