提交 e4533cec authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Optimize CPU max pooling with padding to cudnn

上级 6f9b538b
...@@ -1483,9 +1483,10 @@ if True: ...@@ -1483,9 +1483,10 @@ if True:
img, = node.inputs img, = node.inputs
ds = node.op.ds ds = node.op.ds
stride = node.op.st stride = node.op.st
pad = node.op.padding
if (img.owner and isinstance(img.owner.op, HostFromGpu)): if (img.owner and isinstance(img.owner.op, HostFromGpu)):
ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]), ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
ds, stride=stride) ds, stride=stride, pad=pad)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
...@@ -1514,12 +1515,13 @@ if True: ...@@ -1514,12 +1515,13 @@ if True:
inp, out, inp_grad = node.inputs inp, out, inp_grad = node.inputs
ds = node.op.ds ds = node.op.ds
st = node.op.st st = node.op.st
pad = node.op.padding
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
(out.owner and isinstance(out.owner.op, HostFromGpu)) or (out.owner and isinstance(out.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op, HostFromGpu)) (inp_grad.owner and isinstance(inp_grad.owner.op, HostFromGpu))
): ):
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max")() desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max", pad=pad)()
if not node.op.ignore_border: if not node.op.ignore_border:
return return
ret = GpuDnnPoolGrad()(gpu_contiguous(inp), ret = GpuDnnPoolGrad()(gpu_contiguous(inp),
......
...@@ -79,12 +79,12 @@ def test_pooling(): ...@@ -79,12 +79,12 @@ def test_pooling():
for stride in (2, 3): for stride in (2, 3):
if stride > ws: if stride > ws:
continue continue
if func is T.max and pad == (0, 0): if func is T.max:
# We will check that the opt introduced it. # We will check that the opt introduced it.
out1 = max_pool_2d(x, (ws, ws), out1 = max_pool_2d(x, (ws, ws),
st=(stride, stride), st=(stride, stride),
ignore_border=True,) ignore_border=True,
# pad=pad) padding=pad)
else: else:
out1 = cuda.dnn.dnn_pool( out1 = cuda.dnn.dnn_pool(
x, ws=(ws, ws), x, ws=(ws, ws),
...@@ -120,13 +120,13 @@ def test_pooling(): ...@@ -120,13 +120,13 @@ def test_pooling():
data = numpy.random.normal(0, 1, shp).astype("float32")*10 data = numpy.random.normal(0, 1, shp).astype("float32")*10
ws = 2 ws = 2
strides = 2 stride = 2
print func, pad, ws, stride, shp print func, pad, ws, stride, shp
# This tests the CPU grad + opt + GPU implementation # This tests the CPU grad + opt + GPU implementation
def fn(x): def fn(x):
return max_pool_2d(x, (ws, ws), ignore_border=True,) return max_pool_2d(x, (ws, ws), ignore_border=True,
# pad=pad) padding=pad)
theano.tests.unittest_tools.verify_grad(fn, [data], theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False, cast_to_output_type=False,
mode=mode_with_gpu) mode=mode_with_gpu)
...@@ -155,10 +155,10 @@ def test_pooling(): ...@@ -155,10 +155,10 @@ def test_pooling():
for node in fg.maker.fgraph.toposort()]) for node in fg.maker.fgraph.toposort()])
g_out = fg(data) g_out = fg(data)
if func is T.max and pad == (0, 0): if func is T.max:
# Compare again the CPU result # Compare again the CPU result
out = max_pool_2d(x, (ws, ws), out = max_pool_2d(x, (ws, ws),
# pad=pad, padding=pad,
ignore_border=True) ignore_border=True)
fc = theano.function([x], theano.grad(out.sum(), x), fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu) mode=mode_without_gpu)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论