提交 e4533cec 作者: Pascal Lamblin

Optimize CPU max pooling with padding to cudnn

上级 6f9b538b
......@@ -1483,9 +1483,10 @@ if True:
img, = node.inputs
ds = node.op.ds
stride = node.op.st
pad = node.op.padding
if (img.owner and isinstance(img.owner.op, HostFromGpu)):
ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
ds, stride=stride)
ds, stride=stride, pad=pad)
return [host_from_gpu(ret)]
@register_opt('cudnn')
......@@ -1514,12 +1515,13 @@ if True:
inp, out, inp_grad = node.inputs
ds = node.op.ds
st = node.op.st
pad = node.op.padding
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
(out.owner and isinstance(out.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op, HostFromGpu))
):
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max")()
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max", pad=pad)()
if not node.op.ignore_border:
return
ret = GpuDnnPoolGrad()(gpu_contiguous(inp),
......
......@@ -79,12 +79,12 @@ def test_pooling():
for stride in (2, 3):
if stride > ws:
continue
if func is T.max and pad == (0, 0):
if func is T.max:
# We will check that the opt introduced it.
out1 = max_pool_2d(x, (ws, ws),
st=(stride, stride),
ignore_border=True,)
# pad=pad)
ignore_border=True,
padding=pad)
else:
out1 = cuda.dnn.dnn_pool(
x, ws=(ws, ws),
......@@ -120,13 +120,13 @@ def test_pooling():
data = numpy.random.normal(0, 1, shp).astype("float32")*10
ws = 2
strides = 2
stride = 2
print func, pad, ws, stride, shp
# This tests the CPU grad + opt + GPU implementation
def fn(x):
return max_pool_2d(x, (ws, ws), ignore_border=True,)
# pad=pad)
return max_pool_2d(x, (ws, ws), ignore_border=True,
padding=pad)
theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False,
mode=mode_with_gpu)
......@@ -155,10 +155,10 @@ def test_pooling():
for node in fg.maker.fgraph.toposort()])
g_out = fg(data)
if func is T.max and pad == (0, 0):
if func is T.max:
# Compare against the CPU result
out = max_pool_2d(x, (ws, ws),
# pad=pad,
padding=pad,
ignore_border=True)
fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论