提交 e4533cec authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Optimize CPU max pooling with padding to cudnn

上级 6f9b538b
...@@ -1483,9 +1483,10 @@ if True: ...@@ -1483,9 +1483,10 @@ if True:
img, = node.inputs img, = node.inputs
ds = node.op.ds ds = node.op.ds
stride = node.op.st stride = node.op.st
pad = node.op.padding
if (img.owner and isinstance(img.owner.op, HostFromGpu)): if (img.owner and isinstance(img.owner.op, HostFromGpu)):
ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]), ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
ds, stride=stride) ds, stride=stride, pad=pad)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
...@@ -1514,12 +1515,13 @@ if True: ...@@ -1514,12 +1515,13 @@ if True:
inp, out, inp_grad = node.inputs inp, out, inp_grad = node.inputs
ds = node.op.ds ds = node.op.ds
st = node.op.st st = node.op.st
pad = node.op.padding
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
(out.owner and isinstance(out.owner.op, HostFromGpu)) or (out.owner and isinstance(out.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op, HostFromGpu)) (inp_grad.owner and isinstance(inp_grad.owner.op, HostFromGpu))
): ):
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max")() desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max", pad=pad)()
if not node.op.ignore_border: if not node.op.ignore_border:
return return
ret = GpuDnnPoolGrad()(gpu_contiguous(inp), ret = GpuDnnPoolGrad()(gpu_contiguous(inp),
......
...@@ -79,12 +79,12 @@ def test_pooling(): ...@@ -79,12 +79,12 @@ def test_pooling():
for stride in (2, 3): for stride in (2, 3):
if stride > ws: if stride > ws:
continue continue
if func is T.max and pad == (0, 0): if func is T.max:
# We will check that the opt introduced it. # We will check that the opt introduced it.
out1 = max_pool_2d(x, (ws, ws), out1 = max_pool_2d(x, (ws, ws),
st=(stride, stride), st=(stride, stride),
ignore_border=True,) ignore_border=True,
# pad=pad) padding=pad)
else: else:
out1 = cuda.dnn.dnn_pool( out1 = cuda.dnn.dnn_pool(
x, ws=(ws, ws), x, ws=(ws, ws),
...@@ -120,13 +120,13 @@ def test_pooling(): ...@@ -120,13 +120,13 @@ def test_pooling():
data = numpy.random.normal(0, 1, shp).astype("float32")*10 data = numpy.random.normal(0, 1, shp).astype("float32")*10
ws = 2 ws = 2
strides = 2 stride = 2
print func, pad, ws, stride, shp print func, pad, ws, stride, shp
# This tests the CPU grad + opt + GPU implementation # This tests the CPU grad + opt + GPU implementation
def fn(x): def fn(x):
return max_pool_2d(x, (ws, ws), ignore_border=True,) return max_pool_2d(x, (ws, ws), ignore_border=True,
# pad=pad) padding=pad)
theano.tests.unittest_tools.verify_grad(fn, [data], theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False, cast_to_output_type=False,
mode=mode_with_gpu) mode=mode_with_gpu)
...@@ -155,10 +155,10 @@ def test_pooling(): ...@@ -155,10 +155,10 @@ def test_pooling():
for node in fg.maker.fgraph.toposort()]) for node in fg.maker.fgraph.toposort()])
g_out = fg(data) g_out = fg(data)
if func is T.max and pad == (0, 0): if func is T.max:
# Compare again the CPU result # Compare again the CPU result
out = max_pool_2d(x, (ws, ws), out = max_pool_2d(x, (ws, ws),
# pad=pad, padding=pad,
ignore_border=True) ignore_border=True)
fc = theano.function([x], theano.grad(out.sum(), x), fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu) mode=mode_without_gpu)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论