提交 44be801d authored 作者: Frederic's avatar Frederic

[disabled BUGfix,CRASH] related to the cudnn pooling gradient.

bug: The optimization was converting ignore_border=False to ignore_border=True. crash: The optimization that converts GpuDownsampleFactorMaxGrad to GpuDnnPoolGrad was swapping the input parameters. This was caused in part by cudnn using a strange definition of input and output. Now the op uses the same input order as the other pool grad ops. The only difference is that the c_code() uses the names that cudnn uses. There is a comment that explains that.
上级 216a6670
......@@ -737,7 +737,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
out = self(img, desc)
g_out = GpuDnnPoolGrad()(out, grad, img, desc)
g_out = GpuDnnPoolGrad()(img, out, grad, desc)
return g_out, theano.gradient.DisconnectedType()()
......@@ -745,8 +745,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
# not connected to desc
return [[1], [0]]
# def c_code_cache_version(self):
# return (3,)
def c_code_cache_version(self):
return (4,)
class GpuDnnPoolGrad(DnnBase):
......@@ -754,13 +754,13 @@ class GpuDnnPoolGrad(DnnBase):
The pooling gradient.
:param inp: the input of the pooling.
:param inp_grad: same size as out, but is the corresponding gradient information.
:param out: the output of the pooling in the forward.
:param inp_grad: same size as out, but is the corresponding gradient information.
:param desc: The pooling descriptor.
"""
__props__ = ()
def make_node(self, inp, inp_grad, out, desc):
def make_node(self, inp, out, inp_grad, desc):
inp = as_cuda_ndarray_variable(inp)
if inp.type.ndim != 4:
raise TypeError('inp must be 4D tensor')
......@@ -777,7 +777,7 @@ class GpuDnnPoolGrad(DnnBase):
or desc.type.ctype != 'cudnnPoolingDescriptor_t':
raise TypeError('desc must be cudnnPoolingDescriptor_t')
return Apply(self, [inp, inp_grad, out, desc],
return Apply(self, [inp, out, inp_grad, desc],
[inp.type()])
def c_support_code_struct(self, node, struct_id):
......@@ -830,7 +830,10 @@ if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)
""" % dict(id=struct_id)
def c_code(self, node, name, inputs, outputs, sub):
inp, inp_grad, out, desc = inputs
# Here the name out and inp are based on the cudnn definition.
# Not the definition of this class.
# This make it complicated.
out, inp, inp_grad, desc = inputs
out_grad, = outputs
set_in = "\n".join([
......@@ -900,7 +903,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
output_grad_desc="output_grad"+str(sub['struct_id']))
def c_code_cache_version(self):
return (3,)
return (4,)
def dnn_pool(img, ws, stride=(1, 1), mode='max'):
......@@ -1178,36 +1181,34 @@ if cuda_available:
border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)]
# DISABLED as there is problems in the handling of borders
# @register_opt('cudnn')
@register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax])
def local_pool_dnn(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMax):
if node.op.ignore_border:
if not node.op.ignore_border:
return
img, = node.inputs
ds = node.op.ds
return [dnn_pool(gpu_contiguous(img), ds, ds)]
# DISABLED as there is problems in the handling of borders
# @register_opt('cudnn')
@register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMaxGrad])
def local_pool_dnn_grad(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMaxGrad):
if node.op.ignore_border:
if not node.op.ignore_border:
return
inp, out, inp_grad = node.inputs
ds = node.op.ds
desc = GpuDnnPoolDesc(ws=ds, stride=ds, mode="max")()
return [GpuDnnPoolGrad()(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(inp_grad),
gpu_contiguous(out), desc)]
desc)]
@register_opt('cudnn')
@local_optimizer([GpuSoftmax])
......
......@@ -11,6 +11,7 @@ import theano.tensor as T
import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs, neibs2images
from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.signal.downsample import DownsampleFactorMaxGrad
# Skip test if cuda_ndarray is not available.
......@@ -61,6 +62,10 @@ def test_pooling():
for stride in (2, 3):
if stride > ws:
continue
if ws == stride and func is T.max:
# We will check that the opt introduced it.
out1 = max_pool_2d(x, (ws, ws), ignore_border=True)
else:
out1 = cuda.dnn.dnn_pool(
x, ws=(ws, ws),
stride=(stride, stride),
......@@ -69,7 +74,11 @@ def test_pooling():
pool_function=func)
f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes])
f2 = theano.function([x], out2, mode=mode_with_gpu)
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f2.maker.fgraph.apply_nodes])
for shp in [(1, 10, 100, 100),
(1, 3, 99, 99),
(32, 1, 147, 197),
......@@ -81,6 +90,53 @@ def test_pooling():
assert numpy.allclose(a, b,
atol=numpy.finfo(numpy.float32).eps)
# Test the grad
for shp in [(1, 1, 2, 2),
(1, 1, 3, 3)]:
data = numpy.random.normal(0, 1, shp).astype("float32")*10
ws = 2
strides = 2
# This test the CPU grad + opt + GPU implemtentation
def fn(x):
return max_pool_2d(x, (ws, ws), ignore_border=True)
theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False,
mode=mode_with_gpu)
# Confirm that the opt would have inserted it.
f = theano.function([x], theano.grad(fn(x).sum(), x),
mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
for node in f.maker.fgraph.toposort()])
# Test the GPU grad + GPU implementation
def fn(x):
dnn_op = cuda.dnn.dnn_pool(
x, ws=(ws, ws),
stride=(stride, stride),
mode='max' if func is T.max else "average")
return dnn_op
theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False,
mode=mode_with_gpu)
# Confirm that we get the good op.
f = theano.function([x], theano.grad(fn(x).sum(), x),
mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
for node in f.maker.fgraph.toposort()])
g_out = f(data)
if func is T.max:
# Compare again the CPU result
out = max_pool_2d(x, (ws, ws), ignore_border=True)
f = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu)
assert any([isinstance(node.op, DownsampleFactorMaxGrad)
for node in f.maker.fgraph.toposort()])
c_out = f(data)
assert numpy.allclose(c_out, g_out)
def test_pooling_opt():
if not cuda.dnn.dnn_available():
......@@ -90,7 +146,7 @@ def test_pooling_opt():
f = theano.function(
[x],
max_pool_2d(x, ds=(2, 2)),
max_pool_2d(x, ds=(2, 2), ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
......@@ -98,7 +154,7 @@ def test_pooling_opt():
f = theano.function(
[x],
T.grad(max_pool_2d(x, ds=(2, 2)).sum(), x),
T.grad(max_pool_2d(x, ds=(2, 2), ignore_border=True).sum(), x),
mode=mode_with_gpu.including("cudnn"))
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论