提交 36437aca，作者：abergeron

Merge pull request #2281 from nouiz/dnn_pool_grad

Re-enable dnn pool grad after fixing it.
......@@ -81,6 +81,16 @@ from theano.updates import Updates, OrderedUpdates
from theano.gradient import Rop, Lop, grad, subgraph_grad
# This need to be before the init of GPU, as it add config variable
# needed during that phase.
import theano.tests
if hasattr(theano.tests, "TheanoNoseTester"):
test = theano.tests.TheanoNoseTester().test
else:
def test():
raise ImportError("The nose module is not installed."
" It is needed for Theano tests.")
if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
import theano.sandbox.cuda
# We can't test the driver during import of theano.sandbox.cuda as
......@@ -185,15 +195,6 @@ def sparse_grad(var):
ret = var.owner.op.__class__(sparse_grad=True)(*var.owner.inputs)
return ret
import theano.tests
if hasattr(theano.tests, "TheanoNoseTester"):
test = theano.tests.TheanoNoseTester().test
else:
def test():
raise ImportError("The nose module is not installed."
" It is needed for Theano tests.")
# This cannot be done in tensor/__init__.py due to a circular dependency -- randomstreams
# depends on raw_random which depends on tensor. As a work-around, we import RandomStreams
# here and inject an instance in tensor.
......
......@@ -126,10 +126,10 @@ def raise_with_op(node, thunk=None, exc_info=None):
for ipt in thunk.inputs]
scalar_values = []
for ipt in thunk.inputs:
if getattr(ipt[0], "size", -1) == 1:
if getattr(ipt[0], "size", -1) <= 5:
scalar_values.append(ipt[0])
else:
scalar_values.append("not scalar")
scalar_values.append("not shown")
else:
shapes = "The thunk don't have an inputs attributes."
strides = "So we can't access the strides of inputs values"
......@@ -137,7 +137,7 @@ def raise_with_op(node, thunk=None, exc_info=None):
detailed_err_msg += ("Inputs shapes: %s" % shapes +
"\nInputs strides: %s" % strides +
"\nInputs scalar values: %s\n" % scalar_values)
"\nInputs values: %s\n" % scalar_values)
else:
hints.append(
"HINT: Use another linker then the c linker to"
......
......@@ -2345,7 +2345,8 @@ class GpuReshape(tensor.Reshape, GpuOp):
shp = shp_new
else:
raise ValueError("total size of new array must be unchanged")
raise ValueError("total size of new array must be unchanged",
x.shape, shp)
out[0] = x.reshape(tuple(shp))
......
......@@ -723,7 +723,8 @@ int wsX, wsY, strideX, strideY;
err%(name)s = cudnnGetPoolingDescriptor(%(desc)s, &mode, &wsX, &wsY, &strideX, &strideY);
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error doing operation: %%s",
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnGetPoolingDescriptor operation: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s
}
......@@ -747,7 +748,8 @@ _handle,
%(output_desc)s, CudaNdarray_DEV_DATA(%(out)s)
);
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error doing operation: %%s",
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnPoolingForward operation: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s
}
......@@ -765,7 +767,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
out = self(img, desc)
g_out = GpuDnnPoolGrad()(out, grad, img, desc)
g_out = GpuDnnPoolGrad()(img, out, grad, desc)
return g_out, theano.gradient.DisconnectedType()()
......@@ -774,7 +776,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [[1], [0]]
def c_code_cache_version(self):
return (2,)
return (4,)
class GpuDnnPoolGrad(DnnBase):
......@@ -782,13 +784,13 @@ class GpuDnnPoolGrad(DnnBase):
The pooling gradient.
:param inp: the input of the pooling.
:param inp_grad: same size as out, but is the corresponding gradient information.
:param out: the output of the pooling in the forward.
:param inp_grad: same size as out, but is the corresponding gradient information.
:param desc: The pooling descriptor.
"""
__props__ = ()
def make_node(self, inp, inp_grad, out, desc):
def make_node(self, inp, out, inp_grad, desc):
inp = as_cuda_ndarray_variable(inp)
if inp.type.ndim != 4:
raise TypeError('inp must be 4D tensor')
......@@ -805,7 +807,7 @@ class GpuDnnPoolGrad(DnnBase):
or desc.type.ctype != 'cudnnPoolingDescriptor_t':
raise TypeError('desc must be cudnnPoolingDescriptor_t')
return Apply(self, [inp, inp_grad, out, desc],
return Apply(self, [inp, out, inp_grad, desc],
[inp.type()])
def c_support_code_struct(self, node, struct_id):
......@@ -824,22 +826,26 @@ input_grad%(id)d = NULL;
output%(id)d = NULL;
output_grad%(id)d = NULL;
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&input%(id)d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(input): %%s", cudnnGetErrorString(err%(id)d));
%(fail)s
}
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&input_grad%(id)d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(input_grad): %%s", cudnnGetErrorString(err%(id)d));
%(fail)s
}
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output%(id)d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(output): %%s", cudnnGetErrorString(err%(id)d));
%(fail)s
}
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output_grad%(id)d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(output_grad): %%s", cudnnGetErrorString(err%(id)d));
%(fail)s
}
......@@ -854,7 +860,10 @@ if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)
""" % dict(id=struct_id)
def c_code(self, node, name, inputs, outputs, sub):
inp, inp_grad, out, desc = inputs
# Here the name out and inp are based on the cudnn definition.
# Not the definition of this class.
# This make it complicated.
out, inp, inp_grad, desc = inputs
out_grad, = outputs
set_in = "\n".join([
......@@ -873,23 +882,27 @@ if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)
cudnnStatus_t err%(name)s;
if (!CudaNdarray_is_c_contiguous(%(input)s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous inputs are supported.");
%(fail)s
}
if (!CudaNdarray_is_c_contiguous(%(input_grad)s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous input gradients are supported.");
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous input gradients are supported.");
%(fail)s
}
if (!CudaNdarray_is_c_contiguous(%(output)s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous outputs are supported.");
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous outputs are supported.");
%(fail)s
}
%(set_in)s
if (CudaNdarray_prep_output(&%(output_grad)s, 4, CudaNdarray_HOST_DIMS(%(output)s)) != 0)
if (CudaNdarray_prep_output(&%(output_grad)s, 4,
CudaNdarray_HOST_DIMS(%(output)s)) != 0)
{
%(fail)s
}
......@@ -905,7 +918,8 @@ _handle,
%(output_grad_desc)s, CudaNdarray_DEV_DATA(%(output_grad)s)
);
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error doing operation: %%s",
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: error doing operation: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s
}
......@@ -919,7 +933,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
output_grad_desc="output_grad"+str(sub['struct_id']))
def c_code_cache_version(self):
return (2,)
return (4,)
def dnn_pool(img, ws, stride=(1, 1), mode='max'):
......@@ -1193,36 +1207,34 @@ if True:
border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)]
# DISABLED as there is problems in the handling of borders
# @register_opt('cudnn')
@register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax])
def local_pool_dnn(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMax):
if node.op.ignore_border:
if not node.op.ignore_border:
return
img, = node.inputs
ds = node.op.ds
return [dnn_pool(gpu_contiguous(img), ds, ds)]
# DISABLED as there is problems in the handling of borders
# @register_opt('cudnn')
@register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMaxGrad])
def local_pool_dnn_grad(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMaxGrad):
if node.op.ignore_border:
if not node.op.ignore_border:
return
inp, out, inp_grad = node.inputs
ds = node.op.ds
desc = GpuDnnPoolDesc(ws=ds, stride=ds, mode="max")()
return [GpuDnnPoolGrad()(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(inp_grad),
gpu_contiguous(out), desc)]
desc)]
@register_opt('cudnn')
@local_optimizer([GpuSoftmax])
......
......@@ -11,6 +11,7 @@ import theano.tensor as T
import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs, neibs2images
from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.signal.downsample import DownsampleFactorMaxGrad
# Skip test if cuda_ndarray is not available.
......@@ -57,26 +58,84 @@ def test_pooling():
x = T.ftensor4()
for func in (T.max, T.mean):
for ws in (4, 5):
for ws in (2, 4, 5):
for stride in (2, 3):
out1 = cuda.dnn.dnn_pool(
x, ws=(ws, ws),
stride=(stride, stride),
mode='max' if func is T.max else "average")
if stride > ws:
continue
if ws == stride and func is T.max:
# We will check that the opt introduced it.
out1 = max_pool_2d(x, (ws, ws), ignore_border=True)
else:
out1 = cuda.dnn.dnn_pool(
x, ws=(ws, ws),
stride=(stride, stride),
mode='max' if func is T.max else "average")
out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride),
pool_function=func)
f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes])
f2 = theano.function([x], out2, mode=mode_with_gpu)
data = numpy.random.normal(
0, 1, (1, 10, 100, 100)).astype("float32")
a = f1(data).__array__()
b = f2(data).__array__()
assert numpy.allclose(a, b,
atol=numpy.finfo(numpy.float32).eps)
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f2.maker.fgraph.apply_nodes])
for shp in [(1, 10, 100, 100),
(1, 3, 99, 99),
(32, 1, 147, 197),
]:
data = numpy.random.normal(0, 1, shp).astype("float32")
a = f1(data).__array__()
b = f2(data).__array__()
assert numpy.allclose(a, b,
atol=numpy.finfo(numpy.float32).eps)
# Test the grad
for shp in [(1, 1, 2, 2),
(1, 1, 3, 3)]:
data = numpy.random.normal(0, 1, shp).astype("float32")*10
ws = 2
strides = 2
# This test the CPU grad + opt + GPU implemtentation
def fn(x):
return max_pool_2d(x, (ws, ws), ignore_border=True)
theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False,
mode=mode_with_gpu)
# Confirm that the opt would have inserted it.
f = theano.function([x], theano.grad(fn(x).sum(), x),
mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
for node in f.maker.fgraph.toposort()])
# Test the GPU grad + GPU implementation
def fn(x):
dnn_op = cuda.dnn.dnn_pool(
x, ws=(ws, ws),
stride=(stride, stride),
mode='max' if func is T.max else "average")
return dnn_op
theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False,
mode=mode_with_gpu)
# Confirm that we get the good op.
f = theano.function([x], theano.grad(fn(x).sum(), x),
mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
for node in f.maker.fgraph.toposort()])
g_out = f(data)
if func is T.max:
# Compare again the CPU result
out = max_pool_2d(x, (ws, ws), ignore_border=True)
f = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu)
assert any([isinstance(node.op, DownsampleFactorMaxGrad)
for node in f.maker.fgraph.toposort()])
c_out = f(data)
assert numpy.allclose(c_out, g_out)
def test_pooling_opt():
......@@ -87,7 +146,7 @@ def test_pooling_opt():
f = theano.function(
[x],
max_pool_2d(x, ds=(2, 2)),
max_pool_2d(x, ds=(2, 2), ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
......@@ -95,7 +154,7 @@ def test_pooling_opt():
f = theano.function(
[x],
T.grad(max_pool_2d(x, ds=(2, 2)).sum(), x),
T.grad(max_pool_2d(x, ds=(2, 2), ignore_border=True).sum(), x),
mode=mode_with_gpu.including("cudnn"))
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论