提交 a9655e21 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #3728 from nouiz/pool_grad_grad_infer_shape

[BUG] Fix DownsampleFactorMaxGradGrad.infer_shape. Fix a failing test.
......@@ -34,6 +34,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs)
def dnn_available():
if dnn_available.avail is None:
if not theano.sandbox.cuda.cuda_available:
......@@ -67,7 +68,8 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
if config.dnn.library_path:
params.append("-L" + config.dnn.library_path)
if config.nvcc.compiler_bindir:
params.extend(['--compiler-bindir', config.nvcc.compiler_bindir])
params.extend(['--compiler-bindir',
config.nvcc.compiler_bindir])
# Do not run here the test program. It would run on the
# default gpu, not the one selected by the user. If mixed
# GPU are installed or if the GPUs are configured in
......@@ -1087,9 +1089,9 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
This parameter is used internally by graph optimizers and may be
removed at any time without a deprecation period. You have been warned.
workmem
*deprecated*, use parameter algo instead.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may require certain
*deprecated*, use parameter algo instead.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may require certain
versions of CuDNN to be installed. Default is the value of
:attr:`config.dnn.conv.algo_fwd`.
......@@ -1167,10 +1169,10 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
:param img: images to do the convolution over
:param kerns: convolution filters
:param border_mode: One of 'valid', 'full'; additionally, the padding
size can be directly specified by an integer or a pair of integers
(as a tuple), specifying the amount of zero padding added to _both_
the top and bottom (first entry) and left and right (second entry)
sides of the image.
size can be directly specified by an integer or a pair of integers
(as a tuple), specifying the amount of zero padding added to _both_
the top and bottom (first entry) and left and right (second entry)
sides of the image.
:param subsample: perform subsampling of the output (default: (1, 1, 1))
:param conv_mode: perform convolution (kernels flipped) or
cross-correlation. One of 'conv', 'cross'. (default: 'conv')
......@@ -1257,12 +1259,13 @@ def dnn_gradweight(img, topgrad,
img = gpu_contiguous(img)
topgrad = gpu_contiguous(topgrad)
kerns_shp = theano.tensor.as_tensor_variable(kerns_shp)
kerns_shp = theano.tensor.as_tensor_variable(kerns_shp)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(img.shape, kerns_shp)
out = gpu_alloc_empty(*kerns_shp)
return GpuDnnConvGradW()(img, topgrad, out, desc)
def dnn_gradinput(kerns, topgrad,
img_shp,
border_mode='valid', subsample=(1, 1),
......@@ -1550,8 +1553,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
}
""" % dict(out=out, desc=desc, fail=sub['fail'],
name=name, input=inputs[0],
input_desc="input"+name,
output_desc="output"+name)
input_desc="input" + name,
output_desc="output" + name)
def grad(self, inp, grads):
img, desc = inp
......@@ -1745,10 +1748,10 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
""" % dict(output_grad=out_grad, desc=desc,
fail=sub['fail'], name=name,
input=inp, input_grad=inp_grad, output=out,
input_desc="input"+name,
input_grad_desc="input_grad"+name,
output_desc="output"+name,
output_grad_desc="output_grad"+name)
input_desc="input" + name,
input_grad_desc="input_grad" + name,
output_desc="output" + name,
output_grad_desc="output_grad" + name)
def c_code_cache_version(self):
return (7, version())
......@@ -1804,8 +1807,8 @@ class GpuDnnSoftmaxBase(DnnBase):
Always set this to 'bc01'.
algo
'fast', 'accurate' or 'log' indicating whether, respectively, computations
should be optimized for speed, for accuracy, or if CuDNN should rather
compute the log-softmax instead.
should be optimized for speed, for accuracy, or if CuDNN should rather
compute the log-softmax instead.
mode
'instance' or 'channel' indicating whether the softmax should
be computed per image across 'c01' or per spatial location '01' per
......
......@@ -193,16 +193,23 @@ class DownsampleFactorMax(Op):
c += padding[1] * 2
if ignore_border:
out_r = (r - ds[0]) // st[0] + 1
out_c = (c - ds[1]) // st[1] + 1
if isinstance(r, theano.Variable):
nr = tensor.maximum(out_r, 0)
if ds[0] == st[0]:
nr = r // st[0]
else:
nr = numpy.maximum(out_r, 0)
if isinstance(c, theano.Variable):
nc = tensor.maximum(out_c, 0)
out_r = (r - ds[0]) // st[0] + 1
if isinstance(r, theano.Variable):
nr = tensor.maximum(out_r, 0)
else:
nr = numpy.maximum(out_r, 0)
if ds[1] == st[1]:
nc = c // st[1]
else:
nc = numpy.maximum(out_c, 0)
out_c = (c - ds[1]) // st[1] + 1
if isinstance(c, theano.Variable):
nc = tensor.maximum(out_c, 0)
else:
nc = numpy.maximum(out_c, 0)
else:
if isinstance(r, theano.Variable):
nr = tensor.switch(tensor.ge(st[0], ds[0]),
......@@ -893,84 +900,6 @@ class AveragePoolGrad(PoolGrad):
class DownsampleFactorMaxGradGrad(Op):
__props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
@staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)):
    """
    Compute the output shape of this op for an input of the given shape.

    Parameters
    ----------
    imgshape : tuple, list, or similar of integer or scalar Theano variable
        Shape of the image tensor. Only the last two entries (number of
        rows, number of columns) are pooled; leading entries are copied
        to the result unchanged.
    ds : list or tuple of two ints
        Size of the pooling region over rows and columns.
    ignore_border : bool
        When True, only pooling regions that fit entirely inside the
        (padded) image are counted. When False, a trailing partial
        region adds one extra output row/column.
    st : list or tuple of two ints
        Stride, i.e. the distance between successive pooling regions.
        If None, it equals ``ds``.
    padding : tuple of two ints
        (pad_h, pad_w). Twice pad_h is added to the row count and twice
        pad_w to the column count before the output size is computed.

    Returns
    -------
    list
        A list of the same length as ``imgshape`` whose last two
        entries are replaced by the pooled row and column counts.

    Raises
    ------
    TypeError
        If ``imgshape`` has fewer than two elements.
    """
    if len(imgshape) < 2:
        raise TypeError('imgshape must have at least two elements '
                        '(rows, cols)')

    stride = ds if st is None else st

    def pooled_len(size, pool, step):
        # Output length along one axis. Symbolic sizes go through Theano
        # ops; concrete sizes use numpy / builtin arithmetic.
        symbolic = isinstance(size, theano.Variable)
        if ignore_border:
            full_windows = (size - pool) // step + 1
            if symbolic:
                return tensor.maximum(full_windows, 0)
            return numpy.maximum(full_windows, 0)
        if symbolic:
            return tensor.switch(
                tensor.ge(step, pool),
                (size - 1) // step + 1,
                tensor.maximum(0, (size - 1 - pool) // step + 1) + 1)
        if step >= pool:
            return (size - 1) // step + 1
        return max(0, (size - 1 - pool) // step + 1) + 1

    rows, cols = imgshape[-2:]
    # Padding is applied on both sides of each pooled axis.
    rows += padding[0] * 2
    cols += padding[1] * 2

    n_rows = pooled_len(rows, ds[0], stride[0])
    n_cols = pooled_len(cols, ds[1], stride[1])
    return list(imgshape[:-2]) + [n_rows, n_cols]
def __init__(self, ds, ignore_border, st=None, padding=(0, 0), mode='max'):
self.ds = tuple(ds)
if not all([isinstance(d, int) for d in ds]):
......@@ -1010,10 +939,8 @@ class DownsampleFactorMaxGradGrad(Op):
if len(x.shape) != 4:
raise NotImplementedError(
'DownsampleFactorMaxGradGrad requires 4D input for now')
z_shape = self.out_shape(x.shape, self.ds, self.ignore_border,
self.st, self.padding)
if (z[0] is None) or (z[0].shape != z_shape):
z[0] = numpy.zeros(z_shape, dtype=x.dtype)
if (z[0] is None) or (z[0].shape != x.shape):
z[0] = numpy.zeros(x.shape, dtype=x.dtype)
ggz = z[0] # grad wrt maxout_grad has the same shape as maxout
# number of pooling output rows
pr = ggz.shape[-2]
......@@ -1053,7 +980,7 @@ class DownsampleFactorMaxGradGrad(Op):
ggz[n, k, r, c] = ggx_padded[n, k, row_ind, col_ind]
def infer_shape(self, node, in_shapes):
return [in_shapes[0]]
return [in_shapes[1]]
def c_code(self, node, name, inp, out, sub):
if self.mode != 'max':
......
......@@ -582,10 +582,10 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
stridesize = stridesizes[i]
paddingsize = paddingsizes[i]
grad_shape = DownsampleFactorMaxGradGrad.out_shape(imval.shape,
maxpoolsize, st=stridesize,
ignore_border=True,
padding=paddingsize)
grad_shape = DownsampleFactorMax.out_shape(imval.shape,
maxpoolsize, st=stridesize,
ignore_border=True,
padding=paddingsize)
grad_val = rng.rand(*grad_shape) * 10.0
def mp(input, grad):
......
......@@ -106,7 +106,6 @@ whitelist_flake8 = [
"sandbox/tests/test_neighbourhoods.py",
"sandbox/tests/test_multinomial.py",
"sandbox/tests/__init__.py",
"sandbox/cuda/dnn.py",
"sandbox/cuda/var.py",
"sandbox/cuda/GpuConvGrad3D.py",
"sandbox/cuda/basic_ops.py",
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论