提交 22db3930 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #3786 from lamblin/abstract_conv_infershape

Abstract conv: fix tests and add infer_shape
......@@ -439,18 +439,6 @@ def test_default_conv():
assert any([isinstance(a.op, cuda.blas.GpuCorrMM)
for a in f.maker.fgraph.apply_nodes])
mode = theano_mode.excluding('local_conv_dnn', 'local_conv_gemm')
f = theano.function([img, fil], c, mode=mode)
assert any([isinstance(a.op, cuda.blas.GpuConv)
for a in f.maker.fgraph.apply_nodes])
mode = theano_mode.excluding('conv_dnn', 'conv_gemm')
f = theano.function([img, fil], c, mode=mode)
assert any([isinstance(a.op, cuda.blas.GpuConv)
for a in f.maker.fgraph.apply_nodes])
def _test_full(cls, mode=None, version=[-1], extra_shapes=[],
test_bigger_kernels=True):
......@@ -614,7 +602,8 @@ class TestConv2DGPU(unittest.TestCase):
cuda.blas.BaseGpuCorrMM)
def test_logical_shapes(self):
seed_rng()
# Logical shapes are not supported anymore, so we check that it
# raises an Exception.
for stride in range(1, 4):
kshp = (10, 2, 10, 10)
featshp = (3, 10, 11, 11)
......@@ -629,23 +618,14 @@ class TestConv2DGPU(unittest.TestCase):
featshp_logical = (featshp[0], featshp[1], featshp[2] * stride,
featshp[3] * stride)
kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
# print featshp, kshp_rotated, featshp_logical[1:], kshp[2:]
image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
border_mode='full',
image_shape=featshp,
filter_shape=kshp_rotated,
imshp_logical=featshp_logical[1:],
kshp_logical=kshp[2:])
self.assertRaises(ValueError, tensor.nnet.conv2d,
a, kernel_rotated,
border_mode='full',
image_shape=featshp,
filter_shape=kshp_rotated,
imshp_logical=featshp_logical[1:],
kshp_logical=kshp[2:])
func = theano.function([a, A], image_estimate, mode=theano_mode)
# theano.printing.debugprint(func,)
assert any([isinstance(node.op, self.conv_ops)
for node in func.maker.fgraph.toposort()])
a_in = numpy.random.randn(*featshp).astype("float32")
A_in = numpy.random.randn(*kshp).astype("float32")
func(a_in, A_in)
def test_invalid_input_shape(self):
"""
......@@ -838,17 +818,8 @@ def conv_grad(mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsample, op):
# TODO: also test custom pad values
corr_op = op(mode, subsample)(i, k)
# try to compile reference implementation without shape,
# so we don't have to compile hundreds of versions
conv_op = tensor.nnet.conv2d(i, k[:, :, ::-1, ::-1],
border_mode=mode, subsample=subsample)
try:
conv_op_di = theano.grad(conv_op.sum(), i)
conv_op_dk = theano.grad(conv_op.sum(), k)
except Exception:
# compile with shape information only when needed
conv_op = tensor.nnet.conv2d(i, k[:, :, ::-1, ::-1],
ishape, kshape, mode, subsample)
conv_op_di = theano.grad(conv_op.sum(), i)
conv_op_dk = theano.grad(conv_op.sum(), k)
corr_op_di = theano.grad(corr_op.sum(), i)
......@@ -856,18 +827,15 @@ def conv_grad(mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsample, op):
outputs = [corr_op, conv_op,
corr_op_di, conv_op_di,
corr_op_dk, conv_op_dk]
try:
conv_op_dik = theano.grad(conv_op_di.sum(), k)
conv_op_dki = theano.grad(conv_op_dk.sum(), i)
corr_op_dik = theano.grad(corr_op_di.sum(), k)
corr_op_dki = theano.grad(corr_op_dk.sum(), i)
outputs.extend([corr_op_dik, conv_op_dik,
corr_op_dki, conv_op_dki])
except Exception:
# skip if the reference implementation can't do it
pass
f = theano.function([i, k], outputs, mode=theano_mode.excluding('conv_dnn', 'conv_gemm'))
conv_op_dik = theano.grad(conv_op_di.sum(), k)
conv_op_dki = theano.grad(conv_op_dk.sum(), i)
corr_op_dik = theano.grad(corr_op_di.sum(), k)
corr_op_dki = theano.grad(corr_op_dk.sum(), i)
outputs.extend([corr_op_dik, conv_op_dik,
corr_op_dki, conv_op_dki])
f = theano.function([i, k], outputs, mode=theano_mode)
allvals = f(npy_img, npy_kern)
......
......@@ -101,16 +101,24 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
of shape (batch size, output channels, output rows, output columns)
"""
if 'imshp_logical' in kwargs or 'kshp_logical' in kwargs:
raise ValueError(
"Keyword arguments 'imshp_logical' and 'kshp_logical' for conv2d "
"are not supported anymore (and have not been a reliable way to "
"perform upsampling). That feature is still available by calling "
"theano.tensor.nnet.conv.conv2d() for the time being.")
if len(kwargs.keys()) > 0:
warnings.warn(str(kwargs.keys()) +
" are now deprecated in "
"`tensor.nnet.abstract_conv.conv2d` interface"
" and will be ignored.")
" and will be ignored.",
stacklevel=2)
if image_shape is not None:
warnings.warn("The `image_shape` keyword argument to "
"`tensor.nnet.conv2d` is deprecated, it has been "
"renamed to `input_shape`.")
"renamed to `input_shape`.",
stacklevel=2)
if input_shape is None:
input_shape = image_shape
else:
......
......@@ -269,6 +269,11 @@ class BaseAbstractConv2d(Op):
flops *= inputs[1] * filters[0] * inputs[0]
return flops
def do_constant_folding(self, node):
    """Tell the optimizer not to constant-fold nodes of this Op.

    Parameters
    ----------
    node
        The Apply node being considered; it is not inspected.

    Returns
    -------
    bool
        Always ``False``.
    """
    # Disable constant folding since there is no implementation.
    # This may change in the future.
    return False
class AbstractConv2d(BaseAbstractConv2d):
"""
......@@ -298,7 +303,10 @@ class AbstractConv2d(BaseAbstractConv2d):
return Apply(self, [img, kern], [output])
def perform(self, node, inp, out_):
raise NotImplementedError('AbstractConv2d theano optimization failed')
raise NotImplementedError(
'AbstractConv2d theano optimization failed. '
'Did you exclude both "conv_dnn" and "conv_gemm" from '
'the optimizer?')
def grad(self, inp, grads):
bottom, weights = inp
......@@ -322,6 +330,21 @@ class AbstractConv2d(BaseAbstractConv2d):
d_weights = patternbroadcast(d_weights, weights.broadcastable)
return d_bottom, d_weights
def infer_shape(self, node, input_shapes):
    """Return the output shape of this node as a one-element list.

    Parameters
    ----------
    node
        The Apply node; it is not inspected here.
    input_shapes
        Sequence whose first entry is the image shape and whose second
        entry is the kernel shape.

    Returns
    -------
    list
        ``[shape]``, where ``shape`` is whatever ``get_conv_output_shape``
        returns for the (possibly refined) image and kernel shapes, this
        Op's ``border_mode`` and its ``subsample``.
    """
    imshp = input_shapes[0]
    kshp = input_shapes[1]

    # replace symbolic shapes with known constant shapes
    # (an entry of self.imshp / self.kshp that is None means "unknown",
    # so the entry from the input shape is kept in that position)
    if self.imshp is not None:
        imshp = [imshp[i] if self.imshp[i] is None else self.imshp[i]
                 for i in range(4)]
    if self.kshp is not None:
        kshp = [kshp[i] if self.kshp[i] is None else self.kshp[i]
                for i in range(4)]

    res = get_conv_output_shape(imshp, kshp, self.border_mode,
                                self.subsample)
    return [res]
class AbstractConv2d_gradWeights(BaseAbstractConv2d):
"""Gradient wrt. filters for `AbstractConv2d`.
......@@ -358,7 +381,9 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
def perform(self, node, inp, out_):
raise NotImplementedError(
'AbstractConv2d_gradWeight theano optimization failed')
'AbstractConv2d_gradWeights theano optimization failed. '
'Did you exclude both "conv_dnn" and "conv_gemm" from '
'the optimizer?')
def grad(self, inp, grads):
bottom, top = inp[:2]
......@@ -387,6 +412,19 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
def connection_pattern(self, node):
return [[1], [1], [0]] # no connection to height, width
def infer_shape(self, node, input_shapes):
    """Return the shape of the kernel gradient as a one-element list.

    Parameters
    ----------
    node
        The Apply node; ``node.inputs[2]`` is the `shape` input, whose
        first two entries supply the last two kernel dimensions when
        they are not known from ``self.kshp``.
    input_shapes
        Sequence whose first entry is the image shape and whose second
        entry is the shape of the top (output-side) input.

    Returns
    -------
    list
        ``[kshp]``, where ``kshp`` is a list of four entries.
    """
    # We use self.kshp (that was passed when creating the Op) if possible,
    # or fall back to the `shape` input of the node.
    # TODO: when there is no subsampling, try to infer the kernel shape
    # from the shapes of inputs.
    imshp = input_shapes[0]
    topshp = input_shapes[1]
    # self.kshp is only read below, so no defensive copy is needed.
    kshp = self.kshp if self.kshp is not None else [None] * 4
    fallback_kshp = [topshp[1], imshp[1],
                     node.inputs[2][0], node.inputs[2][1]]
    kshp = [fallback_kshp[i] if kshp[i] is None else kshp[i]
            for i in range(4)]
    return [kshp]
class AbstractConv2d_gradInputs(BaseAbstractConv2d):
"""Gradient wrt. inputs for `AbstractConv2d`.
......@@ -424,7 +462,9 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
def perform(self, node, inp, out_):
raise NotImplementedError(
'AbstractConv2d_gradWeight theano optimization failed')
'AbstractConv2d_gradInputs theano optimization failed. '
'Did you exclude both "conv_dnn" and "conv_gemm" from '
'the optimizer?')
def grad(self, inp, grads):
weights, top = inp[:2]
......@@ -448,3 +488,17 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
def connection_pattern(self, node):
return [[1], [1], [0]] # no connection to height, width
def infer_shape(self, node, input_shapes):
    """Return the shape of the image gradient as a one-element list.

    Parameters
    ----------
    node
        The Apply node; ``node.inputs[2]`` is the `shape` input, whose
        first two entries supply the last two image dimensions when
        they are not known from ``self.imshp``.
    input_shapes
        Sequence whose first entry is the kernel shape and whose second
        entry is the shape of the top (output-side) input.

    Returns
    -------
    list
        ``[imshp]``, where ``imshp`` is a list of four entries.
    """
    # We use self.imshp (that was passed when creating the Op) if possible,
    # or fall back to the `shape` input of the node.
    # TODO: when there is no subsampling, try to infer the image shape
    # from the shapes of inputs.
    kshp = input_shapes[0]
    topshp = input_shapes[1]
    # self.imshp is only read below, so no defensive copy is needed.
    imshp = self.imshp if self.imshp is not None else [None] * 4
    fallback_imshp = [topshp[0], kshp[1],
                      node.inputs[2][0], node.inputs[2][1]]
    imshp = [fallback_imshp[i] if imshp[i] is None else imshp[i]
             for i in range(4)]
    return [imshp]
......@@ -367,7 +367,7 @@ class ConvOp(OpenMPOp):
# with s=1 for mode=='full' and s=-1 for mode=='valid'.
# To support symbolic shapes, we express this with integer arithmetics.
warnings.warn("The method `getOutputShape` is deprecated use"
"`get_conv_output_shape` instead.")
"`get_conv_output_shape` instead.", stacklevel=2)
return tuple(get_conv_shape_1axis(i, k, mode, d)
for i, k, d in zip(inshp, kshp, stride))
......
......@@ -13,8 +13,14 @@ from theano.tests.unittest_tools import attr
class TestConv2D(utt.InferShapeTester):
# This class contains tests for the legacy 2d convolution,
# but will also be inherited from for other implementations
mode = None
dtype = theano.config.floatX
# This will be set to the appropriate function in the inherited classes.
# The call to `staticmethod` is necessary to prevent Python from passing
# `self` as the first argument.
conv2d = staticmethod(conv.conv2d)
def setUp(self):
super(TestConv2D, self).setUp()
......@@ -435,7 +441,7 @@ class TestConv2D(utt.InferShapeTester):
input = theano.shared(numpy.random.random(image_shape))
filters = theano.shared(numpy.random.random(filter_shape))
output = conv.conv2d(input, filters,
output = self.conv2d(input, filters,
image_shape, filter_shape,
border_mode,
unroll_patch=True,
......@@ -465,62 +471,75 @@ class TestConv2D(utt.InferShapeTester):
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
aivec_val = [6, 2, 8, 3]
bivec_val = [4, 2, 5, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
aivec_val = [3, 6, 7, 5]
bivec_val = [5, 6, 3, 2]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
aivec_val = [3, 6, 7, 5]
bivec_val = [5, 6, 2, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
aivec_val = [5, 2, 4, 3]
bivec_val = [6, 2, 4, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
[self.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp,
excluding=['conv_gemm'])
class TestDefaultConv2D(TestConv2D):
conv2d = staticmethod(theano.tensor.nnet.conv2d)
# Test that broadcasting of gradients works correctly when using the
# nnet.conv2d() interface. This was reported in #3763, and uses the example
# code from that ticket.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论