Merge pull request #2259 from jia-kai/master

padding support for dnn_conv

Merge pull request #2259 from jia-kai/master
4c513ba6 · Frédéric Bastien · d8ffeccd · 333d471a · 4c513ba6 · 4c513ba6
--- a/theano/sandbox/cuda/__init__.py
+++ b/theano/sandbox/cuda/__init__.py
@@ -211,6 +211,8 @@ if cuda_available:
    except EnvironmentError, e:
        cuda_available = False
        cuda_initialization_error_message = " ".join(e.args)
+else:
+    cuda_initialization_error_message = 'cuda unavilable'


 class GpuOp(theano.gof.Op):

--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
 import copy
 import os
+import logging
+_logger = logging.getLogger(__name__)

 import theano
 from theano import Apply
@@ -504,39 +506,61 @@ gpu_ger_inplace = GpuGer(inplace=True)

 class BaseGpuCorrMM(GpuOp):
    """Base class for `GpuCorrMM`, `GpuCorrMM_gradWeights` and
-    `GpuCorrMM_gradInputs`. Cannot be used directly."""
+    `GpuCorrMM_gradInputs`. Cannot be used directly.

-    def __init__(self, border_mode="valid",
-            subsample=(1, 1),
-            pad=(0, 0)):
+    :param border_mode: one of 'valid', 'full', 'half'; alternatively, the
+        padding size can be given directly as a non-negative integer or a
+        pair of non-negative integers (pad_h, pad_w)
+    :param subsample: perform subsampling of the output (default: (1, 1))
+    :param pad: *deprecated*; specify the padding through border_mode instead
+    
+    """
+
+    def __init__(self, border_mode="valid", subsample=(1, 1), pad=(0, 0)):
+        if pad != (0, 0):
+            _logger.warning(
+                'do not use pad for BaseGpuCorrMM; please set padding in'
+                'border_mode, see the docstring for more details')
            if border_mode != "valid":
                raise ValueError("border_mode must be 'valid'")
+            border_mode = pad
+        if isinstance(border_mode, int):
+            border_mode = (border_mode, border_mode)
+        if isinstance(border_mode, tuple):
+            pad_h, pad_w = map(int, border_mode)
+            border_mode = (pad_h, pad_w)
+        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
+                border_mode in ('valid', 'full', 'half')):
+            raise ValueError(
+                'invalid border_mode {}, which must be either '
+                '"valid", "full", "half", an integer or a pair of'
+                ' integers'.format(border_mode))
        self.border_mode = border_mode
        if len(subsample) != 2:
            raise ValueError("subsample must have two elements")
        self.subsample = subsample
-        if (pad not in ("half", "full")) and (len(pad) != 2):
-            raise ValueError("pad must be 'half', 'full', or have two elements")
-        self.pad = pad
+
+    @property
+    def pad(self):
+        if self.border_mode != 'valid':
+            return self.border_mode
+        return (0, 0)

    def __eq__(self, other):
        return type(self) == type(other) \
            and self.border_mode == other.border_mode \
-            and self.subsample == other.subsample \
-            and self.pad == other.pad
+            and self.subsample == other.subsample

    def __hash__(self):
        return hash(type(self)) \
            ^ hash(self.border_mode) \
-            ^ hash(self.subsample) \
-            ^ hash(self.pad)
+            ^ hash(self.subsample)

    def __str__(self):
-        return '%s{%s, %s, pad=%r}' % (
+        return '%s{%s, %s}' % (
            self.__class__.__name__,
            self.border_mode,
-            str(self.subsample),
-            self.pad)
+            str(self.subsample))

    def flops(self, inp, outp):
        """ Useful with the hack in profilemode to print the MFlops"""
@@ -558,7 +582,7 @@ class BaseGpuCorrMM(GpuOp):

    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
-        return (0, 23)
+        return (0, 24)

    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
@@ -591,27 +615,28 @@ class BaseGpuCorrMM(GpuOp):
        :param sub: Dictionary of substitutions useable to help generating the
            C code.
        :param height: If self.subsample[0] != 1, a variable giving the height
-            of the filters for direction="backprop weights" or the height of the
-            input images for direction="backprop inputs".
-            If self.pad == 'half', a variable giving the height of the filters
-            for direction="backprop weights".
-            Ignored otherwise.
+            of the filters for direction="backprop weights" or the height of
+            the input images for direction="backprop inputs".
+
+            If self.border_mode == 'half', a variable giving the height of the
+            filters for direction="backprop weights".  Ignored otherwise.
        :param width: If self.subsample[1] != 1, a variable giving the width
            of the filters for direction="backprop weights" or the width of the
            input images for direction="backprop inputs".
-            If self.pad == 'half', a variable giving the width of the filters
-            for direction="backprop weights".
-            Ignored otherwise.
+
+            If self.border_mode == 'half', a variable giving the width of the
+            filters for direction="backprop weights".  Ignored otherwise.
        """
-        if self.border_mode != "valid":
-            raise ValueError("mode must be 'valid'")
        dH, dW = self.subsample
-        if self.pad == "half":
+        if self.border_mode == "half":
            padH = padW = -1
-        elif self.pad == "full":
+        elif self.border_mode == "full":
            padH = padW = -2
+        elif isinstance(self.border_mode, tuple):
+            padH, padW = self.border_mode
        else:
-            padH, padW = self.pad
+            assert self.border_mode == "valid"
+            padH = padW = 0
        if direction == "forward":
            direction = 0
            out = top
@@ -841,9 +866,9 @@ class GpuCorrMM(BaseGpuCorrMM):
        bottom, weights = inp
        top, = grads
        top = gpu_contiguous(top)
-        d_bottom = GpuCorrMM_gradInputs(self.border_mode, self.subsample, self.pad)(
+        d_bottom = GpuCorrMM_gradInputs(self.border_mode, self.subsample)(
            weights, top, bottom.shape[-2:])
-        d_weights = GpuCorrMM_gradWeights(self.border_mode, self.subsample, self.pad)(
+        d_weights = GpuCorrMM_gradWeights(self.border_mode, self.subsample)(
            bottom, top, weights.shape[-2:])
        return d_bottom, d_weights


--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -122,9 +122,7 @@ class GpuDnnConvDesc(GpuOp):
    """This Op builds a convolution descriptor for use in the other
    convolution operations.

-    :param border_mode: 'valid' or 'full'
-    :param subsample: The subsample, tuple like (dx, dy)
-    :param conv_mode: 'conv' or 'cross'
+    See :func:`dnn_conv` for a description of the parameters.

    """
    __props__ = ('border_mode', 'subsample', 'conv_mode')
@@ -142,7 +140,17 @@ class GpuDnnConvDesc(GpuOp):
        return NVCC_compiler

    def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv'):
-        assert border_mode in ('valid', 'full')
+        if isinstance(border_mode, int):
+            border_mode = (border_mode, border_mode)
+        if isinstance(border_mode, tuple):
+            pad_h, pad_w = map(int, border_mode)
+            border_mode = (pad_h, pad_w)
+        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
+                border_mode in ('valid', 'full')):
+            raise ValueError(
+                'invalid border_mode {}, which must be either '
+                '"valid", "full", an integer or a pair of'
+                ' integers'.format(border_mode))
        self.border_mode = border_mode
        assert len(subsample) == 2
        self.subsample = subsample
@@ -162,6 +170,13 @@ class GpuDnnConvDesc(GpuOp):
        img_shape, kern_shape = inputs
        desc, = outputs

+        if isinstance(self.border_mode, tuple):
+            pad_h_spec, pad_w_spec = map(int, self.border_mode)
+            assert pad_h_spec >= 0 and pad_w_spec >= 0
+            bmode = 2
+        else:
+            pad_h_spec = pad_w_spec = 0
+
            if self.border_mode == "valid":
                bmode = 1
            else:
@@ -185,7 +200,10 @@ class GpuDnnConvDesc(GpuOp):
    %(fail)s
  }

-  if (%(bmode)d == 1) {
+  if (%(bmode)d == 2) {
+    pad_h%(name)s = %(pad_h_spec)d;
+    pad_w%(name)s = %(pad_w_spec)d;
+  } else if (%(bmode)d == 1) {
    pad_h%(name)s = 0;
    pad_w%(name)s = 0;
  } else if (%(bmode)d == 0) {
@@ -218,10 +236,11 @@ class GpuDnnConvDesc(GpuOp):
 }
 """ % dict(name=name, img_shape=img_shape, kern_shape=kern_shape, desc=desc,
           bmode=bmode, conv_flag=conv_flag, fail=sub['fail'],
-           subsx=self.subsample[0], subsy=self.subsample[1])
+           subsx=self.subsample[0], subsy=self.subsample[1],
+           pad_h_spec=pad_h_spec, pad_w_spec=pad_w_spec)

    def c_code_cache_version(self):
-        return (1,)
+        return (2,)


 class GpuDnnConvBase(DnnBase):
@@ -459,7 +478,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),

    :param img: images to do the convolution over
    :param kerns: convolution filters
-    :param border_mode: one of 'valid', 'full' (default: 'valid')
+    :param border_mode: one of 'valid', 'full'; alternatively, the padding size
+        can be given directly as an integer or a pair of integers (>= 0)
    :param subsample: perform subsampling of the output (default: (1, 1))
    :param conv_mode: perform convolution (kernels flipped) or cross-correlation.  One of 'conv', 'cross'. (default: 'conv')


--- a/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
@@ -9,6 +9,7 @@ import traceback
 import numpy

 from nose.plugins.skip import SkipTest
+from nose.tools import assert_raises
 imported_scipy_convolve2d = False
 try:
    from scipy.signal import convolve2d
@@ -72,16 +73,21 @@ def py_conv_valid_numpy(img, kern):
                    out[b, k, rr, cc] = innerprod
    return out

+def py_conv_pad_img(img, pad_h, pad_w):
+    assert pad_h >= 0 and pad_w >= 0
+    padded_img = numpy.zeros(
+        (img.shape[0], img.shape[1],
+         pad_h * 2 + img.shape[2], pad_w * 2 + img.shape[3]),
+        dtype=img.dtype)
+    padded_img[:, :,
+               pad_h: pad_h + img.shape[2],
+               pad_w: pad_w + img.shape[3]] = img
+    return padded_img

 def py_conv_full_numpy(img, kern):
    # manually pad the img with zeros all around, and then run it
    # through py_conv_valid
-    pad_rows = 2 * (kern.shape[2] - 1) + img.shape[2]
-    pad_cols = 2 * (kern.shape[3] - 1) + img.shape[3]
-    padded_img = numpy.zeros((img.shape[0], img.shape[1], pad_rows, pad_cols),
-                             dtype=img.dtype)
-    padded_img[:, :, kern.shape[2] - 1: kern.shape[2] - 1 + img.shape[2],
-                     kern.shape[3] - 1: kern.shape[3] - 1 + img.shape[3]] = img
+    padded_img = py_conv_pad_img(img, kern.shape[2] - 1, kern.shape[3] - 1)
    return py_conv_valid_numpy(padded_img, kern)


@@ -90,6 +96,12 @@ def py_conv(img, kern, mode, subsample):
    use a scipy or numpy implementation depending is scipy is available.
    The scipy version is faster.
    """
+    if isinstance(mode, int):
+        mode = (mode, mode)
+    if isinstance(mode, tuple):
+        pad_h, pad_w = map(int, mode)
+        img = py_conv_pad_img(img, pad_h, pad_w)
+        mode = 'valid'
    if imported_scipy_convolve2d:
        return py_conv_scipy(img, kern, mode, subsample)
    elif mode == 'valid':
@@ -820,6 +832,63 @@ class TestConv2DGPU(unittest.TestCase):
        finally:
            theano_mode = theano_mode_orig

+class TestConvWithPadding(object):
+    """Test conv ops that support arbitrary padding via border_mode.
+    Note that, for the yield-based tests to work, we cannot subclass
+    unittest.TestCase.
+    """
+
+    @staticmethod
+    def gemm_conv_op(img, kern, border_mode):
+        kern = theano.sandbox.cuda.basic_ops.gpu_contiguous(
+            kern[:, :, ::-1, ::-1])
+        y = theano.sandbox.cuda.blas.GpuCorrMM(border_mode=border_mode)(
+            img, kern)
+        return y
+
+    conv_ops = []
+
+    @classmethod
+    def setup_class(cls):
+        cls.conv_ops.append(cls.gemm_conv_op)
+        if cuda.dnn.dnn_available():
+            cls.conv_ops.append(cuda.dnn.dnn_conv)
+
+    def test_invalid_arg(self):
+        img = theano._asarray(numpy.empty((1, 1, 1, 1)), dtype='float32')
+        kern = theano._asarray(numpy.empty((1, 1, 1, 1)), dtype='float32')
+        for i in self.conv_ops:
+            assert_raises(ValueError, i, img, kern,
+                              border_mode=(-1, 0))
+            assert_raises(ValueError, i, img, kern,
+                              border_mode=(0, -1))
+            assert_raises(ValueError, i, img, kern,
+                              border_mode='not border')
+
+    def _run_onecase(self, img_shape, kern_shape, padding, op):
+        npy_img = numpy.random.rand(*img_shape).astype('float32')
+        npy_kern = numpy.random.rand(*kern_shape).astype('float32')
+        img = theano._asarray(npy_img, dtype='float32')
+        kern = theano.shared(npy_kern)
+        border_mode = padding
+        cpuval = py_conv(npy_img, npy_kern, border_mode, (1, 1))
+        X = tensor.ftensor4()
+        Y = op(X, kern, border_mode=border_mode)
+        func = theano.function([X], Y, mode=theano_mode)
+        gpuval = numpy.asarray(func(img))
+        assert_allclose(cpuval, gpuval, rtol=1e-5, atol=1e-5)
+
+    def test_numeric_value(self):
+        params = [
+            ((5, 10, 4, 4), (12, 10, 4, 4), (2, 1)),
+            ((5, 10, 8, 8), (12, 10, 4, 4), 3),
+            ((5, 10, 6, 8), (12, 10, 3, 4), 'full'),
+            ((5, 10, 9, 6), (12, 10, 9, 4), 'valid')
+        ]
+        for img_shape, kern_shape, padding in params:
+            for op in self.conv_ops:
+                yield self._run_onecase, img_shape, kern_shape, padding, op
+

 def gemm_directly(bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsx, subsy,
                  direction):
@@ -879,8 +948,7 @@ def test_gemm_directly():


 def gemm_op(mode, subsample):
-    pad = 'full' if mode == 'full' else (0, 0)
-    return theano.sandbox.cuda.blas.GpuCorrMM('valid', subsample, pad)
+    return theano.sandbox.cuda.blas.GpuCorrMM(mode, subsample)


 def dnn_op(mode, subsample):