Commit 9f37bce1 authored by Gabe Schwartz

Added support for cudnn v6 dilated convolution.

Parent 89aac420
......@@ -5,7 +5,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
cudnnStatus_t err;
int pad[3] = {PAD_0, PAD_1, PAD_2};
int strides[3] = {SUB_0, SUB_1, SUB_2};
int upscale[3] = {1, 1, 1};
int dilation[3] = {DIL_0, DIL_1, DIL_2};
#if BORDER_MODE == 0
pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1;
......@@ -36,6 +36,11 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
}
err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION);
dilation, CONV_MODE, PRECISION);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not set convolution "
"descriptor: %s", cudnnGetErrorString(err));
return -1;
}
return 0;
}
......@@ -131,11 +131,11 @@ def _dnn_check_version():
if v < 5000:
return False, "cuDNN version is too old. Update to v5, was %d." % v
# 5200 should not print warning with cudnn 5.1 final.
if v >= 5200:
if v > 6020:
warnings.warn("Your cuDNN version is more recent than "
"Theano. If you encounter problems, try "
"updating Theano or downgrading cuDNN to "
"version 5.1.")
"version 6.0.")
return True, None
......@@ -363,7 +363,7 @@ class GpuDnnConvDesc(COp):
"""
__props__ = ('border_mode', 'subsample', 'conv_mode', 'precision')
__props__ = ('border_mode', 'subsample', 'dilation', 'conv_mode', 'precision')
def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h']
......@@ -380,7 +380,7 @@ class GpuDnnConvDesc(COp):
def do_constant_folding(self, node):
return False
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv',
def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv',
precision="float32"):
COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
......@@ -401,6 +401,10 @@ class GpuDnnConvDesc(COp):
assert conv_mode in ('conv', 'cross')
self.conv_mode = conv_mode
assert len(dilation) in (2, 3)
assert len(dilation) == len(subsample)
self.dilation = dilation
assert precision in ['float16', 'float32', 'float64']
self.precision = precision
......@@ -452,6 +456,18 @@ class GpuDnnConvDesc(COp):
else:
sub2 = '0'
if version() < 6000:
dil0 = '1'
dil1 = '1'
dil2 = '1'
else:
dil0 = str(self.dilation[0])
dil1 = str(self.dilation[1])
if len(self.dilation) > 2:
dil2 = str(self.dilation[2])
else:
dil2 = '0'
if self.precision == 'float16':
precision = 'CUDNN_DATA_HALF'
elif self.precision == 'float32':
......@@ -463,6 +479,7 @@ class GpuDnnConvDesc(COp):
return [('NB_DIMS', str(len(self.subsample))),
('BORDER_MODE', bmode),
('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2),
('DIL_0', dil0), ('DIL_1', dil1), ('DIL_2', dil2),
('CONV_MODE', conv_flag),
('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2),
('PRECISION', precision)]
......@@ -574,6 +591,7 @@ class GpuDnnConv(DnnBase):
img = as_gpuarray_variable(img, ctx_name)
kern = as_gpuarray_variable(kern, ctx_name)
output = as_gpuarray_variable(output, ctx_name)
if img.type.ndim not in (4, 5):
raise TypeError('img must be 4D or 5D tensor')
if kern.type.ndim not in (4, 5):
......@@ -897,7 +915,7 @@ class GpuDnnConvGradI(DnnBase):
return [shape[2]]
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', direction_hint=None, workmem=None,
algo=None, precision=None):
"""
......@@ -956,7 +974,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
algo = workmem
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1) and
if (border_mode == 'valid' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
......@@ -972,12 +990,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
elif (border_mode == 'full' and subsample == (1, 1) and
elif (border_mode == 'full' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint != 'forward!'):
# Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution.
......@@ -991,7 +1009,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc)
......@@ -1000,7 +1018,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# if the img contains negative strides
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on
......@@ -1009,13 +1027,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode,
desc_op.subsample)
desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='conv', direction_hint=None,
algo='none', precision=None):
"""
......@@ -1067,7 +1086,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1, 1) and
if (border_mode == 'valid' and subsample == (1, 1, 1) and dilation == (1, 1, 1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
......@@ -1084,7 +1103,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4), ctx_name)
......@@ -1104,7 +1123,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc)
......@@ -1113,7 +1132,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# if the img contains negative strides
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on
......@@ -1122,14 +1141,15 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode,
desc_op.subsample)
desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None):
subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
"""
TODO: document this
"""
......@@ -1141,7 +1161,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
kerns_shp = as_tensor_variable(kerns_shp)
precision = get_precision(precision, [img, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp)
return GpuDnnConvGradW()(img, topgrad, out, desc)
......@@ -1157,7 +1177,7 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None):
subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
"""
TODO: document this
"""
......@@ -1169,7 +1189,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
img_shp = as_tensor_variable(img_shp)
precision = get_precision(precision, [kerns, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape)
out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
return GpuDnnConvGradI()(kerns, topgrad, out, desc)
......@@ -2698,7 +2718,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv2d_gradInputs))):
return
if (op.filter_dilation != (1, 1)):
if version() < 6000 and op.filter_dilation != (1, 1):
return None
inp1 = inputs[0]
......@@ -2716,6 +2736,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv(inp1, inp2,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!',
conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradWeights):
......@@ -2724,6 +2745,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1],
......@@ -2731,6 +2753,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
return [rval]
......@@ -2741,7 +2764,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv3d_gradInputs))):
return
if (op.filter_dilation != (1, 1, 1)):
if version() < 6000 and op.filter_dilation != (1, 1, 1):
return None
inp1 = inputs[0]
......@@ -2759,6 +2782,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv3d(inp1, inp2,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!',
conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradWeights):
......@@ -2767,6 +2791,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight3d(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1],
......@@ -2774,6 +2799,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput3d(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
return [rval]
......
......@@ -188,11 +188,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int nd;
int pad[2];
int stride[2];
int upscale[2];
int dilation[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
dilation, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s",
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment