Commit 4e094fc0 · Author: Pascal Lamblin · Committer: GitHub

Merge pull request #5936 from HapeMask/cudnnv6_dilation

Add support for cudnn v6 dilated convolution.
...@@ -5,19 +5,19 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp, ...@@ -5,19 +5,19 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
cudnnStatus_t err; cudnnStatus_t err;
int pad[3] = {PAD_0, PAD_1, PAD_2}; int pad[3] = {PAD_0, PAD_1, PAD_2};
int strides[3] = {SUB_0, SUB_1, SUB_2}; int strides[3] = {SUB_0, SUB_1, SUB_2};
int upscale[3] = {1, 1, 1}; int dilation[3] = {DIL_0, DIL_1, DIL_2};
#if BORDER_MODE == 0 #if BORDER_MODE == 0
pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1; pad[0] = (*(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1) * DIL_0;
pad[1] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) - 1; pad[1] = (*(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) - 1) * DIL_1;
#if NB_DIMS > 2 #if NB_DIMS > 2
pad[2] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) - 1; pad[2] = (*(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) - 1) * DIL_2;
#endif #endif
#elif BORDER_MODE == 2 #elif BORDER_MODE == 2
pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) / 2; pad[0] = ((*(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1) * DIL_0 + 1) / 2;
pad[1] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) / 2; pad[1] = ((*(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) - 1) * DIL_1 + 1) / 2;
#if NB_DIMS > 2 #if NB_DIMS > 2
pad[2] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) / 2; pad[2] = ((*(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) - 1) * DIL_2 + 1) / 2;
#endif #endif
#endif #endif
...@@ -36,6 +36,11 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp, ...@@ -36,6 +36,11 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
} }
err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides, err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION); dilation, CONV_MODE, PRECISION);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not set convolution "
"descriptor: %s", cudnnGetErrorString(err));
return -1;
}
return 0; return 0;
} }
...@@ -131,11 +131,11 @@ def _dnn_check_version(): ...@@ -131,11 +131,11 @@ def _dnn_check_version():
if v < 5000: if v < 5000:
return False, "cuDNN version is too old. Update to v5, was %d." % v return False, "cuDNN version is too old. Update to v5, was %d." % v
# 5200 should not print warning with cudnn 5.1 final. # 5200 should not print warning with cudnn 5.1 final.
if v >= 5200: if v >= 6100:
warnings.warn("Your cuDNN version is more recent than " warnings.warn("Your cuDNN version is more recent than "
"Theano. If you encounter problems, try " "Theano. If you encounter problems, try "
"updating Theano or downgrading cuDNN to " "updating Theano or downgrading cuDNN to "
"version 5.1.") "version 6.0.")
return True, None return True, None
...@@ -363,7 +363,7 @@ class GpuDnnConvDesc(COp): ...@@ -363,7 +363,7 @@ class GpuDnnConvDesc(COp):
""" """
__props__ = ('border_mode', 'subsample', 'conv_mode', 'precision') __props__ = ('border_mode', 'subsample', 'dilation', 'conv_mode', 'precision')
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -380,10 +380,13 @@ class GpuDnnConvDesc(COp): ...@@ -380,10 +380,13 @@ class GpuDnnConvDesc(COp):
def do_constant_folding(self, node): def do_constant_folding(self, node):
return False return False
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv', def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv',
precision="float32"): precision="float32"):
COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)") COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
if version() < 6000 and any([d != 1 for d in dilation]):
raise RuntimeError("Dilation > 1 not supported for cuDNN version < 6.")
if isinstance(border_mode, integer_types): if isinstance(border_mode, integer_types):
border_mode = (border_mode,) * len(subsample) border_mode = (border_mode,) * len(subsample)
if isinstance(border_mode, tuple): if isinstance(border_mode, tuple):
...@@ -401,6 +404,9 @@ class GpuDnnConvDesc(COp): ...@@ -401,6 +404,9 @@ class GpuDnnConvDesc(COp):
assert conv_mode in ('conv', 'cross') assert conv_mode in ('conv', 'cross')
self.conv_mode = conv_mode self.conv_mode = conv_mode
assert len(dilation) == len(subsample)
self.dilation = dilation
assert precision in ['float16', 'float32', 'float64'] assert precision in ['float16', 'float32', 'float64']
self.precision = precision self.precision = precision
...@@ -452,6 +458,13 @@ class GpuDnnConvDesc(COp): ...@@ -452,6 +458,13 @@ class GpuDnnConvDesc(COp):
else: else:
sub2 = '0' sub2 = '0'
dil0 = str(self.dilation[0])
dil1 = str(self.dilation[1])
if len(self.dilation) > 2:
dil2 = str(self.dilation[2])
else:
dil2 = '0'
if self.precision == 'float16': if self.precision == 'float16':
precision = 'CUDNN_DATA_HALF' precision = 'CUDNN_DATA_HALF'
elif self.precision == 'float32': elif self.precision == 'float32':
...@@ -463,6 +476,7 @@ class GpuDnnConvDesc(COp): ...@@ -463,6 +476,7 @@ class GpuDnnConvDesc(COp):
return [('NB_DIMS', str(len(self.subsample))), return [('NB_DIMS', str(len(self.subsample))),
('BORDER_MODE', bmode), ('BORDER_MODE', bmode),
('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2), ('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2),
('DIL_0', dil0), ('DIL_1', dil1), ('DIL_2', dil2),
('CONV_MODE', conv_flag), ('CONV_MODE', conv_flag),
('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2), ('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2),
('PRECISION', precision)] ('PRECISION', precision)]
...@@ -470,6 +484,11 @@ class GpuDnnConvDesc(COp): ...@@ -470,6 +484,11 @@ class GpuDnnConvDesc(COp):
def c_code_cache_version(self): def c_code_cache_version(self):
return (super(GpuDnnConvDesc, self).c_code_cache_version(), version()) return (super(GpuDnnConvDesc, self).c_code_cache_version(), version())
def __setstate__(self, d):
self.__dict__.update(d)
if not hasattr(self, "dilation"):
self.dilation = (1,) * len(self.subsample)
# scalar constants # scalar constants
_zero = constant(np.asarray(0.0, dtype='float64')) _zero = constant(np.asarray(0.0, dtype='float64'))
...@@ -574,6 +593,7 @@ class GpuDnnConv(DnnBase): ...@@ -574,6 +593,7 @@ class GpuDnnConv(DnnBase):
img = as_gpuarray_variable(img, ctx_name) img = as_gpuarray_variable(img, ctx_name)
kern = as_gpuarray_variable(kern, ctx_name) kern = as_gpuarray_variable(kern, ctx_name)
output = as_gpuarray_variable(output, ctx_name) output = as_gpuarray_variable(output, ctx_name)
if img.type.ndim not in (4, 5): if img.type.ndim not in (4, 5):
raise TypeError('img must be 4D or 5D tensor') raise TypeError('img must be 4D or 5D tensor')
if kern.type.ndim not in (4, 5): if kern.type.ndim not in (4, 5):
...@@ -619,7 +639,7 @@ class GpuDnnConv(DnnBase): ...@@ -619,7 +639,7 @@ class GpuDnnConv(DnnBase):
return [[1], [1], [1], [0], [1], [1]] return [[1], [1], [1], [0], [1], [1]]
@staticmethod @staticmethod
def get_out_shape(ishape, kshape, border_mode, subsample): def get_out_shape(ishape, kshape, border_mode, subsample, dilation):
""" """
This function computes the output shape for a convolution with This function computes the output shape for a convolution with
the specified parameters. `ishape` and `kshape` can be symbolic the specified parameters. `ishape` and `kshape` can be symbolic
...@@ -638,7 +658,8 @@ class GpuDnnConv(DnnBase): ...@@ -638,7 +658,8 @@ class GpuDnnConv(DnnBase):
ishape, ishape,
kshape, kshape,
border_mode, border_mode,
subsample) subsample,
dilation)
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[2]] return [shape[2]]
...@@ -910,7 +931,7 @@ class GpuDnnConvGradI(DnnBase): ...@@ -910,7 +931,7 @@ class GpuDnnConvGradI(DnnBase):
return [shape[2]] return [shape[2]]
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', direction_hint=None, workmem=None, conv_mode='conv', direction_hint=None, workmem=None,
algo=None, precision=None): algo=None, precision=None):
""" """
...@@ -930,16 +951,20 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -930,16 +951,20 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
could be directly specified by an integer or a pair of integers. could be directly specified by an integer or a pair of integers.
subsample subsample
Perform subsampling of the output (default: (1, 1)). Perform subsampling of the output (default: (1, 1)).
dilation
Filter dilation factor. A dilation factor of d is equivalent to a
convolution with d - 1 zeros inserted between neighboring filter
values.
conv_mode conv_mode
Perform convolution (kernels flipped) or cross-correlation. Perform convolution (kernels flipped) or cross-correlation.
One of 'conv', 'cross' (default: 'conv'). One of 'conv', 'cross' (default: 'conv').
direction_hint direction_hint
Used by graph optimizers to change algorithm choice. Used by graph optimizers to change algorithm choice.
By default, GpuDnnConv will be used to carry out the convolution. By default, GpuDnnConv will be used to carry out the convolution.
If border_mode is 'valid', subsample is (1, 1) and direction_hint is If border_mode is 'valid', subsample is (1, 1), dilation is (1, 1), and
'bprop weights', it will use GpuDnnConvGradW. direction_hint is 'bprop weights', it will use GpuDnnConvGradW.
If border_mode is 'full', subsample is (1, 1) and direction_hint is If border_mode is 'full', subsample is (1, 1), dilation is (1, 1), and
*not* 'forward!', it will use GpuDnnConvGradI. direction_hint is *not* 'forward!', it will use GpuDnnConvGradI.
This parameter is used internally by graph optimizers and may be This parameter is used internally by graph optimizers and may be
removed at any time without a deprecation period. You have been warned. removed at any time without a deprecation period. You have been warned.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'} algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
...@@ -969,7 +994,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -969,7 +994,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
algo = workmem algo = workmem
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None) fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns) ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1) and if (border_mode == 'valid' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint == 'bprop weights'): direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set # Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for. # up a suitable 'fake' convolution to compute the gradient for.
...@@ -985,12 +1010,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -985,12 +1010,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1) shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc) conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name) return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
elif (border_mode == 'full' and subsample == (1, 1) and elif (border_mode == 'full' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint != 'forward!'): direction_hint != 'forward!'):
# Special case: We can be faster by using GpuDnnConvGradI to compute # Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution. # the full convolution as the backward pass of a valid convolution.
...@@ -1004,7 +1029,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1004,7 +1029,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1) shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
...@@ -1013,7 +1038,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1013,7 +1038,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# if the img contains negative strides # if the img contains negative strides
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on # We can use Shape_i and bypass the infer_shape here as this is on
...@@ -1022,13 +1047,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1022,13 +1047,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)] kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape, out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode, desc_op.border_mode,
desc_op.subsample) desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc) return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='conv', direction_hint=None, conv_mode='conv', direction_hint=None,
algo='none', precision=None): algo='none', precision=None):
""" """
...@@ -1047,17 +1073,23 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1047,17 +1073,23 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
One of 'valid', 'full', 'half'; additionally, the padding size One of 'valid', 'full', 'half'; additionally, the padding size
could be directly specified by an integer or a pair of integers. could be directly specified by an integer or a pair of integers.
subsample subsample
Perform subsampling of the output (default: (1, 1)). Perform subsampling of the output (default: (1, 1, 1)).
dilation
Filter dilation factor. A dilation factor of d is equivalent to a
convolution with d - 1 zeros inserted between neighboring filter
values.
conv_mode conv_mode
Perform convolution (kernels flipped) or cross-correlation. Perform convolution (kernels flipped) or cross-correlation.
One of 'conv', 'cross' (default: 'conv'). One of 'conv', 'cross' (default: 'conv').
direction_hint direction_hint
Used by graph optimizers to change algorithm choice. Used by graph optimizers to change algorithm choice.
By default, GpuDnnConv will be used to carry out the convolution. By default, GpuDnnConv will be used to carry out the convolution.
If border_mode is 'valid', subsample is (1, 1) and direction_hint is If border_mode is 'valid', subsample is (1, 1, 1), dilation is
'bprop weights', it will use GpuDnnConvGradW. (1, 1, 1), and direction_hint is 'bprop weights', it will use
If border_mode is 'full', subsample is (1, 1) and direction_hint is GpuDnnConvGradW.
*not* 'forward!', it will use GpuDnnConvGradI. If border_mode is 'full', subsample is (1, 1, 1), dilation is
(1, 1, 1), and direction_hint is *not* 'forward!', it will use
GpuDnnConvGradI.
This parameter is used internally by graph optimizers and may be This parameter is used internally by graph optimizers and may be
removed at any time without a deprecation period. You have been warned. removed at any time without a deprecation period. You have been warned.
algo : convolution implementation to use. Only 'none' is implemented algo : convolution implementation to use. Only 'none' is implemented
...@@ -1080,7 +1112,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1080,7 +1112,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None) fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns) ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1, 1) and if (border_mode == 'valid' and subsample == (1, 1, 1) and dilation == (1, 1, 1) and
direction_hint == 'bprop weights'): direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set # Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for. # up a suitable 'fake' convolution to compute the gradient for.
...@@ -1097,12 +1129,12 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1097,12 +1129,12 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1) shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc) conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4), ctx_name) return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4), ctx_name)
elif (border_mode == 'full' and subsample == (1, 1, 1) and elif (border_mode == 'full' and subsample == (1, 1, 1) and dilation == (1, 1, 1) and
direction_hint != 'forward!'): direction_hint != 'forward!'):
# Special case: We can be faster by using GpuDnnConvGradI to compute # Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution. # the full convolution as the backward pass of a valid convolution.
...@@ -1117,7 +1149,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1117,7 +1149,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1) shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
...@@ -1126,7 +1158,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1126,7 +1158,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# if the img contains negative strides # if the img contains negative strides
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on # We can use Shape_i and bypass the infer_shape here as this is on
...@@ -1135,14 +1167,15 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1135,14 +1167,15 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)] kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape, out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode, desc_op.border_mode,
desc_op.subsample) desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc) return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid', def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None): subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
""" """
TODO: document this TODO: document this
""" """
...@@ -1154,23 +1187,23 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid', ...@@ -1154,23 +1187,23 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
kerns_shp = as_tensor_variable(kerns_shp) kerns_shp = as_tensor_variable(kerns_shp)
precision = get_precision(precision, [img, topgrad]) precision = get_precision(precision, [img, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns_shp) conv_mode=conv_mode, precision=precision)(kerns_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp)
return GpuDnnConvGradW()(img, topgrad, out, desc) return GpuDnnConvGradW()(img, topgrad, out, desc)
def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid', def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
subsample=(1, 1, 1), conv_mode='conv', precision=None): subsample=(1, 1, 1), dilation=(1, 1, 1), conv_mode='conv', precision=None):
""" """
3d version of dnn_gradweight 3d version of dnn_gradweight
""" """
return dnn_gradweight(img, topgrad, kerns_shp, border_mode, return dnn_gradweight(img, topgrad, kerns_shp, border_mode,
subsample, conv_mode, precision) subsample, dilation, conv_mode, precision)
def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None): subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
""" """
TODO: document this TODO: document this
""" """
...@@ -1182,19 +1215,19 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', ...@@ -1182,19 +1215,19 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
img_shp = as_tensor_variable(img_shp) img_shp = as_tensor_variable(img_shp)
precision = get_precision(precision, [kerns, topgrad]) precision = get_precision(precision, [kerns, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp) out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
return GpuDnnConvGradI()(kerns, topgrad, out, desc) return GpuDnnConvGradI()(kerns, topgrad, out, desc)
def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid', def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid',
subsample=(1, 1, 1), conv_mode='conv', precision=None): subsample=(1, 1, 1), dilation=(1, 1, 1), conv_mode='conv', precision=None):
""" """
3d version of `dnn_gradinput`. 3d version of `dnn_gradinput`.
""" """
return dnn_gradinput(kerns, topgrad, img_shp, border_mode, subsample, return dnn_gradinput(kerns, topgrad, img_shp, border_mode, subsample,
conv_mode, precision) dilation, conv_mode, precision)
class GpuDnnPoolDesc(Op): class GpuDnnPoolDesc(Op):
...@@ -2711,7 +2744,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2711,7 +2744,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv2d_gradInputs))): AbstractConv2d_gradInputs))):
return return
if (op.filter_dilation != (1, 1)): if version() < 6000 and op.filter_dilation != (1, 1):
return None return None
inp1 = inputs[0] inp1 = inputs[0]
...@@ -2729,6 +2762,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2729,6 +2762,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv(inp1, inp2, rval = dnn_conv(inp1, inp2,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!', direction_hint='forward!',
conv_mode=conv_mode) conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradWeights): elif isinstance(op, AbstractConv2d_gradWeights):
...@@ -2737,6 +2771,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2737,6 +2771,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight(inp1, inp2, shape, rval = dnn_gradweight(inp1, inp2, shape,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradInputs): elif isinstance(op, AbstractConv2d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1], shape = (inp2.shape[0], inp1.shape[1],
...@@ -2744,6 +2779,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2744,6 +2779,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput(inp1, inp2, shape, rval = dnn_gradinput(inp1, inp2, shape,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
return [rval] return [rval]
...@@ -2754,7 +2790,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2754,7 +2790,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv3d_gradInputs))): AbstractConv3d_gradInputs))):
return return
if (op.filter_dilation != (1, 1, 1)): if version() < 6000 and op.filter_dilation != (1, 1, 1):
return None return None
inp1 = inputs[0] inp1 = inputs[0]
...@@ -2772,6 +2808,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2772,6 +2808,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv3d(inp1, inp2, rval = dnn_conv3d(inp1, inp2,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!', direction_hint='forward!',
conv_mode=conv_mode) conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradWeights): elif isinstance(op, AbstractConv3d_gradWeights):
...@@ -2780,6 +2817,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2780,6 +2817,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight3d(inp1, inp2, shape, rval = dnn_gradweight3d(inp1, inp2, shape,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradInputs): elif isinstance(op, AbstractConv3d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1], shape = (inp2.shape[0], inp1.shape[1],
...@@ -2787,6 +2825,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2787,6 +2825,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput3d(inp1, inp2, shape, rval = dnn_gradinput3d(inp1, inp2, shape,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
return [rval] return [rval]
......
...@@ -188,11 +188,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -188,11 +188,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int nd; int nd;
int pad[2]; int pad[2];
int stride[2]; int stride[2];
int upscale[2]; int dilation[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
cudnnDataType_t data_type; cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride, err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type); dilation, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s", "error getting convolution properties: %s",
......
...@@ -13,7 +13,7 @@ import theano.tensor as T ...@@ -13,7 +13,7 @@ import theano.tensor as T
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.tensor.signal.pool import pool_2d, pool_3d from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape from theano.tensor.nnet.abstract_conv import get_conv_output_shape, get_conv_gradinputs_shape
from theano.tensor.nnet import bn from theano.tensor.nnet import bn
from .. import dnn from .. import dnn
...@@ -45,9 +45,9 @@ def test_dnn_conv_desc_merge(): ...@@ -45,9 +45,9 @@ def test_dnn_conv_desc_merge():
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
kern_shp = T.as_tensor_variable( kern_shp = T.as_tensor_variable(
np.asarray([3, 1, 2, 2]).astype('int64')) np.asarray([3, 1, 2, 2]).astype('int64'))
desc1 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2), desc1 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2), dilation=(1, 1),
conv_mode='conv')(kern_shp) conv_mode='conv')(kern_shp)
desc2 = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1), desc2 = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1), dilation=(1, 1),
conv_mode='cross')(kern_shp) conv_mode='cross')(kern_shp)
# CDataType is not DeepCopyable so this will crash if we don't use # CDataType is not DeepCopyable so this will crash if we don't use
# borrow=True # borrow=True
...@@ -602,22 +602,25 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -602,22 +602,25 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnSoftmaxGrad dnn.GpuDnnSoftmaxGrad
) )
def _test_conv(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsamples, algo): def _test_conv(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsamples, dilations, algo):
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
img_val = np.asarray(img_val, dtype=theano.config.floatX) img_val = np.asarray(img_val, dtype=theano.config.floatX)
kern_vals = np.asarray(kern_vals, dtype=theano.config.floatX) kern_vals = np.asarray(kern_vals, dtype=theano.config.floatX)
for dilation in dilations:
for subsample in subsamples: for subsample in subsamples:
out_vals = np.zeros( out_vals = np.zeros(
dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape, dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=border_mode, border_mode=border_mode,
subsample=subsample), subsample=subsample,
dilation=dilation),
dtype=theano.config.floatX) dtype=theano.config.floatX)
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=border_mode, border_mode=border_mode,
subsample=subsample, subsample=subsample,
dilation=dilation,
conv_mode=conv_mode, conv_mode=conv_mode,
precision=set_precision(theano.config.floatX) precision=set_precision(theano.config.floatX)
)(kerns.shape) )(kerns.shape)
...@@ -637,34 +640,49 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -637,34 +640,49 @@ class TestDnnInferShapes(utt.InferShapeTester):
[conv_modes[0]])), [conv_modes[0]])),
testcase_func_name=utt.custom_name_func) testcase_func_name=utt.custom_name_func)
def test_conv(self, algo, border_mode, conv_mode): def test_conv(self, algo, border_mode, conv_mode):
# Currently only CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM (algo 'none')
# supports dilation > 1. The 'time_*' and 'guess_*' algos should fall back to it.
dilations = [(1, 1)]
if dnn.version() >= 6000 and (algo == "none" or "time_" in algo or "guess_" in algo):
dilations += [(2, 2)]
self._test_conv(T.tensor4('img'), self._test_conv(T.tensor4('img'),
T.tensor4('kerns'), T.tensor4('kerns'),
T.tensor4('out'), T.tensor4('out'),
np.random.rand(7, 2, 8, 4), np.random.rand(7, 2, 12, 16),
np.random.rand(8, 2, 4, 3), np.random.rand(8, 2, 4, 3),
border_mode, border_mode,
conv_mode, conv_mode,
[(1, 1), (2, 2)], [(1, 1), (2, 2)],
dilations,
algo) algo)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func) @parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv3d_none(self, border_mode, conv_mode): def test_conv3d_none(self, border_mode, conv_mode):
dilations = [(1, 1, 1), (2, 2, 2)] if dnn.version() >= 6000 else [(1, 1, 1)]
self._test_conv(T.tensor5('img'), self._test_conv(T.tensor5('img'),
T.tensor5('kerns'), T.tensor5('kerns'),
T.tensor5('out'), T.tensor5('out'),
np.random.rand(10, 2, 6, 4, 11), np.random.rand(10, 2, 15, 16, 17),
np.random.rand(8, 2, 4, 3, 1), np.random.rand(8, 2, 4, 3, 1),
border_mode, border_mode,
conv_mode, conv_mode,
[(1, 1, 1), (2, 2, 2)], [(1, 1, 1), (2, 2, 2)],
dilations,
'none') 'none')
def _test_conv_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample): def _test_conv_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsamples, dilations):
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
kerns_vals = np.zeros(kerns_shape, dtype=theano.config.floatX)
kerns_shape_shared = theano.shared(np.asarray(kerns_shape))
for dilation in dilations:
for subsample in subsamples:
topgrad_shape = get_conv_output_shape(img_shape, kerns_shape, topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
border_mode, subsample) border_mode, subsample, dilation)
img_val = np.asarray( img_val = np.asarray(
np.random.rand(*img_shape), np.random.rand(*img_shape),
...@@ -675,14 +693,13 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -675,14 +693,13 @@ class TestDnnInferShapes(utt.InferShapeTester):
dtype=theano.config.floatX dtype=theano.config.floatX
) )
kerns_vals = np.zeros(kerns_shape, dtype=theano.config.floatX)
kerns_shape = theano.shared(np.asarray(kerns_shape))
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=border_mode, border_mode=border_mode,
subsample=subsample, subsample=subsample,
dilation=dilation,
conv_mode=conv_mode, conv_mode=conv_mode,
precision=set_precision(theano.config.floatX) precision=set_precision(theano.config.floatX)
)(kerns_shape) )(kerns_shape_shared)
conv_grad_w = dnn.GpuDnnConvGradW()( conv_grad_w = dnn.GpuDnnConvGradW()(
img, img,
topgrad, topgrad,
...@@ -698,6 +715,8 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -698,6 +715,8 @@ class TestDnnInferShapes(utt.InferShapeTester):
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func) @parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv_gradw(self, border_mode, conv_mode): def test_conv_gradw(self, border_mode, conv_mode):
dilations = [(1, 1), (2, 2)] if dnn.version() >= 6000 else [(1, 1)]
self._test_conv_gradw(T.tensor4('img'), self._test_conv_gradw(T.tensor4('img'),
T.tensor4('topgrad'), T.tensor4('topgrad'),
T.tensor4('kerns'), T.tensor4('kerns'),
...@@ -705,7 +724,8 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -705,7 +724,8 @@ class TestDnnInferShapes(utt.InferShapeTester):
(1, 2, 3, 7), (1, 2, 3, 7),
border_mode, border_mode,
conv_mode, conv_mode,
(1, 1)) [(1, 1)],
dilations)
def test_conv_gradi(self): def test_conv_gradi(self):
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
...@@ -714,29 +734,28 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -714,29 +734,28 @@ class TestDnnInferShapes(utt.InferShapeTester):
kerns = T.tensor4('kerns') kerns = T.tensor4('kerns')
out = T.tensor4('out') out = T.tensor4('out')
kern_vals = np.asarray( kern_vals = np.asarray(
np.random.rand(13, 14, 15, 16), np.random.rand(13, 4, 5, 6),
dtype=theano.config.floatX dtype=theano.config.floatX
) )
out_vals = np.asarray( out_vals = np.asarray(
np.random.rand(3, 13, 5, 6), np.random.rand(3, 13, 9, 11),
dtype=theano.config.floatX dtype=theano.config.floatX
) )
for params in product( dilations = [(1, 1), (2, 2)] if dnn.version() >= 6000 else [(1, 1)]
['valid'], # Should this work for 'full'? for border_mode, subsample, dilation, conv_mode in product(
['valid', 'full'],
[(1, 1)], [(1, 1)],
dilations,
['conv', 'cross'] ['conv', 'cross']
): ):
shape = ( shape = get_conv_gradinputs_shape(kern_vals.shape, out_vals.shape, border_mode, subsample, dilation)
out_vals.shape[0], kern_vals.shape[1],
out_vals.shape[2] + kern_vals.shape[2] - 1,
out_vals.shape[3] + kern_vals.shape[3] - 1
)
img_vals = np.zeros(shape, dtype=theano.config.floatX) img_vals = np.zeros(shape, dtype=theano.config.floatX)
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=params[0], border_mode=border_mode,
subsample=params[1], subsample=subsample,
conv_mode=params[2], dilation=dilation,
conv_mode=conv_mode,
precision=set_precision(theano.config.floatX) precision=set_precision(theano.config.floatX)
)(kerns.shape) )(kerns.shape)
conv_grad_i = dnn.GpuDnnConvGradI()( conv_grad_i = dnn.GpuDnnConvGradI()(
...@@ -982,18 +1001,18 @@ def test_dnn_conv_grad(): ...@@ -982,18 +1001,18 @@ def test_dnn_conv_grad():
iw - kw + 1)).astype(theano.config.floatX) iw - kw + 1)).astype(theano.config.floatX)
def dconv(img, kern, out): def dconv(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', precision=set_precision(theano.config.floatX))(kern.shape) conv_mode='conv', precision=set_precision(theano.config.floatX))(kern.shape)
return dnn.GpuDnnConv()(img, kern, out, desc, alpha=0.5, beta=0.75) return dnn.GpuDnnConv()(img, kern, out, desc, alpha=0.5, beta=0.75)
def dconvi(img, kern, out): def dconvi(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', precision=set_precision(theano.config.floatX))(kern.shape) conv_mode='conv', precision=set_precision(theano.config.floatX))(kern.shape)
return dnn.GpuDnnConvGradI()(kern, out, img, desc, alpha=-1.0, return dnn.GpuDnnConvGradI()(kern, out, img, desc, alpha=-1.0,
beta=0.0) beta=0.0)
def dconvw(img, kern, out): def dconvw(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', precision=set_precision(theano.config.floatX))(kern.shape) conv_mode='conv', precision=set_precision(theano.config.floatX))(kern.shape)
return dnn.GpuDnnConvGradW()(img, out, kern, desc, alpha=0.75, return dnn.GpuDnnConvGradW()(img, out, kern, desc, alpha=0.75,
beta=-1.0) beta=-1.0)
...@@ -1005,29 +1024,37 @@ def test_dnn_conv_grad(): ...@@ -1005,29 +1024,37 @@ def test_dnn_conv_grad():
def get_conv3d_test_cases(): def get_conv3d_test_cases():
# Every element of test_shapes follows the format # Every element of test_shapes follows the format
# [input_shape, filter_shape, subsample] # [input_shape, filter_shape, subsample, dilation]
test_shapes = [[(128, 3, 5, 5, 5), (64, 3, 1, 2, 4), (1, 1, 1)], test_shapes = [[(128, 3, 5, 5, 5), (64, 3, 1, 2, 4), (1, 1, 1), (1, 1, 1)],
[(8, 4, 20, 12, 15), (5, 4, 6, 12, 4), (2, 2, 2)], [(8, 4, 20, 12, 15), (5, 4, 6, 12, 4), (2, 2, 2), (1, 1, 1)],
[(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 3, 3)], [(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 3, 3), (1, 1, 1)],
[(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 2, 1)], [(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 2, 1), (1, 1, 1)],
# Test with 1x1x1 filters # Test with 1x1x1 filters
[(8, 1, 10, 10, 10), (10, 1, 1, 1, 1), (1, 1, 1)], [(8, 1, 10, 10, 10), (10, 1, 1, 1, 1), (1, 1, 1), (1, 1, 1)],
# Test with dimensions larger than 1024 (thread block dim) # Test with dimensions larger than 1024 (thread block dim)
[(1025, 1, 2, 3, 4), (5, 1, 1, 2, 3), (1, 1, 1)], [(1025, 1, 2, 3, 4), (5, 1, 1, 2, 3), (1, 1, 1), (1, 1, 1)],
[(8, 1, 2, 3, 4), (1025, 1, 1, 2, 3), (1, 1, 1)], [(8, 1, 2, 3, 4), (1025, 1, 1, 2, 3), (1, 1, 1), (1, 1, 1)],
[(8, 1025, 2, 3, 4), (5, 1025, 1, 1, 2), (1, 1, 1)], [(8, 1025, 2, 3, 4), (5, 1025, 1, 1, 2), (1, 1, 1), (1, 1, 1)],
[(8, 1, 1030, 3, 4), (5, 1, 1025, 1, 1), (1, 1, 1)], [(8, 1, 1030, 3, 4), (5, 1, 1025, 1, 1), (1, 1, 1), (1, 1, 1)],
[(8, 1, 2, 1030, 4), (5, 1, 2, 1025, 1), (1, 1, 1)], [(8, 1, 2, 1030, 4), (5, 1, 2, 1025, 1), (1, 1, 1), (1, 1, 1)],
[(8, 1, 2, 3, 1030), (5, 1, 1, 2, 1025), (1, 1, 1)], [(8, 1, 2, 3, 1030), (5, 1, 1, 2, 1025), (1, 1, 1), (1, 1, 1)],
# The equivalent of this caused a crash with conv2d # The equivalent of this caused a crash with conv2d
[(1, 1, 1, 44800, 1), (6, 1, 1, 1, 1), (1, 1, 1)]] [(1, 1, 1, 44800, 1), (6, 1, 1, 1, 1), (1, 1, 1), (1, 1, 1)]]
# With border mode 'full', test with kernel bigger than image in some/all # With border mode 'full', test with kernel bigger than image in some/all
# dimensions # dimensions
test_shapes_full = [[(6, 2, 2, 2, 2), (4, 2, 3, 1, 1), (1, 1, 1)], test_shapes_full = [[(6, 2, 2, 2, 2), (4, 2, 3, 1, 1), (1, 1, 1), (1, 1, 1)],
[(6, 2, 2, 2, 2), (4, 2, 1, 3, 1), (1, 1, 1)], [(6, 2, 2, 2, 2), (4, 2, 1, 3, 1), (1, 1, 1), (1, 1, 1)],
[(6, 2, 2, 2, 2), (4, 2, 1, 1, 3), (1, 1, 1)], [(6, 2, 2, 2, 2), (4, 2, 1, 1, 3), (1, 1, 1), (1, 1, 1)],
[(6, 2, 2, 2, 2), (4, 2, 5, 5, 5), (1, 1, 1)]] [(6, 2, 2, 2, 2), (4, 2, 5, 5, 5), (1, 1, 1), (1, 1, 1)]]
if dnn.version() >= 6000:
test_shapes.extend([
[(8, 1, 20, 12, 15), (5, 1, 6, 3, 4), (1, 1, 2), (3, 2, 1)],
[(8, 1, 20, 12, 15), (5, 1, 6, 3, 4), (2, 2, 1), (1, 2, 3)]])
test_shapes_full.append(
[(6, 2, 2, 2, 2), (4, 2, 5, 5, 5), (1, 1, 1), (3, 2, 1)])
border_modes = ['valid', 'full', 'half', (1, 2, 3), (3, 2, 1), 1, 2] border_modes = ['valid', 'full', 'half', (1, 2, 3), (3, 2, 1), 1, 2]
conv_modes = ['conv', 'cross'] conv_modes = ['conv', 'cross']
...@@ -1044,7 +1071,7 @@ def test_conv3d_fwd(): ...@@ -1044,7 +1071,7 @@ def test_conv3d_fwd():
utt.seed_rng() utt.seed_rng()
def run_conv3d_fwd(inputs_shape, filters_shape, subsample, def run_conv3d_fwd(inputs_shape, filters_shape, subsample,
border_mode, conv_mode): dilation, border_mode, conv_mode):
inputs_val = np.random.random(inputs_shape).astype(theano.config.floatX) inputs_val = np.random.random(inputs_shape).astype(theano.config.floatX)
filters_val = np.random.random(filters_shape).astype(theano.config.floatX) filters_val = np.random.random(filters_shape).astype(theano.config.floatX)
...@@ -1060,6 +1087,7 @@ def test_conv3d_fwd(): ...@@ -1060,6 +1087,7 @@ def test_conv3d_fwd():
# Compile a theano function for the cuDNN implementation # Compile a theano function for the cuDNN implementation
conv = dnn.dnn_conv3d(img=inputs, kerns=filters, conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
border_mode=border_mode, subsample=subsample, border_mode=border_mode, subsample=subsample,
dilation=dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
f = theano.function([], conv, mode=mode_with_gpu) f = theano.function([], conv, mode=mode_with_gpu)
...@@ -1072,7 +1100,8 @@ def test_conv3d_fwd(): ...@@ -1072,7 +1100,8 @@ def test_conv3d_fwd():
# Compile a theano function for the reference implementation # Compile a theano function for the reference implementation
conv_ref = theano.tensor.nnet.corr3d.Corr3dMM(border_mode=border_mode, conv_ref = theano.tensor.nnet.corr3d.Corr3dMM(border_mode=border_mode,
subsample=subsample subsample=subsample,
filter_dilation=dilation,
)(ref_cast(inputs), flipped_filters) )(ref_cast(inputs), flipped_filters)
f_ref = theano.function([], conv_ref, mode="FAST_RUN") f_ref = theano.function([], conv_ref, mode="FAST_RUN")
...@@ -1087,8 +1116,8 @@ def test_conv3d_fwd(): ...@@ -1087,8 +1116,8 @@ def test_conv3d_fwd():
utt.assert_allclose(res_ref, res, rtol=rtol) utt.assert_allclose(res_ref, res, rtol=rtol)
test_cases = get_conv3d_test_cases() test_cases = get_conv3d_test_cases()
for (i_shape, f_shape, subsample), border_mode, conv_mode in test_cases: for (i_shape, f_shape, subsample, dilation), border_mode, conv_mode in test_cases:
yield (run_conv3d_fwd, i_shape, f_shape, subsample, border_mode, yield (run_conv3d_fwd, i_shape, f_shape, subsample, dilation, border_mode,
conv_mode) conv_mode)
...@@ -1099,7 +1128,7 @@ def test_conv3d_bwd(): ...@@ -1099,7 +1128,7 @@ def test_conv3d_bwd():
utt.seed_rng() utt.seed_rng()
def run_conv3d_bwd(inputs_shape, filters_shape, subsample, def run_conv3d_bwd(inputs_shape, filters_shape, subsample,
border_mode, conv_mode): dilation, border_mode, conv_mode):
inputs_val = np.random.random(inputs_shape).astype(theano.config.floatX) inputs_val = np.random.random(inputs_shape).astype(theano.config.floatX)
filters_val = np.random.random(filters_shape).astype(theano.config.floatX) filters_val = np.random.random(filters_shape).astype(theano.config.floatX)
...@@ -1109,7 +1138,9 @@ def test_conv3d_bwd(): ...@@ -1109,7 +1138,9 @@ def test_conv3d_bwd():
# Compile a theano function for the cuDNN implementation # Compile a theano function for the cuDNN implementation
conv = dnn.dnn_conv3d(img=inputs, kerns=filters, conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
border_mode=border_mode, subsample=subsample, border_mode=border_mode,
subsample=subsample,
dilation=dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters]) grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters])
...@@ -1125,7 +1156,8 @@ def test_conv3d_bwd(): ...@@ -1125,7 +1156,8 @@ def test_conv3d_bwd():
# Compile a theano function for the reference implementation # Compile a theano function for the reference implementation
conv_ref = theano.tensor.nnet.corr3d.Corr3dMM(border_mode=border_mode, conv_ref = theano.tensor.nnet.corr3d.Corr3dMM(border_mode=border_mode,
subsample=subsample subsample=subsample,
filter_dilation=dilation,
)(ref_cast(inputs), flipped_filters) )(ref_cast(inputs), flipped_filters)
(grad_i_ref, (grad_i_ref,
grad_w_ref) = theano.tensor.grad(conv_ref.sum(), grad_w_ref) = theano.tensor.grad(conv_ref.sum(),
...@@ -1145,8 +1177,8 @@ def test_conv3d_bwd(): ...@@ -1145,8 +1177,8 @@ def test_conv3d_bwd():
utt.assert_allclose(res_ref[1], res[1], rtol=rtol) utt.assert_allclose(res_ref[1], res[1], rtol=rtol)
test_cases = get_conv3d_test_cases() test_cases = get_conv3d_test_cases()
for (i_shape, f_shape, subsample), border_mode, conv_mode in test_cases: for (i_shape, f_shape, subsample, dilation), border_mode, conv_mode in test_cases:
yield (run_conv3d_bwd, i_shape, f_shape, subsample, border_mode, yield (run_conv3d_bwd, i_shape, f_shape, subsample, dilation, border_mode,
conv_mode) conv_mode)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论