提交 4e094fc0 authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #5936 from HapeMask/cudnnv6_dilation

Add support for cudnn v6 dilated convolution.
......@@ -5,19 +5,19 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
cudnnStatus_t err;
int pad[3] = {PAD_0, PAD_1, PAD_2};
int strides[3] = {SUB_0, SUB_1, SUB_2};
int upscale[3] = {1, 1, 1};
int dilation[3] = {DIL_0, DIL_1, DIL_2};
#if BORDER_MODE == 0
pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1;
pad[1] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) - 1;
pad[0] = (*(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1) * DIL_0;
pad[1] = (*(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) - 1) * DIL_1;
#if NB_DIMS > 2
pad[2] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) - 1;
pad[2] = (*(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) - 1) * DIL_2;
#endif
#elif BORDER_MODE == 2
pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) / 2;
pad[1] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) / 2;
pad[0] = ((*(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1) * DIL_0 + 1) / 2;
pad[1] = ((*(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) - 1) * DIL_1 + 1) / 2;
#if NB_DIMS > 2
pad[2] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) / 2;
pad[2] = ((*(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) - 1) * DIL_2 + 1) / 2;
#endif
#endif
......@@ -36,6 +36,11 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
}
err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION);
dilation, CONV_MODE, PRECISION);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not set convolution "
"descriptor: %s", cudnnGetErrorString(err));
return -1;
}
return 0;
}
......@@ -131,11 +131,11 @@ def _dnn_check_version():
if v < 5000:
return False, "cuDNN version is too old. Update to v5, was %d." % v
# 6100 should not print warning with cudnn 6.0 final.
if v >= 5200:
if v >= 6100:
warnings.warn("Your cuDNN version is more recent than "
"Theano. If you encounter problems, try "
"updating Theano or downgrading cuDNN to "
"version 5.1.")
"version 6.0.")
return True, None
......@@ -363,7 +363,7 @@ class GpuDnnConvDesc(COp):
"""
__props__ = ('border_mode', 'subsample', 'conv_mode', 'precision')
__props__ = ('border_mode', 'subsample', 'dilation', 'conv_mode', 'precision')
def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h']
......@@ -380,10 +380,13 @@ class GpuDnnConvDesc(COp):
def do_constant_folding(self, node):
return False
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv',
def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv',
precision="float32"):
COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
if version() < 6000 and any([d != 1 for d in dilation]):
raise RuntimeError("Dilation > 1 not supported for cuDNN version < 6.")
if isinstance(border_mode, integer_types):
border_mode = (border_mode,) * len(subsample)
if isinstance(border_mode, tuple):
......@@ -401,6 +404,9 @@ class GpuDnnConvDesc(COp):
assert conv_mode in ('conv', 'cross')
self.conv_mode = conv_mode
assert len(dilation) == len(subsample)
self.dilation = dilation
assert precision in ['float16', 'float32', 'float64']
self.precision = precision
......@@ -452,6 +458,13 @@ class GpuDnnConvDesc(COp):
else:
sub2 = '0'
dil0 = str(self.dilation[0])
dil1 = str(self.dilation[1])
if len(self.dilation) > 2:
dil2 = str(self.dilation[2])
else:
dil2 = '0'
if self.precision == 'float16':
precision = 'CUDNN_DATA_HALF'
elif self.precision == 'float32':
......@@ -463,6 +476,7 @@ class GpuDnnConvDesc(COp):
return [('NB_DIMS', str(len(self.subsample))),
('BORDER_MODE', bmode),
('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2),
('DIL_0', dil0), ('DIL_1', dil1), ('DIL_2', dil2),
('CONV_MODE', conv_flag),
('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2),
('PRECISION', precision)]
......@@ -470,6 +484,11 @@ class GpuDnnConvDesc(COp):
def c_code_cache_version(self):
return (super(GpuDnnConvDesc, self).c_code_cache_version(), version())
# Pickle protocol hook: restore this op's attributes from the saved state
# dict ``d``.
def __setstate__(self, d):
self.__dict__.update(d)
# A state saved before the ``dilation`` prop was introduced carries no
# such attribute; default to a factor of 1 (i.e. no dilation) for each
# entry of ``subsample`` so that older ops keep their behaviour.
if not hasattr(self, "dilation"):
self.dilation = (1,) * len(self.subsample)
# scalar constants
_zero = constant(np.asarray(0.0, dtype='float64'))
......@@ -574,6 +593,7 @@ class GpuDnnConv(DnnBase):
img = as_gpuarray_variable(img, ctx_name)
kern = as_gpuarray_variable(kern, ctx_name)
output = as_gpuarray_variable(output, ctx_name)
if img.type.ndim not in (4, 5):
raise TypeError('img must be 4D or 5D tensor')
if kern.type.ndim not in (4, 5):
......@@ -619,7 +639,7 @@ class GpuDnnConv(DnnBase):
return [[1], [1], [1], [0], [1], [1]]
@staticmethod
def get_out_shape(ishape, kshape, border_mode, subsample):
def get_out_shape(ishape, kshape, border_mode, subsample, dilation):
"""
This function computes the output shape for a convolution with
the specified parameters. `ishape` and `kshape` can be symbolic
......@@ -638,7 +658,8 @@ class GpuDnnConv(DnnBase):
ishape,
kshape,
border_mode,
subsample)
subsample,
dilation)
def infer_shape(self, node, shape):
return [shape[2]]
......@@ -910,7 +931,7 @@ class GpuDnnConvGradI(DnnBase):
return [shape[2]]
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', direction_hint=None, workmem=None,
algo=None, precision=None):
"""
......@@ -930,16 +951,20 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
could be directly specified by an integer or a pair of integers.
subsample
Perform subsampling of the output (default: (1, 1)).
dilation
Filter dilation factor (default: (1, 1)). A dilation factor of d is
equivalent to a convolution with d - 1 zeros inserted between
neighboring filter values. Factors other than 1 require cuDNN v6 or
later.
conv_mode
Perform convolution (kernels flipped) or cross-correlation.
One of 'conv', 'cross' (default: 'conv').
direction_hint
Used by graph optimizers to change algorithm choice.
By default, GpuDnnConv will be used to carry out the convolution.
If border_mode is 'valid', subsample is (1, 1) and direction_hint is
'bprop weights', it will use GpuDnnConvGradW.
If border_mode is 'full', subsample is (1, 1) and direction_hint is
*not* 'forward!', it will use GpuDnnConvGradI.
If border_mode is 'valid', subsample is (1, 1), dilation is (1, 1), and
direction_hint is 'bprop weights', it will use GpuDnnConvGradW.
If border_mode is 'full', subsample is (1, 1), dilation is (1, 1), and
direction_hint is *not* 'forward!', it will use GpuDnnConvGradI.
This parameter is used internally by graph optimizers and may be
removed at any time without a deprecation period. You have been warned.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
......@@ -969,7 +994,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
algo = workmem
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1) and
if (border_mode == 'valid' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
......@@ -985,12 +1010,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
elif (border_mode == 'full' and subsample == (1, 1) and
elif (border_mode == 'full' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint != 'forward!'):
# Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution.
......@@ -1004,7 +1029,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc)
......@@ -1013,7 +1038,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# if the img contains negative strides
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on
......@@ -1022,13 +1047,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode,
desc_op.subsample)
desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='conv', direction_hint=None,
algo='none', precision=None):
"""
......@@ -1047,17 +1073,23 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
One of 'valid', 'full', 'half'; additionally, the padding size
could be directly specified by an integer or a triple of integers.
subsample
Perform subsampling of the output (default: (1, 1)).
Perform subsampling of the output (default: (1, 1, 1)).
dilation
Filter dilation factor (default: (1, 1, 1)). A dilation factor of d is
equivalent to a convolution with d - 1 zeros inserted between
neighboring filter values. Factors other than 1 require cuDNN v6 or
later.
conv_mode
Perform convolution (kernels flipped) or cross-correlation.
One of 'conv', 'cross' (default: 'conv').
direction_hint
Used by graph optimizers to change algorithm choice.
By default, GpuDnnConv will be used to carry out the convolution.
If border_mode is 'valid', subsample is (1, 1) and direction_hint is
'bprop weights', it will use GpuDnnConvGradW.
If border_mode is 'full', subsample is (1, 1) and direction_hint is
*not* 'forward!', it will use GpuDnnConvGradI.
If border_mode is 'valid', subsample is (1, 1, 1), dilation is
(1, 1, 1), and direction_hint is 'bprop weights', it will use
GpuDnnConvGradW.
If border_mode is 'full', subsample is (1, 1, 1), dilation is
(1, 1, 1), and direction_hint is *not* 'forward!', it will use
GpuDnnConvGradI.
This parameter is used internally by graph optimizers and may be
removed at any time without a deprecation period. You have been warned.
algo : convolution implementation to use. Only 'none' is implemented
......@@ -1080,7 +1112,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1, 1) and
if (border_mode == 'valid' and subsample == (1, 1, 1) and dilation == (1, 1, 1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
......@@ -1097,12 +1129,12 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4), ctx_name)
elif (border_mode == 'full' and subsample == (1, 1, 1) and
elif (border_mode == 'full' and subsample == (1, 1, 1) and dilation == (1, 1, 1) and
direction_hint != 'forward!'):
# Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution.
......@@ -1117,7 +1149,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc)
......@@ -1126,7 +1158,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# if the img contains negative strides
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on
......@@ -1135,14 +1167,15 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode,
desc_op.subsample)
desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None):
subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
"""
TODO: document this
"""
......@@ -1154,23 +1187,23 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
kerns_shp = as_tensor_variable(kerns_shp)
precision = get_precision(precision, [img, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp)
return GpuDnnConvGradW()(img, topgrad, out, desc)
def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
subsample=(1, 1, 1), conv_mode='conv', precision=None):
subsample=(1, 1, 1), dilation=(1, 1, 1), conv_mode='conv', precision=None):
"""
3d version of dnn_gradweight
"""
return dnn_gradweight(img, topgrad, kerns_shp, border_mode,
subsample, conv_mode, precision)
subsample, dilation, conv_mode, precision)
def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None):
subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
"""
TODO: document this
"""
......@@ -1182,19 +1215,19 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
img_shp = as_tensor_variable(img_shp)
precision = get_precision(precision, [kerns, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape)
out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
return GpuDnnConvGradI()(kerns, topgrad, out, desc)
def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid',
subsample=(1, 1, 1), conv_mode='conv', precision=None):
subsample=(1, 1, 1), dilation=(1, 1, 1), conv_mode='conv', precision=None):
"""
3d version of `dnn_gradinput`.
"""
return dnn_gradinput(kerns, topgrad, img_shp, border_mode, subsample,
conv_mode, precision)
dilation, conv_mode, precision)
class GpuDnnPoolDesc(Op):
......@@ -2711,7 +2744,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv2d_gradInputs))):
return
if (op.filter_dilation != (1, 1)):
if version() < 6000 and op.filter_dilation != (1, 1):
return None
inp1 = inputs[0]
......@@ -2729,6 +2762,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv(inp1, inp2,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!',
conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradWeights):
......@@ -2737,6 +2771,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1],
......@@ -2744,6 +2779,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
return [rval]
......@@ -2754,7 +2790,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv3d_gradInputs))):
return
if (op.filter_dilation != (1, 1, 1)):
if version() < 6000 and op.filter_dilation != (1, 1, 1):
return None
inp1 = inputs[0]
......@@ -2772,6 +2808,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv3d(inp1, inp2,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!',
conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradWeights):
......@@ -2780,6 +2817,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight3d(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1],
......@@ -2787,6 +2825,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput3d(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
return [rval]
......
......@@ -188,11 +188,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int nd;
int pad[2];
int stride[2];
int upscale[2];
int dilation[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
dilation, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s",
......
......@@ -13,7 +13,7 @@ import theano.tensor as T
import theano.tests.unittest_tools as utt
from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape
from theano.tensor.nnet.abstract_conv import get_conv_output_shape, get_conv_gradinputs_shape
from theano.tensor.nnet import bn
from .. import dnn
......@@ -45,9 +45,9 @@ def test_dnn_conv_desc_merge():
raise SkipTest(dnn.dnn_available.msg)
kern_shp = T.as_tensor_variable(
np.asarray([3, 1, 2, 2]).astype('int64'))
desc1 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
desc1 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2), dilation=(1, 1),
conv_mode='conv')(kern_shp)
desc2 = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1),
desc2 = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1), dilation=(1, 1),
conv_mode='cross')(kern_shp)
# CDataType is not DeepCopyable so this will crash if we don't use
# borrow=True
......@@ -602,32 +602,35 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnSoftmaxGrad
)
def _test_conv(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsamples, algo):
def _test_conv(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsamples, dilations, algo):
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
img_val = np.asarray(img_val, dtype=theano.config.floatX)
kern_vals = np.asarray(kern_vals, dtype=theano.config.floatX)
for subsample in subsamples:
out_vals = np.zeros(
dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=border_mode,
subsample=subsample),
dtype=theano.config.floatX)
desc = dnn.GpuDnnConvDesc(
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode,
precision=set_precision(theano.config.floatX)
)(kerns.shape)
conv = dnn.GpuDnnConv(algo=algo)(img, kerns, out, desc)
self._compile_and_check(
[img, kerns, out],
[conv],
[img_val, kern_vals, out_vals],
dnn.GpuDnnConv
)
for dilation in dilations:
for subsample in subsamples:
out_vals = np.zeros(
dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=border_mode,
subsample=subsample,
dilation=dilation),
dtype=theano.config.floatX)
desc = dnn.GpuDnnConvDesc(
border_mode=border_mode,
subsample=subsample,
dilation=dilation,
conv_mode=conv_mode,
precision=set_precision(theano.config.floatX)
)(kerns.shape)
conv = dnn.GpuDnnConv(algo=algo)(img, kerns, out, desc)
self._compile_and_check(
[img, kerns, out],
[conv],
[img_val, kern_vals, out_vals],
dnn.GpuDnnConv
)
@parameterized.expand(chain(product([SUPPORTED_DNN_CONV_ALGO_FWD[0]],
border_modes,
......@@ -637,67 +640,83 @@ class TestDnnInferShapes(utt.InferShapeTester):
[conv_modes[0]])),
testcase_func_name=utt.custom_name_func)
def test_conv(self, algo, border_mode, conv_mode):
# Currently only CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM (algo 'none')
# supports dilation > 1. 'time*' and 'guess*' should fall back to it.
dilations = [(1, 1)]
if dnn.version() >= 6000 and (algo == "none" or "time_" in algo or "guess_" in algo):
dilations += [(2, 2)]
self._test_conv(T.tensor4('img'),
T.tensor4('kerns'),
T.tensor4('out'),
np.random.rand(7, 2, 8, 4),
np.random.rand(7, 2, 12, 16),
np.random.rand(8, 2, 4, 3),
border_mode,
conv_mode,
[(1, 1), (2, 2)],
dilations,
algo)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv3d_none(self, border_mode, conv_mode):
dilations = [(1, 1, 1), (2, 2, 2)] if dnn.version() >= 6000 else [(1, 1, 1)]
self._test_conv(T.tensor5('img'),
T.tensor5('kerns'),
T.tensor5('out'),
np.random.rand(10, 2, 6, 4, 11),
np.random.rand(10, 2, 15, 16, 17),
np.random.rand(8, 2, 4, 3, 1),
border_mode,
conv_mode,
[(1, 1, 1), (2, 2, 2)],
dilations,
'none')
def _test_conv_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample):
def _test_conv_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsamples, dilations):
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
border_mode, subsample)
kerns_vals = np.zeros(kerns_shape, dtype=theano.config.floatX)
kerns_shape_shared = theano.shared(np.asarray(kerns_shape))
img_val = np.asarray(
np.random.rand(*img_shape),
dtype=theano.config.floatX
)
topgrad_vals = np.asarray(
np.random.rand(*topgrad_shape),
dtype=theano.config.floatX
)
for dilation in dilations:
for subsample in subsamples:
topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
border_mode, subsample, dilation)
kerns_vals = np.zeros(kerns_shape, dtype=theano.config.floatX)
kerns_shape = theano.shared(np.asarray(kerns_shape))
desc = dnn.GpuDnnConvDesc(
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode,
precision=set_precision(theano.config.floatX)
)(kerns_shape)
conv_grad_w = dnn.GpuDnnConvGradW()(
img,
topgrad,
kerns,
desc,
)
self._compile_and_check(
[img, topgrad, kerns],
[conv_grad_w],
[img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConvGradW
)
img_val = np.asarray(
np.random.rand(*img_shape),
dtype=theano.config.floatX
)
topgrad_vals = np.asarray(
np.random.rand(*topgrad_shape),
dtype=theano.config.floatX
)
desc = dnn.GpuDnnConvDesc(
border_mode=border_mode,
subsample=subsample,
dilation=dilation,
conv_mode=conv_mode,
precision=set_precision(theano.config.floatX)
)(kerns_shape_shared)
conv_grad_w = dnn.GpuDnnConvGradW()(
img,
topgrad,
kerns,
desc,
)
self._compile_and_check(
[img, topgrad, kerns],
[conv_grad_w],
[img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConvGradW
)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv_gradw(self, border_mode, conv_mode):
dilations = [(1, 1), (2, 2)] if dnn.version() >= 6000 else [(1, 1)]
self._test_conv_gradw(T.tensor4('img'),
T.tensor4('topgrad'),
T.tensor4('kerns'),
......@@ -705,7 +724,8 @@ class TestDnnInferShapes(utt.InferShapeTester):
(1, 2, 3, 7),
border_mode,
conv_mode,
(1, 1))
[(1, 1)],
dilations)
def test_conv_gradi(self):
if not dnn.dnn_available(test_ctx_name):
......@@ -714,29 +734,28 @@ class TestDnnInferShapes(utt.InferShapeTester):
kerns = T.tensor4('kerns')
out = T.tensor4('out')
kern_vals = np.asarray(
np.random.rand(13, 14, 15, 16),
np.random.rand(13, 4, 5, 6),
dtype=theano.config.floatX
)
out_vals = np.asarray(
np.random.rand(3, 13, 5, 6),
np.random.rand(3, 13, 9, 11),
dtype=theano.config.floatX
)
for params in product(
['valid'], # Should this work for 'full'?
dilations = [(1, 1), (2, 2)] if dnn.version() >= 6000 else [(1, 1)]
for border_mode, subsample, dilation, conv_mode in product(
['valid', 'full'],
[(1, 1)],
dilations,
['conv', 'cross']
):
shape = (
out_vals.shape[0], kern_vals.shape[1],
out_vals.shape[2] + kern_vals.shape[2] - 1,
out_vals.shape[3] + kern_vals.shape[3] - 1
)
shape = get_conv_gradinputs_shape(kern_vals.shape, out_vals.shape, border_mode, subsample, dilation)
img_vals = np.zeros(shape, dtype=theano.config.floatX)
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2],
border_mode=border_mode,
subsample=subsample,
dilation=dilation,
conv_mode=conv_mode,
precision=set_precision(theano.config.floatX)
)(kerns.shape)
conv_grad_i = dnn.GpuDnnConvGradI()(
......@@ -982,18 +1001,18 @@ def test_dnn_conv_grad():
iw - kw + 1)).astype(theano.config.floatX)
def dconv(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', precision=set_precision(theano.config.floatX))(kern.shape)
return dnn.GpuDnnConv()(img, kern, out, desc, alpha=0.5, beta=0.75)
def dconvi(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', precision=set_precision(theano.config.floatX))(kern.shape)
return dnn.GpuDnnConvGradI()(kern, out, img, desc, alpha=-1.0,
beta=0.0)
def dconvw(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', precision=set_precision(theano.config.floatX))(kern.shape)
return dnn.GpuDnnConvGradW()(img, out, kern, desc, alpha=0.75,
beta=-1.0)
......@@ -1005,29 +1024,37 @@ def test_dnn_conv_grad():
def get_conv3d_test_cases():
# Every element of test_shapes follows the format
# [input_shape, filter_shape, subsample]
test_shapes = [[(128, 3, 5, 5, 5), (64, 3, 1, 2, 4), (1, 1, 1)],
[(8, 4, 20, 12, 15), (5, 4, 6, 12, 4), (2, 2, 2)],
[(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 3, 3)],
[(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 2, 1)],
# [input_shape, filter_shape, subsample, dilation]
test_shapes = [[(128, 3, 5, 5, 5), (64, 3, 1, 2, 4), (1, 1, 1), (1, 1, 1)],
[(8, 4, 20, 12, 15), (5, 4, 6, 12, 4), (2, 2, 2), (1, 1, 1)],
[(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 3, 3), (1, 1, 1)],
[(8, 1, 20, 12, 15), (5, 1, 6, 12, 4), (3, 2, 1), (1, 1, 1)],
# Test with 1x1x1 filters
[(8, 1, 10, 10, 10), (10, 1, 1, 1, 1), (1, 1, 1)],
[(8, 1, 10, 10, 10), (10, 1, 1, 1, 1), (1, 1, 1), (1, 1, 1)],
# Test with dimensions larger than 1024 (thread block dim)
[(1025, 1, 2, 3, 4), (5, 1, 1, 2, 3), (1, 1, 1)],
[(8, 1, 2, 3, 4), (1025, 1, 1, 2, 3), (1, 1, 1)],
[(8, 1025, 2, 3, 4), (5, 1025, 1, 1, 2), (1, 1, 1)],
[(8, 1, 1030, 3, 4), (5, 1, 1025, 1, 1), (1, 1, 1)],
[(8, 1, 2, 1030, 4), (5, 1, 2, 1025, 1), (1, 1, 1)],
[(8, 1, 2, 3, 1030), (5, 1, 1, 2, 1025), (1, 1, 1)],
[(1025, 1, 2, 3, 4), (5, 1, 1, 2, 3), (1, 1, 1), (1, 1, 1)],
[(8, 1, 2, 3, 4), (1025, 1, 1, 2, 3), (1, 1, 1), (1, 1, 1)],
[(8, 1025, 2, 3, 4), (5, 1025, 1, 1, 2), (1, 1, 1), (1, 1, 1)],
[(8, 1, 1030, 3, 4), (5, 1, 1025, 1, 1), (1, 1, 1), (1, 1, 1)],
[(8, 1, 2, 1030, 4), (5, 1, 2, 1025, 1), (1, 1, 1), (1, 1, 1)],
[(8, 1, 2, 3, 1030), (5, 1, 1, 2, 1025), (1, 1, 1), (1, 1, 1)],
# The equivalent of this caused a crash with conv2d
[(1, 1, 1, 44800, 1), (6, 1, 1, 1, 1), (1, 1, 1)]]
[(1, 1, 1, 44800, 1), (6, 1, 1, 1, 1), (1, 1, 1), (1, 1, 1)]]
# With border mode 'full', test with kernel bigger than image in some/all
# dimensions
test_shapes_full = [[(6, 2, 2, 2, 2), (4, 2, 3, 1, 1), (1, 1, 1)],
[(6, 2, 2, 2, 2), (4, 2, 1, 3, 1), (1, 1, 1)],
[(6, 2, 2, 2, 2), (4, 2, 1, 1, 3), (1, 1, 1)],
[(6, 2, 2, 2, 2), (4, 2, 5, 5, 5), (1, 1, 1)]]
test_shapes_full = [[(6, 2, 2, 2, 2), (4, 2, 3, 1, 1), (1, 1, 1), (1, 1, 1)],
[(6, 2, 2, 2, 2), (4, 2, 1, 3, 1), (1, 1, 1), (1, 1, 1)],
[(6, 2, 2, 2, 2), (4, 2, 1, 1, 3), (1, 1, 1), (1, 1, 1)],
[(6, 2, 2, 2, 2), (4, 2, 5, 5, 5), (1, 1, 1), (1, 1, 1)]]
if dnn.version() >= 6000:
test_shapes.extend([
[(8, 1, 20, 12, 15), (5, 1, 6, 3, 4), (1, 1, 2), (3, 2, 1)],
[(8, 1, 20, 12, 15), (5, 1, 6, 3, 4), (2, 2, 1), (1, 2, 3)]])
test_shapes_full.append(
[(6, 2, 2, 2, 2), (4, 2, 5, 5, 5), (1, 1, 1), (3, 2, 1)])
border_modes = ['valid', 'full', 'half', (1, 2, 3), (3, 2, 1), 1, 2]
conv_modes = ['conv', 'cross']
......@@ -1044,7 +1071,7 @@ def test_conv3d_fwd():
utt.seed_rng()
def run_conv3d_fwd(inputs_shape, filters_shape, subsample,
border_mode, conv_mode):
dilation, border_mode, conv_mode):
inputs_val = np.random.random(inputs_shape).astype(theano.config.floatX)
filters_val = np.random.random(filters_shape).astype(theano.config.floatX)
......@@ -1060,6 +1087,7 @@ def test_conv3d_fwd():
# Compile a theano function for the cuDNN implementation
conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
border_mode=border_mode, subsample=subsample,
dilation=dilation,
conv_mode=conv_mode)
f = theano.function([], conv, mode=mode_with_gpu)
......@@ -1072,7 +1100,8 @@ def test_conv3d_fwd():
# Compile a theano function for the reference implementation
conv_ref = theano.tensor.nnet.corr3d.Corr3dMM(border_mode=border_mode,
subsample=subsample
subsample=subsample,
filter_dilation=dilation,
)(ref_cast(inputs), flipped_filters)
f_ref = theano.function([], conv_ref, mode="FAST_RUN")
......@@ -1087,8 +1116,8 @@ def test_conv3d_fwd():
utt.assert_allclose(res_ref, res, rtol=rtol)
test_cases = get_conv3d_test_cases()
for (i_shape, f_shape, subsample), border_mode, conv_mode in test_cases:
yield (run_conv3d_fwd, i_shape, f_shape, subsample, border_mode,
for (i_shape, f_shape, subsample, dilation), border_mode, conv_mode in test_cases:
yield (run_conv3d_fwd, i_shape, f_shape, subsample, dilation, border_mode,
conv_mode)
......@@ -1099,7 +1128,7 @@ def test_conv3d_bwd():
utt.seed_rng()
def run_conv3d_bwd(inputs_shape, filters_shape, subsample,
border_mode, conv_mode):
dilation, border_mode, conv_mode):
inputs_val = np.random.random(inputs_shape).astype(theano.config.floatX)
filters_val = np.random.random(filters_shape).astype(theano.config.floatX)
......@@ -1109,7 +1138,9 @@ def test_conv3d_bwd():
# Compile a theano function for the cuDNN implementation
conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
border_mode=border_mode, subsample=subsample,
border_mode=border_mode,
subsample=subsample,
dilation=dilation,
conv_mode=conv_mode)
grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters])
......@@ -1125,7 +1156,8 @@ def test_conv3d_bwd():
# Compile a theano function for the reference implementation
conv_ref = theano.tensor.nnet.corr3d.Corr3dMM(border_mode=border_mode,
subsample=subsample
subsample=subsample,
filter_dilation=dilation,
)(ref_cast(inputs), flipped_filters)
(grad_i_ref,
grad_w_ref) = theano.tensor.grad(conv_ref.sum(),
......@@ -1145,8 +1177,8 @@ def test_conv3d_bwd():
utt.assert_allclose(res_ref[1], res[1], rtol=rtol)
test_cases = get_conv3d_test_cases()
for (i_shape, f_shape, subsample), border_mode, conv_mode in test_cases:
yield (run_conv3d_bwd, i_shape, f_shape, subsample, border_mode,
for (i_shape, f_shape, subsample, dilation), border_mode, conv_mode in test_cases:
yield (run_conv3d_bwd, i_shape, f_shape, subsample, dilation, border_mode,
conv_mode)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论