Commit 9f37bce1 authored by Gabe Schwartz

Added support for cudnn v6 dilated convolution.

Parent 89aac420
......@@ -5,7 +5,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
cudnnStatus_t err;
int pad[3] = {PAD_0, PAD_1, PAD_2};
int strides[3] = {SUB_0, SUB_1, SUB_2};
int upscale[3] = {1, 1, 1};
int dilation[3] = {DIL_0, DIL_1, DIL_2};
#if BORDER_MODE == 0
pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1;
......@@ -36,6 +36,11 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
}
err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION);
dilation, CONV_MODE, PRECISION);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not set convolution "
"descriptor: %s", cudnnGetErrorString(err));
return -1;
}
return 0;
}
......@@ -131,11 +131,11 @@ def _dnn_check_version():
if v < 5000:
return False, "cuDNN version is too old. Update to v5, was %d." % v
# 5200 should not print warning with cudnn 5.1 final.
if v >= 5200:
if v > 6020:
warnings.warn("Your cuDNN version is more recent than "
"Theano. If you encounter problems, try "
"updating Theano or downgrading cuDNN to "
"version 5.1.")
"version 6.0.")
return True, None
......@@ -363,7 +363,7 @@ class GpuDnnConvDesc(COp):
"""
__props__ = ('border_mode', 'subsample', 'conv_mode', 'precision')
__props__ = ('border_mode', 'subsample', 'dilation', 'conv_mode', 'precision')
def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h']
......@@ -380,7 +380,7 @@ class GpuDnnConvDesc(COp):
def do_constant_folding(self, node):
return False
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv',
def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv',
precision="float32"):
COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
......@@ -401,6 +401,10 @@ class GpuDnnConvDesc(COp):
assert conv_mode in ('conv', 'cross')
self.conv_mode = conv_mode
assert len(dilation) in (2, 3)
assert len(dilation) == len(subsample)
self.dilation = dilation
assert precision in ['float16', 'float32', 'float64']
self.precision = precision
......@@ -452,6 +456,18 @@ class GpuDnnConvDesc(COp):
else:
sub2 = '0'
if version() < 6000:
dil0 = '1'
dil1 = '1'
dil2 = '1'
else:
dil0 = str(self.dilation[0])
dil1 = str(self.dilation[1])
if len(self.dilation) > 2:
dil2 = str(self.dilation[2])
else:
dil2 = '0'
if self.precision == 'float16':
precision = 'CUDNN_DATA_HALF'
elif self.precision == 'float32':
......@@ -463,6 +479,7 @@ class GpuDnnConvDesc(COp):
return [('NB_DIMS', str(len(self.subsample))),
('BORDER_MODE', bmode),
('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2),
('DIL_0', dil0), ('DIL_1', dil1), ('DIL_2', dil2),
('CONV_MODE', conv_flag),
('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2),
('PRECISION', precision)]
......@@ -574,6 +591,7 @@ class GpuDnnConv(DnnBase):
img = as_gpuarray_variable(img, ctx_name)
kern = as_gpuarray_variable(kern, ctx_name)
output = as_gpuarray_variable(output, ctx_name)
if img.type.ndim not in (4, 5):
raise TypeError('img must be 4D or 5D tensor')
if kern.type.ndim not in (4, 5):
......@@ -897,7 +915,7 @@ class GpuDnnConvGradI(DnnBase):
return [shape[2]]
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', direction_hint=None, workmem=None,
algo=None, precision=None):
"""
......@@ -956,7 +974,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
algo = workmem
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1) and
if (border_mode == 'valid' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
......@@ -972,12 +990,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
elif (border_mode == 'full' and subsample == (1, 1) and
elif (border_mode == 'full' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint != 'forward!'):
# Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution.
......@@ -991,7 +1009,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc)
......@@ -1000,7 +1018,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# if the img contains negative strides
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on
......@@ -1009,13 +1027,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode,
desc_op.subsample)
desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='conv', direction_hint=None,
algo='none', precision=None):
"""
......@@ -1067,7 +1086,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1, 1) and
if (border_mode == 'valid' and subsample == (1, 1, 1) and dilation == (1, 1, 1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
......@@ -1084,7 +1103,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4), ctx_name)
......@@ -1104,7 +1123,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc)
......@@ -1113,7 +1132,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# if the img contains negative strides
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on
......@@ -1122,14 +1141,15 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode,
desc_op.subsample)
desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None):
subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
"""
TODO: document this
"""
......@@ -1141,7 +1161,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
kerns_shp = as_tensor_variable(kerns_shp)
precision = get_precision(precision, [img, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp)
return GpuDnnConvGradW()(img, topgrad, out, desc)
......@@ -1157,7 +1177,7 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None):
subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
"""
TODO: document this
"""
......@@ -1169,7 +1189,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
img_shp = as_tensor_variable(img_shp)
precision = get_precision(precision, [kerns, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape)
out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
return GpuDnnConvGradI()(kerns, topgrad, out, desc)
......@@ -2698,7 +2718,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv2d_gradInputs))):
return
if (op.filter_dilation != (1, 1)):
if version() < 6000 and op.filter_dilation != (1, 1):
return None
inp1 = inputs[0]
......@@ -2716,6 +2736,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv(inp1, inp2,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!',
conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradWeights):
......@@ -2724,6 +2745,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1],
......@@ -2731,6 +2753,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
return [rval]
......@@ -2741,7 +2764,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv3d_gradInputs))):
return
if (op.filter_dilation != (1, 1, 1)):
if version() < 6000 and op.filter_dilation != (1, 1, 1):
return None
inp1 = inputs[0]
......@@ -2759,6 +2782,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv3d(inp1, inp2,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!',
conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradWeights):
......@@ -2767,6 +2791,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight3d(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1],
......@@ -2774,6 +2799,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput3d(inp1, inp2, shape,
border_mode=op.border_mode,
subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode)
return [rval]
......
......@@ -188,11 +188,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int nd;
int pad[2];
int stride[2];
int upscale[2];
int dilation[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
dilation, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s",
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment