提交 9f37bce1 authored 作者: Gabe Schwartz's avatar Gabe Schwartz

Added support for cudnn v6 dilated convolution.

上级 89aac420
...@@ -5,7 +5,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp, ...@@ -5,7 +5,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
cudnnStatus_t err; cudnnStatus_t err;
int pad[3] = {PAD_0, PAD_1, PAD_2}; int pad[3] = {PAD_0, PAD_1, PAD_2};
int strides[3] = {SUB_0, SUB_1, SUB_2}; int strides[3] = {SUB_0, SUB_1, SUB_2};
int upscale[3] = {1, 1, 1}; int dilation[3] = {DIL_0, DIL_1, DIL_2};
#if BORDER_MODE == 0 #if BORDER_MODE == 0
pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1; pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1;
...@@ -36,6 +36,11 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp, ...@@ -36,6 +36,11 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
} }
err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides, err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION); dilation, CONV_MODE, PRECISION);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not set convolution "
"descriptor: %s", cudnnGetErrorString(err));
return -1;
}
return 0; return 0;
} }
...@@ -131,11 +131,11 @@ def _dnn_check_version(): ...@@ -131,11 +131,11 @@ def _dnn_check_version():
if v < 5000: if v < 5000:
return False, "cuDNN version is too old. Update to v5, was %d." % v return False, "cuDNN version is too old. Update to v5, was %d." % v
# 5200 should not print warning with cudnn 5.1 final. # 5200 should not print warning with cudnn 5.1 final.
if v >= 5200: if v > 6020:
warnings.warn("Your cuDNN version is more recent than " warnings.warn("Your cuDNN version is more recent than "
"Theano. If you encounter problems, try " "Theano. If you encounter problems, try "
"updating Theano or downgrading cuDNN to " "updating Theano or downgrading cuDNN to "
"version 5.1.") "version 6.0.")
return True, None return True, None
...@@ -363,7 +363,7 @@ class GpuDnnConvDesc(COp): ...@@ -363,7 +363,7 @@ class GpuDnnConvDesc(COp):
""" """
__props__ = ('border_mode', 'subsample', 'conv_mode', 'precision') __props__ = ('border_mode', 'subsample', 'dilation', 'conv_mode', 'precision')
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -380,7 +380,7 @@ class GpuDnnConvDesc(COp): ...@@ -380,7 +380,7 @@ class GpuDnnConvDesc(COp):
def do_constant_folding(self, node): def do_constant_folding(self, node):
return False return False
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv', def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv',
precision="float32"): precision="float32"):
COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)") COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
...@@ -401,6 +401,10 @@ class GpuDnnConvDesc(COp): ...@@ -401,6 +401,10 @@ class GpuDnnConvDesc(COp):
assert conv_mode in ('conv', 'cross') assert conv_mode in ('conv', 'cross')
self.conv_mode = conv_mode self.conv_mode = conv_mode
assert len(dilation) in (2, 3)
assert len(dilation) == len(subsample)
self.dilation = dilation
assert precision in ['float16', 'float32', 'float64'] assert precision in ['float16', 'float32', 'float64']
self.precision = precision self.precision = precision
...@@ -452,6 +456,18 @@ class GpuDnnConvDesc(COp): ...@@ -452,6 +456,18 @@ class GpuDnnConvDesc(COp):
else: else:
sub2 = '0' sub2 = '0'
if version() < 6000:
dil0 = '1'
dil1 = '1'
dil2 = '1'
else:
dil0 = str(self.dilation[0])
dil1 = str(self.dilation[1])
if len(self.dilation) > 2:
dil2 = str(self.dilation[2])
else:
dil2 = '0'
if self.precision == 'float16': if self.precision == 'float16':
precision = 'CUDNN_DATA_HALF' precision = 'CUDNN_DATA_HALF'
elif self.precision == 'float32': elif self.precision == 'float32':
...@@ -463,6 +479,7 @@ class GpuDnnConvDesc(COp): ...@@ -463,6 +479,7 @@ class GpuDnnConvDesc(COp):
return [('NB_DIMS', str(len(self.subsample))), return [('NB_DIMS', str(len(self.subsample))),
('BORDER_MODE', bmode), ('BORDER_MODE', bmode),
('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2), ('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2),
('DIL_0', dil0), ('DIL_1', dil1), ('DIL_2', dil2),
('CONV_MODE', conv_flag), ('CONV_MODE', conv_flag),
('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2), ('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2),
('PRECISION', precision)] ('PRECISION', precision)]
...@@ -574,6 +591,7 @@ class GpuDnnConv(DnnBase): ...@@ -574,6 +591,7 @@ class GpuDnnConv(DnnBase):
img = as_gpuarray_variable(img, ctx_name) img = as_gpuarray_variable(img, ctx_name)
kern = as_gpuarray_variable(kern, ctx_name) kern = as_gpuarray_variable(kern, ctx_name)
output = as_gpuarray_variable(output, ctx_name) output = as_gpuarray_variable(output, ctx_name)
if img.type.ndim not in (4, 5): if img.type.ndim not in (4, 5):
raise TypeError('img must be 4D or 5D tensor') raise TypeError('img must be 4D or 5D tensor')
if kern.type.ndim not in (4, 5): if kern.type.ndim not in (4, 5):
...@@ -897,7 +915,7 @@ class GpuDnnConvGradI(DnnBase): ...@@ -897,7 +915,7 @@ class GpuDnnConvGradI(DnnBase):
return [shape[2]] return [shape[2]]
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='conv', direction_hint=None, workmem=None, conv_mode='conv', direction_hint=None, workmem=None,
algo=None, precision=None): algo=None, precision=None):
""" """
...@@ -956,7 +974,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -956,7 +974,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
algo = workmem algo = workmem
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None) fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns) ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1) and if (border_mode == 'valid' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint == 'bprop weights'): direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set # Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for. # up a suitable 'fake' convolution to compute the gradient for.
...@@ -972,12 +990,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -972,12 +990,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1) shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc) conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name) return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
elif (border_mode == 'full' and subsample == (1, 1) and elif (border_mode == 'full' and subsample == (1, 1) and dilation == (1, 1) and
direction_hint != 'forward!'): direction_hint != 'forward!'):
# Special case: We can be faster by using GpuDnnConvGradI to compute # Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution. # the full convolution as the backward pass of a valid convolution.
...@@ -991,7 +1009,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -991,7 +1009,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1) shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
...@@ -1000,7 +1018,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1000,7 +1018,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# if the img contains negative strides # if the img contains negative strides
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on # We can use Shape_i and bypass the infer_shape here as this is on
...@@ -1009,13 +1027,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1009,13 +1027,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)] kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape, out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode, desc_op.border_mode,
desc_op.subsample) desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc) return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='conv', direction_hint=None, conv_mode='conv', direction_hint=None,
algo='none', precision=None): algo='none', precision=None):
""" """
...@@ -1067,7 +1086,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1067,7 +1086,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None) fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns) ctx_name = infer_context_name(img, kerns)
if (border_mode == 'valid' and subsample == (1, 1, 1) and if (border_mode == 'valid' and subsample == (1, 1, 1) and dilation == (1, 1, 1) and
direction_hint == 'bprop weights'): direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set # Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for. # up a suitable 'fake' convolution to compute the gradient for.
...@@ -1084,7 +1103,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1084,7 +1103,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1) shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc) conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4), ctx_name) return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4), ctx_name)
...@@ -1104,7 +1123,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1104,7 +1123,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1) shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
...@@ -1113,7 +1132,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1113,7 +1132,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# if the img contains negative strides # if the img contains negative strides
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on # We can use Shape_i and bypass the infer_shape here as this is on
...@@ -1122,14 +1141,15 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1122,14 +1141,15 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)] kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
out_shp = get_conv_output_shape(ishape, kshape, out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode, desc_op.border_mode,
desc_op.subsample) desc_op.subsample,
filter_dilation=dilation)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc) return GpuDnnConv(algo=algo)(img, kerns, out, desc)
def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid', def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None): subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
""" """
TODO: document this TODO: document this
""" """
...@@ -1141,7 +1161,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid', ...@@ -1141,7 +1161,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
kerns_shp = as_tensor_variable(kerns_shp) kerns_shp = as_tensor_variable(kerns_shp)
precision = get_precision(precision, [img, topgrad]) precision = get_precision(precision, [img, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns_shp) conv_mode=conv_mode, precision=precision)(kerns_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp)
return GpuDnnConvGradW()(img, topgrad, out, desc) return GpuDnnConvGradW()(img, topgrad, out, desc)
...@@ -1157,7 +1177,7 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid', ...@@ -1157,7 +1177,7 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv', precision=None): subsample=(1, 1), dilation=(1, 1), conv_mode='conv', precision=None):
""" """
TODO: document this TODO: document this
""" """
...@@ -1169,7 +1189,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', ...@@ -1169,7 +1189,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
img_shp = as_tensor_variable(img_shp) img_shp = as_tensor_variable(img_shp)
precision = get_precision(precision, [kerns, topgrad]) precision = get_precision(precision, [kerns, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp) out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
return GpuDnnConvGradI()(kerns, topgrad, out, desc) return GpuDnnConvGradI()(kerns, topgrad, out, desc)
...@@ -2698,7 +2718,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2698,7 +2718,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv2d_gradInputs))): AbstractConv2d_gradInputs))):
return return
if (op.filter_dilation != (1, 1)): if version() < 6000 and op.filter_dilation != (1, 1):
return None return None
inp1 = inputs[0] inp1 = inputs[0]
...@@ -2716,6 +2736,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2716,6 +2736,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv(inp1, inp2, rval = dnn_conv(inp1, inp2,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!', direction_hint='forward!',
conv_mode=conv_mode) conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradWeights): elif isinstance(op, AbstractConv2d_gradWeights):
...@@ -2724,6 +2745,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2724,6 +2745,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight(inp1, inp2, shape, rval = dnn_gradweight(inp1, inp2, shape,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
elif isinstance(op, AbstractConv2d_gradInputs): elif isinstance(op, AbstractConv2d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1], shape = (inp2.shape[0], inp1.shape[1],
...@@ -2731,6 +2753,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2731,6 +2753,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput(inp1, inp2, shape, rval = dnn_gradinput(inp1, inp2, shape,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
return [rval] return [rval]
...@@ -2741,7 +2764,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2741,7 +2764,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
AbstractConv3d_gradInputs))): AbstractConv3d_gradInputs))):
return return
if (op.filter_dilation != (1, 1, 1)): if version() < 6000 and op.filter_dilation != (1, 1, 1):
return None return None
inp1 = inputs[0] inp1 = inputs[0]
...@@ -2759,6 +2782,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2759,6 +2782,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_conv3d(inp1, inp2, rval = dnn_conv3d(inp1, inp2,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
direction_hint='forward!', direction_hint='forward!',
conv_mode=conv_mode) conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradWeights): elif isinstance(op, AbstractConv3d_gradWeights):
...@@ -2767,6 +2791,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2767,6 +2791,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradweight3d(inp1, inp2, shape, rval = dnn_gradweight3d(inp1, inp2, shape,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
elif isinstance(op, AbstractConv3d_gradInputs): elif isinstance(op, AbstractConv3d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1], shape = (inp2.shape[0], inp1.shape[1],
...@@ -2774,6 +2799,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2774,6 +2799,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
rval = dnn_gradinput3d(inp1, inp2, shape, rval = dnn_gradinput3d(inp1, inp2, shape,
border_mode=op.border_mode, border_mode=op.border_mode,
subsample=op.subsample, subsample=op.subsample,
dilation=op.filter_dilation,
conv_mode=conv_mode) conv_mode=conv_mode)
return [rval] return [rval]
......
...@@ -188,11 +188,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -188,11 +188,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int nd; int nd;
int pad[2]; int pad[2];
int stride[2]; int stride[2];
int upscale[2]; int dilation[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
cudnnDataType_t data_type; cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride, err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type); dilation, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s", "error getting convolution properties: %s",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论