提交 88ed910a authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Convert GpuDnnConvDesc to a COp and add support for v3.

上级 6cd85677
#section support_code_apply
int conv_desc(PyArrayObject *img_shp, PyArrayObject *filt_shp,
cudnnConvolutionDescriptor_t *desc) {
cudnnStatus_t err;
int pad[3] = {PAD_0, PAD_1, PAD_2};
int strides[3] = {SUB_0, SUB_1, SUB_2};
int upscale[3] = {1, 1, 1};
if (PyArray_DIM(filt_shp, 0) != PyArray_DIM(img_shp, 0)) {
PyErr_SetString(PyExc_ValueError, "Differing number of dimensions for "
"image and filter shape");
return -1;
}
#if BORDER_MODE == 0
pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1;
pad[1] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) - 1;
#if NB_DIMS > 2
pad[2] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) - 1;
#endif
#endif
if (PyArray_DIM(img_shp, 0) - 2 != NB_DIMS) {
PyErr_Format(PyExc_ValueError, "Input shapes have too many dimensions: "
"expected %d, got %lld.", NB_DIMS,
(long long)PyArray_DIM(img_shp, 0));
return -1;
}
err = cudnnCreateConvolutionDescriptor(desc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate convolution "
"descriptor: %s", cudnnGetErrorString(err));
return -1;
}
err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides, upscale,
CONV_MODE);
}
...@@ -228,7 +228,7 @@ def version(): ...@@ -228,7 +228,7 @@ def version():
version.v = None version.v = None
class GpuDnnConvDesc(Op): class GpuDnnConvDesc(COp):
""" """
This Op builds a convolution descriptor for use in the other convolution This Op builds a convolution descriptor for use in the other convolution
operations. operations.
...@@ -251,12 +251,17 @@ class GpuDnnConvDesc(Op): ...@@ -251,12 +251,17 @@ class GpuDnnConvDesc(Op):
def c_lib_dirs(self): def c_lib_dirs(self):
return [config.dnn.library_path] return [config.dnn.library_path]
def do_constant_folding(self, node):
return False
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv'): def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv'):
COp.__init__(self, ["conv_desc.c"], "conv_desc")
if isinstance(border_mode, int): if isinstance(border_mode, int):
border_mode = (border_mode, border_mode) border_mode = (border_mode,) * len(subsample)
if isinstance(border_mode, tuple): if isinstance(border_mode, tuple):
pad_h, pad_w = map(int, border_mode) assert len(border_mode) == len(subsample)
border_mode = (pad_h, pad_w) border_mode = tuple(map(int, border_mode))
if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
border_mode in ('valid', 'full')): border_mode in ('valid', 'full')):
raise ValueError( raise ValueError(
...@@ -264,7 +269,7 @@ class GpuDnnConvDesc(Op): ...@@ -264,7 +269,7 @@ class GpuDnnConvDesc(Op):
'"valid", "full", an integer or a pair of' '"valid", "full", an integer or a pair of'
' integers'.format(border_mode)) ' integers'.format(border_mode))
self.border_mode = border_mode self.border_mode = border_mode
assert len(subsample) == 2 assert len(subsample) in (2, 3)
self.subsample = subsample self.subsample = subsample
assert conv_mode in ('conv', 'cross') assert conv_mode in ('conv', 'cross')
self.conv_mode = conv_mode self.conv_mode = conv_mode
...@@ -279,90 +284,40 @@ class GpuDnnConvDesc(Op): ...@@ -279,90 +284,40 @@ class GpuDnnConvDesc(Op):
[CDataType("cudnnConvolutionDescriptor_t", [CDataType("cudnnConvolutionDescriptor_t",
freefunc="cudnnDestroyConvolutionDescriptor")()]) freefunc="cudnnDestroyConvolutionDescriptor")()])
def c_code(self, node, name, inputs, outputs, sub): def get_op_params(self):
img_shape, kern_shape = inputs pad0 = '0'
desc, = outputs pad1 = '0'
pad2 = '0'
if isinstance(self.border_mode, tuple): if isinstance(self.border_mode, tuple):
pad_h_spec, pad_w_spec = map(int, self.border_mode) pad0 = str(self.border_mode[0])
assert pad_h_spec >= 0 and pad_w_spec >= 0 pad1 = str(self.border_mode[1])
bmode = 2 if len(self.border_mode) > 2:
pad2 = str(self.border_mode[2])
bmode = '2'
elif self.border_mode == "valid":
bmode = 1
elif self.border_mode == "full":
bmode = 0
else: else:
pad_h_spec = pad_w_spec = 0 raise ValueError("Invalid value for border_mode")
if self.border_mode == "valid":
bmode = 1
else:
assert self.border_mode == "full"
bmode = 0
if self.conv_mode == 'conv': if self.conv_mode == 'conv':
conv_flag = 'CUDNN_CONVOLUTION' conv_flag = 'CUDNN_CONVOLUTION'
else: else:
conv_flag = 'CUDNN_CROSS_CORRELATION' conv_flag = 'CUDNN_CROSS_CORRELATION'
return """ sub0 = str(self.subsample[0])
{ sub1 = str(self.subsample[1])
cudnnStatus_t err; if len(self.subsample) > 2:
int pad_h%(name)s; sub2 = str(self.subsample[2])
int pad_w%(name)s; else:
sub2 = '0'
if ((err = cudnnCreateConvolutionDescriptor(&%(desc)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate convolution "
"descriptor: %%s", cudnnGetErrorString(err));
%(fail)s
}
if (%(bmode)d == 2) {
pad_h%(name)s = %(pad_h_spec)d;
pad_w%(name)s = %(pad_w_spec)d;
} else if (%(bmode)d == 1) {
pad_h%(name)s = 0;
pad_w%(name)s = 0;
} else if (%(bmode)d == 0) {
pad_h%(name)s = *(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 2) - 1;
pad_w%(name)s = *(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 3) - 1;
} else {
PyErr_SetString(PyExc_ValueError, "bad border mode");
%(fail)s
}
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 20
err = cudnnSetConvolution2dDescriptor(
%(desc)s,
pad_h%(name)s,
pad_w%(name)s,
%(subsx)d, %(subsy)d, 1, 1,
%(conv_flag)s
);
#else
err = cudnnSetConvolutionDescriptorEx(
%(desc)s,
*(npy_int64 *)PyArray_GETPTR1(%(img_shape)s, 0),
*(npy_int64 *)PyArray_GETPTR1(%(img_shape)s, 1),
*(npy_int64 *)PyArray_GETPTR1(%(img_shape)s, 2),
*(npy_int64 *)PyArray_GETPTR1(%(img_shape)s, 3),
*(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 0),
*(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 2),
*(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 3),
pad_h%(name)s,
pad_w%(name)s,
%(subsx)d, %(subsy)d, 1, 1,
%(conv_flag)s
);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err));
%(fail)s
}
}
""" % dict(name=name, img_shape=img_shape, kern_shape=kern_shape, desc=desc,
bmode=bmode, conv_flag=conv_flag, fail=sub['fail'],
subsx=self.subsample[0], subsy=self.subsample[1],
pad_h_spec=pad_h_spec, pad_w_spec=pad_w_spec)
def c_code_cache_version(self): return [('NB_DIMS', str(len(self.subsample))),
return (1, version()) ('BORDER_MODE', bmode),
('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2),
('CONV_MODE', conv_flag),
('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2)]
# scalar constants # scalar constants
_zero = constant(numpy.asarray(0.0, dtype='float64')) _zero = constant(numpy.asarray(0.0, dtype='float64'))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论