提交 155c9169 authored 作者: affanv14's avatar affanv14 提交者: notoraptor

move setting of groups to gpuconvdesc

上级 46e0dbdd
#section support_code_apply #section support_code_apply
/* Set the group count on a cuDNN convolution descriptor.
 *
 * desc   - an already-created convolution descriptor.
 * groups - number of convolution groups requested (1 = ordinary conv).
 *
 * Returns 1 on success, -1 on failure with a Python exception set
 * (callers test `== -1`, so success is deliberately 1, not 0).
 *
 * Grouped convolution is a cuDNN v7 feature.  Previously, on older
 * cuDNN this function ignored `groups` and reported success, which
 * silently computed an ungrouped convolution when the caller asked
 * for num_groups > 1; now that case fails loudly instead.
 */
static int c_set_groups_for_conv(cudnnConvolutionDescriptor_t desc, int groups) {
#if CUDNN_MAJOR >= 7
  cudnnStatus_t err = cudnnSetConvolutionGroupCount(desc, groups);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "error setting groups for convolution : %s",
                 cudnnGetErrorString(err));
    return -1;
  }
#else
  /* cuDNN < 7 cannot do grouped convolutions at all: reject rather
     than silently dropping the group count. */
  if (groups > 1) {
    PyErr_Format(PyExc_RuntimeError,
                 "grouped convolutions (num_groups=%d) require cuDNN v7 "
                 "or higher",
                 groups);
    return -1;
  }
#endif
  return 1;
}
int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp, int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
cudnnConvolutionDescriptor_t *desc, cudnnConvolutionDescriptor_t *desc,
PARAMS_TYPE* params) { PARAMS_TYPE* params) {
...@@ -43,5 +56,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp, ...@@ -43,5 +56,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
"descriptor: %s", cudnnGetErrorString(err)); "descriptor: %s", cudnnGetErrorString(err));
return -1; return -1;
} }
if (c_set_groups_for_conv(*desc, params->num_groups) == -1)
return -1;
return 0; return 0;
} }
...@@ -3,16 +3,22 @@ cudnnTensorDescriptor_t APPLY_SPECIFIC(input); ...@@ -3,16 +3,22 @@ cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
cudnnTensorDescriptor_t APPLY_SPECIFIC(output); cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns); cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
static int c_set_groups_for_conv(cudnnConvolutionDescriptor_t desc, int groups) { static int c_check_groups_for_conv(cudnnConvolutionDescriptor_t desc, int groups) {
#if CUDNN_MAJOR >= 7 #if CUDNN_MAJOR >= 7
int desc_groups;
if (groups > 1) { if (groups > 1) {
cudnnStatus_t err = cudnnSetConvolutionGroupCount(desc, groups); cudnnStatus_t err = cudnnGetConvolutionGroupCount(desc, &desc_groups);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error setting groups for convolution : %s", "error getting groups for convolution : %s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
return -1; return -1;
} }
if (groups != desc_groups) {
PyErr_SetString(PyExc_MemoryError,
"groups specified different from convolution descriptor");
return -1;
}
} }
return 1; return 1;
#else #else
......
...@@ -73,7 +73,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -73,7 +73,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return 0; return 0;
} }
int groups = c_set_groups_for_conv(desc, params->num_groups); int groups = c_check_groups_for_conv(desc, params->num_groups);
if (groups == -1) if (groups == -1)
return 1; return 1;
if (c_set_tensor_for_conv(input, APPLY_SPECIFIC(input), groups) == -1) if (c_set_tensor_for_conv(input, APPLY_SPECIFIC(input), groups) == -1)
......
...@@ -73,7 +73,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -73,7 +73,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
} }
int groups = c_set_groups_for_conv(desc, params->num_groups); int groups = c_check_groups_for_conv(desc, params->num_groups);
if (groups == -1) if (groups == -1)
return 1; return 1;
if (c_set_tensor_for_conv(output, APPLY_SPECIFIC(output), groups) == -1) if (c_set_tensor_for_conv(output, APPLY_SPECIFIC(output), groups) == -1)
......
...@@ -73,7 +73,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -73,7 +73,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return 0; return 0;
} }
int groups = c_set_groups_for_conv(desc, params->num_groups); int groups = c_check_groups_for_conv(desc, params->num_groups);
if (groups == -1) if (groups == -1)
return 1; return 1;
if (c_set_tensor_for_conv(input, APPLY_SPECIFIC(input), groups) == -1) if (c_set_tensor_for_conv(input, APPLY_SPECIFIC(input), groups) == -1)
......
...@@ -412,7 +412,8 @@ class GpuDnnConvDesc(COp): ...@@ -412,7 +412,8 @@ class GpuDnnConvDesc(COp):
""" """
__props__ = ('border_mode', 'subsample', 'dilation', 'conv_mode', 'precision') __props__ = ('border_mode', 'subsample', 'dilation', 'conv_mode',
'precision', 'num_groups')
params_type = ParamsType(pad0=int_t, pad1=int_t, pad2=int_t, params_type = ParamsType(pad0=int_t, pad1=int_t, pad2=int_t,
sub0=int_t, sub1=int_t, sub2=int_t, sub0=int_t, sub1=int_t, sub2=int_t,
dil0=int_t, dil1=int_t, dil2=int_t, dil0=int_t, dil1=int_t, dil2=int_t,
...@@ -421,7 +422,8 @@ class GpuDnnConvDesc(COp): ...@@ -421,7 +422,8 @@ class GpuDnnConvDesc(COp):
('BORDER_MODE_VALID', 'valid'), ('BORDER_MODE_VALID', 'valid'),
('BORDER_MODE_HALF', 'half')), ('BORDER_MODE_HALF', 'half')),
conv_mode=cudnn.cudnnConvolutionMode_t, conv_mode=cudnn.cudnnConvolutionMode_t,
precision=cudnn.cudnnDataType_t) precision=cudnn.cudnnDataType_t,
num_groups=int_t)
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -448,7 +450,7 @@ class GpuDnnConvDesc(COp): ...@@ -448,7 +450,7 @@ class GpuDnnConvDesc(COp):
return False return False
def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv', def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv',
precision="float32"): precision="float32", num_groups=1):
COp.__init__(self, ["c_code/conv_desc.c"], "APPLY_SPECIFIC(conv_desc)") COp.__init__(self, ["c_code/conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
if version() < 6000 and any([d != 1 for d in dilation]): if version() < 6000 and any([d != 1 for d in dilation]):
...@@ -470,6 +472,7 @@ class GpuDnnConvDesc(COp): ...@@ -470,6 +472,7 @@ class GpuDnnConvDesc(COp):
self.subsample = subsample self.subsample = subsample
assert cudnn.cudnnConvolutionMode_t.has_alias(conv_mode) assert cudnn.cudnnConvolutionMode_t.has_alias(conv_mode)
self.conv_mode = conv_mode self.conv_mode = conv_mode
self.num_groups = num_groups
assert len(dilation) == len(subsample) assert len(dilation) == len(subsample)
self.dilation = dilation self.dilation = dilation
...@@ -1039,7 +1042,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1), ...@@ -1039,7 +1042,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision,
num_groups=num_groups)(kerns.shape)
desc_op = desc.owner.op desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on # We can use Shape_i and bypass the infer_shape here as this is on
# the input of node and it will always be present. # the input of node and it will always be present.
...@@ -1189,7 +1193,8 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid', ...@@ -1189,7 +1193,8 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
precision = get_precision(precision, [img, topgrad]) precision = get_precision(precision, [img, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns_shp) conv_mode=conv_mode, precision=precision,
num_groups=num_groups)(kerns_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp)
return GpuDnnConvGradW(algo=algo, num_groups=num_groups)(img, topgrad, out, desc) return GpuDnnConvGradW(algo=algo, num_groups=num_groups)(img, topgrad, out, desc)
...@@ -1218,7 +1223,8 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', ...@@ -1218,7 +1223,8 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
precision = get_precision(precision, [kerns, topgrad]) precision = get_precision(precision, [kerns, topgrad])
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision,
num_groups=num_groups)(kerns.shape)
out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp) out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
return GpuDnnConvGradI(algo=algo, num_groups=num_groups)(kerns, topgrad, out, desc) return GpuDnnConvGradI(algo=algo, num_groups=num_groups)(kerns, topgrad, out, desc)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论