提交 1bc17311 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6354 from notoraptor/fix-conv-runtime-algos-with-different-dtypes

Add data type configuration to algorithms hash for cuDNN convolutions caching system
...@@ -199,7 +199,7 @@ static std::string shape(cudnnFilterDescriptor_t t, cudnnDataType_t* type) ...@@ -199,7 +199,7 @@ static std::string shape(cudnnFilterDescriptor_t t, cudnnDataType_t* type)
return shape(res, outDims); return shape(res, outDims);
}; };
static std::string shape(cudnnConvolutionDescriptor_t convDesc) static std::string shape(cudnnConvolutionDescriptor_t convDesc, int dataTypecode)
{ {
int nDim; int nDim;
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
...@@ -208,6 +208,9 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc) ...@@ -208,6 +208,9 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc)
int padA[5]; int padA[5];
int strideA[5]; int strideA[5];
int dilationA[5]; int dilationA[5];
/* Data type configuration. Format: " -<dtype><precision>" with dtype and precision in {h, f, d},
* h for half (float16), f for float (float32), d for double (float64). */
char data_type_configuration[5];
checkCudnnStatus( checkCudnnStatus(
cudnnGetConvolutionNdDescriptor( convDesc, 5, cudnnGetConvolutionNdDescriptor( convDesc, 5,
...@@ -220,6 +223,27 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc) ...@@ -220,6 +223,27 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc)
"error getting convolution description"); "error getting convolution description");
if (PyErr_Occurred()) return ""; if (PyErr_Occurred()) return "";
/* Build data type configuration string. */
data_type_configuration[0] = ' ';
data_type_configuration[1] = '-';
switch (dataTypecode) {
case GA_HALF: data_type_configuration[2] = 'h'; break;
case GA_FLOAT: data_type_configuration[2] = 'f'; break;
case GA_DOUBLE: data_type_configuration[2] = 'd'; break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported data type in convolution.");
return "";
}
switch (computeType) {
case CUDNN_DATA_HALF: data_type_configuration[3] = 'h'; break;
case CUDNN_DATA_FLOAT: data_type_configuration[3] = 'f'; break;
case CUDNN_DATA_DOUBLE: data_type_configuration[3] = 'd'; break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported precision in convolution.");
return "";
}
data_type_configuration[4] = '\0';
return (std::string("-mode ") + return (std::string("-mode ") +
((mode == CUDNN_CONVOLUTION) ? "conv" : "cross") + ((mode == CUDNN_CONVOLUTION) ? "conv" : "cross") +
" -pad " + " -pad " +
...@@ -227,7 +251,8 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc) ...@@ -227,7 +251,8 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc)
" -subsample " + " -subsample " +
shape(strideA, nDim) + shape(strideA, nDim) +
" -dilation " + " -dilation " +
shape(dilationA, nDim)); shape(dilationA, nDim) +
data_type_configuration);
} }
static bool all_aligned(cudnnDataType_t type, void* in, void* out, void* filter) static bool all_aligned(cudnnDataType_t type, void* in, void* out, void* filter)
...@@ -253,7 +278,7 @@ static std::string dnn_conv_shape(cudnnTensorDescriptor_t inputDesc, PyGpuArrayO ...@@ -253,7 +278,7 @@ static std::string dnn_conv_shape(cudnnTensorDescriptor_t inputDesc, PyGpuArrayO
return ""; return "";
std::string shapeInput = shape(inputDesc); std::string shapeInput = shape(inputDesc);
std::string shapeFilter = shape(filterDesc, &dType); std::string shapeFilter = shape(filterDesc, &dType);
std::string shapeConvDesc = shape(convDesc); std::string shapeConvDesc = shape(convDesc, input->ga.typecode);
if (shapeInput.empty() || shapeFilter.empty() || shapeConvDesc.empty()) if (shapeInput.empty() || shapeFilter.empty() || shapeConvDesc.empty())
return ""; return "";
s << "-g " << groups << " -dim " << shapeInput << " -filt " << s << "-g " << groups << " -dim " << shapeInput << " -filt " <<
......
...@@ -352,7 +352,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -352,7 +352,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n", fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n",
algorithm_name, algorithm_name,
mathtype == CUDNN_TENSOR_OP_MATH ? "[T]" : "", mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor_op)" : "",
params->choose_time ? "(timed)": "" , params->choose_time ? "(timed)": "" ,
reuse_algo ? "(reused)" : "", reuse_algo ? "(reused)" : "",
use_cached ? "(cache)": "", use_cached ? "(cache)": "",
......
...@@ -170,7 +170,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -170,7 +170,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
char pci_id[16]; char pci_id[16];
gpucontext_property(c->ctx, GA_CTX_PROP_PCIBUSID, pci_id); gpucontext_property(c->ctx, GA_CTX_PROP_PCIBUSID, pci_id);
// check out cache // check out cache
hashkey=dnn_conv_shape(APPLY_SPECIFIC(input), *input, APPLY_SPECIFIC(kerns), kerns, desc, output, groups); hashkey = dnn_conv_shape(APPLY_SPECIFIC(input), *input, APPLY_SPECIFIC(kerns), kerns, desc, output, groups);
if (hashkey.empty()) { if (hashkey.empty()) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
...@@ -307,13 +307,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -307,13 +307,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
// NB: This is printed only when algorithm is chosen at runtime. fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n",
fprintf(stderr, "(using %s %s%s%s%s, ws:%ld, hash:%s)\n",
algorithm_name, algorithm_name,
mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor_op)" : "",
params->choose_time ? "(timed)": "" , params->choose_time ? "(timed)": "" ,
reuse_algo ? "(reused)" : "", reuse_algo ? "(reused)" : "",
use_cached ? "(cache)": "", use_cached ? "(cache)": "",
mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor op)" : "",
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
......
...@@ -297,13 +297,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -297,13 +297,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
// NB: This is printed only when algorithm is chosen at runtime. fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n",
fprintf(stderr, "(using %s %s%s%s%s, ws:%ld, hash:%s)\n",
algorithm_name, algorithm_name,
mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor_op)" : "",
params->choose_time ? "(timed)": "" , params->choose_time ? "(timed)": "" ,
reuse_algo ? "(reused)" : "", reuse_algo ? "(reused)" : "",
use_cached ? "(cache)": "", use_cached ? "(cache)": "",
mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor op)" : "",
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
......
...@@ -2666,3 +2666,31 @@ class TestDnnConv3DRuntimeAlgorithms(TestDnnConv2DRuntimeAlgorithms): ...@@ -2666,3 +2666,31 @@ class TestDnnConv3DRuntimeAlgorithms(TestDnnConv2DRuntimeAlgorithms):
(1, [(4, 2, 20, 20, 20), (2, 2, 20, 19, 18)]), # cache should be used (1, [(4, 2, 20, 20, 20), (2, 2, 20, 19, 18)]), # cache should be used
(1, [(1, 2, 3, 4, 5), (6, 2, 3, 2, 1)]) (1, [(1, 2, 3, 4, 5), (6, 2, 3, 2, 1)])
] ]
def test_conv_guess_once_with_dtypes():
    """Run cuDNN forward convolutions compiled with ``algo='guess_once'`` for
    several data type / precision configurations.

    Per the change under review, the cuDNN algorithm cache key now includes
    the data type configuration, so functions differing only in dtype or
    precision should not collide in the cache — TODO confirm against the
    C code side of this patch. Each compiled function below should simply
    execute without raising.
    """
    utt.seed_rng()
    inputs_shape = (2, 3, 5, 5)
    filters_shape = (2, 3, 40, 4)
    border_mode = 'full'

    def get_function(dtype, precision):
        # Compile one forward convolution for the given storage dtype and
        # cuDNN compute precision, forcing runtime algorithm selection
        # ('guess_once') so the algorithm cache is exercised.
        inputs_val = np.random.random(inputs_shape).astype(dtype)
        filters_val = np.random.random(filters_shape).astype(dtype)
        # Scale down to keep values small (reduces overflow/precision
        # trouble, especially for the float16 configurations).
        inputs_val /= 10
        filters_val /= 10
        inputs = theano.shared(inputs_val)
        filters = theano.shared(filters_val)
        conv = dnn.dnn_conv(img=inputs, kerns=filters,
                            border_mode=border_mode, precision=precision,
                            algo='guess_once', direction_hint='forward!')
        return theano.function([], conv)

    # The four supported configurations: true half, pseudo half
    # (float16 storage with float32 compute), float and double.
    f_true_half_config = get_function('float16', 'float16')
    f_pseudo_half_config = get_function('float16', 'float32')
    f_float_config = get_function('float32', 'float32')
    f_double_config = get_function('float64', 'float64')
    # Let's just see if everything runs without raising any exception.
    f_true_half_config()
    f_pseudo_half_config()
    f_float_config()
    f_double_config()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论