提交 c55c15ad authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6359 from borisfom/f32-hmma-fix

Fixing math_type for F32 and cache update
......@@ -231,7 +231,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return -1;
}
// set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
if (input->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
// We don't sync the buffer as we don't care about the values.
err = cudnnFindConvolutionForwardAlgorithmEx(
......@@ -265,12 +266,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
#endif
algo = choice.algo;
prev_algo.algo = (int)algo;
prev_algo.wsSize = worksize = choice.memory;
worksize = choice.memory;
#if CUDNN_MAJOR >= 7
prev_algo.mathType = mathtype = choice.mathType;
if (input->ga.typecode == GA_HALF)
mathtype = choice.mathType;
#endif
} else {
err = cudnnGetConvolutionForwardAlgorithm(
params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
......@@ -283,9 +283,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cuda_exit(c->ctx);
return 1;
}
prev_algo.algo = algo;
// no tensor_op returned from Get()
prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
}
}
}
......@@ -334,18 +331,9 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
}
if (params->choose_algo && (!params->choose_once || !reuse_algo)) {
// algo may have changed due to fallback, we must update it.
prev_algo.algo = algo;
// save worksize for next time/cache
prev_algo.wsSize = worksize;
// Add to the cache if we choose on shape change, or first time if we choose once.
dnn_conv_update_cache(hashkey, prev_algo);
}
if (params->choose_algo) {
#ifdef DEBUG
if (params->choose_algo) {
if (0 != theano_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) {
cuda_exit(c->ctx);
return 1;
......@@ -359,12 +347,23 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
worksize,
hashkey.c_str()
);
}
#endif
if (params->choose_once) {
reuse_algo = 1;
}
if (!reuse_algo) {
// save for next time/cache
prev_algo.algo = algo;
prev_algo.wsSize = worksize;
prev_algo.mathType = mathtype;
// Add to the cache if we choose on shape change, or first time if
// we choose once.
if (!use_cached)
dnn_conv_update_cache(hashkey, prev_algo);
if (params->choose_once)
reuse_algo = 1;
}
} // params->choose_algo
{
gpudata *workspace = 0;
......
......@@ -194,7 +194,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
gpudata *tmpmem;
// set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
if (im->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
if (tmpmem == NULL) {
......@@ -231,12 +232,11 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
#endif
algo = choice.algo;
prev_algo.algo = (int)algo;
prev_algo.wsSize = worksize = choice.memory;
worksize = choice.memory;
#if CUDNN_MAJOR >= 7
prev_algo.mathType = mathtype = choice.mathType;
if (im->ga.typecode == GA_HALF)
mathtype = choice.mathType;
#endif
} else {
err = cudnnGetConvolutionBackwardDataAlgorithm(
params->handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output),
......@@ -248,9 +248,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_exit(c->ctx);
return 1;
}
prev_algo.algo = algo;
// no tensor_op returned from Get()
prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
}
}
}
......@@ -291,18 +288,9 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
} // !(reuse_algo || use_cached || params->choose_time)
if (params->choose_algo && (!params->choose_once || !reuse_algo)) {
// algo may have changed due to fallback, we must update it.
prev_algo.algo = algo;
// save worksize for next time/cache
prev_algo.wsSize = worksize;
// Add to the cache
dnn_conv_update_cache(hashkey, prev_algo);
}
if (params->choose_algo) {
#ifdef DEBUG
if (params->choose_algo) {
if (0 != theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) {
cuda_exit(c->ctx);
return 1;
......@@ -316,12 +304,23 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
worksize,
hashkey.c_str()
);
}
#endif
if (params->choose_once) {
reuse_algo = 1;
}
if (!reuse_algo) {
// save for next time/cache
prev_algo.algo = algo;
prev_algo.wsSize = worksize;
prev_algo.mathType = mathtype;
// Add to the cache
if (!use_cached)
dnn_conv_update_cache(hashkey, prev_algo);
if (params->choose_once)
reuse_algo = 1;
}
} // params->choose_algo
gpudata *workspace = 0;
if (worksize != 0) {
......
......@@ -181,7 +181,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
gpudata *tmpmem;
// set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
if (input->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
if (tmpmem == NULL) {
......@@ -220,12 +221,11 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
#endif
algo = choice.algo;
prev_algo.algo = (int)algo;
prev_algo.wsSize = worksize = choice.memory;
worksize = choice.memory;
#if CUDNN_MAJOR >= 7
prev_algo.mathType = mathtype = choice.mathType;
if (input->ga.typecode == GA_HALF)
mathtype = choice.mathType;
#endif
} else {
err = cudnnGetConvolutionBackwardFilterAlgorithm(
params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
......@@ -238,9 +238,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_exit(c->ctx);
return 1;
}
prev_algo.algo = algo;
// no tensor_op returned from Get()
prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
}
}
} /* choose_algo */
......@@ -281,18 +278,9 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
}
if (params->choose_algo && (!params->choose_once || !reuse_algo)) {
// algo may have changed due to fallback, we must update it.
prev_algo.algo = algo;
// save worksize for next time/cache
prev_algo.wsSize = worksize;
// Add to the cache
dnn_conv_update_cache(hashkey, prev_algo);
}
if (params->choose_algo) {
#ifdef DEBUG
if (params->choose_algo) {
if (0 != theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) {
cuda_exit(c->ctx);
return 1;
......@@ -306,12 +294,23 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
worksize,
hashkey.c_str()
);
}
#endif
if (params->choose_once) {
reuse_algo = 1;
}
if (!reuse_algo) {
// save for next time/cache
prev_algo.algo = algo;
prev_algo.wsSize = worksize;
prev_algo.mathType = mathtype;
// Add to the cache if we choose on shape change, or first time if
// we choose once.
if (!use_cached)
dnn_conv_update_cache(hashkey, prev_algo);
if (params->choose_once)
reuse_algo = 1;
}
} // params->choose_algo
gpudata *workspace = 0;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论