提交 c55c15ad authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6359 from borisfom/f32-hmma-fix

Fixing math_type for F32 and cache update
...@@ -231,7 +231,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -231,7 +231,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return -1; return -1;
} }
// set the 'tensor math ok' flag // set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH); if (input->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
// We don't sync the buffer as we don't care about the values. // We don't sync the buffer as we don't care about the values.
err = cudnnFindConvolutionForwardAlgorithmEx( err = cudnnFindConvolutionForwardAlgorithmEx(
...@@ -265,12 +266,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -265,12 +266,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
#endif #endif
algo = choice.algo; algo = choice.algo;
prev_algo.algo = (int)algo; worksize = choice.memory;
prev_algo.wsSize = worksize = choice.memory;
#if CUDNN_MAJOR >= 7 #if CUDNN_MAJOR >= 7
prev_algo.mathType = mathtype = choice.mathType; if (input->ga.typecode == GA_HALF)
mathtype = choice.mathType;
#endif #endif
} else { } else {
err = cudnnGetConvolutionForwardAlgorithm( err = cudnnGetConvolutionForwardAlgorithm(
params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns), params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
...@@ -283,9 +283,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -283,9 +283,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
prev_algo.algo = algo;
// no tensor_op returned from Get()
prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
} }
} }
} }
...@@ -334,18 +331,9 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -334,18 +331,9 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
} }
if (params->choose_algo && (!params->choose_once || !reuse_algo)) { if (params->choose_algo) {
// algo may have changed due to fallback, we must update it.
prev_algo.algo = algo;
// save worksize for next time/cache
prev_algo.wsSize = worksize;
// Add to the cache if we choose on shape change, or first time if we choose once.
dnn_conv_update_cache(hashkey, prev_algo);
}
#ifdef DEBUG #ifdef DEBUG
if (params->choose_algo) {
if (0 != theano_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) { if (0 != theano_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
...@@ -359,12 +347,23 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -359,12 +347,23 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
}
#endif #endif
if (params->choose_once) { if (!reuse_algo) {
reuse_algo = 1; // save for next time/cache
} prev_algo.algo = algo;
prev_algo.wsSize = worksize;
prev_algo.mathType = mathtype;
// Add to the cache if we choose on shape change, or first time if
// we choose once.
if (!use_cached)
dnn_conv_update_cache(hashkey, prev_algo);
if (params->choose_once)
reuse_algo = 1;
}
} // params->choose_algo
{ {
gpudata *workspace = 0; gpudata *workspace = 0;
......
...@@ -194,7 +194,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -194,7 +194,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
gpudata *tmpmem; gpudata *tmpmem;
// set the 'tensor math ok' flag // set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH); if (im->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL); tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
if (tmpmem == NULL) { if (tmpmem == NULL) {
...@@ -231,12 +232,11 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -231,12 +232,11 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
#endif #endif
algo = choice.algo; algo = choice.algo;
prev_algo.algo = (int)algo; worksize = choice.memory;
prev_algo.wsSize = worksize = choice.memory;
#if CUDNN_MAJOR >= 7 #if CUDNN_MAJOR >= 7
prev_algo.mathType = mathtype = choice.mathType; if (im->ga.typecode == GA_HALF)
mathtype = choice.mathType;
#endif #endif
} else { } else {
err = cudnnGetConvolutionBackwardDataAlgorithm( err = cudnnGetConvolutionBackwardDataAlgorithm(
params->handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), params->handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output),
...@@ -248,9 +248,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -248,9 +248,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
prev_algo.algo = algo;
// no tensor_op returned from Get()
prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
} }
} }
} }
...@@ -291,18 +288,9 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -291,18 +288,9 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
} }
} // !(reuse_algo || use_cached || params->choose_time) } // !(reuse_algo || use_cached || params->choose_time)
if (params->choose_algo && (!params->choose_once || !reuse_algo)) { if (params->choose_algo) {
// algo may have changed due to fallback, we must update it.
prev_algo.algo = algo;
// save worksize for next time/cache
prev_algo.wsSize = worksize;
// Add to the cache
dnn_conv_update_cache(hashkey, prev_algo);
}
#ifdef DEBUG #ifdef DEBUG
if (params->choose_algo) {
if (0 != theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) { if (0 != theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
...@@ -316,12 +304,23 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -316,12 +304,23 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
}
#endif #endif
if (params->choose_once) { if (!reuse_algo) {
reuse_algo = 1; // save for next time/cache
} prev_algo.algo = algo;
prev_algo.wsSize = worksize;
prev_algo.mathType = mathtype;
// Add to the cache
if (!use_cached)
dnn_conv_update_cache(hashkey, prev_algo);
if (params->choose_once)
reuse_algo = 1;
}
} // params->choose_algo
gpudata *workspace = 0; gpudata *workspace = 0;
if (worksize != 0) { if (worksize != 0) {
......
...@@ -181,7 +181,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -181,7 +181,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
gpudata *tmpmem; gpudata *tmpmem;
// set the 'tensor math ok' flag // set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH); if (input->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL); tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
if (tmpmem == NULL) { if (tmpmem == NULL) {
...@@ -220,12 +221,11 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -220,12 +221,11 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
#endif #endif
algo = choice.algo; algo = choice.algo;
prev_algo.algo = (int)algo; worksize = choice.memory;
prev_algo.wsSize = worksize = choice.memory;
#if CUDNN_MAJOR >= 7 #if CUDNN_MAJOR >= 7
prev_algo.mathType = mathtype = choice.mathType; if (input->ga.typecode == GA_HALF)
mathtype = choice.mathType;
#endif #endif
} else { } else {
err = cudnnGetConvolutionBackwardFilterAlgorithm( err = cudnnGetConvolutionBackwardFilterAlgorithm(
params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
...@@ -238,9 +238,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -238,9 +238,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
prev_algo.algo = algo;
// no tensor_op returned from Get()
prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
} }
} }
} /* choose_algo */ } /* choose_algo */
...@@ -281,18 +278,9 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -281,18 +278,9 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
} }
} }
if (params->choose_algo && (!params->choose_once || !reuse_algo)) { if (params->choose_algo) {
// algo may have changed due to fallback, we must update it.
prev_algo.algo = algo;
// save worksize for next time/cache
prev_algo.wsSize = worksize;
// Add to the cache
dnn_conv_update_cache(hashkey, prev_algo);
}
#ifdef DEBUG #ifdef DEBUG
if (params->choose_algo) {
if (0 != theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) { if (0 != theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
...@@ -306,12 +294,23 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -306,12 +294,23 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
}
#endif #endif
if (params->choose_once) { if (!reuse_algo) {
reuse_algo = 1; // save for next time/cache
} prev_algo.algo = algo;
prev_algo.wsSize = worksize;
prev_algo.mathType = mathtype;
// Add to the cache if we choose on shape change, or first time if
// we choose once.
if (!use_cached)
dnn_conv_update_cache(hashkey, prev_algo);
if (params->choose_once)
reuse_algo = 1;
}
} // params->choose_algo
gpudata *workspace = 0; gpudata *workspace = 0;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论