提交 ade0510d authored 作者: Boris Fomitchev's avatar Boris Fomitchev

Fixing math_type for F32 and cache update

上级 1bc17311
...@@ -231,7 +231,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -231,7 +231,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return -1; return -1;
} }
// set the 'tensor math ok' flag // set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH); if (input->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
// We don't sync the buffer as we don't care about the values. // We don't sync the buffer as we don't care about the values.
err = cudnnFindConvolutionForwardAlgorithmEx( err = cudnnFindConvolutionForwardAlgorithmEx(
...@@ -265,12 +266,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -265,12 +266,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
#endif #endif
algo = choice.algo; algo = choice.algo;
prev_algo.algo = (int)algo; worksize = choice.memory;
prev_algo.wsSize = worksize = choice.memory;
#if CUDNN_MAJOR >= 7 #if CUDNN_MAJOR >= 7
prev_algo.mathType = mathtype = choice.mathType; if (input->ga.typecode == GA_HALF)
mathtype = choice.mathType;
#endif #endif
} else { } else {
err = cudnnGetConvolutionForwardAlgorithm( err = cudnnGetConvolutionForwardAlgorithm(
params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns), params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
...@@ -283,9 +283,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -283,9 +283,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
prev_algo.algo = algo;
// no tensor_op returned from Get()
prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
} }
} }
} }
...@@ -334,18 +331,17 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -334,18 +331,17 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
} }
if (params->choose_algo && (!params->choose_once || !reuse_algo)) { if (params->choose_algo && !reuse_algo) {
// algo may have changed due to fallback, we must update it. // save for next time/cache
prev_algo.algo = algo; prev_algo.algo = algo;
// save worksize for next time/cache
prev_algo.wsSize = worksize; prev_algo.wsSize = worksize;
prev_algo.mathType = mathtype;
// Add to the cache if we choose on shape change, or first time if we choose once. // Add to the cache if we choose on shape change, or first time if
dnn_conv_update_cache(hashkey, prev_algo); // we choose once.
} if (!use_cached)
dnn_conv_update_cache(hashkey, prev_algo);
#ifdef DEBUG #ifdef DEBUG
if (params->choose_algo) {
if (0 != theano_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) { if (0 != theano_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
...@@ -359,12 +355,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -359,12 +355,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
}
#endif #endif
if (params->choose_once) { if (params->choose_once)
reuse_algo = 1; reuse_algo = 1;
} } // params->choose_algo && !reuse_algo
{ {
gpudata *workspace = 0; gpudata *workspace = 0;
......
...@@ -194,7 +194,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -194,7 +194,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
gpudata *tmpmem; gpudata *tmpmem;
// set the 'tensor math ok' flag // set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH); if (im->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL); tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
if (tmpmem == NULL) { if (tmpmem == NULL) {
...@@ -229,14 +230,13 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -229,14 +230,13 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return 1; return 1;
} // Else, count is necessarly 1 for current implementation. } // Else, count is necessarly 1 for current implementation.
#endif #endif
algo = choice.algo; algo = choice.algo;
prev_algo.algo = (int)algo; worksize = choice.memory;
prev_algo.wsSize = worksize = choice.memory;
#if CUDNN_MAJOR >= 7 #if CUDNN_MAJOR >= 7
prev_algo.mathType = mathtype = choice.mathType; if (im->ga.typecode == GA_HALF)
mathtype = choice.mathType;
#endif #endif
} else { } else {
err = cudnnGetConvolutionBackwardDataAlgorithm( err = cudnnGetConvolutionBackwardDataAlgorithm(
params->handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), params->handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output),
...@@ -248,9 +248,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -248,9 +248,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
prev_algo.algo = algo;
// no tensor_op returned from Get()
prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
} }
} }
} }
...@@ -291,18 +288,17 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -291,18 +288,17 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
} }
} // !(reuse_algo || use_cached || params->choose_time) } // !(reuse_algo || use_cached || params->choose_time)
if (params->choose_algo && (!params->choose_once || !reuse_algo)) { if (params->choose_algo && !reuse_algo) {
// algo may have changed due to fallback, we must update it. // save for next time/cache
prev_algo.algo = algo; prev_algo.algo = algo;
// save worksize for next time/cache
prev_algo.wsSize = worksize; prev_algo.wsSize = worksize;
prev_algo.mathType = mathtype;
// Add to the cache // Add to the cache
dnn_conv_update_cache(hashkey, prev_algo); if (!use_cached)
} dnn_conv_update_cache(hashkey, prev_algo);
#ifdef DEBUG #ifdef DEBUG
if (params->choose_algo) {
if (0 != theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) { if (0 != theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
...@@ -316,13 +312,11 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -316,13 +312,11 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
}
#endif #endif
if (params->choose_once)
if (params->choose_once) { reuse_algo = 1;
reuse_algo = 1; } // params->choose_algo && !reuse_algo
}
gpudata *workspace = 0; gpudata *workspace = 0;
if (worksize != 0) { if (worksize != 0) {
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL); workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
......
...@@ -181,7 +181,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -181,7 +181,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
gpudata *tmpmem; gpudata *tmpmem;
// set the 'tensor math ok' flag // set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH); if (input->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL); tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
if (tmpmem == NULL) { if (tmpmem == NULL) {
...@@ -220,12 +221,11 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -220,12 +221,11 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
#endif #endif
algo = choice.algo; algo = choice.algo;
prev_algo.algo = (int)algo; worksize = choice.memory;
prev_algo.wsSize = worksize = choice.memory;
#if CUDNN_MAJOR >= 7 #if CUDNN_MAJOR >= 7
prev_algo.mathType = mathtype = choice.mathType; if (input->ga.typecode == GA_HALF)
mathtype = choice.mathType;
#endif #endif
} else { } else {
err = cudnnGetConvolutionBackwardFilterAlgorithm( err = cudnnGetConvolutionBackwardFilterAlgorithm(
params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
...@@ -238,9 +238,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -238,9 +238,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
prev_algo.algo = algo;
// no tensor_op returned from Get()
prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
} }
} }
} /* choose_algo */ } /* choose_algo */
...@@ -281,18 +278,17 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -281,18 +278,17 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
} }
} }
if (params->choose_algo && (!params->choose_once || !reuse_algo)) { if (params->choose_algo && !reuse_algo) {
// algo may have changed due to fallback, we must update it. // save for next time/cache
prev_algo.algo = algo; prev_algo.algo = algo;
// save worksize for next time/cache
prev_algo.wsSize = worksize; prev_algo.wsSize = worksize;
prev_algo.mathType = mathtype;
// Add to the cache // Add to the cache if we choose on shape change, or first time if
dnn_conv_update_cache(hashkey, prev_algo); // we choose once.
} if (!use_cached)
dnn_conv_update_cache(hashkey, prev_algo);
#ifdef DEBUG #ifdef DEBUG
if (params->choose_algo) {
if (0 != theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) { if (0 != theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
...@@ -306,13 +302,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -306,13 +302,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
}
#endif #endif
if (params->choose_once) { if (params->choose_once)
reuse_algo = 1; reuse_algo = 1;
} } // params->choose_algo && !reuse_algo
gpudata *workspace = 0; gpudata *workspace = 0;
if (worksize != 0) { if (worksize != 0) {
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论