Commit ade0510d authored by Boris Fomitchev

Fixing math_type for F32 and cache update

Parent 1bc17311
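What the change does: previously all three cuDNN convolution paths (conv_fwd, conv_gi, conv_gw) requested CUDNN_TENSOR_OP_MATH on the convolution descriptor unconditionally, so float32 convolutions could be tagged with a tensor-op math type they cannot use, and the Get()/fallback branches wrote that state into the algorithm cache. The diff below guards the request behind a GA_HALF typecode check, records algo, workspace size, and math type into prev_algo in one place after any fallback, and updates the cache only when the entry was not itself served from the cache. A minimal sketch of the guard, assuming c_set_math_type_for_conv wraps cuDNN 7's cudnnSetConvolutionMathType (the helper's body is not part of this diff):

/* Hedged sketch, not from this commit: what the GA_HALF guard achieves. */
#include <cudnn.h>

static cudnnStatus_t
set_conv_math_type(cudnnConvolutionDescriptor_t desc, int is_half)
{
#if CUDNN_MAJOR >= 7
  /* Request tensor-core math only for float16 data; float32 keeps
   * CUDNN_DEFAULT_MATH, which is exactly what this commit fixes. */
  return cudnnSetConvolutionMathType(
      desc, is_half ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH);
#else
  (void)desc; (void)is_half;   /* no math types before cuDNN 7 */
  return CUDNN_STATUS_SUCCESS;
#endif
}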
@@ -231,7 +231,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
     return -1;
   }
   // set the 'tensor math ok' flag
-  c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
+  if (input->ga.typecode == GA_HALF)
+    c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
 
   // We don't sync the buffer as we don't care about the values.
   err = cudnnFindConvolutionForwardAlgorithmEx(
@@ -265,12 +266,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
 #endif
       algo = choice.algo;
-      prev_algo.algo = (int)algo;
-      prev_algo.wsSize = worksize = choice.memory;
+      worksize = choice.memory;
 #if CUDNN_MAJOR >= 7
-      prev_algo.mathType = mathtype = choice.mathType;
+      if (input->ga.typecode == GA_HALF)
+        mathtype = choice.mathType;
 #endif
     } else {
       err = cudnnGetConvolutionForwardAlgorithm(
         params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
@@ -283,9 +283,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
         cuda_exit(c->ctx);
         return 1;
       }
-      prev_algo.algo = algo;
-      // no tensor_op returned from Get()
-      prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
     }
   }
 }
@@ -334,18 +331,17 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
     }
   }
-  if (params->choose_algo && (!params->choose_once || !reuse_algo)) {
+  // algo may have changed due to fallback, we must update it.
+  if (params->choose_algo && !reuse_algo) {
-    // save for next time/cache
     prev_algo.algo = algo;
-    // save worksize for next time/cache
     prev_algo.wsSize = worksize;
-    // Add to the cache if we choose on shape change, or first time if we choose once.
-    dnn_conv_update_cache(hashkey, prev_algo);
-  }
+    prev_algo.mathType = mathtype;
+    // Add to the cache if we choose on shape change, or first time if
+    // we choose once.
+    if (!use_cached)
+      dnn_conv_update_cache(hashkey, prev_algo);
 #ifdef DEBUG
-  if (params->choose_algo) {
     if (0 != theano_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) {
       cuda_exit(c->ctx);
       return 1;
@@ -359,12 +355,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
             worksize,
             hashkey.c_str()
            );
-  }
 #endif
-  if (params->choose_once) {
-    reuse_algo = 1;
-  }
+    if (params->choose_once)
+      reuse_algo = 1;
+  } // params->choose_algo && !reuse_algo
   {
     gpudata *workspace = 0;
......
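That covers conv_fwd. Beyond the GA_HALF guard, note how the caching was restructured above: prev_algo is no longer filled piecemeal inside the Find/Get branches; instead algo, wsSize, and mathType are recorded together after the workspace-allocation fallback, and dnn_conv_update_cache runs only on a cache miss. A condensed sketch of the resulting flow; only the field and function names are taken from the diff, the AlgoRec layout itself is an assumption:

/* Assumed record type; algo/wsSize/mathType names appear in the diff. */
typedef struct {
  int algo;       /* chosen cudnnConvolutionFwdAlgo_t, stored as int */
  size_t wsSize;  /* workspace bytes the algorithm needs */
  int mathType;   /* cudnnMathType_t actually in effect */
} AlgoRec;

/* Fragment: inside the conv_fwd body, after selection and any fallback. */
if (params->choose_algo && !reuse_algo) {
  prev_algo.algo = algo;
  prev_algo.wsSize = worksize;
  prev_algo.mathType = mathtype;
  if (!use_cached)                       /* never rewrite a cache hit */
    dnn_conv_update_cache(hashkey, prev_algo);
  if (params->choose_once)
    reuse_algo = 1;                      /* pin this choice for reuse */
}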
@@ -194,7 +194,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
   gpudata *tmpmem;
 
   // set the 'tensor math ok' flag
-  c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
+  if (im->ga.typecode == GA_HALF)
+    c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
 
   tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
   if (tmpmem == NULL) {
@@ -229,14 +230,13 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
         return 1;
       } // Else, count is necessarly 1 for current implementation.
 #endif
       algo = choice.algo;
-      prev_algo.algo = (int)algo;
-      prev_algo.wsSize = worksize = choice.memory;
+      worksize = choice.memory;
 #if CUDNN_MAJOR >= 7
-      prev_algo.mathType = mathtype = choice.mathType;
+      if (im->ga.typecode == GA_HALF)
+        mathtype = choice.mathType;
 #endif
     } else {
       err = cudnnGetConvolutionBackwardDataAlgorithm(
         params->handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output),
@@ -248,9 +248,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
         cuda_exit(c->ctx);
         return 1;
       }
-      prev_algo.algo = algo;
-      // no tensor_op returned from Get()
-      prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
     }
   }
 }
@@ -291,18 +288,17 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
     }
   } // !(reuse_algo || use_cached || params->choose_time)
-  if (params->choose_algo && (!params->choose_once || !reuse_algo)) {
+  // algo may have changed due to fallback, we must update it.
+  if (params->choose_algo && !reuse_algo) {
-    // save for next time/cache
     prev_algo.algo = algo;
-    // save worksize for next time/cache
     prev_algo.wsSize = worksize;
+    prev_algo.mathType = mathtype;
     // Add to the cache
-    dnn_conv_update_cache(hashkey, prev_algo);
-  }
+    if (!use_cached)
+      dnn_conv_update_cache(hashkey, prev_algo);
 #ifdef DEBUG
-  if (params->choose_algo) {
     if (0 != theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) {
       cuda_exit(c->ctx);
       return 1;
@@ -316,13 +312,11 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
             worksize,
             hashkey.c_str()
            );
-  }
 #endif
-  if (params->choose_once) {
-    reuse_algo = 1;
-  }
+    if (params->choose_once)
+      reuse_algo = 1;
+  } // params->choose_algo && !reuse_algo
   gpudata *workspace = 0;
   if (worksize != 0) {
     workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
......
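conv_gi follows the same pattern. The deletions in its Get() branch are worth a comment: cudnnGetConvolutionBackwardDataAlgorithm returns only an algorithm, never a math type, so the old code pinned prev_algo.mathType to CUDNN_DEFAULT_MATH there and later cached it, clobbering the tensor-op state for half inputs. After the change, mathtype simply keeps whatever the guarded descriptor setup chose. An illustrative way (not from this commit) to verify the float32 behavior with cuDNN 7's cudnnGetConvolutionMathType:

#include <cudnn.h>
#include <stdio.h>

/* Hedged test-harness sketch: after descriptor setup, a float32
 * convolution should still report CUDNN_DEFAULT_MATH. */
static void check_math_type(cudnnConvolutionDescriptor_t desc, int is_half)
{
#if CUDNN_MAJOR >= 7
  cudnnMathType_t mt;
  if (cudnnGetConvolutionMathType(desc, &mt) == CUDNN_STATUS_SUCCESS &&
      mt == CUDNN_TENSOR_OP_MATH && !is_half)
    fprintf(stderr, "tensor-op math leaked onto a non-half convolution\n");
#else
  (void)desc; (void)is_half;   /* nothing to check before cuDNN 7 */
#endif
}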
@@ -181,7 +181,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
   gpudata *tmpmem;
 
   // set the 'tensor math ok' flag
-  c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
+  if (input->ga.typecode == GA_HALF)
+    c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
 
   tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
   if (tmpmem == NULL) {
@@ -220,12 +221,11 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
 #endif
       algo = choice.algo;
-      prev_algo.algo = (int)algo;
-      prev_algo.wsSize = worksize = choice.memory;
+      worksize = choice.memory;
 #if CUDNN_MAJOR >= 7
-      prev_algo.mathType = mathtype = choice.mathType;
+      if (input->ga.typecode == GA_HALF)
+        mathtype = choice.mathType;
 #endif
     } else {
       err = cudnnGetConvolutionBackwardFilterAlgorithm(
         params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
@@ -238,9 +238,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
         cuda_exit(c->ctx);
         return 1;
       }
-      prev_algo.algo = algo;
-      // no tensor_op returned from Get()
-      prev_algo.mathType = mathtype = CUDNN_DEFAULT_MATH;
     }
   }
 } /* choose_algo */
@@ -281,18 +278,17 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
     }
   }
-  if (params->choose_algo && (!params->choose_once || !reuse_algo)) {
+  // algo may have changed due to fallback, we must update it.
+  if (params->choose_algo && !reuse_algo) {
-    // save for next time/cache
     prev_algo.algo = algo;
-    // save worksize for next time/cache
     prev_algo.wsSize = worksize;
-    // Add to the cache
-    dnn_conv_update_cache(hashkey, prev_algo);
-  }
+    prev_algo.mathType = mathtype;
+    // Add to the cache if we choose on shape change, or first time if
+    // we choose once.
+    if (!use_cached)
+      dnn_conv_update_cache(hashkey, prev_algo);
 #ifdef DEBUG
-  if (params->choose_algo) {
     if (0 != theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) {
       cuda_exit(c->ctx);
       return 1;
@@ -306,13 +302,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
             worksize,
             hashkey.c_str()
            );
-  }
 #endif
-  if (params->choose_once) {
-    reuse_algo = 1;
-  }
+    if (params->choose_once)
+      reuse_algo = 1;
+  } // params->choose_algo && !reuse_algo
   gpudata *workspace = 0;
   if (worksize != 0) {
......
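conv_gw mirrors the other two paths. One last detail: on the Find path, the cuDNN 7 *AlgoPerf_t results carry the math type each timed algorithm actually used (the choice.mathType read under CUDNN_MAJOR >= 7 above), which is why the diff copies it only for GA_HALF inputs. A simplified, hedged sketch of that selection step; the function and its parameters are illustrative, not from the commit:

#include <cudnn.h>

/* Consume cudnnFindConvolutionForwardAlgorithmEx results: entries are
 * sorted by runtime, so perf[0] is the fastest one that succeeded. */
static int pick_fwd_algo(const cudnnConvolutionFwdAlgoPerf_t *perf, int count,
                         int is_half, cudnnConvolutionFwdAlgo_t *algo,
                         size_t *worksize, cudnnMathType_t *mathtype)
{
  if (count < 1 || perf[0].status != CUDNN_STATUS_SUCCESS)
    return -1;                      /* nothing usable was returned */
  *algo = perf[0].algo;
  *worksize = perf[0].memory;
#if CUDNN_MAJOR >= 7
  if (is_half)
    *mathtype = perf[0].mathType;   /* may be CUDNN_TENSOR_OP_MATH */
#else
  (void)is_half; (void)mathtype;    /* no math types before cuDNN 7 */
#endif
  return 0;
}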