提交 6384da8a,作者:notoraptor

Standardize the return value of `c_set_math_type_for_conv()`: return 0 on success instead of 1 (-1 still signals failure).

上级 c5944c7d
...@@ -37,7 +37,7 @@ static int c_set_math_type_for_conv(cudnnConvolutionDescriptor_t desc, cudnnMath ...@@ -37,7 +37,7 @@ static int c_set_math_type_for_conv(cudnnConvolutionDescriptor_t desc, cudnnMath
return -1; return -1;
} }
#endif #endif
return 1; return 0;
} }
#section init_code_struct #section init_code_struct
......
...@@ -191,7 +191,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -191,7 +191,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
size_t maxfree = c_get_largest_free_block_size(c); size_t maxfree = c_get_largest_free_block_size(c);
if (PyErr_Occurred()) return 1; if (PyErr_Occurred()) return 1;
if (params->choose_algo) { if (params->choose_algo) {
if (!reuse_algo) { if (!reuse_algo) {
...@@ -286,7 +286,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -286,7 +286,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
} }
} }
if (c_set_math_type_for_conv(desc, mathtype) == -1 || if (c_set_math_type_for_conv(desc, mathtype) == -1 ||
dnn_conv_fwd_fallback(&algo, input, kerns, desc) != 0) { dnn_conv_fwd_fallback(&algo, input, kerns, desc) != 0) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
......
...@@ -192,7 +192,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -192,7 +192,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
// set the 'tensor math ok' flag // set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH); c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL); tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
if (tmpmem == NULL) { if (tmpmem == NULL) {
PyErr_SetString(PyExc_MemoryError, "Could not allocate working GPU memory"); PyErr_SetString(PyExc_MemoryError, "Could not allocate working GPU memory");
...@@ -251,7 +251,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -251,7 +251,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
} }
} }
} }
if (c_set_math_type_for_conv(desc, mathtype) == -1 || if (c_set_math_type_for_conv(desc, mathtype) == -1 ||
dnn_conv_gi_fallback(&algo, *input, kerns, desc) != 0) { dnn_conv_gi_fallback(&algo, *input, kerns, desc) != 0) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
...@@ -320,7 +320,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -320,7 +320,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
if (params->choose_once) { if (params->choose_once) {
reuse_algo = 1; reuse_algo = 1;
} }
gpudata *workspace = 0; gpudata *workspace = 0;
if (worksize != 0) { if (worksize != 0) {
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL); workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
......
...@@ -176,7 +176,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -176,7 +176,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
int count; int count;
cudnnConvolutionBwdFilterAlgoPerf_t choice; cudnnConvolutionBwdFilterAlgoPerf_t choice;
gpudata *tmpmem; gpudata *tmpmem;
// set the 'tensor math ok' flag // set the 'tensor math ok' flag
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH); c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
...@@ -241,7 +241,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -241,7 +241,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
} }
} }
} /* choose_algo */ } /* choose_algo */
if (c_set_math_type_for_conv(desc, mathtype) == -1 || if (c_set_math_type_for_conv(desc, mathtype) == -1 ||
dnn_conv_gw_fallback(&algo, input, *kerns, desc) != 0) { dnn_conv_gw_fallback(&algo, input, *kerns, desc) != 0) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
...@@ -310,9 +310,9 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -310,9 +310,9 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
if (params->choose_once) { if (params->choose_once) {
reuse_algo = 1; reuse_algo = 1;
} }
gpudata *workspace = 0; gpudata *workspace = 0;
if (worksize != 0) { if (worksize != 0) {
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL); workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) { if (workspace == NULL) {
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论