Fix cuDNN conv problem related to runtime algorithms

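with different data type configurations: the input data type and the compute
precision are now appended (as " -<dtype><precision>", e.g. " -hf") to the
convolution descriptor string that is used to build the algorithm hash key.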

Fix cuDNN conv problem related to runtime algorithms
bcff31c9 · notoraptor · a66b878f · bcff31c9 · bcff31c9 · bcff31c9
--- a/theano/gpuarray/c_code/dnn_conv_base.c
+++ b/theano/gpuarray/c_code/dnn_conv_base.c
@@ -199,7 +199,7 @@ static std::string shape(cudnnFilterDescriptor_t t, cudnnDataType_t* type)
    return shape(res, outDims);
 };

-static std::string shape(cudnnConvolutionDescriptor_t convDesc)
+static std::string shape(cudnnConvolutionDescriptor_t convDesc, int dataTypecode)
 {
    int nDim;
    cudnnConvolutionMode_t mode;
@@ -208,6 +208,9 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc)
    int                                 padA[5];
    int                                 strideA[5];
    int                                 dilationA[5];
+    /* Data type configuration. Format: " -<dtype><precision>" with dtype and precision in {h, f, d},
+     * h for half (float16), f for float (float32), d for double (float64). */
+    char data_type_configuration[5];

    checkCudnnStatus(
        cudnnGetConvolutionNdDescriptor( convDesc, 5,
@@ -220,6 +223,27 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc)
        "error getting convolution description");
    if (PyErr_Occurred()) return "";

+    /* Build data type configuration string. */
+    data_type_configuration[0] = ' ';
+    data_type_configuration[1] = '-';
+    switch (dataTypecode) {
+        case GA_HALF: data_type_configuration[2] = 'h'; break;
+        case GA_FLOAT: data_type_configuration[2] = 'f'; break;
+        case GA_DOUBLE: data_type_configuration[2] = 'd'; break;
+        default:
+            PyErr_SetString(PyExc_TypeError, "Unsupported data type in convolution.");
+            return "";
+    }
+    switch (computeType) {
+        case CUDNN_DATA_HALF: data_type_configuration[3] = 'h'; break;
+        case CUDNN_DATA_FLOAT: data_type_configuration[3] = 'f'; break;
+        case CUDNN_DATA_DOUBLE: data_type_configuration[3] = 'd'; break;
+        default:
+            PyErr_SetString(PyExc_TypeError, "Unsupported precision in convolution.");
+            return "";
+    }
+    data_type_configuration[4] = '\0';
+
    return (std::string("-mode ") +
            ((mode == CUDNN_CONVOLUTION) ? "conv" : "cross") +
            " -pad " +
@@ -227,7 +251,8 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc)
            " -subsample " +
            shape(strideA, nDim) +
            " -dilation " +
-            shape(dilationA, nDim));
+            shape(dilationA, nDim) +
+            data_type_configuration);
 }

 static bool all_aligned(cudnnDataType_t type, void* in, void* out, void* filter)
@@ -253,7 +278,7 @@ static std::string dnn_conv_shape(cudnnTensorDescriptor_t inputDesc, PyGpuArrayO
        return "";
    std::string shapeInput = shape(inputDesc);
    std::string shapeFilter = shape(filterDesc, &dType);
-    std::string shapeConvDesc = shape(convDesc);
+    std::string shapeConvDesc = shape(convDesc, input->ga.typecode);
    if (shapeInput.empty() || shapeFilter.empty() || shapeConvDesc.empty())
        return "";
    s << "-g " << groups << " -dim " << shapeInput << " -filt " <<

--- a/theano/gpuarray/c_code/dnn_fwd.c
+++ b/theano/gpuarray/c_code/dnn_fwd.c
@@ -352,7 +352,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
    }
    fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n",
            algorithm_name,
-            mathtype == CUDNN_TENSOR_OP_MATH ? "[T]" : "",
+            mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor_op)" : "",
            params->choose_time ? "(timed)": "" ,
            reuse_algo ? "(reused)" : "",
            use_cached ? "(cache)": "",

--- a/theano/gpuarray/c_code/dnn_gi.c
+++ b/theano/gpuarray/c_code/dnn_gi.c
@@ -170,7 +170,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
      char pci_id[16];
      gpucontext_property(c->ctx, GA_CTX_PROP_PCIBUSID, pci_id);
      // check out cache
-      hashkey=dnn_conv_shape(APPLY_SPECIFIC(input), *input, APPLY_SPECIFIC(kerns), kerns, desc, output, groups);
+      hashkey = dnn_conv_shape(APPLY_SPECIFIC(input), *input, APPLY_SPECIFIC(kerns), kerns, desc, output, groups);
      if (hashkey.empty()) {
        cuda_exit(c->ctx);
        return 1;
@@ -307,13 +307,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
        cuda_exit(c->ctx);
        return 1;
    }
-    // NB: This is printed only when algorithm is chosen at runtime.
-    fprintf(stderr, "(using %s %s%s%s%s, ws:%ld, hash:%s)\n",
+    fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n",
            algorithm_name,
+            mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor_op)" : "",
            params->choose_time ? "(timed)": "" ,
            reuse_algo ? "(reused)" : "",
            use_cached ? "(cache)": "",
-            mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor op)" : "",
            worksize,
            hashkey.c_str()
      );

--- a/theano/gpuarray/c_code/dnn_gw.c
+++ b/theano/gpuarray/c_code/dnn_gw.c
@@ -297,13 +297,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
      cuda_exit(c->ctx);
      return 1;
    }
-    // NB: This is printed only when algorithm is chosen at runtime.
-    fprintf(stderr, "(using %s %s%s%s%s, ws:%ld, hash:%s)\n",
+    fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n",
            algorithm_name,
+            mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor_op)" : "",
            params->choose_time ? "(timed)": "" ,
            reuse_algo ? "(reused)" : "",
            use_cached ? "(cache)": "",
-            mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor op)" : "",
            worksize,
            hashkey.c_str()
     );