提交 1bc17311 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6354 from notoraptor/fix-conv-runtime-algos-with-different-dtypes

Add data type configuration to algorithms hash for cuDNN convolutions caching system
...@@ -199,7 +199,7 @@ static std::string shape(cudnnFilterDescriptor_t t, cudnnDataType_t* type) ...@@ -199,7 +199,7 @@ static std::string shape(cudnnFilterDescriptor_t t, cudnnDataType_t* type)
return shape(res, outDims); return shape(res, outDims);
}; };
static std::string shape(cudnnConvolutionDescriptor_t convDesc) static std::string shape(cudnnConvolutionDescriptor_t convDesc, int dataTypecode)
{ {
int nDim; int nDim;
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
...@@ -208,6 +208,9 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc) ...@@ -208,6 +208,9 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc)
int padA[5]; int padA[5];
int strideA[5]; int strideA[5];
int dilationA[5]; int dilationA[5];
/* Data type configuration. Format: " -<dtype><precision>" with dtype and precision in {h, f, d},
* h for half (float16), f for float (float32), d for double (float64). */
char data_type_configuration[5];
checkCudnnStatus( checkCudnnStatus(
cudnnGetConvolutionNdDescriptor( convDesc, 5, cudnnGetConvolutionNdDescriptor( convDesc, 5,
...@@ -220,6 +223,27 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc) ...@@ -220,6 +223,27 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc)
"error getting convolution description"); "error getting convolution description");
if (PyErr_Occurred()) return ""; if (PyErr_Occurred()) return "";
/* Build data type configuration string. */
data_type_configuration[0] = ' ';
data_type_configuration[1] = '-';
switch (dataTypecode) {
case GA_HALF: data_type_configuration[2] = 'h'; break;
case GA_FLOAT: data_type_configuration[2] = 'f'; break;
case GA_DOUBLE: data_type_configuration[2] = 'd'; break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported data type in convolution.");
return "";
}
switch (computeType) {
case CUDNN_DATA_HALF: data_type_configuration[3] = 'h'; break;
case CUDNN_DATA_FLOAT: data_type_configuration[3] = 'f'; break;
case CUDNN_DATA_DOUBLE: data_type_configuration[3] = 'd'; break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported precision in convolution.");
return "";
}
data_type_configuration[4] = '\0';
return (std::string("-mode ") + return (std::string("-mode ") +
((mode == CUDNN_CONVOLUTION) ? "conv" : "cross") + ((mode == CUDNN_CONVOLUTION) ? "conv" : "cross") +
" -pad " + " -pad " +
...@@ -227,7 +251,8 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc) ...@@ -227,7 +251,8 @@ static std::string shape(cudnnConvolutionDescriptor_t convDesc)
" -subsample " + " -subsample " +
shape(strideA, nDim) + shape(strideA, nDim) +
" -dilation " + " -dilation " +
shape(dilationA, nDim)); shape(dilationA, nDim) +
data_type_configuration);
} }
static bool all_aligned(cudnnDataType_t type, void* in, void* out, void* filter) static bool all_aligned(cudnnDataType_t type, void* in, void* out, void* filter)
...@@ -253,7 +278,7 @@ static std::string dnn_conv_shape(cudnnTensorDescriptor_t inputDesc, PyGpuArrayO ...@@ -253,7 +278,7 @@ static std::string dnn_conv_shape(cudnnTensorDescriptor_t inputDesc, PyGpuArrayO
return ""; return "";
std::string shapeInput = shape(inputDesc); std::string shapeInput = shape(inputDesc);
std::string shapeFilter = shape(filterDesc, &dType); std::string shapeFilter = shape(filterDesc, &dType);
std::string shapeConvDesc = shape(convDesc); std::string shapeConvDesc = shape(convDesc, input->ga.typecode);
if (shapeInput.empty() || shapeFilter.empty() || shapeConvDesc.empty()) if (shapeInput.empty() || shapeFilter.empty() || shapeConvDesc.empty())
return ""; return "";
s << "-g " << groups << " -dim " << shapeInput << " -filt " << s << "-g " << groups << " -dim " << shapeInput << " -filt " <<
......
...@@ -352,7 +352,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -352,7 +352,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n", fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n",
algorithm_name, algorithm_name,
mathtype == CUDNN_TENSOR_OP_MATH ? "[T]" : "", mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor_op)" : "",
params->choose_time ? "(timed)": "" , params->choose_time ? "(timed)": "" ,
reuse_algo ? "(reused)" : "", reuse_algo ? "(reused)" : "",
use_cached ? "(cache)": "", use_cached ? "(cache)": "",
......
...@@ -170,7 +170,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -170,7 +170,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
char pci_id[16]; char pci_id[16];
gpucontext_property(c->ctx, GA_CTX_PROP_PCIBUSID, pci_id); gpucontext_property(c->ctx, GA_CTX_PROP_PCIBUSID, pci_id);
// check out cache // check out cache
hashkey=dnn_conv_shape(APPLY_SPECIFIC(input), *input, APPLY_SPECIFIC(kerns), kerns, desc, output, groups); hashkey = dnn_conv_shape(APPLY_SPECIFIC(input), *input, APPLY_SPECIFIC(kerns), kerns, desc, output, groups);
if (hashkey.empty()) { if (hashkey.empty()) {
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
...@@ -307,13 +307,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -307,13 +307,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
// NB: This is printed only when algorithm is chosen at runtime. fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n",
fprintf(stderr, "(using %s %s%s%s%s, ws:%ld, hash:%s)\n",
algorithm_name, algorithm_name,
mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor_op)" : "",
params->choose_time ? "(timed)": "" , params->choose_time ? "(timed)": "" ,
reuse_algo ? "(reused)" : "", reuse_algo ? "(reused)" : "",
use_cached ? "(cache)": "", use_cached ? "(cache)": "",
mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor op)" : "",
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
......
...@@ -297,13 +297,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -297,13 +297,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
// NB: This is printed only when algorithm is chosen at runtime. fprintf(stderr, "(using %s%s %s%s%s, ws:%ld, hash:%s)\n",
fprintf(stderr, "(using %s %s%s%s%s, ws:%ld, hash:%s)\n",
algorithm_name, algorithm_name,
mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor_op)" : "",
params->choose_time ? "(timed)": "" , params->choose_time ? "(timed)": "" ,
reuse_algo ? "(reused)" : "", reuse_algo ? "(reused)" : "",
use_cached ? "(cache)": "", use_cached ? "(cache)": "",
mathtype == CUDNN_TENSOR_OP_MATH ? "(tensor op)" : "",
worksize, worksize,
hashkey.c_str() hashkey.c_str()
); );
......
...@@ -2666,3 +2666,31 @@ class TestDnnConv3DRuntimeAlgorithms(TestDnnConv2DRuntimeAlgorithms): ...@@ -2666,3 +2666,31 @@ class TestDnnConv3DRuntimeAlgorithms(TestDnnConv2DRuntimeAlgorithms):
(1, [(4, 2, 20, 20, 20), (2, 2, 20, 19, 18)]), # cache should be used (1, [(4, 2, 20, 20, 20), (2, 2, 20, 19, 18)]), # cache should be used
(1, [(1, 2, 3, 4, 5), (6, 2, 3, 2, 1)]) (1, [(1, 2, 3, 4, 5), (6, 2, 3, 2, 1)])
] ]
def test_conv_guess_once_with_dtypes():
    """Run cuDNN forward convolutions compiled with ``algo='guess_once'`` for
    several data type / precision configurations.

    Per the change under review, the cuDNN algorithm cache key now includes
    the data type configuration, so functions differing only in dtype or
    precision should not collide in the cache — TODO confirm against the
    C code side of this patch. Each compiled function below should simply
    execute without raising.
    """
    utt.seed_rng()
    inputs_shape = (2, 3, 5, 5)
    filters_shape = (2, 3, 40, 4)
    border_mode = 'full'

    def get_function(dtype, precision):
        # Compile one forward convolution for the given storage dtype and
        # cuDNN compute precision, forcing runtime algorithm selection
        # ('guess_once') so the algorithm cache is exercised.
        inputs_val = np.random.random(inputs_shape).astype(dtype)
        filters_val = np.random.random(filters_shape).astype(dtype)
        # Scale down to keep values small (reduces overflow/precision
        # trouble, especially for the float16 configurations).
        inputs_val /= 10
        filters_val /= 10
        inputs = theano.shared(inputs_val)
        filters = theano.shared(filters_val)
        conv = dnn.dnn_conv(img=inputs, kerns=filters,
                            border_mode=border_mode, precision=precision,
                            algo='guess_once', direction_hint='forward!')
        return theano.function([], conv)

    # The four supported configurations: true half, pseudo half
    # (float16 storage with float32 compute), float and double.
    f_true_half_config = get_function('float16', 'float16')
    f_pseudo_half_config = get_function('float16', 'float32')
    f_float_config = get_function('float32', 'float32')
    f_double_config = get_function('float64', 'float64')
    # Let's just see if everything runs without raising any exception.
    f_true_half_config()
    f_pseudo_half_config()
    f_float_config()
    f_double_config()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论