提交 822c06d4 authored 作者: Nicolas Ballas's avatar Nicolas Ballas 提交者: --global

fix guess/time workmem bug

上级 7f78fce1
......@@ -713,11 +713,11 @@ class GpuDnnConv3d(GpuDnnConv):
def __init__(self, workmem=None, inplace=False):
    """
    :param workmem: either 'none', 'time' or 'guess'.
        Default is the value of :attr:`config.dnn.conv.workmem`.
    """
    # Only a subset of the workmem algorithms is supported by the cudnn
    # 3d convolution, so the value is forced here instead of passing the
    # caller's `workmem` through.
    # NOTE(review): the `workmem` argument is currently ignored — confirm
    # that hard-coding 'guess' (rather than validating the user's choice
    # against the supported subset) is intended.
    super(GpuDnnConv3d, self).__init__(workmem='guess', inplace=inplace)
    # Missing commas here previously made Python concatenate the string
    # literals ('none' 'time' -> 'nonetime'), so 'none' and 'time' would
    # have failed this assertion.
    assert self.workmem in ['none', 'time', 'guess']
def make_node(self, img, kern, output, desc, alpha=None, beta=None, nb_dim=None):
......
......@@ -36,17 +36,17 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
void *workspace;
cudnnConvolutionFwdAlgo_t chosen_algo;
if (CHOOSE_ALGO)
{
// Check if the input and the kernels have the same shape as they have
// last time the apply node was executed
bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++)
{
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] !=
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
APPLY_SPECIFIC(previous_input_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] !=
same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] ==
APPLY_SPECIFIC(previous_kerns_shape)[i]);
}
......@@ -67,6 +67,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return 1;
}
// Obtain a convolution algorithm appropriate for the input and kernel
// shapes. Either by choosing one according to heuristics or by making
// CuDNN time every implementation and choose the best one.
......@@ -131,7 +132,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
// be used here
chosen_algo = APPLY_SPECIFIC(previous_algo);
}
}
else
{
......@@ -179,7 +179,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
}
}
err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
......@@ -188,10 +187,9 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
chosen_algo,
&worksize);
if (err != CUDNN_STATUS_SUCCESS) {
std::cout << "here" << std::endl;
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv: error getting worksize: %s",
cudnnGetErrorString(err));
cudnnGetErrorString(err));
return 1;
}
workspace = get_work_mem(worksize);
......
......@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1;
}
/* if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
/* if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
if (c_set_tensorNd(input, nb_dim, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensorNd(output, nb_dim, APPLY_SPECIFIC(output)) == -1)
......@@ -33,8 +28,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1;
#endif
/* if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
if (c_set_filterNd(*kerns, nb_dim, APPLY_SPECIFIC(kerns)) == -1)
return 1;
......@@ -50,9 +43,9 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++)
{
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] !=
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
APPLY_SPECIFIC(previous_input_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] !=
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] ==
APPLY_SPECIFIC(previous_output_shape)[i]);
}
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论