GpuCorrMM and GpuCorr3dMM (sandbox.cuda): handle zero-sized inputs (empty batch, zero channels, or zero filters) by filling the output with zeros and returning early.

771cf248 · Gijs van Tulder · b0b0f076 · 771cf248 · 771cf248 · 771cf248
--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -922,7 +922,7 @@ class BaseGpuCorrMM(GpuOp):
    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
-        return (0, 29)
+        return (0, 30)
    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
@@ -1513,7 +1513,7 @@ class BaseGpuCorr3dMM(GpuOp):
    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
-        return (0, 28)
+        return (0, 29)
    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of

--- a/theano/sandbox/cuda/corr3d_gemm.cu
+++ b/theano/sandbox/cuda/corr3d_gemm.cu
@@ -486,6 +486,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
    if (direction == 0)
    { // forward pass
      output = top;
+      if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
+          cudaError_t err = cudaMemset(output->devdata, 0,
+                                       CudaNdarray_SIZE(output) * sizeof(real));
+          if (err != cudaSuccess) {
+            PyErr_Format(PyExc_RuntimeError,
+                         "GpuCorr3dMM could not fill the output with zeros: %s",
+                         cudaGetErrorString(err));
+            Py_DECREF(col);
+            return NULL;
+          }
+          Py_DECREF(col);
+          return output;
+      }
      // valid correlation: im2col, then gemm
      // Iterate over batch
      for (int n = 0; n < batchSize; n++)
@@ -535,6 +548,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
    {
      // backprop wrt. weights
      output = weight;
+      if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
+          cudaError_t err = cudaMemset(output->devdata, 0,
+                                       CudaNdarray_SIZE(output) * sizeof(real));
+          if (err != cudaSuccess) {
+            PyErr_Format(PyExc_RuntimeError,
+                         "GpuCorr3dMM grad wrt. weights could not fill the output with zeros: %s",
+                         cudaGetErrorString(err));
+            Py_DECREF(col);
+            return NULL;
+          }
+          Py_DECREF(col);
+          return output;
+      }
      // valid convolution: im2col, then gemm
      // Iterate over batch
      for (int n = 0; n < batchSize; n++)
@@ -586,6 +612,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
    {
      // backprop wrt. inputs
      output = bottom;
+      if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
+          cudaError_t err = cudaMemset(output->devdata, 0,
+                                       CudaNdarray_SIZE(output) * sizeof(real));
+          if (err != cudaSuccess) {
+            PyErr_Format(PyExc_RuntimeError,
+                         "GpuCorr3dMM grad wrt. inputs could not fill the output with zeros: %s",
+                         cudaGetErrorString(err));
+            Py_DECREF(col);
+            return NULL;
+          }
+          Py_DECREF(col);
+          return output;
+      }
      // full convolution: gemm, then col2im3d
      // Iterate over batch
      for (int n = 0; n < batchSize; n++)

--- a/theano/sandbox/cuda/corr_gemm.cu
+++ b/theano/sandbox/cuda/corr_gemm.cu
@@ -384,6 +384,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
    CudaNdarray *output;
    if (direction == 0) {  // forward pass
        output = top;
+        if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
+            cudaError_t err = cudaMemset(output->devdata, 0,
+                                         CudaNdarray_SIZE(output) * sizeof(real));
+            if (err != cudaSuccess) {
+                PyErr_Format(PyExc_RuntimeError,
+                             "GpuCorrMM could not fill the output with zeros: %s",
+                             cudaGetErrorString(err));
+                Py_DECREF(col);
+                return NULL;
+            }
+            Py_DECREF(col);
+            return output;
+        }
        // valid correlation: im2col, then gemm
        // Iterate over batch
        for (int n = 0; n < batchSize; n++) {
@@ -452,6 +465,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
    }
    else if (direction == 1) {  // backprop wrt. weights
        output = weight;
+        if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
+            cudaError_t err = cudaMemset(output->devdata, 0,
+                                         CudaNdarray_SIZE(output) * sizeof(real));
+            if (err != cudaSuccess) {
+                PyErr_Format(PyExc_RuntimeError,
+                             "GpuCorrMM grad wrt. weights could not fill the output with zeros: %s",
+                             cudaGetErrorString(err));
+                Py_DECREF(col);
+                return NULL;
+            }
+            Py_DECREF(col);
+            return output;
+        }
        // valid convolution: im2col, then gemm
        // Iterate over batch
        for (int n = 0; n < batchSize; n++) {
@@ -520,6 +546,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
    }
    else if (direction == 2) {  // backprop wrt. inputs
        output = bottom;
+        if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
+            cudaError_t err = cudaMemset(output->devdata, 0,
+                                         CudaNdarray_SIZE(output) * sizeof(real));
+            if (err != cudaSuccess) {
+                PyErr_Format(PyExc_RuntimeError,
+                             "GpuCorrMM grad wrt. inputs could not fill the output with zeros: %s",
+                             cudaGetErrorString(err));
+                Py_DECREF(col);
+                return NULL;
+            }
+            Py_DECREF(col);
+            return output;
+        }
        // full convolution: gemm, then col2im
        // Iterate over batch
        for (int n = 0; n < batchSize; n++) {