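Small fixes: correct padD handling and default in GpuCorr3dMM, document the depth parameter, remove a debug print, tidy kernel signatures and test keyword-argument spacing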

09b1f19c · Nicolas Ballas · 9e2d8a33 · 09b1f19c · 09b1f19c · 09b1f19c
--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -1018,7 +1018,7 @@ class BaseGpuCorr3dMM(GpuOp):
    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
-        return (0, 1)
+        return None #(0, 1)
    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
@@ -1065,6 +1065,12 @@ class BaseGpuCorr3dMM(GpuOp):
            If self.pad == 'half', a variable giving the width of the filters
            for direction="backprop weights".
            Ignored otherwise.
+        :param depth: If self.subsample[2] != 1, a variable giving the depth
+            of the filters for direction="backprop weights" or the depth of the
+            input images for direction="backprop inputs".
+            If self.pad == 'half', a variable giving the depth of the filters
+            for direction="backprop weights".
+            Ignored otherwise.
        """
        if self.border_mode != "valid":
            raise ValueError("mode must be 'valid'")
@@ -1212,11 +1218,11 @@ class BaseGpuCorr3dMM(GpuOp):
    }
    else if (padD == -2)
    { // horizontal full padding
-      padW = kW - 1;
+      padD = kD - 1;
    }
-    else if (padW < 0)
+    else if (padD < 0)
    {
-      PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: padW must be >= -2");
+      PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: padD must be >= -2");
      %(fail)s
    }
@@ -1255,6 +1261,8 @@ class BaseGpuCorr3dMM(GpuOp):
        %(fail)s
    }
    // Prepare output array
    if (!(%(out)s
          && %(out)s->nd == 5
@@ -1380,7 +1388,6 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
        broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
                         False, False, False]
-        print [img, topgrad] + height_width_depth
        return Apply(self, [img, topgrad] + height_width_depth, [CudaNdarrayType(broadcastable)()])
    def c_code(self, node, nodename, inp, out_, sub):

--- a/theano/sandbox/cuda/corr3d_gemm.cu
+++ b/theano/sandbox/cuda/corr3d_gemm.cu
@@ -53,16 +53,11 @@ inline int GET_BLOCKS(const int N) {
 // (Adapted from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu)
 // Kernels for fast unfold + copy
 __global__ void im3d2col_kernel(const int n, const float* data_im,
-                                const int height, const int width,
+                                const int height, const int width, const int depth,
-                                const int depth,
+                                const int kernel_h, const int kernel_w, const int kernel_d,
-                                const int kernel_h, const int kernel_w,
+                                const int pad_h, const int pad_w, const int pad_d,
-                                const int kernel_d,
+                                const int stride_h, const int stride_w, const int stride_d,
-                                const int pad_h, const int pad_w,
+                                const int height_col, const int width_col, const int depth_col,
-                                const int pad_d,
-                                const int stride_h, const int stride_w,
-                                const int stride_d,
-                                const int height_col, const int width_col,
-                                const int depth_col,
                                float* data_col)
 {
  CUDA_KERNEL_LOOP(index, n)
@@ -134,14 +129,12 @@ void im3d2col(const float* data_im, const int channels,
 __global__ void col2im3d_kernel(const int n, const float* data_col,
-                                const int height, const int width,
+                                const int height, const int width, const int depth,
-                                const int depth, const int channels,
+                                const int channels,
-                                const int patch_h, const int patch_w,
+                                const int patch_h, const int patch_w, const int patch_d,
-                                const int patch_d, const int pad_h,
+                                const int pad_h, const int pad_w, const int pad_d,
-                                const int pad_w, const int pad_d,
+                                const int stride_h, const int stride_w, const int stride_d,
-                                const int stride_h, const int stride_w,
+                                const int height_col, const int width_col, const int depth_col,
-                                const int stride_d, const int height_col,
-                                const int width_col, const int depth_col,
                                float* data_im)
 {
  CUDA_KERNEL_LOOP(index, n)
@@ -219,7 +212,7 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
                      const int dD = 1,
                      const int padH = 0,
                      const int padW = 0,
-                      const int padD = 1)
+                      const int padD = 0)
 {
    if (bottom->nd != 5)
    {

--- a/theano/sandbox/cuda/tests/test_gemmcorr3d.py
+++ b/theano/sandbox/cuda/tests/test_gemmcorr3d.py
@@ -22,7 +22,7 @@ else:
 class TestCorr3DMM(unittest.TestCase):
    def run_conv_valid(self, inputs_shape, filters_shape,
-                       subsample = ( 1, 1, 1)):
+                       subsample=(1, 1, 1)):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
@@ -66,7 +66,7 @@ class TestCorr3DMM(unittest.TestCase):
                            subsample=(1, 2, 3))
    def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
-                        subsample = (1, 1, 1)):
+                        subsample=(1, 1, 1)):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
@@ -96,25 +96,25 @@ class TestCorr3DMM(unittest.TestCase):
        utt.assert_allclose(res_ref, res,  rtol=1e-04, atol=1e-04)
    def test_gradweight(self):
-        self.run_gradweight(inputs_shape = (16, 20, 32, 16, 1),
+        self.run_gradweight(inputs_shape=(16, 20, 32, 16, 1),
-                            filters_shape = (10, 6, 12, 4, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
-                            dCdH_shape = (16, 15, 21, 13, 10),
+                            dCdH_shape=(16, 15, 21, 13, 10),
-                            subsample = (1, 1, 1))
+                            subsample=(1, 1, 1))
-        self.run_gradweight(inputs_shape = (16, 20, 32, 16, 1),
+        self.run_gradweight(inputs_shape=(16, 20, 32, 16, 1),
-                            filters_shape = (10, 6, 12, 4, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
-                            dCdH_shape = (16, 8, 11, 7, 10),
+                            dCdH_shape=(16, 8, 11, 7, 10),
-                            subsample = (2, 2, 2))
+                            subsample=(2, 2, 2))
-        self.run_gradweight(inputs_shape = (16, 20, 32, 16, 1),
+        self.run_gradweight(inputs_shape=(16, 20, 32, 16, 1),
-                            filters_shape = (10, 6, 12, 4, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
-                            dCdH_shape = (16, 5, 7, 5, 10),
+                            dCdH_shape=(16, 5, 7, 5, 10),
-                            subsample = (3, 3, 3))
+                            subsample=(3, 3, 3))
-        self.run_gradweight(inputs_shape = (16, 20, 32, 16, 1),
+        self.run_gradweight(inputs_shape=(16, 20, 32, 16, 1),
-                            filters_shape = (10, 6, 12, 4, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
-                            dCdH_shape = (16, 8, 21, 5, 10),
+                            dCdH_shape=(16, 8, 21, 5, 10),
-                            subsample = (2, 1, 3))
+                            subsample=(2, 1, 3))
    def run_gradinput(self, inputs_shape, filters_shape,
-                      subsample = (1, 1, 1)):
+                      subsample=(1, 1, 1)):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
@@ -138,7 +138,7 @@ class TestCorr3DMM(unittest.TestCase):
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(kern=weight, topgrad=top)
        else:
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(kern=weight, topgrad=top,
-                                                                    shape = bottom.shape[1:4])
+                                                                    shape=bottom.shape[1:4])
        conv_gemm = conv_gemm.dimshuffle(0, 2, 3, 4, 1)
        f = theano.function([], conv_gemm)
@@ -146,15 +146,15 @@ class TestCorr3DMM(unittest.TestCase):
        utt.assert_allclose(res_ref, res,  rtol=1e-04, atol=1e-04)
    def test_gradinput(self):
-        self.run_gradinput(inputs_shape = (16, 15, 21, 12, 10),
+        self.run_gradinput(inputs_shape=(16, 15, 21, 12, 10),
-                           filters_shape = (10, 6, 12, 4, 1))
+                           filters_shape=(10, 6, 12, 4, 1))
-        self.run_gradinput(inputs_shape = (16, 15, 21, 12, 10),
+        self.run_gradinput(inputs_shape=(16, 15, 21, 12, 10),
-                           filters_shape = (10, 6, 12, 4, 1),
+                           filters_shape=(10, 6, 12, 4, 1),
                           subsample=(2,2,2))
-        self.run_gradinput(inputs_shape = (16, 15, 21, 12, 10),
+        self.run_gradinput(inputs_shape=(16, 15, 21, 12, 10),
-                           filters_shape = (10, 6, 12, 4, 1),
+                           filters_shape=(10, 6, 12, 4, 1),
                           subsample=(3,3,3))
-        self.run_gradinput(inputs_shape = (16, 15, 21, 12, 10),
+        self.run_gradinput(inputs_shape=(16, 15, 21, 12, 10),
-                           filters_shape = (10, 6, 12, 4, 1),
+                           filters_shape=(10, 6, 12, 4, 1),
                           subsample=(3,1,2))