Merge pull request #586 from nouiz/small

Add some extra verbose prints to the GPU conv code.

Merge pull request #586 from nouiz/small
1ede3c8b · James Bergstra · dff74dc7 · 5a944062 · 1ede3c8b
--- a/theano/sandbox/cuda/conv.cu
+++ b/theano/sandbox/cuda/conv.cu
@@ -52,6 +52,14 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
                CudaNdarray_HOST_STRIDES(kern)[1],
                CudaNdarray_HOST_STRIDES(kern)[2],
                CudaNdarray_HOST_STRIDES(kern)[3]);
+        fprintf(stderr,
+                "INFO:   out dim: %i %i %i %i  out stride: %i %i %i %i\n",
+               CudaNdarray_HOST_DIMS(out)[0], CudaNdarray_HOST_DIMS(out)[1],
+               CudaNdarray_HOST_DIMS(out)[2], CudaNdarray_HOST_DIMS(out)[3],
+               CudaNdarray_HOST_STRIDES(out)[0],
+               CudaNdarray_HOST_STRIDES(out)[1],
+               CudaNdarray_HOST_STRIDES(out)[2],
+               CudaNdarray_HOST_STRIDES(out)[3]);
        fprintf(stderr,
                "INFO:   subsample_rows=%d, subsample_cols=%d\n",
                subsample_rows, subsample_cols);
@@ -646,18 +654,6 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
                else if(!kern_flipped && !ccontig  && split && !full_kern) f=conv_patch_stack_reduce<false,kern_wid,false, true, false>;
            CONV_PATCH_STACK_REDUCE_SPECIAL(THEANO_KERN_WID);
-            if (verbose)
-              fprintf(stderr,
-                      "INFO: using 'conv_patch_stack_reduce' version"
-                      " kern_flipped=%i ccontig=%i nb_split=%d,"
-                      " preload_full_kern=%d\n",
-                      kern_flipped, ccontig, nb_split, full_kern);
-            if (verbose>1)
-              fprintf(stderr,
-                      "threads.x=%i, threads.y=%i, threads.z=%i, grid.x=%i,"
-                      " grid.y=%i, shared_size=%i, nb_threads=%i\n",
-                      threads.x, threads.y, threads.z, grid.x, grid.y,
-                      shared_size, threads.x * threads.y * threads.z);
            f<<< grid, threads, shared_size>>>(img->devdata, kern_data_unflipped, out->devdata,
                                               img_len, img_wid, kern_len, kern_wid,
                                               nkern, nstack,
@@ -668,6 +664,19 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
            cudaError_t sts = cudaGetLastError();
            if (cudaSuccess == sts)
            {
+                if (verbose>1)
+                    fprintf(stderr,
+                            "threads.x=%i, threads.y=%i, threads.z=%i, "
+                            "grid.x=%i, grid.y=%i, shared_size=%i,"
+                            " nb_threads=%i\n",
+                            threads.x, threads.y, threads.z, grid.x, grid.y,
+                            shared_size, threads.x * threads.y * threads.z);
+                if (verbose)
+                    fprintf(stderr,
+                            "INFO: used 'conv_patch_stack_reduce' version"
+                            " kern_flipped=%i ccontig=%i nb_split=%d,"
+                            " preload_full_kern=%d\n",
+                            kern_flipped, ccontig, nb_split, full_kern);
                work_complete = true;
            }
            else
@@ -991,6 +1000,13 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern,
               CudaNdarray_HOST_STRIDES(kern)[1],
               CudaNdarray_HOST_STRIDES(kern)[2],
               CudaNdarray_HOST_STRIDES(kern)[3]);
+        printf("INFO:   out dim: %i %i %i %i  out stride: %i %i %i %i\n",
+               CudaNdarray_HOST_DIMS(out)[0], CudaNdarray_HOST_DIMS(out)[1],
+               CudaNdarray_HOST_DIMS(out)[2], CudaNdarray_HOST_DIMS(out)[3],
+               CudaNdarray_HOST_STRIDES(out)[0],
+               CudaNdarray_HOST_STRIDES(out)[1],
+               CudaNdarray_HOST_STRIDES(out)[2],
+               CudaNdarray_HOST_STRIDES(out)[3]);
    }
    if (!subsample &&