Fix bug in GPU convolution introduced in commit 7122f9987ab4d on 31 January 2011.

00a3e6b3 · Frederic Bastien · f0dedb92 · 00a3e6b3 · 00a3e6b3
--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -363,7 +363,7 @@ class GpuConv(Op):
        return ['cuda_ndarray.cuh','<stdio.h>']

    def c_code_cache_version(self):
-        return (0,9) # raise this whenever modifying any of the support_code_files
+        return (0,10) # raise this whenever modifying any of the support_code_files

    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of these files

--- a/theano/sandbox/cuda/conv.cu
+++ b/theano/sandbox/cuda/conv.cu
@@ -474,9 +474,9 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
 	    //if we can't fit the kernel in shared memory, we must split it more.
            nb_split++;
            thread_z=ceil_intdiv(kern_len,nb_split);
-	    shared_size=sizeof(float)*std::max(
-                    img_size + kern_wid*thread_z,
-                    out_size*thread_z);
+	    shared_size = sizeof(float)*(full_kern
+                ? std::max(img_size + kern_size, out_size*thread_z)
+                : std::max(img_size + thread_z*kern_wid, out_size*thread_z));
        }
        if (nb_split <= kern_len)
        {