Commit 0dae8f43 authored by Frederic Bastien

move the convolution from cuda_ndarray to the GpuConvOp.

Parent ccc01a40
Diff is collapsed.
#include"conv_kernel.cu"
//we store the full image and the full kernel in the shared memory
//each thread compute only one value for the output
//thread block size=out_wid, out_len/nb_split
......
......@@ -28,8 +28,6 @@ for (int iter_m=0; iter_m < Os[0]; iter_m++) {
#ifndef CONV_KERNEL_CU
#define CONV_KERNEL_CU
#include <stdio.h>
/*
#define CHECK_BANK_CONFLICTS 0
#if CHECK_BANK_CONFLICTS
......
......@@ -26,6 +26,16 @@ typedef float real;
#endif
#ifndef SHARED_SIZE
#define SHARED_SIZE (16*1024)
#endif
/**
 * Integer ceiling division: smallest integer >= a/b.
 *
 * Written as quotient-plus-correction rather than (a+b-1)/b so it
 * cannot overflow for large `a`. Assumes b != 0; intended for the
 * non-negative sizes used when computing grid/block dimensions.
 */
template <typename T>
static T ceil_intdiv(T a, T b)
{
    T quotient = a / b;
    if (a % b != 0)
        ++quotient;
    return quotient;
}
/**
* struct CudaNdarray
*
......@@ -408,14 +418,6 @@ int CudaNdarray_reduce_max(CudaNdarray * self, CudaNdarray * A);
int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const int * pattern);
enum { ConvMode_FULL, ConvMode_VALID };
PyObject * CudaNdarray_Conv(const CudaNdarray *img, const CudaNdarray * kern, CudaNdarray * out, const int mode, const int subsample_rows, const int subsample_cols, const int version, const int verbose);
/**
 * Convenience overload of CudaNdarray_Conv: convolve `img` with `kern`
 * into `out` using `mode` (ConvMode_FULL / ConvMode_VALID), delegating
 * to the full 7-argument version with its default settings.
 */
PyObject * CudaNdarray_Conv(const CudaNdarray *img, const CudaNdarray * kern, CudaNdarray * out, const int mode)
{
    // Defaults forwarded to the full implementation:
    const int subsample_rows = 1;  // no row subsampling
    const int subsample_cols = 1;  // no column subsampling
    const int version = -1;        // -1: let the implementation pick the kernel version
    const int verbose = 0;         // quiet
    return CudaNdarray_Conv(img, kern, out, mode,
                            subsample_rows, subsample_cols,
                            version, verbose);
}
int CudaNdarray_conv(const CudaNdarray *img, const CudaNdarray * kern, CudaNdarray * out, const int mode);
// Debug helper: print a CudaNdarray's identity to stream `fd` — the
// struct's address, its device-data pointer, and its number of
// dimensions. (Definition continues beyond this excerpt; only the
// header line of the dump is visible here.)
// NOTE(review): `%p` formally requires a `void*` argument, and `%i`
// assumes `nd` is an int — confirm against the struct definition.
void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self)
{
    fprintf(fd, "CudaNdarray <%p, %p> nd=%i \n", self, self->devdata, self->nd);
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment