Including support for 'full' convolutions. It uses the existing pad…

Including support for 'full' convolutions. It uses the existing pad functionality of the C/CUDA code. If mode == 'full', I set pad = filter_size - 1. This is passed to the C code, and everything else is taken care of automatically. The theano-nose run for test_full does not pass; however, the test function test_gemm() passes. @nouiz: do you know why?

Including support for 'full' convolutions. It uses the existing pad…
497a2d9f · Arjun Jain · 46f64a65 · 497a2d9f · 497a2d9f · 497a2d9f
--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -597,7 +597,6 @@ class GpuConvMM(GpuOp):
        border_mode = self.border_mode
        sub = sub.copy()
        pad = self.pad
-
        sub.update(locals())

        return """
@@ -607,6 +606,7 @@ class GpuConvMM(GpuOp):
    //Optional args
    int dx = %(dx)s;
    int dy = %(dy)s;
+    int pad = 0;
    CudaNdarray * img = %(img)s;
    CudaNdarray * kern = %(kern)s;
    CudaNdarray * out2 = NULL;
@@ -640,6 +640,7 @@ class GpuConvMM(GpuOp):
    {
        logical_rows = CudaNdarray_HOST_DIMS(img)[2] + CudaNdarray_HOST_DIMS(kern)[2] - 1;
        logical_cols = CudaNdarray_HOST_DIMS(img)[3] + CudaNdarray_HOST_DIMS(kern)[3] - 1;
+        pad = CudaNdarray_HOST_DIMS(kern)[2] - 1;
    }
    out_dim[2] = ceil_intdiv(logical_rows, dx);
    out_dim[3] = ceil_intdiv(logical_cols, dy);
@@ -657,7 +658,7 @@ class GpuConvMM(GpuOp):

    }

-    out2 = validMM(%(img)s, %(kern)s, %(out)s);
+    out2 = validMM(%(img)s, %(kern)s, %(out)s, pad);
    if (out2==NULL){
       %(fail)s
    }
@@ -669,6 +670,7 @@ class GpuConvMM(GpuOp):
 ##
 # Not really a BLAS operation, but whatever.
 #
+
 class GpuConv(GpuOp):
    """
    Implement the batched and stacked 2d convolution on the gpu.

--- a/theano/sandbox/cuda/conv_gemm.cu
+++ b/theano/sandbox/cuda/conv_gemm.cu
@@ -78,7 +78,8 @@ void im2col(const float* data_im, const int channels,
 // Author: Arjun Jain
 CudaNdarray* validMM(const CudaNdarray *input, 
 				      CudaNdarray *weight,
-				      CudaNdarray *output) 
+				      CudaNdarray *output,
+				      int padding = 0) 
 {

  	cublasStatus_t status;
@@ -96,7 +97,6 @@ CudaNdarray* validMM(const CudaNdarray *input,
     // TODO: stride(dW, dH) and padding as function parameter
     int dH = 1; 
     int dW = 1;
-     int padding = 0; 
     int kH = CudaNdarray_HOST_DIMS(weight)[2];
     int kW = CudaNdarray_HOST_DIMS(weight)[3];
     int nInputPlane = CudaNdarray_HOST_DIMS(input)[1]; 

--- a/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
@@ -839,7 +839,7 @@ def test_gemm():
                        kshape = (nf, ch, rFlt, rFlt)
                        print "ishape: ", ishape
                        print "kshape: ", kshape 
-                        mode = 'valid'
+                        mode = 'full'
                        subsample = (1, 1)
                    
                        npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
@@ -938,4 +938,3 @@ def test_stack_rows_segfault_070312():
            nkern=1, bsize=1)
    f = theano.function([], [], updates=[(out, op(img, kern))], mode=theano_mode)
    f()
-