fix white space.

4d2769d2 · Frederic Bastien · 22583950 · 4d2769d2 · 4d2769d2
--- a/theano/sandbox/cuda/GpuConv3D.py
+++ b/theano/sandbox/cuda/GpuConv3D.py
@@ -44,59 +44,59 @@ class GpuConv3D(theano.Op):
        H = outputs[0]
        codeSource =  """
-			///////////// < code generated by GpuConv3D >
+                        ///////////// < code generated by GpuConv3D >
-			//printf("\t\t\t\tConv3DGPU c code\\n");
+                        //printf("\t\t\t\tConv3DGPU c code\\n");
-			//Check dimensionality of inputs
+                        //Check dimensionality of inputs
-			if (%(W)s->nd != 5)
+                        if (%(W)s->nd != 5)
-			{
+                        {
                PyErr_Format(PyExc_ValueError, "GpuConv3D: W must be a 5 dimensional CudaNdarray");
                            %(fail)s
-			}
+                        }
-			if (%(V)s->nd != 5)
+                        if (%(V)s->nd != 5)
-			{
+                        {
                PyErr_Format(PyExc_ValueError, "GpuConv3D: V must be a 5 dimensional CudaNdarray");
                            %(fail)s
-			}
+                        }
-			if (%(b)s->nd != 1)
+                        if (%(b)s->nd != 1)
-			{
+                        {
                PyErr_Format(PyExc_ValueError, "GpuConv3D: b must be a vector CudaNdarray");
                            %(fail)s
-			}
+                        }
-			if (%(d)s->nd != 1)
+                        if (%(d)s->nd != 1)
-			{
+                        {
 PyErr_Format(PyExc_ValueError, "GpuConv3D: d must be a vector CudaNdarray");
                            %(fail)s
-			}
+                        }
-			if (%(d)s->dimensions[0] != 3)
+                        if (%(d)s->dimensions[0] != 3)
-			{
+                        {
                PyErr_Format(PyExc_ValueError, "GpuConv3D: 3 stride length arguments expected (row, col, time) but %%li were given", %(d)s->dimensions[0]);
                            %(fail)s
-			}
+                        }
 { //extra scope so fail doesn't jump over declarations
-			//Read and check sizes of inputs
+                        //Read and check sizes of inputs
-			const int batchSize = CudaNdarray_HOST_DIMS(%(V)s)[0];
+                        const int batchSize = CudaNdarray_HOST_DIMS(%(V)s)[0];
-			const int outputChannels =  CudaNdarray_HOST_DIMS(%(W)s)[0];
+                        const int outputChannels =  CudaNdarray_HOST_DIMS(%(W)s)[0];
-			const int inputChannels = CudaNdarray_HOST_DIMS(%(V)s)[4];
+                        const int inputChannels = CudaNdarray_HOST_DIMS(%(V)s)[4];
-			if (CudaNdarray_HOST_DIMS(%(W)s)[4] != inputChannels)
+                        if (CudaNdarray_HOST_DIMS(%(W)s)[4] != inputChannels)
-			{
+                        {
                            PyErr_Format(PyExc_ValueError, "Conv3D: W operates on a %%i channel image but the image has %%i channels",CudaNdarray_HOST_DIMS(%(W)s)[4],inputChannels);
                            %(fail)s
-			}
+                        }
 {  //extra scope so error handler jumps don't cause errors
-			const int filterHeight = CudaNdarray_HOST_DIMS(%(W)s)[1];
+                        const int filterHeight = CudaNdarray_HOST_DIMS(%(W)s)[1];
-			const int filterWidth = CudaNdarray_HOST_DIMS(%(W)s)[2];
+                        const int filterWidth = CudaNdarray_HOST_DIMS(%(W)s)[2];
-			const int filterDur = CudaNdarray_HOST_DIMS(%(W)s)[3];
+                        const int filterDur = CudaNdarray_HOST_DIMS(%(W)s)[3];
-			const int vidHeight = CudaNdarray_HOST_DIMS(%(V)s)[1];
+                        const int vidHeight = CudaNdarray_HOST_DIMS(%(V)s)[1];
-			const int vidWidth = CudaNdarray_HOST_DIMS(%(V)s)[2];
+                        const int vidWidth = CudaNdarray_HOST_DIMS(%(V)s)[2];
-			const int vidDur = CudaNdarray_HOST_DIMS(%(V)s)[3];
+                        const int vidDur = CudaNdarray_HOST_DIMS(%(V)s)[3];
            if (vidHeight < filterHeight)
            {
                PyErr_Format(PyExc_ValueError, "W has a height of %%i but V is only %%i pixels tall",filterHeight,vidHeight);
@@ -116,58 +116,58 @@ PyErr_Format(PyExc_ValueError, "GpuConv3D: d must be a vector CudaNdarray");
            }
 { // extra scope so fail works
-			//Read and check stride arguments
+                        //Read and check stride arguments
                        const int dr = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,0);
                        const int dc = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,1);
                        const int dt = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,2);
-			if (dr <= 0 || dc <= 0 || dt <= 0)
+                        if (dr <= 0 || dc <= 0 || dt <= 0)
-			{
+                        {
                PyErr_Format(PyExc_ValueError, "GpuConv3D: Strides must all be positive but are %%i, %%i, %%i", dr, dc, dt);
                %(fail)s
-			}
+                        }
 { // extra scope so fail works
-			//Make correctly sized output
+                        //Make correctly sized output
-			const int outputHeight = int( (vidHeight - filterHeight) / dr )+1;
+                        const int outputHeight = int( (vidHeight - filterHeight) / dr )+1;
-			const int outputWidth = int( (vidWidth - filterWidth) / dc )+1;
+                        const int outputWidth = int( (vidWidth - filterWidth) / dc )+1;
-			const int outputDur = int( (vidDur - filterDur) / dt ) +1;
+                        const int outputDur = int( (vidDur - filterDur) / dt ) +1;
-			npy_intp dims[5];
+                        npy_intp dims[5];
-			dims[0] = batchSize;
+                        dims[0] = batchSize;
-			dims[4] = outputChannels;
+                        dims[4] = outputChannels;
-			dims[1] = outputHeight;
+                        dims[1] = outputHeight;
-			dims[2] = outputWidth;
+                        dims[2] = outputWidth;
-			dims[3] = outputDur;
+                        dims[3] = outputDur;
-			if(!(%(H)s) || CudaNdarray_HOST_DIMS(%(H)s)[0]!=dims[0] ||
+                        if(!(%(H)s) || CudaNdarray_HOST_DIMS(%(H)s)[0]!=dims[0] ||
-			CudaNdarray_HOST_DIMS(%(H)s)[1]!=dims[1] ||
+                        CudaNdarray_HOST_DIMS(%(H)s)[1]!=dims[1] ||
-			CudaNdarray_HOST_DIMS(%(H)s)[2]!=dims[2] ||
+                        CudaNdarray_HOST_DIMS(%(H)s)[2]!=dims[2] ||
-			CudaNdarray_HOST_DIMS(%(H)s)[3]!=dims[3] ||
+                        CudaNdarray_HOST_DIMS(%(H)s)[3]!=dims[3] ||
-			CudaNdarray_HOST_DIMS(%(H)s)[4]!=dims[4]){
+                        CudaNdarray_HOST_DIMS(%(H)s)[4]!=dims[4]){
-				Py_XDECREF(%(H)s);
+                                Py_XDECREF(%(H)s);
-				%(H)s = (CudaNdarray*)CudaNdarray_NewDims(5,dims);
+                                %(H)s = (CudaNdarray*)CudaNdarray_NewDims(5,dims);
-				if (!(%(H)s)) {
+                                if (!(%(H)s)) {
                    PyErr_Format(PyExc_MemoryError, "GpuConv3D: could not allocate output");
                            %(fail)s
-				}
+                                }
-			}
+                        }
 { // extra scope so fail will not cross declarations
-			//#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )####################
+                        //#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )####################
-			const int ws4 = CudaNdarray_HOST_STRIDES(%(W)s)[4];
+                        const int ws4 = CudaNdarray_HOST_STRIDES(%(W)s)[4];
-			const int vs4 = CudaNdarray_HOST_STRIDES(%(V)s)[4];
+                        const int vs4 = CudaNdarray_HOST_STRIDES(%(V)s)[4];
-			const int ws3 = CudaNdarray_HOST_STRIDES(%(W)s)[3];
+                        const int ws3 = CudaNdarray_HOST_STRIDES(%(W)s)[3];
-			const int vs3 = CudaNdarray_HOST_STRIDES(%(V)s)[3];
+                        const int vs3 = CudaNdarray_HOST_STRIDES(%(V)s)[3];
-			const int ws2 = CudaNdarray_HOST_STRIDES(%(W)s)[2];
+                        const int ws2 = CudaNdarray_HOST_STRIDES(%(W)s)[2];
-			const int vs2 = CudaNdarray_HOST_STRIDES(%(V)s)[2];
+                        const int vs2 = CudaNdarray_HOST_STRIDES(%(V)s)[2];
-			const int ws1 = CudaNdarray_HOST_STRIDES(%(W)s)[1];
+                        const int ws1 = CudaNdarray_HOST_STRIDES(%(W)s)[1];
-			const int vs1 = CudaNdarray_HOST_STRIDES(%(V)s)[1];
+                        const int vs1 = CudaNdarray_HOST_STRIDES(%(V)s)[1];
-			const int ws0 = CudaNdarray_HOST_STRIDES(%(W)s)[0];
+                        const int ws0 = CudaNdarray_HOST_STRIDES(%(W)s)[0];
-			const int vs0 = CudaNdarray_HOST_STRIDES(%(V)s)[0];
+                        const int vs0 = CudaNdarray_HOST_STRIDES(%(V)s)[0];
-			// Compute H
+                        // Compute H
-			//H[i,x,y,t,j] = b_j + sum_k sum_l sum_m sum_z W[j,k,l,m,z] V[i, dr*r+k,dc*c+l,dt*t+m,z]
+                        //H[i,x,y,t,j] = b_j + sum_k sum_l sum_m sum_z W[j,k,l,m,z] V[i, dr*r+k,dc*c+l,dt*t+m,z]
 bool out_contiguous = CudaNdarray_is_c_contiguous(%(H)s);
 int version = -1;
@@ -196,7 +196,7 @@ if(out_contiguous && !b_strided && (version==0||version==-1) && outputDur<=512 &
        if (cudaSuccess == sts)
        {
            work_complete = true;
-	    if (verbose>1) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
+            if (verbose>1) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
            if (verbose) printf("INFO: used 'conv_rows_stack' version\\n");
        }
        else
@@ -217,7 +217,7 @@ if(!work_complete){
 }
 }}}}}}} //extra scope so error handler jumps don't cross declarations
-			///////////// < /code generated by GpuConv3D >
+                        ///////////// < /code generated by GpuConv3D >
        """
        return strutil.renderString(codeSource,locals())
@@ -231,14 +231,14 @@ __global__ void
 //grid block size =(out_len*out_wid, nb kern *nb batch)
 //
 conv_rows_stack( float* img, float* kern, float* bias, float* out,
-		 int img_len, int img_wid, int img_dur,
+                 int img_len, int img_wid, int img_dur,
                 int kern_height, int kern_wid, int kern_dur,
-		 int nkern, int input_channels,
+                 int nkern, int input_channels,
                 int dr, int dc, int dt,
                 int img_stride_frame, int img_stride_col, int img_stride_row,
-		 int img_stride_ochannel, int img_stride_batch,
+                 int img_stride_ochannel, int img_stride_batch,
                 int kern_stride_frame, int kern_stride_col, int kern_stride_row,
-		 int kern_stride_stack, int kern_stride_okern)
+                 int kern_stride_stack, int kern_stride_okern)
 {
  int __shared__ out_len, out_wid, out_dur, batch_id, kern_id;
  float  __shared__ *d_img, *d_kern;
@@ -257,18 +257,18 @@ conv_rows_stack( float* img, float* kern, float* bias, float* out,
  kern += kern_id*kern_stride_okern;
    float sum = 0.0f;
    for (int z = 0; z < input_channels; z++) {//1 for first layer
-	for (int k =0; k < kern_height; k++) {
+        for (int k =0; k < kern_height; k++) {
-	  for (int l = 0; l < kern_wid; l++) {
+          for (int l = 0; l < kern_wid; l++) {
-	    for (int m = 0; m < kern_dur; m++) {
+            for (int m = 0; m < kern_dur; m++) {
-	      sum += img[img_stride_ochannel*z+img_stride_row*k+img_stride_col*l+img_stride_frame*m] *
+              sum += img[img_stride_ochannel*z+img_stride_row*k+img_stride_col*l+img_stride_frame*m] *
-		         kern[kern_stride_stack*z+kern_stride_row*k+kern_stride_col*l+kern_stride_frame*m];
+                         kern[kern_stride_stack*z+kern_stride_row*k+kern_stride_col*l+kern_stride_frame*m];
            }
          }
        }
      out[batch_id*nkern*out_len*out_wid*out_dur+//the good batch
-	  out_frame*nkern+//the output frame
+          out_frame*nkern+//the output frame
-	  out_row*out_wid*out_dur*nkern+//the output row
+          out_row*out_wid*out_dur*nkern+//the output row
          out_col*out_dur*nkern + //the output_col
          kern_id //the output image (channel)
 ] = sum + bias[kern_id];

--- a/theano/sandbox/cuda/GpuConvGrad3D.py
+++ b/theano/sandbox/cuda/GpuConvGrad3D.py
@@ -243,7 +243,7 @@ if(out_contiguous && (version==0||version==-1) && WShape[4]<=512 && !work_comple
        if (cudaSuccess == sts)
        {
            work_complete = true;
-	    if (verbose>1) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
+            if (verbose>1) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
            if (verbose) printf("INFO: used 'conv_rows_stack' version\\n");
        }
        else
@@ -276,16 +276,16 @@ __global__ void
 //grid block size = (WShape[0]*WShape[1],WShape[2]*WShape[3])
 //
 convgrad_rows_stack( float* img, float* dCdH, float* dCdW,
-		 int img_len, int img_wid, int img_dur,
+                 int img_len, int img_wid, int img_dur,
                 int dCdW_len, int dCdW_wid, int dCdW_dur,
                 int wsh0, int wsh1, int wsh2, int wsh3, int wsh4,
                 int out_len, int out_wid, int out_dur,
-		 int batchSize, int nkern, int nstack,
+                 int batchSize, int nkern, int nstack,
                 int dr, int dc, int dt,
                 int img_stride_frame, int img_stride_col, int img_stride_row,
-		 int img_stride_stack, int img_stride_batch,
+                 int img_stride_stack, int img_stride_batch,
                 int dCdW_stride_frame, int dCdW_stride_col, int dCdW_stride_row,
-		 int dCdW_stride_stack, int dCdW_stride_nkern)
+                 int dCdW_stride_stack, int dCdW_stride_nkern)
 {
  int __shared__ kern_id, stack_id;
  float  __shared__ *d_img, *d_kern;