Whitespace fixes.

032c5b14 · David Warde-Farley · 9d85fda7 · 032c5b14 · 032c5b14 · 032c5b14
--- a/theano/sandbox/cuda/nnet.py
+++ b/theano/sandbox/cuda/nnet.py
@@ -150,15 +150,15 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
            k_xent_sm_1hot_bias<<<n_blocks, n_threads, n_shared_bytes>>>(
                CudaNdarray_HOST_DIMS(%(x)s)[0],
                CudaNdarray_HOST_DIMS(%(x)s)[1],
-                CudaNdarray_DEV_DATA(%(x)s), CudaNdarray_HOST_STRIDES(%(x)s)[0], CudaNdarray_HOST_STRIDES(%(x)s)[1], 
-                CudaNdarray_DEV_DATA(%(b)s), CudaNdarray_HOST_STRIDES(%(b)s)[0], 
-                CudaNdarray_DEV_DATA(%(y_idx)s), CudaNdarray_HOST_STRIDES(%(y_idx)s)[0], 
-                CudaNdarray_DEV_DATA(%(nll)s), CudaNdarray_HOST_STRIDES(%(nll)s)[0], 
-                CudaNdarray_DEV_DATA(%(sm)s), CudaNdarray_HOST_STRIDES(%(sm)s)[0], CudaNdarray_HOST_STRIDES(%(sm)s)[1], 
+                CudaNdarray_DEV_DATA(%(x)s), CudaNdarray_HOST_STRIDES(%(x)s)[0], CudaNdarray_HOST_STRIDES(%(x)s)[1],
+                CudaNdarray_DEV_DATA(%(b)s), CudaNdarray_HOST_STRIDES(%(b)s)[0],
+                CudaNdarray_DEV_DATA(%(y_idx)s), CudaNdarray_HOST_STRIDES(%(y_idx)s)[0],
+                CudaNdarray_DEV_DATA(%(nll)s), CudaNdarray_HOST_STRIDES(%(nll)s)[0],
+                CudaNdarray_DEV_DATA(%(sm)s), CudaNdarray_HOST_STRIDES(%(sm)s)[0], CudaNdarray_HOST_STRIDES(%(sm)s)[1],
                CudaNdarray_DEV_DATA(%(am)s), CudaNdarray_HOST_STRIDES(%(am)s)[0]);
            CNDA_THREAD_SYNC;
            cudaError_t err = cudaGetLastError();
-            if (cudaSuccess != err) 
+            if (cudaSuccess != err)
            {
                PyErr_Format(PyExc_RuntimeError, "Cuda error: %(classname)s %(nodename)s: %%s.\\n", cudaGetErrorString(err));
                // no need to decref output vars the cleanup code should pick them up.
@@ -233,7 +233,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
                    std::min(CudaNdarray_HOST_DIMS(%(dx)s)[1],256)
                >>>(
                        CudaNdarray_HOST_DIMS(%(dx)s)[0],
-                        CudaNdarray_HOST_DIMS(%(dx)s)[1], 
+                        CudaNdarray_HOST_DIMS(%(dx)s)[1],

                        CudaNdarray_DEV_DATA(%(dnll)s),
                        CudaNdarray_HOST_STRIDES(%(dnll)s)[0],
@@ -249,11 +249,11 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
                );
            CNDA_THREAD_SYNC;
            cudaError_t err = cudaGetLastError();
-            if( cudaSuccess != err) 
+            if( cudaSuccess != err)
            {
                PyErr_Format(PyExc_RuntimeError, "Cuda error: %%s: %%s.\\n", "kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s", cudaGetErrorString(err));
                %(fail)s;
-            }                         
+            }
        }
        assert(%(dx)s);
        """ % locals()
@@ -337,7 +337,7 @@ class GpuSoftmax (Op):
                    CudaNdarray_HOST_DIMS(%(x)s)[1] * 2 * sizeof(float)
                >>>(
                        CudaNdarray_HOST_DIMS(%(x)s)[0],
-                        CudaNdarray_HOST_DIMS(%(x)s)[1], 
+                        CudaNdarray_HOST_DIMS(%(x)s)[1],

                        CudaNdarray_DEV_DATA(%(x)s),
                        CudaNdarray_HOST_STRIDES(%(x)s)[0],
@@ -347,18 +347,18 @@ class GpuSoftmax (Op):
                );
            CNDA_THREAD_SYNC;
            cudaError_t err = cudaGetLastError();
-            if( cudaSuccess != err) 
+            if( cudaSuccess != err)
            {
                PyErr_Format(PyExc_RuntimeError, "Cuda error: %%s: %%s.\\n", "kSoftmax_%(nodename)s", cudaGetErrorString(err));
                %(fail)s;
-            }                         
+            }
        }
        assert(%(z)s);
        """ % locals()

    def c_support_code_apply(self, node, nodename):
        return nvcc_kernel("kSoftmax_%s"%nodename,
-                params=['int M', 'int N', 
+                params=['int M', 'int N',
                    'const float * x', 'const int sx0', 'const int sx1',
                    'float * sm'],
                body=[
@@ -436,7 +436,7 @@ class GpuSoftmaxWithBias (Op):
                    CudaNdarray_HOST_DIMS(%(x)s)[1] * 2 * sizeof(float)
                >>>(
                        CudaNdarray_HOST_DIMS(%(x)s)[0],
-                        CudaNdarray_HOST_DIMS(%(x)s)[1], 
+                        CudaNdarray_HOST_DIMS(%(x)s)[1],

                        CudaNdarray_DEV_DATA(%(x)s),
                        CudaNdarray_HOST_STRIDES(%(x)s)[0],
@@ -449,18 +449,18 @@ class GpuSoftmaxWithBias (Op):
                );
            CNDA_THREAD_SYNC;
            cudaError_t err = cudaGetLastError();
-            if( cudaSuccess != err) 
+            if( cudaSuccess != err)
            {
                PyErr_Format(PyExc_RuntimeError, "Cuda error: %%s: %%s.\\n", "kSoftmax_%(nodename)s", cudaGetErrorString(err));
                %(fail)s;
-            }                         
+            }
        }
        assert(%(z)s);
        """ % locals()

    def c_support_code_apply(self, node, nodename):
        return nvcc_kernel("kSoftmaxWithBias_%s"%nodename,
-                params=['int M', 'int N', 
+                params=['int M', 'int N',
                    'const float * x', 'const int sx0', 'const int sx1',
                    'const float * b', 'const int sb0',
                    'float * sm'],

--- a/theano/sandbox/fourier.py
+++ b/theano/sandbox/fourier.py
@@ -18,7 +18,7 @@ grad_todo = GradTodo()

 class FFT(Op):
    """Fast Fourier Transform
-    
+
    .. TODO:
        The current implementation just works for matrix inputs, and permits taking a 1D FFT over
        either rows or columns.  Add support for N-D FFTs as provided by either numpy or FFTW
@@ -29,7 +29,7 @@ class FFT(Op):

    .. TODO:
        unit tests.
-    
+
    """

    default_output = 0
@@ -61,7 +61,7 @@ class FFT(Op):
            raise TypeError('Argument to HalfFFT must not be complex', frames)
        spectrogram = tensor.zmatrix()
        buf = generic()
-        # The `buf` output is present for future work 
+        # The `buf` output is present for future work
        # when we call FFTW directly and re-use the 'plan' that FFTW creates.
        # In that case, buf would store a CObject encapsulating the plan.
        rval = Apply(self, [_frames, _n, _axis], [spectrogram, buf])

--- a/theano/sandbox/minimal.py
+++ b/theano/sandbox/minimal.py
@@ -25,13 +25,13 @@ class Minimal(gof.Op):
        return hash(type(self))

    def make_node(self, *args):
-        # HERE `args` must be THEANO VARIABLES 
+        # HERE `args` must be THEANO VARIABLES
        return gof.Apply(op=self, inputs=args, outputs=[tensor.lscalar()])

    def perform(self, node, inputs, (output, )):
        # HERE `inputs` are PYTHON OBJECTS 

-        # do what you want here, 
+        # do what you want here,
        # but do not modify any of the arguments [inplace].
        print "perform got %i arguments" % len(inputs)


--- a/theano/sandbox/multinomial.py
+++ b/theano/sandbox/multinomial.py
@@ -59,12 +59,12 @@ class Multinomial(Op):
            npy_intp dims[2];
            dims[0] = (%(pvals)s->dimensions)[0];
            dims[1] = (%(pvals)s->dimensions)[1];
-            
+
            %(z)s = (PyArrayObject*) PyArray_ZEROS(2,
                dims,
                type_num_%(pvals)s,
                0);
-                       
+
            if (!%(z)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
@@ -96,7 +96,7 @@ class Multinomial(Op):
                }
            }
        }
-        
+
        } // END NESTED SCOPE
        """ % locals()
 multinomial = Multinomial()
@@ -128,24 +128,24 @@ class GpuMultinomial(Multinomial):
            float * global_unis,
            float * global_outs
        )
-        {            
+        {
            int n = blockDim.x*blockIdx.x + threadIdx.x;
            if (n < nb_multi)
-            {    
-            
+            {
+
            float cummul = 0.;
            bool done = false;
            for (int m = 0; m < nb_outcomes; ++m)
            {
                cummul += global_pvals[n * pvals_col_strides + m * pvals_row_strides];
-                
+
                float current_out = 0.;

                if (!done && global_unis[n] < cummul)
                {
                    current_out = 1.;
                    done = true;
-                }  
+                }
                global_outs[n + m * nb_multi] = current_out;
            }
            }
@@ -157,7 +157,7 @@ class GpuMultinomial(Multinomial):
    def c_code(self, node, name, (pvals, unis), (z,), sub):
        fail = sub['fail']
        return """
-        
+
        if (%(pvals)s->nd != 2)
        {
            PyErr_Format(PyExc_TypeError, "pvals wrong rank");
@@ -168,7 +168,7 @@ class GpuMultinomial(Multinomial):
            PyErr_Format(PyExc_TypeError, "unis wrong rank");
            %(fail)s;
        }
-        
+
        if (CudaNdarray_HOST_DIMS(%(unis)s)[0] != CudaNdarray_HOST_DIMS(%(pvals)s)[1])
        {
            PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[1]");
@@ -201,7 +201,7 @@ class GpuMultinomial(Multinomial):
        { // NESTED SCOPE
            int nb_outcomes = CudaNdarray_HOST_DIMS(%(z)s)[0];
            int nb_multi = CudaNdarray_HOST_DIMS(%(z)s)[1];
-            
+
            //TODO : change this for a beautiful constant
            int max_nb_blocks = 2<<15 - 1;
            int nb_blocks = max_nb_blocks + 1;
@@ -212,7 +212,7 @@ class GpuMultinomial(Multinomial):
                if (nb_multi %% nb_threads == 0)
                    nb_blocks = nb_multi/nb_threads;
                else
-                    nb_blocks = (int)((float)nb_multi/(float)nb_threads + 1.); 
+                    nb_blocks = (int)((float)nb_multi/(float)nb_threads + 1.);
            } while (nb_blocks > max_nb_blocks);

            //printf("\\nN=%%i b=%%i t=%%i t*b=%%i", nb_multi, nb_blocks, nb_threads, nb_blocks*nb_threads);
@@ -224,7 +224,7 @@ class GpuMultinomial(Multinomial):
                %(fail)s;
            }

-                
+
            dim3 n_blocks(nb_blocks,1,1);
            dim3 n_threads(nb_threads,1,1);
            int n_shared = 0;
@@ -240,7 +240,7 @@ class GpuMultinomial(Multinomial):
            );
            CNDA_THREAD_SYNC;
            cudaError_t sts = cudaGetLastError();
-            if (cudaSuccess != sts) 
+            if (cudaSuccess != sts)
            {
                PyErr_Format(PyExc_RuntimeError, "Cuda error: %%s: %%s. (grid: %%i x %%i; block: %%i x %%i x %%i; shared: %%i)\\n",
                    "k_multi_warp_%(name)s",
@@ -264,4 +264,4 @@ def use_gpu_multinomial(node):
        return [host_from_gpu(gpu_multinomial(*[gpu_from_host(i) for i in node.inputs]))]
 if cuda_enabled:#theano.config.device.startswith('gpu'):
    register_specialize(use_gpu_multinomial)
-    
+
--- a/theano/sandbox/neighbourhoods.py
+++ b/theano/sandbox/neighbourhoods.py
@@ -7,7 +7,7 @@ import numpy
 import __builtin__

 class NeighbourhoodsFromImages(Op):
-    def __init__(self, n_dims_before, dims_neighbourhoods, 
+    def __init__(self, n_dims_before, dims_neighbourhoods,
                    strides=None, ignore_border=False, inverse=False):
        """
        This extracts neighbourhoods from "images", but in a
@@ -65,10 +65,10 @@ class NeighbourhoodsFromImages(Op):
        """
        self.n_dims_before = n_dims_before
        self.dims_neighbourhoods = dims_neighbourhoods
-        if not strides is None:                                                               
-            self.strides = strides                                                            
-        else:                                                                                 
-            self.strides = dims_neighbourhoods                                                
+        if not strides is None:
+            self.strides = strides
+        else:
+            self.strides = dims_neighbourhoods
        self.ignore_border = ignore_border

        self.inverse = inverse
@@ -99,7 +99,7 @@ class NeighbourhoodsFromImages(Op):

    def __str__(self):
        return '%s{%s,%s,%s,%s}' % \
-                (self.__class__.__name__, 
+                (self.__class__.__name__,
                 self.n_dims_before,
                 self.dims_neighbourhoods,
                 self.strides,
@@ -135,7 +135,7 @@ class NeighbourhoodsFromImages(Op):
            # the number of strides performed by NeighFromImg is
            # directly given by this shape
            num_strides.append(output_shape[self.n_dims_before + i])
-            
+
            # our Op's output image must be at least this wide
            at_least_width = num_strides[i] * self.strides[i]

@@ -231,7 +231,7 @@ class NeighbourhoodsFromImages(Op):
                ("for neigh_idx_%d in xrange(min(max_neigh_idx_%d,"\
                +" self.dims_neighbourhoods[%d])):\n") % \
                    (inner_dim_no, inner_dim_no, inner_dim_no)
-        
+
        return code_before

    def _py_flattened_idx(self):
@@ -268,8 +268,8 @@ class NeighbourhoodsFromImages(Op):
 class ImagesFromNeighbourhoods(NeighbourhoodsFromImages):
    def __init__(self, n_dims_before, dims_neighbourhoods,
                        strides=None, ignore_border=False):
-        NeighbourhoodsFromImages.__init__(self,n_dims_before, dims_neighbourhoods, 
-                                strides=strides, ignore_border=ignore_border, 
+        NeighbourhoodsFromImages.__init__(self,n_dims_before, dims_neighbourhoods,
+                                strides=strides, ignore_border=ignore_border,
                                inverse=True)
        # and that's all there is to it

--- a/theano/sandbox/neighbours.py
+++ b/theano/sandbox/neighbours.py
@@ -88,7 +88,7 @@ class Images2Neibs(Op):
            PyErr_Format(PyExc_TypeError, "neib_step wrong step ; has to contain 2 elements");
            %(fail)s;
        }
-        
+
        // (c,d) = neib_shape
        const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
        const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
@@ -137,7 +137,7 @@ class Images2Neibs(Op):
                            * grid_d
                            * (%(ten4)s->dimensions)[1]
                            * (%(ten4)s->dimensions)[0];
-        
+
        if ((NULL == %(z)s)
            || ((%(z)s->dimensions)[0] != z_dim0 )
            || ((%(z)s->dimensions)[1] != z_dim1 )
@@ -147,12 +147,12 @@ class Images2Neibs(Op):
            npy_intp dims[2];
            dims[0] = z_dim0;
            dims[1] = z_dim1;
-            
+
            %(z)s = (PyArrayObject*) PyArray_EMPTY(2,
                dims,
                type_num_%(ten4)s,
                0);
-                       
+
            if (!%(z)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
@@ -162,12 +162,12 @@ class Images2Neibs(Op):
        }

        { // NESTED SCOPE
-        
+
        const int nb_batch = (%(ten4)s->dimensions)[0];
        const int nb_stack = (%(ten4)s->dimensions)[1];
        const int height = (%(ten4)s->dimensions)[2];
        const int width = (%(ten4)s->dimensions)[3];
-        
+
        // (c,d) = neib_shape
        const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
        const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
@@ -177,7 +177,7 @@ class Images2Neibs(Op):

        const int wrap_centered_idx_shift_x = c/2;
        const int wrap_centered_idx_shift_y = d/2;
-        // Oh this is messed up...      
+        // Oh this is messed up...
        for (int n = 0; n < nb_batch; n++)              // loop over batches
            for (int s = 0; s < nb_stack; s++)          // loop over stacks
                for (int a = 0; a < grid_c; a++)        // loop over the number of patch in height
@@ -194,18 +194,18 @@ class Images2Neibs(Op):
                            }
                            for (int j = 0; j < d; j++)  // loop over d
                            {
-                                
-                                int ten4_3 = j + b * step_y;     
+
+                                int ten4_3 = j + b * step_y;
                                if ( "%(mode)s" == "wrap_centered" ){
                                    ten4_3 -= wrap_centered_idx_shift_y;
                                    if ( ten4_3 < 0 ) ten4_3 += width;
                                    else if (ten4_3 >= width) ten4_3 -= width;
                                }
                                int z_col = j + d * i;
-                                
+
                                dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col);
                                *curr_z = *( (dtype_%(ten4)s*) PyArray_GETPTR4(%(ten4)s, n, s, ten4_2, ten4_3));
-                                
+
                                //printf("\\n(%%i,%%i,%%i,%%i) --> (%%i,%%i)",n,s, ten4_2, ten4_3, z_row, z_col);
                                //printf("%%f ", *curr_z);
                            }
@@ -220,22 +220,22 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
 def neibs2images(neibs, neib_shape, original_shape):
    """
    Inverse of images2neib.
-    
+
    neibs : matrix like the one obtained by images2neib
    neib_shape : neib_shape that was used in images2neib
    original_shape : original shape of the 4d tensor given to images2neib
-    
+
    Return a 4d tensor of shape `original_shape`.
    """
    neibs = T.as_tensor_variable(neibs)
    neib_shape = T.as_tensor_variable(neib_shape)
    original_shape = T.as_tensor_variable(original_shape)
-    
+
    new_neib_shape = T.stack( original_shape[-1]/neib_shape[1], neib_shape[1] )
    return images2neibs(neibs.dimshuffle('x','x',0,1), new_neib_shape).reshape(original_shape)
    #return images2neibs(neibs.reshape((1,1,neibs.shape[0],neibs.shape[1])), new_neib_shape).reshape(original_shape)
-    
-   
+
+
 # This is work in progress
 class GpuImages2Neibs(Images2Neibs):
    def __init__(self, mode='valid'):
@@ -251,7 +251,7 @@ class GpuImages2Neibs(Images2Neibs):
        assert ten4.ndim==4
        assert neib_shape.ndim==1
        assert neib_step.ndim==1
-        
+
        return Apply(self, [ten4, neib_shape, neib_step], [CudaNdarrayType(broadcastable=(False,False),
                                                                dtype=ten4.type.dtype)()])

@@ -313,8 +313,8 @@ class GpuImages2Neibs(Images2Neibs):
                                    }

                                    //int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
-                                    //int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n)); 
-                                    int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n; 
+                                    //int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
+                                    int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;

                                    int z_col = j + d * i;
                                    int z_idx = z_col + c*d*z_row;
@@ -375,8 +375,8 @@ class GpuImages2Neibs(Images2Neibs):
                                    }

                                    //int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
-                                    //int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n)); 
-                                    int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n; 
+                                    //int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
+                                    int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;

                                    int z_col = j + d * i;
                                    int z_idx = z_col + c*d*z_row;
@@ -406,7 +406,7 @@ class GpuImages2Neibs(Images2Neibs):
                PyErr_Format(PyExc_TypeError, "unis wrong rank");
                %(fail)s;
            }
-            
+
            if (%(neib_shape)s->dimensions[0] != 2)
            {
                PyErr_Format(PyExc_ValueError, "neib_shape has to contain two elements");
@@ -459,7 +459,7 @@ class GpuImages2Neibs(Images2Neibs):
                                * grid_d
                                * CudaNdarray_HOST_DIMS(%(ten4)s)[1]
                                * CudaNdarray_HOST_DIMS(%(ten4)s)[0];
-            
+
            if ((NULL == %(z)s)
                || (CudaNdarray_HOST_DIMS(%(z)s)[0] != z_dim0)
                || (CudaNdarray_HOST_DIMS(%(z)s)[1] != z_dim1))
@@ -475,11 +475,11 @@ class GpuImages2Neibs(Images2Neibs):
                    %(fail)s;
                }
            }
-        
+
        }

        { // NESTED SCOPE
-        
+
            const int nb_batch = CudaNdarray_HOST_DIMS(%(ten4)s)[0];
            const int nb_stack = CudaNdarray_HOST_DIMS(%(ten4)s)[1];
            const int height = CudaNdarray_HOST_DIMS(%(ten4)s)[2];
@@ -489,11 +489,11 @@ class GpuImages2Neibs(Images2Neibs):
            const int d = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
            const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
            const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
-            
+
            dim3 n_threads(d,c,1);
            //Their is a max of 512 threads per blocks
-            while(n_threads.x*n_threads.y>512 && n_threads.y>1)n_threads.y--; 
-            while(n_threads.x*n_threads.y>512 && n_threads.x>1)n_threads.x--; 
+            while(n_threads.x*n_threads.y>512 && n_threads.y>1)n_threads.y--;
+            while(n_threads.x*n_threads.y>512 && n_threads.x>1)n_threads.x--;

            //Make bigger block to have better memory access pattern and a higher core utilisation.
            //for smaller patch size
@@ -519,7 +519,7 @@ class GpuImages2Neibs(Images2Neibs):
                f = k_multi_warp_%(name)s;
            }

-            f<<<n_blocks, n_threads, n_shared>>>(                
+            f<<<n_blocks, n_threads, n_shared>>>(
                nb_batch,
                nb_stack,
                height, width,
@@ -534,7 +534,7 @@ class GpuImages2Neibs(Images2Neibs):
            );
            CNDA_THREAD_SYNC;
            cudaError_t sts = cudaGetLastError();
-            if (cudaSuccess != sts) 
+            if (cudaSuccess != sts)
            {
                PyErr_Format(PyExc_RuntimeError, "Cuda error: %%s: %%s. (grid: %%i x %%i; block: %%i x %%i x %%i; shared: %%i)\\n",
                    "k_multi_warp_%(name)s",
@@ -560,4 +560,4 @@ def use_gpu_images2neibs(node):

 if cuda_available:
    register_gpu_opt()(use_gpu_images2neibs)
-    
+
--- a/theano/sparse/sandbox/truedot.py
+++ b/theano/sparse/sandbox/truedot.py
@@ -60,7 +60,7 @@ class TrueDot(gof.op.Op):
            if self.grad_preserves_dense:
                rval[1] = dense_from_sparse(rval[1])
        return rval
-    
+
 def true_dot(x, y, grad_preserves_dense=True):
    """
    @todo: Maybe the triple-transposition formulation (when x is dense)