Correcting alignment (whitespace only) and removing the CudaNdarray_vector_add_fast declaration from cuda_ndarray.cuh

1d2b3245 · Vivek Kulkarni · 36c30634 · 1d2b3245 · 1d2b3245 · 1d2b3245
--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -2516,11 +2516,11 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
     		unsigned int num_threads_per_block = std::min(numcolsX, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
     		unsigned int num_blocks = std::min(size ,(unsigned int)NUM_VECTOR_OP_BLOCKS);

-    		dim3 n_blocks(num_blocks);
+     		dim3 n_blocks(num_blocks);
     		dim3 n_threads(num_threads_per_block);
     		long *d_indices_arr = NULL;

-		PyArrayObject *cpu_indices_arr = PyArray_GETCONTIGUOUS(indices_arr);
+    		PyArrayObject *cpu_indices_arr = PyArray_GETCONTIGUOUS(indices_arr);

    		d_indices_arr = (long *)device_malloc(PyArray_NBYTES(cpu_indices_arr));
     		assert(d_indices_arr);
@@ -2530,7 +2530,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
                                             PyArray_NBYTES(cpu_indices_arr),
                                             cudaMemcpyHostToDevice);

-		assert(err == cudaSuccess);
+    		assert(err == cudaSuccess);

     		k_vector_add_fast<<<n_blocks, n_threads>>>(shapeX[0],
                                                           shapeX[1],

--- a/theano/sandbox/cuda/cuda_ndarray.cu
+++ b/theano/sandbox/cuda/cuda_ndarray.cu
@@ -1776,7 +1776,6 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
    return 0;
 }

-
 /*
 * We need this inplace Add to support IncSubTensor
 * It returns py_self on success with an additional reference. Else NULL.

--- a/theano/sandbox/cuda/cuda_ndarray.cuh
+++ b/theano/sandbox/cuda/cuda_ndarray.cuh
@@ -490,7 +490,6 @@ DllExport PyObject * CudaNdarray_View(const CudaNdarray * self);
 DllExport PyObject * CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other);
 DllExport PyObject * CudaNdarray_Subscript(PyObject * py_self, PyObject * key);
 DllExport int CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t fct_nb);
-DllExport void CudaNdarray_vector_add_fast(CudaNdarray* py_self, CudaNdarray *py_other, PyArrayObject *indices_arr);

 // Ensures that *arr is a pointer to a contiguous ndarray of the specified
 // dimensions.

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1353,6 +1353,7 @@ def local_gpualloc_memset_0(node):
            new_out = GpuAlloc(memset_0=True)(*node.inputs)
            return [new_out]

+
 def safe_to_gpu(x):
    if (isinstance(x.type, tensor.TensorType) and
        x.type.dtype == 'float32'):