提交 fd8541cb authored 作者: David Warde-Farley's avatar David Warde-Farley

Replace tabs with 4 spaces in GpuConvTransp3D.py.

上级 703fb32e
......@@ -15,10 +15,10 @@ from theano.sandbox.cuda import CudaNdarrayType, HostFromGpu, host_from_gpu
class GpuConvTransp3D(theano.Op):
""" The gpu version of ConvTransp3D """
def __eq__(self, other):
    """Ops of this type carry no parameters, so equality is exact-type identity."""
    return type(other) == type(self)
def __hash__(self):
    """Hash consistently with __eq__: every instance of this type hashes alike."""
    own_type = type(self)
    return hash(own_type)
def make_node(self, W, b, d, H, RShape = None):
W_ = as_cuda_ndarray_variable(W)
......@@ -41,153 +41,153 @@ class GpuConvTransp3D(theano.Op):
def perform_(self, node, inputs, output_storage):
W, b, d, H, RShape = inputs
print "\t\t\t\tGpuConvTransp3D python code still uses old format"
output_storage[0][0] = computeR(W,b,d,H,RShape)
W, b, d, H, RShape = inputs
print "\t\t\t\tGpuConvTransp3D python code still uses old format"
output_storage[0][0] = computeR(W,b,d,H,RShape)
def c_code_cache_version(self):
    """Return an empty tuple: the generated C code is not version-cached."""
    return tuple()
def c_code(self, node, nodename, (W, b, d, H, RShape), outputs, sub):
fail = sub['fail']
fail = sub['fail']
R = outputs[0]
R = outputs[0]
codeSource = """
///////////// < code generated by GpuConvTransp3D >
codeSource = """
///////////// < code generated by GpuConvTransp3D >
//printf("\t\t\t\tGpuConvTransp c code\\n");
//printf("\t\t\t\tGpuConvTransp c code\\n");
//Check dimensionality of inputs
if (%(H)s->nd != 5)
{
//Check dimensionality of inputs
if (%(H)s->nd != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: H must be a 5-D tensor but it is %%i-D",%(H)s->nd);
%(fail)s
}
}
if (%(W)s->nd != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: W must be a 5-D tensor");
%(fail)s
}
if (%(W)s->nd != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: W must be a 5-D tensor");
if (%(b)s->nd != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: b must be a vector");
%(fail)s
}
}
if (%(b)s->nd != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: b must be a vector");
if (%(d)s->nd != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: d must be a vector");
%(fail)s
}
}
if (%(d)s->nd != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: d must be a vector");
//Read and check stride arguments
if (%(d)s->dimensions[0] != 3)
{
PyErr_Format(PyExc_ValueError,"GpuConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", %(d)s->dimensions[0]);
%(fail)s
}
//Read and check stride arguments
if (%(d)s->dimensions[0] != 3)
{
PyErr_Format(PyExc_ValueError,"GpuConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", %(d)s->dimensions[0]);
%(fail)s
}
}
{ // for fail
const int dr = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,0);
const int dc = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,1);
const int dt = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,2);
if (dr <= 0 || dc <= 0 || dt <= 0)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: Strides must all be positive but are %%i, %%i, %%i",dr,dc,dt);
%(fail)s
}
const int dr = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,0);
const int dc = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,1);
const int dt = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,2);
if (dr <= 0 || dc <= 0 || dt <= 0)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: Strides must all be positive but are %%i, %%i, %%i",dr,dc,dt);
%(fail)s
}
//Read and check sizes of inputs
//Read and check sizes of inputs
{ // for fail
const int batchSize = CudaNdarray_HOST_DIMS(%(H)s)[0];
const int outputChannels = CudaNdarray_HOST_DIMS(%(W)s)[0];
const int batchSize = CudaNdarray_HOST_DIMS(%(H)s)[0];
const int outputChannels = CudaNdarray_HOST_DIMS(%(W)s)[0];
if (CudaNdarray_HOST_DIMS(%(H)s)[4] != outputChannels)
{
if (CudaNdarray_HOST_DIMS(%(H)s)[4] != outputChannels)
{
PyErr_Format(PyExc_ValueError, "W produces a %%i channel image but the image has %%i channels. W.shape: (%%i, %%i, %%i,%%i, %%i) H.shape: (%%i, %%i, %%i, %%i, %%i)",outputChannels,CudaNdarray_HOST_DIMS(%(H)s)[4], CudaNdarray_HOST_DIMS(%(W)s)[0], CudaNdarray_HOST_DIMS(%(W)s)[1], CudaNdarray_HOST_DIMS(%(W)s)[2], CudaNdarray_HOST_DIMS(%(W)s)[3], CudaNdarray_HOST_DIMS(%(W)s)[4], CudaNdarray_HOST_DIMS(%(H)s)[0], CudaNdarray_HOST_DIMS(%(H)s)[1], CudaNdarray_HOST_DIMS(%(H)s)[2], CudaNdarray_HOST_DIMS(%(H)s)[3], CudaNdarray_HOST_DIMS(%(H)s)[4]);
%(fail)s
}
}
{ // for fail
const int inputChannels = CudaNdarray_HOST_DIMS(%(W)s)[4];
const int inputChannels = CudaNdarray_HOST_DIMS(%(W)s)[4];
if (CudaNdarray_HOST_DIMS(%(b)s)[0] != inputChannels)
{
if (CudaNdarray_HOST_DIMS(%(b)s)[0] != inputChannels)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: b operates on a %%i channel image but the image has %%i channels", CudaNdarray_HOST_DIMS(%(b)s)[0], inputChannels );
%(fail)s
}
%(fail)s
}
{ // for fail
const int filterHeight = CudaNdarray_HOST_DIMS(%(W)s)[1];
const int filterWidth = CudaNdarray_HOST_DIMS(%(W)s)[2];
const int filterDur = CudaNdarray_HOST_DIMS(%(W)s)[3];
const int outputHeight = CudaNdarray_HOST_DIMS(%(H)s)[1];
const int outputWidth = CudaNdarray_HOST_DIMS(%(H)s)[2];
const int outputDur = CudaNdarray_HOST_DIMS(%(H)s)[3];
const int filterHeight = CudaNdarray_HOST_DIMS(%(W)s)[1];
const int filterWidth = CudaNdarray_HOST_DIMS(%(W)s)[2];
const int filterDur = CudaNdarray_HOST_DIMS(%(W)s)[3];
const int outputHeight = CudaNdarray_HOST_DIMS(%(H)s)[1];
const int outputWidth = CudaNdarray_HOST_DIMS(%(H)s)[2];
const int outputDur = CudaNdarray_HOST_DIMS(%(H)s)[3];
int videoHeight = (outputHeight-1) * dr + filterHeight;
int videoWidth = (outputWidth-1) * dc + filterWidth;
int videoDur = (outputDur-1) * dt + filterDur;
int videoHeight = (outputHeight-1) * dr + filterHeight;
int videoWidth = (outputWidth-1) * dc + filterWidth;
int videoDur = (outputDur-1) * dt + filterDur;
if (%(RShape)s)
{
if (%(RShape)s->nd != 1)
{
PyErr_Format(PyExc_ValueError, "RShape must be a vector");
%(fail)s
}
if (%(RShape)s)
{
if (%(RShape)s->nd != 1)
{
PyErr_Format(PyExc_ValueError, "RShape must be a vector");
%(fail)s
}
if (%(RShape)s->dimensions[0] != 3)
{
if (%(RShape)s->dimensions[0] != 3)
{
PyErr_Format(PyExc_ValueError, "RShape must specify a 3D shape ( [height,width,duration] )");
%(fail)s
}
}
{ // for fail
dtype_%(RShape)s RShape0 = *(dtype_%(RShape)s*)PyArray_GETPTR1(%(RShape)s,0);
dtype_%(RShape)s RShape1 = *(dtype_%(RShape)s*)PyArray_GETPTR1(%(RShape)s,1);
dtype_%(RShape)s RShape2 = *(dtype_%(RShape)s*)PyArray_GETPTR1(%(RShape)s,2);
if (RShape0 != -1)
{
if (RShape0 < videoHeight || RShape1 < videoWidth || RShape2 < videoDur)
{
PyErr_Format(PyExc_ValueError, "Reconstruction must have shape of at least [%%i,%%i,%%i] but RShape argument requests that it be [%%i,%%i,%%i]" , videoHeight, videoWidth, videoDur, RShape0, RShape 1, RShape2 );
%(fail)s
}
videoHeight = RShape0;
videoWidth = RShape1;
videoDur = RShape2;
}
}
//Allocate the reconstruction
npy_intp dims[5];
dims[0] = batchSize;
dims[4] = inputChannels;
dims[1] = videoHeight;
dims[2] = videoWidth;
dims[3] = videoDur;
if (RShape0 != -1)
{
if (RShape0 < videoHeight || RShape1 < videoWidth || RShape2 < videoDur)
{
PyErr_Format(PyExc_ValueError, "Reconstruction must have shape of at least [%%i,%%i,%%i] but RShape argument requests that it be [%%i,%%i,%%i]" , videoHeight, videoWidth, videoDur, RShape0, RShape 1, RShape2 );
%(fail)s
}
videoHeight = RShape0;
videoWidth = RShape1;
videoDur = RShape2;
}
}
//Allocate the reconstruction
npy_intp dims[5];
dims[0] = batchSize;
dims[4] = inputChannels;
dims[1] = videoHeight;
dims[2] = videoWidth;
dims[3] = videoDur;
if(!(%(R)s) || CudaNdarray_HOST_DIMS(%(R)s)[0]!=dims[0] ||
CudaNdarray_HOST_DIMS(%(R)s)[1]!=dims[1] ||
CudaNdarray_HOST_DIMS(%(R)s)[2]!=dims[2] ||
CudaNdarray_HOST_DIMS(%(R)s)[3]!=dims[3] ||
CudaNdarray_HOST_DIMS(%(R)s)[4]!=dims[4]){
Py_XDECREF(%(R)s);
%(R)s = (CudaNdarray*)CudaNdarray_NewDims(5,dims);
if (!(%(R)s)) {
PyErr_Format(PyExc_MemoryError,"Could not allocate R");
%(fail)s;
}
Py_XDECREF(%(R)s);
%(R)s = (CudaNdarray*)CudaNdarray_NewDims(5,dims);
if (!(%(R)s)) {
PyErr_Format(PyExc_MemoryError,"Could not allocate R");
%(fail)s;
}
}
cudaMemset(%(R)s->devdata, 0, 4 * batchSize * inputChannels * videoHeight * videoWidth * videoDur);
cudaMemset(%(R)s->devdata, 0, 4 * batchSize * inputChannels * videoHeight * videoWidth * videoDur);
{ // for fail
......@@ -235,7 +235,7 @@ HERE
if (cudaSuccess == sts)
{
work_complete = true;
if (verbose>1) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
if (verbose>1) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
if (verbose) printf("INFO: used 'conv_transp_rows_stack' version\\n");
}
else
......@@ -259,9 +259,9 @@ if(!work_complete){
}}}}}} // for fail
///////////// < /code generated by GpuConvTransp3D >
"""
return strutil.renderString(codeSource,locals())
///////////// < /code generated by GpuConvTransp3D >
"""
return strutil.renderString(codeSource,locals())
def c_support_code_apply(self, node, nodename):
# This code is not sensitive to the ignore_border flag.
......@@ -275,15 +275,15 @@ __global__ void
conv_transp_rows_stack( float* H, float* kern, float* bias, float* R,
int img_len, int img_wid, int img_dur,
int img_len, int img_wid, int img_dur,
int kern_len, int kern_wid, int kern_dur,
int H_len, int H_wid, int H_dur,
int nkern, int nstack,
int nkern, int nstack,
int dr, int dc, int dt,
int H_stride_frame, int H_stride_col, int H_stride_row,
int H_stride_stack, int H_stride_batch,
int H_stride_stack, int H_stride_batch,
int kern_stride_frame, int kern_stride_col, int kern_stride_row,
int kern_stride_stack, int kern_stride_nkern,
int kern_stride_stack, int kern_stride_nkern,
int bias_stride)
{
int __shared__ batch_id, stack_id;
......@@ -306,30 +306,30 @@ conv_transp_rows_stack( float* H, float* kern, float* bias, float* R,
float sum = 0;
while(rc < H_len){
int rk = r - rc * dr;
if(rk < 0)
break;
int cc = fcc;
while( cc < H_wid){
int ck = c - cc * dc;
if(ck < 0)
break;
int tc = ftc;
while(tc < H_dur){
int tk = t - tc * dt;
if(tk < 0)
break;
//R[i,j,r,c,t] += numpy.dot(W[:,j,rk,ck,tk], H[i,:,rc,cc,tc] )
int rk = r - rc * dr;
if(rk < 0)
break;
int cc = fcc;
while( cc < H_wid){
int ck = c - cc * dc;
if(ck < 0)
break;
int tc = ftc;
while(tc < H_dur){
int tk = t - tc * dt;
if(tk < 0)
break;
//R[i,j,r,c,t] += numpy.dot(W[:,j,rk,ck,tk], H[i,:,rc,cc,tc] )
for(int q=0;q<nkern;q++){
sum += kern[q*kern_stride_nkern+stack_id*kern_stride_stack+rk*kern_stride_row+ck*kern_stride_col+tk*kern_stride_frame]*
H[batch_id*H_stride_batch+q*H_stride_stack+rc*H_stride_row+cc*H_stride_col+tc*H_stride_frame];
}
tc += 1;
tc += 1;
}
cc += 1;
cc += 1;
}
rc += 1;
rc += 1;
}
R[batch_id*nstack*img_len*img_wid*img_dur+//the good batch
stack_id+//the output image
......@@ -357,89 +357,89 @@ def local_gpu_conv_transpd(node):
#If the input size wasn't a multiple of D we may need to cause some automatic padding to get the right size of reconstruction
#If the input size wasn't a multiple of D we may need to cause some automatic padding to get the right size of reconstruction
def computeR(W, b, d, H, Rshape=None):
    """Numpy reference implementation of strided 3D transposed convolution.

    R[i,j,r,c,t] = b[j] + sum over all (rc,cc,tc,k) with
    r = rc*dr + rk, c = cc*dc + ck, t = tc*dt + tk of W[k,j,rk,ck,tk] * H[i,k,rc,cc,tc].

    Parameters
    ----------
    W : 5-D array (outputChannels, inputChannels, filterHeight, filterWidth, filterDur)
    b : 1-D array of length inputChannels, added to every reconstructed voxel
    d : length-3 sequence of positive strides (dr, dc, dt)
    H : 5-D array (batchSize, outputChannels, outputHeight, outputWidth, outputDur)
    Rshape : optional length-3 sequence requesting a (possibly padded)
        reconstruction shape; Rshape[0] == -1 means "use the minimal shape".
        Each requested dim must be >= the minimal one (asserted).

    Returns
    -------
    5-D array (batchSize, inputChannels, videoHeight, videoWidth, videoDur)
    with the same dtype as H.
    """
    assert len(W.shape) == 5
    assert len(H.shape) == 5
    assert len(b.shape) == 1
    assert len(d) == 3

    outputChannels, inputChannels, filterHeight, filterWidth, filterDur = W.shape
    batchSize, outputChannelsAgain, outputHeight, outputWidth, outputDur = H.shape
    assert outputChannelsAgain == outputChannels
    assert b.shape[0] == inputChannels

    dr, dc, dt = d
    assert dr > 0
    assert dc > 0
    assert dt > 0

    # Minimal reconstruction size implied by the output size, filter size and strides.
    videoHeight = (outputHeight - 1) * dr + filterHeight
    videoWidth = (outputWidth - 1) * dc + filterWidth
    videoDur = (outputDur - 1) * dt + filterDur

    # "is not None" (not "!= None"): if Rshape is a numpy array, "!= None" is an
    # elementwise comparison and would be ambiguous/raise in a boolean context.
    if Rshape is not None and Rshape[0] != -1:
        # A requested shape may only pad the reconstruction, never crop it.
        if Rshape[0] < videoHeight:
            print (Rshape[0], videoHeight)
            assert False
        assert Rshape[1] >= videoWidth
        assert Rshape[2] >= videoDur
        videoHeight, videoWidth, videoDur = Rshape

    R = numpy.zeros((batchSize, inputChannels, videoHeight,
                     videoWidth, videoDur), dtype=H.dtype)

    for i in range(batchSize):
        for j in range(inputChannels):
            for r in range(videoHeight):
                for c in range(videoWidth):
                    for t in range(videoDur):
                        R[i, j, r, c, t] = b[j]
                        # First output positions whose filter footprint can
                        # still reach voxel (r, c, t); earlier ones overshoot.
                        ftc = max(0, int(numpy.ceil(float(t - filterDur + 1) / float(dt))))
                        fcc = max(0, int(numpy.ceil(float(c - filterWidth + 1) / float(dc))))
                        rc = max(0, int(numpy.ceil(float(r - filterHeight + 1) / float(dr))))
                        while rc < outputHeight:
                            rk = r - rc * dr
                            if rk < 0:
                                # Later rc only make rk more negative.
                                break
                            cc = fcc
                            while cc < outputWidth:
                                ck = c - cc * dc
                                if ck < 0:
                                    break
                                tc = ftc
                                while tc < outputDur:
                                    tk = t - tc * dt
                                    if tk < 0:
                                        break
                                    # Accumulate over the output-channel axis k.
                                    R[i, j, r, c, t] += numpy.dot(W[:, j, rk, ck, tk],
                                                                  H[i, :, rc, cc, tc])
                                    tc += 1
                                cc += 1
                            rc += 1
    return R
assert len(W.shape) == 5
assert len(H.shape) == 5
assert len(b.shape) == 1
assert len(d) == 3
outputChannels, inputChannels, filterHeight, filterWidth, filterDur = W.shape
batchSize, outputChannelsAgain, outputHeight, outputWidth, outputDur = H.shape
assert outputChannelsAgain == outputChannels
assert b.shape[0] == inputChannels
dr,dc,dt = d
assert dr > 0
assert dc > 0
assert dt > 0
videoHeight = (outputHeight-1) * dr + filterHeight
videoWidth = (outputWidth-1) * dc + filterWidth
videoDur = (outputDur-1) * dt + filterDur
if Rshape != None and Rshape[0] != -1:
if Rshape[0] < videoHeight:
print (Rshape[0], videoHeight)
assert False
assert Rshape[1] >= videoWidth
assert Rshape[2] >= videoDur
#print "setting video size to Rshape = "+str(Rshape)
videoHeight, videoWidth, videoDur = Rshape
#else:
# print "No Rshape passed in"
#print "video size: "+str((videoHeight, videoWidth, videoDur))
R = numpy.zeros( (batchSize, inputChannels, videoHeight,
videoWidth, videoDur ) , dtype=H.dtype)
#R[i,j,r,c,t] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, j, rk, ck, tk] * H[i,k,rc,cc,tc]
for i in xrange(0,batchSize):
#print '\texample '+str(i+1)+'/'+str(batchSize)
for j in xrange(0,inputChannels):
#print '\t\tfeature map '+str(j+1)+'/'+str(inputChannels)
for r in xrange(0,videoHeight):
#print '\t\t\trow '+str(r+1)+'/'+str(videoHeight)
for c in xrange(0,videoWidth):
for t in xrange(0,videoDur):
R[i,j,r,c,t] = b[j]
ftc = max([0, int(numpy.ceil(float(t-filterDur +1 )/float(dt))) ])
fcc = max([0, int(numpy.ceil(float(c-filterWidth +1)/float(dc))) ])
rc = max([0, int(numpy.ceil(float(r-filterHeight+1)/float(dr))) ])
while rc < outputHeight:
rk = r - rc * dr
if rk < 0:
break
cc = fcc
while cc < outputWidth:
ck = c - cc * dc
if ck < 0:
break
tc = ftc
while tc < outputDur:
tk = t - tc * dt
if tk < 0:
break
R[i,j,r,c,t] += numpy.dot(W[:,j,rk,ck,tk], H[i,:,rc,cc,tc] )
tc += 1
"" #close loop over tc
cc += 1
"" #close loop over cc
rc += 1
"" #close loop over rc
"" #close loop over t
"" #close loop over c
"" #close loop over r
"" #close loop over j
"" #close loop over i
return R
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论