提交 015b42a4，作者: Frederic

pep8

上级 c3a461f7
......@@ -3,12 +3,14 @@ import numpy
import theano
import theano.tensor as T
from theano.gof import local_optimizer
from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable, host_from_gpu, HostFromGpu
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
host_from_gpu, HostFromGpu)
from theano.misc import strutil
from theano.tensor.nnet.Conv3D import Conv3D
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType, GpuOp
class GpuConv3D(GpuOp):
""" GPU implementation of Conv3D """
......@@ -32,19 +34,21 @@ class GpuConv3D(GpuOp):
W_ = as_cuda_ndarray_variable(W)
b_ = as_cuda_ndarray_variable(b)
d_ = T.as_tensor_variable(d)
broad = (V_.broadcastable[0], W_.broadcastable[0], False, False, False)
return theano.Apply(self, inputs=[V_, W_, b_, d_],
outputs = [ CudaNdarrayType(dtype=V_.dtype, broadcastable=(V_.broadcastable[0],W_.broadcastable[0],False,False,False))() ] )
outputs=[CudaNdarrayType(dtype=V_.dtype,
broadcastable=broad)()])
def c_code_cache_version(self):
return ()
def c_code(self, node, nodename, inputs, outputs, sub):
V, W, b, d = inputs
fail = sub['fail']
H = outputs[0]
codeSource = """
codeSource = """
///////////// < code generated by GpuConv3D >
//printf("\t\t\t\tConv3DGPU c code\\n");
......@@ -220,13 +224,13 @@ if(!work_complete){
}}}}}}} //extra scope so error handler jumps don't cross declarations
///////////// < /code generated by GpuConv3D >
"""
return strutil.render_string(codeSource,locals())
return strutil.render_string(codeSource, locals())
def c_support_code_apply(self, node, nodename):
# This code is not sensitive to the ignore_border flag.
# It runs for every position in the output z, and then computes the gradient for the
# input pixels that were downsampled to that z-position.
codeSource = """
codeSource = """
__global__ void
//thread block size = out_dur
//grid block size =(out_len*out_wid, nb kern *nb batch)
......@@ -283,11 +287,16 @@ conv_rows_stack( float* img, float* kern, float* bias, float* out,
gpu_convd = GpuConv3D()
@register_opt()
@local_optimizer([Conv3D])
def local_gpu_conv3d(node):
if isinstance(node.op, Conv3D):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
for i in node.inputs]):
if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
V, W, b, d = node.inputs
return [host_from_gpu(gpu_convd(as_cuda_ndarray_variable(V),as_cuda_ndarray_variable(W), as_cuda_ndarray_variable(b), d))]
return [host_from_gpu(gpu_convd(as_cuda_ndarray_variable(V),
as_cuda_ndarray_variable(W),
as_cuda_ndarray_variable(b),
d))]
......@@ -12,7 +12,6 @@ from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
host_from_gpu, GpuOp)
class GpuConvGrad3D(GpuOp):
""" GPU version of gradient of ConvGrad3D with respect to W """
......@@ -27,9 +26,10 @@ class GpuConvGrad3D(GpuOp):
d_ = T.as_tensor_variable(d)
WShape_ = T.as_tensor_variable(WShape)
dCdH_ = as_cuda_ndarray_variable(dCdH)
broad = (False,)*5
return theano.Apply(self, inputs=[V_, d_, WShape_, dCdH_],
outputs = [ CudaNdarrayType(dtype=V_.dtype, broadcastable=(False,)*5)()])
outputs=[CudaNdarrayType(dtype=V_.dtype,
broadcastable=broad)()])
def perform_(self, node, inputs, output_storage):
V, d, WShape, dCdH = inputs
......@@ -51,18 +51,18 @@ class GpuConvGrad3D(GpuOp):
dCdW = numpy.zeros(WShape, dtype=V.dtype)
#block
for j in xrange(0,WShape[0]):
for z in xrange(0,WShape[1]):
for k in xrange(0,WShape[2]):
for l in xrange(0,WShape[3]):
#threads
for m in xrange(0,WShape[4]):
#thread
for i in xrange(0,batchSize):
for p in xrange(0,outputHeight):
for q in xrange(0,outputWidth):
for r in xrange(0,outputDur):
# block
for j in xrange(0, WShape[0]):
for z in xrange(0, WShape[1]):
for k in xrange(0, WShape[2]):
for l in xrange(0, WShape[3]):
# threads
for m in xrange(0, WShape[4]):
# thread
for i in xrange(0, batchSize):
for p in xrange(0, outputHeight):
for q in xrange(0, outputWidth):
for r in xrange(0, outputDur):
dCdW[j,z,k,l,m] += dCdH[i,j,p,q,r] * V[i,z,dr*p+k,dc*q+l,dt*r+m]
output_storage[0][0] = dCdW
......@@ -340,11 +340,17 @@ convgrad_rows_stack( float* img, float* dCdH, float* dCdW,
gpu_conv_grad3d = GpuConvGrad3D()
@register_opt()
@local_optimizer([ConvGrad3D])
def local_gpu_conv_gradd(node):
if isinstance(node.op, ConvGrad3D):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
for i in node.inputs]):
if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
V, d, WShape, dCdH = node.inputs
return [host_from_gpu(gpu_conv_grad3d(as_cuda_ndarray_variable(V),d, WShape, as_cuda_ndarray_variable(dCdH)))]
return [host_from_gpu(gpu_conv_grad3d(
as_cuda_ndarray_variable(V),
d,
WShape,
as_cuda_ndarray_variable(dCdH)))]
......@@ -15,13 +15,13 @@ from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
class GpuConvTransp3D(GpuOp):
""" The gpu version of ConvTransp3D """
def __eq__(self,other):
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def make_node(self, W, b, d, H, RShape = None):
def make_node(self, W, b, d, H, RShape=None):
W_ = as_cuda_ndarray_variable(W)
b_ = as_cuda_ndarray_variable(b)
d_ = T.as_tensor_variable(d)
......@@ -29,22 +29,21 @@ class GpuConvTransp3D(GpuOp):
if RShape:
RShape_ = T.as_tensor_variable(RShape)
else:
RShape_ = T.as_tensor_variable([-1,-1,-1])
RShape_ = T.as_tensor_variable([-1, -1, -1])
return theano.Apply(self, inputs=[W_,b_,d_,H_, RShape_],
outputs = [CudaNdarrayType(dtype=H_.dtype,
broadcastable=(False,)*5)()])
return theano.Apply(self, inputs=[W_, b_, d_, H_, RShape_],
outputs=[CudaNdarrayType(dtype=H_.dtype,
broadcastable=(False,)*5)()])
def infer_shape(self, node, input_shapes):
W,b,d,H,RShape = node.inputs
W, b, d, H, RShape = node.inputs
W_shape, b_shape, d_shape, H_shape, RShape_shape = input_shapes
return [(H_shape[0], W_shape[1], RShape[0], RShape[1], RShape[2])]
def perform_(self, node, inputs, output_storage):
W, b, d, H, RShape = inputs
print "\t\t\t\tGpuConvTransp3D python code still uses old format"
output_storage[0][0] = computeR(W,b,d,H,RShape)
output_storage[0][0] = computeR(W, b, d, H, RShape)
def c_code_cache_version(self):
return ()
......@@ -55,7 +54,7 @@ class GpuConvTransp3D(GpuOp):
R = outputs[0]
codeSource = """
codeSource = """
///////////// < code generated by GpuConvTransp3D >
//printf("\t\t\t\tGpuConvTransp c code\\n");
......@@ -263,13 +262,13 @@ if(!work_complete){
}}}}}} // for fail
///////////// < /code generated by GpuConvTransp3D >
"""
return strutil.render_string(codeSource,locals())
return strutil.render_string(codeSource, locals())
def c_support_code_apply(self, node, nodename):
# This code is not sensitive to the ignore_border flag.
# It runs for every position in the output z, and then computes the gradient for the
# input pixels that were downsampled to that z-position.
codeSource = """
codeSource = """
__global__ void
//thread block size = videoDur
//grid block size =(batchSize * inputChannels, videoHeight * videoWidth)
......@@ -347,18 +346,20 @@ conv_transp_rows_stack( float* H, float* kern, float* bias, float* R,
gpu_conv_transpd = GpuConvTransp3D()
@register_opt()
@local_optimizer([ConvTransp3D])
def local_gpu_conv_transpd(node):
if isinstance(node.op, ConvTransp3D):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
for i in node.inputs]):
if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
W, b, d, H, RShape = node.inputs
return [host_from_gpu(gpu_conv_transpd(W, b, d, H, RShape))]
#If the input size wasn't a multiple of D we may need to cause some automatic padding to get the right size of reconstruction
def computeR(W,b,d,H,Rshape = None):
def computeR(W, b, d, H, Rshape=None):
assert len(W.shape) == 5
assert len(H.shape) == 5
assert len(b.shape) == 1
......@@ -370,7 +371,7 @@ def computeR(W,b,d,H,Rshape = None):
assert outputChannelsAgain == outputChannels
assert b.shape[0] == inputChannels
dr,dc,dt = d
dr, dc, dt = d
assert dr > 0
assert dc > 0
assert dt > 0
......@@ -398,14 +399,14 @@ def computeR(W,b,d,H,Rshape = None):
videoWidth, videoDur ) , dtype=H.dtype)
#R[i,j,r,c,t] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, j, rk, ck, tk] * H[i,k,rc,cc,tc]
for i in xrange(0,batchSize):
for i in xrange(0, batchSize):
#print '\texample '+str(i+1)+'/'+str(batchSize)
for j in xrange(0,inputChannels):
for j in xrange(0, inputChannels):
#print '\t\tfeature map '+str(j+1)+'/'+str(inputChannels)
for r in xrange(0,videoHeight):
for r in xrange(0, videoHeight):
#print '\t\t\trow '+str(r+1)+'/'+str(videoHeight)
for c in xrange(0,videoWidth):
for t in xrange(0,videoDur):
for c in xrange(0, videoWidth):
for t in xrange(0, videoDur):
R[i,j,r,c,t] = b[j]
ftc = max([0, int(numpy.ceil(float(t-filterDur +1 )/float(dt))) ])
......@@ -432,16 +433,16 @@ def computeR(W,b,d,H,Rshape = None):
R[i,j,r,c,t] += numpy.dot(W[:,j,rk,ck,tk], H[i,:,rc,cc,tc] )
tc += 1
"" #close loop over tc
"" # close loop over tc
cc += 1
"" #close loop over cc
"" # close loop over cc
rc += 1
"" #close loop over rc
"" #close loop over t
"" #close loop over c
"" #close loop over r
"" #close loop over j
"" #close loop over i
"" # close loop over rc
"" # close loop over t
"" # close loop over c
"" # close loop over r
"" # close loop over j
"" # close loop over i
return R
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论