Merge pull request #871 from delallea/minor

Minor stuff

Merge pull request #871 from delallea/minor
554ba22f · nouiz · c6875ba4 · bdcec1ab · 554ba22f · 554ba22f
--- a/doc/tutorial/using_gpu.txt
+++ b/doc/tutorial/using_gpu.txt
@@ -232,7 +232,7 @@ Tips for improving performance on GPU
  taking more time than its share, then if you know something about GPU
  programming have a look at how it's implemented in theano.sandbox.cuda.
  Check the line like 'Spent Xs(X%) in cpu Op, Xs(X%) in gpu Op and Xs(X%) transfert Op'
-  that can tell you if not enought of your graph is on the gpu or if their
+  that can tell you if not enough of your graph is on the gpu or if there
  is too much memory transfert.



--- a/theano/sandbox/cuda/tests/test_basic_ops.py
+++ b/theano/sandbox/cuda/tests/test_basic_ops.py
@@ -64,7 +64,7 @@ def test_sum():
                           ((5,4,3,10,11),[1,2]),
                           ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),

-                           #test shape bigger then 4096 on each dimension to make sure that we work correctly when we don't have enought thread/block in each dimensions
+                           #test shape bigger than 4096 on each dimension to make sure that we work correctly when we don't have enough threads/blocks in each dimension
                           ((4100,3),[0]),((3,4101),[0]),#10
                           ((1024,33),[0]),((33,1024),[0]),#10
                           ((1025,33),[0]),((33,1025),[0]),#10
@@ -880,7 +880,7 @@ class T_subtensor(theano.tensor.tests.test_basic.T_subtensor):
                                 ((4, 4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0,
                                                 -1, -2, -3, -4], False),
                             ]:
-            # If there is not enought memory on the GPU, skip the test
+            # If there is not enough memory on the GPU, skip the test
            size_needed = numpy.prod(shape) * (4 + 1)
            if isinstance(theano.compile.get_default_mode(),
                          theano.compile.DebugMode):
@@ -905,7 +905,7 @@ class T_subtensor(theano.tensor.tests.test_basic.T_subtensor):

            # Test with input strided
            t = self.adv_sub1()(n[::-1], idx)
-            #DebugMode do a copy of the input, so we loose the strides.
+            #DebugMode does a copy of the input, so we lose the strides.
            if not isinstance(theano.compile.get_default_mode(),
                              theano.compile.DebugMode):
                t.owner.op.perform_using_take = fast

--- a/theano/sandbox/cuda/tests/test_blas.py
+++ b/theano/sandbox/cuda/tests/test_blas.py
@@ -310,7 +310,7 @@ def test_downsample():

                # The grad is too slow on GT220 GPU
                # This cause the computer to freeze...
-                # Remove this when it get optimized enought
+                # Remove this when it gets optimized enough
                # This only bypass the last 2 checks
                # Those tests where passing in all Mode on a GTX470
                if shp[0] > 30000 or shp[1] > 30000:

--- a/theano/sandbox/cuda/tests/test_nnet.py
+++ b/theano/sandbox/cuda/tests/test_nnet.py
@@ -46,7 +46,7 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    #we precompute the dot with big shape before to allow the test of
    #GpuCrossentropySoftmax1HotWithBiasDx to don't fail with the error
    #(the launch timed out and was terminated) on GPU card not
-    #powerfull enought. We need the big shape to check for corner
+    #powerful enough. We need the big shape to check for corner
    #case.
    dot_result = T.fmatrix('dot_result')


--- a/theano/tensor/nnet/conv.py
+++ b/theano/tensor/nnet/conv.py
@@ -55,10 +55,10 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
    :type subsample: tuple of len 2
    :param subsample: factor by which to subsample the output

-    :type image_shape: tuple of len 4 of int or Contant variable
+    :type image_shape: tuple of len 4 of int or Constant variable
    :param image_shape: (batch size, stack size, nb row, nb col)
                        Optional, used for optimization.
-    :type filter_shape: tuple of len 4 of int or Contant variable
+    :type filter_shape: tuple of len 4 of int or Constant variable
    :param filter_shape: (nb filters, stack size, nb row, nb col)
                         Optional, used for optimization.

@@ -1744,15 +1744,15 @@ if (%(z)s->strides[3] != (npy_intp)sizeof(%(type)s)) %(fail)s;
 for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
  for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){

-"""%d
-    ret+=my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(%(z)s,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));")
-    ret+=my_dup("for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;",unroll_bsize*unroll_ksize)
-    ret+="""
+""" % d
+    ret += my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(%(z)s,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));")
+    ret += my_dup("for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;", unroll_bsize * unroll_ksize)
+    ret += """
    for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
-"""%d
-    ret+=my_dup("const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d,b+%(unroll_iter)s,stack_size));", unroll_bsize)
-    ret+=my_dup("const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern+%(unroll_iter)s,stack_size));",unroll_ksize)
-    ret+="""
+""" % d
+    ret += my_dup("const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d,b+%(unroll_iter)s,stack_size));", unroll_bsize)
+    ret += my_dup("const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern+%(unroll_iter)s,stack_size));", unroll_ksize)
+    ret += """

      int new_m;

@@ -1764,9 +1764,9 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){

        for (int iter_n=0; iter_n < Os[1]; iter_n++) {  // loop over columns
          int pos_n=iter_n*%(self_dy)s;
-        """%d
-    ret+=my_dup("%(type)s sum%(unroll_iter)s=0;", unroll_bsize*unroll_ksize)
-    ret+="""
+        """ % d
+    ret += my_dup("%(type)s sum%(unroll_iter)s=0;", unroll_bsize * unroll_ksize)
+    ret += """

          // Sum over kernel, if index into image is out of bounds
          // fill with the value
@@ -1774,15 +1774,15 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
            int ind0 = (new_m-j);

            if(mode==FULL){
-"""%d
-    ret+=my_dup("const %(type)s * idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker1];",unroll_ksize)
-    ret+="""
+""" % d
+    ret += my_dup("const %(type)s * idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker1];", unroll_ksize)
+    ret += """
              if(ind0 < 0 || ind0 >= dim_im[0]){
                if(fill_value!=0)
                  for (int k=0; k < dim_ker1; k++) {
-"""%d
-    ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
-    ret+="""
+""" % d
+    ret += my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
+    ret += """
                  }
              }else{
                //do the part where kernel is to the right of the img
@@ -1791,49 +1791,49 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
                if(fill_value!=0){

                  for(k=0;k<max_k;k++){
-"""%d
-    ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
-    ret+="""
+""" % d
+    ret += my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
+    ret += """
                  }
                }else {k=max_k;}

                //do the part where the kernel is on the img
                max_k=min(pos_n+1,(int)dim_ker1);
-"""%d
-    ret+=my_dup("const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
-    ret+="""
+""" % d
+    ret += my_dup("const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
+    ret += """
                for (int ind1=pos_n-k; k<max_k; k++,ind1--) {

-"""%d
-    ret+=my_dup2("sum%(unroll_iter)s+= idx_hvals%(unroll_kiter)s[k] * idx_in%(unroll_biter)s[ind1];")
-    ret+="""
+""" % d
+    ret += my_dup2("sum%(unroll_iter)s+= idx_hvals%(unroll_kiter)s[k] * idx_in%(unroll_biter)s[ind1];")
+    ret += """
                }
                //do the part to the left of the img
                if(fill_value!=0)
                  for(;k<dim_ker1;k++){
-"""%d
-    ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
-    ret+="""
+""" % d
+    ret += my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
+    ret += """
                  }
              }
            }else{//valid mode
-"""%d
-    ret+=my_dup("const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
-    ret+=my_dup("const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker1];",unroll_ksize)
-    ret+="""
+""" % d
+    ret += my_dup("const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
+    ret += my_dup("const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker1];", unroll_ksize)
+    ret += """
              int new_n = (pos_n+dim_ker1-1);

              for (int k=0,last=new_n; k < dim_ker1; k++,last--) {
-"""%d
-    ret+=my_dup2("sum%(unroll_iter)s+=idx_hvals%(unroll_kiter)s[k]*idx_in%(unroll_biter)s[last];")
-    ret+="""
+""" % d
+    ret += my_dup2("sum%(unroll_iter)s+=idx_hvals%(unroll_kiter)s[k]*idx_in%(unroll_biter)s[last];")
+    ret += """
              }
            }

          }//for j
-"""%d
-    ret+=my_dup("out%(unroll_iter)s[iter_m*dim_zz[1]+iter_n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize*unroll_ksize)
-    ret+="""
+""" % d
+    ret += my_dup("out%(unroll_iter)s[iter_m*dim_zz[1]+iter_n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize * unroll_ksize)
+    ret += """
        }//for n
      }//for m
    }//for stack_size