small doc fix.

4c8d04ff · Frederic · 76811aa7 · 4c8d04ff · 4c8d04ff · 4c8d04ff
--- a/doc/library/tensor/nnet/conv.txt
+++ b/doc/library/tensor/nnet/conv.txt
@@ -53,19 +53,18 @@ TODO: Give examples for how to use these things! They are pretty complicated.
      Also, there is restrictions on which shape are supported.
    - :func:`GpuCorrMM <theano.sandbox.cuda.blas.GpuCorrMM>`
      This is a GPU-only version of a correlation that computes correlations
-      as `caffe`(https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu). 
+      as `caffe <https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu>`_.
      For each element in a batch, it first creates a 
-      Toeplitz(http://en.wikipedia.org/wiki/Toeplitz_matrix) matrix in a cuda kernel. 
-      Then, it performs a `gemm` call to multiply this Toeplitz matrix and the kernel. 
+      `Toeplitz <http://en.wikipedia.org/wiki/Toeplitz_matrix>`_ matrix in a cuda kernel.
+      Then, it performs a ``gemm`` call to multiply this Toeplitz matrix and the kernel.
      It need extra memory equal to the size of the Toeplitz matrix. Precisely, 
-      the dimensions of this 2D Toeplitz matrix is equal to= 
-      (no of channels * filter width * filter height, output width * output height).
-      You can enable it for call to conv2d 2d by setting 'THEANO_FLAGS=optimizer_including=conv_gemm'
+      the dimensions of this 2D Toeplitz matrix are equal to
+      ``(no of channels * filter width * filter height, output width * output height)``.
+      You can enable it for calls to conv2d by setting ``THEANO_FLAGS=optimizer_including=conv_gemm``
      in your environment. This is not enabled by default because it
-      uses some extra memory. 
+      uses some extra memory. MM means matrix multiply.

 .. autofunction:: theano.tensor.nnet.conv.conv2d
 .. autofunction:: theano.tensor.nnet.Conv3D.conv3D
 .. autofunction:: theano.tensor.nnet.conv3d2d.conv3d
 .. autofunction:: theano.sandbox.cuda.fftconv.conv2d_fft
-.. autofunction:: theano.sandbox.cuda.blas.GpuCorrMM
--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -499,16 +499,21 @@ gpu_ger_inplace = GpuGer(inplace=True)


 class GpuCorrMM(GpuOp):
-    """
-    Author: Arjun Jain
-    Implement the caffe convolution
+    """GPU correlation implementation using Matrix Multiply.
+
+    :note: It doesn't implement the grad. So you should use it by
+        enabling the Theano flag ``optimizer_including=conv_gemm`` and
+        using :func:`conv2d <theano.tensor.nnet.conv.conv2d>`.
+
    """
    def __init__(self, border_mode,
            subsample=(1, 1),
            pad=0):
        """
        :param border_mode: "valid" or "full"
-        :param subsample: not yet supported
+        :param subsample: the subsample operation applied on each output image.
+            Should be a tuple with 2 elements.
+            (sv, sh) is equivalent to GpuCorrMM(...)(...)[:,:,::sv, ::sh]
        :param pad: not yet supported
        """
        self.border_mode = border_mode
@@ -552,7 +557,6 @@ class GpuCorrMM(GpuOp):
        return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])

    def flops(self, inputs, outputs):
-        """ Useful with the hack in profilemode to print the MFlops"""
        images, kerns = inputs
        out, = outputs
        assert images[1] == kerns[1]

--- a/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
@@ -640,7 +640,6 @@ def test_valid():
    mode = theano_mode.including("conv_gemm")

    version = [-1]
-    # Remove case not supported
    # Add tests with strided inputs by still square images and filters.
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
    shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2))

--- a/theano/tensor/nnet/Conv3D.py
+++ b/theano/tensor/nnet/Conv3D.py
@@ -40,7 +40,9 @@ from theano.gradient import grad_undefined
 #the output function is only defined when dr, dc, dt are natural numbers.

 class Conv3D(theano.Op):
-    """ 3D "convolution" of multiple filters on a minibatch (does not flip the kernel, moves kernel with a user specified stride) """
+    """ 3D `convolution` of multiple filters on a minibatch
+        :note: does not flip the kernel, moves kernel with a user specified stride
+    """
    def __eq__(self,other):
        return type(self) == type(other)