Commit 4c956b83 authored by Frederic

Better doc.

Parent bf4d8e37
@@ -776,27 +776,6 @@ class BaseGpuCorrMM(GpuOp):
 class GpuCorrMM(BaseGpuCorrMM):
     """GPU correlation implementation using Matrix Multiplication.
-    :note: You can either enable the Theano flag `optimizer_including=conv_gemm`
-        to automatically replace all convolution operations with `GpuCorrMM`
-        or one of its gradients, or you can use it as a replacement for
-        :func:`conv2d <theano.tensor.nnet.conv.conv2d>`, called as
-        `GpuCorrMM(subsample=...)(image, filters)`. The latter is currently
-        faster, but note that it computes a correlation -- if you need to
-        compute a convolution, flip the filters as `filters[:,:,::-1,::-1]`.
-    :warning: For 700 series Nvidia GPUs of compute capability 3.5 and CUDA 5.0
-        to 6.0, there is a bug in CUBLAS' matrix multiplication function that
-        can make GpuCorrMM or its gradients crash for some input and filter
-        shapes. So if you have a Tesla K20, Tesla K40, Quadro K6000, GeForce GT
-        640 (DDR5), GeForce GTX 780 (or Ti), GeForce GTX TITAN (or Black or Z)
-        and experience a crash, switching to CUDA 6.5 or CUDA 4.2 should fix it.
-        If this is not possible, changing the input or filter shapes (e.g., the
-        batchsize or number of filters) may also work around the CUBLAS bug.
-    """
-
-    def __init__(self, border_mode="valid",
-                 subsample=(1, 1),
-                 pad=(0, 0)):
-        """
     :param border_mode: currently supports "valid" only; "full" can be
         simulated by setting `pad="full"` (at the cost of performance), or
         by using `GpuCorrMM_gradInputs`
@@ -816,7 +795,27 @@ class GpuCorrMM(BaseGpuCorrMM):
         C-contiguous. Use :func:`gpu_contiguous
         <theano.sandbox.cuda.basic_ops.gpu_contiguous>` on these arguments
         if needed.
+    :note: You can either enable the Theano flag `optimizer_including=conv_gemm`
+        to automatically replace all convolution operations with `GpuCorrMM`
+        or one of its gradients, or you can use it as a replacement for
+        :func:`conv2d <theano.tensor.nnet.conv.conv2d>`, called as
+        `GpuCorrMM(subsample=...)(image, filters)`. The latter is currently
+        faster, but note that it computes a correlation -- if you need to
+        compute a convolution, flip the filters as `filters[:,:,::-1,::-1]`.
+    :warning: For 700 series Nvidia GPUs of compute capability 3.5 and CUDA 5.0
+        to 6.0, there is a bug in CUBLAS' matrix multiplication function that
+        can make GpuCorrMM or its gradients crash for some input and filter
+        shapes. So if you have a Tesla K20, Tesla K40, Quadro K6000, GeForce GT
+        640 (DDR5), GeForce GTX 780 (or Ti), GeForce GTX TITAN (or Black or Z)
+        and experience a crash, switching to CUDA 6.5 or CUDA 4.2 should fix it.
+        If this is not possible, changing the input or filter shapes (e.g., the
+        batchsize or number of filters) may also work around the CUBLAS bug.
     """
+
+    def __init__(self, border_mode="valid",
+                 subsample=(1, 1),
+                 pad=(0, 0)):
         super(GpuCorrMM, self).__init__(border_mode, subsample, pad)

     def make_node(self, img, kern):
...
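The flipped-filter relationship stated in the docstring's `:note:` (correlation with `filters[:,:,::-1,::-1]` equals convolution) can be checked without a GPU. The sketch below is illustration only, not part of the commit: it implements "valid"-mode 2D correlation and convolution directly from their definitions in plain NumPy, then shows they agree once the kernel is flipped.

```python
import numpy as np

def corr2d_valid(img, k):
    """'valid'-mode 2D cross-correlation: no kernel flip (what GpuCorrMM computes)."""
    H, W = img.shape
    h, w = k.shape
    out = np.empty((H - h + 1, W - w + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = (img[i:i + h, j:j + w] * k).sum()
    return out

def conv2d_valid(img, k):
    """'valid'-mode 2D convolution, written from its definition (kernel flipped)."""
    H, W = img.shape
    h, w = k.shape
    out = np.zeros((H - h + 1, W - w + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            for u in range(h):
                for v in range(w):
                    out[i, j] += img[i + u, j + v] * k[h - 1 - u, w - 1 - v]
    return out

rng = np.random.RandomState(0)
image = rng.rand(8, 8)
filt = rng.rand(3, 3)

# Correlating with the spatially flipped filter reproduces the convolution,
# which is exactly the filters[:, :, ::-1, ::-1] trick from the docstring
# (there, the slices act on the last two axes of a 4D filter bank).
assert np.allclose(conv2d_valid(image, filt),
                   corr2d_valid(image, filt[::-1, ::-1]))
```

This is why `GpuCorrMM` can serve as a drop-in for `conv2d`: the gemm-based kernel only ever computes correlations, and callers who need a true convolution flip the filters once up front.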