Commit 9bf778f8 authored by Frédéric Bastien, committed by GitHub

Merge pull request #4716 from nouiz/doc_cudnn

Add in the doc cuDNN BN and new CPU parallelized operations
@@ -173,3 +173,14 @@ Softmax Ops
 .. automodule:: theano.sandbox.cuda.dnn
     :noindex:
     :members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
+
+.. _libdoc_cuda_dnn_bn:
+
+Batch Normalization
+===================
+
+.. automodule:: theano.sandbox.cuda.dnn
+    :noindex:
+    :members: dnn_batch_normalization_train, dnn_batch_normalization_test
@@ -10,4 +10,7 @@
 .. moduleauthor:: LISA
 
+.. seealso:: :ref:`cuDNN batch normalization <libdoc_cuda_dnn_bn>`
+   must be added manually.
+
 .. autofunction:: theano.tensor.nnet.bn.batch_normalization
@@ -4,6 +4,13 @@
 Multi cores support in Theano
 =============================
 
+Convolution and Pooling
+=======================
+
+Since Theano 0.9dev2, convolution and pooling are parallelized on the CPU.
+
 BLAS operation
 ==============
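For readers wondering how to control this new parallelism: to the best of my knowledge, Theano's CPU parallelism is OpenMP-based, toggled by the `openmp` Theano flag, with the thread count taken from the standard `OMP_NUM_THREADS` environment variable. A sketch under those assumptions (the script name is a placeholder, not from this PR):

```shell
# Assumption: a Theano 0.9dev2 install built with OpenMP support;
# my_script.py is a hypothetical user script.
OMP_NUM_THREADS=4 THEANO_FLAGS=openmp=True python my_script.py
```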
@@ -54,7 +54,7 @@ PLATFORMS = ["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"]
 MAJOR = 0
 MINOR = 9
 MICRO = 0
-SUFFIX = "dev1"  # Should be blank except for rc's, betas, etc.
+SUFFIX = "dev2"  # Should be blank except for rc's, betas, etc.
 ISRELEASED = False
 VERSION = '%d.%d.%d%s' % (MAJOR, MINOR, MICRO, SUFFIX)
@@ -2672,12 +2672,16 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation',
     Notes
     -----
+    Requires cuDNN 5 and Theano 0.9dev2 or newer.
+
     For 4d tensors, returned values are equivalent to:
-    >>> axes = 0 if mode == 'per-activation' else (0, 2, 3)
-    >>> mean = inputs.mean(axes, keepdims=True)
-    >>> stdinv = T.inv(T.sqrt(inputs.var(axes, keepdims=True) + epsilon))
-    >>> out = (inputs - mean) * gamma * stdinv + beta
+
+    .. code-block:: python
+
+        axes = 0 if mode == 'per-activation' else (0, 2, 3)
+        mean = inputs.mean(axes, keepdims=True)
+        stdinv = T.inv(T.sqrt(inputs.var(axes, keepdims=True) + epsilon))
+        out = (inputs - mean) * gamma * stdinv + beta
     """
     ndim = inputs.ndim
     if ndim > 4:
@@ -2736,12 +2740,16 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
     Notes
     -----
+    Requires cuDNN 5 and Theano 0.9dev2 or newer.
+
     For 4d tensors, the returned value is equivalent to:
-    >>> axes = (0,) if mode == 'per-activation' else (0, 2, 3)
-    >>> gamma, beta, mean, var = (T.addbroadcast(t, *axes)
-    ...                           for t in (gamma, beta, mean, var))
-    >>> out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta
+
+    .. code-block:: python
+
+        axes = (0,) if mode == 'per-activation' else (0, 2, 3)
+        gamma, beta, mean, var = (T.addbroadcast(t, *axes)
+                                  for t in (gamma, beta, mean, var))
+        out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta
     """
     ndim = inputs.ndim
     if ndim > 4: