Commit 95d3add9 authored by abergeron

Merge pull request #2239 from nouiz/dnn

Dnn default and doc
.. _libdoc_cuda_dnn:

================================
:mod:`sandbox.cuda.dnn` -- cuDNN
================================

.. moduleauthor:: LISA
`cuDNN <https://developer.nvidia.com/cuDNN>`_ is an NVIDIA library with
functionality used by deep neural networks. It provides optimized
versions of some operations, such as convolution. cuDNN does not
currently ship with CUDA 6.5; you must download and install it
yourself.
To install it, decompress the downloaded file and make the ``*.h`` and
``*.so*`` files available to the compilation environment. On Linux,
this can be done by setting the environment variables
``LD_LIBRARY_PATH``, ``LIBRARY_PATH`` and ``CPATH`` to the
uncompressed directory path. Separate multiple directories with ``:``,
as in the ``PATH`` environment variable. Alternatively, you can copy
the ``*.h`` files to ``/usr/include`` and the ``*.so*`` files to
``/lib64``.
By default, Theano detects whether it can use cuDNN and, if so, uses
it. Otherwise, Theano optimizations will not introduce cuDNN ops, so
Theano still works if the user did not insert them manually.
To get an error when Theano cannot use cuDNN, use the Theano flag
``optimizer_including=cudnn``.
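
A quick way to see what was detected is to query the availability
helper that the test suite below also uses; a minimal sketch, assuming
a CUDA-enabled installation:

.. code-block:: python

    import theano.sandbox.cuda.dnn

    # True when Theano found the cuDNN headers and library and can
    # compile against them; False otherwise.
    print(theano.sandbox.cuda.dnn.dnn_available())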
.. note::

   Normally you should not call GPU Ops directly, but the CPU interface
   currently does not allow all options supported by cuDNN ops. So it is
   possible that you will need to call them manually.
Functions
=========

.. automodule:: theano.sandbox.cuda.dnn
   :members: dnn_conv, dnn_pool
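
As a minimal sketch of these helpers (the exact keyword arguments such
as ``ws`` and ``stride`` are assumptions based on common usage, not
confirmed by this page), assuming a CUDA-enabled installation:

.. code-block:: python

    import theano
    import theano.tensor as T
    from theano.sandbox.cuda import dnn

    images = T.ftensor4('images')    # (batch, channels, rows, cols)
    kernels = T.ftensor4('kernels')  # (n_filters, channels, k_rows, k_cols)

    # Convolution followed by 2x2 max pooling, both dispatched to cuDNN.
    conv = dnn.dnn_conv(images, kernels)
    pooled = dnn.dnn_pool(conv, ws=(2, 2), stride=(2, 2))

    f = theano.function([images, kernels], pooled)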
Convolution Ops
===============

.. automodule:: theano.sandbox.cuda.dnn
   :members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI
Pooling Ops
===========

.. automodule:: theano.sandbox.cuda.dnn
   :members: GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad
Softmax Ops
===========

.. automodule:: theano.sandbox.cuda.dnn
   :members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
@@ -13,6 +13,7 @@
 .. toctree::
     :maxdepth: 1

+    op
     var
     type
-    op
+    dnn
@@ -1788,7 +1788,8 @@ class GCC_compiler(object):
         return cxxflags

     @staticmethod
-    def try_compile_tmp(src_code, tmp_prefix='', flags=(), try_run=False):
+    def try_compile_tmp(src_code, tmp_prefix='', flags=(),
+                        try_run=False, output=False):
         """Try to compile (and run) a test program.

         This is useful in various occasions, to check if libraries
@@ -1799,6 +1800,7 @@ class GCC_compiler(object):
         If try_run is False, returns the compilation status.
         If try_run is True, returns a (compile_status, run_status) pair.
+        If output is True, stdout and stderr are returned as well.
         """
         if not theano.config.cxx:
             return False
@@ -1818,14 +1820,14 @@ class GCC_compiler(object):
             os.write(fd, src_code)
             os.close(fd)
             fd = None
-            p_ret = call_subprocess_Popen(
+            out, err, p_ret = output_subprocess_Popen(
                 ['g++', path, '-o', exe_path] + flags)
             if p_ret != 0:
                 compilation_ok = False
             elif try_run:
                 # Try to execute the program
                 try:
-                    p_ret = call_subprocess_Popen([exe_path])
+                    out, err, p_ret = output_subprocess_Popen([exe_path])
                     run_ok = (p_ret == 0)
                 finally:
                     os.remove(exe_path)
@@ -1839,13 +1841,18 @@ class GCC_compiler(object):
         except OSError, e:
             compilation_ok = False

-        if not try_run:
+        if not try_run and not output:
             return compilation_ok
-        else:
+        elif not try_run and output:
+            return (compilation_ok, out, err)
+        elif not output:
             return (compilation_ok, run_ok)
+        else:
+            return (compilation_ok, run_ok, out, err)
     @staticmethod
-    def try_flags(flag_list):
+    def try_flags(flag_list, preambule="", body="",
+                  try_run=False, output=False):
         '''
         Try to compile a dummy file with these flags.

@@ -1856,13 +1863,16 @@ class GCC_compiler(object):
             return False

         code = b("""
+        %(preambule)s
         int main(int argc, char** argv)
         {
+            %(body)s
             return 0;
         }
-        """)
+        """ % locals())
         return GCC_compiler.try_compile_tmp(code, tmp_prefix='try_flags_',
-                                            flags=flag_list, try_run=False)
+                                            flags=flag_list, try_run=try_run,
+                                            output=output)

     @staticmethod
     def compile_str(module_name, src_code, location=None,
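
The net effect of the two changes can be summarized with a hypothetical
probe for cuDNN itself; the header, linker flag, and body strings below
are illustrative assumptions, but the signature and the
(status, stdout, stderr) return shape follow the diff above:

    # With try_run=False and output=True, try_flags returns
    # (compilation_ok, out, err), per the new return logic above.
    ok, out, err = GCC_compiler.try_flags(
        ['-lcudnn'],
        preambule='#include <cudnn.h>',
        body='cudnnHandle_t handle; cudnnCreate(&handle);',
        try_run=False, output=True)
    if not ok:
        print('cuDNN not usable: %s' % err)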
(Diff for one file is collapsed and not shown.)
@@ -1163,11 +1163,6 @@ def local_conv_fft_full(node):
         return

-# Needs to be registered before local_gpu_conv_legacy. Otherwise, it
-# will have priority over this optimization. We want, if cudnn is
-# available and the GPU supports it, to use it. Otherwise, the gemm
-# version should be used. If the users want the legacy convolution,
-# they should use the Theano flag to disable the dnn and/or gemm version.
 @local_optimizer([GpuConv])
 def local_gpu_conv(node):
     """
@@ -1350,7 +1345,7 @@ conv_groupopt.register("conv_fft_valid", local_conv_fft_valid, 1)
 conv_groupopt.register("conv_fft_full", local_conv_fft_full, 1)
 # Use dnn if avail, so have the dnn tag to be able to disable it.
 conv_groupopt.register('local_gpu_conv', local_gpu_conv, 10,
-                       'fast_compile', 'fast_run', 'dnn')
+                       'fast_compile', 'fast_run', 'cudnn')
 conv_groupopt.register('local_conv_gemm', local_conv_gemm, 12,
                        'fast_compile', 'fast_run')
+import logging
+import unittest
 from nose.plugins.skip import SkipTest
 import numpy
-import unittest
 import theano
+from theano.compat.six import StringIO
 from theano.gof.python25 import any
 import theano.tensor as T
 import theano.tests.unittest_tools as utt
@@ -85,7 +88,7 @@ def test_pooling_opt():
     f = theano.function(
         [x],
         max_pool_2d(x, ds=(2, 2)),
-        mode=mode_with_gpu.including("cudnn"))
+        mode=mode_with_gpu)
     assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
                 for n in f.maker.fgraph.toposort()])
@@ -97,3 +100,36 @@ def test_pooling_opt():
     assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
                 for n in f.maker.fgraph.toposort()])
+
+
+def test_dnn_tag():
+    """
+    Test that we raise an error if cuDNN is not available, and that we
+    use it when it is.
+    """
+    x = T.ftensor4()
+    old = theano.config.on_opt_error
+    theano.config.on_opt_error = "raise"
+
+    sio = StringIO()
+    handler = logging.StreamHandler(sio)
+    logging.getLogger('theano.compile.tests.test_dnn').addHandler(handler)
+    # Silence original handler when intentionally generating warning messages
+    logging.getLogger('theano').removeHandler(theano.logging_default_handler)
+    raised = False
+    try:
+        f = theano.function(
+            [x],
+            max_pool_2d(x, ds=(2, 2)),
+            mode=mode_with_gpu.including("cudnn"))
+    except RuntimeError, e:
+        assert not cuda.dnn.dnn_available()
+        raised = True
+    finally:
+        theano.config.on_opt_error = old
+        logging.getLogger('theano.compile.tests.test_dnn').removeHandler(handler)
+        logging.getLogger('theano').addHandler(theano.logging_default_handler)
+
+    if not raised:
+        assert cuda.dnn.dnn_available()
+        assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
+                    for n in f.maker.fgraph.toposort()])
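
Outside the test suite, the same behavior can be checked from a short
script; a sketch, assuming a CUDA-enabled install (the script name,
the max_pool_2d import path, and the flag values reflect Theano of
this era and are assumptions, not confirmed by this page):

    # Run with, e.g.:
    #   THEANO_FLAGS='device=gpu,optimizer_including=cudnn' python check_dnn.py
    # With optimizer_including=cudnn, function compilation raises an error
    # when cuDNN is unusable; otherwise the graph should contain GpuDnnPool.
    import theano
    import theano.tensor as T
    import theano.sandbox.cuda.dnn as dnn
    from theano.tensor.signal.downsample import max_pool_2d

    x = T.ftensor4()
    f = theano.function([x], max_pool_2d(x, ds=(2, 2)))
    assert any(isinstance(n.op, dnn.GpuDnnPool)
               for n in f.maker.fgraph.toposort())
    print('cuDNN pooling op is in use.')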