提交 666e371d authored 作者: Frederic's avatar Frederic

Now by default, if cuDNN is available, always use it.

上级 03f42b36
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
.. moduleauthor:: LISA .. moduleauthor:: LISA
Normally you should not call directly those Ops, but the CPU interface Normally you should not call directly GPU Ops, but the CPU interface
currently don't allow all option supported by those ops, so it is currently does not allow all options supported by cuDNN ops. So it is
possible that you need to call them manually. possible that you need to call them manually.
`cuDNN <https://developer.nvidia.com/cuDNN>`_ is NVIDIA library with `cuDNN <https://developer.nvidia.com/cuDNN>`_ is NVIDIA library with
...@@ -16,21 +16,20 @@ implementation of some operation like the convolution. cuDNN currently ...@@ -16,21 +16,20 @@ implementation of some operation like the convolution. cuDNN currently
is not installed with CUDA 6.5. You must download it and install it is not installed with CUDA 6.5. You must download it and install it
yourself. yourself.
To install it, decompress the downloaded file and make the *.h and To install it, decompress the downloaded file and make the ``*.h`` and
*.so* files available to the compilation environment. On Linux, this ``*.so*`` files available to the compilation environment. On Linux,
can be done by setting the environment variable LD_LIBRARY_PATH, this can be done by setting the environment variable
LIBRARY_PATH and CPATH to the uncompressed directory path. They work ``LD_LIBRARY_PATH``, ``LIBRARY_PATH`` and ``CPATH`` to the
the same way as PATH. Or you can copy the *.h files to /usr/include uncompressed directory path. They work the same way as ``PATH``. Or
and the files *.so* to /lib64. you can copy the ``*.h`` files to ``/usr/include`` and the files
``*.so*`` to ``/lib64``.
Then you need to tell Theano to use it. For the convolution, if cuDNN By default, Theano will detect if it can use cuDNN. If so, it will use
is available, we will use it by default, but not for other it. If not, Theano optimizations will not introduce cuDNN ops. So
operations. Also, it do not give you an error in case it can't use Theano will still work if the user did not introduce them manually.
cuDNN as it will fall back to a slower and more memory hungry version.
To enable the use of all cuDNN operation and get an error if we can't
use cuDNN, use the Theano flags: ``optimizer_including=cudnn``.
To get an error if Theano can not use cuDNN, use this Theano flag:
``optimizer_including=cudnn``.
Functions Functions
...@@ -39,8 +38,20 @@ Functions ...@@ -39,8 +38,20 @@ Functions
.. automodule:: theano.sandbox.cuda.dnn .. automodule:: theano.sandbox.cuda.dnn
:members: dnn_conv, dnn_pool :members: dnn_conv, dnn_pool
Ops Convolution Ops
=== ===============
.. automodule:: theano.sandbox.cuda.dnn
:members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI
Pooling Ops
===========
.. automodule:: theano.sandbox.cuda.dnn
:members: GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad
Softmax Ops
===========
.. automodule:: theano.sandbox.cuda.dnn .. automodule:: theano.sandbox.cuda.dnn
:members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI, GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad, GpuDnnSoftmax :members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
...@@ -2,6 +2,7 @@ import os ...@@ -2,6 +2,7 @@ import os
import theano import theano
from theano import Apply, tensor from theano import Apply, tensor
from theano.gof import Optimizer
from theano.gof.type import CDataType from theano.gof.type import CDataType
from theano.compat import PY3 from theano.compat import PY3
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
...@@ -12,6 +13,7 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, ...@@ -12,6 +13,7 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax, from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
GpuDownsampleFactorMaxGrad) GpuDownsampleFactorMaxGrad)
from theano.sandbox.cuda.nnet import GpuSoftmax from theano.sandbox.cuda.nnet import GpuSoftmax
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
...@@ -54,14 +56,6 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) { ...@@ -54,14 +56,6 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
""" % dict(var=var, err=err, desc=desc, fail=fail) """ % dict(var=var, err=err, desc=desc, fail=fail)
def raise_no_dnn():
""" Raise a RuntimeError if cudnn can't be used"""
if not dnn_available():
raise RuntimeError(
"cuDNN optimization was enabled, but cuDNN is not available. " +
dnn_available.msg)
class DnnBase(GpuOp): class DnnBase(GpuOp):
""" """
Creates a handle for cudnn and pulls in the cudnn libraries and headers. Creates a handle for cudnn and pulls in the cudnn libraries and headers.
...@@ -981,11 +975,14 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -981,11 +975,14 @@ err%(name)s = cudnnSoftmaxForward(
# We need this since other stuff from opt is not importable. # We need this since other stuff from opt is not importable.
if cuda_available: if cuda_available:
from theano.sandbox.cuda.opt import local_optimizer, gpu_optimizer from theano.sandbox.cuda.opt import (
local_optimizer, gpu_optimizer, gpu_seqopt)
@register_opt('cudnn')
@local_optimizer([GpuConv]) @local_optimizer([GpuConv])
def local_conv_dnn(node): def local_conv_dnn(node):
raise_no_dnn() if not dnn_available():
return
if isinstance(node.op, GpuConv): if isinstance(node.op, GpuConv):
if node.op.border_mode not in ['full', 'valid']: if node.op.border_mode not in ['full', 'valid']:
return return
...@@ -995,10 +992,11 @@ if cuda_available: ...@@ -995,10 +992,11 @@ if cuda_available:
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern), return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)] border_mode=border_mode, subsample=subsample)]
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn') @register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax]) @local_optimizer([GpuDownsampleFactorMax])
def local_pool_dnn(node): def local_pool_dnn(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMax): if isinstance(node.op, GpuDownsampleFactorMax):
if node.op.ignore_border: if node.op.ignore_border:
return return
...@@ -1006,10 +1004,11 @@ if cuda_available: ...@@ -1006,10 +1004,11 @@ if cuda_available:
ds = node.op.ds ds = node.op.ds
return [dnn_pool(gpu_contiguous(img), ds, ds)] return [dnn_pool(gpu_contiguous(img), ds, ds)]
gpu_optimizer.register("pool_cudnn", local_pool_dnn, 'cudnn') @register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMaxGrad]) @local_optimizer([GpuDownsampleFactorMaxGrad])
def local_pool_dnn_grad(node): def local_pool_dnn_grad(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMaxGrad): if isinstance(node.op, GpuDownsampleFactorMaxGrad):
if node.op.ignore_border: if node.op.ignore_border:
return return
...@@ -1022,11 +1021,11 @@ if cuda_available: ...@@ -1022,11 +1021,11 @@ if cuda_available:
gpu_contiguous(inp_grad), gpu_contiguous(inp_grad),
gpu_contiguous(out), desc)] gpu_contiguous(out), desc)]
gpu_optimizer.register("pool_cudnn_grad", local_pool_dnn_grad, 'cudnn') @register_opt('cudnn')
@local_optimizer([GpuSoftmax]) @local_optimizer([GpuSoftmax])
def local_softmax_dnn(node): def local_softmax_dnn(node):
raise_no_dnn() if not dnn_available():
return
if isinstance(node.op, GpuSoftmax): if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x') ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
ins = gpu_contiguous(ins) ins = gpu_contiguous(ins)
...@@ -1034,4 +1033,11 @@ if cuda_available: ...@@ -1034,4 +1033,11 @@ if cuda_available:
out = as_cuda_ndarray_variable(out.dimshuffle(0, 1)) out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out] return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn') class NoCuDNNRaise(Optimizer):
def apply(self, fgraph):
""" Raise a RuntimeError if cudnn can't be used"""
if not dnn_available():
raise RuntimeError(
"cuDNN optimization was enabled, but cuDNN is not available. " +
dnn_available.msg)
gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
import logging
import unittest
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import numpy import numpy
import unittest
import theano import theano
from theano.compat.six import StringIO
from theano.gof.python25 import any from theano.gof.python25 import any
import theano.tensor as T import theano.tensor as T
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
...@@ -85,7 +88,7 @@ def test_pooling_opt(): ...@@ -85,7 +88,7 @@ def test_pooling_opt():
f = theano.function( f = theano.function(
[x], [x],
max_pool_2d(x, ds=(2, 2)), max_pool_2d(x, ds=(2, 2)),
mode=mode_with_gpu.including("cudnn")) mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool) assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
...@@ -97,3 +100,36 @@ def test_pooling_opt(): ...@@ -97,3 +100,36 @@ def test_pooling_opt():
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad) assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
def test_dnn_tag():
    """
    Check the behaviour of the ``cudnn`` optimizer tag.

    A max-pooling graph is compiled with ``mode_with_gpu.including("cudnn")``
    while ``on_opt_error`` is set to ``"raise"``.  Two outcomes are accepted:

    * compilation raises ``RuntimeError``, in which case
      ``cuda.dnn.dnn_available()`` must be false;
    * compilation succeeds, in which case ``cuda.dnn.dnn_available()`` must
      be true and the compiled graph must contain a ``GpuDnnPool`` node.
    """
    x = T.ftensor4()
    old = theano.config.on_opt_error
    theano.config.on_opt_error = "raise"

    sio = StringIO()
    handler = logging.StreamHandler(sio)
    # NOTE(review): the captured stream is never inspected and the logger
    # name does not obviously match this module -- confirm it is intended.
    logging.getLogger('theano.compile.tests.test_dnn').addHandler(handler)
    # Silence original handler when intentionally generating warning messages
    logging.getLogger('theano').removeHandler(theano.logging_default_handler)
    raised = False
    try:
        f = theano.function(
            [x],
            max_pool_2d(x, ds=(2, 2)),
            mode=mode_with_gpu.including("cudnn"))
    except RuntimeError:
        # The exception object itself is not needed; this spelling is valid
        # on both Python 2 and Python 3 (unlike ``except RuntimeError, e``).
        assert not cuda.dnn.dnn_available()
        raised = True
    finally:
        # Always restore the global config and logging handlers, whatever
        # the outcome of the compilation above.
        theano.config.on_opt_error = old
        logging.getLogger(
            'theano.compile.tests.test_dnn').removeHandler(handler)
        logging.getLogger('theano').addHandler(theano.logging_default_handler)

    if not raised:
        assert cuda.dnn.dnn_available()
        assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
                    for n in f.maker.fgraph.toposort()])
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论