提交 666e371d authored 作者: Frederic's avatar Frederic

Now by default, if cuDNN is available, always use it.

上级 03f42b36
......@@ -6,8 +6,8 @@
.. moduleauthor:: LISA
Normally you should not call directly those Ops, but the CPU interface
currently don't allow all option supported by those ops, so it is
Normally you should not call GPU Ops directly, but the CPU interface
currently does not allow all options supported by cuDNN ops. So it is
possible that you need to call them manually.
`cuDNN <https://developer.nvidia.com/cuDNN>`_ is an NVIDIA library with
......@@ -16,21 +16,20 @@ implementation of some operation like the convolution. cuDNN currently
is not installed with CUDA 6.5. You must download it and install it
yourself.
To install it, decompress the downloaded file and make the *.h and
*.so* files available to the compilation environment. On Linux, this
can be done by setting the environment variable LD_LIBRARY_PATH,
LIBRARY_PATH and CPATH to the uncompressed directory path. They work
the same way as PATH. Or you can copy the *.h files to /usr/include
and the files *.so* to /lib64.
To install it, decompress the downloaded file and make the ``*.h`` and
``*.so*`` files available to the compilation environment. On Linux,
this can be done by setting the environment variable
``LD_LIBRARY_PATH``, ``LIBRARY_PATH`` and ``CPATH`` to the
uncompressed directory path. They work the same way as ``PATH``. Or
you can copy the ``*.h`` files to ``/usr/include`` and the files
``*.so*`` to ``/lib64``.
Then you need to tell Theano to use it. For the convolution, if cuDNN
is available, we will use it by default, but not for other
operations. Also, it does not give you an error in case it can't use
cuDNN as it will fall back to a slower and more memory hungry version.
To enable the use of all cuDNN operation and get an error if we can't
use cuDNN, use the Theano flags: ``optimizer_including=cudnn``.
By default, Theano will detect whether it can use cuDNN. If so, it will
use it. If not, Theano optimizations will not introduce cuDNN ops, so
Theano will still work if the user did not introduce them manually.
To get an error when Theano cannot use cuDNN, use the Theano flag
``optimizer_including=cudnn``.
Functions
......@@ -39,8 +38,20 @@ Functions
.. automodule:: theano.sandbox.cuda.dnn
:members: dnn_conv, dnn_pool
Ops
===
Convolution Ops
===============
.. automodule:: theano.sandbox.cuda.dnn
:members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI
Pooling Ops
===========
.. automodule:: theano.sandbox.cuda.dnn
:members: GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad
Softmax Ops
===========
.. automodule:: theano.sandbox.cuda.dnn
:members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI, GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad, GpuDnnSoftmax
:members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
......@@ -2,6 +2,7 @@ import os
import theano
from theano import Apply, tensor
from theano.gof import Optimizer
from theano.gof.type import CDataType
from theano.compat import PY3
from theano.sandbox.cuda.type import CudaNdarrayType
......@@ -12,6 +13,7 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
GpuDownsampleFactorMaxGrad)
from theano.sandbox.cuda.nnet import GpuSoftmax
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
......@@ -54,14 +56,6 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
""" % dict(var=var, err=err, desc=desc, fail=fail)
def raise_no_dnn():
    """Fail fast when cuDNN cannot be used.

    Raises
    ------
    RuntimeError
        If ``dnn_available()`` reports that cuDNN is unusable; the
        reason recorded on ``dnn_available.msg`` is appended to the
        error message.
    """
    if dnn_available():
        return
    raise RuntimeError(
        "cuDNN optimization was enabled, but cuDNN is not available. "
        + dnn_available.msg)
class DnnBase(GpuOp):
"""
Creates a handle for cudnn and pulls in the cudnn libraries and headers.
......@@ -981,11 +975,14 @@ err%(name)s = cudnnSoftmaxForward(
# We need this since other stuff from opt is not importable.
if cuda_available:
from theano.sandbox.cuda.opt import local_optimizer, gpu_optimizer
from theano.sandbox.cuda.opt import (
local_optimizer, gpu_optimizer, gpu_seqopt)
@register_opt('cudnn')
@local_optimizer([GpuConv])
def local_conv_dnn(node):
raise_no_dnn()
if not dnn_available():
return
if isinstance(node.op, GpuConv):
if node.op.border_mode not in ['full', 'valid']:
return
......@@ -995,10 +992,11 @@ if cuda_available:
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)]
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
@register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax])
def local_pool_dnn(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMax):
if node.op.ignore_border:
return
......@@ -1006,10 +1004,11 @@ if cuda_available:
ds = node.op.ds
return [dnn_pool(gpu_contiguous(img), ds, ds)]
gpu_optimizer.register("pool_cudnn", local_pool_dnn, 'cudnn')
@register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMaxGrad])
def local_pool_dnn_grad(node):
if not dnn_available():
return
if isinstance(node.op, GpuDownsampleFactorMaxGrad):
if node.op.ignore_border:
return
......@@ -1022,11 +1021,11 @@ if cuda_available:
gpu_contiguous(inp_grad),
gpu_contiguous(out), desc)]
gpu_optimizer.register("pool_cudnn_grad", local_pool_dnn_grad, 'cudnn')
@register_opt('cudnn')
@local_optimizer([GpuSoftmax])
def local_softmax_dnn(node):
raise_no_dnn()
if not dnn_available():
return
if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
ins = gpu_contiguous(ins)
......@@ -1034,4 +1033,11 @@ if cuda_available:
out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
class NoCuDNNRaise(Optimizer):
    """Global optimizer pass that aborts compilation if cuDNN is missing.

    Registered under the 'cudnn' tag so that explicitly requesting the
    cuDNN optimizations turns "cuDNN unavailable" into a hard error
    instead of a silent fallback.
    """

    def apply(self, fgraph):
        """Raise a RuntimeError if cudnn can't be used."""
        # Abort the whole compilation rather than silently skipping
        # cuDNN: the user opted in via optimizer_including=cudnn.
        if dnn_available():
            return
        raise RuntimeError(
            "cuDNN optimization was enabled, but cuDNN is not available. "
            + dnn_available.msg)
gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
import logging
import unittest
from nose.plugins.skip import SkipTest
import numpy
import unittest
import theano
from theano.compat.six import StringIO
from theano.gof.python25 import any
import theano.tensor as T
import theano.tests.unittest_tools as utt
......@@ -85,7 +88,7 @@ def test_pooling_opt():
f = theano.function(
[x],
max_pool_2d(x, ds=(2, 2)),
mode=mode_with_gpu.including("cudnn"))
mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
......@@ -97,3 +100,36 @@ def test_pooling_opt():
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()])
def test_dnn_tag():
    """
    Check the 'cudnn' optimizer tag: compilation must crash when cuDNN
    is unavailable, and must actually use the cuDNN pooling op when it
    is available.
    """
    x = T.ftensor4()
    saved_on_opt_error = theano.config.on_opt_error
    theano.config.on_opt_error = "raise"

    log_sink = StringIO()
    handler = logging.StreamHandler(log_sink)
    logging.getLogger('theano.compile.tests.test_dnn').addHandler(handler)
    # Silence the default handler while we intentionally trigger warnings.
    logging.getLogger('theano').removeHandler(theano.logging_default_handler)

    raised = False
    try:
        f = theano.function(
            [x],
            max_pool_2d(x, ds=(2, 2)),
            mode=mode_with_gpu.including("cudnn"))
    except RuntimeError:
        # The NoCuDNNRaise optimizer should only fire without cuDNN.
        assert not cuda.dnn.dnn_available()
        raised = True
    finally:
        theano.config.on_opt_error = saved_on_opt_error
        logging.getLogger(
            'theano.compile.tests.test_dnn').removeHandler(handler)
        logging.getLogger('theano').addHandler(theano.logging_default_handler)

    if not raised:
        assert cuda.dnn.dnn_available()
        assert any(isinstance(node.op, cuda.dnn.GpuDnnPool)
                   for node in f.maker.fgraph.toposort())
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论