提交 eabfd16f authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2145 from abergeron/dnn_conv_doc

Dnn conv doc
...@@ -95,12 +95,16 @@ TODO: Give examples on how to use these things! They are pretty complicated. ...@@ -95,12 +95,16 @@ TODO: Give examples on how to use these things! They are pretty complicated.
f = theano.function(..., mode=mode) f = theano.function(..., mode=mode)
- :func:`GpuDnnConv <theano.sandbox.cuda.dnn.GpuDnnConv>` GPU-only - :func:`dnn_conv <theano.sandbox.cuda.dnn.dnn_conv>` GPU-only
convolution using NVIDIA's cuDNN library. To enable it (and convolution using NVIDIA's cuDNN library. To have conv2d()
other cudnn-accelerated ops), set automatically converted set
``THEANO_FLAGS=optimizer_including=cudnn`` in your environment. ``THEANO_FLAGS=optimizer_including=cudnn`` in your environment.
This requires that you have cuDNN installed and available. It This will also replace other operations by their
also requires a GPU with compute capability 3.0 or more. cuDNN-accelerated equivalent. This requires that you have cuDNN
installed and available. It requires a GPU with compute
capability 3.0 or more.
Since it has a gradient defined it can also be used manually.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>` - :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
3D Convolution applying multi-channel 3D filters to batches of 3D Convolution applying multi-channel 3D filters to batches of
...@@ -146,6 +150,7 @@ TODO: Give examples on how to use these things! They are pretty complicated. ...@@ -146,6 +150,7 @@ TODO: Give examples on how to use these things! They are pretty complicated.
.. autofunction:: theano.tensor.nnet.conv.conv2d .. autofunction:: theano.tensor.nnet.conv.conv2d
.. autofunction:: theano.sandbox.cuda.fftconv.conv2d_fft .. autofunction:: theano.sandbox.cuda.fftconv.conv2d_fft
.. autofunction:: theano.sandbox.cuda.blas.GpuCorrMM .. autofunction:: theano.sandbox.cuda.blas.GpuCorrMM
.. autofunction:: theano.sandbox.cuda.dnn.dnn_conv
.. autofunction:: theano.tensor.nnet.Conv3D.conv3D .. autofunction:: theano.tensor.nnet.Conv3D.conv3D
.. autofunction:: theano.sandbox.cuda.fftconv.conv3d_fft .. autofunction:: theano.sandbox.cuda.fftconv.conv3d_fft
.. autofunction:: theano.tensor.nnet.conv3d2d.conv3d .. autofunction:: theano.tensor.nnet.conv3d2d.conv3d
...@@ -7,7 +7,8 @@ from theano.gof.type import CDataType ...@@ -7,7 +7,8 @@ from theano.gof.type import CDataType
from theano.compat import PY3 from theano.compat import PY3
from theano.compat.six import StringIO from theano.compat.six import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp, active_device_number, device_properties from theano.sandbox.cuda import (GpuOp, cuda_available, active_device_number,
device_properties)
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
gpu_contiguous) gpu_contiguous)
from theano.sandbox.cuda.blas import GpuConv from theano.sandbox.cuda.blas import GpuConv
...@@ -376,12 +377,24 @@ class GpuDnnConvGradI(GpuDnnConvBase): ...@@ -376,12 +377,24 @@ class GpuDnnConvGradI(GpuDnnConvBase):
conv_op = 'cudnnConvolutionBackwardData' conv_op = 'cudnnConvolutionBackwardData'
from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
gpu_optimizer)
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode='conv'): conv_mode='conv'):
"""
GPU convolution using cuDNN from NVIDIA.
The memory layout to use is 'bc01', that is 'batch', 'channel',
'first dim', 'second dim' in that order.
:param img: images to do the convolution over
:param kerns: convolution filters
:param border_mode: one of 'valid', 'full' (default: 'valid')
:param subsample: perform subsampling of the output (default: (1, 1))
:param conv_mode: perform convolution (kernels flipped) or cross-correlation. One of 'conv', 'cross'. (default: 'conv')
:warning: The cuDNN library only works with GPUs that have a compute
capability of 3.0 or higher. This means that older GPUs will not
work with this Op.
"""
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
...@@ -389,20 +402,6 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -389,20 +402,6 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
return GpuDnnConv()(img, kerns, desc) return GpuDnnConv()(img, kerns, desc)
@local_optimizer([GpuConv])
def local_conv_dnn(node):
    """Graph optimizer: replace a ``GpuConv`` node with the cuDNN-backed
    :func:`dnn_conv` when the convolution parameters are supported.

    Returns a one-element list containing the replacement variable, or
    ``None`` (implicit) to leave the node unchanged.
    """
    if isinstance(node.op, GpuConv):
        # cuDNN only handles 'full' and 'valid' border modes; bail out
        # (return None) for anything else so the original op is kept.
        if node.op.border_mode not in ['full', 'valid']:
            return
        img, kern = node.inputs
        border_mode = node.op.border_mode
        subsample = node.op.subsample
        return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
                         border_mode=border_mode, subsample=subsample)]
# Registered under the 'cudnn' tag: only active when the user enables
# optimizer_including=cudnn.
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
class GpuDnnSoftmax(DnnBase): class GpuDnnSoftmax(DnnBase):
""" """
Op for the cuDNN Softmax. Op for the cuDNN Softmax.
...@@ -555,12 +554,32 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -555,12 +554,32 @@ err%(name)s = cudnnSoftmaxForward(
return (0, 3) return (0, 3)
@local_optimizer([GpuSoftmax]) # We need this since other stuff from opt is not importable.
def local_softmax_dnn(node): if cuda_available:
if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x') from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins)) gpu_optimizer)
out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out] @local_optimizer([GpuConv])
def local_conv_dnn(node):
if isinstance(node.op, GpuConv):
if node.op.border_mode not in ['full', 'valid']:
return
img, kern = node.inputs
border_mode = node.op.border_mode
subsample = node.op.subsample
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)]
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
@local_optimizer([GpuSoftmax])
def local_softmax_dnn(node):
    """Graph optimizer: replace a ``GpuSoftmax`` node with the cuDNN
    softmax (``GpuDnnSoftmax``).

    The 2D input is expanded to 4D because the cuDNN op works on 4D
    ('bc01') tensors; the result is shuffled back down to 2D.
    """
    if isinstance(node.op, GpuSoftmax):
        # Add two broadcastable trailing dims: (b, c) -> (b, c, 1, 1).
        ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
        out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
        # Drop the extra dims and ensure the result is a CUDA ndarray variable.
        out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
        return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn') gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论