提交 eabfd16f authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2145 from abergeron/dnn_conv_doc

Dnn conv doc
......@@ -95,12 +95,16 @@ TODO: Give examples on how to use these things! They are pretty complicated.
f = theano.function(..., mode=mode)
- :func:`GpuDnnConv <theano.sandbox.cuda.dnn.GpuDnnConv>` GPU-only
convolution using NVIDIA's cuDNN library. To enable it (and
other cudnn-accelerated ops), set
- :func:`dnn_conv <theano.sandbox.cuda.dnn.dnn_conv>` GPU-only
convolution using NVIDIA's cuDNN library. To have conv2d()
automatically converted set
``THEANO_FLAGS=optimizer_including=cudnn`` in your environment.
This requires that you have cuDNN installed and available. It
also requires a GPU with compute capability 3.0 or more.
This will also replace other operations by their a
cuDNN-accelerated equivalent. This requires that you have cuDNN
installed and available. It requires a GPU with compute
capability 3.0 or more.
Since it has a gradient defined it can also be used manually.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`
3D Convolution applying multi-channel 3D filters to batches of
......@@ -146,6 +150,7 @@ TODO: Give examples on how to use these things! They are pretty complicated.
.. autofunction:: theano.tensor.nnet.conv.conv2d
.. autofunction:: theano.sandbox.cuda.fftconv.conv2d_fft
.. autofunction:: theano.sandbox.cuda.blas.GpuCorrMM
.. autofunction:: theano.sandbox.cuda.dnn.dnn_conv
.. autofunction:: theano.tensor.nnet.Conv3D.conv3D
.. autofunction:: theano.sandbox.cuda.fftconv.conv3d_fft
.. autofunction:: theano.tensor.nnet.conv3d2d.conv3d
......@@ -7,7 +7,8 @@ from theano.gof.type import CDataType
from theano.compat import PY3
from theano.compat.six import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp, active_device_number, device_properties
from theano.sandbox.cuda import (GpuOp, cuda_available, active_device_number,
device_properties)
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
gpu_contiguous)
from theano.sandbox.cuda.blas import GpuConv
......@@ -376,12 +377,24 @@ class GpuDnnConvGradI(GpuDnnConvBase):
conv_op = 'cudnnConvolutionBackwardData'
from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
gpu_optimizer)
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode='conv'):
"""
GPU convolution using cuDNN from NVIDIA.
The memory layout to use is 'bc01', that is 'batch', 'channel',
'first dim', 'second dim' in that order.
:param img: images to do the convolution over
:param kerns: convolution filters
:param border_mode: one of 'valid', 'full' (default: 'valid')
:param subsample: perform subsampling of the output (default: (1, 1))
:param conv_mode: perform convolution (kernels flipped) or cross-correlation. One of 'conv', 'cross'. (default: 'conv')
:warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
work with this Op.
"""
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
......@@ -389,20 +402,6 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
return GpuDnnConv()(img, kerns, desc)
@local_optimizer([GpuConv])
def local_conv_dnn(node):
if isinstance(node.op, GpuConv):
if node.op.border_mode not in ['full', 'valid']:
return
img, kern = node.inputs
border_mode = node.op.border_mode
subsample = node.op.subsample
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)]
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
class GpuDnnSoftmax(DnnBase):
"""
Op for the cuDNN Softmax.
......@@ -555,12 +554,32 @@ err%(name)s = cudnnSoftmaxForward(
return (0, 3)
@local_optimizer([GpuSoftmax])
def local_softmax_dnn(node):
# We need this since other stuff from opt is not importable.
if cuda_available:
from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
gpu_optimizer)
@local_optimizer([GpuConv])
def local_conv_dnn(node):
if isinstance(node.op, GpuConv):
if node.op.border_mode not in ['full', 'valid']:
return
img, kern = node.inputs
border_mode = node.op.border_mode
subsample = node.op.subsample
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)]
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
@local_optimizer([GpuSoftmax])
def local_softmax_dnn(node):
if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论