提交 cf2229a3 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add documenation to some dnn functions and make the file importable without a GPU.

上级 22788242
...@@ -7,7 +7,8 @@ from theano.gof.type import CDataType ...@@ -7,7 +7,8 @@ from theano.gof.type import CDataType
from theano.compat import PY3 from theano.compat import PY3
from theano.compat.six import StringIO from theano.compat.six import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp, active_device_number, device_properties from theano.sandbox.cuda import (GpuOp, cuda_available, active_device_number,
device_properties)
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
gpu_contiguous) gpu_contiguous)
from theano.sandbox.cuda.blas import GpuConv from theano.sandbox.cuda.blas import GpuConv
...@@ -376,12 +377,21 @@ class GpuDnnConvGradI(GpuDnnConvBase): ...@@ -376,12 +377,21 @@ class GpuDnnConvGradI(GpuDnnConvBase):
conv_op = 'cudnnConvolutionBackwardData' conv_op = 'cudnnConvolutionBackwardData'
from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
gpu_optimizer)
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode='conv'): conv_mode='conv'):
"""
GPU convolution using cuDNN from NVIDIA.
:param img: images to do the convolution over
:param kerns: convolution filters
:param border_mode: one of 'valid', 'full' (default: 'valid')
:param subsample: perform subsampling of the output (default: (1, 1))
:param conv_mode: perform convolution (kernels flipped) or cross-correlation. One of 'conv', 'cross'. (default: 'conv')
:warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
work with this Op.
"""
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
...@@ -389,20 +399,6 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -389,20 +399,6 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
return GpuDnnConv()(img, kerns, desc) return GpuDnnConv()(img, kerns, desc)
@local_optimizer([GpuConv])
def local_conv_dnn(node):
if isinstance(node.op, GpuConv):
if node.op.border_mode not in ['full', 'valid']:
return
img, kern = node.inputs
border_mode = node.op.border_mode
subsample = node.op.subsample
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)]
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
class GpuDnnSoftmax(DnnBase): class GpuDnnSoftmax(DnnBase):
""" """
Op for the cuDNN Softmax. Op for the cuDNN Softmax.
...@@ -555,12 +551,32 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -555,12 +551,32 @@ err%(name)s = cudnnSoftmaxForward(
return (0, 3) return (0, 3)
@local_optimizer([GpuSoftmax]) # We need this since other stuff from opt is not importable.
def local_softmax_dnn(node): if cuda_available:
from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
gpu_optimizer)
@local_optimizer([GpuConv])
def local_conv_dnn(node):
if isinstance(node.op, GpuConv):
if node.op.border_mode not in ['full', 'valid']:
return
img, kern = node.inputs
border_mode = node.op.border_mode
subsample = node.op.subsample
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)]
gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
@local_optimizer([GpuSoftmax])
def local_softmax_dnn(node):
if isinstance(node.op, GpuSoftmax): if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x') ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins)) out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
out = as_cuda_ndarray_variable(out.dimshuffle(0, 1)) out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out] return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn') gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论