提交 cdea94b3 authored 作者: Frederic's avatar Frederic

Change the default GPU 2d convolution.

上级 35e2aa0e
...@@ -22,6 +22,29 @@ ...@@ -22,6 +22,29 @@
.. moduleauthor:: LISA .. moduleauthor:: LISA
.. note::
    As of October 20, 2014, the default GPU image convolution
    changed. Now, `cuDNN <https://developer.nvidia.com/cuDNN>`_ is used if
    it is available and the selected GPU is supported by it. This gives
    faster GPU convolution without using more memory than the legacy
    convolution.

    - If cuDNN can be used, use it.
    - If not, use the gemm version (slower than cuDNN, uses more memory).
    - If the user does not want the extra memory overhead of the gemm
      version, they can enable the legacy code, which is even slower but
      does not use extra memory.
    - There is also the fft version, which is the fastest in some cases,
      but uses even more memory. It does not support striding to skip
      computation and has some shape restrictions.
    - There is also the cuda_convnet convolution in Pylearn2. It uses a
      different memory layout and has shape restrictions, but does not use
      extra memory and is faster than the legacy convolution.
TODO: Give examples on how to use these things! They are pretty complicated. TODO: Give examples on how to use these things! They are pretty complicated.
- Convolution operators implemented: - Convolution operators implemented:
......
...@@ -1109,9 +1109,36 @@ def local_gpu_softmax_with_bias(node): ...@@ -1109,9 +1109,36 @@ def local_gpu_softmax_with_bias(node):
from theano.tensor.nnet import conv from theano.tensor.nnet import conv
# Must be registered before local_gpu_conv_legacy; otherwise the legacy
# optimization would take priority. Preference order: if cuDNN is
# available and the selected GPU supports it, use cuDNN; otherwise the
# gemm version should be used. Users who want the legacy convolution
# should use the Theano flag:
#     optimizer_excluding=local_conv_gemm
# and, when cuDNN is available, additionally:
#     optimizer_excluding=local_gpu_conv
@register_opt("dnn")
@local_optimizer([gpu_from_host, conv.ConvOp])
def local_gpu_conv(node):
    """
    If cudnn is available, use it. Otherwise, use the gemm version.
    """
    if not theano.sandbox.cuda.dnn.dnn_available():
        # If dnn isn't available, local_gpu_conv_legacy will introduce
        # the legacy op, and local_conv_gemm will then convert it to the
        # gemm op.
        return
    replaced = local_gpu_conv_legacy.transform(node)
    if not replaced:
        return
    gpu_conv_node = replaced[0].owner.inputs[0].owner
    assert isinstance(gpu_conv_node.op, GpuConv)
    dnn_repl = theano.sandbox.cuda.dnn.local_conv_dnn.transform(gpu_conv_node)
    if dnn_repl:
        return [host_from_gpu(dnn_repl[0])]
@register_opt()
@local_optimizer([gpu_from_host, conv.ConvOp])
def local_gpu_conv_legacy(node):
""" """
gpu_from_host(conv) -> gpu_conv(gpu_from_host) gpu_from_host(conv) -> gpu_conv(gpu_from_host)
...@@ -1438,6 +1465,7 @@ def local_gpu_downsample_factor_max_grad(node): ...@@ -1438,6 +1465,7 @@ def local_gpu_downsample_factor_max_grad(node):
gpu_from_host(gz)))] gpu_from_host(gz)))]
@register_opt()
@local_optimizer([GpuConv]) @local_optimizer([GpuConv])
def local_conv_gemm(node): def local_conv_gemm(node):
if (isinstance(node.op, GpuConv) and if (isinstance(node.op, GpuConv) and
...@@ -1493,7 +1521,6 @@ def local_conv_gemm(node): ...@@ -1493,7 +1521,6 @@ def local_conv_gemm(node):
return [GpuCorrMM_gradInputs('valid', subsample, pad)( return [GpuCorrMM_gradInputs('valid', subsample, pad)(
gpu_contiguous(kern), gpu_contiguous(img))] gpu_contiguous(kern), gpu_contiguous(img))]
gpu_optimizer.register("conv_gemm", local_conv_gemm)
from theano.sandbox.cuda.basic_ops import gpu_join, GpuJoin from theano.sandbox.cuda.basic_ops import gpu_join, GpuJoin
......
...@@ -586,6 +586,31 @@ def test_dnn_valid(): ...@@ -586,6 +586,31 @@ def test_dnn_valid():
yield t yield t
def test_default_conv():
    """Check that conv2d is lowered to the expected GPU convolution version.

    With cuDNN available the compiled graph must contain a GpuDnnConv;
    otherwise the gemm-based GpuCorrMM must be used. When both the dnn
    and gemm optimizations are excluded, the legacy GpuConv must remain.
    """
    img = theano.tensor.ftensor4()
    fil = theano.tensor.ftensor4()

    c = theano.tensor.nnet.conv2d(img, fil)
    f = theano.function([img, fil], c, mode=theano_mode)
    if cuda.dnn.dnn_available():
        # cuDNN path: the dnn optimization should have replaced the conv.
        assert any(isinstance(a.op, GpuDnnConv)
                   for a in f.maker.fgraph.apply_nodes)
    else:
        # No cuDNN: the gemm optimization should have been applied.
        assert any(isinstance(a.op, cuda.blas.GpuCorrMM)
                   for a in f.maker.fgraph.apply_nodes)

    # Excluding both the dnn and gemm optimizations must fall back to
    # the legacy GpuConv op.
    mode = theano_mode.excluding('local_gpu_conv', 'local_conv_gemm')
    f = theano.function([img, fil], c, mode=mode)
    assert any(isinstance(a.op, cuda.blas.GpuConv)
               for a in f.maker.fgraph.apply_nodes)
def _test_full(cls, mode=None, version=[-1], extra_shapes=[]): def _test_full(cls, mode=None, version=[-1], extra_shapes=[]):
seed_rng() seed_rng()
shapes = get_basic_shapes() shapes = get_basic_shapes()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论