Commit 95d3add9 authored by abergeron

Merge pull request #2239 from nouiz/dnn

Dnn default and doc
.. _libdoc_cuda_dnn:

================================
:mod:`sandbox.cuda.dnn` -- cuDNN
================================

.. moduleauthor:: LISA
`cuDNN <https://developer.nvidia.com/cuDNN>`_ is an NVIDIA library that
provides functionality used by deep neural networks. It offers optimized
versions of some operations, such as convolution. cuDNN is not currently
bundled with CUDA 6.5; you must download and install it yourself.

To install it, decompress the downloaded file and make the ``*.h`` and
``*.so*`` files available to the compilation environment. On Linux, this
can be done by setting the environment variables ``LD_LIBRARY_PATH``,
``LIBRARY_PATH`` and ``CPATH`` to the uncompressed directory path.
Separate multiple directories with ``:``, as you would for the ``PATH``
environment variable. Alternatively, you can copy the ``*.h`` files to
``/usr/include`` and the ``*.so*`` files to ``/lib64``.
By default, Theano detects whether it can use cuDNN and uses it if so. If
not, Theano optimizations will not introduce cuDNN ops, so Theano will
still work even if you do not introduce them manually.

To get an error when Theano cannot use cuDNN, use this Theano flag:
``optimizer_including=cudnn``.
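For example, a minimal sketch of forcing this check from Python. It assumes
you set the flag through the ``THEANO_FLAGS`` environment variable before
importing Theano; only the flag name comes from this page, the rest is
illustrative:

.. code-block:: python

    import os

    # Ask Theano to raise an error if the cuDNN optimizations cannot be used.
    # Note: this overwrites any THEANO_FLAGS already set in the environment.
    os.environ['THEANO_FLAGS'] = 'optimizer_including=cudnn'

    import theano  # the flag is read when theano is imported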
.. note::

    Normally you should not call GPU Ops directly, but the CPU interface
    currently does not allow all options supported by the cuDNN ops. So it
    is possible that you will need to call them manually.
Functions
=========

.. automodule:: theano.sandbox.cuda.dnn
    :members: dnn_conv, dnn_pool
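A minimal usage sketch (the keyword arguments of ``dnn_conv`` follow the
signature shown in the diff below; the tensor shapes and the 2x2 pooling
parameters are illustrative):

.. code-block:: python

    import theano
    import theano.tensor as T
    from theano.sandbox.cuda.basic_ops import gpu_contiguous
    from theano.sandbox.cuda.dnn import dnn_conv, dnn_pool

    img = T.ftensor4('img')      # (batch, channels, rows, cols)
    kerns = T.ftensor4('kerns')  # (n_filters, channels, k_rows, k_cols)

    # Forward convolution through cuDNN.
    conv_out = dnn_conv(gpu_contiguous(img), gpu_contiguous(kerns),
                        border_mode='valid', subsample=(1, 1))

    # 2x2 max pooling with a 2x2 stride.
    pool_out = dnn_pool(gpu_contiguous(img), (2, 2), (2, 2))

    f = theano.function([img, kerns], [conv_out, pool_out])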
Convolution Ops
===============

.. automodule:: theano.sandbox.cuda.dnn
    :members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI

Pooling Ops
===========

.. automodule:: theano.sandbox.cuda.dnn
    :members: GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad

Softmax Ops
===========

.. automodule:: theano.sandbox.cuda.dnn
    :members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
@@ -13,6 +13,7 @@
 .. toctree::
     :maxdepth: 1

+    op
     var
     type
-    op
+    dnn
@@ -1788,7 +1788,8 @@ class GCC_compiler(object):
         return cxxflags

     @staticmethod
-    def try_compile_tmp(src_code, tmp_prefix='', flags=(), try_run=False):
+    def try_compile_tmp(src_code, tmp_prefix='', flags=(),
+                        try_run=False, output=False):
         """Try to compile (and run) a test program.

         This is useful in various occasions, to check if libraries
@@ -1799,6 +1800,7 @@ class GCC_compiler(object):

         If try_run is False, returns the compilation status.
         If try_run is True, returns a (compile_status, run_status) pair.
+        If output is there, we append the stdout and stderr to the output.
         """
         if not theano.config.cxx:
             return False
@@ -1818,14 +1820,14 @@ class GCC_compiler(object):
             os.write(fd, src_code)
             os.close(fd)
             fd = None
-            p_ret = call_subprocess_Popen(
+            out, err, p_ret = output_subprocess_Popen(
                 ['g++', path, '-o', exe_path] + flags)
             if p_ret != 0:
                 compilation_ok = False
             elif try_run:
                 # Try to execute the program
                 try:
-                    p_ret = call_subprocess_Popen([exe_path])
+                    out, err, p_ret = output_subprocess_Popen([exe_path])
                     run_ok = (p_ret == 0)
                 finally:
                     os.remove(exe_path)
@@ -1839,13 +1841,18 @@ class GCC_compiler(object):
         except OSError, e:
             compilation_ok = False

-        if not try_run:
+        if not try_run and not output:
             return compilation_ok
-        else:
+        elif not try_run and output:
+            return (compilation_ok, out, err)
+        elif not output:
             return (compilation_ok, run_ok)
+        else:
+            return (compilation_ok, run_ok, out, err)
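As an aside (illustrative only, not part of the diff): with ``try_run=False``
and ``output=True`` the new convention is a ``(compilation_ok, out, err)``
triple, so a caller probing for a library could look like the sketch below,
using the same ``b()`` byte-string helper that ``try_flags`` uses:

    # Illustrative sketch: check whether a trivial program links against
    # cuDNN, keeping g++'s captured stdout/stderr for the error message.
    src = b("int main(int argc, char** argv) { return 0; }")
    ok, out, err = GCC_compiler.try_compile_tmp(
        src, tmp_prefix='probe_', flags=['-l', 'cudnn'], output=True)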
     @staticmethod
-    def try_flags(flag_list):
+    def try_flags(flag_list, preambule="", body="",
+                  try_run=False, output=False):
         '''
         Try to compile a dummy file with these flags.

@@ -1856,13 +1863,16 @@ class GCC_compiler(object):
             return False

         code = b("""
+        %(preambule)s
         int main(int argc, char** argv)
         {
+            %(body)s
             return 0;
         }
-        """)
+        """ % locals())
         return GCC_compiler.try_compile_tmp(code, tmp_prefix='try_flags_',
-                                            flags=flag_list, try_run=False)
+                                            flags=flag_list, try_run=try_run,
+                                            output=output)

     @staticmethod
     def compile_str(module_name, src_code, location=None,
 import os
 import theano
-from theano import Apply, tensor
+from theano import Apply, gof, tensor
+from theano.gof import Optimizer
 from theano.gof.type import CDataType
 from theano.compat import PY3
 from theano.sandbox.cuda.type import CudaNdarrayType
@@ -12,6 +13,7 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
 from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
                                       GpuDownsampleFactorMaxGrad)
 from theano.sandbox.cuda.nnet import GpuSoftmax
+from theano.sandbox.cuda.opt import register_opt
 from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
@@ -23,9 +25,35 @@ def dnn_available():
             dnn_available.msg = "Device not supported by cuDNN"
             dnn_available.avail = False
         else:
-            dnn_available.msg = "Can not find the cuDNN library"
-            dnn_available.avail = theano.gof.cmodule.GCC_compiler.try_flags(
-                ["-l", "cudnn"])
+            preambule = """
+#include <cudnn.h>
+#include <stdio.h>
+#include <cuda.h>
+#include <cudnn_helper.h>
+"""
+            body = """
+cudnnHandle_t _handle = NULL;
+cudnnStatus_t err;
+if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
+  fprintf(stderr, "could not create cuDNN handle: %s",
+          cudnnGetErrorString(err));
+  return 1;
+}
+"""
+            comp, run, out, err = gof.cmodule.GCC_compiler.try_flags(
+                ["-l", "cudnn", "-I" + os.path.dirname(__file__)],
+                preambule=preambule, body=body,
+                try_run=True, output=True)
+
+            dnn_available.avail = comp and run
+            if dnn_available.avail:
+                dnn_available.msg = "cuDNN should work"
+            else:
+                dnn_available.msg = (
+                    "Theano is not able to use cuDNN. We got this error: \n" +
+                    err)
     return dnn_available.avail
@@ -54,14 +82,6 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
 """ % dict(var=var, err=err, desc=desc, fail=fail)


-def raise_no_dnn():
-    """ Raise a RuntimeError if cudnn can't be used"""
-    if not dnn_available():
-        raise RuntimeError(
-            "cuDNN optimization was enabled, but cuDNN is not available. " +
-            dnn_available.msg)
-
-
 class DnnBase(GpuOp):
     """
     Creates a handle for cudnn and pulls in the cudnn libraries and headers.
@@ -88,7 +108,7 @@ cudnnHandle_t _handle = NULL;
         return ["""{
 cudnnStatus_t err;
 if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
-  PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle: %%s",
+  PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %%s",
                cudnnGetErrorString(err));
   return %s;
 }
@@ -96,6 +116,14 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
 class GpuDnnConvDesc(GpuOp):
+    """This Op builds a convolution descriptor for use in the other
+    convolution operations.
+
+    :param border_mode: 'valid' or 'full'
+    :param subsample: The subsample, tuple like (dx, dy)
+    :param conv_mode: 'conv' or 'cross'
+
+    """
     __props__ = ('border_mode', 'subsample', 'conv_mode')

     def c_headers(self):
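(Illustrative only, not part of the diff: the descriptor Op is parameterized
exactly by these three properties, and dnn_conv() normally builds it for you.)

    # Hypothetical sketch: a 'valid' convolution descriptor, no subsampling.
    desc_op = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                             conv_mode='conv')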
@@ -354,6 +382,14 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
 class GpuDnnConv(GpuDnnConvBase):
+    """
+    The forward convolution.
+
+    :param image:
+    :param kernel:
+    :param descr: the convolution descriptor
+
+    """
     conv_inputs = 'input', 'kerns'
     conv_output = 'output'
     conv_types = 'tensor4d', 'filter', 'tensor4d'
@@ -377,6 +413,15 @@ class GpuDnnConv(GpuDnnConvBase):
 class GpuDnnConvGradW(GpuDnnConvBase):
+    """
+    The convolution gradient with respect to the weights.
+
+    :param image:
+    :param kernel:
+    :param descr: the convolution descriptor
+
+    """
     conv_inputs = 'input', 'output',
     conv_output = 'kerns'
     conv_types = 'tensor4d', 'tensor4d', 'filter'
@@ -385,6 +430,15 @@ class GpuDnnConvGradW(GpuDnnConvBase):
 class GpuDnnConvGradI(GpuDnnConvBase):
+    """
+    The convolution gradient with respect to the inputs.
+
+    :param image:
+    :param kernel:
+    :param descr: the convolution descriptor
+
+    """
     conv_inputs = 'kerns', 'output',
     conv_output = 'input'
     conv_types = 'filter', 'tensor4d', 'tensor4d'
@@ -418,7 +472,15 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
 class GpuDnnPoolDesc(GpuOp):
-    __props__ = ('mode', 'ws', 'stride')
+    """
+    This Op builds a pooling descriptor for use in the other
+    pooling operations.
+
+    :param ws: window size
+    :param stride: (dx, dy)
+    :param mode: 'max' or 'average'
+    """
+    __props__ = ('ws', 'stride', 'mode')

     def c_headers(self):
         return ['cudnn.h', 'cudnn_helper.h']
@@ -489,13 +551,19 @@ class GpuDnnPoolDesc(GpuOp):
 class GpuDnnPool(DnnBase):
+    """
+    Pooling.
+
+    :param img: the image 4d tensor.
+    :param desc: the pooling descriptor.
+    """
     __props__ = ()

     def make_node(self, img, desc):
         img = as_cuda_ndarray_variable(img)
         if img.type.ndim != 4:
             raise TypeError('img must be 4D tensor')
         if not isinstance(desc.type, CDataType) \
                 or desc.type.ctype != 'cudnnPoolingDescriptor_t':
             raise TypeError('desc must be cudnnPoolingDescriptor_t')
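(Illustrative only, not part of the diff: this mirrors how the pooling
optimizer further down builds a descriptor and applies it.)

    # Hypothetical sketch: 2x2 max pooling with a 2x2 stride through cuDNN.
    # 'img' is assumed to be a 4D GPU tensor variable.
    desc = GpuDnnPoolDesc(ws=(2, 2), stride=(2, 2), mode='max')()
    pooled = GpuDnnPool()(gpu_contiguous(img), desc)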
@@ -537,10 +605,10 @@ if (output%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); }
         out, = outputs

         set_in = c_set_tensor4d(inputs[0], "input" + str(sub['struct_id']),
                                 'err' + name, sub['fail'])
         set_out = c_set_tensor4d(out, "output" + str(sub['struct_id']),
                                  'err' + name, sub['fail'])

         return """
 cudnnStatus_t err%(name)s;
@@ -615,6 +683,14 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
 class GpuDnnPoolGrad(DnnBase):
+    """
+    The pooling gradient.
+
+    :param inp: the input of the pooling.
+    :param inp_grad: same size as out, but is the corresponding gradient information.
+    :param out: the output of the pooling in the forward.
+    :param desc: The pooling descriptor.
+    """
     __props__ = ()

     def make_node(self, inp, inp_grad, out, desc):
@@ -625,7 +701,7 @@ class GpuDnnPoolGrad(DnnBase):
         inp_grad = as_cuda_ndarray_variable(inp_grad)
         if inp_grad.type.ndim != 4:
             raise TypeError('inp_grad must be 4D tensor')
         out = as_cuda_ndarray_variable(out)
         if out.type.ndim != 4:
             raise TypeError('out must be 4D tensor')
@@ -688,15 +764,15 @@
         set_in = "\n".join([
             c_set_tensor4d(inp, "input" + str(sub['struct_id']),
                            'err' + name, sub['fail']),
             c_set_tensor4d(inp_grad, "input_grad" + str(sub['struct_id']),
                            'err' + name, sub['fail']),
             c_set_tensor4d(out, "output" + str(sub['struct_id']),
                            'err' + name, sub['fail'])
         ])

         set_out = c_set_tensor4d(out, "output_grad" + str(sub['struct_id']),
                                  'err' + name, sub['fail'])

         return """
 cudnnStatus_t err%(name)s;
@@ -738,7 +814,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
                cudnnGetErrorString(err%(name)s));
   %(fail)s
 }
-""" % dict(output_grad=out_grad, desc=desc, fail=sub['fail'], id=sub['struct_id'],
+""" % dict(output_grad=out_grad, desc=desc,
+           fail=sub['fail'], id=sub['struct_id'],
            name=name, set_in=set_in,
            set_out=set_out, input=inp, input_grad=inp_grad, output=out,
            input_desc="input"+str(sub['struct_id']),
@@ -776,13 +853,12 @@ class GpuDnnSoftmax(DnnBase):
     """
     Op for the cuDNN Softmax.

-    Parameters''
-    -tensor_format: Whether the data format is 'bc01' or 'b01c'
-    -algo: 'fast' or 'accurate' indicating whether computations should be
-    optimized for speed or accuracy respectively.
-    -mode: 'instance' or 'channel' indicating whether the softmax should be
-    computed per image across 'c01' or per spationali location '01' per image
-    across 'c'.
+    :param tensor_format: Whether the data format is 'bc01' or 'b01c'
+    :param algo: 'fast' or 'accurate' indicating whether computations should be
+        optimized for speed or accuracy respectively.
+    :param mode: 'instance' or 'channel' indicating whether the softmax should
+        be computed per image across 'c01' or per spatial location '01' per
+        image across 'c'.
     """
     __props__ = ('tensor_format', 'mode', 'algo')
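(Illustrative only, not part of the diff: the softmax optimizer below applies
this Op in exactly this way, after reshaping the input to 4D and making it
contiguous.)

    # Channel-wise softmax of a 4D 'bc01' input, using the accurate algorithm.
    out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))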
@@ -927,11 +1003,14 @@ err%(name)s = cudnnSoftmaxForward(
 # We need this since other stuff from opt is not importable.
 if cuda_available:
-    from theano.sandbox.cuda.opt import local_optimizer, gpu_optimizer
+    from theano.sandbox.cuda.opt import (
+        local_optimizer, gpu_optimizer, gpu_seqopt)

+    @register_opt('cudnn')
     @local_optimizer([GpuConv])
     def local_conv_dnn(node):
-        raise_no_dnn()
+        if not dnn_available():
+            return
         if isinstance(node.op, GpuConv):
             if node.op.border_mode not in ['full', 'valid']:
                 return
@@ -941,11 +1020,11 @@ if cuda_available:
             return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
                              border_mode=border_mode, subsample=subsample)]

-    gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
-
+    @register_opt('cudnn')
     @local_optimizer([GpuDownsampleFactorMax])
     def local_pool_dnn(node):
+        if not dnn_available():
+            return
         if isinstance(node.op, GpuDownsampleFactorMax):
             if node.op.ignore_border:
                 return
@@ -953,32 +1032,43 @@ if cuda_available:
             ds = node.op.ds
             return [dnn_pool(gpu_contiguous(img), ds, ds)]

-    gpu_optimizer.register("pool_cudnn", local_pool_dnn, 'cudnn')
-
+    @register_opt('cudnn')
     @local_optimizer([GpuDownsampleFactorMaxGrad])
     def local_pool_dnn_grad(node):
+        if not dnn_available():
+            return
         if isinstance(node.op, GpuDownsampleFactorMaxGrad):
             if node.op.ignore_border:
                 return
             inp, out, inp_grad = node.inputs
             ds = node.op.ds
             desc = GpuDnnPoolDesc(ws=ds, stride=ds, mode="max")()
-            return [GpuDnnPoolGrad()(gpu_contiguous(inp),
-                gpu_contiguous(inp_grad), gpu_contiguous(out), desc)]
-
-    gpu_optimizer.register("pool_cudnn_grad", local_pool_dnn_grad, 'cudnn')
+            return [GpuDnnPoolGrad()(gpu_contiguous(inp),
+                                     gpu_contiguous(inp_grad),
+                                     gpu_contiguous(out), desc)]

+    @register_opt('cudnn')
     @local_optimizer([GpuSoftmax])
     def local_softmax_dnn(node):
-        raise_no_dnn()
+        if not dnn_available():
+            return
         if isinstance(node.op, GpuSoftmax):
             ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
-            out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
+            ins = gpu_contiguous(ins)
+            out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(ins)
             out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
             return [out]

-    gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
+    class NoCuDNNRaise(Optimizer):
+        def apply(self, fgraph):
+            """ Raise a RuntimeError if cudnn can't be used"""
+            if not dnn_available():
+                # Make an assert error as we want Theano to fail, not
+                # just skip this optimization.
+                raise AssertionError(
+                    "cuDNN optimization was enabled, but Theano was not able"
+                    " to use it. We got this error: \n" +
+                    dnn_available.msg)
+
+    gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@@ -1163,11 +1163,6 @@ def local_conv_fft_full(node):
         return

-# Needs to be registered before local_gpu_conv_legacy. Otherwise, it
-# will have priority over this optimization. We want, if cudnn is
-# available and the GPU supports it, to use it. Otherwise, the gemm
-# version should be used. If the users want the legacy convolution,
-# they should use the Theano flag to disable the dnn and/or gemm version.
 @local_optimizer([GpuConv])
 def local_gpu_conv(node):
     """
@@ -1350,7 +1345,7 @@ conv_groupopt.register("conv_fft_valid", local_conv_fft_valid, 1)
 conv_groupopt.register("conv_fft_full", local_conv_fft_full, 1)
 # Use dnn if avail, so have the dnn tag to be able to disable it.
 conv_groupopt.register('local_gpu_conv', local_gpu_conv, 10,
-                       'fast_compile', 'fast_run', 'dnn')
+                       'fast_compile', 'fast_run', 'cudnn')
 conv_groupopt.register('local_conv_gemm', local_conv_gemm, 12,
                        'fast_compile', 'fast_run')
+import logging
+import unittest
 from nose.plugins.skip import SkipTest
 import numpy
-import unittest
 import theano
+from theano.compat.six import StringIO
 from theano.gof.python25 import any
 import theano.tensor as T
 import theano.tests.unittest_tools as utt
@@ -85,7 +88,7 @@ def test_pooling_opt():
     f = theano.function(
         [x],
         max_pool_2d(x, ds=(2, 2)),
-        mode=mode_with_gpu.including("cudnn"))
+        mode=mode_with_gpu)

     assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
                 for n in f.maker.fgraph.toposort()])
@@ -97,3 +100,36 @@ def test_pooling_opt():
     assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
                 for n in f.maker.fgraph.toposort()])
+
+def test_dnn_tag():
+    """
+    Test that we crash when the 'cudnn' tag is requested but cuDNN is not
+    available, and that we use cuDNN when it is available.
+    """
+    x = T.ftensor4()
+    old = theano.config.on_opt_error
+    theano.config.on_opt_error = "raise"
+
+    sio = StringIO()
+    handler = logging.StreamHandler(sio)
+    logging.getLogger('theano.compile.tests.test_dnn').addHandler(handler)
+    # Silence original handler when intentionally generating warning messages
+    logging.getLogger('theano').removeHandler(theano.logging_default_handler)
+    raised = False
+    try:
+        f = theano.function(
+            [x],
+            max_pool_2d(x, ds=(2, 2)),
+            mode=mode_with_gpu.including("cudnn"))
+    except RuntimeError, e:
+        assert not cuda.dnn.dnn_available()
+        raised = True
+    finally:
+        theano.config.on_opt_error = old
+        logging.getLogger('theano.compile.tests.test_dnn').removeHandler(handler)
+        logging.getLogger('theano').addHandler(theano.logging_default_handler)
+
+    if not raised:
+        assert cuda.dnn.dnn_available()
+        assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
+                    for n in f.maker.fgraph.toposort()])