提交 3ae18f61 作者: Frederic Bastien

Use cuDNN instead of CuDNN.

上级 5855468f
......@@ -329,12 +329,14 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
if v == -1 or v[0] < 4007:
# 4007 is the final release of cudnn v4
dnn_available.avail = False
dnn_available.msg = "Version too old. Update to v5, was %d" % v[0]
dnn_available.msg = "Version too old. Update to v5, was %d." % v[0]
raise RuntimeError(dnn_available.msg)
else:
dnn_available.avail = comp
if config.dnn.enabled == "True":
if not dnn_available.avail:
raise RuntimeError(
"You enabled CuDNN, but we aren't able to use it: %s" %
"You enabled cuDNN, but we aren't able to use it: %s" %
dnn_available.msg)
return dnn_available.avail
......@@ -587,7 +589,7 @@ def use(device,
" downgrading cuDNN to version 5.")
except Exception:
cudnn_version = dnn_available.msg
print("Using gpu device %d: %s (CNMeM is %s, CuDNN %s)" % (
print("Using gpu device %d: %s (CNMeM is %s, cuDNN %s)" % (
active_device_number(),
active_device_name(),
cnmem_enabled,
......
......@@ -322,30 +322,30 @@ class GpuDnnConv(DnnBase, COp):
if self.inplace:
self.destroy_map = {0: [2]}
# In CuDNN version older than V3, the FFT implementation and the
# In cuDNN version older than V3, the FFT implementation and the
# option to time the different implementations to get the fastest
# are both unavailable.
if version() < (3000, 3000):
if self.algo == 'fft':
raise RuntimeError("CuDNN FFT convolution requires CuDNN v3")
raise RuntimeError("cuDNN FFT convolution requires cuDNN v3")
elif self.algo in ['guess_once', 'guess_on_shape_change']:
raise RuntimeError("CuDNN selection of convolution "
raise RuntimeError("cuDNN selection of convolution "
"implementation based on heuristics "
"requires CuDNN v3")
"requires cuDNN v3")
elif self.algo in ['time_once', 'time_on_shape_change']:
raise RuntimeError("CuDNN convolution timing requires CuDNN "
raise RuntimeError("cuDNN convolution timing requires cuDNN "
"v3")
# The fft_tiling implementation is only available from CuDNN V4 onward
# The fft_tiling implementation is only available from cuDNN V4 onward
if version() < (4000, 4000):
if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent")
raise RuntimeError("cuDNN tiled-FFT convolution requires "
"cuDNN v4 or more recent")
if version() < (5000, 5000):
if self.algo == 'winograd':
raise RuntimeError("CuDNN winograd convolution requires "
"CuDNN v5 or more recent")
raise RuntimeError("cuDNN winograd convolution requires "
"cuDNN v5 or more recent")
assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change',
......@@ -516,11 +516,11 @@ class GpuDnnConv3d(GpuDnnConv):
if version() < (5000, 5000):
if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN 3d tiled-FFT convolution requires "
"CuDNN v5 or more recent")
raise RuntimeError("cuDNN 3d tiled-FFT convolution requires "
"cuDNN v5 or more recent")
elif self.algo == 'winograd':
raise RuntimeError("CuDNN 3d winograd convolution requires "
"CuDNN v5 or more recent")
raise RuntimeError("cuDNN 3d winograd convolution requires "
"cuDNN v5 or more recent")
def make_node(self, img, kern, output, desc, alpha=None, beta=None):
......@@ -833,17 +833,17 @@ class GpuDnnConvGradI(DnnBase, COp):
if self.inplace:
self.destroy_map = {0: [2]}
# The small-workspace implementation is only available from CuDNN V4
# The small-workspace implementation is only available from cuDNN V4
# onward.
if version() < (4000, 4000):
if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN's tiled-FFT convolution requires "
"CuDNN v4 or more recent")
raise RuntimeError("cuDNN's tiled-FFT convolution requires "
"cuDNN v4 or more recent")
if version() < (5000, 5000):
if self.algo == 'winograd':
raise RuntimeError("CuDNN's winograd convolution requires "
"CuDNN v5 or more recent")
raise RuntimeError("cuDNN's winograd convolution requires "
"cuDNN v5 or more recent")
assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change',
......@@ -996,11 +996,11 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
assert self.algo in good_algo
if version() < (5000, 5000):
if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN 3d tiled-FFT convolution requires "
"CuDNN v5 or more recent")
raise RuntimeError("cuDNN 3d tiled-FFT convolution requires "
"cuDNN v5 or more recent")
elif self.algo == 'winograd':
raise RuntimeError("CuDNN 3d winograd convolution requires "
"CuDNN v5 or more recent")
raise RuntimeError("cuDNN 3d winograd convolution requires "
"cuDNN v5 or more recent")
def grad(self, inp, grads):
kerns, top, output, desc, alpha, beta = inp
......@@ -1078,7 +1078,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
*deprecated*, use parameter algo instead.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may require certain
versions of CuDNN to be installed. Default is the value of
versions of cuDNN to be installed. Default is the value of
:attr:`config.dnn.conv.algo_fwd`.
precision : {'as_input', 'float16', 'float32', 'float64'}
Description of the dtype in which the computation of the convolution
......@@ -1364,13 +1364,13 @@ class GpuDnnPoolDesc(GpuOp):
self.pad = pad
if (pad[0] != 0 or pad[1] != 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
raise RuntimeError("cuDNN pooling with padding requires cuDNN v2")
if self.get_ndim() == 3 and version() < (3000, 3000):
raise RuntimeError("CuDNN 3d pooling requires CuDNN v3")
raise RuntimeError("cuDNN 3d pooling requires cuDNN v3")
if (mode == 'average_exc_pad' and max(pad) > 0 and
version() < (4004, 4004)):
raise RuntimeError(
"CuDNN pooling mode 'average_exc_pad' requires at least v4")
"cuDNN pooling mode 'average_exc_pad' requires at least v4")
def get_ndim(self):
return len(self.ws)
......@@ -1382,7 +1382,7 @@ class GpuDnnPoolDesc(GpuOp):
def make_node(self):
if self.pad != (0, 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
raise RuntimeError("cuDNN pooling with padding requires cuDNN v2")
node = Apply(self, [],
[CDataType("cudnnPoolingDescriptor_t",
......@@ -1982,7 +1982,7 @@ class GpuDnnSoftmaxBase(DnnBase):
Always set this to 'bc01'.
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
speed, for accuracy, or if cuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
......@@ -2003,7 +2003,7 @@ class GpuDnnSoftmaxBase(DnnBase):
self.tensor_format = tensor_format
if algo == 'log' and version() < (3000, 3000):
raise RuntimeError("CuDNN log-softmax requires CuDNN v3")
raise RuntimeError("cuDNN log-softmax requires cuDNN v3")
assert(algo in ('fast', 'accurate', 'log'))
self.algo = algo
......@@ -2525,7 +2525,7 @@ if True:
@register_opt('cudnn')
@local_optimizer([GpuElemwise, LogSoftmax])
def local_log_softmax_dnn(node):
# The log-softmax implementation is only available starting at CuDNN V3
# The log-softmax implementation is only available starting at cuDNN V3
if not dnn_available() or version() < (3000, 3000):
return
......
......@@ -68,15 +68,15 @@ def init_dev(dev, name=None):
warn = None
cudnn_version = ""
if dev.startswith('cuda'):
cudnn_version = " (CuDNN not available)"
cudnn_version = " (cuDNN not available)"
try:
cudnn_version = dnn.version()
# 5100 should not print warning with cudnn 5 final.
if cudnn_version > 5100:
warn = ("Your CuDNN version is more recent than Theano."
warn = ("Your cuDNN version is more recent than Theano."
" If you see problems, try updating Theano or"
" downgrading CuDNN to version 5.")
cudnn_version = " (CuDNN version %s)" % cudnn_version
" downgrading cuDNN to version 5.")
cudnn_version = " (cuDNN version %s)" % cudnn_version
except Exception:
cudnn_version = dnn.dnn_present.msg
print("Mapped name %s to device %s: %s%s" % (
......
......@@ -36,7 +36,7 @@ from .opt_util import alpha_merge, output_merge, inplace_allocempty
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def raise_no_cudnn(msg="CuDNN is required for convolution and pooling"):
def raise_no_cudnn(msg="cuDNN is required for convolution and pooling"):
raise RuntimeError(msg)
......@@ -74,7 +74,7 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
try_run=False, output=True)
if not avail:
return False, ("cannot compile with CuDNN. "
return False, ("cannot compile with cuDNN. "
"We got this error:\n" + str(err))
return True, None
......@@ -108,7 +108,7 @@ def dnn_present():
if config.dnn.enabled == "True":
if not dnn_present.avail:
raise RuntimeError(
"You enabled CuDNN, but we aren't able to use it: %s" %
"You enabled cuDNN, but we aren't able to use it: %s" %
dnn_present.msg)
return dnn_present.avail
......@@ -229,7 +229,7 @@ def version(raises=True):
This also does a check that the header version matches the runtime version.
:raises: If True, raise an exception if CuDNN is not present or badly installed.
:raises: If True, raise an exception if cuDNN is not present or badly installed.
Otherwise, return -1.
"""
......@@ -421,23 +421,23 @@ class GpuDnnConv(DnnBase):
if version() < 3000:
if self.algo == 'fft':
raise RuntimeError("CuDNN FFT convolution requires CuDNN v3")
raise RuntimeError("cuDNN FFT convolution requires cuDNN v3")
elif self.algo in ['guess_once', 'guess_on_shape_change']:
raise RuntimeError("CuDNN selection of convolution "
raise RuntimeError("cuDNN selection of convolution "
"implementation based on heuristics "
"requires CuDNN v3")
"requires cuDNN v3")
elif self.algo in ['time_once', 'time_on_shape_change']:
raise RuntimeError("CuDNN convolution timing requires CuDNN v3")
raise RuntimeError("cuDNN convolution timing requires cuDNN v3")
# The fft_tiling implementation is only available from CuDNN V4 onward
# The fft_tiling implementation is only available from cuDNN V4 onward
if version() < 4000:
if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent")
raise RuntimeError("cuDNN tiled-FFT convolution requires "
"cuDNN v4 or more recent")
if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN winograd convolution requires "
"CuDNN v5 or more recent")
raise RuntimeError("cuDNN winograd convolution requires "
"cuDNN v5 or more recent")
assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change',
......@@ -714,13 +714,13 @@ class GpuDnnConvGradI(DnnBase):
algo = config.dnn.conv.algo_bwd_data
self.algo = algo
# The small-workspace implementation is only available from CuDNN V4
# The small-workspace implementation is only available from cuDNN V4
# onward.
if version() < 4000 and self.algo == 'fft_tiling':
raise RuntimeError("CuDNN's tiled-FFT convolution requires CuDNN "
raise RuntimeError("cuDNN's tiled-FFT convolution requires cuDNN "
"v4 or more recent")
if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN's winograd convolution requires CuDNN "
raise RuntimeError("cuDNN's winograd convolution requires cuDNN "
"v5 or more recent")
assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling',
......@@ -856,7 +856,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
removed at any time without a deprecation period. You have been warned.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may
require certain versions of CuDNN to be installed. Default is
require certain versions of cuDNN to be installed. Default is
the value of :attr:`config.dnn.conv.algo_fwd`.
precision : {'as_input', 'float16', 'float32', 'float64'}
Description of the dtype in which the computation of the convolution
......@@ -1014,10 +1014,10 @@ class GpuDnnPoolDesc(Op):
self.pad = pad
if self.get_ndim() == 3 and version() < 3000:
raise RuntimeError("CuDNN 3d pooling requires v3")
raise RuntimeError("cuDNN 3d pooling requires v3")
if mode == 'average_exc_pad' and max(pad) > 0 and version() < 4004:
raise RuntimeError(
"CuDNN pooling mode 'average_exc_pad' requires at least v4")
"cuDNN pooling mode 'average_exc_pad' requires at least v4")
def get_ndim(self):
return len(self.ws)
......@@ -1277,7 +1277,7 @@ class GpuDnnSoftmaxBase(DnnBase):
----------
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
speed, for accuracy, or if cuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
......@@ -1291,7 +1291,7 @@ class GpuDnnSoftmaxBase(DnnBase):
assert(algo in ('fast', 'accurate', 'log'))
if algo == 'log' and version(raises=False) < 3000:
raise RuntimeError("Need CuDNN v3 for log-softmax")
raise RuntimeError("Need cuDNN v3 for log-softmax")
self.algo = algo
assert(mode in ('instance', 'channel'))
......@@ -1326,7 +1326,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
speed, for accuracy, or if cuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
......@@ -1360,7 +1360,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
----------
algo
'fast', 'accurate' or 'log' indicating whether, respectively,
computations should be optimized for speed, for accuracy, or if CuDNN
computations should be optimized for speed, for accuracy, or if cuDNN
should rather compute the gradient of the log-softmax instead.
mode
'instance' or 'channel' indicating whether the softmax should
......@@ -1549,7 +1549,7 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
cg = gpu_contiguous(out_grad)
# We reuse cg because CuDNN does not use the value of the `out`
# We reuse cg because cuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), cg, cg, ds, st, pad)
......@@ -1579,7 +1579,7 @@ def local_log_softmax_dnn(node):
len(node.inputs[0].clients) == 1):
if version(raises=False) < 3000:
# No log-softmax before cudnn v3
raise_no_cudnn("Need CuDNN v3 for LogSoftmax")
raise_no_cudnn("Need cuDNN v3 for LogSoftmax")
softmax_node = node.inputs[0].owner
new_softmax = GpuDnnSoftmax('log', softmax_node.op.mode)
return [new_softmax(softmax_node.inputs[0])]
......@@ -1594,7 +1594,7 @@ def local_logsoftmax_to_dnn(node, ctx_name):
return
if not dnn_available(ctx_name) or version(raises=False) < 3000:
# No log-softmax before cudnn v3
raise_no_cudnn("Need CuDNN v3 for LogSoftmax")
raise_no_cudnn("Need cuDNN v3 for LogSoftmax")
inp = inp.dimshuffle(0, 1, 'x', 'x')
inp.tag.context_name = ctx_name
......@@ -1627,7 +1627,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@op_lifter([SoftmaxGrad])
def local_softmax_dnn_grad(node, ctx_name):
if not dnn_available(ctx_name):
raise_no_cudnn("CuDNN needed for SoftmaxGrad")
raise_no_cudnn("cuDNN needed for SoftmaxGrad")
ins = []
for n in node.inputs:
n = as_gpuarray_variable(n, ctx_name)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论