提交 3ae18f61 作者: Frederic Bastien

Use cuDNN instead of CuDNN.

上级 5855468f
...@@ -329,12 +329,14 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) { ...@@ -329,12 +329,14 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
if v == -1 or v[0] < 4007: if v == -1 or v[0] < 4007:
# 4007 is the final release of cudnn v4 # 4007 is the final release of cudnn v4
dnn_available.avail = False dnn_available.avail = False
dnn_available.msg = "Version too old. Update to v5, was %d" % v[0] dnn_available.msg = "Version too old. Update to v5, was %d." % v[0]
raise RuntimeError(dnn_available.msg) raise RuntimeError(dnn_available.msg)
else:
dnn_available.avail = comp
if config.dnn.enabled == "True": if config.dnn.enabled == "True":
if not dnn_available.avail: if not dnn_available.avail:
raise RuntimeError( raise RuntimeError(
"You enabled CuDNN, but we aren't able to use it: %s" % "You enabled cuDNN, but we aren't able to use it: %s" %
dnn_available.msg) dnn_available.msg)
return dnn_available.avail return dnn_available.avail
...@@ -587,7 +589,7 @@ def use(device, ...@@ -587,7 +589,7 @@ def use(device,
" downgrading cuDNN to version 5.") " downgrading cuDNN to version 5.")
except Exception: except Exception:
cudnn_version = dnn_available.msg cudnn_version = dnn_available.msg
print("Using gpu device %d: %s (CNMeM is %s, CuDNN %s)" % ( print("Using gpu device %d: %s (CNMeM is %s, cuDNN %s)" % (
active_device_number(), active_device_number(),
active_device_name(), active_device_name(),
cnmem_enabled, cnmem_enabled,
......
...@@ -322,30 +322,30 @@ class GpuDnnConv(DnnBase, COp): ...@@ -322,30 +322,30 @@ class GpuDnnConv(DnnBase, COp):
if self.inplace: if self.inplace:
self.destroy_map = {0: [2]} self.destroy_map = {0: [2]}
# In CuDNN version older than V3, the FFT implementation and the # In cuDNN version older than V3, the FFT implementation and the
# option to time the different implementations to get the fastest # option to time the different implementations to get the fastest
# are both unavailable. # are both unavailable.
if version() < (3000, 3000): if version() < (3000, 3000):
if self.algo == 'fft': if self.algo == 'fft':
raise RuntimeError("CuDNN FFT convolution requires CuDNN v3") raise RuntimeError("cuDNN FFT convolution requires cuDNN v3")
elif self.algo in ['guess_once', 'guess_on_shape_change']: elif self.algo in ['guess_once', 'guess_on_shape_change']:
raise RuntimeError("CuDNN selection of convolution " raise RuntimeError("cuDNN selection of convolution "
"implementation based on heuristics " "implementation based on heuristics "
"requires CuDNN v3") "requires cuDNN v3")
elif self.algo in ['time_once', 'time_on_shape_change']: elif self.algo in ['time_once', 'time_on_shape_change']:
raise RuntimeError("CuDNN convolution timing requires CuDNN " raise RuntimeError("cuDNN convolution timing requires cuDNN "
"v3") "v3")
# The fft_tiling implementation is only available from CuDNN V4 onward # The fft_tiling implementation is only available from cuDNN V4 onward
if version() < (4000, 4000): if version() < (4000, 4000):
if self.algo == 'fft_tiling': if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN tiled-FFT convolution requires " raise RuntimeError("cuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent") "cuDNN v4 or more recent")
if version() < (5000, 5000): if version() < (5000, 5000):
if self.algo == 'winograd': if self.algo == 'winograd':
raise RuntimeError("CuDNN winograd convolution requires " raise RuntimeError("cuDNN winograd convolution requires "
"CuDNN v5 or more recent") "cuDNN v5 or more recent")
assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling', assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change', 'winograd', 'guess_once', 'guess_on_shape_change',
...@@ -516,11 +516,11 @@ class GpuDnnConv3d(GpuDnnConv): ...@@ -516,11 +516,11 @@ class GpuDnnConv3d(GpuDnnConv):
if version() < (5000, 5000): if version() < (5000, 5000):
if self.algo == 'fft_tiling': if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN 3d tiled-FFT convolution requires " raise RuntimeError("cuDNN 3d tiled-FFT convolution requires "
"CuDNN v5 or more recent") "cuDNN v5 or more recent")
elif self.algo == 'winograd': elif self.algo == 'winograd':
raise RuntimeError("CuDNN 3d winograd convolution requires " raise RuntimeError("cuDNN 3d winograd convolution requires "
"CuDNN v5 or more recent") "cuDNN v5 or more recent")
def make_node(self, img, kern, output, desc, alpha=None, beta=None): def make_node(self, img, kern, output, desc, alpha=None, beta=None):
...@@ -833,17 +833,17 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -833,17 +833,17 @@ class GpuDnnConvGradI(DnnBase, COp):
if self.inplace: if self.inplace:
self.destroy_map = {0: [2]} self.destroy_map = {0: [2]}
# The small-workspace implementation is only available from CuDNN V4 # The small-workspace implementation is only available from cuDNN V4
# onward. # onward.
if version() < (4000, 4000): if version() < (4000, 4000):
if self.algo == 'fft_tiling': if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN's tiled-FFT convolution requires " raise RuntimeError("cuDNN's tiled-FFT convolution requires "
"CuDNN v4 or more recent") "cuDNN v4 or more recent")
if version() < (5000, 5000): if version() < (5000, 5000):
if self.algo == 'winograd': if self.algo == 'winograd':
raise RuntimeError("CuDNN's winograd convolution requires " raise RuntimeError("cuDNN's winograd convolution requires "
"CuDNN v5 or more recent") "cuDNN v5 or more recent")
assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling', assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change', 'winograd', 'guess_once', 'guess_on_shape_change',
...@@ -996,11 +996,11 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI): ...@@ -996,11 +996,11 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
assert self.algo in good_algo assert self.algo in good_algo
if version() < (5000, 5000): if version() < (5000, 5000):
if self.algo == 'fft_tiling': if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN 3d tiled-FFT convolution requires " raise RuntimeError("cuDNN 3d tiled-FFT convolution requires "
"CuDNN v5 or more recent") "cuDNN v5 or more recent")
elif self.algo == 'winograd': elif self.algo == 'winograd':
raise RuntimeError("CuDNN 3d winograd convolution requires " raise RuntimeError("cuDNN 3d winograd convolution requires "
"CuDNN v5 or more recent") "cuDNN v5 or more recent")
def grad(self, inp, grads): def grad(self, inp, grads):
kerns, top, output, desc, alpha, beta = inp kerns, top, output, desc, alpha, beta = inp
...@@ -1078,7 +1078,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1078,7 +1078,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
*deprecated*, use parameter algo instead. *deprecated*, use parameter algo instead.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'} algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may require certain Convolution implementation to use. Some of its values may require certain
versions of CuDNN to be installed. Default is the value of versions of cuDNN to be installed. Default is the value of
:attr:`config.dnn.conv.algo_fwd`. :attr:`config.dnn.conv.algo_fwd`.
precision : {'as_input', 'float16', 'float32', 'float64'} precision : {'as_input', 'float16', 'float32', 'float64'}
Description of the dtype in which the computation of the convolution Description of the dtype in which the computation of the convolution
...@@ -1364,13 +1364,13 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1364,13 +1364,13 @@ class GpuDnnPoolDesc(GpuOp):
self.pad = pad self.pad = pad
if (pad[0] != 0 or pad[1] != 0) and version() == -1: if (pad[0] != 0 or pad[1] != 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2") raise RuntimeError("cuDNN pooling with padding requires cuDNN v2")
if self.get_ndim() == 3 and version() < (3000, 3000): if self.get_ndim() == 3 and version() < (3000, 3000):
raise RuntimeError("CuDNN 3d pooling requires CuDNN v3") raise RuntimeError("cuDNN 3d pooling requires cuDNN v3")
if (mode == 'average_exc_pad' and max(pad) > 0 and if (mode == 'average_exc_pad' and max(pad) > 0 and
version() < (4004, 4004)): version() < (4004, 4004)):
raise RuntimeError( raise RuntimeError(
"CuDNN pooling mode 'average_exc_pad' requires at least v4") "cuDNN pooling mode 'average_exc_pad' requires at least v4")
def get_ndim(self): def get_ndim(self):
return len(self.ws) return len(self.ws)
...@@ -1382,7 +1382,7 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1382,7 +1382,7 @@ class GpuDnnPoolDesc(GpuOp):
def make_node(self): def make_node(self):
if self.pad != (0, 0) and version() == -1: if self.pad != (0, 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2") raise RuntimeError("cuDNN pooling with padding requires cuDNN v2")
node = Apply(self, [], node = Apply(self, [],
[CDataType("cudnnPoolingDescriptor_t", [CDataType("cudnnPoolingDescriptor_t",
...@@ -1982,7 +1982,7 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1982,7 +1982,7 @@ class GpuDnnSoftmaxBase(DnnBase):
Always set this to 'bc01'. Always set this to 'bc01'.
algo : {'fast', 'accurate', 'log'} algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead. speed, for accuracy, or if cuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'} mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01' Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'. or per spatial location '01' per image across 'c'.
...@@ -2003,7 +2003,7 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -2003,7 +2003,7 @@ class GpuDnnSoftmaxBase(DnnBase):
self.tensor_format = tensor_format self.tensor_format = tensor_format
if algo == 'log' and version() < (3000, 3000): if algo == 'log' and version() < (3000, 3000):
raise RuntimeError("CuDNN log-softmax requires CuDNN v3") raise RuntimeError("cuDNN log-softmax requires cuDNN v3")
assert(algo in ('fast', 'accurate', 'log')) assert(algo in ('fast', 'accurate', 'log'))
self.algo = algo self.algo = algo
...@@ -2525,7 +2525,7 @@ if True: ...@@ -2525,7 +2525,7 @@ if True:
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([GpuElemwise, LogSoftmax]) @local_optimizer([GpuElemwise, LogSoftmax])
def local_log_softmax_dnn(node): def local_log_softmax_dnn(node):
# The log-softmax implementation is only available starting at CuDNN V3 # The log-softmax implementation is only available starting at cuDNN V3
if not dnn_available() or version() < (3000, 3000): if not dnn_available() or version() < (3000, 3000):
return return
......
...@@ -68,15 +68,15 @@ def init_dev(dev, name=None): ...@@ -68,15 +68,15 @@ def init_dev(dev, name=None):
warn = None warn = None
cudnn_version = "" cudnn_version = ""
if dev.startswith('cuda'): if dev.startswith('cuda'):
cudnn_version = " (CuDNN not available)" cudnn_version = " (cuDNN not available)"
try: try:
cudnn_version = dnn.version() cudnn_version = dnn.version()
# 5100 should not print warning with cudnn 5 final. # 5100 should not print warning with cudnn 5 final.
if cudnn_version > 5100: if cudnn_version > 5100:
warn = ("Your CuDNN version is more recent than Theano." warn = ("Your cuDNN version is more recent than Theano."
" If you see problems, try updating Theano or" " If you see problems, try updating Theano or"
" downgrading CuDNN to version 5.") " downgrading cuDNN to version 5.")
cudnn_version = " (CuDNN version %s)" % cudnn_version cudnn_version = " (cuDNN version %s)" % cudnn_version
except Exception: except Exception:
cudnn_version = dnn.dnn_present.msg cudnn_version = dnn.dnn_present.msg
print("Mapped name %s to device %s: %s%s" % ( print("Mapped name %s to device %s: %s%s" % (
......
...@@ -36,7 +36,7 @@ from .opt_util import alpha_merge, output_merge, inplace_allocempty ...@@ -36,7 +36,7 @@ from .opt_util import alpha_merge, output_merge, inplace_allocempty
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def raise_no_cudnn(msg="CuDNN is required for convolution and pooling"): def raise_no_cudnn(msg="cuDNN is required for convolution and pooling"):
raise RuntimeError(msg) raise RuntimeError(msg)
...@@ -74,7 +74,7 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) { ...@@ -74,7 +74,7 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
try_run=False, output=True) try_run=False, output=True)
if not avail: if not avail:
return False, ("cannot compile with CuDNN. " return False, ("cannot compile with cuDNN. "
"We got this error:\n" + str(err)) "We got this error:\n" + str(err))
return True, None return True, None
...@@ -108,7 +108,7 @@ def dnn_present(): ...@@ -108,7 +108,7 @@ def dnn_present():
if config.dnn.enabled == "True": if config.dnn.enabled == "True":
if not dnn_present.avail: if not dnn_present.avail:
raise RuntimeError( raise RuntimeError(
"You enabled CuDNN, but we aren't able to use it: %s" % "You enabled cuDNN, but we aren't able to use it: %s" %
dnn_present.msg) dnn_present.msg)
return dnn_present.avail return dnn_present.avail
...@@ -229,7 +229,7 @@ def version(raises=True): ...@@ -229,7 +229,7 @@ def version(raises=True):
This also does a check that the header version matches the runtime version. This also does a check that the header version matches the runtime version.
:raises: If True, raise an exception if CuDNN is not present or badly installed. :raises: If True, raise an exception if cuDNN is not present or badly installed.
Otherwise, return -1. Otherwise, return -1.
""" """
...@@ -421,23 +421,23 @@ class GpuDnnConv(DnnBase): ...@@ -421,23 +421,23 @@ class GpuDnnConv(DnnBase):
if version() < 3000: if version() < 3000:
if self.algo == 'fft': if self.algo == 'fft':
raise RuntimeError("CuDNN FFT convolution requires CuDNN v3") raise RuntimeError("cuDNN FFT convolution requires cuDNN v3")
elif self.algo in ['guess_once', 'guess_on_shape_change']: elif self.algo in ['guess_once', 'guess_on_shape_change']:
raise RuntimeError("CuDNN selection of convolution " raise RuntimeError("cuDNN selection of convolution "
"implementation based on heuristics " "implementation based on heuristics "
"requires CuDNN v3") "requires cuDNN v3")
elif self.algo in ['time_once', 'time_on_shape_change']: elif self.algo in ['time_once', 'time_on_shape_change']:
raise RuntimeError("CuDNN convolution timing requires CuDNN v3") raise RuntimeError("cuDNN convolution timing requires cuDNN v3")
# The fft_tiling implementation is only available from CuDNN V4 onward # The fft_tiling implementation is only available from cuDNN V4 onward
if version() < 4000: if version() < 4000:
if self.algo == 'fft_tiling': if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN tiled-FFT convolution requires " raise RuntimeError("cuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent") "cuDNN v4 or more recent")
if version() < 5000 and self.algo == 'winograd': if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN winograd convolution requires " raise RuntimeError("cuDNN winograd convolution requires "
"CuDNN v5 or more recent") "cuDNN v5 or more recent")
assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling', assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change', 'winograd', 'guess_once', 'guess_on_shape_change',
...@@ -714,13 +714,13 @@ class GpuDnnConvGradI(DnnBase): ...@@ -714,13 +714,13 @@ class GpuDnnConvGradI(DnnBase):
algo = config.dnn.conv.algo_bwd_data algo = config.dnn.conv.algo_bwd_data
self.algo = algo self.algo = algo
# The small-workspace implementation is only available from CuDNN V4 # The small-workspace implementation is only available from cuDNN V4
# onward. # onward.
if version() < 4000 and self.algo == 'fft_tiling': if version() < 4000 and self.algo == 'fft_tiling':
raise RuntimeError("CuDNN's tiled-FFT convolution requires CuDNN " raise RuntimeError("cuDNN's tiled-FFT convolution requires cuDNN "
"v4 or more recent") "v4 or more recent")
if version() < 5000 and self.algo == 'winograd': if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN's winograd convolution requires CuDNN " raise RuntimeError("cuDNN's winograd convolution requires cuDNN "
"v5 or more recent") "v5 or more recent")
assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling', assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling',
...@@ -856,7 +856,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -856,7 +856,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
removed at any time without a deprecation period. You have been warned. removed at any time without a deprecation period. You have been warned.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'} algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may Convolution implementation to use. Some of its values may
require certain versions of CuDNN to be installed. Default is require certain versions of cuDNN to be installed. Default is
the value of :attr:`config.dnn.conv.algo_fwd`. the value of :attr:`config.dnn.conv.algo_fwd`.
precision : {'as_input', 'float16', 'float32', 'float64'} precision : {'as_input', 'float16', 'float32', 'float64'}
Description of the dtype in which the computation of the convolution Description of the dtype in which the computation of the convolution
...@@ -1014,10 +1014,10 @@ class GpuDnnPoolDesc(Op): ...@@ -1014,10 +1014,10 @@ class GpuDnnPoolDesc(Op):
self.pad = pad self.pad = pad
if self.get_ndim() == 3 and version() < 3000: if self.get_ndim() == 3 and version() < 3000:
raise RuntimeError("CuDNN 3d pooling requires v3") raise RuntimeError("cuDNN 3d pooling requires v3")
if mode == 'average_exc_pad' and max(pad) > 0 and version() < 4004: if mode == 'average_exc_pad' and max(pad) > 0 and version() < 4004:
raise RuntimeError( raise RuntimeError(
"CuDNN pooling mode 'average_exc_pad' requires at least v4") "cuDNN pooling mode 'average_exc_pad' requires at least v4")
def get_ndim(self): def get_ndim(self):
return len(self.ws) return len(self.ws)
...@@ -1277,7 +1277,7 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1277,7 +1277,7 @@ class GpuDnnSoftmaxBase(DnnBase):
---------- ----------
algo : {'fast', 'accurate', 'log'} algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead. speed, for accuracy, or if cuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'} mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01' Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'. or per spatial location '01' per image across 'c'.
...@@ -1291,7 +1291,7 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1291,7 +1291,7 @@ class GpuDnnSoftmaxBase(DnnBase):
assert(algo in ('fast', 'accurate', 'log')) assert(algo in ('fast', 'accurate', 'log'))
if algo == 'log' and version(raises=False) < 3000: if algo == 'log' and version(raises=False) < 3000:
raise RuntimeError("Need CuDNN v3 for log-softmax") raise RuntimeError("Need cuDNN v3 for log-softmax")
self.algo = algo self.algo = algo
assert(mode in ('instance', 'channel')) assert(mode in ('instance', 'channel'))
...@@ -1326,7 +1326,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase): ...@@ -1326,7 +1326,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
algo : {'fast', 'accurate', 'log'} algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead. speed, for accuracy, or if cuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'} mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01' Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'. or per spatial location '01' per image across 'c'.
...@@ -1360,7 +1360,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): ...@@ -1360,7 +1360,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
---------- ----------
algo algo
'fast', 'accurate' or 'log' indicating whether, respectively, 'fast', 'accurate' or 'log' indicating whether, respectively,
computations should be optimized for speed, for accuracy, or if CuDNN computations should be optimized for speed, for accuracy, or if cuDNN
should rather compute the gradient of the log-softmax instead. should rather compute the gradient of the log-softmax instead.
mode mode
'instance' or 'channel' indicating whether the softmax should 'instance' or 'channel' indicating whether the softmax should
...@@ -1549,7 +1549,7 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name): ...@@ -1549,7 +1549,7 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
cg = gpu_contiguous(out_grad) cg = gpu_contiguous(out_grad)
# We reuse cg because CuDNN does not use the value of the `out` # We reuse cg because cuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This # argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know. # has been observed in v2 and v3 as far as I know.
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), cg, cg, ds, st, pad) return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), cg, cg, ds, st, pad)
...@@ -1579,7 +1579,7 @@ def local_log_softmax_dnn(node): ...@@ -1579,7 +1579,7 @@ def local_log_softmax_dnn(node):
len(node.inputs[0].clients) == 1): len(node.inputs[0].clients) == 1):
if version(raises=False) < 3000: if version(raises=False) < 3000:
# No log-softmax before cudnn v3 # No log-softmax before cudnn v3
raise_no_cudnn("Need CuDNN v3 for LogSoftmax") raise_no_cudnn("Need cuDNN v3 for LogSoftmax")
softmax_node = node.inputs[0].owner softmax_node = node.inputs[0].owner
new_softmax = GpuDnnSoftmax('log', softmax_node.op.mode) new_softmax = GpuDnnSoftmax('log', softmax_node.op.mode)
return [new_softmax(softmax_node.inputs[0])] return [new_softmax(softmax_node.inputs[0])]
...@@ -1594,7 +1594,7 @@ def local_logsoftmax_to_dnn(node, ctx_name): ...@@ -1594,7 +1594,7 @@ def local_logsoftmax_to_dnn(node, ctx_name):
return return
if not dnn_available(ctx_name) or version(raises=False) < 3000: if not dnn_available(ctx_name) or version(raises=False) < 3000:
# No log-softmax before cudnn v3 # No log-softmax before cudnn v3
raise_no_cudnn("Need CuDNN v3 for LogSoftmax") raise_no_cudnn("Need cuDNN v3 for LogSoftmax")
inp = inp.dimshuffle(0, 1, 'x', 'x') inp = inp.dimshuffle(0, 1, 'x', 'x')
inp.tag.context_name = ctx_name inp.tag.context_name = ctx_name
...@@ -1627,7 +1627,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn') ...@@ -1627,7 +1627,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@op_lifter([SoftmaxGrad]) @op_lifter([SoftmaxGrad])
def local_softmax_dnn_grad(node, ctx_name): def local_softmax_dnn_grad(node, ctx_name):
if not dnn_available(ctx_name): if not dnn_available(ctx_name):
raise_no_cudnn("CuDNN needed for SoftmaxGrad") raise_no_cudnn("cuDNN needed for SoftmaxGrad")
ins = [] ins = []
for n in node.inputs: for n in node.inputs:
n = as_gpuarray_variable(n, ctx_name) n = as_gpuarray_variable(n, ctx_name)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论