提交 d5373b57 作者: Frédéric Bastien 提交者: GitHub

Merge pull request #4764 from abergeron/dnn_f16

Float16 convolutions
...@@ -345,8 +345,9 @@ AddConfigVar('dnn.conv.algo_bwd_filter', ...@@ -345,8 +345,9 @@ AddConfigVar('dnn.conv.algo_bwd_filter',
AddConfigVar('dnn.conv.precision', AddConfigVar('dnn.conv.precision',
"Default data precision to use for the computation in cuDNN " "Default data precision to use for the computation in cuDNN "
"convolutions (defaults to the same dtype as the inputs of the " "convolutions (defaults to the same dtype as the inputs of the "
"convolutions).", "convolutions, or float32 if inputs are float16).",
EnumStr('as_input', 'float16', 'float32', 'float64'), EnumStr('as_input_f32', 'as_input', 'float16', 'float32',
'float64'),
in_c_key=False) in_c_key=False)
......
...@@ -393,6 +393,8 @@ _one = constant(numpy.asarray(1.0, dtype='float64')) ...@@ -393,6 +393,8 @@ _one = constant(numpy.asarray(1.0, dtype='float64'))
def ensure_dt(val, default, name, dtype): def ensure_dt(val, default, name, dtype):
if dtype == 'float16':
dtype = 'float32'
if val is None: if val is None:
val = default.clone() val = default.clone()
if not isinstance(val, Variable): if not isinstance(val, Variable):
...@@ -422,7 +424,7 @@ class GpuDnnConv(DnnBase): ...@@ -422,7 +424,7 @@ class GpuDnnConv(DnnBase):
Default is the value of :attr:`config.dnn.conv.algo_fwd`. Default is the value of :attr:`config.dnn.conv.algo_fwd`.
""" """
_f16_ok = True
__props__ = ('algo', 'inplace') __props__ = ('algo', 'inplace')
def __init__(self, algo=None, inplace=False): def __init__(self, algo=None, inplace=False):
...@@ -437,22 +439,6 @@ class GpuDnnConv(DnnBase): ...@@ -437,22 +439,6 @@ class GpuDnnConv(DnnBase):
if self.inplace: if self.inplace:
self.destroy_map = {0: [2]} self.destroy_map = {0: [2]}
if version() < 3000:
if self.algo == 'fft':
raise RuntimeError("cuDNN FFT convolution requires cuDNN v3")
elif self.algo in ['guess_once', 'guess_on_shape_change']:
raise RuntimeError("cuDNN selection of convolution "
"implementation based on heuristics "
"requires cuDNN v3")
elif self.algo in ['time_once', 'time_on_shape_change']:
raise RuntimeError("cuDNN convolution timing requires cuDNN v3")
# The fft_tiling implementation is only available from cuDNN V4 onward
if version() < 4000:
if self.algo == 'fft_tiling':
raise RuntimeError("cuDNN tiled-FFT convolution requires "
"cuDNN v4 or more recent")
if version() < 5000 and self.algo == 'winograd': if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("cuDNN winograd convolution requires " raise RuntimeError("cuDNN winograd convolution requires "
"cuDNN v5 or more recent") "cuDNN v5 or more recent")
...@@ -488,7 +474,6 @@ class GpuDnnConv(DnnBase): ...@@ -488,7 +474,6 @@ class GpuDnnConv(DnnBase):
elif self.algo == 'fft': elif self.algo == 'fft':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT'
elif self.algo == 'fft_tiling': elif self.algo == 'fft_tiling':
# need v4
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif self.algo == 'winograd': elif self.algo == 'winograd':
# need v5 # need v5
...@@ -605,7 +590,7 @@ class GpuDnnConvGradW(DnnBase): ...@@ -605,7 +590,7 @@ class GpuDnnConvGradW(DnnBase):
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`. Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
""" """
_f16_ok = True
__props__ = ('algo', 'inplace') __props__ = ('algo', 'inplace')
def __init__(self, inplace=False, algo=None): def __init__(self, inplace=False, algo=None):
...@@ -650,9 +635,6 @@ class GpuDnnConvGradW(DnnBase): ...@@ -650,9 +635,6 @@ class GpuDnnConvGradW(DnnBase):
if self.inplace: if self.inplace:
defs.append(('CONV_INPLACE', '1')) defs.append(('CONV_INPLACE', '1'))
if version() < 3000:
alg = '0'
else:
alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0' alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0'
if self.algo == 'none': if self.algo == 'none':
alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0' alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0'
...@@ -720,7 +702,6 @@ gpu_dnn_conv_gradW.cache = {} ...@@ -720,7 +702,6 @@ gpu_dnn_conv_gradW.cache = {}
class GpuDnnConvGradI(DnnBase): class GpuDnnConvGradI(DnnBase):
""" """
The convolution gradient with respect to the inputs. The convolution gradient with respect to the inputs.
...@@ -735,7 +716,7 @@ class GpuDnnConvGradI(DnnBase): ...@@ -735,7 +716,7 @@ class GpuDnnConvGradI(DnnBase):
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`. Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
""" """
_f16_ok = True
__props__ = ('algo', 'inplace',) __props__ = ('algo', 'inplace',)
def __init__(self, inplace=False, algo=None): def __init__(self, inplace=False, algo=None):
...@@ -748,11 +729,6 @@ class GpuDnnConvGradI(DnnBase): ...@@ -748,11 +729,6 @@ class GpuDnnConvGradI(DnnBase):
algo = config.dnn.conv.algo_bwd_data algo = config.dnn.conv.algo_bwd_data
self.algo = algo self.algo = algo
# The small-workspace implementation is only available from cuDNN V4
# onward.
if version() < 4000 and self.algo == 'fft_tiling':
raise RuntimeError("cuDNN's tiled-FFT convolution requires cuDNN "
"v4 or more recent")
if version() < 5000 and self.algo == 'winograd': if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("cuDNN's winograd convolution requires cuDNN " raise RuntimeError("cuDNN's winograd convolution requires cuDNN "
"v5 or more recent") "v5 or more recent")
...@@ -791,9 +767,6 @@ class GpuDnnConvGradI(DnnBase): ...@@ -791,9 +767,6 @@ class GpuDnnConvGradI(DnnBase):
if self.inplace: if self.inplace:
defs.append(('CONV_INPLACE', '1')) defs.append(('CONV_INPLACE', '1'))
if version() < 3000:
alg = '0'
else:
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if self.algo == 'none': if self.algo == 'none':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
...@@ -915,8 +888,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -915,8 +888,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# Establish dtype in which to perform the computation of the convolution # Establish dtype in which to perform the computation of the convolution
if precision is None: if precision is None:
precision = theano.config.dnn.conv.precision precision = theano.config.dnn.conv.precision
if precision == 'as_input': if precision == 'as_input' or precision == 'as_input_f32':
precision = theano.scalar.upcast(img.dtype, kerns.dtype) nprec = theano.scalar.upcast(img.dtype, kerns.dtype)
if nprec == 'float16' and precision == 'as_input_f32':
precision = 'float32'
else:
precision = nprec
if workmem is not None: if workmem is not None:
if algo is not None: if algo is not None:
...@@ -1059,12 +1036,6 @@ class GpuDnnPoolDesc(Op): ...@@ -1059,12 +1036,6 @@ class GpuDnnPoolDesc(Op):
self.stride = stride self.stride = stride
self.pad = pad self.pad = pad
if self.get_ndim() == 3 and version() < 3000:
raise RuntimeError("cuDNN 3d pooling requires v3")
if mode == 'average_exc_pad' and max(pad) > 0 and version() < 4004:
raise RuntimeError(
"cuDNN pooling mode 'average_exc_pad' requires at least v4")
def get_ndim(self): def get_ndim(self):
return len(self.ws) return len(self.ws)
...@@ -1149,7 +1120,7 @@ class GpuDnnPool(DnnBase): ...@@ -1149,7 +1120,7 @@ class GpuDnnPool(DnnBase):
(padX, padY) or (padX, padY, padZ) (padX, padY) or (padX, padY, padZ)
""" """
_f16_ok = True
__props__ = ('mode',) __props__ = ('mode',)
def __init__(self, mode='max'): def __init__(self, mode='max'):
...@@ -1234,7 +1205,7 @@ class GpuDnnPoolGrad(DnnBase): ...@@ -1234,7 +1205,7 @@ class GpuDnnPoolGrad(DnnBase):
(padX, padY) or (padX, padY, padZ) (padX, padY) or (padX, padY, padZ)
""" """
_f16_ok = True
__props__ = ('mode',) __props__ = ('mode',)
def __init__(self, mode='max'): def __init__(self, mode='max'):
......
...@@ -308,6 +308,8 @@ class Scalar(Type): ...@@ -308,6 +308,8 @@ class Scalar(Type):
""" % locals() """ % locals()
def c_extract(self, name, sub, check_input=True): def c_extract(self, name, sub, check_input=True):
if self.dtype == 'float16':
raise NotImplementedError('float16')
specs = self.dtype_specs() specs = self.dtype_specs()
if(check_input): if(check_input):
pre = """ pre = """
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论