Commit 27f03a00 authored by notoraptor

Disallow float16 precision for convolution gradients functions.

Parent aeb6e3c5
...@@ -329,7 +329,7 @@ handle_type = CUDNNDataType('cudnnHandle_t', 'cudnnDestroy') ...@@ -329,7 +329,7 @@ handle_type = CUDNNDataType('cudnnHandle_t', 'cudnnDestroy')
cudnn = cudnn_defs.get_definitions(version(raises=False)) cudnn = cudnn_defs.get_definitions(version(raises=False))
def get_precision(precision, inputs): def get_precision(precision, inputs, for_grad=False):
if precision is None: if precision is None:
precision = theano.config.dnn.conv.precision precision = theano.config.dnn.conv.precision
if precision == 'as_input' or precision == 'as_input_f32': if precision == 'as_input' or precision == 'as_input_f32':
...@@ -338,6 +338,8 @@ def get_precision(precision, inputs): ...@@ -338,6 +338,8 @@ def get_precision(precision, inputs):
precision = 'float32' precision = 'float32'
else: else:
precision = nprec precision = nprec
if for_grad and precision == 'float16':
raise TypeError("Float16 precision is disabled for cuDNN backward convolutions due to computation errors.")
return precision return precision
...@@ -1006,6 +1008,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1), ...@@ -1006,6 +1008,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1) shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
if precision == 'float16':
precision = 'float32'
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc) conv = GpuDnnConvGradW()(img, kerns, out, desc)
...@@ -1025,6 +1029,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1), ...@@ -1025,6 +1029,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
shape_i(img, 3, fgraph) + (shape_i(kerns, 3, fgraph) - 1) * dilation[1]) shape_i(img, 3, fgraph) + (shape_i(kerns, 3, fgraph) - 1) * dilation[1])
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
if precision == 'float16':
precision = 'float32'
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=dilation, desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
...@@ -1129,6 +1135,8 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1 ...@@ -1129,6 +1135,8 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1) shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
if precision == 'float16':
precision = 'float32'
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc) conv = GpuDnnConvGradW()(img, kerns, out, desc)
...@@ -1149,6 +1157,8 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1 ...@@ -1149,6 +1157,8 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
shape_i(img, 4, fgraph) + (shape_i(kerns, 4, fgraph) - 1) * dilation[2]) shape_i(img, 4, fgraph) + (shape_i(kerns, 4, fgraph) - 1) * dilation[2])
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
if precision == 'float16':
precision = 'float32'
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=dilation, desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
...@@ -1187,7 +1197,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid', ...@@ -1187,7 +1197,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
img = gpu_contiguous(img) img = gpu_contiguous(img)
topgrad = gpu_contiguous(topgrad) topgrad = gpu_contiguous(topgrad)
kerns_shp = as_tensor_variable(kerns_shp) kerns_shp = as_tensor_variable(kerns_shp)
precision = get_precision(precision, [img, topgrad]) precision = get_precision(precision, [img, topgrad], for_grad=True)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision, conv_mode=conv_mode, precision=precision,
...@@ -1219,7 +1229,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', ...@@ -1219,7 +1229,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
topgrad = gpu_contiguous(topgrad) topgrad = gpu_contiguous(topgrad)
img_shp = as_tensor_variable(img_shp) img_shp = as_tensor_variable(img_shp)
precision = get_precision(precision, [kerns, topgrad]) precision = get_precision(precision, [kerns, topgrad], for_grad=True)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision, conv_mode=conv_mode, precision=precision,
......
...@@ -171,6 +171,27 @@ def test_dnn_conv_inplace(): ...@@ -171,6 +171,27 @@ def test_dnn_conv_inplace():
assert len([n for n in topo if isinstance(n.op, GpuAllocEmpty)]) == 2 assert len([n for n in topo if isinstance(n.op, GpuAllocEmpty)]) == 2
def test_dnn_conv_invalid_precision():
    """Check that cuDNN gradient convolutions reject float16 precision.

    Both dnn_gradweight and dnn_gradinput must raise TypeError when
    asked for 'float16' precision, while 'float32' and 'float64'
    remain accepted.
    """
    img = T.tensor4()
    kerns = T.tensor4()
    topgrad = T.tensor4()
    shape = (1, 2, 3, 4)

    # Allowed precisions build the graph without error.
    for allowed in ('float64', 'float32'):
        dnn.dnn_gradweight(img, topgrad, shape, precision=allowed)
        dnn.dnn_gradinput(kerns, topgrad, shape, precision=allowed)

    # float16 must be refused for both backward convolutions.
    assert_raises(TypeError, dnn.dnn_gradweight,
                  img, topgrad, shape, precision='float16')
    assert_raises(TypeError, dnn.dnn_gradinput,
                  kerns, topgrad, shape, precision='float16')
def test_pooling(): def test_pooling():
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment