Unverified 提交 49c8b94e authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #6617 from abergeron/fix_dnn_conv

Don't use the dnn precision as the dtype of inputs
...@@ -343,17 +343,20 @@ cudnn = cudnn_defs.get_definitions(version(raises=False)) ...@@ -343,17 +343,20 @@ cudnn = cudnn_defs.get_definitions(version(raises=False))
def get_precision(precision, inputs, for_grad=False):
    """
    Resolve the cuDNN convolution precision and the common dtype of `inputs`.

    Parameters
    ----------
    precision : str or None
        Requested precision. ``None`` falls back to
        ``theano.config.dnn.conv.precision``. ``'as_input'`` and
        ``'as_input_f32'`` are resolved from the dtypes of `inputs`;
        any other value is returned as given.
    inputs : list
        Variables exposing a ``dtype`` attribute.
    for_grad : bool
        Set to True when the precision is for a backward convolution,
        where float16 precision is refused.

    Returns
    -------
    tuple
        ``(precision, common_dtype)``, where `common_dtype` is the upcast
        of the dtypes of `inputs`. The two can differ (e.g. float16
        inputs with ``'as_input_f32'`` give ``('float32', 'float16')``),
        so callers should cast inputs to `common_dtype`, not `precision`.

    Raises
    ------
    TypeError
        If the common dtype of `inputs` is not a float type, or if the
        resolved precision is float16 while `for_grad` is True.

    """
    common_dtype = theano.scalar.upcast(*[i.dtype for i in inputs])
    if not common_dtype.startswith('float'):
        raise TypeError("cuDNN convolution only works on real numbers")

    if precision is None:
        precision = theano.config.dnn.conv.precision

    if precision in ('as_input', 'as_input_f32'):
        # 'as_input_f32' only differs from 'as_input' for float16 inputs,
        # where the computation is promoted to float32.
        if common_dtype == 'float16' and precision == 'as_input_f32':
            precision = 'float32'
        else:
            precision = common_dtype

    if for_grad and precision == 'float16':
        raise TypeError("Float16 precision is disabled for cuDNN backward convolutions due to computation errors.")

    return precision, common_dtype
class DnnBase(COp): class DnnBase(COp):
...@@ -950,10 +953,10 @@ def _dnn_conv(img, kerns, alpha=1, beta=0, out=None, border_mode='valid', subsam ...@@ -950,10 +953,10 @@ def _dnn_conv(img, kerns, alpha=1, beta=0, out=None, border_mode='valid', subsam
img = as_gpuarray_variable(img, ctx_name) img = as_gpuarray_variable(img, ctx_name)
kerns = as_gpuarray_variable(kerns, ctx_name) kerns = as_gpuarray_variable(kerns, ctx_name)
precision = get_precision(precision, [img, kerns]) precision, dt = get_precision(precision, [img, kerns])
img = gpu_contiguous(img.astype(precision)) img = gpu_contiguous(img.astype(dt))
kerns = gpu_contiguous(kerns.astype(precision)) kerns = gpu_contiguous(kerns.astype(dt))
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision, num_groups=num_groups)(kerns.shape) conv_mode=conv_mode, precision=precision, num_groups=num_groups)(kerns.shape)
...@@ -982,10 +985,10 @@ def _dnn_gradweight(img, topgrad, kerns_shp, alpha=1, beta=0, out=None, border_m ...@@ -982,10 +985,10 @@ def _dnn_gradweight(img, topgrad, kerns_shp, alpha=1, beta=0, out=None, border_m
topgrad = as_gpuarray_variable(topgrad, ctx_name) topgrad = as_gpuarray_variable(topgrad, ctx_name)
kerns_shp = theano.tensor.as_tensor_variable(kerns_shp) kerns_shp = theano.tensor.as_tensor_variable(kerns_shp)
precision = get_precision(precision, [img, topgrad], for_grad=True) precision, dt = get_precision(precision, [img, topgrad], for_grad=True)
img = gpu_contiguous(img.astype(precision)) img = gpu_contiguous(img.astype(dt))
topgrad = gpu_contiguous(topgrad.astype(precision)) topgrad = gpu_contiguous(topgrad.astype(dt))
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision, num_groups=num_groups)(kerns_shp) conv_mode=conv_mode, precision=precision, num_groups=num_groups)(kerns_shp)
...@@ -1007,10 +1010,10 @@ def _dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None, border_mo ...@@ -1007,10 +1010,10 @@ def _dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None, border_mo
topgrad = as_gpuarray_variable(topgrad, ctx_name) topgrad = as_gpuarray_variable(topgrad, ctx_name)
img_shp = theano.tensor.as_tensor_variable(img_shp) img_shp = theano.tensor.as_tensor_variable(img_shp)
precision = get_precision(precision, [kerns, topgrad], for_grad=True) precision, dt = get_precision(precision, [kerns, topgrad], for_grad=True)
kerns = gpu_contiguous(kerns.astype(precision)) kerns = gpu_contiguous(kerns.astype(dt))
topgrad = gpu_contiguous(topgrad.astype(precision)) topgrad = gpu_contiguous(topgrad.astype(dt))
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
conv_mode=conv_mode, precision=precision, num_groups=num_groups)(kerns.shape) conv_mode=conv_mode, precision=precision, num_groups=num_groups)(kerns.shape)
...@@ -1103,7 +1106,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1), ...@@ -1103,7 +1106,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1) shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
precision = get_precision(precision, [img, kerns], for_grad=True) precision, _ = get_precision(precision, [img, kerns], for_grad=True)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc) conv = GpuDnnConvGradW()(img, kerns, out, desc)
...@@ -1123,7 +1126,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1), ...@@ -1123,7 +1126,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
shape_i(img, 3, fgraph) + (shape_i(kerns, 3, fgraph) - 1) * dilation[1]) shape_i(img, 3, fgraph) + (shape_i(kerns, 3, fgraph) - 1) * dilation[1])
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
precision = get_precision(precision, [img, kerns], for_grad=True) precision, _ = get_precision(precision, [img, kerns], for_grad=True)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=dilation, desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
...@@ -1208,7 +1211,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1 ...@@ -1208,7 +1211,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1) shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
precision = get_precision(precision, [img, kerns], for_grad=True) precision, _ = get_precision(precision, [img, kerns], for_grad=True)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc) conv = GpuDnnConvGradW()(img, kerns, out, desc)
...@@ -1229,7 +1232,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1 ...@@ -1229,7 +1232,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
shape_i(img, 4, fgraph) + (shape_i(kerns, 4, fgraph) - 1) * dilation[2]) shape_i(img, 4, fgraph) + (shape_i(kerns, 4, fgraph) - 1) * dilation[2])
out_shp = assert_conv_shape(out_shp) out_shp = assert_conv_shape(out_shp)
out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp) out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
precision = get_precision(precision, [img, kerns], for_grad=True) precision, _ = get_precision(precision, [img, kerns], for_grad=True)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=dilation, desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=dilation,
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
...@@ -3280,7 +3283,7 @@ def local_abstractconv_cudnn_alt(node): ...@@ -3280,7 +3283,7 @@ def local_abstractconv_cudnn_alt(node):
subsample = node.op.subsample subsample = node.op.subsample
filter_dilation = node.op.filter_dilation filter_dilation = node.op.filter_dilation
num_groups = node.op.num_groups num_groups = node.op.num_groups
precision = get_precision(None, [inp1, inp2]) precision, _ = get_precision(None, [inp1, inp2])
if node.op.filter_flip: if node.op.filter_flip:
conv_mode = 'conv' conv_mode = 'conv'
...@@ -3386,7 +3389,7 @@ def local_abstractconv3d_cudnn_alt(node): ...@@ -3386,7 +3389,7 @@ def local_abstractconv3d_cudnn_alt(node):
subsample = node.op.subsample subsample = node.op.subsample
filter_dilation = node.op.filter_dilation filter_dilation = node.op.filter_dilation
num_groups = node.op.num_groups num_groups = node.op.num_groups
precision = get_precision(None, [inp1, inp2]) precision, _ = get_precision(None, [inp1, inp2])
if node.op.filter_flip: if node.op.filter_flip:
conv_mode = 'conv' conv_mode = 'conv'
......
...@@ -2936,3 +2936,14 @@ def test_conv_guess_once_with_dtypes(): ...@@ -2936,3 +2936,14 @@ def test_conv_guess_once_with_dtypes():
f_pseudo_half_config() f_pseudo_half_config()
f_float_config() f_float_config()
f_double_config() f_double_config()
def test_opt_f16_prec32():
    """
    Compile a float16 conv2d together with its gradient w.r.t. the filters.

    The test has no assertions: it passes if `theano.function` manages to
    build the graph under `mode_with_gpu`.
    """
    # Both operands are 4D float16 tensors with no broadcastable dimension.
    half_4d = T.TensorType('float16', (False,) * 4)
    inputs = half_4d()
    filters = half_4d()

    conv = T.nnet.conv2d(inputs, filters)
    gfilt = theano.grad(conv.sum(), filters)

    # If this compiles we are good
    theano.function([inputs, filters], [conv, gfilt], mode=mode_with_gpu)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论