提交 d18c15ce 作者: Frederic Bastien

Fix gh-5649

上级 02d11f7d
...@@ -198,6 +198,18 @@ handle_type = CDataType('cudnnHandle_t', 'cudnnDestroy', ...@@ -198,6 +198,18 @@ handle_type = CDataType('cudnnHandle_t', 'cudnnDestroy',
lib_dirs=[config.dnn.library_path]) lib_dirs=[config.dnn.library_path])
def get_precision(precision, inputs):
    """
    Resolve the precision setting used for a dnn convolution.

    Parameters
    ----------
    precision
        Requested precision. ``None`` means "use the value of
        ``theano.config.dnn.conv.precision``". The values ``'as_input'``
        and ``'as_input_f32'`` are resolved from the dtypes of `inputs`;
        any other value is returned unchanged.
    inputs
        Sequence of objects exposing a ``dtype`` attribute. Only read
        when the precision is ``'as_input'`` or ``'as_input_f32'``.

    Returns
    -------
    The resolved precision: the upcast dtype of `inputs` for the
    ``'as_input*'`` settings (with ``'float16'`` promoted to
    ``'float32'`` under ``'as_input_f32'``), otherwise the requested
    (or configured) value itself.

    """
    requested = precision
    if requested is None:
        # Fall back to the globally configured default.
        requested = theano.config.dnn.conv.precision

    follows_inputs = requested == 'as_input' or requested == 'as_input_f32'
    if not follows_inputs:
        # Explicit precision: nothing to infer from the inputs.
        return requested

    upcasted = theano.scalar.upcast(*[var.dtype for var in inputs])
    # 'as_input_f32' never computes in half precision.
    if upcasted == 'float16' and requested == 'as_input_f32':
        return 'float32'
    return upcasted
class DnnBase(COp): class DnnBase(COp):
""" """
...@@ -963,14 +975,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -963,14 +975,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
""" """
# Establish dtype in which to perform the computation of the convolution # Establish dtype in which to perform the computation of the convolution
if precision is None: precision = get_precision(precision, [img, kerns])
precision = theano.config.dnn.conv.precision
if precision == 'as_input' or precision == 'as_input_f32':
nprec = theano.scalar.upcast(img.dtype, kerns.dtype)
if nprec == 'float16' and precision == 'as_input_f32':
precision = 'float32'
else:
precision = nprec
if workmem is not None: if workmem is not None:
if algo is not None: if algo is not None:
...@@ -1086,14 +1091,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1086,14 +1091,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
""" """
# Establish dtype in which to perform the computation of the convolution # Establish dtype in which to perform the computation of the convolution
if precision is None: precision = get_precision(precision, [img, kerns])
precision = theano.config.dnn.conv.precision
if precision == 'as_input' or precision == 'as_input_f32':
nprec = theano.scalar.upcast(img.dtype, kerns.dtype)
if nprec == 'float16' and precision == 'as_input_f32':
precision = 'float32'
else:
precision = nprec
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None) fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
ctx_name = infer_context_name(img, kerns) ctx_name = infer_context_name(img, kerns)
...@@ -1159,7 +1157,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1159,7 +1157,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid', def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv'): subsample=(1, 1), conv_mode='conv', precision=None):
""" """
TODO: document this TODO: document this
""" """
...@@ -1169,14 +1167,17 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid', ...@@ -1169,14 +1167,17 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
img = gpu_contiguous(img) img = gpu_contiguous(img)
topgrad = gpu_contiguous(topgrad) topgrad = gpu_contiguous(topgrad)
kerns_shp = as_tensor_variable(kerns_shp) kerns_shp = as_tensor_variable(kerns_shp)
precision = get_precision(precision, [img, topgrad])
desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample, desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(kerns_shp) conv_mode=conv_mode, precision=precision)(
kerns_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*kerns_shp) out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*kerns_shp)
return gpu_dnn_conv_gradW()(img, topgrad, out, desc) return gpu_dnn_conv_gradW()(img, topgrad, out, desc)
def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid', def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
subsample=(1, 1, 1), conv_mode='conv'): subsample=(1, 1, 1), conv_mode='conv', precision=None):
""" """
TODO: document this TODO: document this
""" """
...@@ -1186,14 +1187,17 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid', ...@@ -1186,14 +1187,17 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
img = gpu_contiguous(img) img = gpu_contiguous(img)
topgrad = gpu_contiguous(topgrad) topgrad = gpu_contiguous(topgrad)
kerns_shp = as_tensor_variable(kerns_shp) kerns_shp = as_tensor_variable(kerns_shp)
precision = get_precision(precision, [img, topgrad])
desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample, desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(kerns_shp) conv_mode=conv_mode, precision=precision)(
kerns_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*kerns_shp) out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*kerns_shp)
return gpu_dnn_conv_gradW()(img, topgrad, out, desc) return gpu_dnn_conv_gradW()(img, topgrad, out, desc)
def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
subsample=(1, 1), conv_mode='conv'): subsample=(1, 1), conv_mode='conv', precision=None):
""" """
TODO: document this TODO: document this
""" """
...@@ -1203,14 +1207,17 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid', ...@@ -1203,14 +1207,17 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
topgrad = gpu_contiguous(topgrad) topgrad = gpu_contiguous(topgrad)
img_shp = as_tensor_variable(img_shp) img_shp = as_tensor_variable(img_shp)
precision = get_precision(precision, [kerns, topgrad])
desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample, desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(kerns.shape) conv_mode=conv_mode, precision=precision)(
kerns.shape)
out = gpu_alloc_empty(ctx_name, kerns.dtype)(*img_shp) out = gpu_alloc_empty(ctx_name, kerns.dtype)(*img_shp)
return gpu_dnn_conv_gradI()(kerns, topgrad, out, desc) return gpu_dnn_conv_gradI()(kerns, topgrad, out, desc)
def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid', def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid',
subsample=(1, 1, 1), conv_mode='conv'): subsample=(1, 1, 1), conv_mode='conv', precision=None):
""" """
TODO: document this TODO: document this
""" """
...@@ -1220,8 +1227,11 @@ def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid', ...@@ -1220,8 +1227,11 @@ def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid',
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
topgrad = gpu_contiguous(topgrad) topgrad = gpu_contiguous(topgrad)
img_shp = as_tensor_variable(img_shp) img_shp = as_tensor_variable(img_shp)
precision = get_precision(precision, [kerns, topgrad])
desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample, desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(kerns.shape) conv_mode=conv_mode, precision=precision)(
kerns.shape)
out = gpu_alloc_empty(ctx_name, kerns.dtype)(*img_shp) out = gpu_alloc_empty(ctx_name, kerns.dtype)(*img_shp)
return gpu_dnn_conv_gradI()(kerns, topgrad, out, desc) return gpu_dnn_conv_gradI()(kerns, topgrad, out, desc)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论