提交 2a84aa6b authored 作者: carriepl's avatar carriepl 提交者: Frederic

Make dnn_conv and dnn_conv3d support param precision (cuda backend)

上级 0ed16db1
...@@ -1085,7 +1085,8 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI): ...@@ -1085,7 +1085,8 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode='conv', direction_hint=None, workmem=None, algo=None): conv_mode='conv', direction_hint=None, workmem=None, algo=None,
precision=None):
""" """
GPU convolution using cuDNN from NVIDIA. GPU convolution using cuDNN from NVIDIA.
...@@ -1120,10 +1121,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1120,10 +1121,14 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
removed at any time without a deprecation period. You have been warned. removed at any time without a deprecation period. You have been warned.
workmem workmem
*deprecated*, use parameter algo instead. *deprecated*, use parameter algo instead.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'} algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may require certain Convolution implementation to use. Some of its values may require certain
versions of CuDNN to be installed. Default is the value of versions of CuDNN to be installed. Default is the value of
:attr:`config.dnn.conv.algo_fwd`. :attr:`config.dnn.conv.algo_fwd`.
precision : {'float16', 'float32', 'float64', 'floatX'}
Description of the dtype in which the computation of the convolution
should be done. Default is the value of
:attr:`config.dnn.conv.precision`.
""" """
...@@ -1153,7 +1158,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1153,7 +1158,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out = gpu_alloc_empty(shape_i(kerns, 1, fgraph), out = gpu_alloc_empty(shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph), shape2, shape3) shape_i(img, 1, fgraph), shape2, shape3)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='cross')(img.shape, out.shape) conv_mode='cross', precision=precision)(img.shape,
out.shape)
conv = GpuDnnConvGradW()(img, kerns, out, desc) conv = GpuDnnConvGradW()(img, kerns, out, desc)
return as_cuda_ndarray_variable(conv.dimshuffle(1, 0, 2, 3)) return as_cuda_ndarray_variable(conv.dimshuffle(1, 0, 2, 3))
...@@ -1169,7 +1175,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1169,7 +1175,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out = gpu_alloc_empty(shape_i(img, 0, fgraph), out = gpu_alloc_empty(shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph), shape2, shape3) shape_i(kerns, 1, fgraph), shape2, shape3)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode)(out.shape, kerns.shape) conv_mode=conv_mode, precision=precision)(out.shape,
kerns.shape)
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
# Standard case: We use GpuDnnConv with suitable padding. # Standard case: We use GpuDnnConv with suitable padding.
...@@ -1178,7 +1185,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1178,7 +1185,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(img.shape, kerns.shape) conv_mode=conv_mode, precision=precision)(img.shape,
kerns.shape)
desc_op = desc.owner.op desc_op = desc.owner.op
out_shp = GpuDnnConv.get_out_shape(img.shape, kerns.shape, out_shp = GpuDnnConv.get_out_shape(img.shape, kerns.shape,
desc_op.border_mode, desc_op.border_mode,
...@@ -1189,7 +1197,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1189,7 +1197,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
conv_mode='conv', direction_hint=None, workmem=None, conv_mode='conv', direction_hint=None, workmem=None,
algo='none'): algo='none', precision=None):
""" """
GPU convolution using cuDNN from NVIDIA. GPU convolution using cuDNN from NVIDIA.
...@@ -1216,6 +1224,9 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1216,6 +1224,9 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
:param algo: convolution implementation to use. Only 'none' is implemented :param algo: convolution implementation to use. Only 'none' is implemented
for the conv3d. Default is the value of for the conv3d. Default is the value of
:attr:`config.dnn.conv.algo_fwd`. :attr:`config.dnn.conv.algo_fwd`.
:param precision: dtype in which the computation of the convolution
should be done. Possible values are 'float16', 'float32', 'float64' and
'floatX'. Default is the value of :attr:`config.dnn.conv.precision`.
:warning: The cuDNN library only works with GPU that have a compute :warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higher. This means that older GPU will not capability of 3.0 or higher. This means that older GPU will not
...@@ -1251,7 +1262,8 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1251,7 +1262,8 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
out = gpu_alloc_empty(shape_i(kerns, 1, fgraph), out = gpu_alloc_empty(shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph), shape2, shape3, shape4) shape_i(img, 1, fgraph), shape2, shape3, shape4)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
conv_mode='cross')(img.shape, out.shape) conv_mode='cross', precision=precision)(img.shape,
out.shape)
conv = GpuDnnConv3dGradW()(img, kerns, out, desc) conv = GpuDnnConv3dGradW()(img, kerns, out, desc)
return as_cuda_ndarray_variable(conv.dimshuffle(1, 0, 2, 3, 4)) return as_cuda_ndarray_variable(conv.dimshuffle(1, 0, 2, 3, 4))
...@@ -1261,7 +1273,8 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1261,7 +1273,8 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(img.shape, kerns.shape) conv_mode=conv_mode, precision=precision)(img.shape,
kerns.shape)
desc_op = desc.owner.op desc_op = desc.owner.op
out_shp = GpuDnnConv3d.get_out_shape(img.shape, kerns.shape, out_shp = GpuDnnConv3d.get_out_shape(img.shape, kerns.shape,
desc_op.border_mode, desc_op.border_mode,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论