提交 68428b12 authored 作者: f0k's avatar f0k

Annotate ConvOp and GpuConv with direction hint and use this to select algorithm…

Annotate ConvOp and GpuConv with direction hint and use this to select algorithm in conv_dnn optimizer
上级 447ab32d
......@@ -1513,6 +1513,7 @@ class GpuConv(GpuOp):
logical_kern_hw=None,
logical_kern_align_top=True,
version=-1,
direction_hint=None,
verbose=0,
kshp=None,
imshp=None,
......@@ -1525,6 +1526,10 @@ class GpuConv(GpuOp):
convolution. By default we try to guess the best one.
You can force one version with this parameter. This
parameter is used by the tests.
:param direction_hint: 'forward', 'bprop weights' or 'bprop inputs'.
Serves as a hint for graph optimizers replacing
GpuConv by other implementations. If the GpuConv is
inserted automatically, we take its value from ConvOp.
:param verbose: for value of 1,2 and 3. Print more information during
the execution of the convolution. Mostly used for
optimization or debugging.
......@@ -1570,6 +1575,7 @@ class GpuConv(GpuOp):
self.logical_kern_hw = logical_kern_hw
self.logical_kern_align_top = logical_kern_align_top
self.version = version
self.direction_hint = direction_hint
self.verbose = verbose
self.kshp = kshp
self.imshp = imshp
......
......@@ -469,7 +469,7 @@ class GpuDnnConvGradI(GpuDnnConvBase):
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode='conv'):
conv_mode='conv', direction_hint=None):
"""
GPU convolution using cuDNN from NVIDIA.
......@@ -481,13 +481,41 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
:param border_mode: one of 'valid', 'full'; additionally, the padding size
could be directly specified by an integer or a pair of integers
:param subsample: perform subsampling of the output (default: (1, 1))
:param conv_mode: perform convolution (kernels flipped) or cross-correlation. One of 'conv', 'cross'. (default: 'conv')
:param conv_mode: perform convolution (kernels flipped) or cross-correlation.
One of 'conv', 'cross'. (default: 'conv')
:param direction_hint: Used by graph optimizers to change algorithm choice.
By default, GpuDnnConv will be used to carry out the convolution.
If border_mode is 'valid', subsample is (1,1) and direction_hint is
'bprop weights', it will use GpuDnnConvGradW.
If border_mode is 'full', subsample is (1,1) and direction_hint is
*not* 'forward!', it will use GpuDnnConvGradI.
This parameter is used internally by graph optimizers and may be
removed at any time without a deprecation period. You have been warned.
:warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higher. This means that older GPUs will not
work with this Op.
"""
if border_mode == 'full' and subsample == (1, 1):
if (border_mode == 'valid' and subsample == (1,1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
if conv_mode == 'conv':
# We need to flip manually. These 'kerns' are not the kernels
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns = kerns[:, :, ::-1, ::-1]
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
shape = theano.tensor.stack(kerns.shape[1], img.shape[1],
img.shape[2] - kerns.shape[2] + 1,
img.shape[3] - kerns.shape[3] + 1)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='cross')(img.shape, shape)
conv = GpuDnnConvGradW()(img, kerns, desc)
return as_cuda_ndarray_variable(conv.dimshuffle(1, 0, 2, 3))
elif (border_mode == 'full' and subsample == (1, 1) and
direction_hint != 'forward!'):
# Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution.
# We just need to set up a suitable 'fake' valid convolution.
......@@ -501,6 +529,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode)(shape, kerns.shape)
return GpuDnnConvGradI()(kerns, img, desc)
# Standard case: We use GpuDnnConv with suitable padding.
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
......@@ -1134,8 +1163,11 @@ if cuda_available:
img, kern = node.inputs
border_mode = node.op.border_mode
subsample = node.op.subsample
direction_hint = node.op.direction_hint
return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
border_mode=border_mode, subsample=subsample)]
border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)]
# DISABLED as there is problems in the handling of borders
# @register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax])
......
......@@ -1181,6 +1181,7 @@ def local_gpu_conv(node):
logical_kern_align_top=op.kshp_logical_top_aligned,
kshp=op.kshp,
version=op.version,
direction_hint=op.direction_hint,
verbose=op.verbose,
imshp=op.imshp,
nkern=op.nkern,
......
......@@ -280,6 +280,7 @@ class ConvOp(OpenMPOp):
kshp_logical_top_aligned=True,
verbose=0,
version=-1,
direction_hint='forward',
openmp=None):
"""
Initializes a ConvOp with given output_mode (full/valid). All other
......@@ -348,6 +349,8 @@ class ConvOp(OpenMPOp):
:type version: int or str
:param version: passed to GpuConv, if version='no_fft', fft
optimization will be deactivated at the op level.
:param direction_hint: 'forward', 'bprop weights' or 'bprop inputs'.
Passed to GpuConv, used by graph optimizers to aid algorithm choice.
The 3 following parameters are used internally when we generate
the gradient when dx!=1 or dy!=1.
......@@ -423,6 +426,7 @@ class ConvOp(OpenMPOp):
self.dy = dy
self.verbose = verbose
self.version = version
self.direction_hint = direction_hint
# a triple
if imshp_logical is None:
......@@ -888,6 +892,7 @@ class ConvOp(OpenMPOp):
kshp_logical=kshp_logical,
kshp_logical_top_aligned=kshp_logical_top_aligned,
version=self.version,
direction_hint='bprop weights',
verbose=self.verbose)
else: # let __init__ choose c params be chosen automatically from shapes
......@@ -897,6 +902,7 @@ class ConvOp(OpenMPOp):
kshp_logical=kshp_logical,
kshp_logical_top_aligned=kshp_logical_top_aligned,
version=self.version,
direction_hint='bprop weights',
verbose=self.verbose)
dw = dw(img, filters)
......@@ -929,6 +935,7 @@ class ConvOp(OpenMPOp):
imshp_logical=imshp_logical,
kshp_logical=None,
version=-1, # if we change the mode, we don't forward the version.
direction_hint='bprop inputs',
verbose=self.verbose)
else: # let __init__ figure out the unrolling / patch sizes
din = ConvOp(imshp, self.kshp, nkern, self.bsize,
......@@ -938,6 +945,7 @@ class ConvOp(OpenMPOp):
imshp_logical=imshp_logical,
kshp_logical=None,
version=-1, # if we change the mode, we don't forward the version.
direction_hint='bprop inputs',
verbose=self.verbose)
din = din(gz, filters)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论