提交 6e3837ba authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2608 from nouiz/dnn

[WIP] Disable some optimizations that could cause problems with cudnn v2 rc3.
...@@ -3434,6 +3434,9 @@ class CopyOnNegativeStrides(GpuOp): ...@@ -3434,6 +3434,9 @@ class CopyOnNegativeStrides(GpuOp):
i = i.copy() i = i.copy()
out[0][0] = i out[0][0] = i
def infer_shape(self, node, xshp):
    """Shape inference: the op at most copies its input to normalize
    strides, so every output shape equals the matching input shape."""
    # Identity pass-through of the input shapes.
    return xshp
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
input, = inp input, = inp
z, = out z, = out
......
...@@ -649,12 +649,19 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -649,12 +649,19 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
capability of 3.0 or higher. This means that older GPUs will not capability of 3.0 or higher. This means that older GPUs will not
work with this Op. work with this Op.
""" """
def contig_version(var):
    """Return *var* made safe for cuDNN consumption.

    cuDNN v1 (``version() == -1``) requires fully contiguous inputs,
    so we force a contiguous copy; newer versions only need the data
    copied when it has negative strides.
    """
    if version() == -1:
        return gpu_contiguous(var)
    return cp_on_negative_strides(var)
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None) fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
if (border_mode == 'valid' and subsample == (1,1) and if (border_mode == 'valid' and subsample == (1,1) and
direction_hint == 'bprop weights'): direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set # Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for. # up a suitable 'fake' convolution to compute the gradient for.
img = cp_on_negative_strides(img.dimshuffle(1, 0, 2, 3)) img = contig_version(img.dimshuffle(1, 0, 2, 3))
if conv_mode == 'conv': if conv_mode == 'conv':
# We need to flip manually. These 'kerns' are not the kernels # We need to flip manually. These 'kerns' are not the kernels
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW. # that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
...@@ -674,7 +681,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -674,7 +681,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# Special case: We can be faster by using GpuDnnConvGradI to compute # Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution. # the full convolution as the backward pass of a valid convolution.
# We just need to set up a suitable 'fake' valid convolution. # We just need to set up a suitable 'fake' valid convolution.
img = cp_on_negative_strides(img) img = gpu_contiguous(img) # cudnn v1 and v2 rc3 need contiguous data
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv' conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1 shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
...@@ -686,9 +693,9 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -686,9 +693,9 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
return GpuDnnConvGradI()(kerns, img, out, desc) return GpuDnnConvGradI()(kerns, img, out, desc)
# Standard case: We use GpuDnnConv with suitable padding. # Standard case: We use GpuDnnConv with suitable padding.
# cp_on_negative_strides will return a gpu_contiguous copy # contig_version will return a gpu_contiguous copy
# if the img contains negative strides # if the img contains negative strides
img = cp_on_negative_strides(img) img = contig_version(img)
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(img.shape, kerns.shape) conv_mode=conv_mode)(img.shape, kerns.shape)
...@@ -746,7 +753,7 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -746,7 +753,7 @@ class GpuDnnPoolDesc(GpuOp):
self.stride = stride self.stride = stride
assert len(stride) == 2 assert len(stride) == 2
self.pad = pad self.pad = pad
if (pad[0] != 0 or pad[1] != 0) and version() < 20: if (pad[0] != 0 or pad[1] != 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2") raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
def __setstate__(self, d): def __setstate__(self, d):
...@@ -755,7 +762,7 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -755,7 +762,7 @@ class GpuDnnPoolDesc(GpuOp):
self.pad = (0, 0) self.pad = (0, 0)
def make_node(self): def make_node(self):
if self.pad != (0, 0) and version() < 20: if self.pad != (0, 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2") raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
return Apply(self, [], return Apply(self, [],
......
...@@ -1763,7 +1763,7 @@ def get_device_type_sizes(): ...@@ -1763,7 +1763,7 @@ def get_device_type_sizes():
del t del t
except Exception, e: except Exception, e:
_logger.warning(("Optimization Warning: " _logger.warning(("Optimization Warning: "
"Got the following error, but we can ignore it. " "Got the following error, but you can ignore it. "
"This could cause less GpuElemwise fused together.\n" "This could cause less GpuElemwise fused together.\n"
"%s") % e) "%s") % e)
......
...@@ -70,7 +70,7 @@ def test_pooling(): ...@@ -70,7 +70,7 @@ def test_pooling():
x = T.ftensor4() x = T.ftensor4()
for func, pad in product((T.max, T.mean), for func, pad in product((T.max, T.mean),
((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))): ((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))):
if pad != (0, 0) and cuda.dnn.version() < 20: if pad != (0, 0) and cuda.dnn.version() == -1:
continue continue
if pad != (0, 0) and func is T.mean: if pad != (0, 0) and func is T.mean:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论