提交 076e7b06 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

It seems cudnn has some dark corners when dealing with non-contiguous data so disable that for now.

上级 cc463fb9
......@@ -19,7 +19,6 @@ from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
host_from_gpu,
gpu_contiguous, HostFromGpu,
cp_on_negative_strides,
gpu_alloc)
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
GpuDownsampleFactorMaxGrad)
......@@ -435,7 +434,7 @@ class GpuDnnConv(DnnBase, COp):
img, kerns, output, desc, alpha = inp
top, = grads
top = cp_on_negative_strides(top)
top = gpu_contiguous(top)
d_img = GpuDnnConvGradI()(kerns, top, img.zeros_like(), desc)
d_kerns = GpuDnnConvGradW()(img, top, kerns.zeros_like(), desc)
......@@ -574,7 +573,7 @@ class GpuDnnConvGradI(DnnBase, COp):
kerns, top, output, desc, alpha = inp
img, = grads
img = cp_on_negative_strides(img)
img = gpu_contiguous(img)
d_kerns = GpuDnnConvGradW()(img, top, kerns.zeros_like(), desc)
d_top = GpuDnnConv()(img, kerns, top.zeros_like(), desc)
......@@ -649,19 +648,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
capability of 3.0 or higer. This means that older GPU will not
work with this Op.
"""
def contig_version(var):
if version() == -1:
var = gpu_contiguous(var)
else:
var = cp_on_negative_strides(var)
return var
fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
if (border_mode == 'valid' and subsample == (1,1) and
direction_hint == 'bprop weights'):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
img = contig_version(img.dimshuffle(1, 0, 2, 3))
img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
if conv_mode == 'conv':
# We need to flip manually. These 'kerns' are not the kernels
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
......@@ -695,7 +687,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# Standard case: We use GpuDnnConv with suitable padding.
# contig_version will return a gpu_contiguous copy
# if the img contains negative strides
img = contig_version(img)
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(img.shape, kerns.shape)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论