提交 a5588fce 作者: Frédéric Bastien 提交者: GitHub

Merge pull request #6333 from abergeron/dnn_redux_contig

Restrict cuDNN reduction to contiguous inputs
...@@ -59,7 +59,12 @@ int APPLY_SPECIFIC(dnn_redux)(PyGpuArrayObject *input, ...@@ -59,7 +59,12 @@ int APPLY_SPECIFIC(dnn_redux)(PyGpuArrayObject *input,
static float falpha = 1.0f; static float falpha = 1.0f;
static double dalpha = 1.0; static double dalpha = 1.0;
static float fbeta = 0.0f; static float fbeta = 0.0f;
static double dbeta = 0.0; static double dbeta = 0.0;
if (!GpuArray_IS_C_CONTIGUOUS(&input->ga)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
return 1;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) != 0) if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) != 0)
return 1; return 1;
......
...@@ -1695,6 +1695,7 @@ class GpuDnnReduction(DnnBase): ...@@ -1695,6 +1695,7 @@ class GpuDnnReduction(DnnBase):
def make_node(self, inp): def make_node(self, inp):
ctx_name = infer_context_name(inp) ctx_name = infer_context_name(inp)
inp = as_gpuarray_variable(inp, ctx_name) inp = as_gpuarray_variable(inp, ctx_name)
inp = gpu_contiguous(inp)
if inp.ndim > 8: if inp.ndim > 8:
raise ValueError("cuDNN reduction doesn't support nd > 8") raise ValueError("cuDNN reduction doesn't support nd > 8")
assert inp.dtype in ['float16', 'float32', 'float64'] assert inp.dtype in ['float16', 'float32', 'float64']
......
...@@ -1534,6 +1534,24 @@ def test_dnn_reduction_strides(): ...@@ -1534,6 +1534,24 @@ def test_dnn_reduction_strides():
yield dnn_reduction_strides, (2, 3, 2), (0, 1, 2), slice(None, None, -1) yield dnn_reduction_strides, (2, 3, 2), (0, 1, 2), slice(None, None, -1)
def test_dnn_reduction_error():
    """Compare a Theano sum over a transposed ``alloc`` with a NumPy reference.

    The graph broadcasts ``2 * vec`` into ``n_loops`` rows with ``T.alloc``,
    transposes the result and sums along axis 1; the same computation is
    carried out with NumPy and the two results must agree.

    NOTE(review): presumably a regression test for reductions over
    non-contiguous inputs (the transposed ``alloc``) -- confirm against the
    accompanying change. The function is compiled without an explicit
    ``mode``, so whichever default mode is active is what gets exercised.
    """
    n_loops = 5
    vec = np.arange(0, 10, dtype=np.float32)

    # NumPy reference: every row holds 2 * vec (broadcast assignment fills
    # all rows at once), then the transposed matrix is summed along axis 1.
    expected = np.zeros((n_loops, 10))
    expected[:] = 2.0 * vec
    expected = expected.transpose().sum(axis=1)

    # Symbolic counterpart of the computation above.
    vec_var = T.vector(dtype=theano.config.floatX)
    tiled = T.alloc(2.0 * vec_var, n_loops, vec_var.shape[0])
    summed = T.sum(T.transpose(tiled), axis=1)
    compiled = theano.function(inputs=[vec_var], outputs=summed)

    actual = compiled(vec)
    utt.assert_allclose(expected, actual)
def dnn_maxargmax(nd, idtype, axis): def dnn_maxargmax(nd, idtype, axis):
inp = T.TensorType(idtype, (False,) * nd)() inp = T.TensorType(idtype, (False,) * nd)()
res = T.max_and_argmax(inp, axis=axis) res = T.max_and_argmax(inp, axis=axis)
......
...@@ -15,7 +15,7 @@ from theano.tensor.tests import test_opt ...@@ -15,7 +15,7 @@ from theano.tensor.tests import test_opt
class test_fusion(test_opt.test_fusion): class test_fusion(test_opt.test_fusion):
mode = mode_with_gpu mode = mode_with_gpu.excluding('local_dnn_reduction')
_shared = staticmethod(gpuarray_shared_constructor) _shared = staticmethod(gpuarray_shared_constructor)
topo_exclude = (GpuFromHost, HostFromGpu) topo_exclude = (GpuFromHost, HostFromGpu)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论