Merge pull request #6333 from abergeron/dnn_redux_contig

Restrict cuDNN reduction to contiguous inputs

Merge pull request #6333 from abergeron/dnn_redux_contig
a5588fce · Frédéric Bastien · GitHub · e72c8872 · 40d204d5 · a5588fce
--- a/theano/gpuarray/c_code/dnn_redux.c
+++ b/theano/gpuarray/c_code/dnn_redux.c
@@ -59,7 +59,12 @@ int APPLY_SPECIFIC(dnn_redux)(PyGpuArrayObject *input,
  static float falpha = 1.0f;
  static double dalpha = 1.0;
  static float fbeta = 0.0f;
-  static double dbeta = 0.0;  
+  static double dbeta = 0.0;
+  if (!GpuArray_IS_C_CONTIGUOUS(&input->ga)) {
+    PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
+    return 1;
+  }
  if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) != 0)
    return 1;

--- a/theano/gpuarray/dnn.py
+++ b/theano/gpuarray/dnn.py
@@ -1695,6 +1695,7 @@ class GpuDnnReduction(DnnBase):
    def make_node(self, inp):
        ctx_name = infer_context_name(inp)
        inp = as_gpuarray_variable(inp, ctx_name)
+        inp = gpu_contiguous(inp)
        if inp.ndim > 8:
            raise ValueError("cuDNN reduction doesn't support nd > 8")
        assert inp.dtype in ['float16', 'float32', 'float64']

--- a/theano/gpuarray/tests/test_dnn.py
+++ b/theano/gpuarray/tests/test_dnn.py
@@ -1534,6 +1534,24 @@ def test_dnn_reduction_strides():
    yield dnn_reduction_strides, (2, 3, 2), (0, 1, 2), slice(None, None, -1)
+def test_dnn_reduction_error():  # Checks a Theano sum over a transposed alloc against a NumPy reference; despite the name it asserts values, not an exception.
+    nLoops = 5  # number of reference rows filled below; mirrors the literal 5 used in the shapes
+    vec = np.arange(0, 10, dtype=np.float32)
+    slow_output = np.zeros((5, 10))  # NumPy reference buffer, one row per loop iteration
+    for i in range(nLoops):
+        slow_output[i, :] = 2.0 * vec
+    slow_output = np.sum(slow_output.transpose(), axis=1)  # shape (10,): entry j sums the 5 copies of 2 * vec[j]
+    vecT = T.vector(dtype=theano.config.floatX)
+    outputT = T.alloc(2.0 * vecT, 5, vecT.shape[0])  # symbolic counterpart of the (5, n) reference buffer
+    outputSummedT = T.sum(T.transpose(outputT), axis=1)  # NOTE(review): presumably the transpose is what hands a non-contiguous input to the reduction -- confirm
+    f3 = theano.function(inputs=[vecT], outputs=outputSummedT)  # no mode argument is passed here
+    output = f3(vec)
+    utt.assert_allclose(slow_output, output)
 def dnn_maxargmax(nd, idtype, axis):
    inp = T.TensorType(idtype, (False,) * nd)()
    res = T.max_and_argmax(inp, axis=axis)

--- a/theano/gpuarray/tests/test_others.py
+++ b/theano/gpuarray/tests/test_others.py
@@ -15,7 +15,7 @@ from theano.tensor.tests import test_opt
 class test_fusion(test_opt.test_fusion):
-    mode = mode_with_gpu
+    mode = mode_with_gpu.excluding('local_dnn_reduction')
    _shared = staticmethod(gpuarray_shared_constructor)
    topo_exclude = (GpuFromHost, HostFromGpu)