提交 8a83fdcb authored 作者: Dustin Webb's avatar Dustin Webb

Incorporated reviewer comments.

上级 b9609e87
...@@ -411,9 +411,6 @@ class GpuDnnSoftmax(GpuOp): ...@@ -411,9 +411,6 @@ class GpuDnnSoftmax(GpuOp):
assert(mode in ('instance', 'channel')) assert(mode in ('instance', 'channel'))
self.mode = mode self.mode = mode
def __str__(self):
return self.__class__.__name__
def make_node(self, x): def make_node(self, x):
x = as_cuda_ndarray_variable(x) x = as_cuda_ndarray_variable(x)
assert x.ndim == 4 assert x.ndim == 4
...@@ -504,6 +501,11 @@ cudnnSoftmaxMode_t mode%(id)d = CUDNN_SOFTMAX_MODE_CHANNEL; ...@@ -504,6 +501,11 @@ cudnnSoftmaxMode_t mode%(id)d = CUDNN_SOFTMAX_MODE_CHANNEL;
if (%(mode)d == 1) if (%(mode)d == 1)
mode%(id)d = CUDNN_SOFTMAX_MODE_INSTANCE; mode%(id)d = CUDNN_SOFTMAX_MODE_INSTANCE;
if (!CudaNdarray_is_c_contiguous(%(ins)s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
%(fail)s
}
err%(name)s = cudnnSetTensor4dDescriptor( err%(name)s = cudnnSetTensor4dDescriptor(
softmax_input_%(id)d, softmax_input_%(id)d,
format%(id)d, format%(id)d,
...@@ -519,7 +521,10 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -519,7 +521,10 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(fail)s %(fail)s
} }
CudaNdarray_prep_output(&(%(outs)s), 4, CudaNdarray_HOST_DIMS(%(ins)s)); if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0)
{
%(fail)s
}
err%(name)s = cudnnSetTensor4dDescriptor( err%(name)s = cudnnSetTensor4dDescriptor(
softmax_output_%(id)d, softmax_output_%(id)d,
...@@ -556,7 +561,7 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -556,7 +561,7 @@ err%(name)s = cudnnSoftmaxForward(
def local_softmax_dnn(node): def local_softmax_dnn(node):
if isinstance(node.op, GpuSoftmax): if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle('x', 'x', 0, 1) ins = node.inputs[0].dimshuffle('x', 'x', 0, 1)
out = GpuDnnSoftmax('bc01', 'accurate', 'instance')(ins) out = GpuDnnSoftmax('bc01', 'accurate', 'instance')(gpu_contiguous(ins))
out = as_cuda_ndarray_variable(out.dimshuffle(2, 3)) out = as_cuda_ndarray_variable(out.dimshuffle(2, 3))
return [out] return [out]
......
...@@ -242,9 +242,9 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx): ...@@ -242,9 +242,9 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx):
cmp(128, 16 * 1024, f, f_gpu) cmp(128, 16 * 1024, f, f_gpu)
cmp(128, 64 * 1024, f, f_gpu) cmp(128, 64 * 1024, f, f_gpu)
cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^15 - 1 rows cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^15 - 1 rows
cmp(2 << 15, 5, f, f_gpu) cmp(5, 2 << 15, f, f_gpu)
cmp(0, 10, f, f_gpu)
return f, f_gpu
def test_softmax(): def test_softmax():
def cmp(n, m, f, f_gpu): def cmp(n, m, f, f_gpu):
...@@ -256,8 +256,10 @@ def test_softmax(): ...@@ -256,8 +256,10 @@ def test_softmax():
x = T.fmatrix('x') x = T.fmatrix('x')
z = T.nnet.softmax z = T.nnet.softmax
_test_softmax(x, x, z, z, type(z), cuda.nnet.GpuSoftmax, cmp, -2) f, f_gpu = _test_softmax(x, x, z, z, type(z), cuda.nnet.GpuSoftmax, cmp, -2)
cmp(2 << 15, 5, f, f_gpu)
cmp(0, 10, f, f_gpu)
def test_cudnn_softmax(): def test_cudnn_softmax():
def cmp(n, m, f, f_gpu): def cmp(n, m, f, f_gpu):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论