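Refactored GpuDnnSoftmax to use the new DnnBase; local_softmax_dnn now builds the op in 'channel' mode instead of 'instance', and test_nnet.py gets PEP 8 formatting cleanups.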

113afd29 · Dustin Webb · aef17262 · 113afd29 · 113afd29
--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -386,7 +386,7 @@ def local_conv_dnn(node):
 gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')


-class GpuDnnSoftmax(GpuOp):
+class GpuDnnSoftmax(DnnBase):
    """
    Op for the cuDNN Softmax.

@@ -416,30 +416,19 @@ class GpuDnnSoftmax(GpuOp):
        assert x.ndim == 4
        return Apply(self, [x], [x.type()])

-    def c_headers(self):
-        return ['cudnn.h', 'cudnn_helper.h']
-
-    def c_header_dirs(self):
-        return [os.path.dirname(__file__)]
-
-    def c_libraries(self):
-        return ['cudnn']
-
    def c_support_code_struct(self, node, struct_id):
        return """
-cudnnHandle_t softmax_handle_%(id)d;
 cudnnTensor4dDescriptor_t softmax_input_%(id)d;
 cudnnTensor4dDescriptor_t softmax_output_%(id)d;
 """ % dict(id=struct_id)

    def c_init_code_struct(self, node, struct_id, sub):
        return """
-softmax_handle_%(id)d = NULL;
 softmax_input_%(id)d = NULL;
 softmax_output_%(id)d = NULL;

 cudnnStatus_t err%(id)d;
-if ((err%(id)d = cudnnCreate(&softmax_handle_%(id)d)) != CUDNN_STATUS_SUCCESS) {
+if ((err%(id)d = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
  PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle: %%s",
               cudnnGetErrorString(err%(id)d));
  %(fail)s
@@ -463,9 +452,6 @@ if(softmax_input_%(id)d != NULL)

 if(softmax_output_%(id)d != NULL)
  cudnnDestroyTensor4dDescriptor(softmax_output_%(id)d);
-
-if(softmax_handle_%(id)d != NULL)
-  cudnnDestroy(softmax_handle_%(id)d);
 """ % dict(id=struct_id)

    def c_code(self, node, name, inputs, outputs, sub):
@@ -542,7 +528,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
 }

 err%(name)s = cudnnSoftmaxForward(
-  softmax_handle_%(id)d,
+  _handle,
  algo%(id)d,
  mode%(id)d,
  softmax_input_%(id)d,
@@ -561,7 +547,7 @@ err%(name)s = cudnnSoftmaxForward(
 def local_softmax_dnn(node):
    if isinstance(node.op, GpuSoftmax):
        ins = node.inputs[0].dimshuffle('x', 'x', 0, 1)
-        out = GpuDnnSoftmax('bc01', 'accurate', 'instance')(gpu_contiguous(ins))
+        out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
        out = as_cuda_ndarray_variable(out.dimshuffle(2, 3))
        return [out]


--- a/theano/sandbox/cuda/tests/test_nnet.py
+++ b/theano/sandbox/cuda/tests/test_nnet.py
@@ -241,11 +241,12 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx):
    cmp(2, 10000, f, f_gpu)
    cmp(128, 16 * 1024, f, f_gpu)
    cmp(128, 64 * 1024, f, f_gpu)
-    cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^15 - 1 rows
+    cmp((2 << 15) - 1, 5, f, f_gpu)  # cudnn permits no more than 2^16 - 1 rows
    cmp(5, 2 << 15, f, f_gpu)

    return f, f_gpu

+
 def test_softmax():
    def cmp(n, m, f, f_gpu):
        #print "test_softmax",n,m
@@ -256,13 +257,23 @@ def test_softmax():

    x = T.fmatrix('x')
    z = T.nnet.softmax
-    f, f_gpu = _test_softmax(x, x, z, z, type(z), cuda.nnet.GpuSoftmax, cmp, -2)
+    f, f_gpu = _test_softmax(
+        x,
+        x,
+        z,
+        z,
+        type(z),
+        cuda.nnet.GpuSoftmax,
+        cmp,
+        -2
+    )

    # cuDNN cannot handle these test cases but the Theano softmax can so we
    # test them only for the Theano softmax.
    cmp(2 << 15, 5, f, f_gpu)
    cmp(0, 10, f, f_gpu)

+
 def test_cudnn_softmax():
    def cmp(n, m, f, f_gpu):
        #print "test_softmax",n,m