Commit bc6e4d4e authored by Dustin Webb

Added test for the GpuSoftmax -> GpuDnnSoftmax optimization.

Parent 113afd29
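For context, the optimization under test rewrites a `GpuSoftmax` node into a `GpuDnnSoftmax` node when the 'cudnn' optimizations are enabled. The sketch below is illustrative only and is not part of this commit; it assumes a CUDA build of Theano with cuDNN available and constructs its own compilation mode. It mirrors roughly what the new `check_types_opt` assertion verifies: after compilation, exactly one `GpuDnnSoftmax` node should appear in the graph.

```python
# Illustrative sketch only (assumes theano.sandbox.cuda with cuDNN is usable);
# mirrors the check_types_opt assertion added in this commit.
import theano
import theano.tensor as T
from theano.sandbox.cuda import dnn

x = T.fmatrix('x')
# Assumed mode construction: the default mode plus the GPU and cuDNN optimizations.
mode = theano.compile.get_default_mode().including('gpu', 'cudnn')
f_gpu = theano.function([x], T.nnet.softmax(x), mode=mode)

dnn_nodes = [n for n in f_gpu.maker.fgraph.toposort()
             if isinstance(n.op, dnn.GpuDnnSoftmax)]
assert len(dnn_nodes) == 1  # GpuSoftmax was replaced by GpuDnnSoftmax
```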
@@ -546,9 +546,9 @@ err%(name)s = cudnnSoftmaxForward(
@local_optimizer([GpuSoftmax])
def local_softmax_dnn(node):
if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle('x', 'x', 0, 1)
ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
out = as_cuda_ndarray_variable(out.dimshuffle(2, 3))
out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
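The change above lifts the 2D softmax input to a 4D (rows, columns, 1, 1) layout instead of (1, 1, rows, columns), so each row becomes one image in 'bc01' format and the columns become channels; cuDNN's 'channel' softmax then normalizes over the columns, matching Theano's row-wise softmax. The NumPy sketch below (illustrative only, not from the diff) shows that equivalence on plain arrays.

```python
import numpy

def softmax_rows(a):
    # Reference row-wise softmax, numerically stabilized.
    e = numpy.exp(a - a.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

data = numpy.random.rand(4, 7).astype('float32')

# New layout: (batch=rows, channels=columns, 1, 1); softmax over the channel axis.
four_d = data[:, :, None, None]
e = numpy.exp(four_d - four_d.max(axis=1, keepdims=True))
channel_softmax = e / e.sum(axis=1, keepdims=True)

# Dropping the two broadcastable axes recovers the row-wise result.
assert numpy.allclose(softmax_rows(data), channel_softmax[:, :, 0, 0])
```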
from nose.plugins.skip import SkipTest
import numpy
import unittest
import theano
from theano.gof.python25 import any
@@ -208,7 +209,17 @@ def test_softmax_with_bias():
cmp(128, 64 * 1024)
def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx):
class test_SoftMax(unittest.TestCase):
def _test_softmax(
self,
x,
x_gpu,
f_z,
f_gpu_z,
cmp,
gpu_mode,
check_types
):
"""
This is a basic test for GpuSoftmax and GpuDnnSoftmax.
@@ -217,10 +228,10 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx):
"""
f_z_out = f_z(x)
f_gpu_z_out = f_gpu_z(x_gpu)
f = theano.function([x], f_z_out, mode=mode_without_gpu)
f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=mode_with_gpu)
assert isinstance(f.maker.fgraph.toposort()[-1].op, cpu_type)
assert isinstance(f_gpu.maker.fgraph.toposort()[topo_idx].op, gpu_type)
f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=gpu_mode)
check_types(f, f_gpu)
# We need to test n > 32 * 1024 to check that we make the block loop.
cmp(1, 5, f, f_gpu)
@@ -241,46 +252,60 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx):
cmp(2, 10000, f, f_gpu)
cmp(128, 16 * 1024, f, f_gpu)
cmp(128, 64 * 1024, f, f_gpu)
cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^15 - 1 rows
# cudnn permits no more than 2^15 - 1 rows
cmp((2 << 15) - 1, 5, f, f_gpu)
cmp(5, 2 << 15, f, f_gpu)
return f, f_gpu
def test_softmax():
def cmp(n, m, f, f_gpu):
def _cmp(self, n, m, f, f_gpu):
#print "test_softmax",n,m
data = numpy.arange(n * m, dtype='float32').reshape(n, m)
out = f(data)
gout = f_gpu(data)
assert numpy.allclose(out, gout), numpy.absolute(out - gout)
def _check_types(self, graph, graph_gpu, topo_idx, f_type, f_gpu_type):
assert isinstance(graph.maker.fgraph.toposort()[-1].op, f_type)
assert isinstance(
graph_gpu.maker.fgraph.toposort()[topo_idx].op,
f_gpu_type
)
def test_softmax(self):
x = T.fmatrix('x')
z = T.nnet.softmax
f, f_gpu = _test_softmax(
def check_types(graph, graph_gpu):
self._check_types(
graph,
graph_gpu,
-2,
type(z),
cuda.nnet.GpuSoftmax
)
f, f_gpu = self._test_softmax(
x,
x,
z,
z,
type(z),
cuda.nnet.GpuSoftmax,
cmp,
-2
self._cmp,
mode_with_gpu,
check_types
)
# cuDNN cannot handle these test cases, but the Theano softmax can, so we
# test them only for the Theano softmax.
cmp(2 << 15, 5, f, f_gpu)
cmp(0, 10, f, f_gpu)
self._cmp(2 << 15, 5, f, f_gpu)
self._cmp(0, 10, f, f_gpu)
def test_cudnn_softmax():
def test_cudnn_softmax(self):
def cmp(n, m, f, f_gpu):
#print "test_softmax",n,m
data = numpy.arange(n * m, dtype='float32').reshape(n, m)
gdata = numpy.asarray(data).transpose()[None, :, :, None]
gdata = numpy.asarray(data)[:, :, None, None]
out = f(data)
gout = numpy.asarray(f_gpu(gdata))[0, :, :, 0].transpose()
gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
assert numpy.allclose(out, gout), numpy.absolute(out - gout)
x = T.matrix('x', 'float32')
@@ -291,4 +316,33 @@ def test_cudnn_softmax():
'accurate',
'channel'
)
_test_softmax(x, x_gpu, f_z, f_gpu, type(f_z), type(f_gpu), cmp, -1)
def check_types(graph, graph_gpu):
self._check_types(
graph,
graph_gpu,
-1,
type(f_z),
theano.sandbox.cuda.dnn.GpuDnnSoftmax
)
def check_types_opt(graph, graph_gpu):
assert isinstance(graph.maker.fgraph.toposort()[-1].op, type(f_z))
assert len([n for n in graph_gpu.maker.fgraph.toposort()
if isinstance(
n.op,
theano.sandbox.cuda.dnn.GpuDnnSoftmax
)]) == 1
self._test_softmax(
x,
x_gpu,
f_z,
f_gpu,
cmp,
mode_with_gpu,
check_types
)
mode = mode_with_gpu.including("cudnn")
self._test_softmax(x, x, f_z, f_z, self._cmp, mode, check_types_opt)