Commit d7f7854a — authored by Frédéric Bastien, committed by GitHub

Merge pull request #6108 from abergeron/careduce_compile

Make the CAReduce tests faster
......@@ -655,7 +655,7 @@ class ParamsType(Type):
return list(sorted(list(c_support_code_set))) + [final_struct_code]
def c_code_cache_version(self):
return ((2,), tuple(t.c_code_cache_version() for t in self.types))
return ((3,), tuple(t.c_code_cache_version() for t in self.types))
# As this struct has constructor and destructor, it could be instanciated on stack,
# but current implementations of C ops will then pass the instance by value at functions,
......@@ -684,6 +684,7 @@ class ParamsType(Type):
/* Seems c_init() is not called for a op param. So I call `new` here. */
%(name)s = new %(struct_name)s;
{ // This need a separate namespace for Clinker
const char* fields[] = {%(fields_list)s};
if (py_%(name)s == Py_None) {
PyErr_SetString(PyExc_ValueError, "ParamsType: expected an object, not None.");
......@@ -703,5 +704,6 @@ class ParamsType(Type):
%(fail)s
}
}
}
""" % dict(name=name, struct_name=self.name, length=self.length, fail=sub['fail'],
fields_list='"%s"' % '", "'.join(self.fields))
......@@ -7,7 +7,7 @@ import scipy.special
import theano
from theano import scalar, gof, tensor
from theano.compile import DebugMode
from theano.compile import DebugMode, Mode
from theano.tests.unittest_tools import SkipTest, assert_allclose
from theano.tensor.tests import test_elemwise
......@@ -212,32 +212,40 @@ class test_GpuCAReduceCPY(test_elemwise.test_CAReduce):
def test_perform(self):
for dtype in self.dtypes + self.bin_dtypes:
for op in self.reds:
self.with_linker(gof.PerformLinker(), op, dtype=dtype,
pre_scalar_op=self.pre_scalar_op)
self.with_mode(Mode(linker='py',
optimizer=mode_with_gpu.optimizer),
op, dtype=dtype,
pre_scalar_op=self.pre_scalar_op)
def test_perform_nan(self):
for dtype in self.dtypes:
if not dtype.startswith('float'):
continue
for op in self.reds:
self.with_linker(gof.PerformLinker(), op, dtype=dtype,
test_nan=True,
pre_scalar_op=self.pre_scalar_op)
self.with_mode(Mode(linker='py',
optimizer=mode_with_gpu.optimizer),
op, dtype=dtype,
test_nan=True,
pre_scalar_op=self.pre_scalar_op)
def test_c(self):
for dtype in self.dtypes + self.bin_dtypes:
for op in self.reds:
self.with_linker(gof.CLinker(), op, dtype=dtype,
pre_scalar_op=self.pre_scalar_op)
self.with_mode(Mode(linker='c',
optimizer=mode_with_gpu.optimizer),
op, dtype=dtype,
pre_scalar_op=self.pre_scalar_op)
def test_c_nan(self):
for dtype in self.dtypes:
if not dtype.startswith('float'):
continue
for op in self.reds:
self.with_linker(gof.CLinker(), op, dtype=dtype,
test_nan=True,
pre_scalar_op=self.pre_scalar_op)
self.with_mode(Mode(linker='c',
optimizer=mode_with_gpu.optimizer),
op, dtype=dtype,
test_nan=True,
pre_scalar_op=self.pre_scalar_op)
def test_infer_shape(self):
for dtype in self.dtypes:
......@@ -334,6 +342,9 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
scalar.maximum, scalar.minimum]
pre_scalar_op = None
def test_perform_noopt(self):
return
def test_perform(self):
return
......
......@@ -1202,7 +1202,7 @@ second dimension
return support_code
def c_code_cache_version_apply(self, node):
version = [12] # the version corresponding to the c code in this Op
version = [13] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node = Apply(
......@@ -1622,7 +1622,7 @@ class CAReduce(Op):
def c_code_cache_version_apply(self, node):
# the version corresponding to the c code in this Op
version = [7]
version = [8]
# now we insert versions for the ops on which we depend...
scalar_node = Apply(
......
......@@ -72,16 +72,12 @@ def make_checks(loop_orders, dtypes, sub):
%(var)s_n%(index)s = PyArray_DIMS(%(var)s)[%(index)s];
%(var)s_stride%(index)s = PyArray_STRIDES(%(var)s)[%(index)s] / sizeof(%(dtype)s);
%(var)s_jump%(index)s_%(j)s = %(jump)s;
//printf("%(var)s_jump%(index)s_%(j)s is:");
//std::cout << %(var)s_jump%(index)s_%(j)s << std::endl;
""" % locals()
adjust = "%(var)s_n%(index)s*%(var)s_stride%(index)s" % locals()
else:
jump = "-(%s)" % adjust
init += """
%(var)s_jump%(index)s_%(j)s = %(jump)s;
//printf("%(var)s_jump%(index)s_%(j)s is:");
//std::cout << %(var)s_jump%(index)s_%(j)s << std::endl;
""" % locals()
adjust = "0"
check = ""
......@@ -169,6 +165,8 @@ def make_alloc(loop_orders, dtype, sub, fortran='0'):
PyErr_Clear();
Py_XDECREF(%(olv)s);
%(olv)s = (PyArrayObject*)PyArray_EMPTY(%(nd)s, dims, %(type)s, 0);
} else {
Py_DECREF(success);
}
}
if (!%(olv)s) {
......
......@@ -15,7 +15,7 @@ from theano import gof, scalar, config
from theano import tensor
from theano.tensor import TensorType, as_tensor_variable
from theano.compile.mode import get_default_mode
from theano.compile.mode import get_default_mode, Mode
from theano.tensor.elemwise import (CAReduce, Elemwise, DimShuffle,
Prod, ProdWithoutZeros)
from theano.tests import unittest_tools
......@@ -365,9 +365,9 @@ class test_CAReduce(unittest_tools.InferShapeTester):
((), ())]
type = TensorType
def with_linker(self, linker, scalar_op=scalar.add, dtype="floatX",
pre_scalar_op=None,
test_nan=False, tensor_op=None):
def with_mode(self, mode, scalar_op=scalar.add, dtype="floatX",
pre_scalar_op=None,
test_nan=False, tensor_op=None):
for xsh, tosum in self.cases:
if dtype == "floatX":
dtype = theano.config.floatX
......@@ -383,7 +383,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
if tosum is None:
tosum = list(range(len(xsh)))
f = copy(linker).accept(FunctionGraph([x], [e])).make_function()
f = theano.function([x], e, mode=mode)
xv = np.asarray(np.random.rand(*xsh))
if dtype not in tensor.discrete_dtypes:
......@@ -495,8 +495,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
e = tensor_op(x, axis=tosum)
if tosum is None:
tosum = list(range(len(xsh)))
f = copy(linker).accept(FunctionGraph([x],
[e.shape])).make_function()
f = theano.function([x], e.shape, mode=mode)
if not(scalar_op in [scalar.maximum, scalar.minimum] and
((xsh == () or np.prod(xsh) == 0))):
try:
......@@ -505,70 +504,79 @@ class test_CAReduce(unittest_tools.InferShapeTester):
# GpuCAReduce don't implement all cases when size is 0
assert xv.size == 0
def test_perform_noopt(self):
self.with_mode(Mode(linker='py', optimizer=None), scalar.add, dtype='floatX')
def test_perform(self):
for dtype in ["bool", "floatX", "complex64", "complex128", "int8", "uint8"]:
self.with_linker(gof.PerformLinker(), scalar.add, dtype=dtype)
self.with_linker(gof.PerformLinker(), scalar.mul, dtype=dtype)
self.with_linker(gof.PerformLinker(), scalar.maximum, dtype=dtype)
self.with_linker(gof.PerformLinker(), scalar.minimum, dtype=dtype)
self.with_linker(gof.PerformLinker(), scalar.and_, dtype=dtype,
tensor_op=tensor.all)
self.with_linker(gof.PerformLinker(), scalar.or_, dtype=dtype,
tensor_op=tensor.any)
self.with_mode(Mode(linker='py'), scalar.add, dtype=dtype)
self.with_mode(Mode(linker='py'), scalar.mul, dtype=dtype)
self.with_mode(Mode(linker='py'), scalar.maximum, dtype=dtype)
self.with_mode(Mode(linker='py'), scalar.minimum, dtype=dtype)
self.with_mode(Mode(linker='py'), scalar.and_, dtype=dtype,
tensor_op=tensor.all)
self.with_mode(Mode(linker='py'), scalar.or_, dtype=dtype,
tensor_op=tensor.any)
for dtype in ["int8", "uint8"]:
self.with_linker(gof.PerformLinker(), scalar.or_, dtype=dtype)
self.with_linker(gof.PerformLinker(), scalar.and_, dtype=dtype)
self.with_linker(gof.PerformLinker(), scalar.xor, dtype=dtype)
self.with_mode(Mode(linker='py'), scalar.or_, dtype=dtype)
self.with_mode(Mode(linker='py'), scalar.and_, dtype=dtype)
self.with_mode(Mode(linker='py'), scalar.xor, dtype=dtype)
def test_perform_nan(self):
for dtype in ["floatX", "complex64", "complex128"]:
self.with_linker(gof.PerformLinker(), scalar.add, dtype=dtype,
test_nan=True)
self.with_linker(gof.PerformLinker(), scalar.mul, dtype=dtype,
test_nan=True)
self.with_linker(gof.PerformLinker(), scalar.maximum, dtype=dtype,
test_nan=True)
self.with_linker(gof.PerformLinker(), scalar.minimum, dtype=dtype,
test_nan=True)
self.with_linker(gof.PerformLinker(), scalar.or_, dtype=dtype,
test_nan=True, tensor_op=tensor.any)
self.with_linker(gof.PerformLinker(), scalar.and_, dtype=dtype,
test_nan=True, tensor_op=tensor.all)
self.with_mode(Mode(linker='py'), scalar.add, dtype=dtype,
test_nan=True)
self.with_mode(Mode(linker='py'), scalar.mul, dtype=dtype,
test_nan=True)
self.with_mode(Mode(linker='py'), scalar.maximum, dtype=dtype,
test_nan=True)
self.with_mode(Mode(linker='py'), scalar.minimum, dtype=dtype,
test_nan=True)
self.with_mode(Mode(linker='py'), scalar.or_, dtype=dtype,
test_nan=True, tensor_op=tensor.any)
self.with_mode(Mode(linker='py'), scalar.and_, dtype=dtype,
test_nan=True, tensor_op=tensor.all)
def test_c_noopt(self):
# We need to make sure that we cover the corner cases that
# optimizations normally cover
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
self.with_mode(Mode(linker='c', optimizer=None), scalar.add, dtype='floatX')
@attr('slow')
def test_c(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
for dtype in ["bool", "floatX", "complex64", "complex128", "int8", "uint8"]:
self.with_linker(gof.CLinker(), scalar.add, dtype=dtype)
self.with_linker(gof.CLinker(), scalar.mul, dtype=dtype)
self.with_mode(Mode(linker='c'), scalar.add, dtype=dtype)
self.with_mode(Mode(linker='c'), scalar.mul, dtype=dtype)
for dtype in ["bool", "floatX", "int8", "uint8"]:
self.with_linker(gof.CLinker(), scalar.minimum, dtype=dtype)
self.with_linker(gof.CLinker(), scalar.maximum, dtype=dtype)
self.with_linker(gof.CLinker(), scalar.and_, dtype=dtype,
tensor_op=tensor.all)
self.with_linker(gof.CLinker(), scalar.or_, dtype=dtype,
tensor_op=tensor.any)
self.with_mode(Mode(linker='c'), scalar.minimum, dtype=dtype)
self.with_mode(Mode(linker='c'), scalar.maximum, dtype=dtype)
self.with_mode(Mode(linker='c'), scalar.and_, dtype=dtype,
tensor_op=tensor.all)
self.with_mode(Mode(linker='c'), scalar.or_, dtype=dtype,
tensor_op=tensor.any)
for dtype in ["bool", "int8", "uint8"]:
self.with_linker(gof.CLinker(), scalar.or_, dtype=dtype)
self.with_linker(gof.CLinker(), scalar.and_, dtype=dtype)
self.with_linker(gof.CLinker(), scalar.xor, dtype=dtype)
self.with_mode(Mode(linker='c'), scalar.or_, dtype=dtype)
self.with_mode(Mode(linker='c'), scalar.and_, dtype=dtype)
self.with_mode(Mode(linker='c'), scalar.xor, dtype=dtype)
@attr('slow')
def test_c_nan(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
for dtype in ["floatX", "complex64", "complex128"]:
self.with_linker(gof.CLinker(), scalar.add, dtype=dtype,
test_nan=True)
self.with_linker(gof.CLinker(), scalar.mul, dtype=dtype,
test_nan=True)
self.with_mode(Mode(linker='c'), scalar.add, dtype=dtype,
test_nan=True)
self.with_mode(Mode(linker='c'), scalar.mul, dtype=dtype,
test_nan=True)
for dtype in ["floatX"]:
self.with_linker(gof.CLinker(), scalar.minimum, dtype=dtype,
test_nan=True)
self.with_linker(gof.CLinker(), scalar.maximum, dtype=dtype,
test_nan=True)
self.with_mode(Mode(linker='c'), scalar.minimum, dtype=dtype,
test_nan=True)
self.with_mode(Mode(linker='c'), scalar.maximum, dtype=dtype,
test_nan=True)
def test_infer_shape(self, dtype=None, pre_scalar_op=None):
if dtype is None:
......@@ -1241,16 +1249,6 @@ def test_gt_grad():
cost = (scores * (scores > 0)).sum()
T.grad(cost, input_)
"""
if __name__ == '__main__':
#unittest.main()
suite = unittest.TestSuite([test_Prod('test_mul_without_zeros_zeros')])
#suite.addTest(test_Prod('test_verify_grad_with_zeros'))
#suite.addTest(test_Prod('test_prod_without_zeros'))
#suite.addTest(test_Prod('test_other_grad_tests'))
unittest.TextTestRunner().run(suite)
"""
def test_clip_grad():
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this comment first!
Register or sign in to comment