提交 7d56c521 authored 作者: lamblin's avatar lamblin

Merge pull request #892 from nouiz/no_gxx

no_gxx: more things work when g++ is not available.
......@@ -19,7 +19,11 @@ In this section we will define a couple optimizations on doubles.
Later, the rest is more useful for when that decorator syntax type thing
doesn't work. (There are optimizations that don't fit that model).
.. note::
There is an optimization tag `cxx_only` that marks optimizations
which insert Ops that only have a C implementation. Such
optimizations should not be run when no C++ compiler is available.
Global and local optimizations
==============================
......
......@@ -363,6 +363,21 @@ import theano and print the config variable, as in:
Extra parameters to pass to gcc when compiling. Extra include paths,
library paths, configuration options, etc.
.. attribute:: config.cxx
Default: 'g++' if g++ is present, '' otherwise.
The C++ compiler to use. If empty, C++ code is not compiled.
We automatically detect whether g++ is present and disable C++
compilation if it is not.
A warning is printed if g++ is not detected. It is recommended to
run with C++ compilation enabled, as Theano will be much slower
otherwise.
Currently only g++ is supported, but adding support for other
compilers would be easy.
.. attribute:: optimizer_excluding
Default: ""
......
......@@ -35,7 +35,7 @@ AddConfigVar('DebugMode.patience',
AddConfigVar('DebugMode.check_c',
"Run C implementations where possible",
BoolParam(True),
BoolParam(bool(theano.config.cxx)),
in_c_key=False)
AddConfigVar('DebugMode.check_py',
......
......@@ -83,10 +83,13 @@ def register_linker(name, linker):
# If a string is passed as the optimizer argument in the constructor
# for Mode, it will be used as the key to retrieve the real optimizer
# in this dictionary
OPT_FAST_RUN = gof.Query(include=['fast_run'])
exclude=[]
if not theano.config.cxx:
exclude = ['cxx_only']
OPT_FAST_RUN = gof.Query(include=['fast_run'], exclude=exclude)
OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
OPT_FAST_COMPILE = gof.Query(include=['fast_compile'])
OPT_STABILIZE = gof.Query(include=['fast_run'])
OPT_FAST_COMPILE = gof.Query(include=['fast_compile'], exclude=exclude)
OPT_STABILIZE = gof.Query(include=['fast_run'], exclude=exclude)
OPT_STABILIZE.position_cutoff = 1.5000001
OPT_FAST_RUN.name = 'OPT_FAST_RUN'
OPT_FAST_RUN_STABLE.name = 'OPT_FAST_RUN_STABLE'
......@@ -367,7 +370,10 @@ class Mode(object):
# FunctionMaker, the Mode will be taken from this dictionary using the
# string as the key
FAST_COMPILE = Mode('py', 'fast_compile')
FAST_RUN = Mode('cvm', 'fast_run')
if theano.config.cxx:
FAST_RUN = Mode('cvm', 'fast_run')
else:
FAST_RUN = Mode('vm', 'fast_run')
predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
'FAST_RUN': FAST_RUN,
......
......@@ -197,19 +197,22 @@ wb1 = WeirdBrokenOp('times1')
def test_badthunkoutput():
# Check if the c and python code is consistent.
a = theano.tensor.dvector()
b = theano.tensor.dvector()
f_good = theano.function([a, b],
off_by_half(a, b),
mode=debugmode.DebugMode(check_c_code=True))
mode=debugmode.DebugMode(check_c_code=theano.config.cxx))
f_inconsistent = theano.function([a, b],
inconsistent(a, b),
mode=debugmode.DebugMode(check_c_code=True))
mode=debugmode.DebugMode(check_c_code=theano.config.cxx))
#this should evaluate with no error
f_good([1.0, 2.0, 3.0], [2, 3, 4])
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
try:
f_inconsistent([1.0, 2.0, 3.0], [2, 3, 4])
except debugmode.BadThunkOutput, e:
......@@ -234,7 +237,7 @@ def test_badoptimization():
b = theano.tensor.dvector()
f = theano.function([a, b], a + b,
mode=debugmode.DebugMode(optimizer=opt, check_c_code=True))
mode=debugmode.DebugMode(optimizer=opt))
try:
f([1.0, 2.0, 3.0], [2, 3, 4],)
......@@ -282,6 +285,8 @@ def test_stochasticoptimization():
def test_just_c_code():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = theano.tensor.dvector()
f = theano.function([x], wb2(x),
mode=debugmode.DebugMode(check_py_code=False))
......@@ -312,6 +317,8 @@ def test_baddestroymap():
def test_baddestroymap_c():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = theano.tensor.dvector()
f = theano.function([x], wb2i(x),
mode=debugmode.DebugMode(check_py_code=False))
......@@ -378,6 +385,8 @@ class Test_ViewMap(unittest.TestCase):
assert False # failed to raise error
def test_badviewmap_c(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = theano.tensor.dvector()
f = theano.function([x], wb1i(x),
mode=debugmode.DebugMode(check_py_code=False))
......@@ -721,7 +730,12 @@ class Test_preallocated_output(unittest.TestCase):
check_preallocated_output=['f_contiguous'])
f = theano.function([a, b], out, mode=mode)
self.assertRaises(debugmode.BadThunkOutput, f, a_val, b_val)
if theano.config.cxx:
self.assertRaises(debugmode.BadThunkOutput, f, a_val, b_val)
else:
# The python code of this op is good.
f(a_val, b_val)
def test_output_broadcast_tensor(self):
v = theano.tensor.fvector('v')
......
......@@ -22,7 +22,11 @@ class T_bunch_of_modes(unittest.TestCase):
predef_modes.append(ProfileMode())
# Linkers to use with regular Mode
linkers = ['c|py', 'c|py_nogc', 'vm', 'vm_nogc', 'cvm', 'cvm_nogc']
if theano.config.cxx:
linkers = ['py', 'c|py', 'c|py_nogc', 'vm', 'vm_nogc',
'cvm', 'cvm_nogc']
else:
linkers = ['py', 'c|py', 'c|py_nogc', 'vm', 'vm_nogc']
modes = predef_modes + [Mode(linker, 'fast_run') for linker in linkers]
for mode in modes:
......
......@@ -87,7 +87,8 @@ AddConfigVar('mode',
'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE'),
in_c_key=False)
gxx_avail = True
enum = EnumStr("g++", "")
# Test whether or not g++ is present: disable C code if it is not.
# Using the dummy file descriptor below is a workaround for a crash experienced
# in an unusual Python 2.4.4 Windows environment with the default stdin=None.
......@@ -113,11 +114,19 @@ except OSError:
'optimized C-implementations (for both CPU and GPU) and will '
'default to Python implementations. Performance will be severely '
'degraded.')
gxx_avail = False
enum = EnumStr("")
del dummy_stdin
AddConfigVar('cxx',
"The c++ compiler to use. Currently only g++ is"
" supported. But supporting more is easy if someone want this."
"If it is empty, we don't compile c++ code.",
enum,
in_c_key=False)
del enum
#Keep the default optimizer the same as the one for the mode FAST_RUN
#Keep the default value the same as the one for the mode FAST_RUN
AddConfigVar('allow_gc',
"Do we default to delete intermediate results during Theano"
" function calls? Doing so lowers the memory requirement, but"
......
......@@ -1453,7 +1453,8 @@ class OpWiseCLinker(link.LocalLinker):
# make_thunk
old_value = getattr(node.op, '_op_use_c_code', False)
try:
node.op._op_use_c_code = True
if theano.config.cxx:
node.op._op_use_c_code = True
thunks += [node.op.make_thunk(node,
storage_map,
compute_map,
......
......@@ -83,6 +83,14 @@ METH_VARARGS = "METH_VARARGS"
METH_NOARGS = "METH_NOARGS"
class MissingGXX(Exception):
    """Raised when generation of C code is attempted but g++ is not
    available on the system."""
def debug_counter(name, every=1):
"""Debug counter to know how often we go through some piece of code.
......@@ -1473,6 +1481,9 @@ class GCC_compiler(object):
:returns: dynamically-imported python module of the compiled code.
"""
#TODO: Do not do the dlimport in this function
if not theano.config.cxx:
raise MissingGXX("g++ not available! We can't compile c code.")
if include_dirs is None:
include_dirs = []
......
......@@ -530,10 +530,10 @@ class Op(utils.object2, PureOp, CLinkerOp):
# existing Ops get a _op_use_c_code attribute
obj = object.__new__(cls)
if not hasattr(obj, '_op_use_c_code'):
obj._op_use_c_code = True
obj._op_use_c_code = theano.config.cxx
return obj
def __init__(self, use_c_code=True):
def __init__(self, use_c_code=theano.config.cxx):
self._op_use_c_code = use_c_code
def make_thunk(self, node, storage_map, compute_map, no_recycling):
......
......@@ -138,6 +138,10 @@ class Query(object):
self.exclude = exclude or set()
self.subquery = subquery or {}
self.position_cutoff = position_cutoff
if isinstance(self.require, (list, tuple)):
self.require = set(self.require)
if isinstance(self.exclude, (list, tuple)):
self.exclude = set(self.exclude)
#add all opt with this tag
def including(self, *tags):
......
import unittest
from nose.plugins.skip import SkipTest
from theano.gof.link import PerformLinker
from theano.gof.cc import *
from theano.gof.type import Type
......@@ -129,7 +131,7 @@ class Add(Binary):
add = Add()
class Sub(Binary):
class BadSub(Binary):
def c_code(self, node, name, inp, out, sub):
x, y = inp
z, = out
......@@ -137,7 +139,7 @@ class Sub(Binary):
def impl(self, x, y):
return -10 # erroneous (most of the time)
sub = Sub()
bad_sub = BadSub()
class Mul(Binary):
......@@ -179,17 +181,21 @@ def Env(inputs, outputs):
################
def test_clinker_straightforward():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x, y, z = inputs()
e = add(mul(add(x, y), div(x, y)), sub(sub(x, y), z))
e = add(mul(add(x, y), div(x, y)), bad_sub(bad_sub(x, y), z))
lnk = CLinker().accept(Env([x, y, z], [e]))
fn = lnk.make_function()
assert fn(2.0, 2.0, 2.0) == 2.0
def test_clinker_literal_inlining():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x, y, z = inputs()
z = Constant(tdouble, 4.12345678)
e = add(mul(add(x, y), div(x, y)), sub(sub(x, y), z))
e = add(mul(add(x, y), div(x, y)), bad_sub(bad_sub(x, y), z))
lnk = CLinker().accept(Env([x, y], [e]))
fn = lnk.make_function()
assert abs(fn(2.0, 2.0) + 0.12345678) < 1e-9
......@@ -200,6 +206,8 @@ def test_clinker_literal_inlining():
def test_clinker_single_node():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x, y, z = inputs()
node = add.make_node(x, y)
lnk = CLinker().accept(Env(node.inputs, node.outputs))
......@@ -208,6 +216,8 @@ def test_clinker_single_node():
def test_clinker_dups():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
# Testing that duplicate inputs are allowed.
x, y, z = inputs()
e = add(x, x)
......@@ -218,6 +228,8 @@ def test_clinker_dups():
def test_clinker_dups_inner():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
# Testing that duplicates are allowed inside the graph
x, y, z = inputs()
e = add(mul(y, y), add(x, z))
......@@ -232,11 +244,14 @@ def test_clinker_dups_inner():
def test_opwiseclinker_straightforward():
x, y, z = inputs()
e = add(mul(add(x, y), div(x, y)), sub(sub(x, y), z))
e = add(mul(add(x, y), div(x, y)), bad_sub(bad_sub(x, y), z))
lnk = OpWiseCLinker().accept(Env([x, y, z], [e]))
fn = lnk.make_function()
assert fn(2.0, 2.0, 2.0) == 2.0
if theano.config.cxx:
assert fn(2.0, 2.0, 2.0) == 2.0
else:
# The python version of bad_sub always return -10.
assert fn(2.0, 2.0, 2.0) == -6
def test_opwiseclinker_constant():
x, y, z = inputs()
......@@ -272,9 +287,11 @@ def test_duallinker_straightforward():
def test_duallinker_mismatch():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x, y, z = inputs()
# sub is correct in C but erroneous in Python
e = sub(mul(x, y), mul(y, z))
# bad_sub is correct in C but erroneous in Python
e = bad_sub(mul(x, y), mul(y, z))
g = Env([x, y, z], [e])
lnk = DualLinker(checker=_my_checker).accept(g)
fn = lnk.make_function()
......@@ -283,6 +300,7 @@ def test_duallinker_mismatch():
assert CLinker().accept(g).make_function()(1.0, 2.0, 3.0) == -4.0
# good
assert OpWiseCLinker().accept(g).make_function()(1.0, 2.0, 3.0) == -4.0
# (purposely) wrong
assert PerformLinker().accept(g).make_function()(1.0, 2.0, 3.0) == -10.0
......@@ -314,7 +332,9 @@ class AddFail(Binary):
add_fail = AddFail()
def test_fail_error():
def test_c_fail_error():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x, y, z = inputs()
x = Constant(tdouble, 7.2, name='x')
e = add_fail(mul(x, y), mul(y, z))
......
import os, sys, traceback, warnings
import numpy
from nose.plugins.skip import SkipTest
import unittest
import theano
......@@ -334,6 +335,8 @@ class TestComputeTestValue(unittest.TestCase):
theano.config.compute_test_value = orig_compute_test_value
def test_no_perform(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
class IncOneC(Op):
"""An Op with only a C (c_code) implementation"""
......
......@@ -184,12 +184,15 @@ class TestMakeThunk(unittest.TestCase):
thunk = o.owner.op.make_thunk(o.owner, storage_map, compute_map,
no_recycling=[])
required = thunk()
# Check everything went OK
assert not required # We provided all inputs
assert compute_map[o][0]
assert storage_map[o][0] == 4
if theano.config.cxx:
required = thunk()
# Check everything went OK
assert not required # We provided all inputs
assert compute_map[o][0]
assert storage_map[o][0] == 4
else:
self.assertRaises((NotImplementedError, utils.MethodNotDefined),
thunk)
def test_test_value_python_objects():
......
......@@ -6,6 +6,8 @@ try:
import line_profiler
except ImportError:
pass
from nose.plugins.skip import SkipTest
import numpy
from theano import function
......@@ -55,6 +57,8 @@ class TestCallbacks(unittest.TestCase):
def test_speed():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
def build_graph(x, depth=5):
z = x
......@@ -124,8 +128,9 @@ def test_speed():
time_linker('c|py', OpWiseCLinker)
time_linker('vmLinker', vm.VM_Linker)
time_linker('vmLinker_nogc', lambda : vm.VM_Linker(allow_gc=False))
time_linker('vmLinker_CLOOP', lambda : vm.VM_Linker(allow_gc=False,
use_cloop=True))
if theano.config.cxx:
time_linker('vmLinker_CLOOP', lambda : vm.VM_Linker(allow_gc=False,
use_cloop=True))
time_numpy()
def test_speed_lazy():
......@@ -175,8 +180,9 @@ def test_speed_lazy():
time_linker('vmLinker', vm.VM_Linker)
time_linker('vmLinker_nogc', lambda : vm.VM_Linker(allow_gc=False))
time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False,
use_cloop=True))
if theano.config.cxx:
time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False,
use_cloop=True))
run_memory_usage_tests = False
if run_memory_usage_tests:
......
......@@ -492,7 +492,7 @@ try:
# skip VM.__init__
except ImportError:
pass
except OSError:
except (OSError, theano.gof.cmodule.MissingGXX):
# OSError happens when g++ is not installed. In that case, we
# already changed the default linker to something else then CVM.
# Currently this is the py linker.
......
import numpy
import theano
from theano import Op, Apply
import theano.tensor as T
......@@ -119,6 +121,33 @@ class MultinomialFromUniform(Op):
}
} // END NESTED SCOPE
""" % locals()
def perform(self, node, ins, outs):
    """Python implementation: draw one one-hot multinomial sample per row.

    Parameters
    ----------
    ins : (pvals, unis)
        ``pvals``: 2-D array of row-wise outcome probabilities
        (presumably each row sums to 1 -- not verified here).
        ``unis``: 1-D array of uniform draws, one per row of ``pvals``.
    outs : (z,)
        ``z[0]`` receives a 2-D array with the shape of ``pvals``,
        containing exactly one 1 per row (the sampled outcome) and 0
        elsewhere.

    Raises
    ------
    ValueError
        If ``unis`` and ``pvals`` do not have matching first dimensions.
    """
    (pvals, unis) = ins
    (z,) = outs
    if unis.shape[0] != pvals.shape[0]:
        raise ValueError("unis.shape[0] != pvals.shape[0]",
                         unis.shape[0], pvals.shape[0])
    # Reallocate the output buffer only when missing or mis-shaped.
    # Use an explicit `is None` test: `not z[0]` raises "truth value of
    # an array is ambiguous" when z[0] already holds a multi-element
    # array from a previous call.
    if z[0] is None or z[0].shape != pvals.shape:
        z[0] = numpy.zeros(pvals.shape, dtype=node.outputs[0].dtype)
    nb_multi = pvals.shape[0]
    nb_outcomes = pvals.shape[1]
    # For each multinomial row, walk the cumulative distribution and
    # mark the first outcome whose cumulative mass exceeds the uniform
    # draw; every other entry of the row is reset to 0.
    for n in range(nb_multi):
        waiting = True
        cummul = 0
        unis_n = unis[n]
        for m in range(nb_outcomes):
            cummul += pvals[n, m]
            if waiting and (cummul > unis_n):
                z[0][n, m] = 1
                waiting = False
            else:
                z[0][n, m] = 0
class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
......
......@@ -24,31 +24,28 @@ if cuda_available:
float32_shared_constructor)
def mulmod(a, b, c, m):
    """Compute ``(a*b + c) % m`` with 64-bit intermediate arithmetic,
    returned as an int32 normalized into the non-negative range."""
    result = numpy.int32((numpy.int64(a) * b + c) % m)
    if result < 0:
        result = result + m
    return result
def matVecModM(A, s, m):
# return (A * s) % m
err_orig = numpy.seterr(over='ignore')
try:
x = numpy.zeros_like(s)
for i in xrange(len(x)):
for j in xrange(len(s)):
x[i] = mulmod(A[i][j], s[j], x[i], m)
return x
finally:
numpy.seterr(**err_orig)
x = numpy.zeros_like(s)
for i in xrange(len(x)):
for j in xrange(len(s)):
r = numpy.int32((numpy.int64(A[i][j]) * s[j] + x[i]) % m)
if r >= 0:
x[i] = r
else:
x[i] = r + m
return x
def multMatVect(v, A, m1, B, m2):
#multiply the first half of v by A with a modulo of m1
#and the second half by B with a modulo of m2
r = numpy.zeros_like(v)
r[:3] = matVecModM(A, v[:3], m1)
r[3:] = matVecModM(B, v[3:], m2)
err_orig = numpy.seterr(over='ignore')
try:
r = numpy.zeros_like(v)
r[:3] = matVecModM(A, v[:3], m1)
r[3:] = matVecModM(B, v[3:], m2)
finally:
numpy.seterr(**err_orig)
return r
......@@ -80,6 +77,7 @@ A2p134 = numpy.asarray(
[[796789021, 1464208080, 607337906],
[1241679051, 1431130166, 1464208080],
[1401213391, 1178684362, 1431130166]])
np_int32_vals = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
def ff_2p134(rstate):
    # Fast-forward the generator state `rstate` via the precomputed
    # transition matrices A1p134/A2p134 modulo M1/M2.
    # NOTE(review): per the `p134` naming this presumably jumps ahead
    # by 2**134 steps -- confirm against the MRG jump-ahead matrices.
    return multMatVect(rstate, A1p134, M1, A2p134, M2)
......@@ -87,59 +85,55 @@ def ff_2p134(rstate):
def ff_2p72(rstate):
    # Fast-forward the generator state `rstate` via the precomputed
    # transition matrices A1p72/A2p72 modulo M1/M2.
    # NOTE(review): per the `p72` naming this presumably jumps ahead
    # by 2**72 steps -- confirm against the MRG jump-ahead matrices.
    return multMatVect(rstate, A1p72, M1, A2p72, M2)
def mrg_next_value(rstate, new_rstate):
err_orig = numpy.seterr(over='ignore')
try:
x11, x12, x13, x21, x22, x23 = rstate
assert type(x11) == numpy.int32
i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i)
for i in (0,7, 9, 15, 16, 22, 24)]
#first component
y1 = (((x12 & MASK12) << i22) + (x12 >> i9)
+ ((x13 & MASK13) << i7) + (x13 >> i24))
assert type(y1) == numpy.int32
if (y1 < 0 or y1 >= M1): #must also check overflow
y1 -= M1;
y1 += x13;
if (y1 < 0 or y1 >= M1):
y1 -= M1;
x13 = x12;
x12 = x11;
x11 = y1;
#second component
y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16));
assert type(y1) == numpy.int32
if (y1 < 0 or y1 >= M2):
y1 -= M2;
y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16));
assert type(y2) == numpy.int32
if (y2 < 0 or y2 >= M2):
y2 -= M2;
y2 += x23;
if (y2 < 0 or y2 >= M2):
y2 -= M2;
y2 += y1;
if (y2 < 0 or y2 >= M2):
y2 -= M2;
x23 = x22;
x22 = x21;
x21 = y2;
# Must never return either 0 or M1+1
new_rstate[...] = [x11, x12, x13, x21, x22, x23]
assert new_rstate.dtype == numpy.int32
if (x11 <= x21):
return (x11 - x21 + M1) * NORM
else:
return (x11 - x21) * NORM
finally:
numpy.seterr(**err_orig)
x11, x12, x13, x21, x22, x23 = rstate
assert type(x11) == numpy.int32
#i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
i0, i7, i9, i15, i16, i22, i24 = np_int32_vals
#first component
y1 = (((x12 & MASK12) << i22) + (x12 >> i9)
+ ((x13 & MASK13) << i7) + (x13 >> i24))
assert type(y1) == numpy.int32
if (y1 < 0 or y1 >= M1): #must also check overflow
y1 -= M1;
y1 += x13;
if (y1 < 0 or y1 >= M1):
y1 -= M1;
x13 = x12;
x12 = x11;
x11 = y1;
#second component
y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16));
assert type(y1) == numpy.int32
if (y1 < 0 or y1 >= M2):
y1 -= M2;
y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16));
assert type(y2) == numpy.int32
if (y2 < 0 or y2 >= M2):
y2 -= M2;
y2 += x23;
if (y2 < 0 or y2 >= M2):
y2 -= M2;
y2 += y1;
if (y2 < 0 or y2 >= M2):
y2 -= M2;
x23 = x22;
x22 = x21;
x21 = y2;
# Must never return either 0 or M1+1
new_rstate[...] = [x11, x12, x13, x21, x22, x23]
assert new_rstate.dtype == numpy.int32
if (x11 <= x21):
return (x11 - x21 + M1) * NORM
else:
return (x11 - x21) * NORM
class mrg_uniform_base(Op):
def __init__(self, output_type, inplace=False):
......@@ -211,9 +205,13 @@ class mrg_uniform(mrg_uniform_base):
rval = numpy.zeros(n_elements, dtype=self.output_type.dtype)
for i in xrange(n_elements):
sample = mrg_next_value(rstate[i%n_streams], rstate[i%n_streams])
rval[i] = sample
err_orig = numpy.seterr(over='ignore')
try:
for i in xrange(n_elements):
sample = mrg_next_value(rstate[i%n_streams], rstate[i%n_streams])
rval[i] = sample
finally:
numpy.seterr(**err_orig)
o_rstate[0] = node.outputs[0].type.filter(rstate) # send to GPU if necessary
o_sample[0] = node.outputs[1].type.filter(rval.reshape(size))# send to GPU if necessary
......
from nose.plugins.skip import SkipTest
import numpy
import theano
......@@ -14,6 +15,8 @@ if theano.config.mode == 'FAST_COMPILE':
else:
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
class T_Images2Neibs(unittest_tools.InferShapeTester):
def __init__(self, name):
......
......@@ -359,7 +359,8 @@ def test_uniform():
#TODO: test ndim!=size.ndim
#TODO: test bad seed
#TODO: test size=Var, with shape that change from call to call
if mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE']:
if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
mode == 'Mode' and config.linker in ['py']):
sample_size = (10, 100)
steps = 50
else:
......@@ -440,7 +441,8 @@ def test_binomial():
#we test size in a tuple of int and a tensor.shape.
#we test the param p with int.
if mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE']:
if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
mode == 'Mode' and config.linker in ['py']):
sample_size = (10, 50)
steps = 50
rtol = 0.02
......@@ -510,7 +512,8 @@ def test_normal0():
steps = 50
std = 2.
if mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE']:
if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
mode == 'Mode' and config.linker in ['py']):
sample_size = (25, 30)
default_rtol = .02
else:
......@@ -617,7 +620,8 @@ def test_multinomial():
if mode == 'FAST_COMPILE':
mode_ = 'FAST_RUN'
if mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE']:
if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
mode == 'Mode' and config.linker in ['py']):
sample_size = (49, 5)
else:
sample_size = (450, 6)
......
......@@ -571,7 +571,7 @@ class Scan(PureOp):
args,
outs,
self)
except ImportError:
except (ImportError, theano.gof.cmodule.MissingGXX):
p = self.execute
# default arguments are stored in the closure of `rval`
......
......@@ -6,6 +6,7 @@ import unittest
import cPickle
import numpy
from nose.plugins.skip import SkipTest
from numpy.testing import dec
import theano
......@@ -360,7 +361,6 @@ class T_Scan(unittest.TestCase):
# as test_one_sequence_one_output_weights, but on the gpu
# This first version test the first case in the optimizer to the gpu.
def test_one_sequence_one_output_weights_gpu1(self):
from nose.plugins.skip import SkipTest
from theano.sandbox import cuda
if cuda.cuda_available == False:
raise SkipTest('Optional package cuda disabled')
......@@ -442,7 +442,6 @@ class T_Scan(unittest.TestCase):
# This second version test the second case in the optimizer to the gpu.
def test_one_sequence_one_output_weights_gpu2(self):
from nose.plugins.skip import SkipTest
from theano.sandbox import cuda
if cuda.cuda_available == False:
raise SkipTest('Optional package cuda disabled')
......@@ -1114,8 +1113,6 @@ class T_Scan(unittest.TestCase):
assert numpy.allclose(theano_v, numpy_v[5:, :])
def test_cuda_gibbs_chain(self):
import theano
from nose.plugins.skip import SkipTest
from theano.sandbox import cuda
if cuda.cuda_available == False:
raise SkipTest('Optional package cuda disabled')
......@@ -3142,6 +3139,9 @@ def test_speed():
# The computation being tested here is a recurrent addition.
#
#
#We need the CVM for this speed test
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
r = numpy.arange(10000).astype(theano.config.floatX).reshape(1000, 10)
......@@ -3219,6 +3219,10 @@ def test_speed_rnn():
#
import theano.scalar.sharedvar
#We need the CVM for this speed test
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
L = 10000
N = 50
......@@ -3295,6 +3299,9 @@ def test_speed_batchrnn():
#
import theano.scalar.sharedvar
#We need the CVM for this speed test
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
L = 100
B = 50
N = 400
......
......@@ -686,7 +686,7 @@ register_specialize(local_usmm, name="local_usmm")
def local_usmm_csc_dense_inplace(node):
if node.op == usmm_csc_dense:
return [usmm_csc_dense_inplace(*node.inputs)]
register_specialize(local_usmm_csc_dense_inplace, 'inplace')
register_specialize(local_usmm_csc_dense_inplace, 'cxx_only', 'inplace')
# This is tested in tests/test_basic.py:UsmmTests
......@@ -714,7 +714,7 @@ def local_usmm_csx(node):
return [usmm_csc_dense(alpha, x_val, x_ind, x_ptr,
x_nsparse, y, z)]
return False
sparse.register_specialize(local_usmm_csx)
sparse.register_specialize(local_usmm_csx, 'cxx_only')
class CSMGradC(gof.Op):
......@@ -850,7 +850,7 @@ def local_csm_grad_c(node):
if node.op == csm_grad(None):
return [csm_grad_c(*node.inputs)]
return False
register_specialize(local_csm_grad_c)
register_specialize(local_csm_grad_c, 'cxx_only')
class MulSDCSC(gof.Op):
......@@ -1117,7 +1117,7 @@ def local_mul_s_d(node):
sparse.csm_shape(svar))]
return False
sparse.register_specialize(local_mul_s_d)
sparse.register_specialize(local_mul_s_d, 'cxx_only')
class MulSVCSR(gof.Op):
......@@ -1259,7 +1259,7 @@ def local_mul_s_v(node):
return [CSx(c_data, s_ind, s_ptr, s_shape)]
return False
sparse.register_specialize(local_mul_s_v)
sparse.register_specialize(local_mul_s_v, 'cxx_only')
class StructuredAddSVCSR(gof.Op):
......@@ -1416,7 +1416,7 @@ def local_structured_add_s_v(node):
return [CSx(c_data, s_ind, s_ptr, s_shape)]
return False
sparse.register_specialize(local_structured_add_s_v)
sparse.register_specialize(local_structured_add_s_v, 'cxx_only')
class SamplingDotCSR(gof.Op):
......@@ -1656,4 +1656,5 @@ def local_sampling_dot_csr(node):
return [sparse.CSR(z_data, z_ind, z_ptr, p_shape)]
return False
sparse.register_specialize(local_sampling_dot_csr,
'cxx_only',
name='local_sampling_dot_csr')
......@@ -1012,7 +1012,7 @@ class test_structureddot(unittest.TestCase):
overhead_tol = 0.002 # seconds
overhead_rtol = 1.1 # times as long
self.assertTrue(numpy.allclose(theano_result, scipy_result))
if not theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
if not theano.config.mode in ["DebugMode", "DEBUG_MODE"] and theano.config.cxx:
self.assertFalse(theano_time > overhead_rtol * scipy_time +
overhead_tol)
......@@ -1198,7 +1198,8 @@ class UsmmTests(unittest.TestCase):
fast_compile = theano.config.mode == "FAST_COMPILE"
if (y.type.dtype == up and format1 == 'csc' and format2 == 'dense'
and not fast_compile) and up in ('float32', 'float64'):
and not fast_compile and theano.config.cxx and
up in ('float32', 'float64')):
# The op UsmmCscDense should be inserted
assert (sum([isinstance(node.op, tensor.Elemwise) and
isinstance(node.op.scalar_op,
......
......@@ -37,6 +37,8 @@ def test_local_csm_properties_csm():
def test_local_csm_grad_c():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
data = tensor.vector()
indices, indptr, shape = (tensor.ivector(), tensor.ivector(),
tensor.ivector())
......@@ -60,6 +62,8 @@ def test_local_csm_grad_c():
def test_local_mul_s_d():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
mode = theano.compile.mode.get_default_mode()
mode = mode.including("specialize", "local_mul_s_d")
......@@ -76,6 +80,8 @@ def test_local_mul_s_d():
def test_local_mul_s_v():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
mode = theano.compile.mode.get_default_mode()
mode = mode.including("specialize", "local_mul_s_v")
......@@ -92,6 +98,8 @@ def test_local_mul_s_v():
def test_local_structured_add_s_v():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
mode = theano.compile.mode.get_default_mode()
mode = mode.including("specialize", "local_structured_add_s_v")
......@@ -108,6 +116,8 @@ def test_local_structured_add_s_v():
def test_local_sampling_dot_csr():
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
mode = theano.compile.mode.get_default_mode()
mode = mode.including("specialize", "local_sampling_dot_csr")
......
......@@ -4197,6 +4197,8 @@ class t_dot(unittest.TestCase):
e[0].split()[1:4] == ['are', 'not', 'aligned'] or
# Reported by blas or Theano.
e[0].split()[0:2] == ['Shape', 'mismatch:'] or
# Reported by Theano perform
e[0].split()[0:4] == ['Incompatible', 'shapes', 'for', 'gemv'] or
# Reported by Theano when 'exception_verbosity' is set
# to 'high'.
e[0].split()[0:3] == ['dot', 'product', 'failed.'],
......@@ -5906,8 +5908,9 @@ def test_unalign():
f.maker.fgraph.toposort()
# FAST_COMPILE use the python code that support unaligned data
# The DebugMode make a copy of the inputs, so they will be aligned.
should_raise = theano.config.mode not in ["FAST_COMPILE", "DebugMode",
"DEBUG_MODE"]
should_raise = (theano.config.mode not in ["FAST_COMPILE", "DebugMode",
"DEBUG_MODE"] and
theano.config.linker != 'py')
try:
out_theano = f(a, b)
assert not a.flags.aligned
......
......@@ -101,7 +101,8 @@ class t_gemm(TestCase):
cmp_linker(copy(z), a, x, y, b, 'c|py')
cmp_linker(copy(z), a, x, y, b, 'py')
if config.blas.ldflags and not dtype.startswith("complex"):
if (config.blas.ldflags and not dtype.startswith("complex")
and theano.config.cxx):
# If blas.ldflags is equal to '', the C code will not
# be generated
cmp_linker(copy(z), a, x, y, b, 'c')
......
......@@ -5,6 +5,7 @@ import time
import unittest
import numpy
from nose.plugins.skip import SkipTest
from numpy.testing import dec
import theano
......@@ -166,15 +167,21 @@ class test_Broadcast(unittest.TestCase):
self.with_linker(gof.PerformLinker())
def test_c(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
self.with_linker(gof.CLinker())
def test_perform_inplace(self):
self.with_linker_inplace(gof.PerformLinker())
def test_c_inplace(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
self.with_linker_inplace(gof.CLinker())
def test_fill(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = TensorType('float64', [0, 0])('x')
y = TensorType('float64', [1, 1])('y')
e = Elemwise(scalar.Second(scalar.transfer_type(0)), {0: 0})(x, y)
......@@ -185,6 +192,8 @@ class test_Broadcast(unittest.TestCase):
assert (xv == yv).all()
def test_weird_strides(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = TensorType('float64', [0, 0, 0, 0, 0])('x')
y = TensorType('float64', [0, 0, 0, 0, 0])('y')
e = Elemwise(scalar.add)(x, y)
......@@ -195,6 +204,8 @@ class test_Broadcast(unittest.TestCase):
assert (f(xv, yv) == zv).all()
def test_same_inputs(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
x = TensorType('float64', [0, 0])('x')
e = Elemwise(scalar.add)(x, x)
f = gof.CLinker().accept(FunctionGraph([x], [e])).make_function()
......@@ -374,6 +385,9 @@ class test_CAReduce(unittest_tools.InferShapeTester):
test_nan=True, tensor_op=tensor.all)
def test_c(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
for dtype in ["floatX", "complex64", "complex128", "int8", "uint8"]:
self.with_linker(gof.CLinker(), scalar.add, dtype=dtype)
self.with_linker(gof.CLinker(), scalar.mul, dtype=dtype)
......@@ -390,6 +404,8 @@ class test_CAReduce(unittest_tools.InferShapeTester):
self.with_linker(gof.CLinker(), scalar.xor, dtype=dtype)
def test_c_nan(self):
if not theano.config.cxx:
raise SkipTest("G++ not available, so we need to skip this test.")
for dtype in ["floatX", "complex64", "complex128"]:
self.with_linker(gof.CLinker(), scalar.add, dtype=dtype,
test_nan=True)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论