Commit e75ca8a0 authored by lamblin

Merge pull request #911 from nouiz/mixed

Mixed
@@ -20,6 +20,7 @@ import logging
 _logger = logging.getLogger('theano.bin.theano-nose')
 _logger.setLevel(logging.WARN)
+import os
 import nose
 import textwrap
 import sys
@@ -32,6 +33,23 @@ def main():
         import theano
         sys.argv[i] = theano.__path__[0]
+    # Many Theano tests assume device=cpu, so we raise an error if
+    # device==gpu. There is no easy way to run this check only when
+    # theano-nose is used on Theano's own tests, so the check is
+    # wrapped in try..except in case the script gets reused elsewhere.
+    # We must not import theano before calling nose.main(), as that
+    # causes import problems with nosetests.
+    # Should we find a way to avoid modifying sys.path?
+    if not os.path.exists('theano/__init__.py'):
+        try:
+            from theano import config
+            if config.device != "cpu":
+                raise ValueError("Theano tests must be run with device=cpu."
+                                 " This will also run GPU tests when possible.")
+        except ImportError:
+            pass
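For illustration, a minimal standalone sketch of how this guard behaves (hypothetical code, not part of the commit; THEANO_FLAGS is read once, when theano is first imported):

    import os
    os.environ['THEANO_FLAGS'] = 'device=cpu'  # must be set before importing theano
    import theano
    assert theano.config.device == 'cpu'  # the guard above would pass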
     # Handle --batch[=n] arguments
     batch_args = [arg for arg in sys.argv if arg.startswith('--batch')]
     for arg in batch_args:
......
@@ -506,3 +506,4 @@ Other tools that can help you
 * `memory_profiler <http://fseoane.net/blog/2012/line-by-line-report-of-memory-usage/>`_: memory profiler
 * `runsnake <http://www.vrplumber.com/programming/runsnakerun/>`_: GUI for cProfile (time profiler) and Meliae (memory profiler)
 * `hub <https://github.com/defunkt/hub>`_: A tool that adds GitHub commands to the git command line.
+* `git pull-requests <http://www.splitbrain.org/blog/2011-06/19-automate_github_pull_requests>`_: Another tool for the git/GitHub command line.
@@ -51,6 +51,7 @@ installation and configuration, see :ref:`installing Theano <install>`.
 Master Tests Status:
 .. image:: https://secure.travis-ci.org/Theano/Theano.png
+   :target: http://travis-ci.org/Theano/Theano/builds
 .. _available on PyPI: http://pypi.python.org/pypi/Theano
 .. _Related Projects: https://github.com/Theano/Theano/wiki/Related-projects
......
@@ -206,6 +206,7 @@ Bleeding-edge install instructions
 Master Tests Status:
 .. image:: https://secure.travis-ci.org/Theano/Theano.png
+   :target: http://travis-ci.org/Theano/Theano/builds
 If you are a developer of Theano, then check out the :ref:`dev_start_guide`.
......
@@ -161,6 +161,14 @@ import theano and print the config variable, as in:
     Theano initializes the GPU device. Newer versions of PyCUDA
     (currently only in the trunk) don't have this restriction.
+.. attribute:: config.print_active_device
+
+    Bool value: either ``True`` or ``False``
+
+    Default: ``True``
+
+    Print the active device when the GPU device is initialized.
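For example, a minimal sketch of silencing the device banner (hypothetical snippet; the flag must be set before theano is imported, since it cannot be overridden afterwards):

    import os
    os.environ['THEANO_FLAGS'] = 'print_active_device=False'
    import theano  # no "Using gpu device ..." line when the GPU initializes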
 .. attribute:: floatX

     String value: either 'float64' or 'float32'
......
@@ -76,7 +76,7 @@ AddConfigVar('force_device',
         in_c_key=False)
 AddConfigVar('print_active_device',
-        "Print active device at startup",
+        "Print the active device when the GPU device is initialized.",
         BoolParam(True, allow_override=False),
         in_c_key=False)
......
@@ -1362,7 +1362,8 @@ class _CThunk(object):
             # this can be used to retrieve the location the Op was declared
             exc_value.__thunk_trace__ = trace
         except Exception:
-            print >> sys.stderr, 'ERROR retrieving error_storage',
+            print >> sys.stderr, ('ERROR retrieving error_storage.'
+                                  ' Was the error set in the c code?'),
             print >> sys.stderr, self.error_storage
             raise
......
@@ -53,8 +53,8 @@ if [ "$RELEASE" ]; then
     ls ${COMPILEDIR}|wc -l
 fi
-echo "Executing nosetests with mode=FAST_COMPILE"
-THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}
+echo "Executing nosetests with mode=FAST_COMPILE and --batch=1000"
+THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS}
 echo "Number of elements in the compiledir:"
 ls ${COMPILEDIR}|wc -l
......
@@ -86,8 +86,9 @@ class HostFromGpu(GpuOp):
         fail = sub['fail']
         return """
         %(out)s = (PyArrayObject *) CudaNdarray_CreateArrayObj(%(inp)s);
-        if(!%(out)s)
+        if(!%(out)s){
             %(fail)s;
+        }
         """ % locals()
     def c_code_cache_version(self):
@@ -133,6 +134,27 @@ class GpuFromHost(GpuOp):
     def infer_shape(self, node, xshp):
         return xshp
+    def c_code(self, node, name, inputs, outputs, sub):
+        inp = inputs[0]
+        out = outputs[0]
+        fail = sub['fail']
+        return """
+        int err = 0;
+        Py_XDECREF(%(out)s);
+        %(out)s = (CudaNdarray*) CudaNdarray_New();
+        if(!%(out)s){
+            %(fail)s;
+        }
+        err = CudaNdarray_CopyFromArray(%(out)s, %(inp)s);
+        if(err){
+            %(fail)s;
+        }
+        """ % locals()
+
+    def c_code_cache_version(self):
+        return (1,)
 gpu_from_host = GpuFromHost()
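A minimal usage sketch of the transfer Ops defined above (assuming theano.sandbox.cuda is available and a GPU is configured; not part of the commit):

    import theano
    import theano.tensor as T
    from theano.sandbox.cuda.basic_ops import gpu_from_host, host_from_gpu

    x = T.fmatrix('x')            # float32 host tensor
    gx = gpu_from_host(x)         # host ndarray -> CudaNdarray (uses the c_code above)
    y = host_from_gpu(gx)         # CudaNdarray -> host ndarray
    f = theano.function([x], y)   # a round trip through GPU memory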
@@ -1898,16 +1920,19 @@ class GpuSubtensor(tensor.Subtensor, GpuOp):
         assert isinstance(x.type, CudaNdarrayType)
         rval = tensor.Subtensor.make_node(self, x, *inputs)
         otype = CudaNdarrayType(rval.outputs[0].type.broadcastable)
-        return Apply(self, [x] + rval.inputs[1:], [otype()])
+        # We reverse the indices here as a speed optimization;
+        # this saved 0.40e-05s out of 3.49e-05s.
+        return Apply(self, [x] + list(reversed(rval.inputs[1:])), [otype()])
     def perform(self, node, inputs, out_):
         out, = out_
         x = inputs[0]
-        indices = list(reversed(inputs[1:]))
+        indices = inputs[1:]
         def convert(entry):
             if isinstance(entry, Type):
                 rval = indices.pop()
+                # This version check takes about .25e-05s.
                 if sys.version_info < (2, 5):
                     # Before Python 2.5, PySlice_GetIndicesEx requires
                     # a Python int to be passed.
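For context, a minimal standalone sketch of the list trick used above (hypothetical code): the index inputs are stored reversed at graph-construction time so that perform() can consume them with list.pop(), which is O(1) at the tail of a Python list, instead of building a reversed copy on every call:

    indices = [10, 20, 30]            # logical order of the index inputs
    stored = list(reversed(indices))  # done once, in make_node
    # in perform(), pop() yields them back in the original order:
    assert [stored.pop() for _ in range(3)] == [10, 20, 30]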
......
@@ -175,6 +175,12 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
                 'self.odtype == pvals.dtype', odtype, pvals.dtype)
         return Apply(self, [pvals, unis], [pvals.type()])
+    def perform(self, node, ins, outs):
+        # The parent's perform() does not work with CudaNdarray. We
+        # don't need it either, as DebugMode will still test against
+        # the CPU version when an optimization inserts the GPU op.
+        return Op.perform(self, node, ins, outs)
     def c_code_cache_version(self):
         return (8,)
......
@@ -5,7 +5,6 @@ TODO: implement Images2Neibs.{perform,infer_shape}() methods
 import theano
 from theano import Op, Apply
 import theano.tensor as T
-from theano.gof import local_optimizer
 from theano.gradient import grad_not_implemented
......
@@ -1193,7 +1193,7 @@ class UsmmTests(unittest.TestCase):
         theano.tensor.basic.float64_atol = orig_atol
         theano.tensor.basic.float64_rtol = orig_rtol
-        assert _allclose(f_a_out, f_b_out, rtol=1e-5)
+        assert _allclose(f_a_out, f_b_out, rtol=1e-5), (f_a_out, f_b_out)
         topo = f_a.maker.fgraph.toposort()
         up = theano.scalar.upcast(dtype1, dtype2, dtype3, dtype4)
......
@@ -246,7 +246,8 @@ class Gemv(Op):
 gemv_no_inplace = Gemv(inplace=False)
 gemv_inplace = Gemv(inplace=True)
+# For the user interface. The optimizer will make it inplace later.
+gemv = gemv_no_inplace
 class Ger(Op):
     """
@@ -991,6 +992,8 @@ class Gemm(GemmRelated):
 gemm_inplace = Gemm(inplace=True)
 gemm_no_inplace = Gemm(inplace=False)
+# For the user interface. Theano's optimizations will make it inplace later.
+gemm = gemm_no_inplace
 pprint.assign(gemm_inplace, FunctionPrinter('gemm_inplace'))
 pprint.assign(gemm_no_inplace, FunctionPrinter('gemm_no_inplace'))
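A minimal usage sketch of the user-facing symbol (assuming the call signature gemm(z, a, x, y, b), computing roughly b * z + a * dot(x, y); Theano's inplace optimizer may later substitute gemm_inplace when the output can safely reuse z's buffer):

    import theano
    import theano.tensor as T
    from theano.tensor.blas import gemm  # the non-inplace, user-facing Op

    Z, X, Y = T.dmatrix('Z'), T.dmatrix('X'), T.dmatrix('Y')
    a, b = T.dscalar('a'), T.dscalar('b')
    out = gemm(Z, a, X, Y, b)                  # b*Z + a*dot(X, Y), Z left untouched
    f = theano.function([Z, a, X, Y, b], out)  # the optimizer may swap in gemm_inplace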
......
@@ -1409,6 +1409,7 @@ GammaTester = makeBroadcastTester(
         good=_good_broadcast_unary_gammaln,
         grad=_grad_broadcast_unary_gammaln,
         mode=mode_no_scipy,
+        eps=1e-5,
         skip=skip_scipy)
 GammaInplaceTester = makeBroadcastTester(
     op=inplace.gamma_inplace,
@@ -1416,6 +1417,7 @@ GammaInplaceTester = makeBroadcastTester(
         good=_good_broadcast_unary_gammaln,
         grad=_grad_broadcast_unary_gammaln,
         mode=mode_no_scipy,
+        eps=1e-5,
         inplace=True,
         skip=skip_scipy)
......
@@ -3736,7 +3736,7 @@ class Test_lift_transpose_through_dot(unittest.TestCase):
 def test_local_upcast_elemwise_constant_inputs():
     s = dvector("s")
-    x = tensor.sum(tensor.log(10**s))
+    x = tensor.sum(tensor.log(10 ** s))
     f = function([s], [tensor.grad(x, s)])
     f([-42, -2.1, -1, -0.5, 0, 0.2, 1, 2, 12])
@@ -3748,22 +3748,22 @@ class TestShape_i(utt.InferShapeTester):
     def test_perform(self):
-        advec = dvector()
-        advec_val = numpy.random.rand(3)
+        advec = vector()
+        advec_val = numpy.random.rand(3).astype(config.floatX)
         f = function([advec], Shape_i(0)(advec))
         out = f(advec_val)
         assert numpy.allclose(out, advec_val.shape[0])
-        admat = dmatrix()
-        admat_val = numpy.random.rand(4, 3)
+        admat = matrix()
+        admat_val = numpy.random.rand(4, 3).astype(config.floatX)
         for i in xrange(2):
             f = function([admat], Shape_i(i)(admat))
             out = f(admat_val)
             assert numpy.allclose(out, admat_val.shape[i])
     def test_infer_shape(self):
-        admat = dmatrix()
-        admat_val = numpy.random.rand(3, 4)
+        admat = matrix()
+        admat_val = numpy.random.rand(3, 4).astype(config.floatX)
         self._compile_and_check([admat], [Shape_i(0)(admat)],
                                 [admat_val], Shape_i)
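The pattern these tests switch to, as a minimal sketch (symbolic types that follow config.floatX instead of hard-coded float64, so the tests also run under floatX=float32, e.g. on the GPU):

    import numpy
    from theano import config
    from theano.tensor import vector

    advec = vector()                                        # dtype follows config.floatX
    advec_val = numpy.random.rand(3).astype(config.floatX)  # keep the data in sync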
......