Merge pull request #911 from nouiz/mixed

Mixed

Merge pull request #911 from nouiz/mixed
e75ca8a0 · lamblin · ef8aa276 · 62882b0c · e75ca8a0 · e75ca8a0
--- a/bin/theano-nose
+++ b/bin/theano-nose
@@ -20,6 +20,7 @@ import logging
 _logger = logging.getLogger('theano.bin.theano-nose')
 _logger.setLevel(logging.WARN)
+import os
 import nose
 import textwrap
 import sys
@@ -32,6 +33,23 @@ def main():
        import theano
        sys.argv[i] = theano.__path__[0]
+    # Many Theano tests assume device=cpu, so we need to raise an
+    # error if device==gpu.
+    # I don't know how to do this check only when theano-nose is run on
+    # Theano tests, so I use a try..except in case the script gets
+    # reused elsewhere.
+    # We should not import theano before calling nose.main(),
+    # as this causes import problems with nosetests.
+    # Should we find a way to avoid modifying sys.path?
+    if not os.path.exists('theano/__init__.py'):
+        try:
+            from theano import config
+            if config.device != "cpu":
+                raise ValueError("Theano tests must be run with device=cpu."
+                                 " This will also run GPU tests when possible.")
+        except ImportError:
+            pass
    # Handle --batch[=n] arguments
    batch_args = [arg for arg in sys.argv if arg.startswith('--batch')]
    for arg in batch_args:

--- a/doc/dev_start_guide.txt
+++ b/doc/dev_start_guide.txt
@@ -506,3 +506,4 @@ Other tools that can help you
 * `memory_profiler <http://fseoane.net/blog/2012/line-by-line-report-of-memory-usage/>`_: memory profiler
 * `runsnake <http://www.vrplumber.com/programming/runsnakerun/>`_: Gui for cProfile(time profiler) and Meliae(memory profiler)
 * `hub <https://github.com/defunkt/hub>`_: A tool that adds github commands to the git command line.
+ * `git pull-requests <http://www.splitbrain.org/blog/2011-06/19-automate_github_pull_requests>`_: Another tool for git/github command line.
--- a/doc/index.txt
+++ b/doc/index.txt
@@ -51,6 +51,7 @@ installation and configuration, see :ref:`installing Theano <install>`.
 Master Tests Status:
 .. image:: https://secure.travis-ci.org/Theano/Theano.png
+    :target: http://travis-ci.org/Theano/Theano/builds
 .. _available on PyPI: http://pypi.python.org/pypi/Theano
 .. _Related Projects: https://github.com/Theano/Theano/wiki/Related-projects

--- a/doc/install.txt
+++ b/doc/install.txt
@@ -206,6 +206,7 @@ Bleeding-edge install instructions
 Master Tests Status:
 .. image:: https://secure.travis-ci.org/Theano/Theano.png
+    :target: http://travis-ci.org/Theano/Theano/builds
 If you are a developer of Theano, then check out the :ref:`dev_start_guide`.

--- a/doc/library/config.txt
+++ b/doc/library/config.txt
@@ -161,6 +161,14 @@ import theano and print the config variable, as in:
    Theano initialize the GPU device.  Newer version of PyCUDA
    (currently only in the trunk) don't have this restriction.
+.. attribute:: config.print_active_device
+    Bool value: either ``True`` or ``False``
+    Default: ``True``
+    Print the active device when the GPU device is initialized.
 .. attribute:: floatX
    String value: either 'float64' or 'float32'

--- a/theano/compile/function_module.py
+++ b/theano/compile/function_module.py
@@ -280,6 +280,13 @@ class Function(object):
    A Function instance may be serialized using the `pickle` or `cPickle` modules.
    This will save all default inputs, the graph, and *** to the pickle file (WRITEME).
+    A Function instance has a ``trust_input`` field that defaults to
+    False. When True, we skip the extra checks on the inputs that give
+    better error messages. In some cases, Python code will still return
+    the correct results if you pass a Python or numpy scalar instead of a
+    numpy tensor.  C code should raise an error if you pass an object
+    of the wrong type.
    """
    pickle_aliased_memory_strategy = 'warn'
@@ -351,12 +358,12 @@ class Function(object):
    It maps container -> SymbolicInput
    """
-    def __init__(self, fn, input_storage, output_storage, indices, outputs, defaults, unpack_single, return_none, maker):
+    def __init__(self, fn, input_storage, output_storage, indices, outputs,
+                 defaults, unpack_single, return_none, maker):
        """
        Initialize attributes. create finder, inv_finder.
        """
        self.fn = fn
        self.input_storage = input_storage
        self.output_storage = output_storage
@@ -367,6 +374,7 @@ class Function(object):
        self.return_none = return_none
        self.maker = maker
        self.profile = None  # reassigned in FunctionMaker.create
+        self.trust_input = False  # If True, we don't check the input parameter
        # We will be popping stuff off this `containers` object.  It is a copy.
        containers = list(self.input_storage)
@@ -487,7 +495,8 @@ class Function(object):
                except KeyError:
                    # Print informative error message.
                    msg = get_info_on_inputs(named_inputs, n_unnamed_inputs)
-                    raise TypeError("Unknown input or state: %s. %s" % (str(item), msg))
+                    raise TypeError("Unknown input or state: %s. %s" %
+                                    (str(item), msg))
                if s is DUPLICATE:
                    raise TypeError("Ambiguous name: %s - please check the names "\
                        "of the inputs of your function for duplicates." % str(item))
@@ -531,11 +540,12 @@ class Function(object):
    def __setitem__(self, item, value):
        self.value[item] = value
    def __copy__(self):
        defaults = [default for _1, _2, default in self.defaults]
-        cpy = self.maker.create(defaults, trustme = True)
+        cpy = self.maker.create(defaults, trustme=True)
-        for (input,_1,_2), here, there in zip(self.indices, self.input_storage, cpy.input_storage):
+        for (input, _1, _2), here, there in zip(self.indices,
+                                                self.input_storage,
+                                                cpy.input_storage):
            if input.mutable and here is not None:
                there.data = copy.copy(here.data)
            else:
@@ -547,10 +557,17 @@ class Function(object):
        t0 = time.time()
        # Reinitialize each container's 'provided' counter
+        if self.trust_input:
+            i = 0
+            for arg in args:
+                s = self.input_storage[i]
+                s.storage[0] = arg
+                i += 1
+        else:
            for c in self.input_storage:
                c.provided = 0
-        if len(args)+len(kwargs)>len(self.input_storage):
+            if len(args) + len(kwargs) > len(self.input_storage):
                raise TypeError("Too many parameter passed to theano function")
            # Set positional arguments
@@ -569,32 +586,33 @@ class Function(object):
                                allow_downcast=s.allow_downcast)
                    except Exception, e:
-                    function_name="theano function"
+                        function_name = "theano function"
                        if self.name:
-                        function_name += 'with name "'+self.name+'" '
+                            function_name += 'with name "' + self.name + '" '
                        #end if
                        e.args = tuple(["Bad input argument to " + function_name +
-                                    " at index %d(0-based)" % i] + list(e.args))
+                                        " at index %d(0-based)" % i] +
+                                       list(e.args))
                        raise
                    #end except
                #end if
                s.provided += 1
-            i+=1
+                i += 1
        # Set keyword arguments
        if kwargs:  # for speed, skip the iteritems for empty kwargs
            for k, arg in kwargs.iteritems():
                self[k] = arg
-        if (not hasattr(self, '_check_for_aliased_inputs') or
+        if not self.trust_input and (
+            not hasattr(self, '_check_for_aliased_inputs') or
            self._check_for_aliased_inputs):
            ## Collect aliased inputs among the storage space
            args_share_memory = []
            for i in xrange(len(self.input_storage)):
                i_var = self.maker.inputs[i].variable
                i_val = self.input_storage[i].storage[0]
-                if hasattr( i_var.type, 'may_share_memory'):
+                if hasattr(i_var.type, 'may_share_memory'):
                    is_aliased = False
                    for j in xrange(len(args_share_memory)):
@@ -603,9 +621,9 @@ class Function(object):
                             in args_share_memory[j]],
                            [self.input_storage[k].storage[0] for k
                             in args_share_memory[j]])
-                        if numpy.any([ (var.type is i_var.type and
+                        if numpy.any([(var.type is i_var.type and
-                                        var.type.may_share_memory(val,i_val)
+                                        var.type.may_share_memory(val,i_val))
-                                       ) for (var,val) in group_j]):
+                                       for (var,val) in group_j]):
                            is_aliased = True
                            args_share_memory[j].append(i)
@@ -619,25 +637,28 @@ class Function(object):
                    if len(group) > 1:
                        # see if any of these arguments are mutable
                        mutable = numpy.any([(self.maker.inputs[idx].mutable or
-                                             self.maker.inputs[idx].borrow )
+                                             self.maker.inputs[idx].borrow)
-                                             for idx in group ])
+                                             for idx in group])
                        # copy all but the first
                        for idx in group[1:]:
                            self.input_storage[i].storage[0] = copy.copy(
                                self.input_storage[i].storage[0])
        # Check if inputs are missing, or if inputs were set more than once, or
        # if we tried to provide inputs that are supposed to be implicit.
+        if not self.trust_input:
            for c in self.input_storage:
                if c.required and not c.provided:
-                raise TypeError("Missing required input: %s" % getattr(self.inv_finder[c], 'variable', self.inv_finder[c]))
+                    raise TypeError("Missing required input: %s" %
+                                    getattr(self.inv_finder[c], 'variable',
+                                            self.inv_finder[c]))
                if c.provided > 1:
-                raise TypeError("Multiple values for input: %s" % getattr(self.inv_finder[c], 'variable', self.inv_finder[c]))
+                    raise TypeError("Multiple values for input: %s" %
+                                    getattr(self.inv_finder[c], 'variable',
+                                            self.inv_finder[c]))
                if c.implicit and c.provided > 0:
-                raise TypeError('Tried to provide value for implicit input: %s'
+                    raise TypeError(
+                        'Tried to provide value for implicit input: %s'
                        % getattr(self.inv_finder[c], 'variable',
                                  self.inv_finder[c]))
@@ -671,11 +692,12 @@ class Function(object):
            if c.required:
                c.storage[0] = None
-        # if we are allowing garbage collection, remove the input and output reference from the internal
+        # if we are allowing garbage collection, remove the input and
-        # storage cells
+        # output reference from the internal storage cells
        if getattr(self.fn, 'allow_gc', False):
            assert len(self.output_storage) == len(self.maker.fgraph.outputs)
-            for o_container, o_variable in zip(self.output_storage, self.maker.fgraph.outputs):
+            for o_container, o_variable in zip(self.output_storage,
+                                               self.maker.fgraph.outputs):
                if o_variable.owner is not None:
                    # this node is the variable of computation
                    # WARNING: This circumvents the 'readonly' attribute in x
@@ -683,7 +705,8 @@ class Function(object):
        if getattr(self.fn, 'need_update_inputs', True):
            # Update the inputs that have an update function
-            for input, storage in reversed(zip(self.maker.expanded_inputs, self.input_storage)):
+            for input, storage in reversed(zip(self.maker.expanded_inputs,
+                                               self.input_storage)):
                if input.update is not None:
                    storage.data = outputs.pop()
        else:
@@ -719,7 +742,7 @@ class Function(object):
    value = property(
        lambda self: self._value,
        None,  # this property itself is not settable
-        doc="""dictionary-like access to the values associated with Variables""")
+        doc="dictionary-like access to the values associated with Variables")
    container = property(
        lambda self: self._container,
        None,  # this property itself is not settable
@@ -727,6 +750,7 @@ class Function(object):
 # pickling/deepcopy support for Function
 def _pickle_Function(f):
    #copy of the input storage list
    ins = list(f.input_storage)

--- a/theano/configdefaults.py
+++ b/theano/configdefaults.py
@@ -76,7 +76,7 @@ AddConfigVar('force_device',
        in_c_key=False)
 AddConfigVar('print_active_device',
-        "Print active device at startup",
+        "Print active device at when the GPU device is initialized.",
        BoolParam(True, allow_override=False),
        in_c_key=False)

--- a/theano/gof/cc.py
+++ b/theano/gof/cc.py
@@ -1362,7 +1362,8 @@ class _CThunk(object):
                # this can be used to retrieve the location the Op was declared
                exc_value.__thunk_trace__ = trace
            except Exception:
-                print >> sys.stderr, 'ERROR retrieving error_storage',
+                print >> sys.stderr, ('ERROR retrieving error_storage.'
+                                      ' Was the error set in the c code?'),
                print >> sys.stderr, self.error_storage
                raise

--- a/theano/misc/do_nightly_build
+++ b/theano/misc/do_nightly_build
@@ -53,8 +53,8 @@ if [ "$RELEASE" ]; then
    ls ${COMPILEDIR}|wc -l
 fi
-echo "Executing nosetests with mode=FAST_COMPILE"
+echo "Executing nosetests with mode=FAST_COMPILE with --batch=1000"
-THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}
+THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS}
 echo "Number of elements in the compiledir:"
 ls ${COMPILEDIR}|wc -l

--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -86,8 +86,9 @@ class HostFromGpu(GpuOp):
        fail = sub['fail']
        return """
        %(out)s = (PyArrayObject *) CudaNdarray_CreateArrayObj(%(inp)s);
-        if(!%(out)s)
+        if(!%(out)s){
            %(fail)s;
+        }
        """ % locals()
    def c_code_cache_version(self):
@@ -133,6 +134,27 @@ class GpuFromHost(GpuOp):
    def infer_shape(self, node, xshp):
        return xshp
+    def c_code(self, node, name, inputs, outputs, sub):
+        inp = inputs[0]
+        out = outputs[0]
+        fail = sub['fail']
+        return """
+        int err = 0;
+        Py_XDECREF(%(out)s);
+        %(out)s = (CudaNdarray*) CudaNdarray_New();
+        if(!%(out)s){
+            %(fail)s;
+        }
+        err = CudaNdarray_CopyFromArray(%(out)s, %(inp)s);
+        if(err){
+            %(fail)s;
+        }
+        """ % locals()
+    def c_code_cache_version(self):
+        return (1,)
 gpu_from_host = GpuFromHost()
@@ -1898,16 +1920,19 @@ class GpuSubtensor(tensor.Subtensor, GpuOp):
        assert isinstance(x.type, CudaNdarrayType)
        rval = tensor.Subtensor.make_node(self, x, *inputs)
        otype = CudaNdarrayType(rval.outputs[0].type.broadcastable)
-        return Apply(self, [x] + rval.inputs[1:], [otype()])
+        # We reverse the indices here as a speed optimization;
+        # this opt was saving 0.40e-05s out of 3.49e-05s.
+        return Apply(self, [x] + list(reversed(rval.inputs[1:])), [otype()])
    def perform(self, node, inputs, out_):
        out, = out_
        x = inputs[0]
-        indices = list(reversed(inputs[1:]))
+        indices = inputs[1:]
        def convert(entry):
            if isinstance(entry, Type):
                rval = indices.pop()
+                # the `if` below takes about .25e-05s
                if sys.version_info < (2, 5):
                    # Before Python 2.5, PySlice_GetIndicesEx requires
                    # Python int to be passed.

--- a/theano/sandbox/multinomial.py
+++ b/theano/sandbox/multinomial.py
@@ -175,6 +175,12 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
                    'self.odtype == pvals.dtype', odtype, pvals.dtype)
        return Apply(self, [pvals, unis], [pvals.type()])
+    def perform(self, node, ins, outs):
+        # The perform from the parent doesn't work with CudaNdarray.  We
+        # don't need it, as DebugMode will test against it since an
+        # optimization inserts the GPU op.
+        return Op.perform(self, node, ins, outs)
    def c_code_cache_version(self):
        return (8,)

--- a/theano/sandbox/neighbours.py
+++ b/theano/sandbox/neighbours.py
@@ -5,7 +5,6 @@ TODO: implement Images2Neibs.{perform,infer_shape}() methods
 import theano
 from theano import Op, Apply
 import theano.tensor as T
-from theano.gof import local_optimizer
 from theano.gradient import grad_not_implemented

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -1193,7 +1193,7 @@ class UsmmTests(unittest.TestCase):
                    theano.tensor.basic.float64_atol = orig_atol
                    theano.tensor.basic.float64_rtol = orig_rtol
-            assert _allclose(f_a_out, f_b_out, rtol=1e-5)
+            assert _allclose(f_a_out, f_b_out, rtol=1e-5), (f_a_out, f_b_out)
            topo = f_a.maker.fgraph.toposort()
            up = theano.scalar.upcast(dtype1, dtype2, dtype3, dtype4)

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
@@ -246,7 +246,8 @@ class Gemv(Op):
 gemv_no_inplace = Gemv(inplace=False)
 gemv_inplace = Gemv(inplace=True)
+# For the user interface. Opt will make them inplace later
+gemv = gemv_no_inplace
 class Ger(Op):
    """
@@ -991,6 +992,8 @@ class Gemm(GemmRelated):
 gemm_inplace = Gemm(inplace=True)
 gemm_no_inplace = Gemm(inplace=False)
+# For the user interface. Theano optimization will make them inplace
+gemm = gemm_no_inplace
 pprint.assign(gemm_inplace, FunctionPrinter('gemm_inplace'))
 pprint.assign(gemm_no_inplace, FunctionPrinter('gemm_no_inplace'))

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -1409,6 +1409,7 @@ GammaTester = makeBroadcastTester(
    good=_good_broadcast_unary_gammaln,
    grad=_grad_broadcast_unary_gammaln,
    mode=mode_no_scipy,
+    eps=1e-5,
    skip=skip_scipy)
 GammaInplaceTester = makeBroadcastTester(
    op=inplace.gamma_inplace,
@@ -1416,6 +1417,7 @@ GammaInplaceTester = makeBroadcastTester(
    good=_good_broadcast_unary_gammaln,
    grad=_grad_broadcast_unary_gammaln,
    mode=mode_no_scipy,
+    eps=1e-5,
    inplace=True,
    skip=skip_scipy)

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -3736,7 +3736,7 @@ class Test_lift_transpose_through_dot(unittest.TestCase):
 def test_local_upcast_elemwise_constant_inputs():
    s = dvector("s")
-    x = tensor.sum(tensor.log(10**s))
+    x = tensor.sum(tensor.log(10 ** s))
    f = function([s], [tensor.grad(x, s)])
    f([-42, -2.1, -1, -0.5, 0, 0.2, 1, 2, 12])
@@ -3748,22 +3748,22 @@ class TestShape_i(utt.InferShapeTester):
    def test_perform(self):
-        advec = dvector()
+        advec = vector()
-        advec_val = numpy.random.rand(3)
+        advec_val = numpy.random.rand(3).astype(config.floatX)
        f = function([advec], Shape_i(0)(advec))
        out = f(advec_val)
        assert numpy.allclose(out, advec_val.shape[0])
-        admat = dmatrix()
+        admat = matrix()
-        admat_val = numpy.random.rand(4, 3)
+        admat_val = numpy.random.rand(4, 3).astype(config.floatX)
        for i in xrange(2):
            f = function([admat], Shape_i(i)(admat))
            out = f(admat_val)
            assert numpy.allclose(out, admat_val.shape[i])
    def test_infer_shape(self):
-        admat = dmatrix()
+        admat = matrix()
-        admat_val = numpy.random.rand(3, 4)
+        admat_val = numpy.random.rand(3, 4).astype(config.floatX)
        self._compile_and_check([admat], [Shape_i(0)(admat)],
                        [admat_val], Shape_i)