merge no conflict

2e855e05 · James Bergstra · c9b42fdf · 2f95561e · 2e855e05 · 2e855e05
--- a/.hgignore
+++ b/.hgignore
@@ -35,3 +35,5 @@ theano/version.py
 theano/version.py.out
 distribute-*.egg
 distribute-*.tar.gz
+out1
+out2
--- a/theano/compile/debugmode.py
+++ b/theano/compile/debugmode.py
-"""Provides `DebugMode`, an evaluation mode for debugging theano internals."""
+"""Provides `DebugMode`, an evaluation mode for debugging theano internals.
+:TODO: add support for Cond Op, LazyLinker, PureOp, etc.
+"""
 __docformat__ = "restructuredtext en"
 import time, copy, sys, copy_reg, gc, os
@@ -1552,7 +1556,8 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions
    def __init__(self, inputs, outputs, optimizer, mode,
            accept_inplace = False,
-            function_builder = Function):
+            function_builder = Function,
+            profile=None):
        """
        :type inputs: a list of SymbolicInput instances
@@ -1567,7 +1572,7 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions
        :note: this function sets TensorType.filter_checks_isfinite when `mode.check_isfinite` is True
        """
+        self.profile = profile
        # Handle the case where inputs and/or outputs is a single Variable (not in a list)
        unpack_single = False
        return_none = False

--- a/theano/compile/function.py
+++ b/theano/compile/function.py
@@ -7,12 +7,13 @@ _logger = logging.getLogger('theano.compile.function')
 from io import In
 from function_module import orig_function
+from profiling import ProfileStats
 from pfunc import pfunc
 from numpy import any #for to work in python 2.4
 def function(inputs, outputs=None, mode=None, updates=[], givens=[],
             no_default_updates=False, accept_inplace=False, name=None,
-             rebuild_strict=True, allow_input_downcast=None):
+             rebuild_strict=True, allow_input_downcast=None, profile=None):
    """
    Return a callable object that will calculate `outputs` from `inputs`.
@@ -62,6 +63,11 @@ def function(inputs, outputs=None, mode=None, updates=[], givens=[],
    precise, type. None (default) is almost like False, but allows
    downcasting of Python float scalars to floatX.
+    :type profile: None, True, or ProfileStats instance
+    :param profile: accumulate profiling information into a given ProfileStats
+    instance. If argument is `True` then a new ProfileStats instance will be
+    used.  This profiling object will be available via self.profile.
    :note: Regarding givens: Be careful to make sure that these substitutions are
    independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in
    another expression is undefined.  Replacements specified with givens are different from
@@ -88,6 +94,8 @@ def function(inputs, outputs=None, mode=None, updates=[], givens=[],
    if uses_In or uses_tuple:
        # we must use old semantics in this case.
+        if profile:
+            raise NotImplementedError('profiling not supported in old-style function')
        if uses_updates or uses_givens:
            raise NotImplementedError("In() instances and tuple inputs triggers the old semantics, which disallow using updates and givens")
        fn =  orig_function(inputs, outputs,
@@ -102,7 +110,8 @@ def function(inputs, outputs=None, mode=None, updates=[], givens=[],
                no_default_updates=no_default_updates,
                accept_inplace=accept_inplace,name=name,
                rebuild_strict=rebuild_strict,
-                allow_input_downcast=allow_input_downcast)
+                allow_input_downcast=allow_input_downcast,
+                profile=profile)
    # We need to add the flag check_aliased inputs if we have any mutable or
    # borrowed used defined inputs
    fn._check_for_aliased_inputs = check_for_aliased_inputs

--- a/theano/compile/pfunc.py
+++ b/theano/compile/pfunc.py
 """Provide a simple user friendly API """
 __docformat__ = 'restructuredtext en'
+import numpy # for backport to 2.4, to get any().
+from profiling import ProfileStats
 from theano.gof import Container, Variable, generic, graph, Constant, Value
 from theano.compile import orig_function, In, Out
 from theano.compile.sharedvalue import SharedVariable, shared
-import numpy # for backport to 2.4, to get any().
+from theano import config
 def rebuild_collect_shared( outputs
                           , inputs             = None
@@ -292,7 +295,8 @@ class Param(object):
 def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
        no_default_updates=False, accept_inplace=False, name=None,
-        rebuild_strict=True, allow_input_downcast=None):
+        rebuild_strict=True, allow_input_downcast=None,
+        profile=None):
    """Function-constructor for graphs with shared variables.
    :type params: list of either Variable or Param instances.
@@ -319,11 +323,9 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
    If False (default), perform them all. Else, perform automatic updates on all Variables
    that are neither in "updates" nor in "no_default_updates".
-    :param name: an optional name for this fct. If used, the profile mode will print the time spent in this fct.
+    :type name: None or string
+    :param name: attaches a name to the Profiling result of this function when
-    :rtype: theano.compile.Function
+    using ProfileMode (will be deprecated).
-    :returns: a callable object that will compute the outputs (given the inputs)
-    and update the implicit function arguments according to the `updates`.
    :type allow_input_downcast: Boolean
    :param allow_input_downcast: True means that the values passed as
@@ -333,6 +335,21 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
    precise, type. None (default) is almost like False, but allows
    downcasting of Python float scalars to floatX.
+    :type profile: None, True, str, or ProfileStats instance
+    :param profile: accumulate profiling information into a given ProfileStats
+    instance. None is the default, and means to use the value of
+    config.profile.
+    If argument is `True` then a new ProfileStats instance will be
+    used.  If argument is a string, a new ProfileStats instance will be created
+    with that string as its `message` attribute.  This profiling object will be
+    available via self.profile.
+    :rtype: theano.compile.Function
+    :returns: a callable object that will compute the outputs (given the inputs)
+    and update the implicit function arguments according to the `updates`.
    :note: Regarding givens: Be careful to make sure that these substitutions are
    independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in
    another expression is undefined.  Replacements specified with givens are different from
@@ -354,6 +371,17 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
    # Then it clones the outputs and the update expressions.  This rebuilds a computation graph
    # from the inputs and the givens.
    #
+    if profile is None:
+        profile = config.profile
+        # profile -> True or False
+    if profile == True:
+        profile = ProfileStats(message=name)
+        # profile -> object
+    if type(profile) == str:
+        profile = ProfileStats(message=profile)
+    # profile is typically either False or an object at this point.
+    # No need to block other objects being passed through though. It might be
+    # useful.
    if not isinstance(params,(list,tuple)):
        raise Exception("in pfunc() the first argument must be a list or a tuple")
@@ -393,7 +421,7 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
        inputs.append(si)
    return orig_function(inputs, cloned_outputs, mode,
-            accept_inplace=accept_inplace, name=name)
+            accept_inplace=accept_inplace, name=name, profile=profile)
 def _pfunc_param_to_in(param, strict=False, allow_downcast=None):

--- a/theano/compile/profiling.py
+++ b/theano/compile/profiling.py
--- a/theano/compile/tests/test_modes.py
+++ b/theano/compile/tests/test_modes.py
+"""
+Test compilation modes
+"""
+from nose.plugins.skip import SkipTest
+import unittest
+import theano
+import numpy
+import random
+import numpy.random
+from theano.tests  import unittest_tools as utt
+class T_bunch_of_modes(unittest.TestCase):
+    # Smoke test: compile and run a small graph under each mode name listed
+    # in the `mode` config option, then check how many distinct linker
+    # classes were involved.
+    def test1(self):
+        # this is a quick test after the LazyLinker branch merge
+        # to check that all the current modes can still be used.
+        linker_classes_involved = []
+        for modename in theano.config.__class__.__dict__['mode'].all:
+            # This module only does `import theano`; there is no `T` alias,
+            # so the tensor constructors are reached through the package.
+            x = theano.tensor.matrix()
+            y = theano.tensor.vector()
+            f = theano.function([x,y], x+y, mode=modename)
+            # test that it runs something
+            f([[1,2],[3,4]], [5, 6])
+            linker_classes_involved.append(f.maker.mode.linker.__class__)
+            print 'MODE:', modename, f.maker.mode.linker, 'stop'
+        # regression check:
+        # there should be
+        # - VM_Linker
+        # - OpWiseCLinker (FAST_RUN)
+        # - WrapLinker (PROFILE_MODE)
+        # - PerformLinker (FAST_COMPILE)
+        # - DebugMode's Linker  (DEBUG_MODE)
+        assert 5 == len(set(linker_classes_involved))
+if __name__ == '__main__':
+    unittest.main()
--- a/theano/gof/__init__.py
+++ b/theano/gof/__init__.py
@@ -146,7 +146,7 @@ from link import \
    Container, Linker, LocalLinker, PerformLinker, WrapLinker, WrapLinkerMany
 from op import \
-    Op
+    Op, PureOp
 from opt import (Optimizer, optimizer, SeqOptimizer,
    MergeOptimizer, MergeOptMerge, 

--- a/theano/gof/env.py
+++ b/theano/gof/env.py
@@ -311,6 +311,9 @@ class Env(utils.object2):
        self.__import_r__([new_r])
        self.__add_clients__(new_r, [(node, i)])
        prune = self.__remove_clients__(r, [(node, i)], False)
+        # Precondition: the substitution is semantically valid.
+        # However, it may introduce cycles into the graph, in which case the
+        # transaction will be reverted later.
        self.execute_callbacks('on_change_input', node, i, r, new_r, reason=reason)
        if prune:
@@ -438,16 +441,32 @@ class Env(utils.object2):
        if len(self.nodes) < 2:
            # optimization
            # when there are 0 or 1 nodes, no sorting is necessary
+            # This special case happens a lot because the OpWiseCLinker produces
+            # 1-element graphs.
            return list(self.nodes)
        env = self
-        ords = {}
+        ords = self.orderings()
-        for feature in env._features:
-            if hasattr(feature, 'orderings'):
-                for op, prereqs in feature.orderings(env).items():
-                    ords.setdefault(op, []).extend(prereqs)
        order = graph.io_toposort(env.inputs, env.outputs, ords)
        return order
+    def orderings(self):
+        """
+        Merge the ordering constraints reported by this env's features.
+        Each feature in `self._features` that has an `orderings` attribute is
+        called as `feature.orderings(self)`; the returned dicts are combined.
+        :returns: dict d s.t. d[node] is a list, without duplicates, of the
+        nodes that must be evaluated before node itself can be evaluated.
+        This is used primarily by the destroy_handler feature to ensure that all
+        clients of any destroyed inputs have already computed their outputs.
+        """
+        merged = {}
+        for feature in self._features:
+            if not hasattr(feature, 'orderings'):
+                # this feature imposes no ordering constraints
+                continue
+            for node, prereqs in feature.orderings(self).items():
+                merged.setdefault(node, []).extend(prereqs)
+        # several features may name the same prerequisite: keep one copy
+        for node in merged.keys():
+            merged[node] = list(set(merged[node]))
+        return merged
    def nclients(self, r):
        """WRITEME Same as len(self.clients(r))."""
        return len(self.clients(r))

--- a/theano/sandbox/cuda/nvcc_compiler.py
+++ b/theano/sandbox/cuda/nvcc_compiler.py
@@ -13,7 +13,7 @@ AddConfigVar('nvcc.compiler_bindir',
        "If defined, nvcc compiler driver will seek g++ and gcc in this directory",
        StrParam(""))
-AddConfigVar('cuda.nvccflags',
+AddConfigVar('nvcc.flags',
        "Extra compiler flags for nvcc",
        StrParam(""))
@@ -183,11 +183,9 @@ def nvcc_module_compile_str(
        if sys.platform != 'darwin':
            # the 64bit CUDA libs are in the same files as are named by the function above
            rpaths.append(os.path.join(config.cuda.root,'lib64'))
    for rpath in rpaths:
        cmd.extend(['-Xlinker',','.join(['-rpath',rpath])])
-    nvccflags = [flag for flag in config.cuda.nvccflags.split(' ') if flag]
+    cmd.extend([flag for flag in config.nvcc.flags.split(' ') if flag])
-    cmd.extend(nvccflags)
    cmd.extend('-I%s'%idir for idir in include_dirs)
    cmd.extend(['-o',lib_filename])
    cmd.append(os.path.split(cppfilename)[-1])

--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -133,6 +133,79 @@ def sp_ones_like(x):
    data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats
    return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape)
+class _sparse_py_operators:
+    # Mixin that forwards Python operator syntax and a few read-only
+    # attributes to the module-level names transpose, neg, add, sub, mul and
+    # structured_dot.  It is mixed into SparseVariable, SparseConstant and
+    # SparseValue.
+    T = property(lambda self: transpose(self), doc = "Return aliased transpose of self (read-only)")
+    def __neg__(self): return neg(self)
+    # In the reflected (__r*__) methods the first parameter is still `self`;
+    # naming it `right` keeps the operands in their written order.
+    def __add__(left, right): return add(left, right)
+    def __radd__(right, left): return add(left, right)
+    def __sub__(left, right): return sub(left, right)
+    def __rsub__(right, left): return sub(left, right)
+    def __mul__(left, right): return mul(left, right)
+    # NOTE(review): unlike __radd__/__rsub__, `self` is named `left` here, so
+    # `other * self` builds mul(self, other).  Presumably harmless because
+    # mul is commutative -- confirm.
+    def __rmul__(left, right): return mul(left, right)
+    #extra pseudo-operator symbols
+    def __dot__(left, right): return structured_dot(left, right)
+    def __rdot__(right, left): return structured_dot(left, right)
+    #N.B. THIS IS COMMENTED OUT ON PURPOSE!!!
+    #     Discussion with Fred & James (at least, and maybe others before)
+    #     we decided that casting from a sparse to dense should be explicit
+    #     because it's usually something you want to be pretty careful about,
+    #     and not to do by accident.
+    #def _as_TensorVariable(self):
+    #    return dense_from_sparse(self)
+    shape = property(lambda self: tensor.shape(dense_from_sparse(self))) # don't worry!
+    # ... the plan is that the ShapeFeature in tensor.opt will do shape propagation
+    # ... and remove the dense_from_sparse from the graph.  This will *NOT* actually expand
+    # ... your sparse matrix just to get the shape.
+    # ndim and dtype are read straight from the variable's type.
+    ndim = property(lambda self: self.type.ndim)
+    dtype = property(lambda self: self.type.dtype)
+class SparseVariable(gof.Variable, _sparse_py_operators):
+    # Variable with the sparse operator mixin; dtype and format are read
+    # from the variable's type.
+    dtype = property(lambda self: self.type.dtype)
+    format = property(lambda self: self.type.format)
+    def __repr__(self):
+        return str(self)
+    def __str__(self):
+        # rendered as ClassName{format,dtype}
+        cls_name = self.__class__.__name__
+        return '%s{%s,%s}' % (cls_name, self.format, self.dtype)
+class SparseConstantSignature(tuple):
+    # A (type, data) pair, built by SparseConstant.signature().  Two
+    # signatures are equal when the types are equal and the data have the
+    # same dtype, class and shape and differ by at most 1e-6 per stored
+    # element in total absolute value.
+    def __eq__(self, other):
+        try:
+            (a, b), (x,y) = self, other
+        except (TypeError, ValueError):
+            # `other` is not a (type, data) pair, so it cannot be equal
+            return False
+        # `<=` rather than `<`: when b.nnz == 0 the tolerance is 0, and two
+        # identical empty matrices (difference 0) must still compare equal.
+        return a == x \
+                and (b.dtype == y.dtype)\
+                and (type(b) == type(y))\
+                and (b.shape == y.shape)\
+                and (abs(b-y).sum() <= 1e-6 * b.nnz)
+    def __ne__(self, other):
+        # Python 2 does not derive != from ==; without this, != would fall
+        # back to the plain tuple comparison and disagree with __eq__.
+        return not self == other
+    def __hash__(self):
+        # The data itself is not hashed: only its class takes part, which is
+        # consistent with the tolerance-based __eq__ above.
+        (a,b) = self
+        return hash(type(self)) ^ hash(a) ^ hash(type(b))
+class SparseConstant(gof.Constant, _sparse_py_operators):
+    # Constant with the sparse operator mixin; dtype and format are read
+    # from the constant's type.
+    dtype = property(lambda self: self.type.dtype)
+    format = property(lambda self: self.type.format)
+    def signature(self):
+        # the signature pairs the type with the data; data must be set
+        assert self.data is not None
+        pair = (self.type, self.data)
+        return SparseConstantSignature(pair)
+    def __repr__(self):
+        return str(self)
+    def __str__(self):
+        # rendered as ClassName{format,dtype,shape=...,nnz=...}
+        fields = (self.__class__.__name__,
+                self.format,
+                self.dtype,
+                self.data.shape,
+                self.data.nnz)
+        return '%s{%s,%s,shape=%s,nnz=%s}' % fields
+class SparseValue(gof.Value, _sparse_py_operators):
+    # Value with the sparse operator mixin; dtype and format are read from
+    # the value's type.
+    dtype = property(lambda self: self.type.dtype)
+    format = property(lambda self: self.type.format)
 class SparseType(gof.Type):
    """
@@ -149,6 +222,9 @@ class SparseType(gof.Type):
    dtype_set = set(['int', 'int8', 'int16','int32', 'int64', 'float32', 'float64', 'complex64','complex128'])
    ndim = 2
+    Variable = SparseVariable
+    Constant = SparseConstant
    def __init__(self, format, dtype):
        """
        Fundamental way to create a sparse node.
@@ -248,65 +324,6 @@ csr_dmatrix = SparseType(format='csr', dtype='float64')
 csc_fmatrix = SparseType(format='csc', dtype='float32')
 csr_fmatrix = SparseType(format='csr', dtype='float32')
-class _sparse_py_operators:
-    T = property(lambda self: transpose(self), doc = "Return aliased transpose of self (read-only)")
-    def __neg__(self): return neg(self)
-    def __add__(left, right): return add(left, right)
-    def __radd__(right, left): return add(left, right)
-    def __sub__(left, right): return sub(left, right)
-    def __rsub__(right, left): return sub(left, right)
-    def __mul__(left, right): return mul(left, right)
-    def __rmul__(left, right): return mul(left, right)
-    #extra pseudo-operator symbols
-    def __dot__(left, right): return structured_dot(left, right)
-    def __rdot__(right, left): return structured_dot(left, right)
-    #N.B. THIS IS COMMENTED OUT ON PURPOSE!!!
-    #     Discussion with Fred & James (at least, and maybe others before)
-    #     we decided that casting from a sparse to dense should be explicit
-    #     because it's usually something you want to be pretty careful about,
-    #     and not to do by accident.
-    #def _as_TensorVariable(self):
-    #    return dense_from_sparse(self)
-    shape = property(lambda self: tensor.shape(dense_from_sparse(self))) # don't worry!
-    # ... the plan is that the ShapeFeature in tensor.opt will do shape propagation
-    # ... and remove the dense_from_sparse from the graph.  This will *NOT* actually expand
-    # ... your sparse matrix just to get the shape.
-    ndim = property(lambda self: self.type.ndim)
-    dtype = property(lambda self: self.type.dtype)
-class SparseVariable(gof.Variable, _sparse_py_operators):
-    dtype = property(lambda self: self.type.dtype)
-    format = property(lambda self: self.type.format)
-class SparseConstantSignature(tuple):
-    def __eq__(self, other):
-        (a, b), (x,y) = self, other
-        return a == x \
-                and (b.dtype == y.dtype)\
-                and (type(b) == type(y))\
-                and (b.shape == y.shape)\
-                and (abs(b-y).sum() < 1e-6 * b.nnz)
-    def __hash__(self):
-        (a,b) = self
-        return hash(type(self)) ^ hash(a) ^ hash(type(b))
-class SparseConstant(gof.Constant, _sparse_py_operators):
-    dtype = property(lambda self: self.type.dtype)
-    format = property(lambda self: self.type.format)
-    def signature(self):
-        assert self.data is not None
-        return SparseConstantSignature((self.type, self.data))
-class SparseValue(gof.Value, _sparse_py_operators):
-    dtype = property(lambda self: self.type.dtype)
-    format = property(lambda self: self.type.format)
 # CONSTRUCTION
 class CSMProperties(gof.Op):
    """Extract all of .data .indices and .indptr"""

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
@@ -937,6 +937,9 @@ def _gemm_from_node2(node):
        lst = _factor_canonicalized(lst)
        rval = _gemm_from_factored_list(lst)
        #print "RVAL", rval
+        # NOTE: the assertion below was commented out at some point; ask P. Lamblin why.
+        #if rval:
+        #    assert rval[0].type == node.outputs[0].type, (rval[0].type, node.outputs[0].type)
        if rval and (rval[0].type == node.outputs[0].type):
            return rval

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -3057,30 +3057,33 @@ def constant_folding(node):
    for input in node.inputs:
        if not isinstance(input, Constant):
            return False
-    try:
+    #condition:  all inputs are constant
-        storage = [[None] for output in node.outputs]
-        node.op.perform(node, [x.data for x in node.inputs], storage)
+    storage_map=dict([(i,[i.data]) for i in node.inputs])
-    except MethodNotDefined:
+    compute_map=dict([(i,[True]) for i in node.inputs])
-        tmp_inputs = [x.type() for x in node.inputs]
+    for o in node.outputs:
-        f = compile.function(
+        storage_map[o] = [None]
-                inputs=tmp_inputs,
+        compute_map[o] = [False]
-                outputs=node.op.make_node(*tmp_inputs).outputs,
-                mode=compile.Mode(linker='c|py',optimizer=None))
+    thunk = node.op.make_thunk(node, storage_map, compute_map,
-        xvals = f(*[x.data for x in node.inputs])
+            no_recycling=[])
-        storage = [[xv] for xv in xvals]
+    required = thunk()
-    msg = []
+    assert not required # a node whose inputs are all provided should always
-    assert len(storage) == len(node.outputs)
+    # return successfully
-    for s, output in zip(storage, node.outputs):
+    rval = []
+    for output in node.outputs:
+        assert compute_map[output][0], (output, storage_map[output][0])
        try:
            constant = output.type.Constant
-        except:
+        except AttributeError:
            constant = Constant
-        msg += [constant(output.type, s[0])]
+        rval.append(constant(output.type, storage_map[output][0]))
-    return msg
+    return rval
 register_canonicalize(constant_folding, 'fast_compile')
-register_stabilize(constant_folding) # because
+register_stabilize(constant_folding)
 register_specialize(constant_folding)
 def _is_1(expr):

--- a/theano/tensor/tests/test_raw_random.py
+++ b/theano/tensor/tests/test_raw_random.py
@@ -49,11 +49,14 @@ class T_random_function(unittest.TestCase):
        rng_R = random_state_type()
        # use make_node to override some of the self.args
-        post_r2, out2 = rf2(rng_R, (4,), -2, 2)
+        post_r2,     out2     = rf2(rng_R, (4,), -2, 2) # NOT INPLACE
-        post_r2_4, out2_4 = rf2(rng_R, (4,), -4.0, 2)
+        post_r4,     out4     = rf4(rng_R, (4,), -4, 4) # INPLACE
-        post_r2_4_4, out2_4_4 = rf2(rng_R, (4,), -4.0, 4.0)
+        post_r2_4,   out2_4   = rf2(rng_R, (4,), -4.0, 2) # NOT INPLACE
-        post_r4, out4 = rf4(rng_R, (4,), -4, 4)
+        post_r2_4_4, out2_4_4 = rf2(rng_R, (4,), -4.0, 4.0) # NOT INPLACE
+        # configure out4 to be computed inplace
+        # The update expression means that the random state rng_R will
+        # be maintained by post_r4
        f = compile.function(
                [compile.In(rng_R,
                            value=numpy.random.RandomState(utt.fetch_seed()),
@@ -65,9 +68,25 @@ class T_random_function(unittest.TestCase):
        f2, f4, f2_4, f2_4_4 = f()
        f2b, f4b, f2_4b, f2_4_4b = f()
-        assert numpy.allclose(f2*2, f4)
+        print f2
-        assert numpy.allclose(f2_4_4, f4)
+        print f4
-        assert not numpy.allclose(f4, f4b)
+        print f2_4
+        print f2_4_4
+        #print f2b
+        #print f4b
+        #print f2_4b
+        #print f2_4_4b
+        # setting bounds is same as multiplying by 2
+        assert numpy.allclose(f2*2, f4), (f2, f4)
+        # retrieving from non-inplace generator
+        # is same as inplace one for first call
+        assert numpy.allclose(f2_4_4, f4), (f2_4_4, f4)
+        # f4 changes from call to call, which shows that the update has worked
+        assert not numpy.allclose(f4, f4b), (f4, f4b)
    def test_inplace_optimization(self):
        """Test that FAST_RUN includes the random_make_inplace optimization"""

--- a/theano/tests/diverse_tests.py
+++ b/theano/tests/diverse_tests.py
@@ -13,19 +13,32 @@ from theano.tests  import unittest_tools as utt
  should ensure that it will remain operational
 '''
-class T_diverse(unittest.TestCase):
+class T_scipy(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()
+        self.orig_floatX = theano.config.floatX
+    def tearDown(self):
+        theano.config.floatX = self.orig_floatX
-    def scipy_paper_example1(self):
+    def test_scipy_paper_example1(self):
        a = theano.tensor.vector('a') # declare variable
        b = a + a**10                 # build expression
        f = theano.function([a], b)   # compile function
        assert numpy.all(f([0,1,2]) == numpy.array([0,2,1026]))
-    def scipy_papaer_example2(self):
+    def test_scipy_paper_example2(self):
        ''' This just sees if things compile well and if they run '''
+        # PREAMBLE
+        T = theano.tensor
+        shared = theano.shared
+        function = theano.function
+        rng = numpy.random
+        theano.config.floatX='float64'
+        #
+        # ACTUAL SCRIPT FROM PAPER
        x = T.matrix()
        y = T.vector()
        w = shared(rng.randn(100))
@@ -52,6 +65,7 @@ class T_diverse(unittest.TestCase):
        for i in range(training_steps):
            pred, err = train(D[0], D[1])
 if __name__ == '__main__':
    unittest.main()