提交 86e77a37 authored 作者: Joseph Turian's avatar Joseph Turian

merge

...@@ -65,7 +65,7 @@ Software Requirements ...@@ -65,7 +65,7 @@ Software Requirements
- python 2.5 - python 2.5
- SciPy (specifically numpy, sparse, weave). Numpy version >= 1.1 fixes memory leak. - SciPy (specifically numpy, sparse, weave). Numpy version >= 1.1 fixes memory leak. Numpy version >=1.2 fixes more memory leak.
- docutils, pygments (optional, to build documentation) - docutils, pygments (optional, to build documentation)
...@@ -73,6 +73,8 @@ Software Requirements ...@@ -73,6 +73,8 @@ Software Requirements
- g++, python-dev (optional, to compile generated C code) - g++, python-dev (optional, to compile generated C code)
- nose, for testing
- `psyco <http://psyco.sourceforge.net/>`__ can make your python code much faster, if you are on a 32-bit x86 architecture. If you use compiled C code, this can be less important. - `psyco <http://psyco.sourceforge.net/>`__ can make your python code much faster, if you are on a 32-bit x86 architecture. If you use compiled C code, this can be less important.
Downloading Theano Downloading Theano
...@@ -83,13 +85,13 @@ There are no stable releases yet. ...@@ -83,13 +85,13 @@ There are no stable releases yet.
*To get the source via mercurial,* you must have `mercurial <http://www.selenic.com/mercurial/wiki/>`__ installed. *To get the source via mercurial,* you must have `mercurial <http://www.selenic.com/mercurial/wiki/>`__ installed.
Get the source and run the auto-tests like this: Get the source and run the tests like this:
.. code-block:: bash .. code-block:: bash
hg clone http://pylearn.org/hg/theano theano hg clone http://pylearn.org/hg/theano theano
cd theano cd theano
python autotest.py nosetests
To update your library to the latest on pylearn.org, change directory (`cd`) to this `theano` folder and type To update your library to the latest on pylearn.org, change directory (`cd`) to this `theano` folder and type
...@@ -172,15 +174,17 @@ Use something like the following in your .bashrc: ...@@ -172,15 +174,17 @@ Use something like the following in your .bashrc:
Running the Test Suite Running the Test Suite
====================== ======================
Test your installation by running the autotests. Type at the shell: Test your installation by running the tests. Type at the shell:
.. code-block:: bash .. code-block:: bash
cd theano cd theano
python2.5 autotest.py nosetests
All tests should pass. All tests should pass.
python-nose must be installed. On red-hat or fedora core: sudo yum install python-nose.noarch
Using Theano Using Theano
============ ============
......
...@@ -7,7 +7,7 @@ Theano Project Documentation Overview ...@@ -7,7 +7,7 @@ Theano Project Documentation Overview
* *
Documentation is divided broadly into two kinds: user documentation and Documentation is broadly divided into two kinds: user documentation and
developer documentation. developer documentation.
- `Using Theano` covers how to *use* what is already in the Theano library to - `Using Theano` covers how to *use* what is already in the Theano library to
......
...@@ -25,7 +25,7 @@ Our project uses the name to honour the ancient Greek mathematician. ...@@ -25,7 +25,7 @@ Our project uses the name to honour the ancient Greek mathematician.
Overview Overview
======== ========
**To get up & running quickly** see README_. **To get up and running quickly** see README_.
All **documentation** can be reached from the `Theano Project Documentation Overview`_. All **documentation** can be reached from the `Theano Project Documentation Overview`_.
......
差异被折叠。
...@@ -44,7 +44,9 @@ from compile import \ ...@@ -44,7 +44,9 @@ from compile import \
predefined_modes, predefined_linkers, predefined_optimizers, \ predefined_modes, predefined_linkers, predefined_optimizers, \
FunctionMaker, function, OpFromGraph, \ FunctionMaker, function, OpFromGraph, \
Component, External, Member, KitComponent, Method, \ Component, External, Member, KitComponent, Method, \
Composite, ComponentList, Module, FancyModule Composite, ComponentList, ComponentDict, Module
FancyModule = Module
from printing import \ from printing import \
pprint, pp pprint, pp
......
...@@ -19,6 +19,8 @@ def infer_reuse_pattern(env, outputs_to_disown): ...@@ -19,6 +19,8 @@ def infer_reuse_pattern(env, outputs_to_disown):
Given an env and a list of results, returns the list of all Given an env and a list of results, returns the list of all
results which may share the same underlying data storage as any of results which may share the same underlying data storage as any of
the specified results. Used internally by function, FunctionMaker. the specified results. Used internally by function, FunctionMaker.
This list is also referred to as no_recycling sometimes.
""" """
do_not_reuse = list() do_not_reuse = list()
seen = set() seen = set()
...@@ -130,14 +132,14 @@ class Function(object): ...@@ -130,14 +132,14 @@ class Function(object):
input.distribute(value, indices, cs) input.distribute(value, indices, cs)
for c in cs: for c in cs:
c.provided += 1 c.provided += 1
def set(c, v): def assign(c, v):
c.data = v c.data = v
setters = [] setters = []
# Initialize the storage # Initialize the storage
for i, ((input, indices, sinputs), (required, refeed, value)) in enumerate(zip(self.indices, defaults)): for i, ((input, indices, sinputs), (required, refeed, value)) in enumerate(zip(self.indices, defaults)):
if indices is None: # this is true iff input is not a SymbolicInputKit if indices is None: # this is true iff input is not a SymbolicInputKit
c = containers[0] c = containers[0] #containers is being used as a stack. Here we pop off the next one.
if input.strict: if input.strict:
c.strict = True c.strict = True
if value is not None: if value is not None:
...@@ -155,7 +157,7 @@ class Function(object): ...@@ -155,7 +157,7 @@ class Function(object):
finder[input.name] = c if input.name not in finder else DUPLICATE finder[input.name] = c if input.name not in finder else DUPLICATE
# inv_finder maps the container to the input (useful for one error message) # inv_finder maps the container to the input (useful for one error message)
inv_finder[c] = input inv_finder[c] = input
setters.append(partial(set, c)) setters.append(partial(assign, c))
containers[:1] = [] containers[:1] = []
else: else:
# The input is a SymbolicInputKit, so we take as many containers as the Kit provides inputs # The input is a SymbolicInputKit, so we take as many containers as the Kit provides inputs
...@@ -440,11 +442,12 @@ class FunctionMaker(object): ...@@ -440,11 +442,12 @@ class FunctionMaker(object):
raise ValueError("'linker' parameter of FunctionFactory should be a Linker with an accept method " \ raise ValueError("'linker' parameter of FunctionFactory should be a Linker with an accept method " \
"or one of %s" % predefined_linkers.keys()) "or one of %s" % predefined_linkers.keys())
#the 'no_borrow' outputs are the ones for which we can't return the internal storage pointer.
no_borrow = [output for output, spec in zip(env.outputs, outputs+additional_outputs) if not spec.borrow] no_borrow = [output for output, spec in zip(env.outputs, outputs+additional_outputs) if not spec.borrow]
if not no_borrow: if no_borrow:
self.linker = linker.accept(env)
else:
self.linker = linker.accept(env, no_recycling = infer_reuse_pattern(env, no_borrow)) self.linker = linker.accept(env, no_recycling = infer_reuse_pattern(env, no_borrow))
else:
self.linker = linker.accept(env)
self.indices = indices self.indices = indices
self.inputs = inputs self.inputs = inputs
...@@ -592,10 +595,10 @@ def function(inputs, outputs, mode='FAST_RUN', accept_inplace = False): ...@@ -592,10 +595,10 @@ def function(inputs, outputs, mode='FAST_RUN', accept_inplace = False):
The return value of the returned function will match the format of this The return value of the returned function will match the format of this
argument (either the value itself or a list of one or more return values) argument (either the value itself or a list of one or more return values)
mode -> a descriptive string or a Mode instance; descriptive strings can be one of: mode -> a descriptive string or a Mode instance; descriptive strings can be one of:
* SANITY_CHECK * SANITY_CHECK TODO: NotImplemented
* FAST_COMPILE * FAST_COMPILE (apply only optimization that are fast to apply)
* FAST_RUN (default) * FAST_RUN (default) (optimize without too much time)
* EXPENSIVE_OPTIMIZATION * EXPENSIVE_OPTIMIZATION TODO: NotImplemented
accept_inplace -> True iff the graph can contain inplace operations accept_inplace -> True iff the graph can contain inplace operations
prior to the optimization phase (default is False) prior to the optimization phase (default is False)
......
差异被折叠。
...@@ -756,7 +756,7 @@ class OpWiseCLinker(link.LocalLinker): ...@@ -756,7 +756,7 @@ class OpWiseCLinker(link.LocalLinker):
no_recycling can contain a list of Results that belong to the env. no_recycling can contain a list of Results that belong to the env.
If a Result is in no_recycling, CLinker will clear the output storage If a Result is in no_recycling, CLinker will clear the output storage
associated to it during the computation (to avoid reusing it). associated to it prior to computation (to avoid reusing it).
""" """
__cache__ = {} __cache__ = {}
......
...@@ -131,7 +131,7 @@ class Container(object): ...@@ -131,7 +131,7 @@ class Container(object):
self.type = r self.type = r
else: else:
self.type = r.type self.type = r.type
self.name = name or r.name self.name = r.name if name is None else name
self.storage = storage self.storage = storage
self.readonly = readonly self.readonly = readonly
self.strict = strict self.strict = strict
...@@ -149,7 +149,7 @@ class Container(object): ...@@ -149,7 +149,7 @@ class Container(object):
else: else:
self.storage[0] = self.type.filter(value) self.storage[0] = self.type.filter(value)
except Exception, e: except Exception, e:
e.args = e.args + (self.name,) e.args = e.args + (('Container name "%s"' % self.name),)
raise raise
data = property(__get, __set) data = property(__get, __set)
value = property(__get, __set) value = property(__get, __set)
...@@ -160,11 +160,33 @@ class Container(object): ...@@ -160,11 +160,33 @@ class Container(object):
def map_storage(env, order, input_storage, output_storage): def map_storage(env, order, input_storage, output_storage):
"""WRITEME""" """Ensure there is storage for inputs, outputs, and interior nodes.
:param env: The current env. This function uses the inputs and outputs attributes.
:param order: an iterable over Apply instances (in program running order)
:param input_storage: None or existing input storage (see below)
:param output_storage: None or existing output storage (see below)
:rtype: 3-tuple
:returns: (list of storage for inputs, list of storage for outputs, and the `storage_map`)
This function iterates over the nodes in `order` and ensures that for every
input and output `Result`, there is a unique storage container. This is
returned as a dictionary Result->storage called the `storage_map`.
This function also returns `input_storage` which is a list of storages corresponding to env.inputs.
This function also returns `output_storage` which is a list of storages corresponding to env.outputs.
"""
#each Apply argument's data is stored in a list of length 1 (these lists act like pointers)
# input_storage is a list of data-containers for the inputs.
if input_storage is None: if input_storage is None:
input_storage = [[None] for input in env.inputs] input_storage = [[None] for input in env.inputs]
else: else:
assert len(env.inputs) == len(input_storage) assert len(env.inputs) == len(input_storage)
storage_map = {} storage_map = {}
for r, storage in zip(env.inputs, input_storage): for r, storage in zip(env.inputs, input_storage):
storage_map[r] = storage storage_map[r] = storage
...@@ -172,10 +194,12 @@ def map_storage(env, order, input_storage, output_storage): ...@@ -172,10 +194,12 @@ def map_storage(env, order, input_storage, output_storage):
# if not isinstance(orphan, Constant): # if not isinstance(orphan, Constant):
# raise TypeError("Cannot link a graph with non-constant orphans.", orphan) # raise TypeError("Cannot link a graph with non-constant orphans.", orphan)
# storage_map[orphan] = [orphan.data] # storage_map[orphan] = [orphan.data]
if output_storage is not None: if output_storage is not None:
assert len(env.outputs) == len(output_storage) assert len(env.outputs) == len(output_storage)
for r, storage in zip(env.outputs, output_storage): for r, storage in zip(env.outputs, output_storage):
storage_map[r] = storage storage_map[r] = storage
thunks = [] thunks = []
for node in order: for node in order:
for r in node.inputs: for r in node.inputs:
...@@ -193,13 +217,16 @@ def map_storage(env, order, input_storage, output_storage): ...@@ -193,13 +217,16 @@ def map_storage(env, order, input_storage, output_storage):
return input_storage, output_storage, storage_map return input_storage, output_storage, storage_map
def clear_storage_thunk(stg):
    """Build a thunk that, when called, empties the storage cell `stg`.

    Such thunks are interleaved into the execution order so that data
    which is no longer needed can be reclaimed by the garbage collector.

    :param stg: a length-1 list acting as a storage cell (a "pointer").
    :returns: a zero-argument callable with `inputs`/`outputs` attributes
        mimicking the protocol of ordinary node thunks.
    """
    def wipe():
        stg[0] = None
    # Ordinary thunks carry these attributes; keep the same shape here so
    # the linker can treat clearing thunks uniformly.
    wipe.inputs = [stg]
    wipe.outputs = []
    return wipe
def streamline(env, thunks, order, no_recycling = [], profiler = None): def streamline(env, thunks, order, no_recycling = [], profiler = None):
"""WRITEME""" """WRITEME"""
def clear():
for thunk in thunks:
for output in thunk.outputs:
output[0] = None
if profiler is None: if profiler is None:
def f(): def f():
for x in no_recycling: for x in no_recycling:
...@@ -218,7 +245,6 @@ def streamline(env, thunks, order, no_recycling = [], profiler = None): ...@@ -218,7 +245,6 @@ def streamline(env, thunks, order, no_recycling = [], profiler = None):
profiler.profile_node(thunk, node) profiler.profile_node(thunk, node)
profiler.profile_env(g, env) profiler.profile_env(g, env)
f.profiler = profiler f.profiler = profiler
f.clear = clear
return f return f
class LocalLinker(Linker): class LocalLinker(Linker):
...@@ -246,14 +272,24 @@ class LocalLinker(Linker): ...@@ -246,14 +272,24 @@ class LocalLinker(Linker):
class PerformLinker(LocalLinker): class PerformLinker(LocalLinker):
"""WRITEME """WRITEME
Basic L{Linker} subclass that calls the perform method on each L{Op} in Basic L{Linker} subclass that calls the perform method on each L{Op} in
the L{Env} in the order given by L{Env.toposort}. the L{Env} in the order given by L{Env.toposort}.
""" """
def __init__(self): def __init__(self, allow_gc=False):
#TODO: set allow_gc = True by default, when it works with the c&py linker
self.env = None self.env = None
self.allow_gc = allow_gc
def accept(self, env, no_recycling = []): def accept(self, env, no_recycling = []):
"""
:param env: a PerformLinker can have accepted one Env instance at a time.
:param no_recycling: WRITEME
:returns: self (TODO: WHY? Who calls this function?)
"""
if self.env is not None and self.env is not env: if self.env is not None and self.env is not env:
return type(self)().accept(env, no_recycling) return type(self)().accept(env, no_recycling)
#raise Exception("Cannot accept from a Linker that is already tied to another Env.") #raise Exception("Cannot accept from a Linker that is already tied to another Env.")
...@@ -262,33 +298,74 @@ class PerformLinker(LocalLinker): ...@@ -262,33 +298,74 @@ class PerformLinker(LocalLinker):
return self return self
def make_all(self, profiler = None, input_storage = None, output_storage = None): def make_all(self, profiler = None, input_storage = None, output_storage = None):
"""
:param profiler: WRITEME
:param input_storage: WRITEME
:param output_storage: WRITEME
:returns: function to run all nodes, list of input containers, list of output containers, list of thunks (for all of program), list of nodes (for all of program)
"""
env = self.env env = self.env
order = env.toposort() order = env.toposort()
no_recycling = self.no_recycling no_recycling = self.no_recycling
thunks = [] thunks = []
new_order = []
input_storage, output_storage, storage_map = map_storage(env, order, input_storage, output_storage) input_storage, output_storage, storage_map = map_storage(env, order, input_storage, output_storage)
#for freeing memory
if self.allow_gc:
last_user = {}
computed = set()
for node in order:
for idx, input in enumerate(node.inputs):
last_user[input] = (node, idx)
for output in node.outputs:
computed.add(output)
for node in order: for node in order:
node_input_storage = tuple(storage_map[input] for input in node.inputs) node_input_storage = tuple(storage_map[input] for input in node.inputs)
node_output_storage = tuple(storage_map[output] for output in node.outputs) node_output_storage = tuple(storage_map[output] for output in node.outputs)
p = node.op.perform p = node.op.perform
# Thunk is meant to be called without arguments.
# The arguments are given in the lambda expression so that they are saved in the lambda expression.
# Using the closure in a simple way didn't work.
thunk = lambda p = p, i = node_input_storage, o = node_output_storage, n = node: p(n, [x[0] for x in i], o) thunk = lambda p = p, i = node_input_storage, o = node_output_storage, n = node: p(n, [x[0] for x in i], o)
thunk.inputs = node_input_storage thunk.inputs = node_input_storage
thunk.outputs = node_output_storage thunk.outputs = node_output_storage
thunk.perform = p thunk.perform = p
thunks.append(thunk) thunks.append(thunk)
new_order.append(node)
if self.allow_gc:
for idx, input in enumerate(node.inputs):
if input not in computed:
continue
if input in env.outputs:
continue
if (node, idx) == last_user[input]:
#print '... zeroing', id(storage_map[input])
thunks.append(clear_storage_thunk(storage_map[input]))
new_order.append(node)
if no_recycling is True: if no_recycling is True:
#True is like some special code for *everything*.
#FunctionMaker always passes a list I think -JB
no_recycling = storage_map.values() no_recycling = storage_map.values()
no_recycling = utils.difference(no_recycling, input_storage) no_recycling = utils.difference(no_recycling, input_storage)
else: else:
no_recycling = [storage_map[r] for r in no_recycling if r not in env.inputs] no_recycling = [storage_map[r] for r in no_recycling if r not in env.inputs]
f = streamline(env, thunks, order, no_recycling = no_recycling, profiler = profiler) # The function that actually runs your program is one of the f's in streamline.
f = streamline(env, thunks, new_order, no_recycling = no_recycling, profiler = profiler)
return f, [Container(input, storage) for input, storage in zip(env.inputs, input_storage)], \ return f, [Container(input, storage) for input, storage in zip(env.inputs, input_storage)], \
[Container(output, storage, True) for output, storage in zip(env.outputs, output_storage)], \ [Container(output, storage, True) for output, storage in zip(env.outputs, output_storage)], \
thunks, order thunks, new_order
......
...@@ -133,7 +133,7 @@ class TestWrapLinker: ...@@ -133,7 +133,7 @@ class TestWrapLinker:
x, y, z = inputs() x, y, z = inputs()
e = mul(add(x, y), div(x, y)) e = mul(add(x, y), div(x, y))
fn, i, o = wrap_linker(Env([x, y, z], [e]), [PerformLinker()], wrap).make_thunk() fn, i, o = wrap_linker(Env([x, y, z], [e]), [PerformLinker(allow_gc=False)], wrap).make_thunk()
i[0].data = 1 i[0].data = 1
i[1].data = 2 i[1].data = 2
fn() fn()
...@@ -148,7 +148,7 @@ class TestWrapLinker: ...@@ -148,7 +148,7 @@ class TestWrapLinker:
x, y, z = inputs() x, y, z = inputs()
e = mul(add(x, y), div(x, y)) e = mul(add(x, y), div(x, y))
fn, i, o = wrap_linker(Env([x, y, z], [e]), [PerformLinker()], wrap).make_thunk() fn, i, o = wrap_linker(Env([x, y, z], [e]), [PerformLinker(allow_gc=False)], wrap).make_thunk()
i[0].data = 1 i[0].data = 1
i[1].data = 2 i[1].data = 2
fn() fn()
......
"""Pretty-printing graphs, and the 'Print' Op.
"""
import gof import gof
from copy import copy from copy import copy
import sys import sys
from gof import Op, Apply
class Print(Op):
    """This identity-like Op has the side effect of printing a message followed by its inputs
    when it runs.
    """
    def __init__(self,message=""):
        # Message printed before the input value on each execution.
        self.message=message
        # Output 0 is a view of input 0: perform() passes the input
        # through unchanged (xout[0] = xin below).
        self.view_map={0:[0]}
    def make_node(self,xin):
        # The output has the same type as the input (identity-like).
        xout = xin.type.make_result()
        return Apply(op = self, inputs = [xin], outputs=[xout])
    def perform(self,node,inputs,output_storage):
        xin, = inputs
        xout, = output_storage
        # Forward the input unchanged, then print the side-effect message.
        xout[0] = xin
        print self.message,xin
    def grad(self,input,output_gradients):
        # Identity function: the gradient passes through untouched.
        return output_gradients
class PrinterState(gof.utils.scratchpad): class PrinterState(gof.utils.scratchpad):
...@@ -232,3 +255,4 @@ pprint.assign(lambda pstate, r: hasattr(pstate, 'target') and pstate.target is n ...@@ -232,3 +255,4 @@ pprint.assign(lambda pstate, r: hasattr(pstate, 'target') and pstate.target is n
pp = pprint pp = pprint
...@@ -21,7 +21,7 @@ from .. import scalar as scal ...@@ -21,7 +21,7 @@ from .. import scalar as scal
from ..gof.python25 import partial from ..gof.python25 import partial
from .. import compile, printing from .. import compile, printing
from ..printing import pprint from ..printing import pprint, Print
### set up the external interface ### set up the external interface
...@@ -457,9 +457,10 @@ class _tensor_py_operators: ...@@ -457,9 +457,10 @@ class _tensor_py_operators:
def __neg__(self): return neg(self) def __neg__(self): return neg(self)
#CASTS #CASTS
def __int__(self): return AsInt(self).out #### REMOVED THESE BECAUSE PYTHON appears to require __int__ to return an int. -JB 20081112
def __float__(self): return AsInt(self).out #def __int__(self): return convert_to_int32(self)
def __complex__(self): return AsComplex(self).out #def __float__(self): return convert_to_float64(self)
#def __complex__(self): return convert_to_complex128(self)
#COMPARISONS #COMPARISONS
def __lt__(self,other): return lt(self, other) def __lt__(self,other): return lt(self, other)
...@@ -712,7 +713,7 @@ class Shape(Op): ...@@ -712,7 +713,7 @@ class Shape(Op):
x = as_tensor(x) x = as_tensor(x)
return Apply(self, [x], [lvector()]) return Apply(self, [x], [lvector()])
def perform(self, node, (x, ), (out, )): def perform(self, node, (x, ), (out, )):
out[0] = numpy.asarray(x.shape) out[0] = numpy.asarray(x.shape, dtype = 'int64')
def grad(self, (x,), (gz,)): def grad(self, (x,), (gz,)):
return [None] return [None]
@_redefine_asRoutine(Shape()) @_redefine_asRoutine(Shape())
...@@ -1012,6 +1013,10 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum')) ...@@ -1012,6 +1013,10 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum'))
@constructor @constructor
def mean(input, axis = None): def mean(input, axis = None):
"""WRITEME""" """WRITEME"""
if str(input.dtype).startswith('int'):
# we need to cast eventually anyway, and this helps
# to prevent overflow
input = convert_to_float64(input)
s = sum(input, axis) s = sum(input, axis)
shp = shape(input) shp = shape(input)
if axis is None: if axis is None:
...@@ -1554,6 +1559,11 @@ def shape_padleft(tensor, n_ones): ...@@ -1554,6 +1559,11 @@ def shape_padleft(tensor, n_ones):
pattern = ['x']*n_ones + [i for i in range(tensor.type.ndim)] pattern = ['x']*n_ones + [i for i in range(tensor.type.ndim)]
return DimShuffle(tensor.broadcastable, pattern)(tensor) return DimShuffle(tensor.broadcastable, pattern)(tensor)
@constructor
def rightpad_shape(tensor, n_ones):
    """Reshape `tensor` by right-padding the shape with `n_ones` 1s"""
    # Keep every existing dimension in place, then append `n_ones`
    # broadcastable ('x') dimensions on the right.
    shuffle_pattern = list(range(tensor.type.ndim)) + ['x'] * n_ones
    return DimShuffle(tensor.broadcastable, shuffle_pattern)(tensor)
@constructor @constructor
def shape_padright(tensor, n_ones): def shape_padright(tensor, n_ones):
...@@ -2210,7 +2220,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0 ...@@ -2210,7 +2220,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
t_r = as_tensor(random_projection) t_r = as_tensor(random_projection)
#random projection of o onto t_r #random projection of o onto t_r
cost = sum(t_r * o_output) cost = sum(t_r * o_output) #This sum() is defined above, it's not the builtin sum.
cost_fn = function(tensor_pt, cost) cost_fn = function(tensor_pt, cost)
num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps) num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps)
......
...@@ -125,7 +125,7 @@ class SoftmaxWithBias(gof.Op): ...@@ -125,7 +125,7 @@ class SoftmaxWithBias(gof.Op):
return dx, db return dx, db
def c_headers(self): def c_headers(self):
return ['<iostream>'] return ['<iostream> <math>']
@staticmethod @staticmethod
def c_code_template(): def c_code_template():
...@@ -214,7 +214,7 @@ class SoftmaxWithBias(gof.Op): ...@@ -214,7 +214,7 @@ class SoftmaxWithBias(gof.Op):
sum += sm_ij; sum += sm_ij;
sm_i[j * Ssm] = sm_ij; sm_i[j * Ssm] = sm_ij;
} }
if ( (0.0 == sum) || (isinf(sum))) if ( (0.0 == sum) || (std::isinf(sum)))
{ {
//that was our best... //that was our best...
%(fail)s; %(fail)s;
......
...@@ -55,8 +55,9 @@ class RandomFunction(gof.Op): ...@@ -55,8 +55,9 @@ class RandomFunction(gof.Op):
r = copy(r) r = copy(r)
rout[0] = r rout[0] = r
rval = self.fn(r, *(args + [shape])) rval = self.fn(r, *(args + [shape]))
if not isinstance(rval, numpy.ndarray): if not isinstance(rval, numpy.ndarray) \
out[0] = numpy.asarray(rval, dtype = node.outputs[0].type.dtype) or str(rval.dtype) != node.outputs[1].type.dtype:
out[0] = numpy.asarray(rval, dtype = node.outputs[1].type.dtype)
else: else:
out[0] = rval out[0] = rval
...@@ -237,7 +238,7 @@ class RandomKit(SymbolicInputKit): ...@@ -237,7 +238,7 @@ class RandomKit(SymbolicInputKit):
rk = RandomKit('rk', 0xBAD5EED) rk = RandomKit('rk', 0xBAD5EED)
class RModule(compile.FancyModule): class RModule(compile.Module):
def __init__(self, components = {}, **kwcomponents): def __init__(self, components = {}, **kwcomponents):
super(RModule, self).__init__(components, **kwcomponents) super(RModule, self).__init__(components, **kwcomponents)
......
from xlogx import xlogx
import unittest
from theano import compile
from theano import gradient
from theano.tensor import as_tensor
import theano._test_tensor as TT
import random
import numpy.random
class T_XlogX(unittest.TestCase):
    """Tests for the elementwise `xlogx` Op (x * log(x) with 0*log(0) == 0)."""
    def test0(self):
        # xlogx at x=1 is 1*log(1) == 0; at x=0 the special case gives 0.
        x = as_tensor([1, 0])
        y = xlogx(x)
        y = compile.eval_outputs([y])
        # NOTE(review): failUnless is the pre-2.7 spelling of assertTrue.
        self.failUnless(numpy.all(y == numpy.asarray([0, 0.])))
    def test1(self):
        # Wrap a slice of xlogx's output so verify_grad can check the
        # symbolic gradient against a finite-difference estimate.
        class Dummy(object):
            def make_node(self, a):
                return [xlogx(a)[:,2]]
        # rand() in [0, 1) keeps log(x) finite (x > 0 almost surely).
        TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)])
if __name__ == '__main__':
unittest.main()
import theano
from theano import tensor, scalar
import numpy
class XlogX(scalar.UnaryScalarOp):
    """
    Compute X * log(X), with special case 0 log(0) = 0.
    """
    @staticmethod
    def st_impl(x):
        # Special case: lim_{x->0+} x*log(x) == 0, so define it as 0
        # instead of producing 0 * (-inf) == nan.
        if x == 0.0:
            return 0.0
        return x * numpy.log(x)
    def impl(self, x):
        # Python implementation used when no C code is generated.
        return XlogX.st_impl(x)
    def grad(self, (x,), (gz,)):
        # d/dx [x*log(x)] = log(x) + 1, scaled by the incoming gradient.
        return [gz * (1 + scalar.log(x))]
    def c_code(self, node, name, (x,), (z,), sub):
        # C version mirrors st_impl, including the 0*log(0) == 0 case.
        if node.inputs[0].type in [scalar.float32, scalar.float64]:
            return """%(z)s =
                %(x)s == 0.0
                ? 0.0
                : %(x)s * log(%(x)s);""" % locals()
        raise NotImplementedError('only floatingpoint is implemented')
# Scalar instance of the op; upgrade_to_float presumably promotes integer
# inputs to a floating-point output type -- TODO confirm in theano.scalar.
scalar_xlogx = XlogX(scalar.upgrade_to_float, name='scalar_xlogx')
# Elementwise tensor version, broadcasting the scalar op over tensors.
xlogx = tensor.Elemwise(scalar_xlogx, name='xlogx')
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论