merge

86e77a37 · Joseph Turian · 710b5d77 · d0cfadcb · 86e77a37 · 86e77a37
--- a/doc/README.txt
+++ b/doc/README.txt
@@ -65,7 +65,7 @@ Software Requirements
 - python 2.5
- SciPy (specifically numpy, sparse, weave).  Numpy version >= 1.1 fixes memory leak.
+- SciPy (specifically numpy, sparse, weave).  Numpy version >= 1.1 fixes memory leak. Numpy version >= 1.2 fixes more memory leaks.
 - docutils, pygments (optional, to build documentation)
@@ -73,6 +73,8 @@ Software Requirements
 - g++, python-dev (optional, to compile generated C code)
+- nose, for testing
 -  `psyco <http://psyco.sourceforge.net/>`__ can make your python code much faster, if you are on a 32-bit x86 architecture.  If you use compiled C code, this can be less important.
 Downloading Theano
@@ -83,13 +85,13 @@ There are no stable releases yet.
 *To get the source via mercurial,* you must have `mercurial <http://www.selenic.com/mercurial/wiki/>`__ installed.
-Get the source and run the auto-tests like this:
+Get the source and run the tests like this:
 .. code-block:: bash
    hg clone http://pylearn.org/hg/theano theano
    cd theano
-    python autotest.py
+    nosetests
 To update your library to the latest on pylearn.org, change directory (`cd`) to this `theano` folder and type
@@ -172,15 +174,17 @@ Use something like the following in your .bashrc:
 Running the Test Suite
 ======================
-Test your installation by running the autotests.  Type at the shell:
+Test your installation by running the tests.  Type at the shell:
 .. code-block:: bash
    cd theano
-    python2.5 autotest.py
+    nosetests
 All tests should pass.
+python-nose must be installed. On Red Hat or Fedora Core, run: ``sudo yum install python-nose.noarch``
 Using Theano
 ============

--- a/doc/doc/index.txt
+++ b/doc/doc/index.txt
@@ -7,7 +7,7 @@ Theano Project Documentation Overview
   *
-Documentation is divided broadly into two kinds: user documentation and
+Documentation is broadly divided into two kinds: user documentation and
 developer documentation.  
 - `Using Theano` covers how to *use* what is already in the Theano library to

--- a/doc/index.txt
+++ b/doc/index.txt
@@ -25,7 +25,7 @@ Our project uses the name to honour the ancient Greek mathematician.
 Overview
 ========
-**To get up & running quickly** see README_.
+**To get up and running quickly** see README_.
 All **documentation** can be reached from the `Theano Project Documentation Overview`_.

--- a/examples/tests/test_wiki.py
+++ b/examples/tests/test_wiki.py
+import unittest
+from theano import gof
+from theano import compile
+from theano.compile.function_module import *
+from theano.scalar import *
+import theano
+from theano import tensor
+from theano import tensor as T
+from theano.tensor import nnet as NN
+import random
+import numpy as N
+from theano.compile import module as M
+class RegressionLayer(M.Module):
+    def __init__(self, input = None, target = None, regularize = True):
+        super(RegressionLayer, self).__init__() #boilerplate
+        # MODEL CONFIGURATION
+        self.regularize = regularize
+        # ACQUIRE/MAKE INPUT AND TARGET
+        if not input:
+            input = T.matrix('input')
+        if not target:
+            target = T.matrix('target')
+        # HYPER-PARAMETERS
+        self.stepsize = M.Member(T.scalar())  # a stepsize for gradient descent
+        # PARAMETERS
+        self.w = M.Member(T.matrix())  #the linear transform to apply to our input points
+        self.b = M.Member(T.vector())  #a vector of biases, which make our transform affine instead of linear
+        # REGRESSION MODEL
+        self.activation = T.dot(input, self.w) + self.b
+        self.prediction = self.build_prediction()
+        # CLASSIFICATION COST
+        self.classification_cost = self.build_classification_cost(target)
+        # REGULARIZATION COST
+        self.regularization = self.build_regularization()
+        # TOTAL COST
+        self.cost = self.classification_cost
+        if self.regularize:
+            self.cost = self.cost + self.regularization
+        # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
+        self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
+        # INTERFACE METHODS
+        self.update = M.Method([input, target],
+                                  self.cost,
+                                  w = self.w - self.stepsize * self.grad_w,
+                                  b = self.b - self.stepsize * self.grad_b)
+        self.apply = M.Method(input, self.prediction)
+    def params(self):
+        return self.w, self.b
+    def _instance_initialize(self, obj, input_size = None, target_size = None, **init):
+        # obj is an "instance" of this module holding values for each member and
+        # functions for each method
+        #super(RegressionLayer, self).initialize(obj, **init)
+        # here we call the superclass's initialize method, which takes all the name: value
+        # pairs in init and sets the property with that name to the provided value
+        # this covers setting stepsize, l2_coef; w and b can be set that way too
+        if input_size and target_size:
+            # initialize w and b in a special way using input_size and target_size
+            sz = (input_size, target_size)
+            obj.w = N.random.uniform(size = sz, low = -0.5, high = 0.5)
+            obj.b = N.zeros(target_size)
+            obj.stepsize = 0.01
+    def build_regularization(self):
+        return T.zero() # no regularization!
+class SoftmaxXERegression(RegressionLayer):
+    """ XE mean cross entropy"""
+    def build_prediction(self):
+        return NN.softmax(self.activation)
+    def build_classification_cost(self, target):
+        #self.classification_cost_matrix = target * T.log(self.prediction) + (1 - target) * T.log(1 - self.prediction)
+        self.classification_cost_matrix = (target - self.prediction)**2
+        self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
+        return T.sum(self.classification_costs)
+    def build_regularization(self):
+        self.l2_coef = M.Member(T.scalar()) # we can add a hyper parameter if we need to
+        return self.l2_coef * T.sum(self.w * self.w)
+class T_function_module(unittest.TestCase):
+    def test_Klass_basic_example1(self):
+        n, c = T.scalars('nc')
+        inc = theano.function([n, ((c, c + n), 0)], [])
+        dec = theano.function([n, ((c, c - n), inc.container[c])], []) # we need to pass inc's container in order to share
+        plus10 = theano.function([(c, inc.container[c])], c + 10)
+        assert inc[c] == 0
+        inc(2)
+        assert inc[c] == 2 and dec[c] == inc[c]
+        dec(3)
+        assert inc[c] == -1 and dec[c] == inc[c]
+        assert plus10() == 9
+    def test_Klass_basic_example2(self):
+        m = M.Module()
+        n = T.scalar('n')
+        m.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
+        m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n
+        m.dec = M.Method(n, [], c = m.c - n) # k.c <= k.c - n
+        m.plus10 = M.Method([], m.c + 10) # m.c is always accessible since it is a member of this module
+        inst = m.make(c = 0) # here, we make an "instance" of the module with c initialized to 0
+        assert inst.c == 0
+        inst.inc(2)
+        assert inst.c == 2
+        inst.dec(3)
+        assert inst.c == -1
+        assert inst.plus10() == 9
+    def test_Klass_nesting_example1(self):
+        def make_incdec_function():
+            n, c = T.scalars('nc')
+            inc = theano.function([n, ((c, c + n), 0)], [])
+            dec = theano.function([n, ((c, c - n), inc.container[c])], [])
+            return inc,dec
+        inc1, dec1 = make_incdec_function()
+        inc2, dec2 = make_incdec_function()
+        a, b = T.scalars('ab')
+        sum = theano.function([(a, inc1.container['c']), (b, inc2.container['c'])], a + b)
+        inc1(2)
+        dec1(4)
+        inc2(6)
+        assert inc1['c'] == -2 and inc2['c'] == 6
+        assert sum() == 4 # -2 + 6
+    def test_Klass_nesting_example2(self):
+        def make_incdec_module():
+            m = M.Module()
+            n = T.scalar('n')
+            m.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
+            m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n
+            m.dec = M.Method(n, [], c = m.c - n) # k.c <= k.c - n
+            return m
+        m = M.Module()
+        m.incdec1 = make_incdec_module()
+        m.incdec2 = make_incdec_module()
+        m.sum = M.Method([], m.incdec1.c + m.incdec2.c)
+        inst = m.make(incdec1 = dict(c=0), incdec2 = dict(c=0))
+        inst.incdec1.inc(2)
+        inst.incdec1.dec(4)
+        inst.incdec2.inc(6)
+        assert inst.incdec1.c == -2 and inst.incdec2.c == 6
+        assert inst.sum() == 4 # -2 + 6
+    def test_Klass_Advanced_example(self):
+        model_module = SoftmaxXERegression(regularize = False)
+        model = model_module.make(input_size = 10,
+                                  target_size = 1,
+                                  stepsize = 0.1)
+        data_x = N.random.randn(4, 10)
+        data_y = [ [int(x)] for x in N.random.randn(4) > 0]
+        print data_x
+        print
+        print data_y
+        for i in xrange(1000):
+            xe = model.update(data_x, data_y)
+            if i % 100 == 0:
+                print i, xe
+        #for inputs, targets in my_training_set():
+            #print "cost:", model.update(inputs, targets)
+        print "final weights:", model.w
+        print "final biases:", model.b
+        #print "some prediction:", model.prediction(some_inputs)
+    def test_Klass_extending_klass_methods(self):
+        model_module = SoftmaxXERegression(regularize = False)
+        model_module.sum = M.Member(T.scalar()) # we add a module member to hold the sum
+        model_module.update.extend(sum = model_module.sum + model_module.cost) # now update will also update sum!
+        model = model_module.make(input_size = 4,
+                                 target_size = 2,
+                                 stepsize = 0.1,
+                                 sum = 0) # we mustn't forget to initialize the sum
+        test = model.update([[0,0,1,0]], [[0,1]]) + model.update([[0,1,0,0]], [[1,0]])
+        assert model.sum == test
+        def make_incdec_function():
+            n, c = T.scalars('nc')
+            inc = theano.function([n, ((c, c + n), 0)], [])
+            dec = theano.function([n, ((c, c - n), inc.container[c])], [])
+            return inc,dec
+        inc1, dec1 = make_incdec_function()
+        inc2, dec2 = make_incdec_function()
+        a, b = T.scalars('ab')
+        sum = theano.function([(a, inc1.container['c']), (b, inc2.container['c'])], a + b)
+        inc1(2)
+        dec1(4)
+        inc2(6)
+        assert inc1['c'] == -2 and inc2['c'] == 6
+        assert sum() == 4 # -2 + 6
+    def test_Klass_basic_example2_more(self):
+        m = M.Module()
+        m2 = M.Module()
+        m2.name="m2" # for better error
+        # The top-level module has no name, but the other modules get an automatic name.
+        n = T.scalar('n')
+        m.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
+        m2.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
+        m.dec = M.Method(n, [], c = m.c - n)
+        m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n
+#        m.inc = M.Method(n, [], c = c + n)#fail c not defined
+#syntax error
+#        m.inc = M.Method(n, [], m.c = m.c + n)#fail
+        m.inc = M.Method(n, [], updates={m.c: m.c + n})
+#        m.inc = M.Method(n, [], updates={c: m.c + n})#fail with NameError
+#        m.inc = M.Method(n, [], updates={m.c: c + n})#fail with NameError
+#        m.inc = M.Method(n, [], updates={c: c + n})#fail with NameError
+        m.inc = M.Method(n, [], updates={m.c: m2.c + n})#work! should be allowed?
+        a = M.Module()
+        a.m1 = m
+        a.m2 = m2
+        a.make()#should work.
+#        self.assertRaises(m.make(c = 0), Error)
+        m.inc = M.Method(n, [], updates={m2.c: m.c + n})#work! should be allowed?
+#        self.assertRaises(m.make(c = 0), Error)
+#        m.inc = M.Method(n, [], updates={m2.c: m2.c + n})#work! should be allowed?
+#        self.assertRaises(m.make(c = 0), Error)
+if __name__ == '__main__':
+    if 0:
+        unittest.main()
+    elif 1:
+        module = __import__("test_wiki")
+        tests = unittest.TestLoader().loadTestsFromModule(module)
+        tests.debug()
+    else:
+        testcases = []
+        testcases.append(T_function_module)
+        #<testsuite boilerplate>
+        testloader = unittest.TestLoader()
+        suite = unittest.TestSuite()
+        for testcase in testcases:
+            suite.addTest(testloader.loadTestsFromTestCase(testcase))
+        unittest.TextTestRunner(verbosity=2).run(suite)
+        #</boilerplate>
--- a/theano/__init__.py
+++ b/theano/__init__.py
@@ -44,7 +44,9 @@ from compile import \
    predefined_modes, predefined_linkers, predefined_optimizers, \
    FunctionMaker, function, OpFromGraph, \
    Component, External, Member, KitComponent, Method, \
-    Composite, ComponentList, Module, FancyModule
+    Composite, ComponentList, ComponentDict, Module
+FancyModule = Module
 from printing import \
    pprint, pp

--- a/theano/compile/function_module.py
+++ b/theano/compile/function_module.py
@@ -19,6 +19,8 @@ def infer_reuse_pattern(env, outputs_to_disown):
    Given an env and a list of results, returns the list of all
    results which may share the same underlying data storage as any of
    the specified results. Used internally by function, FunctionMaker.
+    This list is also sometimes referred to as no_recycling.
    """
    do_not_reuse = list()
    seen = set()
@@ -130,14 +132,14 @@ class Function(object):
            input.distribute(value, indices, cs)
            for c in cs:
                c.provided += 1
-        def set(c, v):
+        def assign(c, v):
            c.data = v
        setters = []
        # Initialize the storage
        for i, ((input, indices, sinputs), (required, refeed, value)) in enumerate(zip(self.indices, defaults)):
            if indices is None: # this is true iff input is not a SymbolicInputKit
-                c = containers[0]
+                c = containers[0]  # containers is consumed from the front: take the next one here; it is removed from the list below.
                if input.strict:
                    c.strict = True
                if value is not None:
@@ -155,7 +157,7 @@ class Function(object):
                finder[input.name] = c if input.name not in finder else DUPLICATE
                # inv_finder maps the container to the input (useful for one error message)
                inv_finder[c] = input
-                setters.append(partial(set, c))
+                setters.append(partial(assign, c))
                containers[:1] = []
            else:
                # The input is a SymbolicInputKit, so we take as many containers as the Kit provides inputs
@@ -440,11 +442,12 @@ class FunctionMaker(object):
            raise ValueError("'linker' parameter of FunctionFactory should be a Linker with an accept method " \
                             "or one of %s" % predefined_linkers.keys())
+        # The 'no_borrow' outputs are the ones for which we can't return the internal storage pointer.
        no_borrow = [output for output, spec in zip(env.outputs, outputs+additional_outputs) if not spec.borrow]
-        if not no_borrow:
+        if no_borrow:
-            self.linker = linker.accept(env)
-        else:
            self.linker = linker.accept(env, no_recycling = infer_reuse_pattern(env, no_borrow))
+        else:
+            self.linker = linker.accept(env)
        self.indices = indices
        self.inputs = inputs
@@ -592,10 +595,10 @@ def function(inputs, outputs, mode='FAST_RUN', accept_inplace = False):
      The return value of the returned function will match the format of this
      argument (either the value itself or a list of one or more return values)
    mode -> a descriptive string or a Mode instance; descriptive strings can be one of:
-      * SANITY_CHECK
+      * SANITY_CHECK TODO: NotImplemented
-      * FAST_COMPILE
+      * FAST_COMPILE (apply only optimizations that are fast to apply)
-      * FAST_RUN (default)
+      * FAST_RUN (default) (optimize without spending too much time)
-      * EXPENSIVE_OPTIMIZATION
+      * EXPENSIVE_OPTIMIZATION TODO: NotImplemented
    accept_inplace -> True iff the graph can contain inplace operations
      prior to the optimization phase (default is False)

--- a/theano/compile/module.py
+++ b/theano/compile/module.py
@@ -10,34 +10,73 @@ import function_module as F
 def join(*args):
+    """
+    Creates a string representation for the given names:
+    join('a', 'b', 'c') => 'a.b.c'
+    """
    return ".".join(arg for arg in args if arg)
 def split(sym, n=-1):
+    """
+    Gets the names from their joined representation
+    split('a.b.c') => ['a', 'b', 'c']
+    Splits at most n times (yielding at most n+1 names, the last
+    one holding the unsplit remainder); if n==-1, splits on every '.'.
+    """
    return sym.split('.', n)
 def canonicalize(name):
+    """
+    Splits the name and converts each name to the
+    right type (e.g. "2" -> 2)
+    """
    if isinstance(name, str):
        name = split(name)
    def convert(x):
        try:
            return int(x)
-        except ValueError:
+        except (ValueError, TypeError):
            return x
    return map(convert, name)
 class AllocationError(Exception):
+    """
+    Exception raised when a Result has no associated storage.
+    """
    pass
 class BindError(Exception):
+    """
+    Exception raised when a Component is already bound and we try to
+    bind it again.
+    """
    pass
 class Component(object):
+    """
+    Base class for the various kinds of components which are not
+    structural but may be meaningfully used in structures (Member,
+    Method, etc.)
+    """
    def __init__(self):
        self.__dict__['_name'] = ''
        self.__dict__['parent'] = None
    def bind(self, parent, name, dup_ok=True):
+        """
+        Marks this component as belonging to the parent (the parent is
+        typically a Composite instance). The component can be accessed
+        through the parent with the specified name. If dup_ok is True
+        and this Component is already bound, a duplicate of the
+        component will be made using the dup() method and the
+        duplicate will be bound instead of this Component. If dup_ok
+        is False and this Component is already bound, a BindError will
+        be raised.
+        bind() returns the Component instance which has been bound to
+        the parent. For an unbound instance, this will usually be
+        self.
+        """
        if self.bound():
            if dup_ok:
                try:
@@ -54,21 +93,48 @@ class Component(object):
        return self
    def bound(self):
+        """
+        Returns True if this Component instance is bound to a
+        Composite.
+        """
        return self.parent is not None
    def allocate(self, memo):
+        """
+        Populates the memo dictionary with Result -> Container
+        pairings.
+        """
        raise NotImplementedError
    def build(self, mode, memo):
+        """
+        Makes an instance of this Component using the mode provided
+        and taking the containers in the memo dictionary.
+        A Component which builds nothing may return None.
+        """
        raise NotImplementedError
    def make_no_init(self, mode='FAST_COMPILE'):
+        """
+        Allocates the necessary containers using allocate() and uses
+        build() with the provided mode to make an instance which will
+        be returned.  The initialize() method of the instance will not
+        be called.
+        """
        memo = {}
        self.allocate(memo)
        rval = self.build(mode, memo)
        return rval
    def make(self, *args, **kwargs):
+        """
+        Allocates the necessary containers using allocate() and uses
+        build() to make an instance which will be returned. The
+        initialize() method of the instance will be called with the
+        arguments and the keyword arguments. If 'mode' is in the
+        keyword arguments it will be passed to build().
+        """
        mode = kwargs.pop('mode', 'FAST_COMPILE')
        rval = self.make_no_init(mode)
        if hasattr(rval, 'initialize'):
@@ -82,20 +148,34 @@ class Component(object):
        return self.__class__.__name__
    def pretty(self, **kwargs):
+        """
+        Returns a pretty representation of this Component, suitable
+        for reading.
+        """
        raise NotImplementedError
    def __get_name__(self):
+        """
+        Getter for self.name
+        """
        return self._name
    def __set_name__(self, name):
+        """
+        Setter for self.name
+        """
        self._name = name
    name = property(lambda self: self.__get_name__(),
-                    lambda self, value: self.__set_name__(value))
+                    lambda self, value: self.__set_name__(value),
+                    "Contains the name of this Component")
 class _RComponent(Component):
+    """
+    Base class for a Component wrapping a Result. For internal use.
+    """
    def __init__(self, r):
        super(_RComponent, self).__init__()
@@ -119,12 +199,19 @@ class _RComponent(Component):
 class External(_RComponent):
+    """
+    External represents a Result which comes from somewhere else
+    (another module) or is a temporary calculation.
+    """
    def allocate(self, memo):
        # nothing to allocate
        return None
    def build(self, mode, memo):
+        """
+        Builds nothing.
+        """
        return None
    def pretty(self, **kwargs):
@@ -136,8 +223,19 @@ class External(_RComponent):
 class Member(_RComponent):
+    """
+    Member represents a Result which is a state of a Composite. That
+    Result will be accessible from a built Composite and it is
+    possible to do updates on Members.
+    Member builds a gof.Container.
+    """
    def allocate(self, memo):
+        """
+        If the memo does not have a Container associated to this
+        Member's Result, instantiates one and sets it in the memo.
+        """
        r = self.r
        if memo and r in memo:
            return memo[r]
@@ -146,6 +244,9 @@ class Member(_RComponent):
        return rval
    def build(self, mode, memo):
+        """
+        Returns the Container associated to this Member's Result.
+        """
        return memo[self.r]
@@ -153,6 +254,20 @@ class Member(_RComponent):
 class Method(Component):
    def __init__(self, inputs, outputs, updates = {}, kits = [], **kwupdates):
+        """
+        Method is a declaration of a function. It contains inputs,
+        outputs, updates and kits. If the Method is part of a
+        Composite which holds references to Members, the Method may
+        use them without declaring them in the inputs, outputs or
+        updates list.
+        inputs, outputs or updates may be strings. In that case, they
+        will be resolved in the Composite which is the parent of this
+        Method.
+        Method builds a Function (same structure as a call to
+        theano.function)
+        """
        super(Method, self).__init__()
        self.inputs = inputs
        self.outputs = outputs
@@ -165,6 +280,9 @@ class Method(Component):
        return rval
    def resolve(self, name):
+        """
+        Resolves the name of an input or output in the parent.
+        """
        if not self.bound():
            raise ValueError('Trying to resolve a name on an unbound Method.')
        result = self.parent.resolve(name)
@@ -175,16 +293,23 @@ class Method(Component):
    def resolve_result(self, x):
        if isinstance(x, gof.Result):
            return x
+        elif isinstance(x, _RComponent):
+            return x.r
        else:
            return self.resolve(x).r
    def resolve_all(self):
-        if not isinstance(self.inputs, (list, tuple)):
+        """
+        Resolves all inputs, outputs and updates that were given as
+        strings so that the fields contain the corresponding Result
+        instances instead.
+        """
+        if isinstance(self.inputs, (gof.Result, str)):
            inputs = [self.inputs]
        else:
-            inputs = self.inputs
+            inputs = list(self.inputs)
        self.inputs = [self.resolve_result(input) for input in inputs]
-        if isinstance(self.outputs, (list, tuple)):
+        if isinstance(self.outputs, (list, tuple, ComponentList)):
            self.outputs = [self.resolve_result(output) for output in self.outputs]
        else:
            self.outputs = self.resolve_result(self.outputs)
@@ -195,11 +320,22 @@ class Method(Component):
            self.updates[k] = v
    def allocate(self, memo):
+        """
+        Method allocates nothing.
+        """
        return None
    def build(self, mode, memo, allocate_all = False):
-        self.resolve_all()
+        """
+        Produces a function. If allocate_all is True, storage will be
+        allocated for all needed Results, even if there is no
+        associated storage for them in the memo. If allocate_all is
+        False, storage will only be allocated for Results that are
+        reachable from the inputs list.
+        """
+        self.resolve_all() # resolve all so we don't have to mess with strings
        def get_storage(r, require = False):
+            # If require is True, we can only get storage from the memo.
            try:
                return memo[r]
            except KeyError:
@@ -209,11 +345,13 @@ class Method(Component):
                                          ' enclosing module or of one of its submodules.' % (r, self))
                else:
                    return gof.Container(r, storage = [None])
+        # Wrap the inputs in In instances.
        inputs = self.inputs
        inputs = [io.In(result = input,
                        value = get_storage(input),
                        mutable = False)
                  for input in inputs]
+        # Add the members to update to the inputs.
        inputs += [io.In(result = k,
                         update = v,
                         value = get_storage(k, not allocate_all),
@@ -222,13 +360,20 @@ class Method(Component):
                   for k, v in self.updates.iteritems()]
        outputs = self.outputs
        _inputs = [x.result for x in inputs]
+        # Grab the results that are not accessible from either the inputs or the updates.
        for input in gof.graph.inputs((list(outputs) if isinstance(outputs, (list, tuple)) else [outputs])
                                      + [x.update for x in inputs if getattr(x, 'update', False)],
                                      blockers = _inputs):
            if input not in _inputs and not isinstance(input, gof.Value):
+                # Add this input to the inputs; we require that storage already exists for them,
+                # but otherwise they are immutable.
                inputs += [io.In(result = input,
                                 value = get_storage(input, not allocate_all),
                                 mutable = False)]
+        # Add the kits to the input. The kit should be associated in
+        # memo to a list of Containers. theano.function handles that
+        # case by picking only the needed Containers from the list, so
+        # here we can just delegate to theano.function.
        inputs += [(kit, get_storage(kit, not allocate_all)) for kit in self.kits]
        return F.function(inputs, outputs, mode)
@@ -238,8 +383,10 @@ class Method(Component):
            rval = 'inputs: %s\n' % ", ".join(map(str, self.inputs))
        else:
            rval = ''
-        mode = kwargs.pop('mode', None)
        inputs, outputs, updates = self.inputs, self.outputs if isinstance(self.outputs, (list, tuple)) else [self.outputs], self.updates
+        # If mode is in kwargs, prints the optimized version of the method
+        mode = kwargs.pop('mode', None)
        if mode:
            f = self.build(mode, {}, True)
            einputs, eoutputs = f.maker.env.inputs, f.maker.env.outputs
@@ -282,13 +429,21 @@ class Method(Component):
 class CompositeInstance(object):
+    """
+    Generic type which various Composite subclasses are intended to
+    build.
+    """
    def __init__(self, component, __items__):
+        # The Component that built this CompositeInstance
        self.__dict__['component'] = component
+        # Some data structure indexable using []
        self.__dict__['__items__'] = __items__
    def __getitem__(self, item):
        x = self.__items__[item]
+        # For practical reasons, if the item is a Container, we
+        # return its contents.
        if isinstance(x, gof.Container):
            return x.value
        return x
@@ -296,14 +451,20 @@ class CompositeInstance(object):
    def __setitem__(self, item, value):
        x = self.__items__[item]
        if isinstance(x, gof.Container):
+            # If the item is a Container, we set its value
            x.value = value
        elif hasattr(x, 'initialize'):
+            # If the item has an initialize() method, we use
+            # it with the value as argument
            x.initialize(value)
        else:
            ##self.__items__[item] = value
            raise KeyError('Cannot set item %s' % item)
 class Composite(Component):
+    """
+    Composite represents a structure that contains Components.
+    """
    def resolve(self, name):
        raise NotImplementedError
@@ -321,6 +482,12 @@ class Composite(Component):
        raise NotImplementedError
    def flat_components(self, include_self = False):
+        """
+        Generator that yields each component in a flattened hierarchy
+        of composites and components. If include_self is True, the
+        list will include the Composite instances, else it will only
+        yield the list of leaves.
+        """
        if include_self:
            yield self
        for component in self.components():
@@ -331,6 +498,15 @@ class Composite(Component):
                yield component
    def flat_components_map(self, include_self = False, path = []):
+        """
+        Generator that yields (path, component) pairs in a flattened
+        hierarchy of composites and components, where path is a
+        sequence of keys such that
+          component is self[path[0]][path[1]]...
+        If include_self is True, the list will include the Composite
+        instances, else it will only yield the list of leaves.
+        """
        if include_self:
            yield path, self
        for name, component in self.components_map():
@@ -342,22 +518,33 @@ class Composite(Component):
                yield path2, component
    def allocate(self, memo):
+        """
+        Does allocation for each component in the composite.
+        """
        for member in self.components():
            member.allocate(memo)
    def get(self, item):
+        """
+        Get the Component associated to the key.
+        """
        raise NotImplementedError
    def set(self, item, value):
+        """
+        Set the Component associated to the key.
+        """
        raise NotImplementedError
    def __getitem__(self, item):
+        # Uses get() internally
        x = self.get(item)
        if isinstance(x, (External, Member)):
            return x.r
        return x
    def __setitem__(self, item, value):
+        # Uses set() internally
        self.set(item, value)
    def __iter__(self):
@@ -378,6 +565,10 @@ class ComponentListInstance(CompositeInstance):
            self[i] = initv
 class ComponentList(Composite):
+    """
+    ComponentList represents a sequence of Components. It builds a
+    ComponentListInstance.
+    """
    def __init__(self, *_components):
        super(ComponentList, self).__init__()
@@ -388,6 +579,9 @@ class ComponentList(Composite):
            self.append(c)
    def resolve(self, name):
+        # resolves an integer n to the nth component in the list
+        # resolves name string to parent.resolve(name)
+        # TODO: eliminate canonicalize
        name = canonicalize(name)
        try:
            item = self.get(name[0])
@@ -397,6 +591,7 @@ class ComponentList(Composite):
                raise TypeError('Cannot resolve a non-integer name on an unbound ComponentList.')
            return self.parent.resolve(name)
        if len(name) > 1:
+            # TODO: eliminate
            return item.resolve(name[1:])
        return item
@@ -469,13 +664,18 @@ class ComponentList(Composite):
        return self.__class__(*[c.dup() for c in self._components])
-class ModuleInstance(CompositeInstance):
+class ComponentDictInstance(CompositeInstance):
+    """
+    ComponentDictInstance is meant to be instantiated by ComponentDict.
+    """
    def __setitem__(self, item, value):
        if item not in self.__items__:
+            # Set it if it's not there
+            # TODO: is this needed here? move to ModuleInstance?
            self.__items__[item] = value
            return
-        super(ModuleInstance, self).__setitem__(item, value)
+        super(ComponentDictInstance, self).__setitem__(item, value)
    def __str__(self):
        strings = []
@@ -488,11 +688,11 @@ class ModuleInstance(CompositeInstance):
        return '{%s}' % '\n'.join(strings).replace('\n', '\n ')
-class Module(Composite):
+class ComponentDict(Composite):
-    InstanceType = ModuleInstance
+    InstanceType = ComponentDictInstance # Type used by build() to make the instance
    def __init__(self, components = {}, **kwcomponents):
-        super(Module, self).__init__()
+        super(ComponentDict, self).__init__()
        components = dict(components, **kwcomponents)
        self.__dict__['_components'] = components
@@ -522,7 +722,7 @@ class Module(Composite):
    def set(self, item, value):
        if not isinstance(value, Component):
-            raise TypeError('Module may only contain Components.', value, type(value))
+            raise TypeError('ComponentDict may only contain Components.', value, type(value))
        value = value.bind(self, item)
        self._components[item] = value
@@ -530,7 +730,7 @@ class Module(Composite):
        cr = '\n    ' #if header else '\n'
        strings = []
 #         if header:
-#             rval += "Module:"
+#             rval += "ComponentDict:"
        for name, component in self.components_map():
            if name.startswith('_'):
                continue
@@ -539,10 +739,10 @@ class Module(Composite):
        return '\n'.join(strings)
    def __str__(self):
-        return "Module(%s)" % ', '.join(x for x in sorted(map(str, self._components)) if x[0] != '_')
+        return "ComponentDict(%s)" % ', '.join(x for x in sorted(map(str, self._components)) if x[0] != '_')
    def __set_name__(self, name):
-        super(Module, self).__set_name__(name)
+        super(ComponentDict, self).__set_name__(name)
        for mname, member in self._components.iteritems():
            member.name = '%s.%s' % (name, mname)
@@ -556,6 +756,10 @@ def register_wrapper(condition, wrapper):
    __autowrappers.append((condition, wrapper))
 def wrap(x):
+    """
+    Wraps x in a Component. Wrappers can be registered using
+    register_wrapper to allow wrapping more types.
+    """
    if isinstance(x, Component):
        return x
    for condition, wrapper in __autowrappers:
@@ -563,12 +767,15 @@ def wrap(x):
            return wrapper(x)
    return x
+# Result -> External
 register_wrapper(lambda x: isinstance(x, gof.Result),
                 lambda x: External(x))
+# [Component1, Component2, ...] -> ComponentList(Component1, Component2, ...)
 register_wrapper(lambda x: isinstance(x, (list, tuple)) and all(isinstance(r, Component) for r in x),
                 lambda x: ComponentList(*x))
+# [Result1, Result2, ...] -> ComponentList(Member(Result1), Member(Result2), ...)
 register_wrapper(lambda x: isinstance(x, (list, tuple)) \
                     and all(isinstance(r, gof.Result) and not r.owner for r in x),
                 lambda x: ComponentList(*map(Member, x)))
@@ -589,7 +796,14 @@ class Curry:
        self.meth = getattr(self.obj, self.name)
-class FancyModuleInstance(ModuleInstance):
+class ModuleInstance(ComponentDictInstance):
+    """
+    ModuleInstance is meant to be instantiated by Module. This differs
+    from ComponentDictInstance on a key point, which is that getattr
+    does a similar thing to getitem.
+    ModuleInstance is compatible for use as ComponentDict.InstanceType.
+    """
    def __getattr__(self, attr):
        if attr == '__items__' and '__items__' not in self.__dict__:
@@ -605,10 +819,14 @@ class FancyModuleInstance(ModuleInstance):
        except KeyError:
            self.__dict__[attr] = value
-class FancyModule(Module):
+class Module(ComponentDict):
-    InstanceType = FancyModuleInstance
+    InstanceType = ModuleInstance # By default, we use build ModuleInstance
    def __wrapper__(self, x):
+        """
+        This function is called whenever x is set as an attribute of
+        the Module.
+        """
        return wrap(x)
    def __getattr__(self, attr):
@@ -619,6 +837,8 @@ class FancyModule(Module):
        except KeyError:
            raise AttributeError('%s has no %s attribute.' % (self.__class__, attr))
        if isinstance(rval, (External, Member)):
+            # Special treatment for External and Member, so that
+            # the user may use them to build graphs more easily.
            return rval.r
        return rval
@@ -640,25 +860,40 @@ class FancyModule(Module):
                self.__dict__[attr] = value
    def build(self, mode, memo):
-        inst = super(FancyModule, self).build(mode, memo)
+        inst = super(Module, self).build(mode, memo)
        for method in dir(self):
+            # Any method with a name like '_instance_XXX' is added to
+            # the object built under the name obj.XXX
            if method.startswith('_instance_'):
                setattr(inst, method[10:], Curry(self, method, inst))
        return inst
    def _instance_initialize(self, inst, init = {}, **kwinit):
+        """
+        Default initialization method.
+        """
        for name, value in chain(init.iteritems(), kwinit.iteritems()):
            inst[name] = value
+FancyModule = Module
+FancyModuleInstance = ModuleInstance
 class KitComponent(Component):
+    """
+    Represents a SymbolicInputKit (see io.py).
+    """
    def __init__(self, kit):
        super(KitComponent, self).__init__()
        self.kit = kit
    def allocate(self, memo):
+        """
+        Allocates a Container for each input in the kit. Sets a key in
+        the memo that maps the SymbolicInputKit to the list of
+        Containers.
+        """
        kit = self.kit
        if kit in memo:
            return memo[kit]

--- a/theano/gof/cc.py
+++ b/theano/gof/cc.py
@@ -756,7 +756,7 @@ class OpWiseCLinker(link.LocalLinker):
    no_recycling can contain a list of Results that belong to the env.
    If a Result is in no_recycling, CLinker will clear the output storage
-    associated to it during the computation (to avoid reusing it).
+    associated to it prior to computation (to avoid reusing it).
    """
    __cache__ = {}

--- a/theano/gof/link.py
+++ b/theano/gof/link.py
@@ -131,7 +131,7 @@ class Container(object):
            self.type = r
        else:
            self.type = r.type
-        self.name = name or r.name
+        self.name = r.name if name is None else name
        self.storage = storage
        self.readonly = readonly
        self.strict = strict
@@ -149,7 +149,7 @@ class Container(object):
            else:
                self.storage[0] = self.type.filter(value)
        except Exception, e:
-            e.args = e.args + (self.name,)
+            e.args = e.args + (('Container name "%s"' % self.name),)
            raise
    data = property(__get, __set)
    value = property(__get, __set)
@@ -160,11 +160,33 @@ class Container(object):
 def map_storage(env, order, input_storage, output_storage):
-    """WRITEME"""
+    """Ensure there is storage for inputs, outputs, and interior nodes.
+    :param env: The current env.  This function uses the inputs and outputs attributes.
+    :param order: an iterable over Apply instances (in program running order)
+    :param input_storage: None or existing input storage (see below)
+    :param output_storage: None or existing output storage (see below)
+    :rtype: 3-tuple
+    :returns: (list of storage for inputs, list of storage for outputs, and the `storage_map`)
+    This function iterates over the nodes in `order` and ensures that for every
+    input and output `Result`, there is a unique storage container.  This is
+    returned as a dictionary Result->storage called the `storage_map`.
+    This function also returns `input_storage` which is a list of storages corresponding to env.inputs.
+    This function also returns `output_storage` which is a list of storages corresponding to env.outputs.
+    """
+    #each Apply argument's data is stored in a list of length 1 (these lists act like pointers)
+    # input_storage is a list of data-containers for the inputs.
    if input_storage is None:
        input_storage = [[None] for input in env.inputs]
    else:
        assert len(env.inputs) == len(input_storage)
    storage_map = {}
    for r, storage in zip(env.inputs, input_storage):
        storage_map[r] = storage
@@ -172,10 +194,12 @@ def map_storage(env, order, input_storage, output_storage):
 #         if not isinstance(orphan, Constant):
 #             raise TypeError("Cannot link a graph with non-constant orphans.", orphan)
 #         storage_map[orphan] = [orphan.data]
    if output_storage is not None:
        assert len(env.outputs) == len(output_storage)
        for r, storage in zip(env.outputs, output_storage):
            storage_map[r] = storage
    thunks = []
    for node in order:
        for r in node.inputs:
@@ -193,13 +217,16 @@ def map_storage(env, order, input_storage, output_storage):
    return input_storage, output_storage, storage_map
+def clear_storage_thunk(stg):
+    """This is useful for inserting thunks that zero-out storage, which allows memory to be freed by gc."""
+    def thunk():
+        stg[0] = None
+    thunk.outputs = []
+    thunk.inputs = [stg]
+    return thunk
 def streamline(env, thunks, order, no_recycling = [], profiler = None):
    """WRITEME"""
-    def clear():
-        for thunk in thunks:
-            for output in thunk.outputs:
-                output[0] = None
    if profiler is None:
        def f():
            for x in no_recycling:
@@ -218,7 +245,6 @@ def streamline(env, thunks, order, no_recycling = [], profiler = None):
                    profiler.profile_node(thunk, node)
            profiler.profile_env(g, env)
        f.profiler = profiler
-    f.clear = clear
    return f
 class LocalLinker(Linker):
@@ -246,14 +272,24 @@ class LocalLinker(Linker):
 class PerformLinker(LocalLinker):
    """WRITEME
    Basic L{Linker} subclass that calls the perform method on each L{Op} in
    the L{Env} in the order given by L{Env.toposort}.
    """
-    def __init__(self):
+    def __init__(self, allow_gc=False):
+        #TODO: set allow_gc = True by default, when it works with the c&py linker
        self.env = None
+        self.allow_gc = allow_gc
    def accept(self, env, no_recycling = []):
+        """
+        :param env: a PerformLinker can be tied to only one Env instance at a time.
+        :param no_recycling: WRITEME
+        :returns: self (TODO: WHY? Who calls this function?)
+        """
        if self.env is not None and self.env is not env:
            return type(self)().accept(env, no_recycling)
            #raise Exception("Cannot accept from a Linker that is already tied to another Env.")
@@ -262,33 +298,74 @@ class PerformLinker(LocalLinker):
        return self
    def make_all(self, profiler = None, input_storage = None, output_storage = None):
+        """
+        :param profiler: WRITEME
+        :param input_storage: WRITEME
+        :param output_storage: WRITEME
+        :returns: function to run all nodes, list of input containers, list of output containers, list of thunks (for all of program), list of nodes (for all of program)
+        """
        env = self.env
        order = env.toposort()
        no_recycling = self.no_recycling
        thunks = []
+        new_order = []
        input_storage, output_storage, storage_map = map_storage(env, order, input_storage, output_storage)
+        #for freeing memory
+        if self.allow_gc:
+            last_user = {}
+            computed = set()
+            for node in order:
+                for idx, input in enumerate(node.inputs):
+                    last_user[input] = (node, idx)
+                for output in node.outputs:
+                    computed.add(output)
        for node in order:
            node_input_storage = tuple(storage_map[input] for input in node.inputs)
            node_output_storage = tuple(storage_map[output] for output in node.outputs)
            p = node.op.perform
+            # Thunk is meant to be called without arguments.
+            # The arguments are bound as default values of the lambda so that each thunk keeps the values from its own loop iteration.
+            # Using the closure in a simple way didn't work.
            thunk = lambda p = p, i = node_input_storage, o = node_output_storage, n = node: p(n, [x[0] for x in i], o)
            thunk.inputs = node_input_storage
            thunk.outputs = node_output_storage
            thunk.perform = p
            thunks.append(thunk)
+            new_order.append(node)
+            if self.allow_gc:
+                for idx, input in enumerate(node.inputs):
+                    if input not in computed:
+                        continue
+                    if input in env.outputs:
+                        continue
+                    if (node, idx) == last_user[input]:
+                        #print '... zeroing', id(storage_map[input])
+                        thunks.append(clear_storage_thunk(storage_map[input]))
+                        new_order.append(node)
        if no_recycling is True: 
+            #True is like some special code for *everything*.
+            #FunctionMaker always passes a list I think   -JB
            no_recycling = storage_map.values()
            no_recycling = utils.difference(no_recycling, input_storage)
        else:
            no_recycling = [storage_map[r] for r in no_recycling if r not in env.inputs]
-        f = streamline(env, thunks, order, no_recycling = no_recycling, profiler = profiler)
+        # The function that actually runs your program is one of the f's in streamline.
+        f = streamline(env, thunks, new_order, no_recycling = no_recycling, profiler = profiler)
        return f, [Container(input, storage) for input, storage in zip(env.inputs, input_storage)], \
            [Container(output, storage, True) for output, storage in zip(env.outputs, output_storage)], \
-            thunks, order
+            thunks, new_order

--- a/theano/gof/tests/test_link.py
+++ b/theano/gof/tests/test_link.py
@@ -133,7 +133,7 @@ class TestWrapLinker:
        x, y, z = inputs()
        e = mul(add(x, y), div(x, y))
-        fn, i, o = wrap_linker(Env([x, y, z], [e]), [PerformLinker()], wrap).make_thunk()
+        fn, i, o = wrap_linker(Env([x, y, z], [e]), [PerformLinker(allow_gc=False)], wrap).make_thunk()
        i[0].data = 1
        i[1].data = 2
        fn()
@@ -148,7 +148,7 @@ class TestWrapLinker:
        x, y, z = inputs()
        e = mul(add(x, y), div(x, y))
-        fn, i, o = wrap_linker(Env([x, y, z], [e]), [PerformLinker()], wrap).make_thunk()
+        fn, i, o = wrap_linker(Env([x, y, z], [e]), [PerformLinker(allow_gc=False)], wrap).make_thunk()
        i[0].data = 1
        i[1].data = 2
        fn()

--- a/theano/printing.py
+++ b/theano/printing.py
+"""Pretty-printing graphs, and the 'Print' Op.
+"""
 import gof
 from copy import copy
 import sys
+from gof import Op, Apply
+class Print(Op):
+    """This identity-like Op has the side effect of printing a message followed by its inputs
+    when it runs.
+    """
+    def __init__(self,message=""):
+        self.message=message
+        self.view_map={0:[0]}
+    def make_node(self,xin):
+        xout = xin.type.make_result()
+        return Apply(op = self, inputs = [xin], outputs=[xout])
+    def perform(self,node,inputs,output_storage):
+        xin, = inputs
+        xout, = output_storage
+        xout[0] = xin
+        print self.message,xin
+    def grad(self,input,output_gradients):
+        return output_gradients
 class PrinterState(gof.utils.scratchpad):
@@ -232,3 +255,4 @@ pprint.assign(lambda pstate, r: hasattr(pstate, 'target') and pstate.target is n
 pp = pprint
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -21,7 +21,7 @@ from .. import scalar as scal
 from ..gof.python25 import partial
 from .. import compile, printing
-from ..printing import pprint
+from ..printing import pprint, Print
 ### set up the external interface
@@ -457,9 +457,10 @@ class _tensor_py_operators:
    def __neg__(self): return neg(self)
    #CASTS 
-    def __int__(self): return AsInt(self).out
+    #### REMOVED THESE BECAUSE PYTHON appears to require __int__ to return an int. -JB 20081112
-    def __float__(self): return AsInt(self).out
+    #def __int__(self): return convert_to_int32(self)
-    def __complex__(self): return AsComplex(self).out
+    #def __float__(self): return convert_to_float64(self)
+    #def __complex__(self): return convert_to_complex128(self)
    #COMPARISONS
    def __lt__(self,other): return lt(self, other)
@@ -712,7 +713,7 @@ class Shape(Op):
        x = as_tensor(x)
        return Apply(self, [x], [lvector()])
    def perform(self, node, (x, ), (out, )):
-        out[0] = numpy.asarray(x.shape)
+        out[0] = numpy.asarray(x.shape, dtype = 'int64')
    def grad(self, (x,), (gz,)):
        return [None]
 @_redefine_asRoutine(Shape())
@@ -1012,6 +1013,10 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum'))
 @constructor
 def mean(input, axis = None):
    """WRITEME"""
+    if str(input.dtype).startswith('int'):
+        # we need to cast eventually anyway, and this helps
+        # to prevent overflow
+        input = convert_to_float64(input)
    s = sum(input, axis)
    shp = shape(input)
    if axis is None:
@@ -1554,6 +1559,11 @@ def shape_padleft(tensor, n_ones):
    pattern = ['x']*n_ones + [i for i in range(tensor.type.ndim)]
    return DimShuffle(tensor.broadcastable, pattern)(tensor)
+@constructor
+def rightpad_shape(tensor, n_ones):
+    """Reshape `tensor` by right-padding the shape with `n_ones` 1s"""
+    pattern = [i for i in range(tensor.type.ndim)] + ['x']*n_ones
+    return DimShuffle(tensor.broadcastable, pattern)(tensor)
 @constructor
 def shape_padright(tensor, n_ones):
@@ -2210,7 +2220,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
        t_r = as_tensor(random_projection)
        #random projection of o onto t_r
-        cost = sum(t_r * o_output)
+        cost = sum(t_r * o_output)  #This sum() is defined above, it's not the builtin sum.
        cost_fn = function(tensor_pt, cost)
        num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps)

--- a/theano/tensor/nnet.py
+++ b/theano/tensor/nnet.py
@@ -125,7 +125,7 @@ class SoftmaxWithBias(gof.Op):
        return dx, db
    def c_headers(self):
-        return ['<iostream>']
+        return ['<iostream> <math>']
    @staticmethod
    def c_code_template():
@@ -214,7 +214,7 @@ class SoftmaxWithBias(gof.Op):
                sum += sm_ij;
                sm_i[j * Ssm] = sm_ij;
            }
-            if ( (0.0 == sum) || (isinf(sum)))
+            if ( (0.0 == sum) || (std::isinf(sum)))
            {
                //that was our best...
                %(fail)s;

--- a/theano/tensor/raw_random.py
+++ b/theano/tensor/raw_random.py
@@ -55,8 +55,9 @@ class RandomFunction(gof.Op):
            r = copy(r)
        rout[0] = r
        rval = self.fn(r, *(args + [shape]))
-        if not isinstance(rval, numpy.ndarray):
+        if not isinstance(rval, numpy.ndarray) \
-            out[0] = numpy.asarray(rval, dtype = node.outputs[0].type.dtype)
+               or str(rval.dtype) != node.outputs[1].type.dtype:
+            out[0] = numpy.asarray(rval, dtype = node.outputs[1].type.dtype)
        else:
            out[0] = rval
@@ -237,7 +238,7 @@ class RandomKit(SymbolicInputKit):
 rk = RandomKit('rk', 0xBAD5EED)
-class RModule(compile.FancyModule):
+class RModule(compile.Module):
    def __init__(self, components = {}, **kwcomponents):
        super(RModule, self).__init__(components, **kwcomponents)

--- a/theano/tensor/tests/_test_xlogx.py
+++ b/theano/tensor/tests/_test_xlogx.py
+from xlogx import xlogx
+import unittest
+from theano import compile
+from theano import gradient
+from theano.tensor import as_tensor
+import theano._test_tensor as TT
+import random
+import numpy.random
+class T_XlogX(unittest.TestCase):
+    def test0(self):
+        x = as_tensor([1, 0])
+        y = xlogx(x)
+        y = compile.eval_outputs([y])
+        self.failUnless(numpy.all(y == numpy.asarray([0, 0.])))
+    def test1(self):
+        class Dummy(object):
+            def make_node(self, a):
+                return [xlogx(a)[:,2]]
+        TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)])
+if __name__ == '__main__':
+    unittest.main()
--- a/theano/tensor/xlogx.py
+++ b/theano/tensor/xlogx.py
+import theano
+from theano import tensor, scalar
+import numpy
+class XlogX(scalar.UnaryScalarOp):
+    """
+    Compute X * log(X), with special case 0 log(0) = 0.
+    """
+    @staticmethod
+    def st_impl(x):
+        if x == 0.0:
+            return 0.0
+        return x * numpy.log(x)
+    def impl(self, x):
+        return XlogX.st_impl(x)
+    def grad(self, (x,), (gz,)):
+        return [gz * (1 + scalar.log(x))]
+    def c_code(self, node, name, (x,), (z,), sub):
+        if node.inputs[0].type in [scalar.float32, scalar.float64]:
+            return """%(z)s =
+                %(x)s == 0.0
+                ? 0.0
+                : %(x)s * log(%(x)s);""" % locals()
+        raise NotImplementedError('only floatingpoint is implemented')
+scalar_xlogx  = XlogX(scalar.upgrade_to_float, name='scalar_xlogx')
+xlogx = tensor.Elemwise(scalar_xlogx, name='xlogx')