Commit 0ffa6ba5 authored by Pascal Lamblin

merge

......@@ -23,7 +23,9 @@ import sys, os
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'ext']
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'ext']
+todo_include_todos = True
try:
from sphinx.ext import pngmath
......@@ -166,7 +168,7 @@ latex_font_size = '11pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, document class [howto/manual]).
latex_documents = [
-    ('contents', 'theano.tex', 'theano Documentation',
+    ('index', 'theano.tex', 'theano Documentation',
'LISA lab, University of Montreal', 'manual'),
]
......
......@@ -26,26 +26,28 @@ What needs to be defined
There are fewer methods to define for an Op than for a Type:
-.. function:: c_code(node, name, input_names, output_names, sub)
+.. class:: Op
-   This must return C code that carries the computation we want to do.
+   .. method:: c_code(node, name, input_names, output_names, sub)
-.. function:: c_code_cleanup(node, name, input_names, output_names, sub)
+      This must return C code that carries the computation we want to do.
-   This must return C code that cleans up whatever c_code allocated and
-   that we must free.
+   .. method:: c_code_cleanup(node, name, input_names, output_names, sub)
-   *Default:* The default behavior is to do nothing.
+      This must return C code that cleans up whatever c_code allocated and
+      that we must free.
-.. function:: c_compile_args()
-              c_no_compile_args()
-              c_headers()
-              c_libraries()
-              c_support_code()
+      *Default:* The default behavior is to do nothing.
-   Allows you to specify headers, libraries,
-   special g++ arguments to add/exclude or
-   helper functions/structs that the type needs. See :ref:`op`.
+   .. method:: c_compile_args()
+   .. method:: c_no_compile_args()
+   .. method:: c_headers()
+   .. method:: c_libraries()
+   .. method:: c_support_code()
+      Allows you to specify headers, libraries,
+      special g++ arguments to add/exclude or
+      helper functions/structs that the type needs. See :ref:`op`.
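To make this contract concrete, here is a toy pure-Python sketch of an Op-like object that returns C snippets; the class name ``ToyDoubleOp`` and the variable names are hypothetical, and this is not Theano's actual ``Op`` class:

```python
# A toy sketch of the C-code contract described above. This is NOT
# Theano's Op class; ToyDoubleOp and the variable names are hypothetical.
class ToyDoubleOp(object):
    """Pretend Op computing z = 2 * x, where x and z are C doubles."""

    def c_code(self, node, name, input_names, output_names, sub):
        # Return C code computing the output from the input.
        x, = input_names
        z, = output_names
        return "%(z)s = 2 * %(x)s;" % {'z': z, 'x': x}

    def c_code_cleanup(self, node, name, input_names, output_names, sub):
        # Default behavior: nothing to free.
        return ""

op = ToyDoubleOp()
code = op.c_code(None, 'name1', ['V1_x'], ['V2_z'], {'fail': 'goto fail;'})
```

The returned string is plain C that the linker would splice into a larger block using the supplied variable names.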
The ``name`` argument is currently given an invalid value, so steer
......
......@@ -46,43 +46,45 @@ be found in the documentation for :api:`gof.type.Type`. Here, we'll focus on
the most important ones:
-.. function:: c_declare(name, sub)
+.. class:: CLinkerType
-   This must return C code which declares variables. These variables
-   will be available to operations defined in C. You may also write
-   typedefs.
+   .. method:: c_declare(name, sub)
-.. function:: c_init(name, sub)
+      This must return C code which declares variables. These variables
+      will be available to operations defined in C. You may also write
+      typedefs.
-   This must return C code which initializes the variables declared in
-   ``c_declare``. Either this or ``c_extract`` will be called.
+   .. method:: c_init(name, sub)
-.. function:: c_extract(name, sub)
+      This must return C code which initializes the variables declared in
+      ``c_declare``. Either this or ``c_extract`` will be called.
-   This must return C code which takes a reference to a Python object
-   and initializes the variables declared in ``c_declare`` to match the
-   Python object's data. Either this or ``c_init`` will be called.
+   .. method:: c_extract(name, sub)
-.. function:: c_sync(name, sub)
+      This must return C code which takes a reference to a Python object
+      and initializes the variables declared in ``c_declare`` to match the
+      Python object's data. Either this or ``c_init`` will be called.
-   When the computations are done, transfer the variables from the C
-   structure we put them in to the destination Python object. This will
-   only be called for the outputs.
+   .. method:: c_sync(name, sub)
-.. function:: c_cleanup(name, sub)
+      When the computations are done, transfer the variables from the C
+      structure we put them in to the destination Python object. This will
+      only be called for the outputs.
-   When we are done using the data, clean up whatever we allocated and
-   decrease the appropriate reference counts.
+   .. method:: c_cleanup(name, sub)
-.. function:: c_compile_args()
-              c_no_compile_args()
-              c_headers()
-              c_libraries()
-              c_support_code()
+      When we are done using the data, clean up whatever we allocated and
+      decrease the appropriate reference counts.
-   Allows you to specify headers, libraries,
-   special g++ arguments to add/exclude or
-   helper functions/structs that the type needs. See :ref:`type`.
+   .. method:: c_compile_args()
+               c_no_compile_args()
+               c_headers()
+               c_libraries()
+               c_support_code()
+      Allows you to specify headers, libraries,
+      special g++ arguments to add/exclude or
+      helper functions/structs that the type needs. See :ref:`type`.
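The order in which these snippets are assembled for a single variable can be sketched in plain Python; ``ToyDoubleType`` and ``assemble_c_block`` are hypothetical illustrations, not Theano's actual CLinkerType machinery:

```python
# A toy sketch of how the snippets returned by c_declare / c_extract /
# c_sync / c_cleanup fit together for one variable. Hypothetical code,
# not Theano's linker.
def assemble_c_block(type_, name):
    """Concatenate the C snippets in the order the linker would use them."""
    return "\n".join([
        type_.c_declare(name, {}),
        type_.c_extract(name, {}),   # or c_init(name, sub) would be used
        "/* ... computation ... */",
        type_.c_sync(name, {}),      # outputs only
        type_.c_cleanup(name, {}),
    ])

class ToyDoubleType(object):
    # Hypothetical Type whose C representation is a plain double.
    def c_declare(self, name, sub):
        return "double %s;" % name
    def c_extract(self, name, sub):
        return "%s = PyFloat_AsDouble(py_%s);" % (name, name)
    def c_sync(self, name, sub):
        return "py_%s = PyFloat_FromDouble(%s);" % (name, name)
    def c_cleanup(self, name, sub):
        return ""

block = assemble_c_block(ToyDoubleType(), "V3")
```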
Each of these functions takes two arguments, ``name`` and ``sub``, which
......@@ -391,7 +393,7 @@ done. Note which variables get extracted (the three inputs ``x``, ``y`` and
output ``b``) and which one is synced (the final output ``b``).
The C code above is a single C block for the whole graph. Depending on
-which :ref:`linker` is used to process the computation graph, it is
+which :term:`linker` is used to process the computation graph, it is
possible that one such block is generated for each operation and that
we transit through Python after each operation. In that situation,
``a`` would be synced by the addition block and extracted by the
......
.. _debug_faq:
=========================================
Debugging Theano: FAQ and Troubleshooting
=========================================
There are many kinds of bugs that might come up in a computer program.
This page is structured as an FAQ. It should provide recipes to tackle common
problems, and introduce some of the tools that we use to find problems in our
Theano code, and even (it happens) in Theano's internals, such as
:ref:`using_debugmode`.
How do I print an intermediate value in a Function/Method?
----------------------------------------------------------
Theano provides a 'Print' Op to do this.
.. code-block:: python

    import numpy
    import theano

    x = theano.tensor.dvector('x')
    x_printed = theano.printing.Print('this is a very important value')(x)

    f = theano.function([x], x * 5)
    f_with_print = theano.function([x], x_printed * 5)

    # this runs the graph without any printing
    assert numpy.all(f([1, 2, 3]) == [5, 10, 15])

    # this runs the graph with the message and value printed
    assert numpy.all(f_with_print([1, 2, 3]) == [5, 10, 15])
Since Theano runs your program in a topological order, you won't have precise
control over the order in which multiple Print() Ops are evaluated. For a more
precise inspection of what's being computed where, when, and how, see the
:ref:`faq_wraplinker`.
I wrote a new Op/Type, and weird stuff is happening...
------------------------------------------------------
First, check the :ref:`op_contract` and the :ref:`type_contract`
and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might catch
something that you're not seeing.
I wrote a new optimization, but it's not getting used...
---------------------------------------------------------
Remember that you have to register optimizations with the :ref:`optdb`
for them to get used by the normal modes like FAST_COMPILE, FAST_RUN,
and DEBUG_MODE.
I wrote a new optimization, and it changed my results even though I'm pretty sure it is correct.
------------------------------------------------------------------------------------------------
First, check the :ref:`op_contract` and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might
catch something that you're not seeing.
The function I compiled is too slow, what's up?
-----------------------------------------------
First, make sure you're running in FAST_RUN mode, by passing
``mode='FAST_RUN'`` to ``theano.function`` or ``theano.make``. Some
operations have excruciatingly slow Python implementations and that
can negatively affect the performance of FAST_COMPILE.
Second, try Theano's :ref:`using_profilemode`. This will tell you which
Apply nodes and which Ops are eating up your CPU cycles.
.. _faq_wraplinker:
How do I step through a compiled function with the WrapLinker?
--------------------------------------------------------------
This is not exactly an FAQ, but the doc is here for now...
It's pretty easy to roll your own evaluation mode.
Check out this one:
.. code-block:: python

    class PrintEverythingMode(Mode):
        def __init__(self):
            def print_eval(i, node, fn):
                print i, node, [input[0] for input in fn.inputs],
                fn()
                print [output[0] for output in fn.outputs]
            wrap_linker = theano.gof.WrapLinkerMany(
                    [theano.gof.OpWiseCLinker()], [print_eval])
            super(PrintEverythingMode, self).__init__(
                    wrap_linker, optimizer='fast_run')
When you use ``mode=PrintEverythingMode()`` as the mode for Function or Method,
then you should see a lot of output. Every Apply node will be printed out,
along with its position in the graph, the arguments to the ``perform`` or
``c_code`` and the output it computed. Admittedly, this is a huge amount of
output to read through if you are using big tensors... but you can choose to
put logic inside of the print_eval function that would, for example, only
print something out if a certain kind of Op was used, at a certain program
position, or if a particular value shows up in one of the inputs or outputs.
.. TODO: documentation for link.WrapLinkerMany
This can be a really powerful debugging tool. Note the call to ``fn`` inside the call to ``print_eval``; without it, the graph wouldn't get computed at all!
.. _extend_faq:
=========================================
Extending Theano: FAQ and Troubleshooting
=========================================
I wrote a new Op/Type, and weird stuff is happening...
------------------------------------------------------
First, check the :ref:`op_contract` and the :ref:`type_contract`
and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might catch
something that you're not seeing.
I wrote a new optimization, but it's not getting used...
---------------------------------------------------------
Remember that you have to register optimizations with the :ref:`optdb`
for them to get used by the normal modes like FAST_COMPILE, FAST_RUN,
and DEBUG_MODE.
I wrote a new optimization, and it changed my results even though I'm pretty sure it is correct.
------------------------------------------------------------------------------------------------
First, check the :ref:`op_contract` and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might
catch something that you're not seeing.
......@@ -146,7 +146,7 @@ Automatic wrapping
All nodes in the graph must be instances of ``Apply`` or ``Result``, but
``<Op subclass>.make_node()`` typically wraps constants to satisfy those
-constraints. For example, the :api:`tensor.add <theano.tensor.basic.add>`
+constraints. For example, the :func:`tensor.add`
Op instance is written so that:
.. code-block:: python
......@@ -189,8 +189,8 @@ An *Apply node* is a type of internal node used to represent a
manipulated directly by the end user. They may be accessed via
a Variable's ``owner`` field.
-An Apply node is typically an instance of the :api:`Apply
-<theano.gof.graph.Apply>` class. It represents the application
+An Apply node is typically an instance of the :class:`Apply`
+class. It represents the application
of an :ref:`op` on one or more inputs, where each input is a
:ref:`variable`. By convention, each Op is responsible for
knowing how to build an Apply node from a list of
......@@ -215,8 +215,7 @@ An Apply instance has three important fields:
A list of :ref:`Variables <variable>` that represent the return values
of the function.
-An Apply instance can be created by calling ``gof.Apply(op, inputs,
-outputs)``.
+An Apply instance can be created by calling ``gof.Apply(op, inputs, outputs)``.
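The bookkeeping described above can be sketched with toy classes; ``ToyVariable`` and ``ToyApply`` are hypothetical stand-ins, not ``theano.gof.graph``, but they show how ``owner`` ties an output Variable back to the Apply node that produced it:

```python
# A toy sketch of the Apply/Variable graph structure. Hypothetical
# classes, not theano.gof.graph.
class ToyVariable(object):
    def __init__(self, name=None):
        self.name = name
        self.owner = None      # set when the variable is an Apply output

class ToyApply(object):
    def __init__(self, op, inputs, outputs):
        self.op = op
        self.inputs = inputs
        self.outputs = outputs
        for out in outputs:
            out.owner = self   # each output points back to this node

x = ToyVariable('x')
y = ToyVariable('y')
node = ToyApply('add', [x, y], [ToyVariable('z')])
z = node.outputs[0]
```

Inputs keep ``owner`` set to ``None`` when they are not produced by any computation, mirroring the convention for user-provided Variables.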
......@@ -260,7 +259,7 @@ Type
A :ref:`type` in Theano represents a set of constraints on potential
data objects. These constraints allow Theano to tailor C code to handle
them and to statically optimize the computation graph. For instance,
-the :ref:`irow <predefinedtypes>` type in the ``theano.tensor`` package
+the :ref:`irow <libdoc_tensor_creation>` type in the ``theano.tensor`` package
gives the following constraints on the data the Variables of type ``irow``
may contain:
......@@ -273,8 +272,8 @@ that declares the right data types and that contains the right number
of loops over the dimensions.
Note that a Theano :ref:`type` is not equivalent to a Python type or
-class. Indeed, in Theano, :ref:`irow <predefinedtypes>` and :ref:`dmatrix
-<predefinedtypes>` both use ``numpy.ndarray`` as the underlying type
+class. Indeed, in Theano, :ref:`irow <libdoc_tensor_creation>` and :ref:`dmatrix
+<libdoc_tensor_creation>` both use ``numpy.ndarray`` as the underlying type
for doing computations and storing data, yet they are different Theano
Types. Indeed, the constraints set by ``dmatrix`` are:
......@@ -311,8 +310,7 @@ Variables. For example, when I type
>>> x = theano.tensor.ivector()
>>> y = -x
-``x`` and ``y`` are both Variables, i.e. instances of the :api:`Variable
-<theano.gof.graph.Variable>` class. The :ref:`type` of both ``x`` and
+``x`` and ``y`` are both Variables, i.e. instances of the
+:class:`Variable` class. The :ref:`type` of both ``x`` and
``y`` is ``theano.tensor.ivector``.
Unlike ``x``, ``y`` is a Variable produced by a computation (in this
......@@ -324,7 +322,7 @@ through ``y.owner``.
More specifically, a Variable is a basic structure in Theano that
represents a datum at a certain point in computation. It is typically
-an instance of the class :api:`Variable <theano.gof.graph.Variable>` or
+an instance of the class :class:`Variable` or
one of its subclasses.
A Variable ``r`` contains four important fields:
......@@ -365,6 +363,7 @@ any circumstances modify the input. This means that a constant is
eligible to participate in numerous optimizations: constant inlining
in C code, constant folding, etc.
-A constant does not need to be specified in a :ref:`function`'s list
+A constant does not need to be specified in a :func:`function
+<function.function>`'s list
of inputs. In fact, doing so will raise an exception.
......@@ -14,7 +14,7 @@ also good for you if you are interested in getting more under the hood with
Theano itself.
Before tackling this tutorial, it is highly recommended to read the
-:ref:`basictutorial`.
+:ref:`tutorial`.
The first few pages will walk you through the definition of a new :ref:`type`,
``double``, and a basic arithmetic :ref:`operations <op>` on that Type. We
......@@ -34,5 +34,6 @@ a C implementation.
optimization
tips
unittest
+   extending_faq
......@@ -12,7 +12,7 @@ computations. We'll start by defining multiplication.
Op's contract
=============
-An Op (:api:`gof.op.Op`) is any object which defines the
+An Op (:class:`gof.Op`) is any object which defines the
following methods:
......@@ -134,9 +134,7 @@ following methods:
includes this Op.
-For each method, the *default* is what :api:`theano.gof.op.Op` defines
-for you. At a bare minimum, a new Op must define ``make_node`` and
-``perform``, which have no defaults.
+At a bare minimum, a new Op must define ``make_node`` and ``perform``, which have no defaults.
For more details, including the interface for providing a C
implementation of ``perform()``, refer to the documentation for :ref:`op`.
......
......@@ -26,9 +26,9 @@ Global and local optimizations
First, let's lay out the way optimizations work in Theano. There are
two types of optimizations: *global* optimizations and *local*
-optimizations. A global optimization takes an :ref:`env` object (an
+optimizations. A global optimization takes an ``Env`` object (an
 Env is a wrapper around a whole computation graph, you can see its
-:ref:`documentation <env>` for more details) and navigates through it
+:class:`documentation <Env>` for more details) and navigates through it
in a suitable way, replacing some Variables by others in the process. A
local optimization, on the other hand, is defined as a function on a
*single* :ref:`apply` node and must return either ``False`` (to mean that
......@@ -52,26 +52,28 @@ Global optimization
A global optimization (or optimizer) is an object which defines the following
methods:
-.. function:: apply(env)
+.. class:: Optimizer
-   This method takes an Env object which contains the computation graph
-   and does modifications in line with what the optimization is meant
-   to do. This is the main method of the optimizer.
+   .. method:: apply(env)
-.. function:: add_requirements(env)
+      This method takes an Env object which contains the computation graph
+      and does modifications in line with what the optimization is meant
+      to do. This is the main method of the optimizer.
-   This method takes an Env object and adds :ref:`features
-   <envfeature>` to it. These features are "plugins" that are needed
-   for the ``apply`` method to do its job properly.
+   .. method:: add_requirements(env)
-.. function:: optimize(env)
+      This method takes an Env object and adds :ref:`features
+      <libdoc_gof_envfeature>` to it. These features are "plugins" that are needed
+      for the ``apply`` method to do its job properly.
-   This is the interface function called by Theano.
+   .. method:: optimize(env)
-   *Default:* this is defined by Optimizer as ``add_requirement(env);
-   apply(env)``.
+      This is the interface function called by Theano.
-See the section about :ref:`env` to understand how to define these
+      *Default:* this is defined by Optimizer as ``add_requirement(env);
+      apply(env)``.
+See the section about :class:`Env` to understand how to define these
methods.
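The default ``optimize`` behavior described in the contract can be sketched with toy classes (hypothetical names, not ``theano.gof``):

```python
# A minimal sketch of the Optimizer interface: optimize() defaults to
# add_requirements() followed by apply(). Toy classes, not theano.gof.
class ToyOptimizer(object):
    def add_requirements(self, env):
        pass  # default: no features to add

    def apply(self, env):
        raise NotImplementedError

    def optimize(self, env):
        # Default behavior described in the contract above.
        self.add_requirements(env)
        self.apply(env)

class RecordingOptimizer(ToyOptimizer):
    """Records the call order so we can see what optimize() does."""
    def __init__(self):
        self.calls = []
    def add_requirements(self, env):
        self.calls.append('add_requirements')
    def apply(self, env):
        self.calls.append('apply')

opt = RecordingOptimizer()
opt.optimize(env=None)
```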
......@@ -80,14 +82,16 @@ Local optimization
A local optimization is an object which defines the following methods:
-.. function:: transform(node)
+.. class:: LocalOptimizer
+   .. method:: transform(node)
-   This method takes an :ref:`apply` node and returns either ``False`` to
-   signify that no changes are to be done or a list of Variables which
-   matches the length of the node's ``outputs`` list. When the
-   LocalOptimizer is applied by a Navigator, the outputs of the node
-   passed as argument to the LocalOptimizer will be replaced by the
-   list returned.
+      This method takes an :ref:`apply` node and returns either ``False`` to
+      signify that no changes are to be done or a list of Variables which
+      matches the length of the node's ``outputs`` list. When the
+      LocalOptimizer is applied by a Navigator, the outputs of the node
+      passed as argument to the LocalOptimizer will be replaced by the
+      list returned.
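The ``transform`` contract can be sketched on toy nodes (a hypothetical ``ToyNode``, not Theano's Apply class): return ``False`` when nothing changes, otherwise a list matching the node's outputs.

```python
# A toy LocalOptimizer.transform sketch: replace `x * 1` by its first
# input, otherwise return False to signal "no change". Hypothetical
# node objects, not Theano's.
class ToyNode(object):
    def __init__(self, op, inputs, outputs):
        self.op = op
        self.inputs = inputs
        self.outputs = outputs

def transform(node):
    # Only handle binary multiplication nodes.
    if node.op != 'mul' or len(node.inputs) != 2:
        return False
    x, one = node.inputs
    if one == 1:
        # The returned list must match the length of node.outputs.
        return [x]
    return False

mul_node = ToyNode('mul', ['x', 1], ['z'])
add_node = ToyNode('add', ['x', 1], ['z'])
```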
......@@ -138,8 +142,8 @@ simplification described above:
requirements we might want to know about?
Here's how it works: first, in ``add_requirements``, we add the
-``ReplaceValidate`` :ref:`envfeature` located in
-:api:`theano.gof.toolbox`. This feature adds the ``replace_validate``
+``ReplaceValidate`` :ref:`libdoc_gof_envfeature` located in
+:ref:`libdoc_gof_toolbox`. This feature adds the ``replace_validate``
method to ``env``, which is an enhanced version of ``replace`` that
does additional checks to ensure that we are not messing up the
computation graph (note: if ``ReplaceValidate`` was already added by
......@@ -147,9 +151,9 @@ another optimizer, ``extend`` will do nothing). In a nutshell,
``toolbox.ReplaceValidate`` grants access to ``env.replace_validate``,
and ``env.replace_validate`` allows us to replace a Variable with
another while respecting certain validation constraints. You can
-browse the list of :ref:`features <envfeaturelist>` and see if some of
+browse the list of :ref:`libdoc_gof_envfeaturelist` and see if some of
 them might be useful to write optimizations with. For example, as an
-exercise, try to rewrite Simplify using :ref:`nodefinder`. (Hint: you
+exercise, try to rewrite Simplify using :class:`NodeFinder`. (Hint: you
want to use the method it publishes instead of the call to toposort!)
Then, in ``apply`` we do the actual job of simplification. We start by
......@@ -222,12 +226,12 @@ arithmetics that your Ops implement. Theano might provide facilities
for this somewhere in the future.
.. note::
-:ref:`env` is a Theano structure intended for the optimization
+:class:`Env` is a Theano structure intended for the optimization
phase. It is used internally by function and Module and is rarely
exposed to the end user. You can use it to test out optimizations,
etc. if you are comfortable with it, but it is recommended to use
the function/Module frontends and to interface optimizations with
-:ref:`optdb <optdb>` (we'll see how to do that soon).
+:class:`optdb` (we'll see how to do that soon).
Local optimization
......@@ -305,7 +309,7 @@ Theano defines some shortcuts to make LocalOptimizers:
.. function:: PatternSub(pattern1, pattern2)
Replaces all occurrences of the first pattern by the second pattern.
-See :api:`theano.gof.opt.PatternSub`.
+See :class:`PatternSub`.
.. code-block:: python
......@@ -342,7 +346,7 @@ or ``PatternSub``, it is highly recommended to use them.
WRITEME: more about using PatternSub (syntax for the patterns, how to
use constraints, etc. - there's some decent doc at
-:api:`theano.gof.opt.PatternSub` for those interested)
+:class:`PatternSub` for those interested)
......@@ -376,8 +380,8 @@ Definition of optdb
-------------------
optdb is an object which is an instance of
-:api:`theano.gof.SequenceDB <theano.gof.optdb.SequenceDB>`,
-itself a subclass of :api:`theano.gof.DB <theano.gof.optdb.DB>`.
+:class:`SequenceDB <optdb.SequenceDB>`,
+itself a subclass of :class:`DB <optdb.DB>`.
There exist (for now) two types of DB, SequenceDB and EquilibriumDB.
When given an appropriate Query, DB objects build an Optimizer matching
the query.
......@@ -399,7 +403,7 @@ well and the LocalOptimizers they return will be put in their places
(note that as of yet no DB can produce LocalOptimizer objects, so this
is a moot point).
-Theano contains one principal DB object, :api:`theano.gof.optdb`, which
+Theano contains one principal DB object, :class:`optdb`, which
contains all of Theano's optimizers with proper tags. It is
recommended to insert new Optimizers in it. As mentioned previously,
optdb is a SequenceDB, so, at the top level, Theano applies a sequence
......@@ -411,33 +415,35 @@ Query
A Query is built by the following call:
-::
+.. code-block:: python

    theano.gof.Query(include, require = None, exclude = None, subquery = None)

-.. attribute:: include
+.. class:: Query
+   .. attribute:: include
-   A set of tags (a tag being a string) such that every
-   optimization obtained through this Query must have **one** of the tags
-   listed. This field is required and basically acts as a starting point
-   for the search.
+      A set of tags (a tag being a string) such that every
+      optimization obtained through this Query must have **one** of the tags
+      listed. This field is required and basically acts as a starting point
+      for the search.
-.. attribute:: require
+   .. attribute:: require
-   A set of tags such that every optimization obtained
-   through this Query must have **all** of these tags.
+      A set of tags such that every optimization obtained
+      through this Query must have **all** of these tags.
-.. attribute:: exclude
+   .. attribute:: exclude
-   A set of tags such that every optimization obtained
-   through this Query must have **none** of these tags.
+      A set of tags such that every optimization obtained
+      through this Query must have **none** of these tags.
-.. attribute:: subquery
+   .. attribute:: subquery
-   optdb can contain sub-databases; subquery is a
-   dictionary mapping the name of a sub-database to a special Query.
-   If no subquery is given for a sub-database, the original Query will be
-   used again.
+      optdb can contain sub-databases; subquery is a
+      dictionary mapping the name of a sub-database to a special Query.
+      If no subquery is given for a sub-database, the original Query will be
+      used again.
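The one-of/all-of/none-of semantics of the three tag sets can be sketched with a small helper (a hypothetical ``matches`` function over a toy database, not ``theano.gof.Query``'s real code):

```python
# A toy sketch of how include/require/exclude tag sets could select
# optimizations. Hypothetical helper and database, not theano.gof.
def matches(tags, include, require=(), exclude=()):
    tags = set(tags)
    return (bool(tags & set(include))        # at least ONE include tag
            and set(require) <= tags         # ALL require tags
            and not (tags & set(exclude)))   # NONE of the exclude tags

db = {
    'merge':    ['fast_run', 'fast_compile'],
    'inplace':  ['fast_run', 'inplace'],
    'slow_opt': ['stabilize'],
}
selected = sorted(name for name, tags in db.items()
                  if matches(tags, include=['fast_run'], exclude=['inplace']))
# selected keeps 'merge': it has a fast_run tag and no inplace tag.
```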
Furthermore, a Query object includes three methods, ``including``,
``requiring`` and ``excluding`` which each produce a new Query object
......
......@@ -40,17 +40,17 @@ Step 1 - Create an Env
^^^^^^^^^^^^^^^^^^^^^^
The subgraph given by the end user is wrapped in a structure called
-:ref:`env`. That structure defines several hooks on adding and
+*Env*. That structure defines several hooks on adding and
removing (pruning) nodes as well as on modifying links between nodes
(for example, modifying an input of an :ref:`apply` node) (see the
-article about :ref:`env` for more information).
+article about :ref:`libdoc_gof_env` for more information).
Env provides a method to change the input of an Apply node from one
Variable to another and a more high-level method to replace a Variable
with another. This is the structure that :ref:`Optimizers
<optimization>` work on.
-Some relevant :ref:`Features <envfeature>` are typically added to the
+Some relevant :ref:`Features <libdoc_gof_envfeature>` are typically added to the
Env, namely to prevent any optimization from operating inplace on
inputs declared as immutable.
......@@ -58,19 +58,19 @@ inputs declared as immutable.
Step 2 - Execute main Optimizer
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Once the Env is made, an :ref:`optimizer <optimization>` is produced
-by the :ref:`function_mode` passed to ``function`` or to the Method/Module's
+Once the Env is made, an :term:`optimizer` is produced
+by the :term:`mode` passed to ``function`` or to the Method/Module's
``make`` (the Mode basically has two important fields, ``linker`` and
``optimizer``). That optimizer is applied on the Env using its
optimize() method.
-The optimizer is typically obtained through :ref:`optdb <optdb>`.
+The optimizer is typically obtained through :attr:`optdb`.
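The role of the Mode in Steps 2 and 3 can be sketched with a toy class (a hypothetical ``ToyMode``, not Theano's Mode): it simply bundles an optimizer and a linker, applied in that order.

```python
# A toy sketch of the Mode described above: a mode bundles an optimizer
# and a linker; compilation optimizes the Env, then links it.
# Hypothetical classes, not theano.compile.Mode.
class ToyMode(object):
    def __init__(self, optimizer, linker):
        self.optimizer = optimizer
        self.linker = linker

log = []
mode = ToyMode(optimizer=lambda env: log.append('optimize %s' % env),
               linker=lambda env: log.append('link %s' % env))
mode.optimizer('env')   # Step 2: apply the optimizer to the Env
mode.linker('env')      # Step 3: hand the optimized Env to the linker
```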
Step 3 - Execute linker to obtain a thunk
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Once the computation graph is optimized, the :ref:`linker` is
+Once the computation graph is optimized, the :term:`linker` is
extracted from the Mode. It is then called with the Env as argument to
produce a ``thunk``, which is a function with no arguments that
returns nothing. Along with the thunk, one list of input containers (a
......
......@@ -40,8 +40,7 @@ Theano provides some generic Op classes which allow you to generate a
lot of Ops at a lesser effort. For instance, Elemwise can be used to
make :term:`elementwise` operations easily whereas DimShuffle can be
used to make transpose-like transformations. These higher order Ops
-are mostly Tensor-related, as this is Theano's specialty. An exposé of
-them can therefore be found in :ref:`tensoroptools`.
+are mostly Tensor-related, as this is Theano's specialty.
.. _opchecklist:
......
......@@ -22,69 +22,71 @@ i.e. the same default argument names and values. If you wish to add
extra arguments to any of these methods, these extra arguments must have
default values.
-.. function:: filter(value, strict=False)
+.. class:: PureType
-   This casts a value to match the Type and returns the
-   casted value. If ``value`` is incompatible with the Type,
-   the method must raise an exception. If ``strict`` is True, ``filter`` must return a
-   reference to ``value`` (i.e. casting prohibited)
+   .. method:: filter(value, strict=False)
-   We need to define ``filter`` with two arguments. The second argument
-   must be called ``strict`` (Theano often calls it by keyword) and must
-   have a default value of ``False``.
+      This casts a value to match the Type and returns the
+      casted value. If ``value`` is incompatible with the Type,
+      the method must raise an exception. If ``strict`` is True, ``filter`` must return a
+      reference to ``value`` (i.e. casting prohibited)
-.. function:: is_valid_value(value)
+      We need to define ``filter`` with two arguments. The second argument
+      must be called ``strict`` (Theano often calls it by keyword) and must
+      have a default value of ``False``.
-   Returns True iff the value is compatible with the Type. If
-   ``filter(value, strict = True)`` does not raise an exception, the
-   value is compatible with the Type.
+   .. method:: is_valid_value(value)
-   *Default:* True iff ``filter(value, strict = True)`` does not raise
-   an exception.
+      Returns True iff the value is compatible with the Type. If
+      ``filter(value, strict = True)`` does not raise an exception, the
+      value is compatible with the Type.
-.. function:: values_eq(a, b)
+      *Default:* True iff ``filter(value, strict = True)`` does not raise
+      an exception.
-   Returns True iff ``a`` and ``b`` are equal.
+   .. method:: values_eq(a, b)
-   *Default:* ``a == b``
+      Returns True iff ``a`` and ``b`` are equal.
-.. function:: values_eq_approx(a, b)
+      *Default:* ``a == b``
-   Returns True iff ``a`` and ``b`` are approximately equal, for a
-   definition of "approximately" which varies from Type to Type.
+   .. method:: values_eq_approx(a, b)
-   *Default:* ``values_eq(a, b)``
+      Returns True iff ``a`` and ``b`` are approximately equal, for a
+      definition of "approximately" which varies from Type to Type.
-.. function:: make_variable(name=None)
+      *Default:* ``values_eq(a, b)``
-   Makes a :term:`Variable` of this Type with the specified name, if
-   ``name`` is not ``None``. If ``name`` is ``None``, then the Variable does
-   not have a name. The Variable will have its ``type`` field set to
-   the Type object.
+   .. method:: make_variable(name=None)
-   *Default:* there is a generic definition of this in Type. The
-   Variable's ``type`` will be the object that defines this method (in
-   other words, ``self``).
+      Makes a :term:`Variable` of this Type with the specified name, if
+      ``name`` is not ``None``. If ``name`` is ``None``, then the Variable does
+      not have a name. The Variable will have its ``type`` field set to
+      the Type object.
-.. function:: __call__(name=None)
+      *Default:* there is a generic definition of this in Type. The
+      Variable's ``type`` will be the object that defines this method (in
+      other words, ``self``).
-   Syntactic shortcut to ``make_variable``.
+   .. method:: __call__(name=None)
-   *Default:* ``make_variable``
+      Syntactic shortcut to ``make_variable``.
-.. function:: __eq__(other)
+      *Default:* ``make_variable``
-   Used to compare Type instances themselves
+   .. method:: __eq__(other)
-   *Default:* ``object.__eq__``
+      Used to compare Type instances themselves
-.. function:: __hash__()
+      *Default:* ``object.__eq__``
-   Types should not be mutable, so it should be OK to define a hash
-   function. Typically this function should hash all of the terms
-   involved in ``__eq__``.
+   .. method:: __hash__()
-   *Default:* ``id(self)``
+      Types should not be mutable, so it should be OK to define a hash
+      function. Typically this function should hash all of the terms
+      involved in ``__eq__``.
+      *Default:* ``id(self)``
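The ``filter``/``is_valid_value``/``values_eq`` portion of this contract can be sketched with a toy type (the class name ``DoubleToy`` is hypothetical; Theano's real Types live in ``theano.gof.type``):

```python
# A toy Type following the contract above. Hypothetical class, not a
# real Theano Type.
class DoubleToy(object):
    def filter(self, value, strict=False):
        if strict:
            # strict: casting prohibited, return value itself or raise.
            if not isinstance(value, float):
                raise TypeError('not a float: %r' % (value,))
            return value
        return float(value)       # non-strict: cast if possible

    def is_valid_value(self, value):
        # The default described above: valid iff strict filter passes.
        try:
            self.filter(value, strict=True)
            return True
        except TypeError:
            return False

    def values_eq(self, a, b):
        return a == b             # the default: a == b

d = DoubleToy()
```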
For each method, the *default* is what ``Type`` defines
for you. So, if you create an instance of ``Type`` or an
......@@ -249,7 +251,7 @@ attempt to clear up the confusion:
there is actually only one Type in that set, therefore the subclass
doesn't represent anything that one of its instances doesn't. In this
case it is a singleton, a set with one element. However, the
-:api:`TensorType`
+:class:`TensorType`
class in Theano (which is a subclass of Type)
represents a set of types of tensors
parametrized by their data type or number of dimensions. We could say
......
......@@ -6,111 +6,164 @@ Glossary of terminology
.. glossary::
Apply
WRITEME
Instances of :class:`Apply` represent the application of an :term:`Op`
to some input :term:`Variable` (or variables) to produce some output
:term:`Variable` (or variables). They are like the application of a [symbolic]
mathematical function to some [symbolic] inputs.
broadcasting
Broadcasting
Broadcasting is a mechanism which allows tensors with
different numbers of dimensions to be added or multiplied
together by (virtually) replicating the smaller tensor along
the dimensions that it is lacking.
In a nutshell, broadcasting is the mechanism by which a scalar
may be added to a matrix, a vector to a matrix or a scalar to
a vector.
.. figure:: bcast.png
Broadcasting a row matrix. T and F respectively stand for
True and False and indicate along which dimensions we allow
broadcasting.
If the second argument were a vector, its shape would be
``(2,)`` and its broadcastable pattern ``(F,)``. They would
be automatically expanded to the **left** to match the
dimensions of the matrix (adding ``1`` to the shape and ``T``
to the pattern), resulting in ``(1, 2)`` and ``(T, F)``.
It would then behave just like the example above.
Unlike numpy which does broadcasting dynamically, Theano needs
to know, for any operation which supports broadcasting, which
dimensions will need to be broadcasted. When applicable, this
information is given in the :term:`Type` of a :term:`Variable`.
See also:
* :ref:`How broadcasting is used in Theano's tensor types <tensortypes>`
different numbers of dimensions to be used in element-by-element
(elementwise) computations. It works by
(virtually) replicating the smaller tensor along
the dimensions that it is lacking.
For more detail, see :ref:`libdoc_tensor_broadcastable`, and also
* `SciPy documentation about numpy's broadcasting <http://www.scipy.org/EricsBroadcastingDoc>`_
* `OnLamp article about numpy's broadcasting <http://www.onlamp.com/pub/a/python/2000/09/27/numerically.html>`_
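Numpy's dynamic broadcasting, which Theano's broadcastable patterns describe statically, can be sketched in plain numpy (used here only for illustration):

```python
import numpy as np

m = np.array([[1, 2], [3, 4]])   # shape (2, 2), pattern (F, F)
v = np.array([10, 20])           # shape (2,),   pattern (F,)

# v is virtually expanded to shape (1, 2) on the left, then
# replicated along the first axis to match m's shape (2, 2).
out = m + v
assert out.tolist() == [[11, 22], [13, 24]]
```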
constant
WRITEME
Constant
A variable with an immutable value.
For example, when you type
>>> x = tensor.ivector()
>>> y = x + 3
Then a `constant` is created to represent the ``3`` in the graph.
See also: :class:`gof.Constant`
dynamic
WRITEME
elementwise
An elementwise operation ``f`` on two matrices ``M`` and ``N``
Elementwise
An elementwise operation ``f`` on two tensor variables ``M`` and ``N``
is one such that:
``f(M, N)[i, j] = f(M[i, j], N[i, j])``
``f(M, N)[i, j] == f(M[i, j], N[i, j])``
In other words, each element of an input matrix is combined
with the corresponding element of the other(s). There are no
dependencies between elements whose ``[i, j]`` coordinates do
not correspond, so an elementwise operation is like a scalar
operation generalized along several dimensions.
operation generalized along several dimensions. Elementwise
operations are defined for tensors of different numbers of dimensions by
:term:`broadcasting` the smaller ones.
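The defining property above can be checked directly with numpy arrays (a plain-numpy illustration; Theano's elementwise Ops have the same semantics, symbolically):

```python
import numpy as np

M = np.array([[1.0, 2.0], [3.0, 4.0]])
N = np.array([[10.0, 20.0], [30.0, 40.0]])

out = M * N  # elementwise multiplication
# f(M, N)[i, j] == f(M[i, j], N[i, j]) holds at every coordinate
for i in range(2):
    for j in range(2):
        assert out[i, j] == M[i, j] * N[i, j]
```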
Expression
See :term:`Apply`
Expression Graph
A directed, acyclic set of connected :term:`Variable` and
:term:`Apply` nodes that express symbolic functional relationship
between variables. You use Theano by defining expression graphs, and
then compiling them with :term:`theano.function`.
See also :term:`Variable`, :term:`Op`, :term:`Apply`, and
:term:`Type`, or read more about :ref:`tutorial_graphstructures`.
Destructive
An :term:`Op` is destructive (of particular input[s]) if its
computation requires that one or more inputs be overwritten or
otherwise invalidated. For example, :term:`inplace` Ops are
destructive. Destructive Ops can sometimes be faster than
non-destructive alternatives. Theano encourages users not to put
destructive Ops into graphs that are given to :term:`theano.function`,
but instead to trust the optimizations to insert destructive ops
judiciously.
Destructive Ops are indicated via a ``destroy_map`` Op attribute (see
:class:`gof.Op`).
Graph
see :term:`expression graph`
Inplace
Inplace computations are computations that destroy their inputs as a
side-effect. For example, if you iterate over a matrix and double
every element, this is an inplace operation because when you are done,
the original input has been overwritten. Ops representing inplace
computations are :term:`destructive`, and by default these can only be
inserted by optimizations, not user code.
There exist unary, binary, ternary, etc. elementwise
operations and they can work on scalars, vectors, matrices,
etc. as long as all the inputs have the same dimensions or can
be :term:`broadcasted <broadcasting>` to the same dimensions.
Linker
Part of a function :term:`Mode` -- an object responsible for 'running'
the compiled function. Among other things, the linker determines whether computations are carried out with C or Python code.
Merge
A simple optimization in which redundant :term:`Apply` nodes are
combined. For example, in ``function([x,y], [(x+y)*2, (x+y)*3])`` the merge
optimization will ensure that ``x`` and ``y`` are only added once.
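The effect of the merge optimization can be imitated in plain Python by caching identical applications. This is only a hypothetical sketch (the ``apply_once`` helper is invented here for illustration), not Theano's implementation:

```python
# Hypothetical sketch of common-subexpression merging; not Theano code.
cache = {}
evaluations = []

def apply_once(op, *args):
    # Identical (op, args) applications are evaluated a single time.
    key = (op.__name__, args)
    if key not in cache:
        evaluations.append(key)
        cache[key] = op(*args)
    return cache[key]

def add(a, b):
    return a + b

def mul(a, b):
    return a * b

x, y = 3, 4
s1 = apply_once(add, x, y)       # x + y evaluated here
r1 = apply_once(mul, s1, 2)      # (x + y) * 2
s2 = apply_once(add, x, y)       # merged: reuses the cached x + y
r2 = apply_once(mul, s2, 3)      # (x + y) * 3

assert (r1, r2) == (14, 21)
assert evaluations.count(("add", (3, 4))) == 1   # added only once
```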
Examples of elementwise operations in Theano: ``add, sub, mul,
div, neg, inv, log, exp, sin, cos, tan`` and many
others. These operations are all instances of :api:`Elemwise
<theano.tensor.elemwise.Elemwise>`.
Mode
An object providing an :term:`optimizer` and a :term:`linker` that is
passed to :term:`theano.function`. It parametrizes how an expression
graph is converted to a callable object.
graph
WRITEME
Op
The ``.op`` of an :term:`Apply`, together with its symbolic inputs
fully determines what kind of computation will be carried out for that
``Apply`` at run-time. Mathematical functions such as addition
(``T.add``) and indexing ``x[i]`` are Ops in Theano. Much of the
library documentation is devoted to describing the various Ops that
are provided with Theano, but you can add more.
inplace
WRITEME
See also :term:`Variable`, :term:`Type`, and :term:`Apply`,
or read more about :ref:`tutorial_graphstructures`.
merge
WRITEME
Optimizer
An instance of :class:`Optimizer`, which has the capacity to provide
an :term:`optimization` (or optimizations).
op
WRITEME
Optimization
A :term:`graph` transformation applied by an :term:`optimizer` during
the compilation of a :term:`graph` by :term:`theano.function`.
pure
WRITEME
Pure
An :term:`Op` is *pure* if it has no :term:`destructive` side-effects.
static
WRITEME
Storage
The memory that is used to store the value of a Variable. In most
cases storage is internal to a compiled function, but in some cases
(such as :term:`constant` and :term:`shared variable <shared variable>`) the storage is not internal.
Shared Variable
A :term:`Variable` whose value may be shared between multiple functions. See :func:`shared <shared.shared>` and :func:`theano.function <function.function>`.
theano.function
The interface for Theano's compilation from symbolic expression graphs
to callable objects. See :func:`function.function`.
type
See :ref:`tensortypes` or :ref:`type`.
Type
The ``.type`` of a
:term:`Variable` indicates what kinds of values might be computed for it in a
compiled graph.
An instance that inherits from :class:`Type`, and is used as the
``.type`` attribute of a :term:`Variable`.
See also :term:`Variable`, :term:`Op`, and :term:`Apply`,
or read more about :ref:`tutorial_graphstructures`.
Variable
A :ref:`Variable` is the main data structure you work with when
using Theano. The symbolic inputs that you operate on are
Variables and what you get from applying various operations to
these inputs are also Variables. For example, when I type
The main data structure you work with when using Theano.
For example,
>>> x = theano.tensor.ivector()
>>> y = -x
>>> y = -x**2
``x`` and ``y`` are both Variables, i.e. instances of the
:api:`Variable <theano.gof.graph.Variable>` class. The
:term:`Type` of both ``x`` and ``y`` is
``theano.tensor.ivector``.
``x`` and ``y`` are both `Variables`, i.e. instances of the :class:`Variable` class.
For more information, see: :ref:`variable`.
See also :term:`Type`, :term:`Op`, and :term:`Apply`,
or read more about :ref:`tutorial_graphstructures`.
view
WRITEME
View
Some Tensor Ops (such as Subtensor and Transpose) can be computed in
constant time by simply re-indexing their inputs. The outputs from
[the Apply instances from] such Ops are called `Views` because their
storage might be aliased to the storage of other variables (the inputs
of the Apply). It is important for Theano to know which Variables are
views of which other ones in order to introduce :term:`Destructive`
Ops correctly.
View Ops are indicated via a ``view_map`` Op attribute (see
:class:`gof.Op`).
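The storage aliasing that makes this bookkeeping necessary is easy to observe in numpy, where transposition and basic slicing likewise return views (a plain-numpy illustration):

```python
import numpy as np

a = np.zeros((2, 3))
t = a.T                  # a view: t shares storage with a

t[0, 0] = 7.0            # writing through the view...
assert a[0, 0] == 7.0    # ...is visible through the original
assert np.shares_memory(a, t)
```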
......
......@@ -37,7 +37,7 @@ Roughly in order of what you'll want to check out:
* :ref:`extending` -- Learn to add a Type, Op, or graph optimization.
* :ref:`internal` -- How to maintain Theano, LISA-specific tips, and more...
You can download the latest `PDF documentation <http://pylearn.org/theano/theano.pdf>`_, rather than reading it online.
You can download the latest `PDF documentation <http://deeplearning.net/theanodoc/theano.pdf>`_, rather than reading it online.
Community
=========
......@@ -60,7 +60,6 @@ Community
tutorial/index
library/index
extending/index
indexes/index
glossary
links
internal/index
......
......@@ -20,7 +20,7 @@ to be installed:
We develop mainly on 64-bit Linux machines. 32-bit architectures are
not well-tested.
python >= 2.5
python >= 2.5 (2.4 should be supported as well)
`numpy <http://numpy.scipy.org/>`_ >= 1.2
Earlier versions have memory leaks.
......@@ -30,6 +30,8 @@ to be installed:
is buggy in 0.6. (scipy.csc_matrix dot has a bug with singleton
dimensions. There may be more bugs.)
A BLAS installation (with Level 3 functionality)
The following libraries and software are optional:
g++, python-dev
......@@ -42,41 +44,49 @@ The following libraries and software are optional:
`mercurial <http://www.selenic.com/mercurial/>`_
To download bleeding-edge version of Theano.
.. _install_bleeding_edge:
Getting the code
-----------------
Easy install
------------
If you are a developer of Theano, then check out the :ref:`dev_start_guide` guide.
The following command will install the latest release of Theano
on your system:
The following are general instructions that will set you up with the bleeding-edge
version of Theano. First, get the code using `mercurial <http://www.selenic.com/mercurial/wiki/>`__:
.. code-block:: bash
easy_install Theano
hg clone http://hg.assembla.com/theano Theano
Manual install
--------------
Configuring PYTHONPATH
---------------------------
The subdirectory Theano/theano has to be located in a path
mentioned in your PYTHONPATH. In order to do that, you can either
create a symbolic link to Theano/theano in a directory already
mentioned in your PYTHONPATH environment variable, or modify the
PYTHONPATH so that it mentions Theano.
To install the latest release of Theano from source, visit the `downloads
<http://pylearn.org/theano/downloads/>`_ page and download the release you
want. Unpack the release, and type:
To create a symbolic link:
.. code-block:: bash
python setup.py build
python setup.py test
python setup.py install
ln -s Theano/theano <someplace on your PYTHONPATH>/theano
.. _install_bleeding_edge:
To modify the environment variable PYTHONPATH in bash, you may do this:
Bleeding Edge
--------------
.. code-block:: bash
Feeling lucky and want to run bleeding-edge code?
Then check out the :ref:`dev_start_guide` guide.
export PYTHONPATH=<path to Theano's parent dir>/Theano:$PYTHONPATH
In csh:
Configuring the environment
---------------------------
.. code-block:: csh
setenv PYTHONPATH <path to Theano's parent dir>/Theano:$PYTHONPATH
Configuring Theano's environmental variables
---------------------------------------------
Two environment variables are used to control automatic code
generation. It is possible to use Theano in a way which avoids all
......@@ -118,6 +128,33 @@ automatic code generation, but that way is much, much slower.
Omitting this variable defaults the mode to ``'FAST_RUN'``.
Testing your installation
---------------------------
Once you have completed these steps, you should run the theano test suite like this:
.. code-block:: bash
cd Theano
nosetests #execute all the tests
All tests should pass. If some test fails on your machine, you are
encouraged to tell us what went wrong on the ``theano-users@googlegroups.com``
mailing list.
Updating
-------------
To update your library to the latest revision, change directory (``cd``)
to your ``Theano`` folder and execute the following command:
.. code-block:: bash
hg pull -u
You should update frequently; bugs are fixed on a very regular basis.
Mac
---
......@@ -126,20 +163,21 @@ Mac
-
.. code-block:: bash
$ sudo port install gcc42 py25-zlib py25-numpy py25-scipy mercurial
$ sudo port install gcc44 py25-zlib py25-numpy py25-scipy mercurial
Note that compiling gcc42 takes a significant time (hours) so it is probably
Note that compiling gcc takes a significant time (hours) so it is probably
not the best solution if you are in a rush! It may happen that SciPy
fails to compile the first time and still compiles just fine on a second
try. Same thing with py25-zlib.
- Install some kind of BLAS library (TODO: how?)
- scipy depends on ATLAS (a BLAS library), which will be installed by MacPorts.
- Set ``THEANO_BLAS_LDFLAGS`` to something which will link against said BLAS
library. E.g., ``THEANO_BLAS_LDFLAGS='-lcblas -latlas -lgfortran'``.
This advice has not been tested recently, so please inform us of your results.
These installation instructions have not been tested recently, so please inform us of your results!
We would be especially interested in dependencies that we missed listing, as well as tests
that fail on your platform (use the ``theano-users@googlegroups.com`` mailing list).
Windows
......@@ -216,7 +254,8 @@ but this has not been tested yet.
tar zxvf lapack.tgz
cd lapack-3.2.1
gfortran -shared -O3 -o libblas.dll BLAS/SRC/*.f
mv libblas.dll /mingw/lib
cp libblas.dll /mingw/lib
mv libblas.dll /mingw/bin
- Install `Mercurial <http://mercurial.selenic.com/downloads/>`__
(you can use the regular Windows release, you do not need TortoiseHg).
......@@ -246,9 +285,9 @@ Generating the documentation
----------------------------
You can read the latest HTML documentation `here
<http://pylearn.org/theano/contents.html>`__.
<http://deeplearning.net/theanodoc>`__.
You can download the latest PDF documentation `here
<http://pylearn.org/theano/theano.pdf>`__.
<http://deeplearning.net/theanodoc/theano.pdf>`__.
We recommend you look at the documentation on the website, since it
will be more current than the documentation included with the package.
......
......@@ -21,11 +21,10 @@ Developer Start Guide
Accounts
========
To obtain developer access: send an email to an admin with an username and
temporary password. Pending approval, this will give you access to both the
repository and Trac. You should then change your password in the
`<http://pylearn.org/theano/prefs preferences>` tab - do *NOT* use a good
password! We are using plain text http which is not secure.
To obtain developer access: register with `Assembla
<http://www.assembla.com/>`_ and add yourself as a watcher on the `Theano space
<http://www.assembla.com/spaces/theano>`_. Then send an email to an admin asking
to be promoted to a member of the project.
Theano code
......@@ -34,10 +33,9 @@ Theano code
*To get the source via mercurial,* you must have `mercurial
<http://www.selenic.com/mercurial/wiki/>`__ installed.
The code that makes up Theano is in a single repository available in
`<http://pylearn.org/hg/Theano>`__.
As a developer, you should clone this repository like this:
The code that makes up Theano is in a `single repository
<http://www.assembla.com/spaces/theano/trac_mercurial_tool>`__. As a developer,
you should clone this repository like this:
.. code-block:: bash
......
......@@ -5,43 +5,40 @@
Theano at a Glance
==================
Theano is a Python library that allows you to define, optimize, and evaluate
mathematical expressions involving multi-dimensional arrays. Using Theano it is
Theano is a Python library that lets you define, optimize, and evaluate
mathematical expressions, especially ones with multi-dimensional arrays
(numpy.ndarray). Using Theano it is
possible to attain speeds rivaling hand-crafted C implementations for problems
involving large amounts of data. It can also surpass C on a CPU by many orders
of magnitude by taking advantage of recent GPUs.
Theano melds some aspects of a computer algebra system (CAS) with
aspects of an optimizing compiler. It can even transform some or all
of the mathematical expression into C code and compile it into native
machine instructions. This combination of CAS with optimizing
compilation is particularly useful for tasks in which complicated
mathematical expressions are evaluated repeatedly and evaluation speed
is critical.
Theano supports a range of numerical types in multiple dimensions and
a number of well-tested operations. It also allows you to compute the
gradient of an expression with respect to another. Symbolic
expressions may be compiled into functions, which work on the same
data structures as numpy_, allowing for easy interoperability.
Theano combines aspects of a computer algebra system (CAS) with aspects of an
optimizing compiler. It can also generate customized C code for many
mathematical operations. This combination of CAS with optimizing compilation
is particularly useful for tasks in which complicated mathematical expressions
are evaluated repeatedly and evaluation speed is critical. For situations
where many different expressions are each evaluated once Theano can minimize
the amount of compilation/analysis overhead, but still provide symbolic
features such as automatic differentiation.
Theano's compiler applies many optimizations of varying complexity to
these symbolic expressions. These optimizations include, but are not
limited to:
* use of GPU for computations
* constant folding
* merging of similar subgraphs, to avoid calculating the same values
more than once
* arithmetic simplification (``x*y/x -> y``)
* inserting efficient BLAS_ operations
* using inplace operations wherever it is safe to do so.
Theano defines several optimizations which improve the numerical
stability of computations.
Theano was written at the LISA_ lab to support the development of
efficient machine learning algorithms while minimizing human time. We
use it especially in gradient-based learning techniques. Theano is
* merging of similar subgraphs, to avoid redundant calculation
* arithmetic simplification (e.g. ``x*y/x -> y``, ``--x -> x``)
* inserting efficient BLAS_ operations (e.g. ``GEMM``) in a variety of
contexts
* using memory aliasing to avoid calculation
* using inplace operations wherever it does not interfere with aliasing
* loop fusion for elementwise sub-expressions
* improvements to numerical stability (e.g. :math:`\log(1+\exp(x))` and :math:`\log(\sum_i \exp(x[i]))`)
* for a complete list, see :ref:`optimizations`
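The numerical-stability rewrite for :math:`\log(1+\exp(x))` mentioned above can be illustrated with numpy, which exposes the same stable primitive (``logaddexp``). This illustrates the idea only; it is not Theano's optimizer:

```python
import numpy as np

x = 1000.0
# Naive evaluation overflows: exp(1000) is inf, so the log is inf.
naive = np.log(1.0 + np.exp(x))
assert np.isinf(naive)

# Stable form: log(1 + exp(x)) == logaddexp(0, x), which returns
# approximately x for large x instead of overflowing.
stable = np.logaddexp(0.0, x)
assert abs(stable - x) < 1e-6
```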
Theano was written at the LISA_ lab to support rapid development of
efficient machine learning algorithms. Theano is
named after the `Greek mathematician`_, who may have been Pythagoras'
wife. Theano is released under a BSD license (:ref:`link <license>`).
......@@ -92,30 +89,28 @@ machine instructions.
What does it do that they don't?
================================
Theano is a python library and optimizing compiler for manipulating
Theano is a Python library and optimizing compiler for manipulating
and evaluating expressions, especially matrix-valued
ones. Manipulation of matrices is typically done using the numpy
package, so what does Theano do that Python and numpy do not?
- *execution speed optimizations*: Theano can use `g++` to compile
parts your expression graph into native machine code, which runs
much faster than python.
- *execution speed optimizations*: Theano can use `g++` or `nvcc` to compile
parts of your expression graph into CPU or GPU instructions, which run
much faster than pure Python.
- *symbolic differentiation*: Theano can automatically build symbolic graphs
for computing gradients.
- *stability optimizations*: Theano can recognize numerically unstable
- *stability optimizations*: Theano can recognize [some] numerically unstable
expressions and compute them with more stable algorithms.
There exist another symbolic package in Python, namely sympy_. Theano
is different from sympy in the sense that while Theano allows symbolic
manipulation it puts more emphasis on the evaluation of these expressions
and being able to repeatedly evaluate them on many different inputs. Theano
is also better suited to handling large tensors which have no
assumed structures.
The closest Python package to Theano is sympy_.
Theano focuses more on tensor expressions than Sympy, and has more machinery
for compilation. Sympy has more sophisticated algebra rules and can
handle a wider variety of mathematical operations (such as series, limits, and integrals).
If numpy_ is to be compared to MATLAB_ and sympy_ to Mathematica_,
Theano is a sort of hybrid of the two which tries to make the best of
Theano is a sort of hybrid of the two which tries to combine the best of
both worlds.
......@@ -134,7 +129,8 @@ Getting started
the :ref:`tutorial` first though.
A PDF version of the online documentation may be found `here <theano.pdf>`_.
A PDF version of the online documentation may be found `here
<http://deeplearning.net/theanodoc/theano.pdf>`_.
Contact us
......
......@@ -102,7 +102,7 @@ Reference
:type updates: iterable over pairs (shared_variable, new_expression).
List, tuple or dict.
:param updates: expressions for new SharedVariable values
:param updates: expressions for new :class:`SharedVariable` values
:type givens: iterable over pairs (Var1, Var2) of Variables.
......
......@@ -13,6 +13,7 @@
.. toctree::
:maxdepth: 1
shared
function
io
mode
......
......@@ -122,7 +122,7 @@ array(10.0)
Advanced: Sharing Storage Between Functions
-------------------------------------------
``value`` can be a :api:`theano.gof.link.Container` as well as a literal.
``value`` can be a :class:`Container` as well as a literal.
This permits linking a value of a Variable in one function to the value of a Variable in another function.
By using a ``Container`` as a value we can implement shared variables between functions.
......
......@@ -26,8 +26,10 @@ environment variable 'THEANO_DEFAULT_MODE', which can in turn be overridden by
setting ``theano.compile.mode.default_mode`` directly, which can in turn be
overridden by passing the keyword argument to ``theano.function``.
For a finer level of control over which optimizations are applied, and whether
C or python implementations are used, read :api:`compile.mode.Mode`.
.. TODO::
For a finer level of control over which optimizations are applied, and whether
C or Python implementations are used, read.... what exactly?
Reference
......
......@@ -175,7 +175,7 @@ Using Inheritance
A friendlier way to use Module is to implement your functionality as a
subclass of Module:
.. literalinclude:: ../examples/module/accumulator.py
.. literalinclude:: ../../examples/module/accumulator.py
This is just like the previous example except slightly fancier.
......
.. _libdoc_compile_shared:
===========================================
:mod:`shared` - defines theano.shared
===========================================
.. module:: shared
:platform: Unix, Windows
:synopsis: defines theano.shared and related classes
.. moduleauthor:: LISA
.. class:: SharedVariable
Variable with Storage that is shared between functions that it appears in.
These variables are meant to be created by registered *shared constructors*
(see :func:`shared_constructor`).
The user-friendly constructor is :func:`shared`
.. attribute:: value
Read/write access to the [non-symbolic] value/data associated with this SharedVariable.
Changes to this value will be visible to all functions using this SharedVariable.
.. method:: __init__(self, name, type, value, strict, container=None)
:param name: The name for this variable.
:type name: None or str
:param type: The :term:`Type` for this Variable.
:param value: A value to associate with this variable (a new container will be created).
:param strict: True -> assignments to ``self.value`` will not be casted
or copied, so they must have the correct type or an exception will be
raised.
:param container: The container to use for this variable. This should be
used instead of the `value` parameter. Using both is an error.
.. attribute:: container
A container to use for this SharedVariable when it is an implicit function parameter.
:type: :class:`Container`
.. function:: shared(value, name=None, strict=False, **kwargs)
Return a :class:`SharedVariable` Variable, initialized with a copy or reference of `value`.
This function iterates over constructor functions (see `shared_constructor`) to find a
suitable SharedVariable subclass. The suitable one is the first constructor
that doesn't raise an exception.
This function is meant as a convenient default. If you want to use a
specific shared variable constructor, consider calling it directly.
.. note::
By passing `kwargs`, you effectively limit the set of potential constructors to those that
can accept those kwargs.
Each registered constructor ``ctor`` will be called like this:
.. code-block:: python
ctor(value, name=name, strict=strict, **kwargs)
.. attribute:: constructors
A list of shared variable constructors that will be tried in reverse
order.
.. function:: shared_constructor(ctor)
Append `ctor` to the list of shared constructors (see :func:`shared`).
.. _libdoc_gof_env:
================================================
:mod:`env` -- Graph Container [doc TODO]
================================================
.. module:: env
:platform: Unix, Windows
:synopsis: Theano Internals
.. moduleauthor:: LISA
Guide
=====
Env
---
.. _libdoc_gof_envfeature:
Env Features
-------------
.. _libdoc_gof_envfeaturelist:
Env Feature List
^^^^^^^^^^^^^^^^
* ReplaceValidate
* DestroyHandler
Reference
=========
.. class:: Env
***TODO***
......@@ -4,3 +4,17 @@
================================================
:mod:`gof` -- Theano Internals [doc TODO]
================================================
.. module:: gof
:platform: Unix, Windows
:synopsis: Theano Internals
.. moduleauthor:: LISA
.. toctree::
:maxdepth: 1
env
toolbox
.. _libdoc_gof_toolbox:
================================================
:mod:`toolbox` -- [doc TODO]
================================================
.. module:: toolbox
:platform: Unix, Windows
:synopsis: Theano Internals
.. moduleauthor:: LISA
Guide
=====
.. class:: Bookkeeper(object)
.. class:: History(object)
.. method:: revert(env, checkpoint)
Reverts the graph to whatever it was at the provided
checkpoint (undoes all replacements). A checkpoint at any
given time can be obtained using self.checkpoint().
.. class:: Validator(object)
.. class:: ReplaceValidate(History, Validator)
.. method:: replace_validate(env, var, new_var, reason=None)
.. class:: NodeFinder(Bookkeeper)
.. class:: PrintListener(object)
......@@ -7,20 +7,74 @@
TensorType
==========
.. class:: TensorType
.. class:: TensorType(Type)
.. attribute:: broadcastable
.. _libdoc_tensor_variable
.. attribute:: ndim
.. attribute:: dtype
.. _libdoc_tensor_variable:
TensorVariable
==============
.. class:: _tensor_py_operators(object)
.. _libdoc_tensor_constant
This mix-in class adds convenient attributes, methods, and support for Python operators.
TensorConstant
==============
.. method:: reshape(shape, ndim=None)
Returns a view of this tensor that has been reshaped as in
numpy.reshape. If the shape is a Variable argument, then you might
need to use the optional `ndim` parameter to declare how many elements
the shape has, and therefore how many dimensions the reshaped Variable
will have.
See :func:`reshape`.
.. method:: dimshuffle(*pattern)
Returns a view of this tensor with permuted dimensions. Typically the
pattern will include the integers 0, 1, ... ndim-1, and any number of
'x' characters in dimensions where this tensor should be broadcasted.
See :func:`dimshuffle`.
.. method:: flatten(ndim=1)
Returns a view of this tensor with `ndim` dimensions, whose shape for the first
`ndim-1` dimensions will be the same as `self`, and shape in the
remaining dimension will be expanded to fit in all the data from self.
See :func:`flatten`.
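These three methods mirror familiar numpy operations; the numpy sketch below shows the analogous behaviour (``dimshuffle``'s ``'x'`` entries correspond to inserting broadcastable length-1 axes, as ``np.expand_dims`` does):

```python
import numpy as np

b = np.arange(6).reshape((2, 3))

# reshape: same data, new shape
assert b.reshape((3, 2)).shape == (3, 2)

# dimshuffle((1, 0)) is a transpose; dimshuffle(('x', 0, 1))
# would prepend a broadcastable axis
assert b.transpose(1, 0).shape == (3, 2)
assert np.expand_dims(b, 0).shape == (1, 2, 3)

# flatten collapses everything into one dimension
assert b.flatten().shape == (6,)
```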
.. attribute:: T
Transpose of this tensor.
>>> x = T.zmatrix()
>>> y = 3+.2j * x.T
.. note::
In numpy and in Theano, the transpose of a vector is exactly the
same vector! Use `reshape` or `dimshuffle` to turn your vector
into a row or column matrix.
.. class:: TensorVariable(Variable, _tensor_py_operators)
The results of symbolic operations typically have this type.
.. class:: TensorConstant(Variable, _tensor_py_operators)
Python and numpy numbers used in Theano graphs are wrapped in this type.
.. class:: TensorSharedVariable(Variable, _tensor_py_operators)
This type is returned by :func:`shared` when the initial value is a numpy
ndarray.
.. _libdoc_tensor_creation:
......@@ -168,32 +222,27 @@ bytes, we would do:
Shared Variable
---------------
Yet another way of creating a special type of Theano variable is by using
:func:`shared` as in the example below:
Another way of creating a TensorVariable (a TensorSharedVariable to be
precise) is by calling :func:`shared`:
.. code-block:: python
x = shared(value, name)
x = shared(numpy.random.randn(3,4))
Shared takes two parameters, `value` and `name` and creates a Theano
variable with the name `name` and initial value `value`. The type of this
variable is obtained from the type of the value `value`, so if value is a
numpy float matrix the shared variable will be of type `fmatrix`.
This will return a :term:`shared variable <shared variable>` whose ``.value`` is
a numpy ndarray. The number of dimensions and dtype of the Variable are
inferred from the ndarray argument.
Note that a shared variable is not like other Theano variables. For more
details of how to use shared variables look :ref:`here <functionstateexample>` (or for more details
:ref:`here <sharedvars>`). TODO : make the last link to a detailed
description of shared variables.
For additional information, see the :func:`shared` documentation.
Autocasting
-----------
Theano does autocasting of numpy ndarray or python floats/ints into
Theano constants.
Theano does autocasting of python floats/ints into Theano constants.
TODO: What does (or compatible) mean? Talk about casting rules, refer .
TODO: link to floatX (?)
.. TODO: What does (or compatible) mean? Talk about casting rules, refer .
.. TODO: link to floatX (?)
.. function:: as_tensor_variable(x, ...)
......@@ -211,32 +260,71 @@ Shaping and Shuffling
Returns the symbolic shape vector of `x`
.. function:: reshape(x)
.. function:: reshape()
.. function:: dimshuffle()
.. function:: dimshuffle(x)
.. function:: flatten()
Reductions
==========
.. function:: max(x)
.. function:: max(x,axis=None)
:param x: symbolic Tensor (or compatible)
:Parameter: *x* - symbolic Tensor (or compatible)
:Parameter: *axis* - axis along which to compute the maximum
:Returns: the maximum value along a given axis
Returns TODO
.. note::
.. function:: min(x)
If axis=None, then axis is assumed to be ndim(x)-1
:param x: symbolic Tensor (or compatible)
.. function:: min(x,axis=None)
Returns TODO
:Parameter: *x* - symbolic Tensor (or compatible)
:Parameter: *axis* - axis along which to compute the minimum
:Returns: the minimum value along a given axis
.. function:: sum(x)
.. note::
:param x: symbolic Tensor (or compatible)
if axis=None, then axis is assumed to be ndim(x)-1
.. function:: sum(x,axis=None)
:Parameter: *x* - symbolic Tensor (or compatible)
:Parameter: *axis* - axis or axes along which to compute the sum
:Returns: sum of *x* along *axis*
axis can be:
* *None* - in which case the sum is computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
.. function:: mean(x,axis=None)
:Parameter: *x* - symbolic Tensor (or compatible)
:Parameter: *axis* - axis or axes along which to compute the mean
:Returns: mean value of *x* along *axis*
axis can be:
* *None* - in which case the mean is computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
.. function:: var(x,axis=None)
:Parameter: *x* - symbolic Tensor (or compatible)
:Parameter: *axis* - axis or axes along which to compute the variance
:Returns: variance of *x* along *axis*
axis can be:
* *None* - variance computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
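The axis semantics described above match numpy's reductions, which Theano's mirror; for illustration in plain numpy:

```python
import numpy as np

x = np.array([[1, 2, 3],
              [4, 5, 6]])

assert np.sum(x) == 21                           # axis=None: all axes
assert np.sum(x, axis=0).tolist() == [5, 7, 9]   # down the columns
assert np.sum(x, axis=1).tolist() == [6, 15]     # along the rows
assert np.mean(x, axis=1).tolist() == [2.0, 5.0]
assert np.var(x, axis=0).tolist() == [2.25, 2.25, 2.25]
```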
Returns TODO
Indexing
========
......@@ -257,24 +345,37 @@ Advanced indexing.
Operator Support
================
Python arithmetic operators are supported:
Many Python operators are supported.
>>> a, b = T.itensor3(), T.itensor3() # example inputs
Arithmetic
--------------
>>> a = T.itensor3()
>>> a + 3 # T.add(a, 3) -> itensor3
>>> 3 - a # T.sub(3, a)
>>> a * 3.5 # T.mul(a, 3.5) -> ftensor3 or dtensor3 (depending on autocasting)
>>> 2.2 / a # T.truediv(2.2, a)
>>> 2.2 // a # T.intdiv(2.2, a)
>>> 2.2**a # T.pow(2.2, a)
>>> b % a # T.mod(b, a)
Bitwise
-------------
>>> a & b # T.and_(a,b) bitwise and
>>> a ^ 1 # T.xor(a,1) bitwise xor
>>> a | b # T.or_(a,b) bitwise or
>>> ~a # T.invert(a) bitwise invert
Inplace
-------------
In-place operators are *not* supported. Theano's graph-optimizations
will determine which intermediate values to use for in-place
computations. If you would like to update the value of a
:term:`shared variable`, consider using the ``updates`` argument to
:func:`theano.function`.
Elementwise
===========
......@@ -285,11 +386,25 @@ Casting
Comparisons
------------
The six usual equality and inequality operators share the same interface.
:Parameter: *a* - symbolic Tensor (or compatible)
:Parameter: *b* - symbolic Tensor (or compatible)
:Return type: symbolic Tensor
:Returns: a symbolic tensor representing the application of the logical elementwise operator.
.. note::
Theano has no boolean dtype. Instead, all boolean tensors are represented
in ``'int8'``.
Here is an example with the less-than operator.
.. code-block:: python
import theano.tensor as T
x,y = T.dmatrices('x','y')
z = T.le(x,y)
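Since Theano has no boolean dtype, the value computed by ``le`` matches what NumPy gives when a comparison is cast to ``int8``. A runnable NumPy sketch of that behaviour (an analogy, not Theano itself):

```python
import numpy

x = numpy.array([[1.0, 2.0], [3.0, 4.0]])
y = numpy.array([[2.0, 2.0], [2.0, 2.0]])

# what T.le(x, y) computes elementwise, stored as int8
z = (x <= y).astype('int8')
print(z.tolist())  # [[1, 1], [0, 0]]
print(z.dtype)     # int8
```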
.. function:: lt(a, b)
Returns a symbolic ``'int8'`` tensor representing the result of logical less-than (a<b).
......@@ -304,69 +419,143 @@ Comparisons
.. function:: le(a, b)

   Returns a variable representing the result of logical less than or
   equal (a<=b).

   :Parameter: *a* - symbolic Tensor (or compatible)
   :Parameter: *b* - symbolic Tensor (or compatible)
   :Return type: symbolic Tensor
   :Returns: a symbolic tensor representing the application of logical
      elementwise less than or equal.

   .. code-block:: python

      import theano.tensor as T
      x,y = T.dmatrices('x','y')
      z = T.le(x,y)

   Also available using syntax ``a <= b``
.. function:: ge(a, b)
Returns a variable representing the result of logical greater than or equal (a>=b).
:Parameter: *a* - symbolic Tensor (or compatible)
:Parameter: *b* - symbolic Tensor (or compatible)
:Return type: symbolic Tensor
:Returns: a symbolic tensor representing the application of logical
elementwise greater than or equal.
.. code-block:: python
import theano.tensor as T
x,y = T.dmatrices('x','y')
z = T.ge(x,y)
Also available using syntax ``a >= b``
.. function:: eq(a, b)

   Returns a variable representing the result of logical equality (a==b).

   :Parameter: *a* - symbolic Tensor (or compatible)
   :Parameter: *b* - symbolic Tensor (or compatible)
   :Return type: symbolic Tensor
   :Returns: a symbolic tensor representing the application of logical
      elementwise equality.

   .. code-block:: python

      import theano.tensor as T
      x,y = T.dmatrices('x','y')
      z = T.eq(x,y)
.. function:: neq(a, b)

   Returns a variable representing the result of logical inequality
   (a!=b).

   :Parameter: *a* - symbolic Tensor (or compatible)
   :Parameter: *b* - symbolic Tensor (or compatible)
   :Return type: symbolic Tensor
   :Returns: a symbolic tensor representing the application of logical
      elementwise inequality.

   .. code-block:: python

      import theano.tensor as T
      x,y = T.dmatrices('x','y')
      z = T.neq(x,y)

Condition
---------

.. function:: switch(cond, ift, iff)

   Returns a variable representing a switch between ift (iftrue) and iff (iffalse)
   based on the condition cond.

   :Parameter: *cond* - symbolic Tensor (or compatible)
   :Parameter: *ift* - symbolic Tensor (or compatible)
   :Parameter: *iff* - symbolic Tensor (or compatible)
   :Return type: symbolic Tensor

   .. code-block:: python

      import theano.tensor as T
      a,b = T.dmatrices('a','b')
      x,y = T.dmatrices('x','y')
      z = T.switch(T.lt(a,b), x, y)
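``switch`` is elementwise, and its numeric behaviour matches NumPy's ``where``. A runnable NumPy sketch of the example above (an analogy to the symbolic version):

```python
import numpy

a = numpy.array([[0.0, 2.0]])
b = numpy.array([[1.0, 1.0]])
x = numpy.array([[10.0, 20.0]])
y = numpy.array([[30.0, 40.0]])

# where a < b holds, take the element of x, otherwise the element of y
z = numpy.where(a < b, x, y)
print(z.tolist())  # [[10.0, 40.0]]
```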
Bit-wise
--------
The bitwise operators possess this interface:

:Parameter: *a* - symbolic Tensor of integer type.
:Parameter: *b* - symbolic Tensor of integer type.
:Return type: symbolic Tensor with corresponding dtype.

.. note::

   The bitwise operators must have an integer type as input.
   The bit-wise not (invert) takes only one parameter.
.. function:: and_(a, b)
Returns a variable representing the result of the bitwise and.
.. function:: or_(a, b)
Returns a variable representing the result of the bitwise or.
.. function:: xor(a, b)
Returns a variable representing the result of the bitwise xor.
.. function:: invert(a)
Returns a variable representing the result of the bitwise not.
Here is an example using the bit-wise ``and_`` via the ``&`` operator:
.. code-block:: python
import theano.tensor as T
x,y = T.imatrices('x','y')
z = x & y
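The bitwise ops follow the usual two's-complement integer semantics, the same as NumPy's. A runnable NumPy sketch of all four (an analogy to the symbolic ops):

```python
import numpy

x = numpy.array([12, 10], dtype='int32')  # 0b1100, 0b1010
y = numpy.array([10, 3], dtype='int32')   # 0b1010, 0b0011

print((x & y).tolist())  # [8, 2]     like and_(x, y)
print((x | y).tolist())  # [14, 11]   like or_(x, y)
print((x ^ y).tolist())  # [6, 9]     like xor(x, y)
print((~x).tolist())     # [-13, -11] like invert(x)
```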
Mathematical
------------
.. function:: abs_(a)
Returns a variable representing the absolute value of a, ie ``|a|``.
.. note:: Can also be accessed with ``abs(a)``.
.. function:: exp(a)
Returns a variable representing the exponential of a, ie e^a.
.. function:: neg(a)
Returns a variable representing the opposite of a, ie -a.
.. function:: inv(a)
Returns a variable representing the inverse of a, ie 1.0/a.
.. function:: log(a), log2(a), log10(a)
Returns a variable representing the base e, 2 or 10 logarithm of a.
.. function:: sgn(a)
Returns a variable representing the sign of a.
.. function:: ceil(a)
Returns a variable representing the ceiling of a (for example ceil(2.1) is 3).
.. function:: floor(a)
Returns a variable representing the floor of a (for example floor(2.9) is 2).
.. function:: iround(a)
Returns a variable representing the rounding of a to the nearest integer, with an integer dtype (roughly int(round(a))).
.. function:: sqr(a)
Returns a variable representing the square of a, ie a^2.
.. function:: sqrt(a)
Returns a variable representing the square root of a, ie a^0.5.
.. function:: cos(a), sin(a), tan(a)
Returns a variable representing the trigonometric functions of a (cosine, sine and tangent).
.. function:: cosh(a), sinh(a), tanh(a)
Returns a variable representing the hyperbolic trigonometric functions of a (hyperbolic cosine, sine and tangent).
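Most of these have direct NumPy counterparts, which is a quick way to check the intended numeric behaviour. A runnable NumPy sketch (``sqr`` and ``inv`` are spelled out, since NumPy has no functions with those exact names):

```python
import numpy

a = numpy.array([2.1, 2.9, -4.0])

print(numpy.ceil(a))   # ceil(2.1) -> 3.0, ceil(2.9) -> 3.0, ceil(-4.0) -> -4.0
print(numpy.floor(a))  # floor(2.1) -> 2.0, floor(2.9) -> 2.0
print(numpy.sign(a))   # like sgn: 1.0, 1.0, -1.0
print(numpy.abs(a))    # like abs_

print(a ** 2)   # like sqr(a)
print(1.0 / a)  # like inv(a)
```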
.. _libdoc_tensor_broadcastable:
Broadcasting in Theano vs. Numpy
......@@ -402,8 +591,6 @@ information is given in the :ref:`type` of a *Variable*.
See also:
* :ref:`How broadcasting is used in Theano's tensor types <tensortypes>`
* `SciPy documentation about numpy's broadcasting <http://www.scipy.org/EricsBroadcastingDoc>`_
* `OnLamp article about numpy's broadcasting <http://www.onlamp.com/pub/a/python/2000/09/27/numerically.html>`_
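Theano follows NumPy's broadcasting rule: shapes are aligned from the right, and a length-1 dimension is stretched to match its partner. A runnable NumPy sketch of the rule:

```python
import numpy

row = numpy.array([[1.0, 2.0, 3.0]])  # shape (1, 3): a broadcastable row
col = numpy.array([[10.0], [20.0]])   # shape (2, 1): a broadcastable column

# each length-1 dimension is stretched, giving a (2, 3) result
out = row + col
print(out.shape)     # (2, 3)
print(out.tolist())  # [[11.0, 12.0, 13.0], [21.0, 22.0, 23.0]]
```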
......@@ -449,11 +636,6 @@ Linear Algebra
Fourier Transforms
==================
[James has some code for this, but hasn't gotten it into the source tree yet.]
Gradient / Differentiation
==========================
......
.. _libdoc_tensor:
==================================================
:mod:`tensor` -- Types and Ops for Symbolic numpy
==================================================
......
......@@ -109,9 +109,10 @@ Reference
.. method:: updates()
:returns: a list of all the (state, new_state) update pairs from the
    random variables it has returned.

    This can be a convenient shortcut to enumerating all the random
    variables in a large graph in the ``updates`` parameter of function.
.. method:: seed(meta_seed)
......
......@@ -17,3 +17,6 @@ TODO: Give examples for how to use these things! They are pretty complicated.
.. function:: downsample2D(*todo)
.. function:: fft(*todo)
[James has some code for this, but hasn't gotten it into the source tree yet.]
......@@ -110,7 +110,7 @@ and giving ``z`` as output:
>>> f = function([x, y], z)
The first argument to :func:`function <function.function>` is a list of Variables
that will be provided as inputs to the function. The second argument
is a single Variable *or* a list of Variables. For either case, the second
argument is what we want to see as output when we apply the function.
......
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="298.12207"
height="286.03781"
id="svg2"
sodipodi:version="0.32"
inkscape:version="0.45.1"
sodipodi:docbase="/home/olivier/hg/theano"
sodipodi:docname="apply1_2.svg"
inkscape:output_extension="org.inkscape.output.svg.inkscape"
version="1.0"
inkscape:export-filename="/home/olivier/hg/theano/apply.png"
inkscape:export-xdpi="120.76"
inkscape:export-ydpi="120.76">
<defs
id="defs4">
<marker
inkscape:stockid="Dot_m"
orient="auto"
refY="0.0"
refX="0.0"
id="Dot_m"
style="overflow:visible">
<path
id="path4340"
d="M -2.5,-1.0 C -2.5,1.7600000 -4.7400000,4.0 -7.5,4.0 C -10.260000,4.0 -12.5,1.7600000 -12.5,-1.0 C -12.5,-3.7600000 -10.260000,-6.0 -7.5,-6.0 C -4.7400000,-6.0 -2.5,-3.7600000 -2.5,-1.0 z "
style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;marker-end:none"
transform="scale(0.4) translate(7.4, 1)" />
</marker>
<marker
inkscape:stockid="Dot_l"
orient="auto"
refY="0.0"
refX="0.0"
id="Dot_l"
style="overflow:visible">
<path
id="path4337"
d="M -2.5,-1.0 C -2.5,1.7600000 -4.7400000,4.0 -7.5,4.0 C -10.260000,4.0 -12.5,1.7600000 -12.5,-1.0 C -12.5,-3.7600000 -10.260000,-6.0 -7.5,-6.0 C -4.7400000,-6.0 -2.5,-3.7600000 -2.5,-1.0 z "
style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;marker-end:none"
transform="scale(0.8) translate(7.4, 1)" />
</marker>
<marker
inkscape:stockid="Arrow2Lstart"
orient="auto"
refY="0"
refX="0"
id="Arrow2Lstart"
style="overflow:visible">
<path
id="path4293"
style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.97309,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
transform="matrix(1.1,0,0,1.1,1.1,0)" />
</marker>
<marker
inkscape:stockid="Arrow1Lstart"
orient="auto"
refY="0"
refX="0"
id="Arrow1Lstart"
style="overflow:visible">
<path
id="path4275"
d="M 0,0 L 5,-5 L -12.5,0 L 5,5 L 0,0 z "
style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
transform="matrix(0.8,0,0,0.8,10,0)" />
</marker>
<marker
inkscape:stockid="Arrow1Lend"
orient="auto"
refY="0"
refX="0"
id="Arrow1Lend"
style="overflow:visible">
<path
id="path4278"
d="M 0,0 L 5,-5 L -12.5,0 L 5,5 L 0,0 z "
style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
transform="matrix(-0.8,0,0,-0.8,-10,0)" />
</marker>
<marker
inkscape:stockid="Arrow1Mend"
orient="auto"
refY="0"
refX="0"
id="Arrow1Mend"
style="overflow:visible">
<path
id="path4284"
d="M 0,0 L 5,-5 L -12.5,0 L 5,5 L 0,0 z "
style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
transform="matrix(-0.4,0,0,-0.4,-4,0)" />
</marker>
<marker
inkscape:stockid="Arrow1Mstart"
orient="auto"
refY="0"
refX="0"
id="Arrow1Mstart"
style="overflow:visible">
<path
id="path4281"
d="M 0,0 L 5,-5 L -12.5,0 L 5,5 L 0,0 z "
style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
transform="matrix(0.4,0,0,0.4,4,0)" />
</marker>
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
gridtolerance="10000"
guidetolerance="10"
objecttolerance="10"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="1.4142136"
inkscape:cx="255.9959"
inkscape:cy="191.5532"
inkscape:document-units="px"
inkscape:current-layer="layer1"
inkscape:window-width="1680"
inkscape:window-height="1030"
inkscape:window-x="0"
inkscape:window-y="0" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-240.86593,-100.33369)">
<rect
style="fill:#5599ff;fill-rule:evenodd;stroke:#000000;stroke-width:0.77611327;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="rect2184"
width="149.22389"
height="54.223885"
x="339.93231"
y="238.6571" />
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="388.49954"
y="269.95786"
id="text4470"><tspan
sodipodi:role="line"
id="tspan4472"
x="388.49954"
y="269.95786"
style="font-size:18px;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:none;font-family:Courier 10 Pitch">Apply</tspan></text>
<rect
style="fill:#5599ff;fill-rule:evenodd;stroke:#000000;stroke-width:0.77780414;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="rect4492"
width="149.2222"
height="15.222196"
x="339.9332"
y="222.61548" />
<rect
style="fill:#5599ff;fill-rule:evenodd;stroke:#000000;stroke-width:0.77780414;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="rect4483"
width="149.2222"
height="15.222196"
x="339.91492"
y="293.67752" />
<g
id="g8467"
transform="translate(110.2947,-116.37302)"
style="stroke-width:1;stroke-miterlimit:4;stroke-dasharray:none">
<path
transform="matrix(0.90677,0,0,1.0746904,0.5745625,-32.177075)"
d="M 187.15544 385.49426 A 21.620844 18.242588 0 1 1 143.91375,385.49426 A 21.620844 18.242588 0 1 1 187.15544 385.49426 z"
sodipodi:ry="18.242588"
sodipodi:rx="21.620844"
sodipodi:cy="385.49426"
sodipodi:cx="165.53459"
id="path8457"
style="fill:#87deaa;fill-opacity:1;stroke:#000000;stroke-width:1.01300073;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
sodipodi:type="arc" />
<text
id="text8459"
y="394.30408"
x="151.00809"
style="font-size:38.98389435px;font-style:normal;font-weight:normal;text-align:center;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
xml:space="preserve"><tspan
y="394.30408"
x="151.00809"
id="tspan8461"
sodipodi:role="line">+</tspan></text>
</g>
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-start:url(#Dot_m);marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline"
d="M 347.13075,266.30169 L 280.97106,265.86799"
id="path8463"
inkscape:connector-type="polyline"
inkscape:connection-end="#g8467" />
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="308.77267"
y="259.82309"
id="text9514"><tspan
sodipodi:role="line"
id="tspan9516"
x="308.77267"
y="259.82309">op</tspan></text>
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="424.25516"
y="333.46909"
id="text9518"><tspan
sodipodi:role="line"
id="tspan9520"
x="424.25516"
y="333.46909">owner</tspan></text>
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="493.24387"
y="234.14835"
id="text9522"><tspan
sodipodi:role="line"
id="tspan9524"
x="493.24387"
y="234.14835">inputs</tspan></text>
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="493.71262"
y="305.09174"
id="text9526"><tspan
sodipodi:role="line"
id="tspan9528"
x="493.71262"
y="305.09174">outputs</tspan></text>
<rect
ry="11.309009"
y="159.52519"
x="349.08124"
height="37.13615"
width="46.799088"
id="rect19190"
style="fill:#b35b4f;fill-opacity:1;stroke:#000000;stroke-width:0.79999995;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<text
xml:space="preserve"
style="font-size:23.51551628px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="366.89929"
y="185.69189"
id="text19200"><tspan
sodipodi:role="line"
id="tspan19202"
x="366.89929"
y="185.69189"
style="font-size:23.51551628px;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:none">x</tspan></text>
<rect
ry="7.3372421"
rx="7.3372421"
y="134.86392"
x="300.77298"
height="19.022192"
width="58.017704"
id="rect19204"
style="fill:#c88fd5;fill-opacity:1;stroke:#000000;stroke-width:0.56307727;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<text
id="text19206"
y="148.37491"
x="308.72882"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
xml:space="preserve"><tspan
y="148.37491"
x="308.72882"
id="tspan19208"
sodipodi:role="line">matrix</tspan></text>
<path
id="path19210"
d="M 353.53683,179.12906 L 329.27775,179.12906 L 329.27775,154.10602"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#Dot_m)" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#Dot_m)"
d="M 372.64527,162.11475 C 372.64527,115.4457 372.64527,115.4457 372.64527,115.4457"
id="path19212" />
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="356.38177"
y="109.08174"
id="text19214"><tspan
sodipodi:role="line"
id="tspan19216"
x="356.38177"
y="109.08174">None</tspan></text>
<rect
style="fill:#b35b4f;fill-opacity:1;stroke:#000000;stroke-width:0.79999995;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="rect19218"
width="46.799088"
height="37.13615"
x="431.08124"
y="159.52519"
ry="11.309009" />
<text
id="text19224"
y="185.69189"
x="448.89929"
style="font-size:23.51551628px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
xml:space="preserve"><tspan
style="font-size:23.51551628px;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:none"
y="185.69189"
x="448.89929"
id="tspan19226"
sodipodi:role="line">y</tspan></text>
<rect
style="fill:#c88fd5;fill-opacity:1;stroke:#000000;stroke-width:0.56307727;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="rect19228"
width="58.017704"
height="19.022192"
x="382.77298"
y="134.86392"
rx="7.3372421"
ry="7.3372421" />
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="390.72882"
y="148.37491"
id="text19230"><tspan
sodipodi:role="line"
id="tspan19232"
x="390.72882"
y="148.37491">matrix</tspan></text>
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#Dot_m)"
d="M 435.53683,179.12906 L 411.27775,179.12906 L 411.27775,154.10602"
id="path19234" />
<path
id="path19236"
d="M 454.64527,162.11475 C 454.64527,115.4457 454.64527,115.4457 454.64527,115.4457"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#Dot_m)" />
<text
id="text19238"
y="109.08174"
x="438.38177"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
xml:space="preserve"><tspan
y="109.08174"
x="438.38177"
id="tspan19240"
sodipodi:role="line">None</tspan></text>
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-start:url(#Dot_m);marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="M 372.86394,230.00276 L 372.66107,197.10627"
id="path19242"
inkscape:connector-type="polyline" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-start:url(#Dot_m);marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="M 455.78696,230.00276 L 454.98829,197.10627"
id="path19244"
inkscape:connector-type="polyline" />
<rect
ry="11.309009"
y="348.83536"
x="393.08124"
height="37.13615"
width="46.799088"
id="rect19246"
style="fill:#b35b4f;fill-opacity:1;stroke:#000000;stroke-width:0.79999995;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<text
xml:space="preserve"
style="font-size:23.51551628px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="410.89929"
y="375.00204"
id="text19252"><tspan
sodipodi:role="line"
id="tspan19254"
x="410.89929"
y="375.00204"
style="font-size:23.51551628px;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:none">z</tspan></text>
<rect
ry="7.3372421"
rx="7.3372421"
y="324.17407"
x="344.77298"
height="19.022192"
width="58.017704"
id="rect19256"
style="fill:#c88fd5;fill-opacity:1;stroke:#000000;stroke-width:0.56307727;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<text
id="text19258"
y="337.68506"
x="352.72882"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
xml:space="preserve"><tspan
y="337.68506"
x="352.72882"
id="tspan19260"
sodipodi:role="line">matrix</tspan></text>
<path
id="path19262"
d="M 397.53683,368.4392 L 373.27775,368.4392 L 373.27775,343.41616"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#Dot_m)" />
<text
id="text19274"
y="147.46909"
x="460.25516"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
xml:space="preserve"><tspan
y="147.46909"
x="460.25516"
id="tspan19276"
sodipodi:role="line">owner</tspan></text>
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="296.25516"
y="173.46909"
id="text19278"><tspan
sodipodi:role="line"
id="tspan19280"
x="296.25516"
y="173.46909">type</tspan></text>
<path
inkscape:connector-type="polyline"
id="path21431"
d="M 418.00152,353.75266 L 416.8028,304.37924"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-start:url(#Dot_m);marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;marker-start:url(#Arrow1Lstart);marker-end:url(#Dot_m);stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="M 418.00152,351.75266 L 416.8028,302.37924"
id="path21433"
inkscape:connector-type="polyline" />
</g>
</svg>
......@@ -97,3 +97,88 @@ Use your imagination :)
This can be a really powerful debugging tool.
Note the call to ``fn`` inside the call to ``print_eval``; without it, the graph wouldn't get computed at all!
How to use pdb?
----------------
In the majority of cases, you won't be executing from the interactive shell
but from a set of Python scripts. In such cases, the use of the Python
debugger can come in handy, especially as your models become more complex.
Intermediate results don't necessarily have a clear name and you can get
exceptions which are hard to decipher, due to the "compiled" nature of
functions.
Consider this example script ("ex.py"):
.. code-block:: python
import theano
import numpy
import theano.tensor as T
a = T.dmatrix('a')
b = T.dmatrix('b')
f = theano.function([a,b], [a*b])
# matrices chosen so dimensions are unsuitable for multiplication
mat1 = numpy.arange(12).reshape((3,4))
mat2 = numpy.arange(25).reshape((5,5))
f(mat1, mat2)
This example is simple enough that the bug could be found by inspection, but
it serves to illustrate the process. Since the matrices can't be element-wise
multiplied (their shapes are unsuitable), we get the following exception:
.. code-block:: text
File "ex.py", line 14, in <module>
f(mat1, mat2)
File "/u/username/Theano/theano/compile/function_module.py", line 451, in __call__
File "/u/username/Theano/theano/gof/link.py", line 271, in streamline_default_f
File "/u/username/Theano/theano/gof/link.py", line 267, in streamline_default_f
  File "/u/username/Theano/theano/gof/cc.py", line 1049, in execute
ValueError: ('Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 5)', Elemwise{mul,no_inplace}(a, b), Elemwise{mul,no_inplace}(a, b))
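The underlying numeric failure is the same one NumPy raises for shapes that cannot be broadcast together; reproducing it directly in NumPy can help confirm the diagnosis:

```python
import numpy

mat1 = numpy.arange(12).reshape((3, 4))
mat2 = numpy.arange(25).reshape((5, 5))

try:
    mat1 * mat2  # (3, 4) and (5, 5) cannot be broadcast together
except ValueError:
    print("shape mismatch:", mat1.shape, "vs", mat2.shape)
```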
The call stack contains some useful information for tracing back the source
of the error. There's the script where the compiled function was called --
but if you're using (improperly parameterized) prebuilt modules, the error
might originate from ops in these modules, not this script. The last line
tells us about the Op that caused the exception. In this case it's a "mul"
involving Variables named "a" and "b". But suppose we instead had an
intermediate result to which we hadn't given a name.
After learning a few things about the graph structure in Theano, we can use
the Python debugger to explore the graph, and then we can get runtime
information about the error. Matrix dimensions, especially, are useful to
pinpoint the source of the error. The printout above already shows two of the
four dimensions of the matrices involved, but for the sake of the example,
say we needed the other dimensions to pinpoint the error. First, we re-launch
with the debugger module and run the program with "c":
.. code-block:: text
python -m pdb ex.py
> /u/username/experiments/doctmp1/ex.py(1)<module>()
-> import theano
(Pdb) c
Then we get back the above error printout, but the interpreter breaks in
that state. Useful commands here are
* "up" and "down" (to move up and down the call stack),
* "l" (to print code around the line in the current stack position),
* "p variable_name" (to print the string representation of 'variable_name'),
* "p dir(object_name)", using the Python dir() function to print the list of an object's members
Here, for example, I do "up", and a simple "l" shows me there's a local
variable "node". This is the "node" from the computation graph, so by
following the "node.inputs", "node.owner" and "node.outputs" links I can
explore around the graph.
That graph is purely symbolic (no data, just symbols to manipulate it
abstractly). To get information about the actual parameters, you explore the
"thunks" objects, which bind the storage for the inputs (and outputs) with
the function itself (a "thunk" is a concept related to closures). Here, to
get the current node's first input's shape, you'd therefore do "p
thunk.inputs[0][0].shape", which prints out "(3, 4)".
......@@ -197,8 +197,8 @@ array(33.0)
.. _functionstateexample:
Using shared variables
======================
It is also possible to make a function with an internal state. For
example, let's say we want to make an accumulator: at the beginning,
......@@ -214,7 +214,7 @@ internal state, and returns the old state value.
>>> accumulator = function([inc], state, updates=[(state, state+inc)])
This code introduces a few new concepts. The ``shared`` function constructs
so-called :term:`shared variables <shared variable>`. These are hybrid symbolic and non-symbolic
variables. Shared variables can be used in symbolic expressions just like
the objects returned by ``dmatrices(...)`` but they also have a ``.value``
property that defines the value taken by this symbolic variable in *all* the
......@@ -268,8 +268,8 @@ updates). Also, theano has more control over where and how shared variables are
allocated, which is one of the important elements of getting good performance
on the GPU.
It may happen that you expressed some formula using a shared variable, but
you do *not* want to use its value. In this case, you can use the
``givens`` parameter of ``function`` which replaces a particular node in a graph
for the purpose of one particular function.
......@@ -290,5 +290,94 @@ substitution to be co-dependent, the order of substitution is not defined, so
the substitutions have to work in any order.
Using Random Numbers
====================
Because in Theano you first express everything symbolically and
only afterwards compile these expressions into functions,
using pseudo-random numbers is not as straightforward as it is in
numpy, though it is also not too complicated.
The way to think about putting randomness into Theano's computations is
to put random variables in your graph. Theano will allocate a NumPy
``RandomState`` object (a random number generator) for each such
variable, and draw from it as necessary. I'll call this sort of
sequence of random numbers a *random stream*. *Random streams* are at
their core shared variables, so the observations on shared variables
hold here as well.
Brief example
-------------
Here's a brief example. The setup code is:
.. code-block:: python
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=234)
rv_u = srng.uniform((2,2))
rv_n = srng.normal((2,2))
f = function([], rv_u, updates=[rv_u.update])
g = function([], rv_n)  # omitting rv_n.update
nearly_zeros = function([], rv_u + rv_u - 2 * rv_u, updates=[rv_u.update])
Here, 'rv_u' represents a random stream of 2x2 matrices of draws from a uniform
distribution. Likewise, 'rv_n' represents a random stream of 2x2 matrices of
draws from a normal distribution. The distributions that are implemented are
defined in :class:`RandomStreams`.
Now let's use these things. If we call f(), we get random uniform numbers.
Since we are updating the internal state of the random number generator (via
the ``updates`` argument), we get different random numbers every time.
>>> f_val0 = f()
>>> f_val1 = f() #different numbers from f_val0
When we omit the ``updates`` argument to ``function`` (as in ``g``), the
random number generator state is not affected by calling the returned
function. So, for example, calling ``g`` multiple times will return the same numbers.
>>> g_val0 = g() # different numbers from f_val0 and f_val1
>>> g_val0 = g() # same numbers as g_val0 !!!
An important remark is that a random variable is drawn at most once during any
single function execution. So the ``nearly_zeros`` function is guaranteed to
return approximately 0 (except for rounding error) even though the ``rv_u``
random variable appears three times in the output expression.
>>> nearly_zeros = function([], rv_u + rv_u - 2 * rv_u, updates=[rv_u.update])
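The draw-at-most-once rule can be sketched in plain numpy (a hedged illustration of the semantics described above, not Theano's actual machinery):

```python
import numpy

# The random variable is sampled once per call, and every appearance
# of it in the expression reuses that single sample, so the result is
# exactly zero rather than a combination of three independent draws.
rng = numpy.random.RandomState(234)

def nearly_zeros_sketch():
    sample = rng.uniform(size=(2, 2))    # rv_u is drawn exactly once
    return sample + sample - 2 * sample  # all three appearances share it

assert numpy.allclose(nearly_zeros_sketch(), 0.0)
```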
Seeding Streams
----------------
Random variables can be seeded individually or collectively.
You can seed just one random variable by seeding or assigning to the
``.rng.value`` attribute.
>>> rv_u.rng.value.seed(89234) # seeds the generator for rv_u
You can also seed *all* of the random variables allocated by a :class:`RandomStreams`
object by calling that object's ``seed`` method. This seed will be used to seed a
temporary random number generator that will in turn generate seeds for each
of the random variables.
>>> srng.seed(902340) # seeds rv_u and rv_n with different seeds each
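The seeding scheme described above can be sketched with plain numpy (the function name here is illustrative, not Theano's internals): a master seed feeds a temporary generator whose draws become per-variable seeds.

```python
import numpy

def seed_all(master_seed, n_streams):
    # the temporary generator exists only to derive per-stream seeds
    tmp = numpy.random.RandomState(master_seed)
    return [numpy.random.RandomState(tmp.randint(2 ** 30))
            for _ in range(n_streams)]

rng_u, rng_n = seed_all(902340, 2)
# reseeding with the same master seed reproduces every stream
rng_u2, rng_n2 = seed_all(902340, 2)
assert (rng_u.uniform(size=2) == rng_u2.uniform(size=2)).all()
```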
Sharing Streams between Functions
---------------------------------
As usual for shared variables, the random number generators used for random
variables are common between functions. So our ``nearly_zeros`` function will
update the state of the generators used in function ``f`` above.
For example:
>>> state_after_v0 = rv_u.rng.value.get_state()
>>> nearly_zeros() # this affects rv_u's generator
>>> v1 = f()
>>> rv_u.rng.value.set_state(state_after_v0)
>>> v2 = f() # v2 != v1
......@@ -18,12 +18,16 @@ of Theano. Let's import that subpackage under a handy name. I like
If that worked you're ready for the tutorial, otherwise check your
installation (see :ref:`install`).
Throughout the tutorial, bear in mind that there is a :ref:`glossary` to help
you out.
.. toctree::
numpy
adding
examples
loading_and_saving
symbolic_graphs
modes
remarks
debug_faq
......
.. tutorial_loadsave:
.. _tutorial_loadsave:
==================
Loading and Saving
==================
......
......@@ -8,7 +8,7 @@ Using different compiling modes
Mode
====
Everytime :ref:`theano.function <libdoc_compile_function>` is called
Everytime :func:`theano.function <function.function>` is called
the symbolic relationships between the input and output Theano *variables*
are optimized and compiled. The way this compilation occurs
is controlled by the value of the ``mode`` parameter.
......@@ -25,7 +25,7 @@ The default mode is typically ``FAST_RUN``, but it can be controlled via
the environment variable ``THEANO_DEFAULT_MODE``, which can in turn be
overridden by setting `theano.compile.mode.default_mode` directly,
which can in turn be overridden by passing the keyword argument to
:ref:`theano.function <libdoc_compile_function>`.
:func:`theano.function <function.function>`.
================= =============================================================== ===============================================================================
short name Full constructor What does it do?
......@@ -91,7 +91,7 @@ ProfileMode
Beside checking for errors, another important task is to profile your
code. For this Theano uses a special mode called ProfileMode which has
to be passed as an argument to :ref:`theano.function <libdoc_compile_function>`. Using the ProfileMode is a three-step process.
to be passed as an argument to :func:`theano.function <function.function>`. Using the ProfileMode is a three-step process.
Creating a ProfileMode Instance
......
.. _tutorial_graphstructures:
================
Graph Structures
================
Debugging or profiling code written in Theano is not that simple if you
do not know what goes on under the hood. This chapter is meant to
introduce you to the required minimum of the inner workings of Theano;
for more details see :ref:`extending`.
The first step in writing Theano code is to write down all mathematical
relations using symbolic placeholders (**variables**). When writing down
these expressions you use operations like ``+``, ``-``, ``**``,
``sum()``, ``tanh()``. All these are represented internally as **ops**.
An **op** represents a certain computation on some type of inputs
producing some type of output. You can see it as a function definition
in most programming languages.
Theano builds internally a graph structure composed of interconnected
**variable** nodes, **op** nodes and **apply** nodes. An
**apply** node represents the application of an **op** to some
**variables**. It is important to distinguish between the
definition of a computation, represented by an **op**, and its application
to some actual data, which is represented by the **apply** node. For more
details about these building blocks see :ref:`variable`, :ref:`op`,
:ref:`apply`. A graph example is the following:
**Code**
.. code-block:: python
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
**Diagram**
.. figure:: apply.png
:align: center
Arrows represent references to the Python objects pointed at. The blue
box is an :ref:`apply` node. Red boxes are :ref:`variable` nodes. Green
circles are :ref:`Ops <op>`. Purple boxes are :ref:`Types <type>`.
The graph can be traversed starting from outputs (the result of some
computation) down to its inputs using the owner field.
Take for example the following code:
.. code-block:: python
x = T.dmatrix('x')
y = x*2.
If you print ``type(y.owner)`` you get ``<class 'theano.gof.graph.Apply'>``,
which is the apply node that connects the op and the inputs to get this
output. You can now print the name of the op that is applied to get
``y``:
>>> y.owner.op.name
'Elemwise{mul,no_inplace}'
So an elementwise multiplication is used to compute ``y``. This
multiplication is done between the inputs
>>> len(y.owner.inputs)
2
>>> y.owner.inputs[0]
x
>>> y.owner.inputs[1]
InplaceDimShuffle{x,x}.0
Note that the second input is not 2 as we would have expected. This is
because 2 was first :term:`broadcasted <broadcasting>` to a matrix of
the same shape as ``x``. This is done by using the op ``DimShuffle``:
>>> type(y.owner.inputs[1])
<class 'theano.tensor.basic.TensorVariable'>
>>> type(y.owner.inputs[1].owner)
<class 'theano.gof.graph.Apply'>
>>> y.owner.inputs[1].owner.op
<class 'theano.tensor.elemwise.DimShuffle object at 0x14675f0'>
>>> y.owner.inputs[1].owner.inputs
[2.0]
Starting from this graph structure, it is easy to understand how
*automatic differentiation* is done, or how the symbolic relations
can be optimized for performance or stability.
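The variable/op/apply structure described above can be mocked in a few lines of plain Python (a hypothetical sketch, not Theano's real classes): outputs keep an ``owner`` pointer to the apply node that produced them, which is what makes the graph traversable from outputs back to inputs.

```python
class Op(object):
    """A computation definition, identified here only by a name."""
    def __init__(self, name):
        self.name = name

class Apply(object):
    """The application of an op to concrete input variables."""
    def __init__(self, op, inputs):
        self.op, self.inputs = op, inputs

class Variable(object):
    """A node; `owner` is the Apply node that produced it (or None)."""
    def __init__(self, name=None, owner=None):
        self.name, self.owner = name, owner

def apply_op(op, *inputs):
    node = Apply(op, list(inputs))
    return Variable(owner=node)

x = Variable('x')
y = apply_op(Op('mul'), x, Variable('2.0'))
# traverse from the output back to its inputs through the owner field
assert y.owner.op.name == 'mul'
assert y.owner.inputs[0] is x
```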
Automatic Differentiation
=========================
Having the graph structure, computing automatic differentiation is
simple. The only thing :func:`tensor.grad` has to do is to traverse the
graph from the outputs back towards the inputs through all :ref:`apply`
nodes (:ref:`apply` nodes are those that define what computations the
graph performs). For each such :ref:`apply` node, its :ref:`op` defines
how to compute the gradient of the node's outputs with respect to its
inputs. Note that if an :ref:`op` does not provide this information,
it is assumed that the gradient is not defined.
Using the
`chain rule <http://en.wikipedia.org/wiki/Chain_rule>`_
these gradients can be composed in order to obtain the expression of the
gradient of the graph's output with respect to the graph's inputs.
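As a hedged, hand-worked illustration of this chain-rule composition (written out by hand, not what :func:`tensor.grad` actually emits), consider ``z = tanh(x * y)``: each apply node contributes its local derivative, and the traversal multiplies them together.

```python
import math

def grad_z_wrt_x(x, y):
    u = x * y                 # apply node 1: mul,  du/dx = y
    z = math.tanh(u)          # apply node 2: tanh, dz/du = 1 - z**2
    return (1 - z ** 2) * y   # chain rule:   dz/dx = dz/du * du/dx

# sanity check against a finite-difference approximation
eps = 1e-6
x, y = 0.3, 0.7
numeric = (math.tanh((x + eps) * y) - math.tanh((x - eps) * y)) / (2 * eps)
assert abs(grad_z_wrt_x(x, y) - numeric) < 1e-6
```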
Optimizations
=============
When compiling a Theano function, what you give to the
:func:`theano.function <function.function>` is actually a graph
(starting from the outputs variables you can traverse the graph up to
the input variables). While this graph structure shows how to compute
the output from the input, it also offers the possibility to improve
the way this computation is carried out. The way optimizations work in
Theano is by identifying and replacing certain patterns in the graph
with other specialized patterns that produce the same results but are either
faster or more stable. Optimizations can also detect
identical subgraphs to ensure that the same value is not computed
twice, or reformulate parts of the graph into a GPU-specific version.
For example, one (simple) optimization that Theano uses is to replace
the pattern :math:`\frac{xy}{y}` by :math:`x`.
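Pattern-based rewriting of this kind can be sketched over a toy expression tree (a hedged illustration, not Theano's optimizer): expressions are nested tuples, and the rewrite replaces the pattern ``(x * y) / y`` by ``x``.

```python
def simplify(expr):
    """Recursively rewrite (x*y)/y into x in a tuple-based expression tree."""
    if isinstance(expr, tuple):
        expr = tuple(simplify(e) for e in expr)  # simplify children first
        if (expr[0] == 'div' and isinstance(expr[1], tuple)
                and expr[1][0] == 'mul' and expr[1][2] == expr[2]):
            return expr[1][1]                    # (x*y)/y  ->  x
    return expr

graph = ('div', ('mul', 'x', 'y'), 'y')
assert simplify(graph) == 'x'
```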
=============================
Basic Tutorial Mini-Reference
=============================
.. _miniref_mode:
Mode
====
================= =============================================================== ===============================================================================
short name Full constructor What does it do?
================= =============================================================== ===============================================================================
(default) ``compile.mode.Mode(linker='py', optimizer=None)`` Python implementations with zero graph modifications.
FAST_COMPILE ``compile.mode.Mode(linker='c|py', optimizer='fast_compile')`` C implementations where available, quick and cheap graph transformations
FAST_RUN ``compile.mode.Mode(linker='c|py', optimizer='fast_run')`` C implementations where available, all available graph transformations.
DEBUG_MODE ``compile.debugmode.DebugMode()`` Both implementations where available, all available graph transformations.
================= =============================================================== ===============================================================================
.. _tensortypes:
Types
=====
.. _predefinedtypes:
Predefined types
----------------
Predefined types are
located in the :ref:`theano.tensor <libdoc_tensor>` package. The names of the types follow
a recipe:
``<dtype><dimensionality>``
Where ``<dtype>`` is one of:
==== ======== ============== ====
code type domain bits
==== ======== ============== ====
b byte signed integer 8
w word signed integer 16
i integer signed integer 32
l long signed integer 64
f float floating point 32
d double floating point 64
==== ======== ============== ====
Dimensionality is one of:

======  ===================================
name    dimensionality
======  ===================================
scalar  0-dimensional
vector  1-dimensional
matrix  2-dimensional
row     2-dimensional (1xN, broadcastable)
col     2-dimensional (Mx1, broadcastable)
======  ===================================
So, if you want a row of 32-bit floats, it is available
as :ref:`theano.tensor.frow <libdoc_tensor_type>`.
If you want a matrix of 32-bit signed integers, it is available as
:ref:`theano.tensor.imatrix <libdoc_tensor_type>`.
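The naming recipe can be made concrete with a small sketch (the helper below is illustrative, not part of Theano's API): the letter encodes a dtype from the table above, and the suffix names the dimensionality.

```python
# dtype code letters from the table above, mapped to numpy dtype names
dtype_codes = {'b': 'int8', 'w': 'int16', 'i': 'int32', 'l': 'int64',
               'f': 'float32', 'd': 'float64'}

def type_name(code, dim_name):
    """Compose a Theano-style type name, e.g. ('d', 'matrix') -> 'dmatrix'."""
    assert code in dtype_codes, "unknown dtype code"
    return code + dim_name

assert type_name('f', 'row') == 'frow'        # row of float32
assert type_name('d', 'matrix') == 'dmatrix'  # matrix of float64
```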
Each of the types described above can be constructed by two methods:
a singular version (e.g., :ref:`dmatrix <libdoc_tensor_creation>`)
and a plural version (:ref:`dmatrices <libdoc_tensor_creation>`).
When called, the singular version takes a single
argument which is the name of the *Variable* we want to make and it
makes a single Variable of that type. The plural version can either take
an integer or several strings. If an integer is provided, the method
will return that many Variables and if strings are provided, it will
create one Variable for each string, using the string as the Variable's
name. For example:
.. code-block:: python
from theano.tensor import *
x = dmatrix() # creates one Variable with no name
x = dmatrix('x') # creates one Variable with name 'x'
xyz = dmatrix('xyz') # creates one Variable with name 'xyz'
x, y, z = dmatrices(3) # creates three Variables with no names
x, y, z = dmatrices('x', 'y', 'z') # creates three Variables named 'x', 'y' and 'z'
Custom tensor types
-------------------
If you wish to use a type of tensor which is not already available here
(for example, a 3D tensor) you can build an appropriate type using
:ref:`theano.tensor.TensorType <libdoc_tensor_type>`.
The first argument you pass is the `dtype` and the second is the
`broadcastable pattern`.
Where `dtype` is one of:
=========== ================ =================
dtype domain bits
=========== ================ =================
int8 signed integer 8
int16 signed integer 16
int32 signed integer 32
int64 signed integer 64
uint8 unsigned integer 8
uint16 unsigned integer 16
uint32 unsigned integer 32
uint64 unsigned integer 64
float32 floating point 32
float64 floating point 64
complex64 complex 64 (two float32)
complex128 complex 128 (two float64)
=========== ================ =================
.. note::
Even though :ref:`theano.tensor <libdoc_tensor>` does not define any type
using ``complex`` dtypes (``complex64`` or ``complex128``),
you can define them explicitly with
:ref:`TensorType <libdoc_tensor_type>` (see example
below). However, few operations are fully supported for complex
types: as of version 0.1, only elementary operations (``+-*/``)
have C implementations. Additionally, complex types have received
little testing.
The broadcastable pattern indicates both the number of dimensions and
whether a particular dimension must have length 1.
Here is a table mapping the :ref:`broadcastable <libdoc_tensor_broadcastable>` pattern to what kind of tensor it encodes:
===================== =================================
pattern interpretation
===================== =================================
[] scalar
[True] 1D scalar (vector of length 1)
[True, True] 2D scalar (1x1 matrix)
[False] vector
[False, False] matrix
[False] * n nD tensor
[True, False] row (1xN matrix)
[False, True] column (Mx1 matrix)
[False, True, False] A Mx1xP tensor (a)
[True, False, False] A 1xNxP tensor (b)
[False, False, False] A MxNxP tensor (pattern of a + b)
===================== =================================
For dimensions in which broadcasting is False, the length of this
dimension can be 1 or more. For dimensions in which broadcasting is True,
the length of this dimension must be 1.
When two tensors have a different number of dimensions, the broadcastable
pattern is *expanded to the left*, by padding with ``True``. For example,
a vector's pattern, ``[False]``, could be expanded to ``[True, False]``, and
would behave like a row (1xN matrix). In the same way, a matrix (``[False,
False]``) would behave like a 1xNxP tensor (``[True, False, False]``).
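This left-padding rule matches numpy's broadcasting behaviour, which the tensor types mirror, and can be checked directly: a ``(3,)`` vector combined with a ``(2, 3)`` matrix is treated as a ``(1, 3)`` row and stretched along the first dimension.

```python
import numpy

vector = numpy.array([1.0, 2.0, 3.0])  # pattern [False]
matrix = numpy.ones((2, 3))            # pattern [False, False]
result = matrix + vector               # vector acts as [True, False] (a row)
assert result.shape == (2, 3)
assert (result == numpy.array([[2., 3., 4.], [2., 3., 4.]])).all()
```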
If we wanted to create a type representing a 3D array of unsigned
bytes, we would do:
.. code-block:: python
# 3D tensor of unsigned bytes
mytype = theano.tensor.TensorType('uint8', [False]*3)
# complex types (based on complex64)
my_cscalar = theano.tensor.TensorType('complex64', [])
my_cmatrix = theano.tensor.TensorType('complex64', [False, False])
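What a (dtype, broadcastable-pattern) pair promises about a value can be sketched with plain numpy (the ``conforms`` helper is illustrative, not Theano's validation code): the pattern fixes the number of dimensions, and every ``True`` entry forces that dimension to have length 1.

```python
import numpy

def conforms(array, dtype, broadcastable):
    """Check an array against a (dtype, broadcastable-pattern) type."""
    if array.dtype != numpy.dtype(dtype) or array.ndim != len(broadcastable):
        return False
    # where the pattern says True, the dimension must have length 1
    return all(not b or array.shape[i] == 1
               for i, b in enumerate(broadcastable))

img = numpy.zeros((4, 5, 6), dtype='uint8')
assert conforms(img, 'uint8', [False, False, False])
assert not conforms(img, 'uint8', [True, False, False])  # dim 0 is not 1
```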
......@@ -60,8 +60,8 @@ import scalar
import sparse
import gradient
import gof
import floatx
floatx.set_floatX()
import floatX
floatX.set_floatX()
## import scalar_opt
......
......@@ -150,6 +150,10 @@ def lock(tmp_dir, timeout=120, min_wait=5, max_wait=10, verbosity=1):
while os.path.isdir(tmp_dir):
try:
read_owner = open(lock_file).readlines()[0].strip()
# The following line does nothing but raise an exception
# if somehow something is wrong in the owner format, to
# avoid crashing later on.
read_owner.split('_')[0]
except:
read_owner = 'failure'
if last_owner == read_owner:
......@@ -163,9 +167,13 @@ def lock(tmp_dir, timeout=120, min_wait=5, max_wait=10, verbosity=1):
time_start = time.time()
no_display = (verbosity == 0)
if not no_display:
info('Waiting for existing lock by %s (I am %s)' % (
read_owner, my_pid))
info("To manually release the lock, delete", lock_file)
if read_owner == 'failure':
msg = 'unknown process'
else:
msg = "process '%s'" % read_owner.split('_')[0]
info("Waiting for existing lock by %s (I am "
"process '%s')" % (msg, my_pid))
info("To manually release the lock, delete", tmp_dir)
if verbosity <= 1:
no_display = True
time.sleep(random.uniform(min_wait, max_wait))
......
#!/usr/bin/env python
import sys
def filter_output(fd_in):
s=""
for line in fd_in:
toks = line.split()
if len(toks):
if toks[0] == "File" and toks[-1].startswith('test'):
s+=line
if toks[0].startswith("ImportError"):
s+=line
return s
if __name__ == "__main__":
if len(sys.argv)>1:
print filter_output(open(sys.argv[1]))
else:
print filter_output(sys.stdin)
......@@ -29,9 +29,10 @@ class ConvOp(Op):
#TODO: make the stacksize its own parameter, and make imshp a pair
def __init__(self, imshp, kshp, nkern, bsize, dx, dy, output_mode='valid',
unroll_batch=4,
unroll_kern=4,
def __init__(self, imshp=None, kshp=None, nkern=None, bsize=None, dx=None, dy=None, output_mode='valid',
unroll_batch=0,
unroll_kern=0,
unroll_patch=False,
imshp_logical=None,
kshp_logical=None,
kshp_logical_top_aligned=True,
......@@ -47,6 +48,7 @@ class ConvOp(Op):
dx - patch stride rows
dy - patch stride cols
out_mode - 'valid', 'full'
unroll_patch - c code generation option
unroll_batch - c code generation option
unroll_kern - c code generation option
verbose - passed to GpuConv
......@@ -60,6 +62,7 @@ class ConvOp(Op):
gradient on the filters.
unroll_patch. If True, will use a version that unrolls the patch loop; it is faster than the version without unrolling.
unroll_batch. If >0, will use a version that unrolls the batch loop by the value of the option. By default this version of the code is not used.
unroll_nkern. Same as unroll_batch, but unrolls the kernel loop.
......@@ -95,6 +98,7 @@ class ConvOp(Op):
self.unroll_batch=unroll_batch
self.unroll_kern=unroll_kern
self.unroll_patch=unroll_patch
if self.unroll_batch>0 and self.bsize % self.unroll_batch!=0:
if self.bsize<=self.unroll_batch:
......@@ -407,6 +411,7 @@ using namespace std;
d["self_imshp0"]=self.imshp[0]
d["self_imshp1"]=self.imshp[1]
d["self_imshp2"]=self.imshp[2]
d["mode"]=self.out_mode.upper()
d["self_kshp0"]=self.kshp[0]
d["self_kshp1"]=self.kshp[1]
d["self_kshp_logical_r"] = self.kshp_logical[0]
......@@ -439,8 +444,12 @@ using namespace std;
#print self.out_mode, d["self_imshp_logical_stride_r"]
if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
# print "return imshp!=imshp_logical or self.kshp != self.kshp_logical shape version"
return _conv_op_code_a % d
if self.unroll_patch:
# print "return unroll patch version",self.dx,self.dy
return _conv_op_code_unroll_patch%d
if self.unroll_batch>0 or self.unroll_kern>0:
if self.unroll_batch<=0: self.unroll_batch=1
if self.unroll_kern<=0: self.unroll_kern=1
......@@ -1212,3 +1221,295 @@ Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
return ret
_conv_op_code_unroll_patch = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s);
int type_ker=PyArray_TYPE(%(filtersflipped)s);
npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s};
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(%(img2d)s->nd==2){
img2d_dim[3]=%(img2d)s->dimensions[1];
img2d_dim[2]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==3){
img2d_dim[3]=%(img2d)s->dimensions[2];
img2d_dim[2]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==4){
img2d_dim[3]=%(img2d)s->dimensions[3];
img2d_dim[2]=%(img2d)s->dimensions[2];
img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else {
PyErr_SetString(PyExc_ValueError, "img doesn't have a valid shape");
%(fail)s;
}
if(%(filtersflipped)s->nd==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else if(%(filtersflipped)s->nd==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{
std::stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel doesn't have a valid shape. " + param).c_str());
%(fail)s;
}
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
}
typenum = PyArray_ObjectType((PyObject*)%(img2d)s, 0);
typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type"); %(fail)s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match"); %(fail)s;}
if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s;
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
npy_intp dims[4] = {0,0,0,0};
dims[0]=%(self_bsize)s;
dims[1]=%(self_nkern)s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//I keep the formula to calculate Os in case we need it in the future.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
//assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s;
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(%(z)s,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
int new_m;
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
// Reposition index into input image based on requested output size
int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s;
%(type)s sum=0;
%(type)s sum2=0;
%(type)s sum3=0;
%(type)s sum4=0;
int nb_sum=0;
// Sum over kernel, if index into image is out of bounds
// fill with the value
for (int j=0; j < dim_ker[0]; j++) {
int ind0 = (new_m-j);
if(mode==FULL){
const %(type)s * idx_hvals=&hvals[j*dim_ker[1]];
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
sum+= idx_hvals[k] * fill_value;
}
}else{
//do the part where kernel is to the right of the img
//TODO: implement unroll patch for fill_value!=0
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
for(k=0;k<max_k;k++){
sum+= idx_hvals[k]*fill_value;
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(pos_n+1,(int)dim_ker[1]);
const %(type)s * idx_in=&in[ind0*dim_im[1]];
if(iter_n + 4*%(self_dy)s < Os[1]
&& iter_n>dim_ker[1]-1+3
&& iter_n<dim_im[1]-dim_ker[1]+1-3){
nb_sum=4;
//cout<<4<<endl;
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+%(self_dy)s];
sum3+=idx_hvals[k]*idx_in[ind1+2*%(self_dy)s];
sum4+=idx_hvals[k]*idx_in[ind1+3*%(self_dy)s];
}
}else if(iter_n + 2*%(self_dy)s < Os[1]
&& iter_n>dim_ker[1]-1
&& iter_n<dim_im[1]-dim_ker[1]+1){
//cout<<2<<endl;
nb_sum=2;
// if(iter_n==dim_ker[1]-1){//k-1<min(pos_n+%(self_dy)s,(int)dim_ker[1])){
// sum2+=idx_hvals[k-1]*idx_in[pos_n-k-%(self_dy)s];
// }
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+%(self_dy)s];
}
// sum2+=idx_hvals[k]*idx_in[pos_n-k+%(self_dy)s];
// sum+=idx_hvals[k]*idx_in[pos_n-k];
// k++;
}else{
//cout<<1<<endl;
nb_sum=1;
/*
%(type)s sum_=0;
if((k-max_k) & 0x1 != 0){
sum+= idx_hvals[k] * idx_in[pos_n-k];
}
for (int ind1=pos_n-k; k<max_k; k+=2,ind1-=2) {
sum+= idx_hvals[k] * idx_in[ind1];
sum_+= idx_hvals[k+1] * idx_in[ind1-1];
}
sum+=sum_;
*/
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
}
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker[1];k++) sum+= idx_hvals[k]*fill_value;
}
}else{//valid mode
const %(type)s* idx_in=&in[ind0*dim_im[1]];
const %(type)s* idx_hvals=&hvals[j*dim_ker[1]];
if(iter_n + 4*%(self_dy)s < Os[1]){
nb_sum=4;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
sum2+=idx_hvals[k]*idx_in[im_idx+%(self_dy)s];
sum3+=idx_hvals[k]*idx_in[im_idx+2*%(self_dy)s];
sum4+=idx_hvals[k]*idx_in[im_idx+3*%(self_dy)s];
}
}else if(iter_n + 2*%(self_dy)s < Os[1]){
nb_sum=2;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
sum2+=idx_hvals[k]*idx_in[im_idx+%(self_dy)s];
}
}else{
nb_sum=1;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
}
}
}//else valid mode
}//for j
switch(nb_sum){
case 4: out[iter_m*dim_zz[1]+iter_n+3] %(affectation)s sum4;
case 3: out[iter_m*dim_zz[1]+iter_n+2] %(affectation)s sum3;
case 2: out[iter_m*dim_zz[1]+iter_n+1] %(affectation)s sum2;
case 1: out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum;
}
iter_n+=nb_sum-1;
/*
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum;
if(nb_sum>=2){
iter_n++;
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum2;
}
if(nb_sum>=3){
iter_n++;
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum3;
}
if(nb_sum>=4){
iter_n++;
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum4;
}
*/
}//for iter_n
}//for iter_m
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out[i];
std::cout << "\\n";
}
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
import numpy
import theano
import theano.sandbox.scan
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
def test_1():
def f_pow2(x_tm1):
return (2*x_tm1, {})
s = theano.tensor.dvector()
n_steps = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_pow2, [],s, [],n_steps = n_steps)
f1 = theano.function([s,n_steps], Y)
assert( numpy.allclose(f1([1],3), [2,4,8]) )
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2():
def f_rnn(u_t,x_tm1,W_in, W):
return (u_t*W_in+x_tm1*W, {})
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_rnn, u,x0,[W_in,W])
f2 = theano.function([u,x0,W_in,W], Y)
assert(numpy.allclose(f2([1,2,3,4],[1],.1,1), numpy.array([1.1,1.3,1.6,2.])))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3():
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_t,x_tm1):
return (u_t*W_in+x_tm1*W, {})
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0,[])
f3 = theano.function([u,x0], Y)
assert(numpy.allclose(f3([1,2,3,4],[1]), numpy.array([1.1,1.3,1.6,2.])))
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4():
W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
W_out = theano.shared(numpy.array([.5,1.]), name = 'wout')
W_in1 = theano.tensor.dmatrix('win')
u1 = theano.tensor.dmatrix('u1')
u2 = theano.tensor.dvector('u2')
x0 = theano.tensor.dmatrix('x0')
y0 = theano.tensor.dvector('y0')
## Why dot doesn't work with scalars !??
## Why * doesn't support SharedVariable and TensorVariable
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
return ({}, [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)])
Y = theano.sandbox.scan.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1)
f4 = theano.function([u1,u2,x0,y0,W_in1], Y)
(x,y) = f4( numpy.array([[1,2],[1,2],[1,2]]), \
numpy.array([1,2,3]), \
numpy.array([[0,0]]), \
numpy.array([1]), \
numpy.array([[1,1],[1,1]]))
assert( numpy.all(x == numpy.array([[4.,5.],[18.,16.],[58.,43.]])))
assert( numpy.all(y == numpy.array([0.,7.,25.])))
# basic ESN using updates
def test_5():
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]})
f5 = theano.function([u,y0],Y)
assert( numpy.allclose(f5( numpy.array([1,2,3]), numpy.array([0])), \
numpy.array([0.,1.4,3.15])))
# basic ESN using updates ; moving backwards
def test_6():
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]}, \
go_backwards = True)
f6 = theano.function([u,y0],Y)
assert( numpy.allclose(f6( numpy.array([1,2,3]), numpy.array([0])), \
numpy.array([0., 4.5, 3.45])))
'''
TO TEST:
- test taps (for sequences and outputs )
- test gradient (one output)
- test gradient (multiple outputs)
- test gradient (go_backwards)
- test gradient (multiple outputs / some uncomputable )
- test gradient (truncate_gradient)
- test gradient (force_gradient)
- test inplace map
'''
if __name__=='__main__':
test_1()
test_2()
test_3()
test_4()
test_5()
test_6()
"""Provide Scan and related functions
Scanning a function over sequential input(s) producing sequential output(s).
Scanning is a general form of recurrence, which can be used for looping.
The idea is that you 'scan' a function along some input sequence, producing
an output at each time-step that can be seen (but not modified) by the
function at the next time-step. (Technically, the function can see the
previous K time-steps.)
So for example, ``sum()`` could be computed by scanning the ``z+x_i``
function over a list, given an initial state of ``z=0``.
Special cases:
- A ``reduce()`` operation can be performed by returning only the last
output of a scan.
- A ``map()`` operation can be performed by applying a function that
ignores each previous output.
Often a for loop can be expressed as a scan() operation, and scan is the
closest that theano comes to looping.
This module provides scanning functionality with the `Scan` Op.
"""
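The sum/reduce/map relationships described in the docstring above can be illustrated with a minimal eager sketch in plain Python (`py_scan` is a hypothetical helper for illustration, not part of this module's API):

```python
def py_scan(fn, sequence, init):
    # Thread a state through `sequence` with `fn`, collecting every
    # intermediate state -- the eager analogue of scanning.
    outputs = []
    state = init
    for x in sequence:
        state = fn(state, x)
        outputs.append(state)
    return outputs

# sum() as a scan of z + x_i with initial state z = 0
print(py_scan(lambda z, x: z + x, [1, 2, 3, 4], 0))  # [1, 3, 6, 10]
# reduce() keeps only the last output; map() ignores the previous state
```

Here `reduce()` is `py_scan(...)[-1]` and `map()` is a scan whose function never reads `state`.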
__docformat__ = 'restructuredtext en'
import numpy
import theano
from theano.tensor import opt
......@@ -49,7 +18,7 @@ def info(*msg):
def hash_list(list):
hash_value = 0
for v in list:
hash_value ^= hash(v)
return hash_value
......@@ -57,137 +26,43 @@ def hash_list(list):
# as values either numbers or list of numbers
def hash_dict(dictionary):
hash_value = 0
for k,v in dictionary.iteritems():
# hash key
hash_value ^= hash(k)
if type(v) in (list,tuple):
hash_value ^= hash_list(v)
else:
hash_value ^= hash(v)
return hash_value
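Because both helpers combine element hashes with XOR, the result is independent of element order — a worthwhile property to note, since two permutations of the same values hash identically. A standalone sketch of the scheme (`hash_list_xor` is a hypothetical name mirroring `hash_list` above):

```python
def hash_list_xor(values):
    # XOR-combine the hash of each element; XOR is commutative and
    # associative, so the order of `values` does not matter.
    h = 0
    for v in values:
        h ^= hash(v)
    return h

# permutations of the same values produce the same hash
assert hash_list_xor([1, 2, 3]) == hash_list_xor([3, 1, 2])
```

This is acceptable here because the taps dictionaries are small, but it means the hash alone cannot distinguish two different orderings of the same taps.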
def scan(fn, sequences, initial_states, non_sequences, inplace_map={},
sequences_taps={}, outputs_taps = {},
n_steps = theano.tensor.zero(), force_gradient = False,
truncate_gradient = -1, go_backwards = False, mode = 'FAST_RUN'):
'''The function creates a more intuitive interface to the scan op.
This function first creates a scan op object, and afterwards applies it
to the input data. The scan operation iterates over X sequences producing
Y outputs. The function that is applied recursively may consult several
previous outputs from the past as well as past values and future values
of the input. You can see it as having the inputs :
X sequence inputs x_1, x_2, .. x_X
Y seeds/initial values ( u_1, u_2, .. u_Y) for the outputs
W non sequence inputs w_1, w_2, .. w_W
Outputs :
Y sequence outputs y_1, y_2, .. y_Y
Each output y_j is computed one time step at a time according to the
formula:
.. code-block:: python
(y_1[t], y_2[t], .. y_Y[t]) = f(
x_1[t-K_1],.. x_1[t],x_1[t+1],.. x_1[t+L_1], # x_1 past and future
#values
x_2[t-K_2],.. x_2[t],x_2[t+1],.. x_2[t+L_2], # x_2 past and future
# values
... # ...
y_1[t-1], y_1[t-2], .. y_1[t - T_1], # past values of y_1
y_2[t-1], y_2[t-2], .. y_2[t - T_2], # past values of y_2
...
w_1, w_2, .., w_W) # 'timeless' inputs
:param fn: fn is a lambda expression or a function that given a list of
symbolic inputs returns the update list and symbolic outputs list of the
function that shall be applied recursively.
:param sequences: list of sequences over which the scan op should iterate;
a sequence's length should also cover its past and future taps; for
example, if a sequence uses the past tap -3 and the future tap +4, its
total length should be n+7, where the first 3 values of the sequence are
those corresponding to -3 -2 -1 and the last 4 values correspond to n+1
n+2 n+3 and n+4
:param non_sequences: list of inputs over which it shouldn't iterate
:param initial_states: initial values (seeds) of the outputs; if past
taps are used, these should contain enough values to cover them;
note that index 0 of an initial state corresponds to the largest past tap
:param inplace_map: a dictionary telling which output should be
computed in place of which input sequence ; input sequence has to be
of the same shape as the output
:param sequences_taps: a dictionary telling for each sequence what past
and future taps it should use; past taps should be negative, future
taps positive; by default 0 (the current value) is added to this
dictionary if nothing is provided
:param outputs_taps: a dictionary telling for each output what past
taps it should use (negative values); by default -1 is added to this
dictionary if nothing is provided
:param n_steps: a value (or theano scalar) describing for how many steps
the scan should iterate; 0 means that it should iterate over the entire
length of the input sequence(s)
:param force_gradient: a flag telling the scan op that the gradient can
be computed even though inplace or updates are used - use this at your
own risk
:param truncate_gradient: tells how many steps scan should go
back in time on the backward pass of backpropagation through time
:param go_backwards: a flag indicating if scan should iterate back from
the end of the sequence to the beginning (if it is true) or from 0 to
the end
:param mode: indicates the mode that should be used to compile the
function that will be applied recursively
'''
# check if inputs are just single variables instead of lists
if not (type(sequences) in (list, tuple)):
seqs = [sequences]
else:
seqs = sequences
if not (type(initial_states) in (list,tuple)):
init_outs = [initial_states]
else:
init_outs = initial_states
if not (type(non_sequences) in (list,tuple)):
non_seqs = [non_sequences]
else:
non_seqs = non_sequences
# compute number of sequences and number of outputs
n_seqs = len(seqs)
n_outs = len(init_outs)
# update sequences_taps[idx] to contain 0 if it is not defined
......@@ -197,93 +72,79 @@ def scan(fn, sequnces, non_sequences, seed_values, inplace_map={},
# if input sequence is not actually used by the recursive function
elif sequences_taps[i] == []:
sequences_taps.__delitem__(i)
elif not (type(sequences_taps[i]) in (list,tuple)):
sequences_taps[i] = [sequences_taps[i]]
# update outputs_taps[idx] to contain -1 if it is not defined
for i in xrange(n_outs):
if not outputs_taps.has_key(i):
outputs_taps.update({i:[-1]})
# if output sequence is not actually used as input to the recursive
# function
elif outputs_taps[i] == []:
outputs_taps.__delitem__(i)
elif not(type(outputs_taps[i]) in (list,tuple)):
outputs_taps[i] = [outputs_taps[i]]
# create theano inputs for the recursive function
args = []
for (i,seq) in enumerate(seqs):
if sequences_taps.has_key(i):
for k in xrange(len(sequences_taps[i])):
args += [seq[0].type() ]
for (i,init_out) in enumerate(init_outs):
if outputs_taps.has_key(i):
for k in xrange(len(outputs_taps[i])):
args += [init_out[0].type() ]
args += non_seqs
t1,t2 = fn(*args)
# check to see which is the updates list and which is the list of outs
if not ( (type(t1) in (list,tuple)) or (type(t1) == dict)) :
next_outs = [t1]
updates = t2
elif not ( (type(t2) in (list,tuple)) or (type(t2) == dict)) :
next_outs = [t2]
updates = t1
elif type(t1) == dict :
next_outs = t2
updates = t1
elif type(t2) == dict :
next_outs = t1
updates = t2
elif type(t1[0]) in (list,tuple):
next_outs = t2
updates = t1
else:
next_outs = t1
updates = t2
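The branch chain above lets `fn` return its outputs list and its updates dictionary in either order. The same heuristic can be sketched standalone (`split_outs_updates` is a hypothetical helper name, not part of this module):

```python
def split_outs_updates(t1, t2):
    # A dict is always the updates; a bare (non-list, non-dict) value is
    # always a single output. Mirrors the disambiguation heuristic above.
    if isinstance(t1, dict):
        return (t2 if isinstance(t2, (list, tuple)) else [t2]), t1
    if isinstance(t2, dict):
        return (t1 if isinstance(t1, (list, tuple)) else [t1]), t2
    # fall back: a bare non-sequence value must be the single output
    if not isinstance(t1, (list, tuple)):
        return [t1], t2
    return t1, t2

outs, upd = split_outs_updates({'x': 1}, [2, 3])
print(outs, upd)  # [2, 3] {'x': 1}
```

Note the ambiguity when both return values are plain lists: like the code above, the sketch can only resolve that case by inspecting element types.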
# Create the Scan op object
local_op = Scan( (args,next_outs, updates), n_seqs,n_outs,inplace_map,
sequences_taps, outputs_taps, force_gradient, truncate_gradient,
go_backwards, mode)
# Call the object on the input sequences, initial values for outs,
# and non sequences
return local_op( *( [theano.tensor.as_tensor(n_steps)] \
+ seqs \
+ init_outs \
+ non_seqs))
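The tap convention documented in the docstring above (padded sequences, initial states covering the largest past tap) can be sketched eagerly in plain Python; `scan_with_taps` is a hypothetical illustration, not the Theano API:

```python
def scan_with_taps(fn, x, y_init, x_taps, y_taps, n_steps):
    # `x` is padded so that x[t - min(x_taps) + tap] is valid for every
    # tap in x_taps; `y_init` supplies enough past outputs to cover
    # min(y_taps), with index 0 belonging to the largest past tap.
    pad = -min(y_taps)            # number of past outputs y_init provides
    y = list(y_init)              # y[pad + t] is the output at step t
    for t in range(n_steps):
        x_args = [x[t - min(x_taps) + tap] for tap in x_taps]
        y_args = [y[pad + t + tap] for tap in y_taps]
        y.append(fn(*(x_args + y_args)))
    return y[pad:]

# cumulative sum: y[t] = x[t] + y[t-1], seeded with y[-1] = 0
print(scan_with_taps(lambda x_t, y_tm1: x_t + y_tm1,
                     [1, 2, 3], [0], [0], [-1], 3))  # [1, 3, 6]
```

The index arithmetic `t - min(x_taps) + tap` is the same shift the op's `perform` uses to map logical taps onto the padded input buffer.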
class Scan(theano.Op):
'''The class implementing the scan op. I would not recommend using it
directly unless you really know what you are doing.
'''
def __init__(self,(inputs, outputs, updates),n_seqs, n_outs,
inplace_map={}, seqs_taps={}, outs_taps={},
force_gradient = False, truncate_gradient = -1,
go_backwards = False, mode = 'FAST_RUN', inplace=False):
'''
:param inputs: list of symbolic inputs of the function that will
be applied recursively
:param outputs: list of symbolic outputs for the function applied
recursively
:param updates: list of updates for the function applied recursively
:param n_seqs: number of sequences in the input over which it needs
to iterate
:param n_outs: number of outputs (same as the number of initial states)
:param inplace_map: dictionary describing which output should be
computed inplace of which input
:param seqs_taps: dictionary describing which past and future taps
of the input sequences are used by the recursive function
:param outs_taps: dictionary describing which past taps of the
outputs the recursive function is using
:param force_gradient: a flag indicating if the gradient is still
computable even though inplace operations or updates are used
:param truncate_gradient: if different from -1, it tells after how
many steps the backward pass of BPTT should be truncated
:param mode: the mode used to compile the function applied recursively
'''
# check inplace map
for _out,_in in inplace_map.iteritems():
if _out > n_outs:
raise ValueError(('Inplace map refers to a nonexistent '
'output %d')% _out)
if _in > n_seqs:
......@@ -295,19 +156,19 @@ class Scan(theano.Op):
#check sequences past taps
for k,v in seqs_taps.iteritems():
if k > n_seqs:
raise ValueError(('Sequences past taps dictionary refers to '
'a nonexistent sequence %d')%k)
#check outputs past taps
for k,v in outs_taps.iteritems():
if k > n_outs:
raise ValueError(('Outputs past taps dictionary refers to '
'a nonexistent output %d')%k)
if max(v) > -1:
raise ValueError(('Can not require future value %d of output' \
' %d')%(max(v),k))
......@@ -318,8 +179,8 @@ class Scan(theano.Op):
self.seqs_taps = seqs_taps
self.outs_taps = outs_taps
self.n_seqs = n_seqs
self.n_outs = n_outs
self.n_args = n_seqs+n_outs+1
self.inplace_map = inplace_map
self.inplace = inplace
self.inputs = inputs
......@@ -328,8 +189,7 @@ class Scan(theano.Op):
self.force_gradient = force_gradient
self.truncate_gradient = truncate_gradient
self.go_backwards = go_backwards
self.fn = theano.function(inputs,outputs, \
updates = updates, mode = mode)
......@@ -355,9 +215,13 @@ class Scan(theano.Op):
# Create list of output datatypes
out_types = []
for i in xrange(self.n_seqs+1, self.n_seqs+self.n_outs+1):
if not (inputs[i] == []):
out_types += [theano.tensor.Tensor(dtype=inputs[i].dtype,\
broadcastable=(False,)+inputs[i].broadcastable[1:])()]
else:
raise ValueError(('You need to provide initial states for outputs'
' such that scan can infer what datatype they are'))
return theano.Apply(self,inputs, out_types)
......@@ -376,15 +240,15 @@ class Scan(theano.Op):
(self.inplace == other.inplace) and\
(self.go_backwards == other.go_backwards) and\
(self.truncate_gradient == other.truncate_gradient) and\
(self.force_gradient == other.force_gradient) and\
(self.n_outs == other.n_outs) and\
(self.n_args == other.n_args)
return rval
def __hash__(self):
return hash(type(self)) ^ \
hash(self.n_seqs) ^ \
hash(self.n_outs) ^ \
hash(self.force_gradient) ^\
hash(self.inplace) ^\
hash(self.go_backwards) ^\
......@@ -392,11 +256,10 @@ class Scan(theano.Op):
hash(self.n_args) ^ \
hash_list(self.outputs) ^ \
hash_list(self.inputs) ^ \
hash_list(self.g_ins) ^ \
hash_list(self.g_outs) ^ \
hash_dict(self.seqs_taps) ^\
hash_dict(self.outs_taps) ^\
hash_dict(self.inplace_map) ^\
hash_dict(self.updates)
......@@ -405,7 +268,7 @@ class Scan(theano.Op):
def perform(self,node,args, outs):
n_steps = 0
if (self.n_seqs ==0 ) and (args[0] == 0):
raise ValueError('Scan does not know over how many steps it '
'should iterate! No input sequence or number of steps to '
'iterate given !')
......@@ -417,10 +280,10 @@ class Scan(theano.Op):
if self.seqs_taps.has_key(i):
# compute actual length of the sequence ( we need to see what
# past taps this sequence has, and leave room for them
seq_len = args[i+1].shape[0] + min(self.seqs_taps[i])
if max( self.seqs_taps[i]) > 0:
# using future values, so need to end the sequence earlier
seq_len -= max(self.seqs_taps[i])
if n_steps == 0 :
# length of the sequences, leaving room for the largest
n_steps = seq_len
......@@ -437,9 +300,9 @@ class Scan(theano.Op):
inplace_map = {}
# check lengths of init_outs
for i in xrange(self.n_seqs+1, \
self.n_seqs+self.n_outs+1):
if self.outs_taps.has_key(i-self.n_seqs-1):
req_size = abs(min(self.outs_taps[i-self.n_seqs-1]))-1
if args[i].shape[0] < req_size:
......@@ -448,83 +311,82 @@ class Scan(theano.Op):
' for missing values')%(i-self.n_seqs-1,req_size))
self.n_steps = n_steps
y = self.scan(self.fn, args[1:],self.n_seqs, self.n_outs,
self.seqs_taps, self.outs_taps, n_steps, self.go_backwards,
inplace_map)
# write to storage
for i in xrange(self.n_outs):
outs[i][0]=y[i]
def scan(self,fn, args, n_seqs, n_outs, seqs_taps, outs_taps, n_steps,
go_backwards, inplace_map):
y = []
for i in xrange(n_outs):
if inplace_map.has_key(i) and (inplace_map[i] >= 0):
y += [args[inplace_map[i]]]
else:
y_shape = (n_steps,)+args[i+n_seqs].shape[1:]
y += [numpy.empty(y_shape,
dtype=args[i+n_seqs].dtype)]
seqs_mins = {}
for j in xrange(n_seqs):
if seqs_taps.has_key(j):
seqs_mins.update({j: min(seqs_taps[j])})
outs_mins = {}
initOuts_size = {}
for j in xrange(n_outs):
if outs_taps.has_key(j):
outs_mins.update({j: min(outs_taps[j])})
initOuts_size.update({j: args[n_seqs+j].shape[0]})
for i in xrange(n_steps):
fn_args = []
# sequences over which scan iterates
# check to see if we are scanning them backwards or not
_i = i
if go_backwards:
_i = n_steps-1-i
for j in xrange(n_seqs):
if seqs_taps.has_key(j):
ls_taps = seqs_taps[j]
min_tap = seqs_mins[j]
for tap_value in ls_taps:
k = _i - min_tap + tap_value
fn_args += [args[j][k]]
# past values of outputs
for j in xrange(n_outs):
if outs_taps.has_key(j):
ls_taps = outs_taps[j]
min_tap = outs_mins[j]
sz = initOuts_size[j]
for tap_value in ls_taps:
if i + tap_value < 0:
k = i + sz + tap_value
if k < 0:
# past value not provided.. issue a warning and use 0s
fn_args += [numpy.zeros(args[j+n_seqs][0].shape)]
warning(('Past value %d for output %d not given in initial '
'out') % (tap_value,j))
else:
fn_args += [args[j+n_seqs][k]]
else:
fn_args += [y[j][i + tap_value]]
# get the non-iterable sequences
fn_args += list(args[(n_seqs+n_outs):])
# compute output
something = fn(*fn_args)
#update outputs
for j in xrange(n_outs):
y[j][i] = something[j]
return y
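The `_i = n_steps-1-i` remapping in the loop above is what implements `go_backwards`: outputs still fill `y[0], y[1], ...` in step order, while the input sequence is consumed from the end. A minimal standalone sketch of the same index trick (hypothetical helper name):

```python
def scan_steps(seq, go_backwards=False):
    # The loop index i always runs 0..n-1 (so outputs are written in
    # step order), while _i selects which input element feeds step i.
    n_steps = len(seq)
    out = []
    for i in range(n_steps):
        _i = n_steps - 1 - i if go_backwards else i
        out.append(seq[_i])
    return out

print(scan_steps([1, 2, 3], go_backwards=True))  # [3, 2, 1]
```

Keeping the write index monotonic means the output buffer layout is identical in both directions; only the read index changes.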
......@@ -560,7 +422,7 @@ class Scan(theano.Op):
g_scan = ScanGrad((self.g_ins,self.g_outs), self.n_seqs, \
self.n_outs,self.seqs_taps, self.outs_taps,
self.truncate_gradient)
return g_scan(g_args)
......@@ -573,7 +435,7 @@ def scan_make_inplace(node):
if isinstance(op, Scan) and (not op.inplace) \
and (op.inplace_map.keys() != []):
return Scan((op.inputs, op.outputs, op.updates), op.n_seqs, \
op.n_outs, op.inplace_map, op.seqs_taps, op.outs_taps, \
op.force_gradient, op.truncate_gradient, \
op.go_backwards, inplace=True \
).make_node(*node.inputs).outputs
......@@ -673,12 +535,12 @@ class ScanGrad(theano.Op):
for j in xrange(self.n_outs):
if self.outs_taps.has_key(j):
outs_mins.update({j: min(self.outs_taps[j])})
seed_size.update({j: g_seeds[j].shape[0]})
for i in the_range:
# time slice of inputs
_ins = []
for j in xrange(self.n_seqs):
if self.seqs_taps.has_key(j):
ls_taps = self.seqs_taps[j]
min_tap = seqs_mins[j]
......@@ -701,7 +563,7 @@ class ScanGrad(theano.Op):
warning('Past value %d for output %d not given' \
%(tap_value,j))
else:
_outs += [seeds[j][k]]
else:
_outs += [outs[j][i + tap_value]]
......
......@@ -41,7 +41,7 @@ def flip(kern, kshp):
global_rng = N.random.RandomState(3423489)
dmatrix4=T.TensorType('float64', (False, False, False, False))
def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll_batch=0, unroll_kern=0, img=T.dmatrix(), validate=True, conv_op_py=False, do_convolve2=False, do_print=True, repeat=1, unroll_patch=0):
# build actual input images
imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])
......@@ -121,7 +121,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
hidval1=outval.copy()
# ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch)(inputs4, kerns4)
l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op)
......@@ -280,8 +280,10 @@ class TestConvOp(unittest.TestCase):
assert (N.abs(out2_-out3_)<1e-5).all()
# REFERENCE IMPLEMENTATION: compute output with convolve2d
if conv_mode=='valid':
fulloutshp = N.array(imshp[1:]) - N.array(kshp) + 1
else:
fulloutshp = N.array(imshp[1:]) + N.array(kshp) - 1
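The two branches above encode the standard convolution output shapes: `'valid'` shrinks each axis by `kshp-1`, `'full'` grows it by `kshp-1`. A standalone sketch of the same arithmetic (`conv_out_shape` is a hypothetical helper, not part of this test file):

```python
def conv_out_shape(imshp, kshp, mode):
    # 'valid': only positions where the kernel fits entirely inside the
    # image; 'full': every position with any overlap at all.
    if mode == 'valid':
        return tuple(i - k + 1 for i, k in zip(imshp, kshp))
    return tuple(i + k - 1 for i, k in zip(imshp, kshp))

print(conv_out_shape((10, 10), (3, 3), 'valid'))  # (8, 8)
print(conv_out_shape((10, 10), (3, 3), 'full'))   # (12, 12)
```

This also shows why only `'full'` mode can accept kernels larger than the input: in `'valid'` mode the shape would go nonpositive, which is the case the test skips below.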
ntime1 = time.time()
refout = N.zeros((bsize,)+tuple(fulloutshp)+(nkern,))
for b in range(bsize):
......@@ -326,7 +328,7 @@ class TestConvOp(unittest.TestCase):
ssizess = [[(1,1),(1,2)],[(1,1),(2,2)]]
convmodes = ['valid','full']
do_convolve2=True
unroll = [(0,0,False),(0,0,True),(1,1,False),(2,2,False),(3,2,False)]#(batch,kern,patch)
do_speed_test = False
# TODO: this version shows a bug that was fixed
......@@ -336,6 +338,11 @@ class TestConvOp(unittest.TestCase):
# nkerns = [2,2] # per output pixel
# ssizes = [(1,1),(2,2)]#2,2)]
# bsizes = [1,1] # batch size
# imshp_starts = [(1,10,10),(1,5,6)]
# kshpss = ([[2,3],[3,2]],[[2,2],[2,2]])
# nkernss = [[1,1],[1,1]] # per output pixel
N.set_printoptions(threshold=N.nan)
# symbolic stuff
......@@ -354,8 +361,8 @@ class TestConvOp(unittest.TestCase):
unroll_batch = [1,2,4,5,10,20]
unroll_kern = [1,2,4,5,10,20]
unroll_batch = [1,4,5]
unroll_kern = [1,4,5]
bsize = 20 # batch size
imshp_start = (1,48,48)#un square shape to test more corner case.
......@@ -372,46 +379,86 @@ class TestConvOp(unittest.TestCase):
timing = N.zeros((len(unroll_batch),len(unroll_kern),3))
t_b_k=[]
#calculate the timing with unrolling
best=[0.52690219879150391, 2.4266397953033447]
worst=[0.92042708396911621, 6.8822150230407715]
t_=[]
for unroll_b, n_b in zip(unroll_batch,range(len(unroll_batch))):
for unroll_k, n_k in zip(unroll_kern,range(len(unroll_kern))):
t_b_k.append(str(unroll_b)+"/"+str(unroll_k))
if not t_:
tctot, tpytot, ntot=[],[],[]
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))):
tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate)
tctot+=[tctot_]
tpytot+=[tpytot_]
ntot+=[ntot_]
if unroll_b==4 and unroll_k==4:
print "unroll 4/4",tctot
best=tctot
if unroll_b==1 and unroll_k==1:
print "unroll 1/1",tctot
worst=tctot
timing[n_b,n_k]=[sum(tctot), sum(tpytot), sum(ntot)]
if not t_:
t=timing[:,:,0]#We select only the c timing.
else:
t=t_
t=N.asarray(t)
#calculate the old timing
tctot_=[0.52555489540100098, 6.6634182929992676]
# tctot_=[]
tctot,tpytot,ntot=[],[],[]
if not tctot_:
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))):
tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate)
tctot+=[tctot_]
tpytot+=[tpytot_]
ntot+=[ntot_]
else: tctot=N.asarray(tctot_)
print "old code timing %.3fs"%sum(tctot),tctot
best=N.asarray(best)
worst=N.asarray(worst)
print "timing for unrolled version"
print t_b_k
print t
print "max %.3fs"%t.max(), "max param(batch unroll size/kernel unroll size)", t_b_k[t.argmax()]
print "min %.3fs"%t.min(), "min param(batch unroll size/kernel unroll size)", t_b_k[t.argmin()]
print "speedup vs (1/1)%.3fx, vs old %.3fx"% (t.max()/t.min(),sum(tctot)/t.min())
print worst/best,tctot/best
tctot_patch = []
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))):
tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=2)
tctot_patch += [tctot_]
t_patch=sum(tctot_patch)
print "unroll_patch time", tctot_patch
print "speedup vs (1/1)%.3fx, vs old %.3fx"% (t.max()/t_patch,sum(tctot)/t_patch)
print best/tctot_patch, worst/tctot_patch
print best
print worst
print tctot
print tctot_patch
return
for i in range(len(kshpss)):
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizess[i],range(len(ssizess[i]))):
for un_b, un_k, un_p in unroll:
tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
conv_mode, ss, bsizes[i], imshp_starts[i],
kshpss[i], nkernss[i],
img=img, unroll_batch=un_b, unroll_kern=un_k,
unroll_patch=un_p,
validate=True)
tctot+=[tctot_]
tpytot+=[tpytot_]
......@@ -426,6 +473,11 @@ class TestConvOp(unittest.TestCase):
d=N.asarray(ntot)/tpytot
print 'speed up py theano(ConvOp) vs convolve2d: %.3fx'%d.mean(),d
def init_data(self,shape):
return N.ones(shape)
return N.random.random(shape)
def test_ConvOpGrad(self):
"""
test the gradient in float and double
......@@ -440,24 +492,27 @@ class TestConvOp(unittest.TestCase):
kshps = [(2,3)]
imshps = [(2,3,4)]
modes = ['valid', 'full']
unroll = [(0,0,True),(1,1,False),(2,3,False),(1,1,False),(0,0,False)]#(batch,kern,patch)
ssizes = [(1,1),(2,2)]
for typ in types:
imgs = T.TensorType(typ, (False, False, False, False),'imgs')
kerns = T.TensorType(typ, (False, False, False, False),'kerns')
for mode in modes:
for imshp in imshps:
if len(imshp)!=3:
visdim = 1
else:
visdim = imshp[0]
imgvals = N.array(N.random.random(N.hstack((bsize,imshp))),dtype=imgs.dtype)
for kshp in kshps:
t=numpy.array([imshp[1]-kshp[0],imshp[2]-kshp[1]])
kernvals = N.array(self.init_data((nkern,visdim,kshp[0],
kshp[1])),dtype=kerns.dtype)
# 'full' mode should support kernels bigger than the input
if mode == 'valid' and (t<0).any():
continue
for un_b,un_k, un_p in unroll:
for ss in ssizes:
print 'test_ConvOpGrad'
print 'mode type:', mode, typ
......@@ -471,22 +526,25 @@ class TestConvOp(unittest.TestCase):
def test_i(imgs):
convop = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1],
output_mode=mode, unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p)
return convop(imgs, kernvals)
def test_k(kerns):
convop = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1],
output_mode=mode, unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p)
return convop(imgvals, kerns)
print mode, imshp, kshp, un_b, un_k, ss
#TODO the tolerance needed to pass is very high for float32(0.17). Is this acceptable? Expected?
tol = None
if typ=="float32":
tol = 0.17
utt.verify_grad(test_i, [imgvals],
cast_to_output_type=True,
tol=tol)
utt.verify_grad(test_k, [kernvals],
cast_to_output_type=True,
tol=tol)
if __name__ == '__main__':
......
from scan import Scan
import unittest
import theano
import theano.sandbox.scan
import random
import numpy.random
......@@ -74,20 +75,214 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None,
def compareArrays(a,b):
if type(a) in (list,tuple):
a = numpy.array(a)
if type(b) in (list, tuple):
b = numpy.array(b)
return numpy.all( abs(a-b) < 1e-5)
# Naming convention :
# u_1,u_2,.. -> sequences
# s_1,s_2,.. -> initial states
# w_1,w_2,.. -> non-sequences
###################################
class T_Scan(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_one(self):
pass
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
def test_1(self):
def f_pow2(x_tm1):
return (2*x_tm1, {})
s = theano.tensor.dvector()
n_steps = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_pow2, [],s, [],n_steps = n_steps)
f1 = theano.function([s,n_steps], Y)
assert(compareArrays(f1([1],3), [2,4,8]))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2(self):
def f_rnn(u_t,x_tm1,W_in, W):
return (u_t*W_in+x_tm1*W, {})
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_rnn, u,x0,[W_in,W])
f2 = theano.function([u,x0,W_in,W], Y)
v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array([1])
v_out = numpy.array([1.1,1.3,1.6,2.])
assert(compareArrays( f2(v_u,v_x0,.1,1), v_out ) )
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_t,x_tm1):
return (u_t*W_in+x_tm1*W, {})
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0,[])
f3 = theano.function([u,x0], Y)
v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array([1.])
v_out = numpy.array([1.1,1.3,1.6,2.])
assert(compareArrays(f3(v_u,v_x0),v_out))
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4(self):
W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
W_out = theano.shared(numpy.array([.5,1.]), name = 'wout')
W_in1 = theano.tensor.dmatrix('win')
u1 = theano.tensor.dmatrix('u1')
u2 = theano.tensor.dvector('u2')
x0 = theano.tensor.dmatrix('x0')
y0 = theano.tensor.dvector('y0')
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
return ({}, [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)])
Y = theano.sandbox.scan.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1)
f4 = theano.function([u1,u2,x0,y0,W_in1], Y)
v_u1 = numpy.array([[1.,2.],[1.,2.],[1.,2.]])
v_u2 = numpy.array([1.,2.,3.])
v_x0 = numpy.array([[0.,0.]])
v_y0 = numpy.array([1])
v_Win1 = numpy.array([[1.,1.],[1.,1.]])
v_x = numpy.array([[4.,5.],[18.,16.],[58.,43.]])
v_y = numpy.array([0.,7.,25.])
(x,y) = f4( v_u1, v_u2, v_x0, v_y0, v_Win1)
assert( compareArrays(x,v_x))
assert( compareArrays(y,v_y))
# basic ESN using updates
def test_5(self):
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]})
f5 = theano.function([u,y0],Y)
v_u = numpy.array([1.,2.,3.])
v_y0 = numpy.array([0.])
v_out = numpy.array([0.,1.5,3.15])
out = f5( v_u, v_y0 )
assert( compareArrays(v_out, out))
# basic ESN using updates ; moving backwards
def test_6(self):
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]}, \
go_backwards = True)
f6 = theano.function([u,y0],Y)
v_u = numpy.array([1.,2.,3.])
v_y0 = numpy.array([0])
v_out = numpy.array([0.,4.5,3.45])
out = f6(v_u, v_y0)
assert( compareArrays(out, v_out))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
def test_7(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_tm2, x_tm1, x_tm2):
return (u_tm2*W_in+x_tm1*W+x_tm2, {})
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]})
f7 = theano.function([u,x0], Y)
#print f7([1,2,3,4],[1,2])
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs) and future taps for sequences
def test_8(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2):
return ((u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2, {})
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]})
f8 = theano.function([u,x0], Y)
#print f8([1,2,3,4,5,6],[1,2])
'''
TO TEST:
- test taps (for sequences and outputs )
- test gradient (one output)
- test gradient (multiple outputs)
- test gradient (go_backwards)
- test gradient (multiple outputs / some uncomputable )
- test gradient (truncate_gradient)
- test gradient (force_gradient)
- test gradient (taps past/future)
- test inplace map
'''
if __name__ == '__main__':
unittest.main()
......@@ -1423,7 +1423,7 @@ def neg(a):
@_scal_elemwise
def inv(a):
"""1.0/a (inplace on a)"""
"""1.0/a"""
@_scal_elemwise
def log(a):
......
......@@ -461,6 +461,10 @@ def test_asymptotic_32():
"""
This test makes sure that our functions behave sensibly when huge values are present
"""
#TODO: consider adding the optimization of crossentropy into the current mode for the
# purpose of running this test
for dtype in 'float32', 'float64':
if dtype == 'float32':
x = tensor.fmatrix()
......@@ -471,7 +475,7 @@ def test_asymptotic_32():
y = tensor.lvector()
c = categorical_crossentropy(softmax(x+x2), y)
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c, x)])
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c.sum(), x)], mode='FAST_RUN')
if 0:
for i, n in enumerate( f.maker.env.toposort()):
print i, n
......
......@@ -2,7 +2,7 @@ from theano.tensor import *
import theano.config as config
from theano import function
#from theano.floatx import set_floatX, xscalar, xmatrix, xrow, xcol, xvector, xtensor3, xtensor4
import theano.floatx as FX
import theano.floatX as FX
def test_floatX():
def test():
......