Commit f6f7cbf4 authored by Joseph Turian

merge

.. _advtutorial:
=================
Advanced Tutorial
=================
Before tackling this tutorial, it is highly recommended to read the :ref:`basictutorial`.
......@@ -39,11 +39,11 @@ templates_path = ['.templates']
source_suffix = '.txt'
# The master toctree document.
master_doc = 'index'
master_doc = 'contents'
# General substitutions.
project = 'theano'
copyright = '2008, LISA lab'
copyright = '2008-2009, LISA lab'
# The default replacements for |version| and |release|, also used in various
# other places throughout the built documents.
......@@ -164,7 +164,7 @@ htmlhelp_basename = 'theanodoc'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, document class [howto/manual]).
latex_documents = [
('index', 'theano.tex', 'theano Documentation',
('contents', 'theano.tex', 'theano Documentation',
'LISA lab', 'manual'),
]
......
.. _contents:
========
Contents
========
.. toctree::
:maxdepth: 2
index
install
tutorials/index
advanced/index
indexes/index
examples/index
glossary
links
LICENSE
......@@ -28,8 +28,6 @@ Glossary of terminology
to know, for any operation which supports broadcasting, which
dimensions will need to be broadcasted. When applicable, this
information is given in the :term:`Type` of a :term:`Result`.
For more information, see the article about broadcasting_.
See also:
......@@ -58,12 +56,18 @@ Glossary of terminology
Examples of elementwise operations in Theano: ``add, sub, mul,
div, neg, inv, log, exp, sin, cos, tan`` and many
others. These operations are all subclasses of :api:`Elemwise
others. These operations are all instances of :api:`Elemwise
<theano.tensor.elemwise.Elemwise>`.
graph
WRITEME
inplace
WRITEME
merge
WRITEME
op
WRITEME
......@@ -86,8 +90,9 @@ Glossary of terminology
type
WRITEME
view
WRITEME
.. _broadcasting: concepts/broadcasting.html
......
......@@ -37,46 +37,101 @@ been Pythagoras' wife.
Theano is released under a BSD license (:ref:`link <license>`)
You can keep reading from :ref:`here <usingtheano>`.
Sneak peek
==========
Here's a very simple example of how to use Theano. It doesn't show
off many of Theano's features, but it illustrates concretely what
Theano is.
.. code-block:: python
import theano
from theano import tensor
# declare two symbolic floating-point scalars
a = tensor.dscalar()
b = tensor.dscalar()
# create a simple expression
c = a + b
# convert the expression into a callable object that takes (a,b)
# values as input and computes a value for c
f = theano.function([a,b], c)
# bind 1.5 to 'a', 2.5 to 'b', and evaluate 'c'
assert 4.0 == f(1.5, 2.5)
Theano is not a programming language in the normal sense, because you
write a program in Python that builds expressions for Theano. Still,
it is like a programming language in the sense that to use Theano you
have to
- declare variables (``a,b``) and give their types
- build expressions for how to put those variables together
- compile expression graphs to functions in order to use them for computation.
It is good to think of ``theano.function`` as the interface to a
compiler which builds a callable object from a purely symbolic graph;
one of Theano's most important features is that ``theano.function``
can optimize a graph and even compile some or all of it into native
machine instructions.
What does it do that they don't?
================================
Theano is a Python library and optimizing compiler for manipulating
and evaluating expressions, especially matrix-valued
ones. Manipulation of matrices is typically done using the numpy
package, so what does Theano do that Python and numpy do not?
- *execution speed optimizations*: Theano can use `g++` to compile
  parts of your expression graph into native machine code, which runs
  much faster than Python.
- *symbolic differentiation*: Theano can build symbolic graphs for
  computing gradients.
- *stability optimizations*: Theano can recognize numerically unstable
expressions and compute them with more stable algorithms.
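The gradient machinery itself is beyond this overview, but the core idea behind building graphs that also compute derivatives can be sketched with a toy dual-number class (an illustration of the concept only, not Theano's implementation):

```python
# Minimal dual-number sketch of automatic differentiation (illustration only).
# Each value carries its derivative along, so evaluating an expression
# also evaluates its gradient.
class Dual:
    def __init__(self, val, der=0.0):
        self.val, self.der = val, der

    def __add__(self, other):
        return Dual(self.val + other.val, self.der + other.der)

    def __mul__(self, other):
        # product rule: (uv)' = u'v + uv'
        return Dual(self.val * other.val,
                    self.der * other.val + self.val * other.der)

x = Dual(3.0, 1.0)   # seed dx/dx = 1
y = x * x + x        # y = x**2 + x
assert y.val == 12.0 # 9 + 3
assert y.der == 7.0  # dy/dx = 2x + 1 = 7 at x = 3
```

Theano works on whole symbolic graphs rather than individual values, but the principle of deriving a gradient computation mechanically from the expression structure is the same.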
There also exist symbolic packages in Python, notably sympy_. Theano
differs from them in that, while it allows symbolic manipulation, it
puts more emphasis on evaluating expressions, and on repeatedly
evaluating them on many different sets of inputs. It is also better
suited to handling very large tensors with no assumed structure.
If numpy_ is to be compared to MATLAB_ and sympy_ to Mathematica_,
Theano is a sort of hybrid of the two which tries to make the best of
both worlds.
Getting started
===============
:ref:`install`
Instructions to download and install Theano on your system.
:ref:`basictutorial`
Getting started with Theano's basic features. Go there if you are
new!
:ref:`advtutorial`
This tutorial is for more advanced users who want to define their
own operations and optimizations. It is recommended to go through
the :ref:`basictutorial` first.
Contents
========
.. toctree::
:maxdepth: 2
theano
install
tutorial/index
advtutorial/index
advanced/index
indexes/index
examples/index
glossary
links
LICENSE
For a complete map of the documentation you may check the
:ref:`contents`. Also, a PDF version of the online documentation may
be found `here <theano.pdf>`_.
Contact us
......@@ -102,6 +157,10 @@ theano-dev_ mailing list.
.. _numpy: http://numpy.scipy.org/
.. _BLAS: http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms
.. _sympy: http://code.google.com/p/sympy/
.. _MATLAB: http://www.mathworks.com/products/matlab/
.. _Mathematica: http://www.wolfram.com/products/mathematica/index.html
.. _theano-users: http://groups.google.com/group/theano-users?pli=1
.. _theano-dev: http://groups.google.com/group/theano-dev?pli=1
.. _task list: http://lgcm.iro.umontreal.ca/theano/query?status=accepted&status=assigned&status=new&status=reopened&group=milestone&max=200&col=id&col=summary&col=status&col=owner&col=type&col=priority&col=component&col=time&report=9&order=priority
......
.. _whatistheano:
===============
What is Theano?
===============
Introduction
============
Theano is a Python library aiming to allow definition, optimization
and efficient evaluation of mathematical expressions involving
multi-dimensional arrays (though it may be extended to support many
other types). Theano melds some aspects of a computer algebra system
(CAS) with aspects of an optimizing compiler. This is particularly
useful in fields such as machine learning where complicated algorithms
must be run over large amounts of data.
Theano supports a wide range of numerical types in multiple
dimensions, a rapidly growing number of well-tested operations as well
as utilities to compute the gradient of an expression with respect to
another. Symbolic expressions may be compiled into functions, which
work merrily on the same data structures as numpy_, allowing for easy
interoperability.
Theano's compiler applies many optimizations of varying
complexity. These optimizations include, but are not limited to
constant folding, merging of similar subgraphs (to avoid calculating
the same values more than once), simple arithmetic simplification
(``x*y/x -> y``), inserting efficient BLAS_ operations and using
inplace operations wherever it is safe to do so. Theano also defines
several optimizations which improve the numerical stability of
computations and it provides a framework to add and test new
optimizers.
Theano was written at the LISA_ to support the development of
efficient machine learning algorithms while minimizing human
time. Theano was named after the `Greek mathematician`_ who may have
been Pythagoras' wife.
Theano is released under a BSD license (:ref:`link <license>`)
.. _usingtheano:
Using Theano
============
Here's a very simple example of how to use Theano. It doesn't show
off many of Theano's features, but it illustrates concretely what
Theano is.
.. code-block:: python
import theano
from theano import tensor
# declare two symbolic floating-point scalars
a = tensor.dscalar()
b = tensor.dscalar()
# create a simple expression
c = a + b
# convert the expression into a callable object that takes (a,b)
# values as input and computes a value for c
f = theano.function([a,b], c)
# bind 1.5 to 'a', 2.5 to 'b', and evaluate 'c'
assert 4.0 == f(1.5, 2.5)
Theano is not a programming language in the normal sense, because you
write a program in Python that builds expressions for Theano. Still,
it is like a programming language in the sense that to use Theano you
have to
- declare variables (``a,b``) and give their types
- build expressions for how to put those variables together
- compile expression graphs to functions in order to use them for computation.
It is good to think of ``theano.function`` as the interface to a
compiler which builds a callable object from a purely symbolic graph;
one of Theano's most important features is that ``theano.function``
can optimize a graph and even compile some or all of it into native
machine instructions.
What does it do that they don't?
================================
Theano is a Python library and optimizing compiler for manipulating
and evaluating expressions, especially matrix-valued
ones. Manipulation of matrices is typically done using the numpy
package, so what does Theano do that Python and numpy do not?
- *execution speed optimizations*: Theano can use `g++` to compile
  parts of your expression graph into native machine code, which runs
  much faster than Python.
- *symbolic differentiation*: Theano can build symbolic graphs for
  computing gradients.
- *stability optimizations*: Theano can recognize numerically unstable
expressions and compute them with more stable algorithms.
There also exist symbolic packages in Python, notably sympy_. Theano
differs from them in that, while it allows symbolic manipulation, it
puts more emphasis on evaluating expressions, and on repeatedly
evaluating them on many different sets of inputs. It is also better
suited to handling very large tensors with no assumed structure.
If numpy_ is to be compared to MATLAB_ and sympy_ to Mathematica_,
Theano is a sort of hybrid of the two which tries to make the best of
both worlds.
Getting Started
===============
:ref:`install`
Instructions to download and install Theano on your system.
:ref:`basictutorial`
Getting started with Theano's basic features. Go there if you are new!
:ref:`advtutorial`
This tutorial is for more advanced users who want to define their own
operations and optimizations. It is recommended to go through the
:ref:`basictutorial` first.
.. _LISA: http://www.iro.umontreal.ca/rubrique.php3?id_rubrique=27
.. _Greek mathematician: http://en.wikipedia.org/wiki/Theano_(mathematician)
.. _numpy: http://numpy.scipy.org/
.. _BLAS: http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms
.. _sympy: http://code.google.com/p/sympy/
.. _MATLAB: http://www.mathworks.com/products/matlab/
.. _Mathematica: http://www.wolfram.com/products/mathematica/index.html
====================================
Implementing the arithmetic Ops in C
====================================
**Next:** `Example 2 - cons_cell`_
.. _Example 2 - cons_cell: ../ex2/index.html
========================
Implementing double in C
========================
**Next:** `Implementing the arithmetic Ops in C`_
.. _Implementing the arithmetic Ops in C: cop.html
==================
Example 1 - double
==================
.. toctree::
type
op
ctype
cop
WRITEME
======================
Making the double type
======================
What is a Type?
===============
A :ref:`type` in Theano, generally speaking, represents a set of
constraints on potential data objects. These constraints allow Theano
to tailor C code to handle them and to statically optimize the
computation graph. For instance, the :ref:`irow <predefinedtypes>`
type in the ``theano.tensor`` package gives the following constraints
on the data the Results of type ``irow`` may contain:
#. Must be an instance of ``numpy.ndarray`` (``isinstance(x, numpy.ndarray)``)
#. Must be an array of 32-bit integers (``str(x.dtype) == 'int32'``)
#. Must have a shape of 1xN (``len(x.shape) == 2 and x.shape[0] == 1``)
Knowing these restrictions, Theano may generate C code for addition,
etc. which contains the right number of loops over the dimensions and
declares the right data types.
Note that a Theano :ref:`type` is not equivalent to a Python type or
class. Indeed, in Theano, :ref:`irow <predefinedtypes>` and
:ref:`dmatrix <predefinedtypes>` both use ``numpy.ndarray`` as the
working data type, yet they are different Types. The constraints set
by ``dmatrix`` are:
#. Must be an instance of ``numpy.ndarray`` (``isinstance(x, numpy.ndarray)``)
#. Must be an array of 64-bit floating point numbers (``str(x.dtype) == 'float64'``)
#. Must have a shape of MxN, no restriction on M or N (``len(x.shape) == 2``)
These differ from the constraints of ``irow`` listed above. There are
cases where a Type can fully correspond to a Python type (such as the
``double`` type we will define here which corresponds to Python's
``float``) but it's good to know that this is not always the case.
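To make the constraints concrete, here is what plain numpy data satisfying each Type's restrictions looks like (numpy only; no Theano required):

```python
import numpy

x = numpy.zeros((1, 5), dtype='int32')    # a value an irow Result could hold
y = numpy.zeros((3, 4), dtype='float64')  # a value a dmatrix Result could hold

# irow's constraints
assert isinstance(x, numpy.ndarray)
assert str(x.dtype) == 'int32'
assert len(x.shape) == 2 and x.shape[0] == 1

# dmatrix's constraints
assert isinstance(y, numpy.ndarray)
assert str(y.dtype) == 'float64'
assert len(y.shape) == 2
```

Both values are plain ``numpy.ndarray`` instances; only the constraint sets, not the Python class, distinguish the two Types.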
Type's contract
===============
Concretely speaking, in Theano's framework, a Type is any object which
defines the following methods:
- **filter(value, strict [= False])**
- This casts or wraps a value to match the Type and returns the
casted/wrapped value. If the value is incompatible with the type,
it must raise an exception. If ``strict`` is True, ``filter`` must
return a reference to ``value`` (i.e., casting is prohibited).
- **is_valid_value(value)**
- Returns True iff the value is exactly compatible with the Type.
- *Default*: defined in terms of ``filter(value, strict = True)``
- **values_eq(a, b)**
- Returns True iff ``a`` and ``b`` are valid values of this Type and
are equal.
- *Default*: a == b
- **values_eq_approx(a, b)**
- Returns True iff ``a`` and ``b`` are valid values of this Type and
are approximately equal, for a definition of approximately which
varies from Type to Type.
- *Default*: same as values_eq
- **make_result(name [= None])**
- Makes a :term:`Result` of this Type with the specified name. The
Result will have its ``type`` field set to the Type object.
- *Default*: there is a generic definition of this in Type.
- **__call__()**:
- Syntactic shortcut to make_result.
- *Default*: this is done for you by Type.
For each method, the *default* is what Type defines for you. This
means you will rarely need to define all of these methods.
For more details you can go see the documentation for :ref:`type`.
Defining double
===============
We are going to piggyback ``double`` on Python's ``float``, redefining
two methods: ``filter`` and ``values_eq_approx``.
**filter**
.. code-block:: python
def filter(x, strict=False):
if strict and not isinstance(x, float):
raise TypeError('Expected a float!')
return float(x)
We need to define ``filter`` with two arguments. The second argument
must be called ``strict`` (Theano often calls it by keyword) and must
have a default value of False.
If ``strict == True`` we need to return ``x`` itself. If ``x`` is not
a ``float`` (for example, ``x`` could easily be an ``int``) then it is
incompatible with our Type and we are forced to raise an exception. If
``strict == False`` then we are allowed to cast ``x`` to a ``float``,
so if ``x`` is an ``int`` we will return an equivalent ``float``.
**values_eq_approx**
.. code-block:: python
def values_eq_approx(x, y, tolerance=1e-4):
return abs(x - y) / abs(x + y) < tolerance
The second function we are defining is ``values_eq_approx``. This
method is meant to allow approximate comparison between two values
respecting our Type's constraints. It might happen that an
optimization changes the computation graph in such a way that it
produces slightly different results due to numerical instability such
as rounding errors at the end of the mantissa. For instance, ``a + a +
a + a + a + a`` might not actually produce the exact same output as
``6 * a`` (try with a=0.1), but we don't necessarily mind.
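The rounding effect is easy to observe with plain Python floats (a quick check, independent of Theano):

```python
# Repeated addition and multiplication of 0.1 round differently
# in the last bits of the double-precision mantissa.
a = 0.1
summed = a + a + a + a + a + a
product = 6 * a

assert summed != product             # exactly equal they are not...
assert abs(summed - product) < 1e-4  # ...but approximately equal they are
```

This is exactly the kind of discrepancy ``values_eq_approx`` is meant to tolerate.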
We added an extra ``tolerance`` argument here. Since this argument is
not part of the API, it must have a default value which we reasonably
chose to be 1e-4.
**Putting them together**
What we want in the end is an object which respects the aforementioned
contract. Any way to achieve this is fine, but one must not forget
that Type defines standard, default implementations for most required
methods of the interface. One way to make the Type is to just
instantiate a plain Type and set the needed fields:
.. code-block:: python
from theano import gof
double = gof.Type()
double.filter = filter
double.values_eq_approx = values_eq_approx
Another is to make a subclass of Type and define filter and
values_eq_approx there:
.. code-block:: python
from theano import gof
class Double(gof.Type):
def filter(self, x, strict=False):
if strict and not isinstance(x, float):
raise TypeError('Expected a float!')
return float(x)
def values_eq_approx(self, x, y, tolerance=1e-4):
return abs(x - y) / abs(x + y) < tolerance
double = Double()
There is a small issue with defining ``double`` this way: all
instances of ``Double`` are technically the same Type, since they all
filter values in the same way. This is relevant because Theano often
compares Types using ``==`` to see if they are the same; for example,
if the inputs of two different :ref:`applications <apply>` have the
same Types and the operation applied to them is the same, they can be
:term:`merged <merge>`. The workarounds are to define
``Double.__eq__`` so that all instances of ``Double`` compare equal,
*or* to override ``Double.__new__`` to always return the same
instance, *or* to hide ``Double`` and only publish a single instance of it.
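A minimal sketch of the first workaround, with a plain class standing in for ``gof.Type`` (hypothetical; the real class would subclass ``gof.Type``):

```python
# Sketch: make every instance of the class compare equal, so Theano-style
# == comparisons between Types succeed regardless of which instance is used.
class Double(object):
    def __eq__(self, other):
        return type(self) is type(other)

    def __hash__(self):
        # keep hashing consistent with __eq__ so instances can share dict keys
        return hash(type(self))

assert Double() == Double()  # any two instances now represent the same Type
```

The ``__hash__`` override matters whenever Types are used as dictionary keys, which equality-by-class would otherwise break.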
Untangling some concepts
========================
From feedback I have gotten in the past, it is common to confuse an
instance of Type with a subclass of Type or with an instance of
Result. Some of this confusion is syntactic. A Type is any object
which has fields corresponding to the functions defined above. The
Type class provides sensible defaults for most of them, but not for
the most important one (``filter``), so when defining new types it is
natural to subclass Type. We therefore often end up with Type
subclasses, and it is not completely clear what these represent
semantically. Here is an attempt to clear up the confusion:
* An **instance of Type** is a set of constraints on real data. It is
akin to a primitive type or class in C and it is a *static*
annotation.
* An **instance of Result** symbolizes data nodes in a data flow
graph. If you were to parse the C expression ``int x;``, ``int``
would be a Type instance and ``x`` would be a Result instance of
that Type instance. If you were to parse the C expression ``c = a +
b;``, ``a``, ``b`` and ``c`` would all be Result instances.
* A **subclass of Type** represents a set of Type instances that share
structural similarities. In the ``double`` example that we are
doing, there is actually only one Type in that set, therefore the
subclass doesn't represent anything that one of its instances
doesn't; in this case it is a singleton. However, the Tensor class,
which is a subclass of Type, represents a set of types of tensors
parametrized by their data type and number of dimensions. We could
say that subclassing Type builds a hierarchy of Types which is based
upon structural similarity rather than compatibility.
Final version
=============
.. code-block:: python
from theano import gof
class Double(gof.Type):
def filter(self, x, strict=False):
if strict and not isinstance(x, float):
raise TypeError('Expected a float!')
return float(x)
def values_eq_approx(self, x, y, tolerance=1e-4):
return abs(x - y) / abs(x + y) < tolerance
def __str__(self):
return "double"
double = Double()
I added one utility method, ``__str__``. That way, when we print
``double`` it will print out something sensible!
**Next:** `Making arithmetic Ops on double`_
.. _Making arithmetic Ops on double: op.html
=====================
Example 2 - cons_cell
=====================
.. toctree::
type
====================
Making the cons type
====================
WRITEME
.. _advtutorial:
=================
Advanced Tutorial
=================
Before tackling this tutorial, it is highly recommended to read the
:ref:`basictutorial`.
The advanced tutorial is meant to give the reader a greater
understanding of the building blocks of Theano. It contains two
examples which cover most of the conceptual space associated with
:ref:`type` and :ref:`op` and then expands on other important matters
such as optimization.
This tutorial should be of most use to users who want to extend Theano
with custom types and operations related to these types. Users who
want to extend Theano with new operations on tensors should check
:ref:`tensoroptutorial`, but it is a good idea to read this tutorial
as well since it probably provides better grounding for the many
concepts at work here.
.. toctree::
ex1/index
ex2/index
inplace
optimization
tips
wrapup
..
`Example 1`_
Making a basic arithmetic system on doubles
`Example 2`_
Making a higher-level type: ``cons_cell`` (pair)
`Views and inplace operations`_
A guide to making Ops that return a :term:`view` on their inputs or
operate :term:`inplace` on them.
`Graph optimization`_
A guide to the different ways of defining new custom optimizations
to simplify the computation graph and/or improve its numerical
stability or other desirable properties.
`Tips`_
Tips and tricks about writing types, ops and optimizations. This
page is good reference - check it and come back to it!
`Wrapping up`_
A guide to what to look at next
.. _Example 1: ex1/index.html
.. _Example 2: ex2/index.html
.. _Views and inplace operations: inplace.html
.. _Graph optimization: optimization.html
.. _Tips: tips.html
.. _Wrapping up: wrapup.html
..
============================
Views and inplace operations
============================
WRITEME
==================
Graph optimization
==================
WRITEME
====
Tips
====
Don't define new Ops unless you have to
=======================================
It is usually not very useful to define Ops that can be easily
implemented using other already existing Ops. For example, instead of
writing a "sum_square_difference" Op, you should probably just write a
simple function:
.. code-block:: python
from theano import tensor as T
def sum_square_difference(a, b):
return T.sum((a - b)**2)
Even without taking Theano's optimizations into account, it is likely
to work just as well as a custom implementation. It also supports all
data types, tensors of any number of dimensions, and broadcasting,
whereas a custom implementation would probably only bother to support
contiguous vectors/matrices of doubles...
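For intuition, the same reduction computed with plain numpy (numpy is assumed here only for the check; the Theano version above operates symbolically):

```python
import numpy as np

def sum_square_difference(a, b):
    # numpy analogue of the Theano one-liner: sum of squared differences
    return np.sum((a - b) ** 2)

a = np.array([1.0, 2.0, 3.0])
b = np.array([1.0, 0.0, 1.0])
assert sum_square_difference(a, b) == 8.0  # 0 + 4 + 4
```

Because the Theano version is composed from existing Ops, it inherits their gradients and optimizations for free, which a hand-written Op would have to reimplement.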
Use Theano's high order Ops when applicable
===========================================
Theano provides some generic Op classes which let you generate many
Ops with little effort. For instance, Elemwise can be used to make
:term:`elementwise` operations easily, whereas DimShuffle can be used
to make transpose-like transformations. These higher-order Ops are
mostly Tensor-related, as this is Theano's specialty, so an exposé of
them can be found in :ref:`tensoroptools`.
.. _opchecklist:
Op Checklist
============
Use this list to make sure you haven't forgotten anything when
defining a new Op. It might not be exhaustive but it covers a lot of
common mistakes.
WRITEME
===========
Wrapping up
===========
WRITEME
===========================
Adding two numbers together
===========================
========================================
Baby steps - Adding two numbers together
========================================
Adding two scalars
......@@ -125,7 +125,7 @@ array([[ 2., 2., 2., 2., 2.],
It is possible to add scalars to matrices, vectors to matrices,
scalars to vectors, etc. The behavior of these operations is defined
by broadcasting_.
by :term:`broadcasting`.
The following types are readily available:
......@@ -141,17 +141,12 @@ The following types are readily available:
prefix vs the l prefix) and between 32 and 64 bit floats (f prefix
vs the d prefix).
Section
-------
Try to mix and match them and see what happens. A complete list of
types compatible with numpy arrays may be found :ref:`here
<typelist>`.
Try to mix and match them and see what happens. The previous list is
not exhaustive. A guide to all types compatible with numpy arrays may
be found :ref:`here <predefinedtypes>`.
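As a rough analogy for what mixing types does, numpy's upcasting rules (assumed here to mirror Theano's behavior, not quoted from its documentation) show the effect:

```python
import numpy as np

a = np.ones((2, 2), dtype='int32')    # like an imatrix value
b = np.ones((2, 2), dtype='float64')  # like a dmatrix value
c = a + b                             # the narrower integer type is upcast
assert c.dtype == np.float64
```

Mixing integer and floating-point operands generally widens the result to the type that can represent both.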
**Next:** `More examples`_
.. _broadcasting: ../concepts/broadcasting.html
.. _More examples: examples.html
......@@ -9,8 +9,9 @@ More examples
Logistic function
=================
Let's say that you want to compute the logistic curve, which is given
by:
Here's another straightforward example, though a bit more elaborate
than adding two numbers together. Let's say that you want to compute
the logistic curve, which is given by:
.. math::
......@@ -19,6 +20,8 @@ by:
You want to compute the function :term:`elementwise` on matrices of
doubles.
Well, what you do is this:
>>> x = T.dmatrix('x')
>>> s = 1 / (1 + T.exp(-x))
>>> logistic = function([x], s)
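To sanity-check the formula itself, here is the same computation in plain numpy (independent of the Theano calls above):

```python
import numpy as np

# logistic(x) = 1 / (1 + exp(-x)), applied elementwise
x = np.array([[0.0, 1.0], [-1.0, -2.0]])
s = 1 / (1 + np.exp(-x))

assert s[0, 0] == 0.5               # logistic(0) is exactly 0.5
assert np.all((0 < s) & (s < 1))    # outputs stay strictly in (0, 1)
```

The Theano function built above performs exactly this elementwise computation, but from a compiled graph.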
......@@ -33,16 +36,17 @@ Computing more than one thing at the same time
==============================================
Theano supports functions with multiple outputs. For example, we can
compute the absolute :term:`elementwise` difference between two
compute the :term:`elementwise` absolute difference between two
matrices ``x`` and ``y`` and the squared difference at the same time:
>>> x, y = T.dmatrices('xy')
>>> d = x - y
>>> f = function([x, y], [abs(d), d**2])
>>> diff = x - y
>>> abs_diff = abs(x - y)
>>> diff_squared = diff**2
>>> f = function([x, y], [abs_diff, diff_squared])
Theano will make ``f`` in such a way that it will only compute the
difference once. When we use the function, it will return the two
results (reformatted for readability):
When we use the function, it will return the two results (the printing
was reformatted for readability):
>>> f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
[array([[ 1., 0.],
......
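The two results can be checked in plain numpy as well (an independent verification of the arithmetic, not Theano's API):

```python
import numpy as np

x = np.array([[1.0, 1.0], [1.0, 1.0]])
y = np.array([[0.0, 1.0], [2.0, 3.0]])
d = x - y  # computed once, used by both outputs

assert np.abs(d).tolist() == [[1.0, 0.0], [1.0, 2.0]]   # elementwise |x - y|
assert (d ** 2).tolist() == [[1.0, 0.0], [1.0, 4.0]]    # elementwise (x - y)**2
```

Note that ``d`` is computed once and reused, which is precisely the sharing Theano performs automatically when compiling the two outputs into one function.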
......@@ -25,39 +25,11 @@ of theano. Let's import that subpackage under a handy name. I like
Now we're ready for the tour:
---------------------------------------
`Adding two numbers together`_
Starting small
`More examples`_
Getting comfortable
`Using Module`_
Getting serious
WRITEME: using modes?
`Wrapping up`_
A guide to what to look at next
---------------------------------------
.. rubric:: Contents
.. toctree::
:maxdepth: 2
adding
examples
module
tools
wrapup
.. _Adding two numbers together: adding.html
.. _More examples: examples.html
.. _Using Module: module.html
.. _Wrapping up: wrapup.html
......@@ -262,9 +262,16 @@ initialize a state with a matrix of zeros:
# [ 0. 0. 0. 0. 0.]]
**Next:** `Wrapping up`_
Nesting Modules
===============
.. _Wrapping up: wrapup.html
WRITEME
**Next:** `Tools`_
.. _Tools: tools.html
......
=====
Tools
=====
Mode
====
WRITEME
Types
=====
NOTE: I'm not sure this actually goes in the tutorial - it ended up
much longer than intended - maybe we should just link to it! --OB
.. _predefinedtypes:
Predefined types
----------------
Theano gives you many premade types to work with. These types are
located in the ``theano.tensor`` package. The names of the types
follow a recipe:
``<dtype><dimensionality>``
Where ``<dtype>`` is one of:
==== ======== ============== ====
code type domain bits
==== ======== ============== ====
b byte signed integer 8
w word signed integer 16
i integer signed integer 32
l long signed integer 64
f float floating point 32
d double floating point 64
==== ======== ============== ====
Dimensionality is one of:
====== ====== ========================================== =============================================
code shape Rows :term:`broadcastable <broadcasting>`? Columns :term:`broadcastable <broadcasting>`?
====== ====== ========================================== =============================================
scalar [] Yes Yes
vector [n] Yes N/A
row [1, n] Yes No
col [m, 1] No Yes
matrix [m, n] No No
====== ====== ========================================== =============================================
So, for example, if you want a row of 32-bit floats, it is available
under ``theano.tensor.frow``, and if you want a matrix of signed
32-bit integers, it is available under ``theano.tensor.imatrix``.
Each of the types described above has a singular version and a plural
version. When called, the singular version takes a single optional
argument, the name of the :term:`Result` we want to make, and it makes
a single Result of that type. The plural version can take either an
integer or a string: if an integer is provided, it will return that
many Results, and if a string is provided, it will create one Result
for each letter of the string, using the letter as the Result's
name. For example:
.. code-block:: python
from theano.tensor import *
x = dmatrix() # creates one Result with no name
x = dmatrix('x') # creates one Result with name 'x'
xyz = dmatrix('xyz') # creates one Result with name 'xyz'
x, y, z = dmatrices(3) # creates three Results with no names
x, y, z = dmatrices('xyz') # creates three Results named 'x', 'y' and 'z'
Custom tensor types
-------------------
If you wish to use a type which is not available here (for example, a
3D tensor) you can build an appropriate type using
``theano.tensor.Tensor``. The first argument you pass is the ``dtype``
and the second is the ``broadcastable pattern``.
Where ``dtype`` is one of:
=========== ================ =================
dtype domain bits
=========== ================ =================
int8 signed integer 8
int16 signed integer 16
int32 signed integer 32
int64 signed integer 64
uint8 unsigned integer 8
uint16 unsigned integer 16
uint32 unsigned integer 32
uint64 unsigned integer 64
float32 floating point 32
float64 floating point 64
complex64 complex 64 (two float32)
complex128 complex 128 (two float64)
=========== ================ =================
.. note::
There are no premade complex types, so you need to make them
explicitly with Tensor. Furthermore, few operations are fully
supported for complex types: as of version 0.1, only elementary
operations (``+-*/``) have C implementations.
The broadcastable pattern, on the other hand, indicates both the
number of dimensions and whether a particular dimension has length
1. Here is a handy table mapping the :term:`broadcastable
<broadcasting>` pattern to what kind of tensor it encodes:
===================== =================================
pattern interpretation
===================== =================================
[] scalar
[True] 1D scalar (vector of length 1)
[True, True] 2D scalar (1x1 matrix)
[False] vector
[False, False] matrix
[False] * n nD tensor
[True, False] row (1xN matrix)
[False, True] column (Mx1 matrix)
[False, True, False] A Mx1xP tensor (a)
[True, False, False] A 1xNxP tensor (b)
[False, False, False] A MxNxP tensor (pattern of a + b)
===================== =================================
So if we wanted to create a type representing a 3D array of unsigned
bytes, we would simply do:
.. code-block:: python
mytype = theano.tensor.Tensor('uint8', [False]*3)
Ops
===
There are a lot of operations readily available in the ``theano.tensor``
package. They do not require much explanation according to this
tutorial's author, so he will simply direct you to the :ref:`oplist`
:)
**Next:** `Wrapping up`_
.. _Wrapping up: wrapup.html
.. _howtotest:
===========
How to test
===========
How to test an Op
=================
blah blah WRITEME
How to test an Optimizer
========================
yadda WRITEME yadda
=========
Tutorials
=========
.. toctree::
basic/index
advanced/index
tensorop
tensoroptools
howtotest
.. _tensoroptutorial:
===============================
How to make a new Op on tensors
===============================
This tutorial explains how to create a new operation that works on
numpy's ndarrays and uses Theano's Tensor type. It is optional but
recommended to go through the :ref:`advtutorial` beforehand, as it
explains in more detail the purpose of each of the methods you will
define here.
The operation we will implement is multiplication of two matrices
of doubles. Of course, this operation already exists in Theano, but so
do all simple operations, and a tutorial works better when the concepts
are kept as simple as possible. We will proceed in steps: the first
step is to implement the Op in Python using numpy's multiplication
operator. In the second step, we will extend our Op to (optionally)
operate inplace on its inputs. In the third step, which is the most
difficult, we will give our Op a solid C implementation.
Implementing a new Op in Python
===============================
This is actually very simple to do. You are required to define two
methods: one to create the :ref:`apply` node each time your Op is
applied to some inputs, declaring the outputs in the process, and
another to perform the computation on those inputs. There is also an
optional method you may define to compute the gradient of your Op.
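As a preview, the heart of the computation method will simply call numpy's dot. The sketch below is not Theano API: ``matrix_mul_perform`` is a hypothetical stand-in, and a plain list plays the role of Theano's output-storage cells:

```python
import numpy

def matrix_mul_perform(x, y, output_storage):
    # Theano hands the computation method its outputs as single-element
    # storage cells; a plain list plays that role in this sketch.
    output_storage[0] = numpy.dot(x, y)

out = [None]
matrix_mul_perform(numpy.ones((2, 3)), numpy.ones((3, 4)), out)
print(out[0].shape)  # (2, 4)
```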
Extending the Op to work inplace
================================
WRITEME
Writing a C implementation
==========================
WRITEME
What's next
===========
Theano provides several special Ops that can make your job
easier. Check the :ref:`tensoroptools` to see if you can leverage them
to do what you need.
It is highly recommended that you read the :ref:`opchecklist` before
writing any new Op. Doing so can save you a lot of trouble.
.. _tensoroptools:
===============
Tensor Op Tools
===============
WRITEME - describe how to use Elemwise here
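Until this section is written, the core behaviour that Elemwise provides can be sketched in plain numpy: an elementwise op applies a scalar operation independently at every position, broadcasting length-1 dimensions to match. The arrays below are illustrative only:

```python
import numpy

a = numpy.array([[1., 2., 3.]])   # shape (1, 3): a "row"
b = numpy.array([[10.], [20.]])   # shape (2, 1): a "column"

# The scalar operation is applied at every position, with the
# length-1 dimensions of each input broadcasted to match:
c = a + b
print(c.shape)  # (2, 3)
```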
import sys
import os
import shutil
import inspect
from epydoc import docintrospecter
......@@ -98,13 +99,36 @@ if __name__ == '__main__':
if options['--all'] or options['--epydoc']:
from epydoc.cli import cli
sys.path[0:0] = throot
#Generate HTML doc
#os.system("epydoc --config doc/api/epydoc.conf -o html/api")
sys.argv[:] = ['', '--config', '%s/doc/api/epydoc.conf' % throot, '-o', 'api']
cli()
# os.system("epydoc --config doc/api/epydoc.conf -o html/api")
# Generate PDF doc
# TODO
if options['--all'] or options['--rst']:
import sphinx
sys.path[0:0] = [os.path.join(throot, 'doc')]
sphinx.main(['', '-E', os.path.join(throot, 'doc'), '.'])
# Generate latex file in a temp directory
import tempfile
workdir = tempfile.mkdtemp()
sphinx.main(['', '-E', '-b', 'latex',
os.path.join(throot, 'doc'), workdir])
# Compile to PDF
currentdir = os.getcwd()
os.chdir(workdir)
os.system('make')
try:
shutil.copy(os.path.join(workdir, 'theano.pdf'), currentdir)
os.chdir(currentdir)
shutil.rmtree(workdir)
except OSError, e:
print 'OSError:', e
......@@ -288,8 +288,12 @@ class Function(object):
# if we are allowing garbage collection, remove the input and output reference from the internal
# storage cells
if getattr(self.fn, 'allow_gc', False):
for x in self.output_storage:
x.storage[0] = None #WARNING: This circumvents the 'readonly' attribute in x
assert len(self.output_storage) == len(self.maker.env.outputs)
for o_container, o_result in zip(self.output_storage, self.maker.env.outputs):
if o_result.owner is not None:
# this node is the result of computation
# WARNING: This circumvents the 'readonly' attribute in x
o_container.storage[0] = None
# Update the inputs that have an update function
for input, storage in reversed(zip(self.maker.expanded_inputs, self.input_storage)):
......
......@@ -652,82 +652,6 @@ def mul(x,y):
elif y_is_sparse_result and not x_is_sparse_result: return mul_s_d(y,x)
else: raise NotImplementedError()
###############
#
# TrueDot
#
class TrueDot(gof.op.Op):
"""
Attributes:
grad_preserves_dense - a boolean flag [default: True].
grad_preserves_dense controls whether gradients with respect to inputs
are converted to dense matrices when the corresponding input y is
dense (not in a L{SparseResult} wrapper). This is generally a good idea
when L{Dot} is in the middle of a larger graph, because the types
of gy will match that of y. This conversion might be inefficient if
the gradients are graph outputs though, hence this mask.
@todo: Simplify code by splitting into DotSS and DotSD.
"""
def __init__(self, grad_preserves_dense=True):
self.grad_preserves_dense = grad_preserves_dense
def make_node(self, x, y):
"""
Because of the trickiness of the implementation, we assume that the left argument x is a SparseResult (not dense)
"""
if x.type.dtype != y.type.dtype:
raise NotImplementedError()
assert _is_sparse_result(x)
# These are the conversions performed by scipy.sparse.dot
if x.type.format == "csc" or x.type.format == "coo":
myformat = "csc"
elif x.type.format == "csr":
myformat = "csr"
else:
raise NotImplementedError()
inputs = [x, y] # Need to convert? e.g. assparse
outputs = [Sparse(dtype = x.type.dtype, format = myformat).make_result()]
return gof.Apply(self, inputs, outputs)
def perform(self, node, (x, y), (out, )):
"""
@todo: Verify that output is sufficiently sparse, and raise a warning if it is not
@todo: Also determine that we are storing the output in the best storage format?
"""
out[0] = x.dot(y)
def grad(self, (x, y), (gz,)):
assert _is_sparse_result(gz)
assert _is_sparse_result(x)
rval = [true_dot(gz, y.T), true_dot(x.T, gz)]
if _is_dense_result(y):
if self.grad_preserves_dense:
rval[1] = dense_from_sparse(rval[1])
return rval
def __eq__(self, other):
return type(self) == type(other) and self.grad_preserves_dense == other.grad_preserves_dense
def __hash__(self):
return hash(self.grad_preserves_dense)
def true_dot(x, y, grad_preserves_dense=True):
"""
@todo: Maybe the triple-transposition formulation (when x is dense)
is slow. See if there is a direct way to do this.
"""
if hasattr(x, 'getnnz'): x = as_sparse(x)
if hasattr(y, 'getnnz'): y = as_sparse(y)
x_is_sparse_result = _is_sparse_result(x)
y_is_sparse_result = _is_sparse_result(y)
if not x_is_sparse_result and not y_is_sparse_result:
raise TypeError()
if x_is_sparse_result:
return TrueDot(grad_preserves_dense)(x, y)
else:
assert y_is_sparse_result
return transpose(TrueDot(grad_preserves_dense)(y.T, x.T))
###############
#
# StructuredDot
......
###############
#
# TrueDot
#
class TrueDot(gof.op.Op):
"""
Attributes:
grad_preserves_dense - a boolean flag [default: True].
grad_preserves_dense controls whether gradients with respect to inputs
are converted to dense matrices when the corresponding input y is
dense (not in a L{SparseResult} wrapper). This is generally a good idea
when L{Dot} is in the middle of a larger graph, because the types
of gy will match that of y. This conversion might be inefficient if
the gradients are graph outputs though, hence this mask.
@todo: Simplify code by splitting into DotSS and DotSD.
"""
def __init__(self, grad_preserves_dense=True):
self.grad_preserves_dense = grad_preserves_dense
def __eq__(self, other):
return type(self) == type(other) and self.grad_preserves_dense == other.grad_preserves_dense
def __hash__(self):
return hash(self.grad_preserves_dense)
def __ne__(self, other):
return not (self == other)
def make_node(self, x, y):
"""
:note: Because of the trickiness of the implementation, we assume that the left argument x is a SparseResult (not dense)
"""
if x.type.dtype != y.type.dtype:
raise NotImplementedError()
if not _is_sparse_result(x):
raise TypeError(x)
# These are the conversions performed by scipy.sparse.dot
if x.type.format == "csc" or x.type.format == "coo":
myformat = "csc"
elif x.type.format == "csr":
myformat = "csr"
else:
raise NotImplementedError()
inputs = [x, y] # Need to convert? e.g. assparse
outputs = [Sparse(dtype = x.type.dtype, format = myformat).make_result()]
return gof.Apply(self, inputs, outputs)
def perform(self, node, (x, y), (out, )):
"""
@todo: Verify that output is sufficiently sparse, and raise a warning if it is not
@todo: Also determine that we are storing the output in the best storage format?
"""
rval = x.dot(y)
out[0] = rval
def grad(self, (x, y), (gz,)):
assert _is_sparse_result(gz)
assert _is_sparse_result(x)
rval = [true_dot(gz, y.T), true_dot(x.T, gz)]
if _is_dense_result(y):
if self.grad_preserves_dense:
rval[1] = dense_from_sparse(rval[1])
return rval
def true_dot(x, y, grad_preserves_dense=True):
"""
@todo: Maybe the triple-transposition formulation (when x is dense)
is slow. See if there is a direct way to do this.
"""
if hasattr(x, 'getnnz'): x = as_sparse(x)
if hasattr(y, 'getnnz'): y = as_sparse(y)
x_is_sparse_result = _is_sparse_result(x)
y_is_sparse_result = _is_sparse_result(y)
if not x_is_sparse_result and not y_is_sparse_result:
raise TypeError()
if x_is_sparse_result:
return TrueDot(grad_preserves_dense)(x, y)
else:
assert y_is_sparse_result
return transpose(TrueDot(grad_preserves_dense)(y.T, x.T))
class test_true_dot(unittest.TestCase):
def setUp(self):
numpy.random.seed(44)
def test_basicSS(self):
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
xT = x.T
self.failUnless(_is_sparse_result(xT))
zop = true_dot(x,xT)
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,500))
self.failUnless(type(z) is mtype)
w = mtype((500,500))
w[(10, 10)] = 1
w[(20, 20)] = 4
self.failUnless(z.shape == w.shape)
self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_basicSD(self):
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
y = tensor.as_tensor([[1., 2], [3, 4], [2, 1]])
self.failUnless(_is_dense_result(y))
zop = true_dot(x,y)
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,2))
self.failUnless(type(z) is mtype)
w = mtype((500,2))
w[(10, 0)] = 3.
w[(20, 0)] = 4
w[(10, 1)] = 4
w[(20, 1)] = 2
self.failUnless(z.shape == w.shape)
self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_basicDS(self):
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
y = tensor.as_tensor([[1., 2], [3, 4], [2, 1]])
self.failUnless(_is_dense_result(y))
x.data = x.data.T
y.data = y.data.T
zop = true_dot(y, x)
zop = transpose(true_dot(y, x))
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,2))
# self.failUnless(type(z) is mtype)
w = mtype((500,2))
w[(10, 0)] = 3.
w[(20, 0)] = 4
w[(10, 1)] = 4
w[(20, 1)] = 2
self.failUnless(z.shape == w.shape)
# Type should switch from csr to csc and vice-versa, so don't perform this test
#self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
# Type should switch from csr to csc and vice-versa, so don't perform this test
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_graph_bprop0(self):
for mtype in _mtypes:
x = tensor.matrix('x') #Tensor('float64', broadcastable=[False,False], name='x')
w = Sparse(dtype = 'float64', format = _mtype_to_str[mtype]).make_result()
xw = dense_from_sparse(true_dot(w, x))
y = dense_from_sparse(true_dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
x = numpy.asarray([[1., 2], [3, 4], [2, 1]])
w = mtype((500,3))
w[(10, 1)] = 1
w[(20, 2)] = 2
lr = 0.001
y, origloss, gw = trainfn(x, w)
for epoch in xrange(50):
y, loss, gw = trainfn(x, w)
w = w - (lr * gw)
print loss
self.failUnless(origloss > loss)
self.failUnless('1.05191241115' == str(loss))
def test_graph_bprop_rand(self):
for i in range(10):
xorig = numpy.random.rand(3,2)
for mtype in _mtypes:
x = tensor.matrix('x')
w = Sparse(dtype = 'float64', format = _mtype_to_str[mtype]).make_result()
xw = dense_from_sparse(true_dot(w, x))
y = dense_from_sparse(true_dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
x = xorig
w = mtype((500,3))
w[(10, 1)] = 1
w[(20, 2)] = 2
lr = 0.001
y, origloss, gw = trainfn(x, w)
for epoch in xrange(50):
y, loss, gw = trainfn(x, w)
w = w - (lr * gw)
self.failUnless(origloss > loss)
......@@ -19,7 +19,7 @@ class T_transpose(unittest.TestCase):
def setUp(self):
numpy.random.seed(44)
def test_transpose_csc(self):
sp = sparse.csc_matrix(sparse.speye(5,3))
sp = sparse.csc_matrix(sparse.eye(5,3))
a = as_sparse(sp)
self.failUnless(a.data is sp)
self.failUnless(a.data.shape == (5,3))
......@@ -32,7 +32,7 @@ class T_transpose(unittest.TestCase):
vta = eval_outputs([ta])
self.failUnless(vta.shape == (3,5))
def test_transpose_csr(self):
a = as_sparse(sparse.csr_matrix(sparse.speye(5,3)))
a = as_sparse(sparse.csr_matrix(sparse.eye(5,3)))
self.failUnless(a.data.shape == (5,3))
self.failUnless(a.type.dtype == 'float64')
self.failUnless(a.type.format == 'csr')
......@@ -149,163 +149,6 @@ class T_conversion(unittest.TestCase):
self.failUnless(numpy.all(val[0] == [1,0,0,0,0]))
class test_true_dot(unittest.TestCase):
def setUp(self):
numpy.random.seed(44)
def test_basicSS(self):
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
xT = x.T
self.failUnless(_is_sparse_result(xT))
zop = true_dot(x,xT)
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,500))
self.failUnless(type(z) is mtype)
w = mtype((500,500))
w[(10, 10)] = 1
w[(20, 20)] = 4
self.failUnless(z.shape == w.shape)
self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_basicSD(self):
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
y = tensor.as_tensor([[1., 2], [3, 4], [2, 1]])
self.failUnless(_is_dense_result(y))
zop = true_dot(x,y)
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,2))
self.failUnless(type(z) is mtype)
w = mtype((500,2))
w[(10, 0)] = 3.
w[(20, 0)] = 4
w[(10, 1)] = 4
w[(20, 1)] = 2
self.failUnless(z.shape == w.shape)
self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_basicDS(self):
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
y = tensor.as_tensor([[1., 2], [3, 4], [2, 1]])
self.failUnless(_is_dense_result(y))
x.data = x.data.T
y.data = y.data.T
zop = true_dot(y, x)
zop = transpose(true_dot(y, x))
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,2))
# self.failUnless(type(z) is mtype)
w = mtype((500,2))
w[(10, 0)] = 3.
w[(20, 0)] = 4
w[(10, 1)] = 4
w[(20, 1)] = 2
self.failUnless(z.shape == w.shape)
# Type should switch from csr to csc and vice-versa, so don't perform this test
#self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
# Type should switch from csr to csc and vice-versa, so don't perform this test
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_graph_bprop0(self):
for mtype in _mtypes:
x = tensor.matrix('x') #Tensor('float64', broadcastable=[False,False], name='x')
w = Sparse(dtype = 'float64', format = _mtype_to_str[mtype]).make_result()
xw = dense_from_sparse(true_dot(w, x))
y = dense_from_sparse(true_dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
x = numpy.asarray([[1., 2], [3, 4], [2, 1]])
w = mtype((500,3))
w[(10, 1)] = 1
w[(20, 2)] = 2
lr = 0.001
y, origloss, gw = trainfn(x, w)
for epoch in xrange(50):
y, loss, gw = trainfn(x, w)
w = w - (lr * gw)
print loss
self.failUnless(origloss > loss)
self.failUnless('1.05191241115' == str(loss))
def test_graph_bprop_rand(self):
for i in range(10):
xorig = numpy.random.rand(3,2)
for mtype in _mtypes:
x = tensor.matrix('x')
w = Sparse(dtype = 'float64', format = _mtype_to_str[mtype]).make_result()
xw = dense_from_sparse(true_dot(w, x))
y = dense_from_sparse(true_dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
x = xorig
w = mtype((500,3))
w[(10, 1)] = 1
w[(20, 2)] = 2
lr = 0.001
y, origloss, gw = trainfn(x, w)
for epoch in xrange(50):
y, loss, gw = trainfn(x, w)
w = w - (lr * gw)
self.failUnless(origloss > loss)
import scipy.sparse as sp
class test_structureddot(unittest.TestCase):
......
......@@ -160,6 +160,8 @@ class DimShuffle(Op):
def perform(self, node, (input, ), (storage, )):
# drop
res = input
if type(res) != numpy.ndarray:
raise TypeError(res)
shape = list(res.shape)
for drop in reversed(self.drop):
shape.pop(drop)
......@@ -178,7 +180,7 @@ class DimShuffle(Op):
if not self.inplace:
res = numpy.copy(res)
storage[0] = res
storage[0] = numpy.asarray(res) #asarray puts scalars back into array
def c_code(self, node, name, (input,), (res,), sub):
def statements(lst):
......
......@@ -1076,7 +1076,7 @@ class T_add(unittest.TestCase):
f = inplace_func([a,b], fn(a, b))
print 'valid output:', fn(a.data, b.data)
print 'theano output:', f(a.data, b.data)
self.failUnless(numpy.all(fn(a.data, b.data) == f(a.data, b.data)))
self.failUnless(a.type.values_eq_approx(fn(a.data, b.data), f(a.data, b.data)))
def test_grad_scalar_l(self):
verify_grad(self, add, [numpy.asarray([3.0]), numpy.random.rand(3)])
......