Commit 752f1f73 authored by Frédéric Bastien

Merge pull request #3284 from abergeron/doctest

Use the testcode facility to test code examples directly
......@@ -312,8 +312,7 @@ Pretty Printing
~~~~~~~~~~~~~~~
>>> theano.printing.pprint(prediction) # doctest: +NORMALIZE_WHITESPACE
'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))),
TensorConstant{0.5})'
'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))), TensorConstant{0.5})'
Debug Print
......@@ -321,7 +320,7 @@ Debug Print
The graph before optimization:
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE, +SKIP
Elemwise{gt,no_inplace} [@A] ''
|Elemwise{true_div,no_inplace} [@B] ''
| |DimShuffle{x} [@C] ''
......@@ -342,7 +341,7 @@ The graph before optimization:
The graph after optimization:
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE, +SKIP
Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4
|CGemv{inplace} [@B] '' 3
| |Alloc [@C] '' 2
......@@ -364,7 +363,7 @@ Picture Printing of Graphs
The graph before optimization:
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True)
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_prediction.png
.. image:: ./pics/logreg_pydotprint_prediction.png
......@@ -372,7 +371,7 @@ The output file is available at pics/logreg_pydotprint_prediction.png
The graph after optimization:
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True)
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_predict.png
.. image:: ./pics/logreg_pydotprint_predict.png
......@@ -380,7 +379,7 @@ The output file is available at pics/logreg_pydotprint_predict.png
The optimized training graph:
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True)
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_train.png
.. image:: ./pics/logreg_pydotprint_train.png
......
......@@ -56,7 +56,8 @@ Simple example
>>> a = theano.tensor.vector("a") # declare symbolic variable
>>> b = a + a**10 # build symbolic expression
>>> f = theano.function([a], b) # compile function
>>> print f([0,1,2]) # prints `array([0,2,1026])`
>>> f([0,1,2])
array([ 0., 2., 1026.])
====================================================== =====================================================
......@@ -332,7 +333,7 @@ Details regarding symbolic broadcasting...
Differentiation details
-----------------------
>>> gw,gb = T.grad(cost, [w,b])
>>> gw,gb = T.grad(cost, [w,b]) # doctest: +SKIP
* T.grad works symbolically: takes and returns a Theano variable
......
......@@ -148,8 +148,7 @@ Pretty Printing
~~~~~~~~~~~~~~~
>>> theano.printing.pprint(prediction) # doctest: +NORMALIZE_WHITESPACE
'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))),
TensorConstant{0.5})'
'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))), TensorConstant{0.5})'
Debug Print
......@@ -157,8 +156,11 @@ Debug Print
The graph before optimization:
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE
Elemwise{gt,no_inplace} [@A] ''
.. doctest::
:options: +SKIP
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE
Elemwise{gt,no_inplace} [@A] ''
|Elemwise{true_div,no_inplace} [@B] ''
| |DimShuffle{x} [@C] ''
| | |TensorConstant{1} [@D]
......@@ -178,20 +180,23 @@ The graph before optimization:
The graph after optimization:
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE
Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4
|CGemv{inplace} [@B] '' 3
| |Alloc [@C] '' 2
| | |TensorConstant{0.0} [@D]
| | |Shape_i{0} [@E] '' 1
| | |x [@F]
| |TensorConstant{1.0} [@G]
| |x [@F]
| |w [@H]
| |TensorConstant{0.0} [@D]
|InplaceDimShuffle{x} [@I] '' 0
| |b [@J]
|TensorConstant{(1,) of 0.5} [@K]
.. doctest::
:options: +SKIP
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE
Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4
|CGemv{inplace} [@B] '' 3
| |Alloc [@C] '' 2
| | |TensorConstant{0.0} [@D]
| | |Shape_i{0} [@E] '' 1
| | |x [@F]
| |TensorConstant{1.0} [@G]
| |x [@F]
| |w [@H]
| |TensorConstant{0.0} [@D]
|InplaceDimShuffle{x} [@I] '' 0
| |b [@J]
|TensorConstant{(1,) of 0.5} [@K]
Picture Printing of Graphs
......@@ -201,24 +206,33 @@ Picture Printing of Graphs
The graph before optimization:
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_prediction.png
.. doctest::
:options: +SKIP
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_prediction.png
.. image:: ./pics/logreg_pydotprint_prediction.png
:width: 800 px
The graph after optimization:
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_predict.png
.. doctest::
:options: +SKIP
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_predict.png
.. image:: ./pics/logreg_pydotprint_predict.png
:width: 800 px
The optimized training graph:
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_train.png
.. doctest::
:options: +SKIP
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_train.png
.. image:: ./pics/logreg_pydotprint_train.png
:width: 1500 px
......
......@@ -54,8 +54,8 @@ Simple example
>>> a = theano.tensor.vector("a") # declare symbolic variable
>>> b = a + a ** 10 # build symbolic expression
>>> f = theano.function([a], b) # compile function
>>> print f([0, 1, 2]) # prints `array([0, 2, 1026])`
>>> f([0, 1, 2])
array([ 0., 2., 1026.])
====================================================== =====================================================
Unoptimized graph Optimized graph
......@@ -118,7 +118,7 @@ Where are those optimization applied?
# Log(1-sigmoid(var)) -> -sigmoid(var)
prediction = p_1 > 0.5
cost = xent.mean() + 0.01 * (w ** 2).sum()
gw,gb = tt.grad(cost, [w, b])
gw, gb = tt.grad(cost, [w, b])
train = theano.function(
inputs=[x, y],
......@@ -294,7 +294,7 @@ Details regarding symbolic broadcasting...
Differentiation details
-----------------------
>>> gw,gb = tt.grad(cost, [w,b])
>>> gw, gb = tt.grad(cost, [w,b]) # doctest: +SKIP
* tt.grad works symbolically: takes and returns a Theano variable
......
......@@ -253,10 +253,12 @@ We will be defining C code for the multiplication Op on doubles.
**c_code**
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. testsetup::
.. code-block:: python
from theano import Op
mul = Op()
.. testcode::
def c_code(node, name, input_names, output_names, sub):
x_name, y_name = input_names[0], input_names[1]
......@@ -298,11 +300,7 @@ As before, I tried to organize the code in order to minimize
repetition. You can check that mul produces the same C code in this
version that it produces in the code I gave above.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python
.. testcode::
from theano import gof
......
......@@ -156,12 +156,14 @@ out:
Defining the methods
====================
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. testsetup::
import theano
double = theano.Type()
**c_declare**
.. code-block:: python
.. testcode::
def c_declare(name, sub):
return """
......@@ -189,12 +191,9 @@ your Type. If you wish people to develop operations that make use of
it, it's best to publish it somewhere.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
**c_init**
.. code-block:: python
.. testcode::
def c_init(name, sub):
return """
......@@ -218,12 +217,9 @@ you should only assume that either ``c_init`` or ``c_extract`` has been
called, without knowing for sure which of the two.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
**c_extract**
.. code-block:: python
.. testcode::
def c_extract(name, sub):
return """
......@@ -257,12 +253,9 @@ using the ``PyFloat_AsDouble`` function (yet again provided by CPython's C
API) and we put it in our double variable that we declared previously.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
**c_sync**
.. code-block:: python
.. testcode::
def c_sync(name, sub):
return """
......@@ -319,12 +312,9 @@ than sorry.
do *NOT* decrease its reference count!
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
**c_cleanup**
.. code-block:: python
.. testcode::
def c_cleanup(name, sub):
return ""
......@@ -370,14 +360,8 @@ depends on the the relationship between Python and C with respect to
that Variable. For instance, imagine you define the following function
and call it:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python
from theano import function
from theano.tensor import double
x, y, z = double('x'), double('y'), double('z')
a = add(x, y)
b = mul(a, z)
......@@ -459,10 +443,7 @@ multiplication block.
Final version
=============
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python
.. testcode::
from theano import gof
......
......@@ -7,7 +7,7 @@ So suppose you have looked through the library documentation and you don't see a
function that does what you want.
If you can implement something in terms of existing Ops, you should do that.
Odds are your function that uses existing Theano expressions is short,
Odds are your function that uses existing Theano expressions is short,
has no bugs, and potentially profits from optimizations that have already been
implemented.
......@@ -18,7 +18,7 @@ Theano was designed to make it easy to add new Ops, Types, and Optimizations.
This section walks through a non-trivial example Op that does something pretty
weird and unrealistic, that is hard to express with existing Ops.
(Technically, we could use ``Scan`` to implement the Op we're about to describe,
(Technically, we could use ``Scan`` to implement the Op we're about to describe,
but we ignore that possibility for the sake of example.)
The following code works, but important error-checking has been omitted for
......@@ -26,53 +26,52 @@ clarity. For example, when you write C code that assumes memory is contiguous,
you should check the strides and alignment.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_fibby.test_fibby_1
.. code-block:: python
class Fibby(theano.Op):
"""
An arbitrarily generalized Fibbonacci sequence
"""
__props__ = ()
def make_node(self, x):
x_ = tensor.as_tensor_variable(x)
assert x_.ndim == 1
return theano.Apply(self,
inputs=[x_],
outputs=[x_.type()])
# using x_.type() is dangerous, it copies x's broadcasting behaviour
def perform(self, node, inputs, output_storage):
x, = inputs
y = output_storage[0][0] = x.copy()
for i in range(2, len(x)):
y[i] = y[i-1] * y[i-2] + x[i]
def c_code(self, node, name, inames, onames, sub):
x, = inames
y, = onames
fail = sub['fail']
return """
Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray(
%(x)s, 0, NPY_ARRAY_ENSURECOPY);
if (!%(y)s)
%(fail)s;
{//New scope needed to make compilation work
dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s);
for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i];
}
""" % locals()
def c_code_cache_version(self):
return (1,)
fibby = Fibby()
.. testcode::
import theano
class Fibby(theano.Op):
"""
An arbitrarily generalized Fibbonacci sequence
"""
__props__ = ()
def make_node(self, x):
x_ = tensor.as_tensor_variable(x)
assert x_.ndim == 1
return theano.Apply(self,
inputs=[x_],
outputs=[x_.type()])
# using x_.type() is dangerous, it copies x's broadcasting behaviour
def perform(self, node, inputs, output_storage):
x, = inputs
y = output_storage[0][0] = x.copy()
for i in range(2, len(x)):
y[i] = y[i-1] * y[i-2] + x[i]
def c_code(self, node, name, inames, onames, sub):
x, = inames
y, = onames
fail = sub['fail']
return """
Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray(
%(x)s, 0, NPY_ARRAY_ENSURECOPY);
if (!%(y)s)
%(fail)s;
{//New scope needed to make compilation work
dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s);
for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i];
}
""" % locals()
def c_code_cache_version(self):
return (1,)
fibby = Fibby()
At a high level, the code fragment declares a class (``Fibby``) and then
creates one instance of it (``fibby``).
......@@ -80,7 +79,7 @@ We often gloss over this distinction, but will be precise here:
``fibby`` (the instance) is an Op, not ``Fibby`` (the class which is a subclass of ``theano.Op``).
You can call ``fibby(tensor.vector())`` on a Variable to build an
expression, and in the expression there will be a ``.op`` attribute that refers
to ``fibby``.
to ``fibby``.
The first two methods in the Op are relatively boilerplate: ``__eq__`` and ``__hash__``.
When two Ops are equal, Theano will merge their outputs if they are applied to the same inputs.
......@@ -108,14 +107,14 @@ see wrong calculation.
The ``make_node`` method creates a node to be included in the expression graph.
It runs when we apply our Op (``fibby``) to Variable (``x``), as in ``fibby(tensor.vector())``.
When an Op has multiple inputs, their order in the inputs argument to ``Apply``
is important: Theano will call ``make_node(*inputs)`` to copy the graph,
is important: Theano will call ``make_node(*inputs)`` to copy the graph,
so it is important not to change the semantics of the expression by changing the argument order.
All the ``inputs`` and ``outputs`` arguments to ``Apply`` must be Variables.
A common and easy way to ensure inputs are variables is to run them through
``as_tensor_variable``.
``as_tensor_variable``.
This function leaves TensorType variables alone, raises an
error for non-TensorType variables, and copies any ``numpy.ndarray`` into the
storage for a TensorType Constant.
......@@ -123,7 +122,7 @@ The ``make_node`` method dictates the appropriate Type for all output
variables.
The ``perform`` method implements the Op's mathematical logic in Python.
The inputs (here ``x``) are passed by value,
The inputs (here ``x``) are passed by value,
but a single output is returned indirectly as the first element of
single-element lists. If ``fibby`` had a second output, it would be stored
in ``output_storage[1][0]``.
......@@ -143,7 +142,7 @@ the correct size for the output. This is essentially simulating the line
``y = x.copy()``.
.. code-block:: python
.. code-block:: c
Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray(
......@@ -153,7 +152,7 @@ The first line reduces the reference count of the data that y originally
pointed to. The second line allocates the new data and makes y point to it.
In C code for a theano op, numpy arrays are represented as ``PyArrayObject`` C
structs. This is part of the numpy/scipy C API documented at
structs. This is part of the numpy/scipy C API documented at
http://docs.scipy.org/doc/numpy/reference/c-api.types-and-structures.html
TODO: NEEDS MORE EXPLANATION.
......@@ -161,7 +160,7 @@ TODO: NEEDS MORE EXPLANATION.
There are some important restrictions to remember when implementing an Op.
Unless your Op correctly defines a ``view_map`` attribute, the ``perform`` and ``c_code`` must not
produce outputs whose memory is aliased to any input (technically, if changing the
output could change the input object in some sense, they are aliased).
output could change the input object in some sense, they are aliased).
Unless your Op correctly defines a ``destroy_map`` attribute, ``perform`` and ``c_code`` must
not modify any of the inputs.
......@@ -206,21 +205,21 @@ TODO: talk about OPTIMIZATION STAGES
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_fibby.test_fibby_1
.. code-block:: python
.. testcode::
from theano.tensor.opt import get_scalar_constant_value, NotScalarConstantError
from theano.tensor.opt import get_scalar_constant_value, NotScalarConstantError
# Remove any fibby(zeros(...))
@theano.tensor.opt.register_specialize
@theano.gof.local_optimizer([fibby])
def fibby_of_zero(node):
if node.op == fibby:
x = node.inputs[0]
try:
if numpy.all(0 == get_scalar_constant_value(x)):
return [x]
except NotScalarConstantError:
pass
# Remove any fibby(zeros(...))
@theano.tensor.opt.register_specialize
@theano.gof.local_optimizer([fibby])
def fibby_of_zero(node):
if node.op == fibby:
x = node.inputs[0]
try:
if numpy.all(0 == get_scalar_constant_value(x)):
return [x]
except NotScalarConstantError:
pass
The ``register_specialize`` decorator is what activates our optimization, and
tells Theano to use it in the specialization stage.
......@@ -237,32 +236,35 @@ Test the optimization
Here is some code to test that the optimization is applied only when needed.
.. code-block:: python
# Test it does not apply when not needed
x = T.dvector()
f = function([x], fibby(x))
#theano.printing.debugprint(f)
# We call the function to make sure it runs.
# If you run in DebugMode, it will compare the C and Python outputs.
f(numpy.random.rand(5))
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, Fibby)
# Test that the optimization gets applied.
f_zero = function([], fibby(T.zeros([5])))
#theano.printing.debugprint(f_zero)
# If you run in DebugMode, it will compare the output before
# and after the optimization.
f_zero()
# Check that the optimization removes the Fibby Op.
# For security, the Theano memory interface ensures that the output
# of the function is always memory not aliased to the input.
# That is why there is a DeepCopyOp op.
topo = f_zero.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, theano.compile.ops.DeepCopyOp)
.. testcode::
import numpy
import theano.tensor as T
from theano import function
from theano import tensor
# Test it does not apply when not needed
x = T.dvector()
f = function([x], fibby(x))
# We call the function to make sure it runs.
# If you run in DebugMode, it will compare the C and Python outputs.
f(numpy.random.rand(5))
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, Fibby)
# Test that the optimization gets applied.
f_zero = function([], fibby(T.zeros([5])))
# If you run in DebugMode, it will compare the output before
# and after the optimization.
f_zero()
# Check that the optimization removes the Fibby Op.
# For security, the Theano memory interface ensures that the output
# of the function is always memory not aliased to the input.
# That is why there is a DeepCopyOp op.
topo = f_zero.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, theano.compile.ops.DeepCopyOp)
......@@ -18,11 +18,13 @@ should help you understand how these pieces fit together:
**Code**
.. code-block:: python
.. testcode::
x = dmatrix('x')
y = dmatrix('y')
z = x + y
import theano.tensor as T
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
**Diagram**
......@@ -69,73 +71,67 @@ without any shortcuts, that will make the graph construction very explicit.
This is what you would normally type:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_graphstructures.test_graphstructures_1
.. testcode::
.. code-block:: python
# create 3 Variables with owner = None
x = T.matrix('x')
y = T.matrix('y')
z = T.matrix('z')
# create 3 Variables with owner = None
x = T.matrix('x')
y = T.matrix('y')
z = T.matrix('z')
# create 2 Variables (one for 'e', one intermediate for y*z)
# create 2 Apply instances (one for '+', one for '*')
e = x + y * z
# create 2 Variables (one for 'e', one intermediate for y*z)
# create 2 Apply instances (one for '+', one for '*')
e = x + y * z
**Long example**
This is what you would type to build the graph explicitly:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_graphstructures.test_graphstructures_1
.. code-block:: python
.. testcode::
from theano.tensor import add, mul, Apply, Variable, TensorType
from theano.tensor import add, mul, Apply, Variable, Constant, TensorType
# Instantiate a type that represents a matrix of doubles
float64_matrix = TensorType(dtype = 'float64', # double
broadcastable = (False, False)) # matrix
# Instantiate a type that represents a matrix of doubles
float64_matrix = TensorType(dtype='float64', # double
broadcastable=(False, False)) # matrix
# We make the Variable instances we need.
x = Variable(type = float64_matrix, name = 'x')
y = Variable(type = float64_matrix, name = 'y')
z = Variable(type = float64_matrix, name = 'z')
# This is the Variable that we want to symbolically represents y*z
mul_variable = Variable(type = float64_matrix)
assert mul_variable.owner is None
# Instantiate a symbolic multiplication
node_mul = Apply(op = mul,
inputs = [y, z],
outputs = [mul_variable])
# Fields 'owner' and 'index' are set by Apply
assert mul_variable.owner is node_mul
# 'index' is the position of mul_variable in mode_mul's outputs
assert mul_variable.index == 0
# This is the Variable that we want to symbolically represents x+(y*z)
add_variable = Variable(type = float64_matrix)
assert add_variable.owner is None
# Instantiate a symbolic addition
node_add = Apply(op = add,
inputs = [x, mul_variable],
outputs = [add_variable])
# Fields 'owner' and 'index' are set by Apply
assert add_variable.owner is node_add
assert add_variable.index == 0
e = add_variable
# We have access to x, y and z through pointers
assert e.owner.inputs[0] is x
assert e.owner.inputs[1] is mul_variable
assert e.owner.inputs[1].owner.inputs[0] is y
assert e.owner.inputs[1].owner.inputs[1] is z
x = Variable(type=float64_matrix, name='x')
y = Variable(type=float64_matrix, name='y')
z = Variable(type=float64_matrix, name='z')
# This is the Variable that we want to symbolically represents y*z
mul_variable = Variable(type=float64_matrix)
assert mul_variable.owner is None
# Instantiate a symbolic multiplication
node_mul = Apply(op=mul,
inputs=[y, z],
outputs=[mul_variable])
# Fields 'owner' and 'index' are set by Apply
assert mul_variable.owner is node_mul
# 'index' is the position of mul_variable in mode_mul's outputs
assert mul_variable.index == 0
# This is the Variable that we want to symbolically represents x+(y*z)
add_variable = Variable(type=float64_matrix)
assert add_variable.owner is None
# Instantiate a symbolic addition
node_add = Apply(op=add,
inputs=[x, mul_variable],
outputs=[add_variable])
# Fields 'owner' and 'index' are set by Apply
assert add_variable.owner is node_add
assert add_variable.index == 0
e = add_variable
# We have access to x, y and z through pointers
assert e.owner.inputs[0] is x
assert e.owner.inputs[1] is mul_variable
assert e.owner.inputs[1].owner.inputs[0] is y
assert e.owner.inputs[1].owner.inputs[1] is z
Note how the call to ``Apply`` modifies the ``owner`` and ``index``
......@@ -153,20 +149,19 @@ All nodes in the graph must be instances of ``Apply`` or ``Result``, but
constraints. For example, the :func:`tensor.add`
Op instance is written so that:
.. code-block:: python
.. testcode::
e = dscalar('x') + 1
e = T.dscalar('x') + 1
builds the following graph:
.. code-block:: python
node = Apply(op = add,
inputs = [Variable(type = dscalar, name = 'x'),
Constant(type = lscalar, data = 1)],
outputs = [Variable(type = dscalar)])
e = node.outputs[0]
.. testcode::
node = Apply(op=add,
inputs=[Variable(type=T.dscalar, name='x'),
Constant(type=T.lscalar, data=1)],
outputs=[Variable(type=T.dscalar)])
e = node.outputs[0]
Graph Structures
......@@ -311,6 +306,7 @@ Theano. The symbolic inputs that you operate on are Variables and what
you get from applying various Ops to these inputs are also
Variables. For example, when I type
>>> import theano
>>> x = theano.tensor.ivector()
>>> y = -x
......@@ -399,31 +395,34 @@ In both types of pairs, the second element of the tuple is an index,
such that: ``var.clients[*][0].inputs[index]`` or
``fgraph.outputs[index]`` is that variable.
.. code-block:: python
import theano
v = theano.tensor.vector()
f = theano.function([v], (v+1).sum())
theano.printing.debugprint(f)
# Sorted list of all nodes in the compiled graph.
topo = f.maker.fgraph.toposort()
topo[0].outputs[0].clients
# [(Sum(Elemwise{add,no_inplace}.0), 0)]
topo[1].outputs[0].clients
# [('output', 0)]
# An internal variable
var = topo[0].outputs[0]
client = var.clients[0]
client
# (Sum(Elemwise{add,no_inplace}.0), 0)
type(client[0])
# <class 'theano.gof.graph.Apply'>
assert client[0].inputs[client[1]] is var
# An output of the graph
var = topo[1].outputs[0]
client = var.clients[0]
client
# ('output', 0)
assert f.maker.fgraph.outputs[client[1]] is var
>>> import theano
>>> v = theano.tensor.vector()
>>> f = theano.function([v], (v+1).sum())
>>> theano.printing.debugprint(f)
Sum{acc_dtype=float64} [@A] '' 1
|Elemwise{add,no_inplace} [@B] '' 0
|TensorConstant{(1,) of 1.0} [@C]
|<TensorType(float64, vector)> [@D]
>>> # Sorted list of all nodes in the compiled graph.
>>> topo = f.maker.fgraph.toposort()
>>> topo[0].outputs[0].clients
[(Sum{acc_dtype=float64}(Elemwise{add,no_inplace}.0), 0)]
>>> topo[1].outputs[0].clients
[('output', 0)]
>>> # An internal variable
>>> var = topo[0].outputs[0]
>>> client = var.clients[0]
>>> client
(Sum{acc_dtype=float64}(Elemwise{add,no_inplace}.0), 0)
>>> type(client[0])
<class 'theano.gof.graph.Apply'>
>>> assert client[0].inputs[client[1]] is var
>>> # An output of the graph
>>> var = topo[1].outputs[0]
>>> client = var.clients[0]
>>> client
('output', 0)
>>> assert f.maker.fgraph.outputs[client[1]] is var
......@@ -55,7 +55,12 @@ Suppose you had an Op which took ``x`` as input and returned
purpose, you would set the ``view_map`` field as follows:
.. code-block:: python
.. testsetup::
from theano import Op
myop = Op()
.. testcode::
myop.view_map = {0: [0]}
......@@ -66,7 +71,7 @@ inputs that are viewed by a given output, this feature is currently
unsupported. Here are more examples:
.. code-block:: python
.. testcode::
myop.view_map = {0: [0]} # first output is a view of first input
myop.view_map = {0: [1]} # first output is a view of second input
......@@ -101,8 +106,11 @@ operation on ``x``.
modified. Therefore, code using inplace operations would look like
this:
.. code-block:: python
.. testcode::
from theano.tensor import dscalars, log
from theano.tensor.inplace import add_inplace
x, y = dscalars('x', 'y')
r1 = log(x)
......@@ -144,7 +152,7 @@ Theano needs to be notified of this fact. The syntax is similar to
that of ``view_map``:
.. code-block:: python
.. testcode::
myop.destroy_map = {0: [0]}
......@@ -153,7 +161,7 @@ What this means is that the first output (position 0) operates inplace on the
first input (position 0).
.. code-block:: python
.. testcode::
myop.destroy_map = {0: [0]} # first output operates inplace on first input
myop.destroy_map = {0: [1]} # first output operates inplace on second input
......
......@@ -3,6 +3,39 @@
Making arithmetic Ops on double
===============================
.. testsetup:: *
from theano import gof
class Double(gof.Type):
def filter(self, x, strict=False, allow_downcast=None):
if strict:
if isinstance(x, float):
return x
else:
raise TypeError('Expected a float!')
elif allow_downcast:
return float(x)
else: # Covers both the False and None cases.
x_float = float(x)
if x_float == x:
return x_float
else:
raise TypeError('The double type cannot accurately represent '
'value %s (of type %s): you must explicitly '
'allow downcasting if you want to do this.'
% (x, type(x)))
def values_eq_approx(self, x, y, tolerance=1e-4):
return abs(x - y) / (abs(x) + abs(y)) < tolerance
def __str__(self):
return "double"
double = Double()
Now that we have a ``double`` type, we have yet to use it to perform
computations. We'll start by defining multiplication.
......@@ -508,10 +541,7 @@ multiplication Op could take an arbitrary number of arguments.
First, we'll instantiate a ``mul`` Op:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
.. testcode:: mul
from theano import gof
mul = gof.Op()
......@@ -525,10 +555,7 @@ two. This function ensures that both inputs have the ``double`` type.
Since multiplying two doubles yields a double, this function makes an
Apply node with an output Variable of type ``double``.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
.. testcode:: mul
def make_node(x, y):
if x.type != double or y.type != double:
......@@ -561,9 +588,7 @@ built-in type ``float`` because this is the type that ``double.filter()``
will always return, per our own definition. ``output_storage`` will
contain a single storage cell for the multiplication's variable.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
.. testcode:: mul
def perform(node, inputs, output_storage):
x, y = inputs[0], inputs[1]
......@@ -593,30 +618,32 @@ Here, ``z`` is a list of one element. By default, ``z == [None]``.
Trying out our new Op
=====================
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
In the following code, we use our new Op:
>>> x, y = double('x'), double('y')
>>> z = mul(x, y)
>>> f = theano.function([x, y], z)
>>> f(5, 6)
30.0
>>> f(5.6, 6.7)
37.519999999999996
.. doctest:: mul
>>> import theano
>>> x, y = double('x'), double('y')
>>> z = mul(x, y)
>>> f = theano.function([x, y], z)
>>> f(5, 6)
30.0
>>> f(5.6, 6.7)
37.519999999999996
Note that there is an implicit call to
``double.filter()`` on each argument, so if we give integers as inputs
they are magically cast to the right type. Now, what if we try this?
>>> x = double('x')
>>> z = mul(x, 2)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/u/breuleuo/hg/theano/theano/gof/op.py", line 207, in __call__
File "<stdin>", line 2, in make_node
AttributeError: 'int' object has no attribute 'type'
.. doctest:: mul
>>> x = double('x')
>>> z = mul(x, 2)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/u/breuleuo/hg/theano/theano/gof/op.py", line 207, in __call__
File "<stdin>", line 2, in make_node
AttributeError: 'int' object has no attribute 'type'
Automatic Constant Wrapping
---------------------------
......@@ -625,9 +652,7 @@ Well, OK. We'd like our Op to be a bit more flexible. This can be done
by modifying ``make_node`` to accept Python ``int`` or ``float`` as
``x`` and/or ``y``:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
.. testcode:: mul
def make_node(x, y):
if isinstance(x, (int, float)):
......@@ -643,16 +668,15 @@ Whenever we pass a Python int or float instead of a Variable as ``x`` or
``y``, ``make_node`` will convert it to :ref:`constant` for us. ``gof.Constant``
is a :ref:`variable` we statically know the value of.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_op.test_op_1
.. doctest:: mul
>>> x = double('x')
>>> z = mul(x, 2)
>>> f = theano.function([x], z)
>>> f(10)
20.0
>>> f(3.4)
6.7999999999999998
>>> x = double('x')
>>> z = mul(x, 2)
>>> f = theano.function([x], z)
>>> f(10)
20.0
>>> f(3.4)
6.8
Now the code works the way we want it to.
......@@ -673,10 +697,7 @@ operations ``add``, ``sub`` and ``div``, code for ``make_node`` can be
shared between these Ops. Here is a revised implementation of these four
arithmetic operators:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
.. testcode::
from theano import gof
......
......@@ -113,18 +113,16 @@ We will implement it in three ways: using a global optimization, a
local optimization with a Navigator and then using the PatternSub
facility.
Global optimization
-------------------
Here is the code for a global optimization implementing the
simplification described above:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python
.. testcode::
import theano
from theano import gof
from theano.gof import toolbox
class Simplify(gof.Optimizer):
......@@ -132,7 +130,7 @@ simplification described above:
fgraph.attach_feature(toolbox.ReplaceValidate())
def apply(self, fgraph):
for node in fgraph.toposort():
if node.op == div:
if node.op == true_div:
x, y = node.inputs
z = node.outputs[0]
if x.owner and x.owner.op == mul:
......@@ -181,37 +179,35 @@ pointer-following game you need to get ahold of the nodes of interest
for the simplification (``x``, ``y``, ``z``, ``a``, ``b``, etc.).
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
Test time:
>>> x = double('x')
>>> y = double('y')
>>> z = double('z')
>>> a = add(z, mul(div(mul(y, x), y), div(z, x)))
>>> from theano.scalar import float64, add, mul, true_div
>>> x = float64('x')
>>> y = float64('y')
>>> z = float64('z')
>>> a = add(z, mul(true_div(mul(y, x), y), true_div(z, x)))
>>> e = gof.FunctionGraph([x, y, z], [a])
>>> e
[add(z, mul(div(mul(y, x), y), div(z, x)))]
[add(z, mul(true_div(mul(y, x), y), true_div(z, x)))]
>>> simplify.optimize(e)
>>> e
[add(z, mul(x, div(z, x)))]
[add(z, mul(x, true_div(z, x)))]
Cool! It seems to work. You can check what happens if you put many
instances of :math:`\frac{xy}{y}` in the graph. Note that it sometimes
won't work for reasons that have nothing to do with the quality of the
optimization you wrote. For example, consider the following:
>>> x = double('x')
>>> y = double('y')
>>> z = double('z')
>>> a = div(mul(add(y, z), x), add(y, z))
>>> x = float64('x')
>>> y = float64('y')
>>> z = float64('z')
>>> a = true_div(mul(add(y, z), x), add(y, z))
>>> e = gof.FunctionGraph([x, y, z], [a])
>>> e
[div(mul(add(y, z), x), add(y, z))]
[true_div(mul(add(y, z), x), add(y, z))]
>>> simplify.optimize(e)
>>> e
[div(mul(add(y, z), x), add(y, z))]
[true_div(mul(add(y, z), x), add(y, z))]
Nothing happened here. The reason is: ``add(y, z) != add(y,
z)``. That is the case for efficiency reasons. To fix this problem we
......@@ -220,9 +216,10 @@ computation, using the ``merge_optimizer`` defined in
``theano.gof.opt``.
>>> from theano.gof.opt import merge_optimizer
>>> merge_optimizer.optimize(e)
>>> merge_optimizer.optimize(e) # doctest: +ELLIPSIS
(0, ..., None, None, {}, 1, 0)
>>> e
[div(mul(*1 -> add(y, z), x), *1)]
[true_div(mul(*1 -> add(y, z), x), *1)]
>>> simplify.optimize(e)
>>> e
[x]
......@@ -251,15 +248,12 @@ Local optimization
The local version of the above code would be the following:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python
.. testcode::
class LocalSimplify(gof.LocalOptimizer):
def transform(self, node):
if node.op == div:
if node.op == true_div:
x, y = node.inputs
if x.owner and x.owner.op == mul:
a, b = x.owner.inputs
......@@ -292,21 +286,18 @@ with a :ref:`navigator`. Basically, a :ref:`navigator` is a global
optimizer that loops through all nodes in the graph (or a well-defined
subset of them) and applies one or several local optimizers on them.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
>>> x = double('x')
>>> y = double('y')
>>> z = double('z')
>>> a = add(z, mul(div(mul(y, x), y), div(z, x)))
>>> x = float64('x')
>>> y = float64('y')
>>> z = float64('z')
>>> a = add(z, mul(true_div(mul(y, x), y), true_div(z, x)))
>>> e = gof.FunctionGraph([x, y, z], [a])
>>> e
[add(z, mul(div(mul(y, x), y), div(z, x)))]
[add(z, mul(true_div(mul(y, x), y), true_div(z, x)))]
>>> simplify = gof.TopoOptimizer(local_simplify)
>>> simplify.optimize(e)
(<theano.gof.opt.TopoOptimizer object at 0x...>, 1, 5, 3, ..., ..., ...)
>>> e
[add(z, mul(x, div(z, x)))]
[add(z, mul(x, true_div(z, x)))]
OpSub, OpRemove, PatternSub
+++++++++++++++++++++++++++
......@@ -331,8 +322,11 @@ Theano defines some shortcuts to make LocalOptimizers:
Replaces all occurrences of the first pattern by the second pattern.
See :class:`PatternSub`.
.. testsetup::
.. code-block:: python
from theano.scalar import identity
.. testcode::
from theano.gof.opt import OpSub, OpRemove, PatternSub
......@@ -346,9 +340,9 @@ Theano defines some shortcuts to make LocalOptimizers:
# The "simplify" operation we've been defining in the past few
# sections. Note that we need two patterns to account for the
# permutations of the arguments to mul.
local_simplify_1 = PatternSub((div, (mul, 'x', 'y'), 'y'),
local_simplify_1 = PatternSub((true_div, (mul, 'x', 'y'), 'y'),
'x')
local_simplify_2 = PatternSub((div, (mul, 'x', 'y'), 'x'),
local_simplify_2 = PatternSub((true_div, (mul, 'x', 'y'), 'x'),
'y')
.. note::
......@@ -437,7 +431,7 @@ A Query is built by the following call:
.. code-block:: python
theano.gof.Query(include, require = None, exclude = None, subquery = None)
theano.gof.Query(include, require=None, exclude=None, subquery=None)
.. class:: Query
......@@ -476,22 +470,23 @@ Examples
Here are a few examples of how to use a Query on optdb to produce an
Optimizer:
.. code-block:: python
.. testcode::
from theano.gof import Query
from theano.compile import optdb
# This is how the optimizer for the fast_run mode is defined
fast_run = optdb.query(Query(include = ['fast_run']))
fast_run = optdb.query(Query(include=['fast_run']))
# This is how the optimizer for the fast_compile mode is defined
fast_compile = optdb.query(Query(include = ['fast_compile']))
fast_compile = optdb.query(Query(include=['fast_compile']))
# This is the same as fast_run but no optimizations will replace
# any operation by an inplace version. This assumes, of course,
# that all inplace operations are tagged as 'inplace' (as they
# should!)
fast_run_no_inplace = optdb.query(Query(include = ['fast_run'], exclude = ['inplace']))
fast_run_no_inplace = fast_run.excluding('inplace')
fast_run_no_inplace = optdb.query(Query(include=['fast_run'],
exclude=['inplace']))
Registering an Optimizer
......@@ -500,7 +495,7 @@ Registering an Optimizer
Let's say we have a global optimizer called ``simplify``. We can add
it to ``optdb`` as follows:
.. code-block:: python
.. testcode::
# optdb.register(name, optimizer, order, *tags)
optdb.register('simplify', simplify, 0.5, 'fast_run')
......
......@@ -19,7 +19,7 @@ implemented using other already existing Ops. For example, instead of
writing a "sum_square_difference" Op, you should probably just write a
simple function:
.. code-block:: python
.. testcode::
from theano import tensor as T
......
......@@ -176,9 +176,7 @@ must define ``filter`` and shall override ``values_eq_approx``.
**filter**
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
.. testcode::
# Note that we shadow Python's function ``filter`` with this
# definition.
......@@ -215,7 +213,7 @@ when ``allow_downcast`` is False, i.e. no precision loss is allowed.
**values_eq_approx**
.. code-block:: python
.. testcode::
def values_eq_approx(x, y, tolerance=1e-4):
return abs(x - y) / (abs(x) + abs(y)) < tolerance
......@@ -246,9 +244,7 @@ contract. Recall that Type defines default implementations for all
required methods of the interface, except ``filter``. One way to make
the Type is to instantiate a plain Type and set the needed fields:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
.. testcode::
from theano import gof
......@@ -260,8 +256,6 @@ the Type is to instantiate a plain Type and set the needed fields:
Another way to make this Type is to make a subclass of ``gof.Type``
and define ``filter`` and ``values_eq_approx`` in the subclass:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
from theano import gof
......@@ -285,6 +279,38 @@ There is a small issue with defining ``double`` this way. All
instances of ``Double`` are technically the same Type. However, different
``Double`` Type instances do not compare the same:
.. testsetup::
from theano import gof
class Double(gof.Type):
def filter(self, x, strict=False, allow_downcast=None):
if strict:
if isinstance(x, float):
return x
else:
raise TypeError('Expected a float!')
elif allow_downcast:
return float(x)
else: # Covers both the False and None cases.
x_float = float(x)
if x_float == x:
return x_float
else:
raise TypeError('The double type cannot accurately represent '
'value %s (of type %s): you must explicitly '
'allow downcasting if you want to do this.'
% (x, type(x)))
def values_eq_approx(self, x, y, tolerance=1e-4):
return abs(x - y) / (abs(x) + abs(y)) < tolerance
def __str__(self):
return "double"
double = Double()
>>> double1 = Double()
>>> double2 = Double()
>>> double1 == double2
......@@ -299,10 +325,7 @@ There are several ways to make sure that equality testing works properly:
#. Define ``Double.__eq__`` so that instances of type Double
are equal. For example:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
.. testcode::
def __eq__(self, other):
return type(self) is Double and type(other) is Double
......@@ -355,9 +378,7 @@ attempt to clear up the confusion:
Final version
=============
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
.. testcode::
from theano import gof
......
......@@ -39,7 +39,9 @@ A unittest is a subclass of ``unittest.TestCase``, with member
functions with names that start with the string ``test``. For
example:
.. code-block:: python
.. testcode::
import unittest
class MyTestCase(unittest.TestCase):
def test0(self):
......@@ -115,7 +117,7 @@ built-in unittest module uses metaclasses to know about all the
them all, printing '.' for passed tests, and a stack trace for
exceptions. The standard footer code in theano's test files is:
.. code-block:: python
.. testcode::
if __name__ == '__main__':
unittest.main()
......@@ -134,7 +136,7 @@ To run all the tests in one or more ``TestCase`` subclasses:
To run just a single ``MyTestCase`` member test function called ``test0``:
.. code-block:: python
.. testcode::
MyTestCase('test0').debug()
......@@ -186,6 +188,7 @@ Example:
.. code-block:: python
import unittest
class TestTensorDot(unittest.TestCase):
def test_validity(self):
# do stuff
......@@ -201,8 +204,10 @@ functionality which is shared amongst all test methods in the test
case (i.e initializing data, parameters, seeding random number
generators -- more on this later)
.. code-block:: python
.. testcode:: writeUnitest
import unittest
class TestTensorDot(unittest.TestCase):
def setUp(self):
# data which will be used in various test methods
......@@ -231,16 +236,16 @@ Example:
def test_validity(self):
a = T.dmatrix('a')
b = T.dmatrix('b')
c = T.dot(a,b)
f = theano.function([a,b],[c])
cmp = f(self.avals,self.bvals) == numpy.dot(self.avals,self.bvals)
c = T.dot(a, b)
f = theano.function([a, b], [c])
cmp = f(self.avals, self.bvals) == numpy.dot(self.avals, self.bvals)
self.assertTrue(numpy.all(cmp))
Avoid hard-coding variables, as in the following case:
.. code-block:: python
self.assertTrue(numpy.all(f(self.avals,self.bvals)==numpy.array([[25,25,30,28],[21,18,14,25]])))
self.assertTrue(numpy.all(f(self.avals, self.bvals) == numpy.array([[25, 25, 30, 28], [21, 18, 14, 25]])))
This makes the test case less manageable and forces the user to update
the variables each time the input is changed or possibly when the
......@@ -275,6 +280,8 @@ Example:
.. code-block:: python
import unittest
class TestTensorDot(unittest.TestCase):
...
def test_3D_dot_fail(self):
......@@ -300,7 +307,9 @@ Example:
.. code-block:: python
f = T.function([a,b],[c],mode='FAST_RUN')
from theano import function
f = function([a,b],[c],mode='FAST_RUN')
Whenever possible, unit tests should omit this parameter. Leaving
out the mode will ensure that unit tests use the default mode.
......@@ -334,7 +343,7 @@ another (i.e always pass or always fail).
Instead of using ``numpy.random.seed`` to do this, we encourage users to
do the following:
.. code-block:: python
.. testcode::
from theano.tests import unittest_tools
......@@ -367,8 +376,10 @@ machine) can simply set ``config.unittests.rseed`` to 'random' (see
Similarly, to provide a seed to numpy.random.RandomState, simply use:
.. code-block:: python
.. testcode::
import numpy
rng = numpy.random.RandomState(unittest_tools.fetch_seed())
# OR providing an explicit seed
rng = numpy.random.RandomState(unittest_tools.fetch_seed(1231)) #again not recommended
......@@ -413,7 +424,9 @@ at point ``x`` is approximated as:
Here is the prototype for the verify_grad function.
>>> def verify_grad(fun, pt, n_tests=2, rng=None, eps=1.0e-7, abs_tol=0.0001, rel_tol=0.0001):
.. code-block:: python
def verify_grad(fun, pt, n_tests=2, rng=None, eps=1.0e-7, abs_tol=0.0001, rel_tol=0.0001):
``verify_grad`` raises an Exception if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) of a random
......@@ -445,7 +458,7 @@ In the general case, you can define ``fun`` as you want, as long as it
takes as inputs Theano symbolic variables and returns a single Theano
symbolic variable:
.. code-block:: python
.. testcode::
def test_verify_exprgrad():
def fun(x,y,z):
......@@ -460,7 +473,7 @@ symbolic variable:
Here is an example showing how to use ``verify_grad`` on an Op instance:
.. code-block:: python
.. testcode::
def test_flatten_outdimNone():
# Testing gradient w.r.t. all inputs of an op (in this example the op
......@@ -474,7 +487,7 @@ an Op's inputs. This is useful in particular when the gradient w.r.t. some of
the inputs cannot be computed by finite difference (e.g. for discrete inputs),
which would cause ``verify_grad`` to crash.
.. code-block:: python
.. testcode::
def test_crossentropy_softmax_grad():
op = tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias
......@@ -511,8 +524,13 @@ this is common, two helper functions exist to make your lives easier:
Here is an example of ``makeTester`` generating testcases for the Dot
product op:
.. code-block:: python
.. testcode::
from numpy import dot
from numpy.random import rand
from theano.tensor.tests.test_basic import makeTester
DotTester = makeTester(name = 'DotTester',
op = dot,
expected = lambda x, y: numpy.dot(x, y),
......
......@@ -120,7 +120,10 @@ the function was compiled.
For example, replace the following
.. code-block:: python
.. testcode:: faster
import theano
from theano import function
x = theano.tensor.scalar('x')
f = function([x], x + 1.)
......@@ -128,7 +131,11 @@ For example, replace the following
with
.. code-block:: python
.. testcode:: faster
import numpy
import theano
from theano import function
x = theano.tensor.scalar('x')
f = function([x], x + 1.)
......
......@@ -3,10 +3,10 @@
Glossary
========
..
# This is for the doctests in the file
>>> import theano
>>> from theano import tensor
.. testsetup::
import theano
from theano import tensor
.. glossary::
......
......@@ -330,8 +330,8 @@ a Python (or IPython) interpreter,
.. code-block:: python
>>> import theano
>>> theano.test() # doctest: +SKIP
import theano
theano.test()
You can also run them in-place from the Git checkout directory by typing
......
......@@ -405,7 +405,7 @@ compile C code for CPU execution.
Create a test file containing:
.. code-block:: python
.. testcode::
import numpy as np
import time
......@@ -423,6 +423,18 @@ Create a test file containing:
print "NP time: %f[s], theano time: %f[s] (times should be close when run on CPU!)" %(
np_end-np_start, t_end-t_start)
print "Result difference: %f" % (np.abs(AB-tAB).max(), )
.. testoutput::
:hide:
:options: +ELLIPSIS
NP time: ...[s], theano time: ...[s] (times should be close when run on CPU!)
Result difference: ...
.. code-block:: none
NP time: 1.480863[s], theano time: 1.475381[s] (times should be close when run on CPU!)
Result difference: 0.000000
Then run it. It should execute without problems and the Theano function
should run at a speed similar to the regular NumPy
......
......@@ -10,7 +10,9 @@ To run Theano on the Mammouth cluster, follow these simple steps:
the goodies for using the latest and greatest (optimized) libraries
(numpy, scipy, etc.)
>>> source /home/bastienf/.local.bashrc
.. code-block:: sh
source /home/bastienf/.local.bashrc
Perhaps even put this in your ``.bashrc``
......
......@@ -37,12 +37,11 @@ Theano doesn't use your grandfather's python.
* functions (function objects) can have attributes too. This technique
is often used to define a function's error messages.
.. code-block:: python
def f(): return f.a
f.a = 5
f() # returns 5
>>> def f(): return f.a
>>> f.a = 5
>>> f()
5
* Warning about mutual imports:
* script a.py file defined a class A.
......
......@@ -25,8 +25,12 @@ a cluster.
DebugMode can be used as follows:
.. code-block:: python
.. testcode::
import theano
from theano import tensor
from theano.compile.debugmode import DebugMode
x = tensor.dscalar('x')
f = theano.function([x], 10*x, mode='DebugMode')
......
......@@ -18,9 +18,11 @@ the interface for compiling graphs into callable objects.
You've already seen example usage in the basic tutorial... something like this:
>>> import theano
>>> x = theano.tensor.dscalar()
>>> f = theano.function([x], 2*x)
>>> print f(4) # prints 8.0
>>> f(4)
array(8.0)
The idea here is that we've compiled the symbolic graph (``2*x``) into a function that can be called on a number and will do some computations.
......
......@@ -80,6 +80,9 @@ A non-None `value` argument makes an In() instance an optional parameter
of the compiled function. For example, in the following code we are
defining an arity-2 function ``inc``.
>>> import theano.tensor as T
>>> from theano import function
>>> from theano.compile.io import In
>>> u, x, s = T.scalars('u', 'x', 's')
>>> inc = function([u, In(x, value=3), In(s, update=(s+x*u), value=10.0)], [])
......@@ -179,27 +182,38 @@ method to access values by indexing a Function directly by typing
To show some examples of these access methods...
.. code-block:: python
a, b, c = T.scalars('xys') # set the internal names of graph nodes
# Note that the name of c is 's', not 'c'!
fn = function([a, b, ((c, c+a+b), 10.0)], [])
>>> from theano import tensor as T, function
>>> a, b, c = T.scalars('xys') # set the internal names of graph nodes
>>> # Note that the name of c is 's', not 'c'!
>>> fn = function([a, b, ((c, c+a+b), 10.0)], [])
#the value associated with c is accessible in 3 ways
assert fn['s'] is fn.value[c]
assert fn['s'] is fn.container[c].value
>>> # the value associated with c is accessible in 3 ways
>>> fn['s'] is fn.value[c]
True
>>> fn['s'] is fn.container[c].value
True
assert fn['s'] == 10.0
fn(1, 2)
assert fn['s'] == 13.0
fn.s = 99.0
fn(1, 0)
assert fn['s'] == 100.0
fn.value[c] = 99.0
fn(1,0)
assert fn['s'] == 100.0
assert fn['s'] == fn.value[c]
assert fn['s'] == fn.container[c].value
>>> fn['s']
array(10.0)
>>> fn(1, 2)
[]
>>> fn['s']
array(13.0)
>>> fn['s'] = 99.0
>>> fn(1, 0)
[]
>>> fn['s']
array(100.0)
>>> fn.value[c] = 99.0
>>> fn(1,0)
[]
>>> fn['s']
array(100.0)
>>> fn['s'] == fn.value[c]
True
>>> fn['s'] == fn.container[c].value
True
Input Shortcuts
......@@ -221,31 +235,41 @@ Every element of the inputs list will be upgraded to an In instance if necessary
Example:
.. code-block:: python
import theano
from theano import tensor as T
from theano.compile.io import In
x = T.scalar()
y = T.scalar('y')
z = T.scalar('z')
w = T.scalar('w')
fn = theano.function(inputs = [x, y, In(z, value=42), ((w, w+x), 0)],
outputs = x + y + z)
# the first two arguments are required and the last two are
# optional and initialized to 42 and 0, respectively.
# The last argument, w, is updated with w + x each time the
# function is called.
fn(1) # illegal because there are two required arguments
fn(1, 2) # legal, z is 42, w goes 0 -> 1 (because w <- w + x), returns array(45.0)
fn(1, y = 2) # legal, z is 42, w goes 1 -> 2, returns array(45.0)
fn(x = 1, y = 2) # illegal because x was not named
fn(1, 2, 3) # legal, z is 3, w goes 2 -> 3, returns array(6.0)
fn(1, z = 3, y = 2) # legal, z is 3, w goes 3 -> 4, returns array(6.0)
fn(1, 2, w = 400) # legal, z is 42 again, w goes 400 -> 401, returns array(45.0)
fn(1, 2) # legal, z is 42, w goes 401 -> 402, returns array(45.0)
>>> import theano
>>> from theano import tensor as T
>>> from theano.compile.io import In
>>> x = T.scalar()
>>> y = T.scalar('y')
>>> z = T.scalar('z')
>>> w = T.scalar('w')
>>> fn = theano.function(inputs=[x, y, In(z, value=42), ((w, w+x), 0)],
... outputs=x + y + z)
>>> # the first two arguments are required and the last two are
>>> # optional and initialized to 42 and 0, respectively.
>>> # The last argument, w, is updated with w + x each time the
>>> # function is called.
>>> fn(1) # illegal because there are two required arguments # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: Missing required input: y
>>> fn(1, 2) # legal, z is 42, w goes 0 -> 1 (because w <- w + x)
array(45.0)
>>> fn(1, y=2) # legal, z is 42, w goes 1 -> 2
array(45.0)
>>> fn(x=1, y=2) # illegal because x was not named # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: Unknown input or state: x. The function has 3 named inputs (y, z, w), and 1 unnamed input which thus cannot be accessed through keyword argument (use 'name=...' in a variable's constructor to give it a name).
>>> fn(1, 2, 3) # legal, z is 3, w goes 2 -> 3
array(6.0)
>>> fn(1, z=3, y=2) # legal, z is 3, w goes 3 -> 4
array(6.0)
>>> fn(1, 2, w=400) # legal, z is 42 again, w goes 400 -> 401
array(45.0)
>>> fn(1, 2) # legal, z is 42, w goes 401 -> 402
array(45.0)
In the example above, ``z`` has value 42 when no value is explicitly given.
This default value is potentially used at every function invocation, because
......@@ -282,20 +306,25 @@ If a single ``Variable`` or ``Out`` instance is given as argument, then the comp
If a list of ``Variable`` or ``Out`` instances is given as argument, then the compiled function will return a list of their values.
.. code-block:: python
x, y, s = T.matrices('xys')
# print a list of 2 ndarrays
fn1 = theano.function([x], [x+x, Out((x+x).T, borrow=True)])
print fn1(numpy.asarray([[1,0],[0,1]]))
# print a list of 1 ndarray
fn2 = theano.function([x], [x+x])
print fn2(numpy.asarray([[1,0],[0,1]]))
# print an ndarray
fn3 = theano.function([x], outputs=x+x)
print fn3(numpy.asarray([[1,0],[0,1]]))
>>> import numpy
>>> from theano.compile.io import Out
>>> x, y, s = T.matrices('xys')
>>> # print a list of 2 ndarrays
>>> fn1 = theano.function([x], [x+x, Out((x+x).T, borrow=True)])
>>> fn1(numpy.asarray([[1,0],[0,1]]))
[array([[ 2., 0.],
[ 0., 2.]]), array([[ 2., 0.],
[ 0., 2.]])]
>>> # print a list of 1 ndarray
>>> fn2 = theano.function([x], [x+x])
>>> fn2(numpy.asarray([[1,0],[0,1]]))
[array([[ 2., 0.],
[ 0., 2.]])]
>>> # print an ndarray
>>> fn3 = theano.function([x], outputs=x+x)
>>> fn3(numpy.asarray([[1,0],[0,1]]))
array([[ 2., 0.],
[ 0., 2.]])
......@@ -21,8 +21,13 @@ of abnormal values: NaNs, Infs, and abnormally big values.
NanGuardMode can be used as follows:
.. code-block:: python
.. testcode::
import numpy
import theano
import theano.tensor as T
from theano.compile.nanguardmode import NanGuardMode
x = T.matrix()
w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
y = T.dot(x, w)
......@@ -36,12 +41,20 @@ input and output variable of each node. When abnormal values are
detected, it raises an error to indicate which node yields the NaNs. For
example, if we pass the following values to ``fun``:
.. code-block:: python
.. testcode::
infa = numpy.tile(
(numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5))
fun(infa)
.. testoutput::
:hide:
:options: +ELLIPSIS
Traceback (most recent call last):
...
AssertionError: ...
It will raise an AssertionError indicating that an Inf value is detected while
executing the function.
......
......@@ -25,8 +25,9 @@ process.
Creating a ProfileMode Instance
-------------------------------
First create a ProfileMode instance.
First create a ProfileMode instance.
>>> import theano
>>> from theano import ProfileMode
>>> profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
......@@ -62,6 +63,12 @@ Compiling your Graph with ProfileMode
Once the ProfileMode instance is created, simply compile your graph as you
would normally, by specifying the mode parameter.
.. testsetup::
import theano
input1, input2 = theano.tensor.scalars(2)
output1 = input1+input2
>>> # with functions
>>> f = theano.function([input1,input2],[output1], mode=profmode)
......@@ -76,13 +83,13 @@ of its time.
This is best shown through an example.
Let's use the example of logistic
regression. (Code for this example is in the file
``benchmark/regression/regression.py``.)
``benchmark/regression/regression.py``.)
Compiling the module with ProfileMode and calling ``profmode.print_summary()``
generates the following output:
.. code-block:: python
"""
ProfileMode.print_summary()
---------------------------
......@@ -141,7 +148,7 @@ generates the following output:
The Apply-wise summary prints the timing information for the worst
offending Apply nodes. This corresponds to individual Op applications
within your graph which take the longest to execute (so if you use dot
twice, you will see two entries there).
twice, you will see two entries there).
In the Op-wise summary, the execution times of all Apply nodes
executing the same Op are grouped together and the total execution
......@@ -186,7 +193,7 @@ Reference
Print three summaries to stdout that show where cpu time is spent during theano function executions (for all functions using this object instance).
:param n_apply_to_print: the number of apply nodes to print.
:param n_apply_to_print: the number of apply nodes to print.
The default 15, but can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`.
:param n_ops_to_print: the number of ops to print.
......@@ -198,10 +205,10 @@ Reference
""" As print_summary, but print the difference on two different profile mode.
TODO: Also we don't print the Apply-wise summary as it don't work for now.
TODO: make comparaison with gpu code.
:param other: the other instance of ProfileMode that we want to be compared to.
:param n_apply_to_print: the number of apply nodes to print.
:param n_apply_to_print: the number of apply nodes to print.
The default 15, but can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`.
:param n_ops_to_print: the number of ops to print.
......
......@@ -4,6 +4,10 @@
:mod:`utils` -- Utilities functions operating on the graph
==========================================================
.. testsetup:: *
from theano.gof.utils import *
.. module:: utils
:platform: Unix, Windows
:synopsis: Utilities functions operating on the graph
......
......@@ -9,6 +9,10 @@
:synopsis: low-level automatic differentiation
.. moduleauthor:: LISA
.. testsetup:: *
from theano.gradient import *
Symbolic gradient is usually computed from :func:`gradient.grad`, which offers a
more convenient syntax for the common case of wanting the gradient in some
expressions with respect to a scalar cost. The :func:`grad_sources_inputs`
......
......@@ -5,6 +5,10 @@
:mod:`misc.pkl_utils` - Tools for serialization.
================================================
.. testsetup:: *
from theano.misc.pkl_utils import *
.. autofunction:: theano.misc.pkl_utils.dump
.. autofunction:: theano.misc.pkl_utils.load
......
......@@ -9,6 +9,10 @@
:synopsis: Provides the Print Op and graph-printing routines.
.. moduleauthor:: LISA
.. testsetup::
import theano
Guide
======
......@@ -19,12 +23,13 @@ Intermediate values in a computation cannot be printed in
the normal python way with the print statement, because Theano has no *statements*.
Instead there is the :class:`Print` Op.
>>> from theano import tensor as T, function, printing
>>> x = T.dvector()
>>> hello_world_op = printing.Print('hello world')
>>> printed_x = hello_world_op(x)
>>> f = function([x], printed_x)
>>> f([1, 2, 3])
>>> # output: "hello world __str__ = [ 1. 2. 3.]"
>>> r = f([1, 2, 3])
hello world __str__ = [ 1. 2. 3.]
If you print more than one thing in a function like `f`, they will not
necessarily be printed in the order that you think. The order might even depend
......@@ -46,14 +51,15 @@ Theano also provides :func:`theano.printing.pydotprint` that creates a png image
1) The first is :func:`theano.pp`.
>>> from theano import pp, tensor as T
>>> x = T.dscalar('x')
>>> y = x ** 2
>>> gy = T.grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization
'((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))'
'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
>>> f = function([x], gy)
>>> pp(f.maker.fgraph.outputs[0])
'(2.0 * x)'
'(TensorConstant{2.0} * x)'
The parameter in T.dscalar('x') in the first line is the name of this variable
in the graph. This name is used when printing the graph to make it more readable.
......@@ -74,8 +80,7 @@ iteration number or other kinds of information in the name.
2) The second function to print a graph is :func:`theano.printing.debugprint`
>>> theano.printing.debugprint(f.maker.fgraph.outputs[0])
>>> theano.printing.debugprint(f.maker.fgraph.outputs[0]) # doctest: +NORMALIZE_WHITESPACE
Elemwise{mul,no_inplace} [@A] ''
|TensorConstant{2.0} [@B]
|x [@C]
......@@ -100,7 +105,7 @@ happen when that Variable has already been printed. Where else has it been
printed? Look for debugprint identifier using the Find feature of your text
editor.
>>> theano.printing.debugprint(gy)
>>> theano.printing.debugprint(gy) # doctest: +NORMALIZE_WHITESPACE
Elemwise{mul} [@A] ''
|Elemwise{mul} [@B] ''
| |Elemwise{second,no_inplace} [@C] ''
......@@ -113,10 +118,10 @@ Elemwise{mul} [@A] ''
|x [@E]
|Elemwise{sub} [@I] ''
|TensorConstant{2} [@F]
|InplaceDimShuffle{} [@J] ''
|DimShuffle{} [@J] ''
|TensorConstant{1} [@K]
>>> theano.printing.debugprint(gy, depth=2)
>>> theano.printing.debugprint(gy, depth=2) # doctest: +NORMALIZE_WHITESPACE
Elemwise{mul} [@A] ''
|Elemwise{mul} [@B] ''
|Elemwise{pow} [@C] ''
......
......@@ -35,7 +35,10 @@ happens automatically.
The equivalent Theano code would be:
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
k = T.iscalar("k")
A = T.vector("A")
......@@ -57,6 +60,13 @@ The equivalent Theano code would be:
print power(range(10),2)
print power(range(10),4)
.. testoutput::
[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
[ 0.00000000e+00 1.00000000e+00 1.60000000e+01 8.10000000e+01
2.56000000e+02 6.25000000e+02 1.29600000e+03 2.40100000e+03
4.09600000e+03 6.56100000e+03]
Let us go through the example line by line. What we did first is to
construct a function (using a lambda expression) that, given ``prior_result`` and
``A`` returns ``prior_result * A``. The order of parameters is fixed by scan:
......@@ -88,7 +98,9 @@ The tensor(s) to be looped over should be provided to scan using the
Here's an example that builds a symbolic calculation of a polynomial
from a list of its coefficients:
.. code-block:: python
.. testcode::
import numpy
coefficients = theano.tensor.vector("coefficients")
x = T.scalar("x")
......@@ -112,6 +124,11 @@ from a list of its coefficients:
print calculate_polynomial(test_coefficients, test_value)
print 1.0 * (3 ** 0) + 0.0 * (3 ** 1) + 2.0 * (3 ** 2)
.. testoutput::
19.0
19.0
There are a few things to note here.
First, we calculate the polynomial by first generating each of the coefficients, and
......@@ -142,7 +159,7 @@ pitfall to be careful of: the initial output state that is supplied, that is
generated at each iteration and moreover, it **must not involve an implicit
downcast** of the latter.
.. code-block:: python
.. testcode::
import numpy as np
......@@ -169,9 +186,13 @@ downcast** of the latter.
# test
some_num = 15
print triangular_sequence(some_num)
print [n * (n + 1) // 2 for n in xrange(some_num)]
print(triangular_sequence(some_num))
print([n * (n + 1) // 2 for n in xrange(some_num)])
.. testoutput::
[ 0 1 3 6 10 15 21 28 36 45 55 66 78 91 105]
[0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105]
Another simple example
----------------------
......@@ -183,7 +204,7 @@ and a "model" output array (whose shape and dtype will be mimicked),
and produces a sequence of arrays with the shape and dtype of the model,
with all values set to zero except at the provided array indices.
.. code-block:: python
.. testcode::
location = T.imatrix("location")
values = T.vector("values")
......@@ -205,7 +226,21 @@ with all values set to zero except at the provided array indices.
test_locations = numpy.asarray([[1, 1], [2, 3]], dtype=numpy.int32)
test_values = numpy.asarray([42, 50], dtype=numpy.float32)
test_output_model = numpy.zeros((5, 5), dtype=numpy.float32)
print assign_values_at_positions(test_locations, test_values, test_output_model)
print(assign_values_at_positions(test_locations, test_values, test_output_model))
.. testoutput::
[[[ 0. 0. 0. 0. 0.]
[ 0. 42. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]]
[[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 50. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]]]
This demonstrates that you can introduce new Theano variables into a scan function.
Another useful feature of scan is that it can handle shared variables.
For example, if we want to implement a Gibbs chain of length 10 we would do
the following:
.. code-block:: python
.. testsetup:: scan1
W = theano.shared(W_values) # we assume that ``W_values`` contains the
# initial values of your weight matrix
import theano
import numpy
W_values = numpy.random.random((2, 2))
bvis_values = numpy.random.random((2,))
bhid_values = numpy.random.random((2,))
bvis = theano.shared(bvis_values)
bhid = theano.shared(bhid_values)
.. testcode:: scan1
trng = T.shared_randomstreams.RandomStreams(1234)
import theano
from theano import tensor as T
def OneStep(vsample) :
hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
return trng.binomial(size=vsample.shape, n=1, p=vmean,
dtype=theano.config.floatX)
W = theano.shared(W_values) # we assume that ``W_values`` contains the
# initial values of your weight matrix
sample = theano.tensor.vector()
bvis = theano.shared(bvis_values)
bhid = theano.shared(bhid_values)
values, updates = theano.scan(OneStep, outputs_info=sample, n_steps=10)
trng = T.shared_randomstreams.RandomStreams(1234)
gibbs10 = theano.function([sample], values[-1], updates=updates)
def OneStep(vsample) :
hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
return trng.binomial(size=vsample.shape, n=1, p=vmean,
dtype=theano.config.floatX)
sample = theano.tensor.vector()
values, updates = theano.scan(OneStep, outputs_info=sample, n_steps=10)
gibbs10 = theano.function([sample], values[-1], updates=updates)
The first, and probably most crucial observation is that the updates
......@@ -251,7 +297,11 @@ update dictionary to your function, you will always get the same 10
sets of random numbers. You can even use the ``updates`` dictionary
afterwards. Look at this example :
.. code-block:: python
.. testsetup:: scan2
import theano
.. testcode:: scan2
a = theano.shared(1)
values, updates = theano.scan(lambda: {a: a+1}, n_steps=10)
......@@ -260,15 +310,22 @@ In this case the lambda expression does not require any input parameters
and returns an update dictionary which tells how ``a`` should be updated
after each step of scan. If we write :
.. code-block:: python
.. testcode:: scan2
b = a + 1
c = updates[a] + 1
f = theano.function([], [b, c], updates=updates)
print b
print c
print a.value
print(b)
print(c)
print(a.get_value())
.. testoutput:: scan2
:hide:
Elemwise{add,no_inplace}.0
Elemwise{add,no_inplace}.0
1
We will see that because ``b`` does not use the updated version of
``a``, it will be 2, ``c`` will be 12, while ``a.get_value()`` is ``11``.
......@@ -289,7 +346,7 @@ execution. To pass the shared variables to Scan you need to put them in a list
and give it to the ``non_sequences`` argument. Here is the Gibbs sampling code
updated:
.. code-block:: python
.. testcode:: scan1
W = theano.shared(W_values) # we assume that ``W_values`` contains the
# initial values of your weight matrix
......@@ -332,7 +389,7 @@ to be ensured by the user. Otherwise, it will result in an error.
Using the previous Gibbs sampling example:
.. code-block:: python
.. testcode:: scan1
# The new scan, using strict=True
values, updates = theano.scan(fn=OneStep,
......@@ -369,7 +426,12 @@ In this case we have a sequence over which we need to iterate ``u``,
and two outputs ``x`` and ``y``. To implement this with scan we first
construct a function that computes one iteration step :
.. code-block:: python
.. testsetup:: scan3
import theano
from theano import tensor as T
.. testcode:: scan3
def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out):
......@@ -392,9 +454,15 @@ an order, but also variables, since this is how scan figures out what should
be represented by what. Given that we have all
the Theano variables needed we construct our RNN as follows :
.. code-block:: python
.. testcode:: scan3
W = T.matrix()
W_in_1 = T.matrix()
W_in_2 = T.matrix()
W_feedback = T.matrix()
W_out = T.matrix()
u = T.matrix() # it is a sequence of vectors
u = T.matrix() # it is a sequence of vectors
x0 = T.matrix() # initial state of x has to be a matrix, since
# it has to cover x[-3]
y0 = T.vector() # y0 is just a vector since scan has only to provide
......@@ -432,7 +500,7 @@ provided condition evaluates to True.
As an example, we will compute all powers of two smaller than some provided
value ``max_value``.
.. code-block:: python
.. testcode::
def power_of_2(previous_power, max_value):
return previous_power*2, theano.scan_module.until(previous_power*2 > max_value)
......@@ -446,6 +514,10 @@ value ``max_value``.
f = theano.function([max_value], values)
print f(45)
.. testoutput::
[ 2. 4. 8. 16. 32. 64.]
As you can see, in order to terminate on condition, the only thing required
is for the inner function ``power_of_2`` to also return the condition
......
......@@ -63,23 +63,25 @@ The following example builds a matrix and returns its columns. It
prints the i-th column, i.e. a list of indices in the column and their
corresponding value in the second list.
>>> import numpy as np
>>> import scipy.sparse as sp
>>> data = np.asarray([7, 8, 9])
>>> indices = np.asarray([0, 1, 2])
>>> indptr = np.asarray([0, 2, 3, 3])
>>> m = sp.csc_matrix((data, indices, indptr), shape=(3, 3))
>>> print m.toarray()
[[7 0 0]
[8 0 0]
[0 9 0]]
>>> m.toarray()
array([[7, 0, 0],
[8, 0, 0],
[0, 9, 0]])
>>> i = 0
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[0, 1] [7, 8]
>>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
(array([0, 1], dtype=int32), array([7, 8]))
>>> i = 1
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[2] [9]
>>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
(array([2], dtype=int32), array([9]))
>>> i = 2
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[] []
>>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
(array([], dtype=int32), array([], dtype=int64))
CSR Matrix
----------
......@@ -97,23 +99,25 @@ The following example builds a matrix and returns its rows. It prints
the i-th row, i.e. a list of indices in the row and their
corresponding value in the second list.
>>> import numpy as np
>>> import scipy.sparse as sp
>>> data = np.asarray([7, 8, 9])
>>> indices = np.asarray([0, 1, 2])
>>> indptr = np.asarray([0, 2, 3, 3])
>>> m = sp.csr_matrix((data, indices, indptr), shape=(3, 3))
>>> print m.toarray()
[[7 8 0]
[0 0 9]
[0 0 0]]
>>> m.toarray()
array([[7, 8, 0],
[0, 0, 9],
[0, 0, 0]])
>>> i = 0
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[0, 1] [7, 8]
>>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
(array([0, 1], dtype=int32), array([7, 8]))
>>> i = 1
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[2] [9]
>>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
(array([2], dtype=int32), array([9]))
>>> i = 2
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[] []
>>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
(array([], dtype=int32), array([], dtype=int64))
List of Implemented Operations
==============================
......
......@@ -1665,8 +1665,8 @@ Linear Algebra
[0, 1, 2],
[0, 1, 2],
[0, 1, 2],
[0, 1, 2]], dtype=int8)
[0, 1, 2]], dtype=int8)
.. function:: ogrid
:returns: an instance which returns an open (i.e. not fleshed out) mesh-grid
......@@ -1685,8 +1685,8 @@ Linear Algebra
[3],
[4]], dtype=int8)
>>> b[1].eval()
array([[0, 1, 2, 3]], dtype=int8)
array([[0, 1, 2]], dtype=int8)
Gradient / Differentiation
==========================
......
......@@ -2,6 +2,10 @@
:mod:`tensor.extra_ops` -- Tensor Extra Ops
===================================================================
.. testsetup:: *
from theano.tensor.extra_ops import *
.. module:: tensor.extra_ops
:platform: Unix, Windows
:synopsis: Tensor Extra Ops
......
......@@ -42,11 +42,13 @@
Example:
.. code-block:: python
.. testcode::
x,y,b = T.dvectors('x','y','b')
import theano.tensor as T
x, y, b = T.dvectors('x', 'y', 'b')
W = T.dmatrix('W')
y = T.nnet.sigmoid(T.dot(W,x) + b)
y = T.nnet.sigmoid(T.dot(W, x) + b)
.. note:: The underlying code will return an exact 0 or 1 if an
element of x is too small or too big.
......@@ -102,7 +104,7 @@
.. note:: The underlying code will return an exact 0 if an element of x is too small.
.. code-block:: python
.. testcode::
x,y,b = T.dvectors('x','y','b')
W = T.dmatrix('W')
......@@ -131,7 +133,7 @@
Example of use:
.. code-block:: python
.. testcode::
x,y,b = T.dvectors('x','y','b')
W = T.dmatrix('W')
......@@ -155,10 +157,11 @@
to the binary cross-entropy (note that this assumes that x will
contain values between 0 and 1):
.. code-block:: python
.. testcode::
x, y, b = T.dvectors('x', 'y', 'b')
x, y, b, c = T.dvectors('x', 'y', 'b', 'c')
W = T.dmatrix('W')
V = T.dmatrix('V')
h = T.nnet.sigmoid(T.dot(W, x) + b)
x_recons = T.nnet.sigmoid(T.dot(V, h) + c)
recon_cost = T.nnet.binary_crossentropy(x_recons, x).mean()
......@@ -191,7 +194,12 @@
correct class (which is typically the training criterion in
classification settings).
.. code-block:: python
.. testsetup::
import theano
o = theano.tensor.ivector()
.. testcode::
y = T.nnet.softmax(T.dot(W, x) + b)
cost = T.nnet.categorical_crossentropy(y, o)
......
......@@ -2,6 +2,10 @@
:mod:`tensor.utils` -- Tensor Utils
===================================================================
.. testsetup::
from theano.tensor.utils import *
.. module:: tensor.utils
:platform: Unix, Windows
:synopsis: Tensor Utils
......
......@@ -15,32 +15,29 @@
tensor.
This is a type that represents a list in Theano. All elements must have
the same Theano type. Here is an example::
the same Theano type. Here is an example:
import theano.typed_list
tl = theano.typed_list.TypedListType(theano.tensor.fvector)()
v = theano.tensor.fvector()
o = theano.typed_list.append(tl, v)
f = theano.function([tl, v], o)
print f([[1, 2, 3], [4, 5]], [2])
#[array([ 1., 2., 3.], dtype=float32), array([ 4., 5.], dtype=float32), array([ 2.], dtype=float32)]
>>> import theano.typed_list
>>> tl = theano.typed_list.TypedListType(theano.tensor.fvector)()
>>> v = theano.tensor.fvector()
>>> o = theano.typed_list.append(tl, v)
>>> f = theano.function([tl, v], o)
>>> f([[1, 2, 3], [4, 5]], [2])
[array([ 1., 2., 3.], dtype=float32), array([ 4., 5.], dtype=float32), array([ 2.], dtype=float32)]
A second example with Scan. Scan doesn't yet have direct support of
TypedList, so you can only use it as non_sequences (not in sequences or
as outputs)::
import theano.typed_list
a = theano.typed_list.TypedListType(theano.tensor.fvector)()
l = theano.typed_list.length(a)
s, _ = theano.scan(fn=lambda i, tl: tl[i].sum(),
non_sequences=[a],
sequences=[theano.tensor.arange(l, dtype='int64')])
f = theano.function([a], s)
f([[1, 2, 3], [4, 5]])
#array([ 6., 9.], dtype=float32)
as outputs):
>>> import theano.typed_list
>>> a = theano.typed_list.TypedListType(theano.tensor.fvector)()
>>> l = theano.typed_list.length(a)
>>> s, _ = theano.scan(fn=lambda i, tl: tl[i].sum(),
... non_sequences=[a],
... sequences=[theano.tensor.arange(l, dtype='int64')])
>>> f = theano.function([a], s)
>>> f([[1, 2, 3], [4, 5]])
array([ 6., 9.], dtype=float32)
.. automodule:: theano.typed_list.basic
:members:
=================
Automatic updates
=================
.. note::
Proposed 2010 01 13
Done 2010 04 ??
The Module version of RandomStreams could arrange for the automatic update of
certain inputs (such as the random number generators) at the time of make(), so
that certain *obvious* patterns would work:
>>> rs = RandomStreams()
>>> u = rs.uniform(...)
>>> f = theano.function([], u)
>>> assert not numpy.all(f() == f())
Unfortunately, with shared variables this does not work! Function needs to be
told which shared variables to update. The current workaround is to do this:
>>> theano.function([], u, updates=rs.updates())
or this:
>>> theano.function([], u, updates=[u.update])
But it is all too easy to forget to do either of these workarounds, and
accidentally run a program whose random numbers are the same in every call.
Proposal
========
Add an optional `default_update` attribute to Shared variables. This will be
consulted by function. If no update expression is given for this variable in
the updates list, then this default will be inserted. Note well: a value of None for the
default_update means to update with a value of None! To have no default update,
make sure that the default_update attribute is not defined.
Add an optional argument to function: `no_default_updates`. This argument defaults to
False, which results in the current semantics.
A True value here would mean "ignore all default_update expressions", and this
would be useful for disabling implicit behaviour.
A list of shared variables here would mean to ignore the
default_update_expressions in these specific variables.
Alternatives
============
Consider a singleton 'NOUPDATE' object that can be used as a pseudo-expression
in the update list. This doesn't introduce a new keyword argument, which makes
it slightly more awkward to document in theano.function. Really though, I have
no strong feelings between this and the ``no_default_updates`` parameter.
......@@ -22,17 +22,20 @@ max. The third argument is an array into which the result can be
written.
So for example:
.. code-block:: python
>>> max(3, 4)
4
>>> numpy.max(3, 4)
3
>>> a,b,c = [numpy.asarray(i) for i in [0,1,2]]
>>> numpy.max(a,b,c)
0
>>> c
array(0)
.. doctest::
:options: +SKIP
>>> import numpy
>>> max(3, 4)
4
>>> numpy.max(3, 4) # This is an error
3
>>> a, b, c = [numpy.asarray(i) for i in [0, 1, 2]]
>>> numpy.max(a, b, c) # This is an error
0
>>> c
array(0)
Be careful!
......
......@@ -63,12 +63,13 @@ if __name__ == '__main__':
os.path.join(sys.path[0], os.pardir, os.pardir))
options = defaultdict(bool)
options.update(dict([x, y or True] for x, y in
getopt.getopt(sys.argv[1:],
'o:',
['epydoc', 'rst', 'help', 'nopdf', 'cache', 'test'])[0]))
opts, args = getopt.getopt(
sys.argv[1:],
'o:f:',
['epydoc', 'rst', 'help', 'nopdf', 'cache', 'test'])
options.update(dict([x, y or True] for x, y in opts))
if options['--help']:
print('Usage: %s [OPTIONS]' % sys.argv[0])
print('Usage: %s [OPTIONS] [files...]' % sys.argv[0])
print(' -o <dir>: output the html files in the specified dir')
print(' --cache: use the doctree cache')
print(' --rst: only compile the doc (requires sphinx)')
......@@ -77,6 +78,9 @@ if __name__ == '__main__':
print('(requires epydoc)')
print(' --test: run all the code samples in the documentaton')
print(' --help: this help')
print('If one or more files are specified after the options then only '
'those files will be built. Otherwise the whole tree is '
'processed. Specifying files will implies --cache.')
sys.exit(0)
if not (options['--epydoc'] or options['--rst'] or options['--test']):
......@@ -90,6 +94,9 @@ if __name__ == '__main__':
pass
outdir = options['-o'] or (throot + '/html')
files = None
if len(args) != 0:
files = [os.path.abspath(f) for f in args]
mkdir(outdir)
os.chdir(outdir)
......@@ -100,7 +107,6 @@ if __name__ == '__main__':
if options['--all'] or options['--epydoc']:
mkdir("api")
sys.path[0:0] = [throot]
#Generate HTML doc
......@@ -119,10 +125,13 @@ if __name__ == '__main__':
import sphinx
if extraopts is None:
extraopts = []
if not options['--cache']:
if not options['--cache'] and files is None:
extraopts.append('-E')
sphinx.main(['', '-b', builder] + extraopts +
[os.path.join(throot, 'doc'), workdir])
docpath = os.path.join(throot, 'doc')
inopt = [docpath, workdir]
if files is not None:
inopt.extend(files)
sphinx.main(['', '-b', builder] + extraopts + inopt)
if options['--all'] or options['--rst']:
mkdir("doc")
......
......@@ -11,9 +11,6 @@ To get us started with Theano and get a feel of what we're working with,
let's make a simple function: add two numbers together. Here is how you do
it:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_adding.test_adding_1
>>> import theano.tensor as T
>>> from theano import function
>>> x = T.dscalar('x')
......@@ -72,7 +69,7 @@ are, however, assigned the theano Type ``dscalar`` in their ``type``
field, as you can see here:
>>> type(x)
<class 'theano.tensor.basic.TensorVariable'>
<class 'theano.tensor.var.TensorVariable'>
>>> x.type
TensorType(float64, scalar)
>>> T.dscalar
......@@ -150,9 +147,6 @@ You might already have guessed how to do this. Indeed, the only change
from the previous example is that you need to instantiate *x* and
*y* using the matrix Types:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_adding.test_adding_2
>>> x = T.dmatrix('x')
>>> y = T.dmatrix('y')
>>> z = x + y
......@@ -201,14 +195,19 @@ with NumPy arrays may be found here: :ref:`tensor creation<libdoc_tensor_creatio
Exercise
========
.. code-block:: python
.. testcode::
import theano
a = theano.tensor.vector() # declare variable
out = a + a ** 10 # build symbolic expression
f = theano.function([a], out) # compile function
print(f([0, 1, 2]))
.. testoutput::
[ 0. 2. 1026.]
import theano
a = theano.tensor.vector() # declare variable
out = a + a ** 10 # build symbolic expression
f = theano.function([a], out) # compile function
print f([0, 1, 2]) # prints `array([0, 2, 1026])`
Modify and execute this code to compute this expression: a ** 2 + b ** 2 + 2 * a * b.
......
......@@ -55,30 +55,33 @@ Borrowing when Creating Shared Variables
A ``borrow`` argument can be provided to the shared-variable constructor.
.. testcode:: borrow
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_aliasing.test_aliasing_1
import numpy, theano
np_array = numpy.ones(2, dtype='float32')
.. code-block:: python
import numpy, theano
np_array = numpy.ones(2, dtype='float32')
s_default = theano.shared(np_array)
s_false = theano.shared(np_array, borrow=False)
s_true = theano.shared(np_array, borrow=True)
s_default = theano.shared(np_array)
s_false = theano.shared(np_array, borrow=False)
s_true = theano.shared(np_array, borrow=True)
By default (*s_default*) and when explicitly setting ``borrow=False``, the
shared variable we construct gets a [deep] copy of *np_array*. So changes we
subsequently make to *np_array* have no effect on our shared variable.
.. code-block:: python
.. testcode:: borrow
np_array += 1 # now it is an array of 2.0 s
np_array += 1 # now it is an array of 2.0 s
print(s_default.get_value())
print(s_false.get_value())
print(s_true.get_value())
.. testoutput:: borrow
[ 1. 1.]
[ 1. 1.]
[ 2. 2.]
s_default.get_value() # -> array([1.0, 1.0])
s_false.get_value() # -> array([1.0, 1.0])
s_true.get_value() # -> array([2.0, 2.0])
If we are running this with the CPU as the device,
then changes we make to *np_array* *right away* will show up in
......@@ -117,15 +120,12 @@ A ``borrow`` argument can also be used to control how a ``shared`` variable's va
retrieved.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_aliasing.test_aliasing_2
.. testcode:: borrow
.. code-block:: python
s = theano.shared(np_array)
s = theano.shared(np_array)
v_false = s.get_value(borrow=False) # N.B. borrow default is False
v_true = s.get_value(borrow=True)
v_false = s.get_value(borrow=False) # N.B. borrow default is False
v_true = s.get_value(borrow=True)
When ``borrow=False`` is passed to ``get_value``, it means that the return value
......@@ -146,7 +146,7 @@ then you should use the ``return_internal_type=True`` argument to
constant time), but might return various datatypes depending on contextual
factors (e.g. the compute device, the dtype of the NumPy array).
.. code-block:: python
.. testcode:: borrow
v_internal = s.get_value(borrow=True, return_internal_type=True)
......@@ -178,7 +178,12 @@ that Theano *may* reuse the buffer you provide as the internal storage for the v
A standard pattern for manually updating the value of a ``shared`` variable is as
follows:
.. code-block:: python
.. testsetup:: borrow
def some_inplace_fn(v):
return v
.. testcode:: borrow
s.set_value(
some_inplace_fn(s.get_value(borrow=True)),
......@@ -224,10 +229,7 @@ Borrowing when Constructing Function Objects
A ``borrow`` argument can also be provided to the ``In`` and ``Out`` objects
that control how ``theano.function`` handles its argument[s] and return value[s].
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_aliasing.test_aliasing_3
.. code-block:: python
.. testcode::
import theano, theano.tensor
......@@ -263,40 +265,40 @@ For GPU graphs, this borrowing can have a major speed impact. See the following
.. code-block:: python
from theano import function, config, shared, sandbox, tensor, Out
import numpy
import time
vlen = 10 * 30 * 768 # 10 x # cores x # threads per core
iters = 1000
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f1 = function([], sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x)))
f2 = function([],
Out(sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x)),
borrow=True))
t0 = time.time()
for i in xrange(iters):
r = f1()
t1 = time.time()
no_borrow = t1 - t0
t0 = time.time()
for i in xrange(iters):
r = f2()
t1 = time.time()
print 'Looping', iters, 'times took', no_borrow, 'seconds without borrow',
print 'and', t1 - t0, 'seconds with borrow.'
if numpy.any([isinstance(x.op, tensor.Elemwise) and
('Gpu' not in type(x.op).__name__)
for x in f1.maker.fgraph.toposort()]):
print 'Used the cpu'
else:
print 'Used the gpu'
from theano import function, config, shared, sandbox, tensor, Out
import numpy
import time
vlen = 10 * 30 * 768 # 10 x # cores x # threads per core
iters = 1000
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f1 = function([], sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x)))
f2 = function([],
Out(sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x)),
borrow=True))
t0 = time.time()
for i in xrange(iters):
r = f1()
t1 = time.time()
no_borrow = t1 - t0
t0 = time.time()
for i in xrange(iters):
r = f2()
t1 = time.time()
print 'Looping', iters, 'times took', no_borrow, 'seconds without borrow',
print 'and', t1 - t0, 'seconds with borrow.'
if numpy.any([isinstance(x.op, tensor.Elemwise) and
('Gpu' not in type(x.op).__name__)
for x in f1.maker.fgraph.toposort()]):
print 'Used the cpu'
else:
print 'Used the gpu'
Which produces this output:
.. code-block:: text
.. code-block:: none
$ THEANO_FLAGS=device=gpu0,floatX=float32 python test1.py
Using gpu device 0: GeForce GTX 275
......
......@@ -18,49 +18,55 @@ IfElse vs Switch
**Example**
.. code-block:: python
.. testcode::
from theano import tensor as T
from theano.ifelse import ifelse
import theano, time, numpy
from theano import tensor as T
from theano.ifelse import ifelse
import theano, time, numpy
a,b = T.scalars('a', 'b')
x,y = T.matrices('x', 'y')
a,b = T.scalars('a', 'b')
x,y = T.matrices('x', 'y')
z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))
z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y))
z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))
z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y))
f_switch = theano.function([a, b, x, y], z_switch,
mode=theano.Mode(linker='vm'))
f_lazyifelse = theano.function([a, b, x, y], z_lazy,
mode=theano.Mode(linker='vm'))
f_switch = theano.function([a, b, x, y], z_switch,
mode=theano.Mode(linker='vm'))
f_lazyifelse = theano.function([a, b, x, y], z_lazy,
mode=theano.Mode(linker='vm'))
val1 = 0.
val2 = 1.
big_mat1 = numpy.ones((10000, 1000))
big_mat2 = numpy.ones((10000, 1000))
val1 = 0.
val2 = 1.
big_mat1 = numpy.ones((10000, 1000))
big_mat2 = numpy.ones((10000, 1000))
n_times = 10
n_times = 10
tic = time.clock()
for i in xrange(n_times):
f_switch(val1, val2, big_mat1, big_mat2)
print 'time spent evaluating both values %f sec' % (time.clock() - tic)
tic = time.clock()
for i in xrange(n_times):
f_switch(val1, val2, big_mat1, big_mat2)
print 'time spent evaluating both values %f sec' % (time.clock() - tic)
tic = time.clock()
for i in xrange(n_times):
f_lazyifelse(val1, val2, big_mat1, big_mat2)
print 'time spent evaluating one value %f sec' % (time.clock() - tic)
tic = time.clock()
for i in xrange(n_times):
f_lazyifelse(val1, val2, big_mat1, big_mat2)
print 'time spent evaluating one value %f sec' % (time.clock() - tic)
.. testoutput::
:hide:
:options: +ELLIPSIS
time spent evaluating both values ... sec
time spent evaluating one value ... sec
In this example, the ``IfElse`` op spends less time (about half as much) than ``Switch``
since it computes only one variable out of the two.
.. code-block:: python
>>> python ifelse_switch.py
time spent evaluating both values 0.6700 sec
time spent evaluating one value 0.3500 sec
.. code-block:: none
$ python ifelse_switch.py
time spent evaluating both values 0.6700 sec
time spent evaluating one value 0.3500 sec
Unless ``linker='vm'`` or ``linker='cvm'`` are used, ``ifelse`` will compute both
variables and take the same computation time as ``switch``. Although the linker
......
......@@ -23,7 +23,7 @@ Interpreting Error Messages
Even in its default configuration, Theano tries to display useful error
messages. Consider the following faulty code.
.. code-block:: python
.. testcode::
import numpy as np
import theano
......@@ -38,24 +38,20 @@ messages. Consider the following faulty code.
Running the code above we see:
.. code-block:: bash
.. testoutput::
:options: +ELLIPSIS
Traceback (most recent call last):
File "test0.py", line 10, in <module>
f(np.ones((2,)), np.ones((3,)))
File "/PATH_TO_THEANO/theano/compile/function_module.py", line 605, in __call__
self.fn.thunks[self.fn.position_of_error])
File "/PATH_TO_THEANO/theano/compile/function_module.py", line 595, in __call__
outputs = self.fn()
ValueError: Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 2)
Apply node that caused the error: Elemwise{add,no_inplace}(<TensorType(float64, vector)>, <TensorType(float64, vector)>, <TensorType(float64, vector)>)
Inputs types: [TensorType(float64, vector), TensorType(float64, vector), TensorType(float64, vector)]
Inputs shapes: [(3,), (2,), (2,)]
Inputs strides: [(8,), (8,), (8,)]
Inputs scalar values: ['not scalar', 'not scalar', 'not scalar']
Traceback (most recent call last):
...
ValueError: Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 2)
Apply node that caused the error: Elemwise{add,no_inplace}(<TensorType(float64, vector)>, <TensorType(float64, vector)>, <TensorType(float64, vector)>)
Inputs types: [TensorType(float64, vector), TensorType(float64, vector), TensorType(float64, vector)]
Inputs shapes: [(3,), (2,), (2,)]
Inputs strides: [(8,), (8,), (8,)]
Inputs scalar values: ['not scalar', 'not scalar', 'not scalar']
HINT: Re-running with most Theano optimization disabled could give you a back-traces when this node was created. This can be done with by setting the Theano flags 'optimizer=fast_compile'. If that does not work, Theano optimization can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint of this apply node.
HINT: Re-running with most Theano optimization disabled could give you a back-traces when this node was created. This can be done with by setting the Theano flags 'optimizer=fast_compile'. If that does not work, Theano optimization can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint of this apply node.
Arguably the most useful information is approximately half-way through
the error message, where the kind of error is displayed along with its
......@@ -71,7 +67,7 @@ the faulty line, while ``exception_verbosity=high`` will display a
debugprint of the apply node. Using these hints, the end of the error
message becomes :
.. code-block:: bash
.. code-block:: none
Backtrace when the node is created:
File "test0.py", line 8, in <module>
......@@ -101,7 +97,7 @@ following example. Here, we use ``exception_verbosity=high`` and
``optimizer=None`` would and it could therefore be used instead of test values.
.. code-block:: python
.. testcode:: testvalue
import numpy
import theano
......@@ -137,7 +133,7 @@ following example. Here, we use ``exception_verbosity=high`` and
Running the above code generates the following error message:
.. code-block:: bash
.. testoutput:: testvalue
Traceback (most recent call last):
File "test1.py", line 31, in <module>
......@@ -185,7 +181,7 @@ of error can thus be identified with much more precision and much earlier in
the compilation pipeline. For example, running the above code yields the
following error message, which properly identifies *line 24* as the culprit.
.. code-block:: bash
.. code-block:: none
Traceback (most recent call last):
File "test2.py", line 24, in <module>
......@@ -228,7 +224,10 @@ The ``compute_test_value`` mechanism works as follows:
Theano provides a 'Print' op to do this.
.. code-block:: python
.. testcode::
import numpy
import theano
x = theano.tensor.dvector('x')
......@@ -243,6 +242,9 @@ Theano provides a 'Print' op to do this.
#this runs the graph with the message, and value printed
assert numpy.all( f_with_print([1, 2, 3]) == [5, 10, 15])
.. testoutput::
this is a very important value __str__ = [ 1. 2. 3.]
Since Theano runs your program in a topological order, you won't have precise
control over the order in which multiple ``Print()`` ops are evaluated. For a more
......@@ -324,7 +326,7 @@ You can use ``MonitorMode`` to inspect the inputs and outputs of each
node being executed when the function is called. The code snippet below
shows how to print all inputs and outputs:
.. code-block:: python
.. testcode::
import theano
......@@ -341,8 +343,9 @@ shows how to print all inputs and outputs:
post_func=inspect_outputs))
f(3)
# The code will print the following:
# 0 Elemwise{mul,no_inplace}(TensorConstant{5.0}, x) input(s) value(s): [array(5.0), array(3.0)] output(s) value(s): [array(15.0)]
.. testoutput::
0 Elemwise{mul,no_inplace}(TensorConstant{5.0}, x) input(s) value(s): [array(5.0), array(3.0)] output(s) value(s): [array(15.0)]
When using these ``inspect_inputs`` and ``inspect_outputs`` functions
with ``MonitorMode``, you should see [potentially a lot of] printed output.
......@@ -357,7 +360,7 @@ position, or only if a particular value showed up in one of the inputs or output
A typical example is to detect when NaN values are added into computations, which
can be achieved as follows:
.. code-block:: python
.. testcode:: compiled
import numpy
......@@ -385,12 +388,14 @@ can be achieved as follows:
post_func=detect_nan))
f(0) # log(0) * 0 = -inf * 0 = NaN
# The code above will print:
# *** NaN detected ***
# Elemwise{Composite{[mul(log(i0), i0)]}} [@A] ''
# |x [@B]
# Inputs : [array(0.0)]
# Outputs: [array(nan)]
.. testoutput:: compiled
:options: +NORMALIZE_WHITESPACE
*** NaN detected ***
Elemwise{Composite{(log(i0) * i0)}} [@A] ''
|x [@B]
Inputs : [array(0.0)]
Outputs: [array(nan)]
To help understand what is happening in your graph, you can
disable the ``local_elemwise_fusion`` and all ``inplace``
......@@ -402,12 +407,12 @@ will not be able to see the input that was overwritten in the ``post_func``
function. To disable those optimizations (with a Theano version after
0.6rc3), define the MonitorMode like this:
.. code-block:: python
.. testcode:: compiled
mode = theano.compile.MonitorMode(post_func=detect_nan).excluding(
'local_elemwise_fusion', 'inplace)
f = theano.function([x], [theano.tensor.log(x) * x],
mode=mode)
'local_elemwise_fusion', 'inplace')
f = theano.function([x], [theano.tensor.log(x) * x],
mode=mode)
.. note::
......@@ -422,12 +427,11 @@ the execution of the node can garbage collect its inputs that aren't
needed anymore by the Theano function. This can be done with the Theano
flag:
.. code-block:: cfg
.. code-block:: python
allow_gc=False
.. TODO: documentation for link.WrapLinkerMany
......@@ -443,28 +447,49 @@ functions.
Consider this example script ("ex.py"):
.. code-block:: python
.. testcode::
import theano
import numpy
import theano.tensor as T
a = T.dmatrix('a')
b = T.dmatrix('b')
import theano
import numpy
import theano.tensor as T
f = theano.function([a, b], [a * b])
a = T.dmatrix('a')
b = T.dmatrix('b')
# matrices chosen so dimensions are unsuitable for multiplication
mat1 = numpy.arange(12).reshape((3, 4))
mat2 = numpy.arange(25).reshape((5, 5))
f = theano.function([a, b], [a * b])
f(mat1, mat2)
# matrices chosen so dimensions are unsuitable for multiplication
mat1 = numpy.arange(12).reshape((3, 4))
mat2 = numpy.arange(25).reshape((5, 5))
.. testoutput::
:hide:
:options: +ELLIPSIS
f(mat1, mat2)
Traceback (most recent call last):
...
ValueError: Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 5)
Apply node that caused the error: Elemwise{mul,no_inplace}(a, b)
Toposort index: 0
Inputs types: [TensorType(float64, matrix), TensorType(float64, matrix)]
Inputs shapes: [(3, 4), (5, 5)]
Inputs strides: [(32, 8), (40, 8)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [['output']]
Backtrace when the node is created:
File "<doctest default[0]>", line 8, in <module>
f = theano.function([a, b], [a * b])
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
This is actually so simple the debugging could be done easily, but it's for
illustrative purposes. As the matrices can't be multiplied element-wise
(unsuitable shapes), we get the following exception:
.. code-block:: text
.. code-block:: none
File "ex.py", line 14, in <module>
f(mat1, mat2)
......
......@@ -40,9 +40,11 @@ Well, what you do is this:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_examples.test_examples_1
>>> import theano
>>> import theano.tensor as T
>>> x = T.dmatrix('x')
>>> s = 1 / (1 + T.exp(-x))
>>> logistic = function([x], s)
>>> logistic = theano.function([x], s)
>>> logistic([[0, 1], [-1, -2]])
array([[ 0.5 , 0.73105858],
[ 0.26894142, 0.11920292]])
......@@ -63,7 +65,7 @@ We can verify that this alternate form produces the same values:
.. theano/tests/test_tutorial.py:T_examples.test_examples_2
>>> s2 = (1 + T.tanh(x / 2)) / 2
>>> logistic2 = function([x], s2)
>>> logistic2 = theano.function([x], s2)
>>> logistic2([[0, 1], [-1, -2]])
array([[ 0.5 , 0.73105858],
[ 0.26894142, 0.11920292]])
......@@ -83,7 +85,7 @@ squared difference between two matrices *a* and *b* at the same time:
>>> diff = a - b
>>> abs_diff = abs(diff)
>>> diff_squared = diff**2
>>> f = function([a, b], [diff, abs_diff, diff_squared])
>>> f = theano.function([a, b], [diff, abs_diff, diff_squared])
.. note::
`dmatrices` produces as many outputs as names that you provide. It is a
......@@ -95,11 +97,9 @@ was reformatted for readability):
>>> f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
[array([[ 1., 0.],
[-1., -2.]]),
array([[ 1., 0.],
[ 1., 2.]]),
array([[ 1., 0.],
[ 1., 4.]])]
[-1., -2.]]), array([[ 1., 0.],
[ 1., 2.]]), array([[ 1., 0.],
[ 1., 4.]])]
Setting a Default Value for an Argument
......@@ -113,6 +113,7 @@ one. You can do it like this:
.. theano/tests/test_tutorial.py:T_examples.test_examples_6
>>> from theano import Param
>>> from theano import function
>>> x, y = T.dscalars('x', 'y')
>>> z = x + y
>>> f = function([x, Param(y, default=1)], z)
......@@ -257,8 +258,7 @@ for the purpose of one particular function.
>>> # The type of foo must match the shared variable we are replacing
>>> # with the ``givens``
>>> foo = T.scalar(dtype=state.dtype)
>>> skip_shared = function([inc, foo], fn_of_state,
givens=[(state, foo)])
>>> skip_shared = function([inc, foo], fn_of_state, givens=[(state, foo)])
>>> skip_shared(1, 3) # we're using 3 for the state, not state.value
array(7)
>>> state.get_value() # old state still there, but we didn't use it
......@@ -311,7 +311,7 @@ Here's a brief example. The setup code is:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_examples.test_examples_9
.. code-block:: python
.. testcode::
from theano.tensor.shared_randomstreams import RandomStreams
from theano import function
......@@ -382,6 +382,8 @@ For example:
>>> state_after_v0 = rv_u.rng.get_value().get_state()
>>> nearly_zeros() # this affects rv_u's generator
array([[ 0., 0.],
[ 0., 0.]])
>>> v1 = f()
>>> rng = rv_u.rng.get_value(borrow=True)
>>> rng.set_state(state_after_v0)
......@@ -410,53 +412,46 @@ corresponding to the random number generation process (i.e. RandomFunction{unifo
An example of how "random states" can be transferred from one theano function
to another is shown below.
.. code-block:: python
>>> from __future__ import print_function
>>> import theano
>>> import numpy
>>> import theano.tensor as T
>>> from theano.sandbox.rng_mrg import MRG_RandomStreams
>>> from theano.tensor.shared_randomstreams import RandomStreams
import theano
import numpy
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.tensor.shared_randomstreams import RandomStreams
class Graph():
def __init__(self, seed=123):
self.rng = RandomStreams(seed)
self.y = self.rng.uniform(size=(1,))
g1 = Graph(seed=123)
f1 = theano.function([], g1.y)
g2 = Graph(seed=987)
f2 = theano.function([], g2.y)
>>> class Graph():
... def __init__(self, seed=123):
... self.rng = RandomStreams(seed)
... self.y = self.rng.uniform(size=(1,))
print 'By default, the two functions are out of sync.'
print 'f1() returns ', f1()
print 'f2() returns ', f2()
>>> g1 = Graph(seed=123)
>>> f1 = theano.function([], g1.y)
def copy_random_state(g1, g2):
if isinstance(g1.rng, MRG_RandomStreams):
g2.rng.rstate = g1.rng.rstate
for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
su2[0].set_value(su1[0].get_value())
>>> g2 = Graph(seed=987)
>>> f2 = theano.function([], g2.y)
print 'We now copy the state of the theano random number generators.'
copy_random_state(g1, g2)
print 'f1() returns ', f1()
print 'f2() returns ', f2()
>>> # By default, the two functions are out of sync.
>>> f1()
array([ 0.72803009])
>>> f2()
array([ 0.55056769])
This gives the following output:
>>> def copy_random_state(g1, g2):
... if isinstance(g1.rng, MRG_RandomStreams):
... g2.rng.rstate = g1.rng.rstate
... for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
... su2[0].set_value(su1[0].get_value())
.. code-block:: bash
>>> # We now copy the state of the theano random number generators.
>>> copy_random_state(g1, g2)
>>> f1()
array([ 0.59044123])
>>> f2()
array([ 0.59044123])
# By default, the two functions are out of sync.
f1() returns [ 0.72803009]
f2() returns [ 0.55056769]
# We now copy the state of the theano random number generators.
f1() returns [ 0.59044123]
f2() returns [ 0.59044123]
Other Random Distributions
---------------------------
--------------------------
There are :ref:`other distributions implemented <libdoc_tensor_raw_random>`.
......@@ -487,50 +482,65 @@ A Real Example: Logistic Regression
The preceding elements are featured in this more realistic example.
It will be used repeatedly.
.. code-block:: python
.. testcode::
import numpy
import theano
import theano.tensor as T
rng = numpy.random
import numpy
import theano
import theano.tensor as T
rng = numpy.random
N = 400
feats = 784
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000
N = 400
feats = 784
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000
# Declare Theano symbolic variables
x = T.dmatrix("x")
y = T.dvector("y")
w = theano.shared(rng.randn(feats), name="w")
b = theano.shared(0., name="b")
print "Initial model:"
print w.get_value(), b.get_value()
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) # Probability that target = 1
prediction = p_1 > 0.5 # The prediction thresholded
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
gw, gb = T.grad(cost, [w, b]) # Compute the gradient of the cost
# (we shall return to this in a
# following section of this tutorial)
# Compile
train = theano.function(
inputs=[x,y],
outputs=[prediction, xent],
updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
predict = theano.function(inputs=[x], outputs=prediction)
# Train
for i in range(training_steps):
pred, err = train(D[0], D[1])
print "Final model:"
print w.get_value(), b.get_value()
print "target values for D:", D[1]
print "prediction on D:", predict(D[0])
# Declare Theano symbolic variables
x = T.matrix("x")
y = T.vector("y")
w = theano.shared(rng.randn(feats), name="w")
b = theano.shared(0., name="b")
print("Initial model:")
print(w.get_value())
print(b.get_value())
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) # Probability that target = 1
prediction = p_1 > 0.5 # The prediction thresholded
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
gw, gb = T.grad(cost, [w, b]) # Compute the gradient of the cost
# (we shall return to this in a
# following section of this tutorial)
# Compile
train = theano.function(
inputs=[x,y],
outputs=[prediction, xent],
updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
predict = theano.function(inputs=[x], outputs=prediction)
# Train
for i in range(training_steps):
pred, err = train(D[0], D[1])
print("Final model:")
print(w.get_value())
print(b.get_value())
print("target values for D:")
print(D[1])
print("prediction on D:")
print(predict(D[0]))
.. testoutput::
:hide:
:options: +ELLIPSIS
Initial model:
...
0.0
Final model:
...
target values for D:
...
prediction on D:
...
......@@ -56,7 +56,7 @@ This section provides an overview of the methods you typically have to implement
possibilities you may encounter or need. For that refer to
:ref:`op_contract`.
.. code-block:: python
.. testcode::
import theano
......@@ -73,9 +73,9 @@ possibilities you may encounter or need. For that refer to
# Other type of implementation
# C implementation: [see theano web site for other functions]
def c_code(...):
# ...
def c_code(self, node, inputs, outputs, sub):
pass
# Other implementations (pycuda, ...):
def make_thunk(self, node, storage_map, _, _2):
pass
......@@ -83,7 +83,7 @@ possibilities you may encounter or need. For that refer to
# optional:
check_input = True
def __init__(self, ...):
def __init__(self, *args):
pass
def grad(self, inputs, g):
......@@ -92,7 +92,7 @@ possibilities you may encounter or need. For that refer to
def R_op(self, inputs, eval_points):
pass
def infer_shape(node, (i0_shapes, ...)):
def infer_shape(node, input_shapes):
pass
.. ../extending/op.txt
......@@ -250,7 +250,7 @@ Other methods can be optionally defined by the op.
Op Example
==========
.. code-block:: python
.. testcode:: example
import theano
......@@ -288,7 +288,7 @@ Op Example
You can try it as follows:
.. code-block:: python
.. testcode:: example
x = theano.tensor.matrix()
f = theano.function([x], DoubleOp()(x))
......@@ -296,8 +296,28 @@ You can try it as follows:
inp = numpy.random.rand(5, 4)
out = f(inp)
assert numpy.allclose(inp * 2, out)
print inp
print out
print(inp)
print(out)
.. testoutput:: example
:hide:
:options: +ELLIPSIS
...
...
.. code-block:: none
[[ 0.02443785 0.67833979 0.91954769 0.95444365]
[ 0.60853382 0.7770539 0.78163219 0.92838837]
[ 0.04427765 0.37895602 0.23155797 0.4934699 ]
[ 0.20551517 0.7419955 0.34500905 0.49347629]
[ 0.24082769 0.49321452 0.24566545 0.15351132]]
[[ 0.04887571 1.35667957 1.83909538 1.90888731]
[ 1.21706764 1.55410779 1.56326439 1.85677674]
[ 0.08855531 0.75791203 0.46311594 0.9869398 ]
[ 0.41103034 1.48399101 0.69001811 0.98695258]
[ 0.48165539 0.98642904 0.4913309 0.30702264]]
Example for properties of an Op
......@@ -310,7 +330,7 @@ We create an Op that takes a variable ``x`` and returns ``a*x+b``.
We want to say that two such ops are equal when their values of ``a``
and ``b`` are equal.
.. code-block:: python
.. testcode:: properties
import theano
......@@ -349,7 +369,7 @@ It also generates a default :func:`__str__` method that prints the attribute nam
We can test this by running the following segment:
.. code-block:: python
.. testcode:: properties
mult4plus5op = AXPBOp(4, 5)
another_mult4plus5op = AXPBOp(4, 5)
......@@ -383,7 +403,10 @@ returns the right answer. If you detect an error, you must raise an
*exception*. You can use the ``assert`` keyword to automatically raise an
``AssertionError``.
.. code-block:: python
.. testcode:: tests
import numpy
import theano
from theano.tests import unittest_tools as utt
from theano import config
......@@ -439,7 +462,7 @@ square matrices will not detect the problem. This is why the
your op works only with such matrices, you can disable the warning with the
``warn=False`` parameter.
.. code-block:: python
.. testcode:: tests
from theano.tests import unittest_tools as utt
from theano import config
......@@ -468,7 +491,7 @@ If there is an error, the function raises an exception. If you want to
see it fail, you can implement an incorrect gradient (for instance, by removing
the multiplication by 2).
.. code-block:: python
.. testcode:: tests
def test_grad(self):
theano.tests.unittest_tools.verify_grad(self.op,
......@@ -486,7 +509,7 @@ implementation of the Rop method of a particular op.
For instance, to verify the Rop method of the DoubleOp, you can use this:
.. code-block:: python
.. testcode:: tests
import numpy
import theano.tests
......@@ -562,7 +585,7 @@ of the file containing a specific test of interest and run the
file. In this example, the test *test_DoubleRop* in the class
*test_double_op* would be performed.
.. code-block:: python
.. testcode:: tests
if __name__ == '__main__':
t = test_DoubleRop("test_double_rop")
......@@ -572,7 +595,7 @@ file. In this example, the test *test_DoubleRop* in the class
We recommend that when we execute a file, we run all tests in that
file. This can be done by adding this at the end of your test files:
.. code-block:: python
.. testcode:: tests
if __name__ == '__main__':
unittest.main()
......@@ -638,10 +661,11 @@ signature:
as_op Example
-------------
.. code-block:: python
.. testcode:: asop
import theano
import numpy
from theano import function
from theano.compile.ops import as_op
def infer_shape_numpy_dot(node, input_shapes):
......@@ -655,13 +679,13 @@ as_op Example
You can try it as follows:
.. code-block:: python
.. testcode:: asop
x = theano.tensor.fmatrix()
y = theano.tensor.fmatrix()
f = function([x, y], numpy_dot(x, y))
inp1 = numpy.random.rand(5, 4)
inp2 = numpy.random.rand(4, 7)
inp1 = numpy.random.rand(5, 4).astype('float32')
inp2 = numpy.random.rand(4, 7).astype('float32')
out = f(inp1, inp2)
......@@ -701,27 +725,27 @@ the documentation.
Here is an example how to add docstring to a class.
.. code-block:: python
.. testcode::
import theano
class DoubleOp(theano.Op):
""" Double each element of a tensor.
""" Double each element of a tensor.
:param x: input tensor.
:param x: input tensor.
:return: a tensor of the same shape and dtype as the input with all
:return: a tensor of the same shape and dtype as the input with all
values doubled.
:note:
this is a test note
:note:
this is a test note
:seealso:
You can use the elemwise op to replace this example.
Just execute `x * 2` with x being a Theano variable.
:seealso:
You can use the elemwise op to replace this example.
Just execute `x * 2` with x being a Theano variable.
.. versionadded:: 0.6
"""
.. versionadded:: 0.6
"""
This is how it will show up for files that we auto-list in the library
documentation:
......
......@@ -129,7 +129,7 @@ that the data is not only contiguous in memory but also that it is organized
such that the index of the latest dimension changes the fastest. If the
following array
.. code-block:: python
.. testcode::
x = [[1, 2, 3],
[4, 5, 6]]
......@@ -337,7 +337,7 @@ commonly used.
of C code that you should include in your C code (after ensuring that a
Python exception is set) if it needs to raise an exception. Ex:
.. code-block:: python
.. code-block:: c
c_code = """
PyErr_Format(PyExc_ValueError, "X does not have the right value");
......@@ -354,7 +354,7 @@ commonly used.
``%`` characters in the format characters need to be escaped since the C
code itself is defined in a string which undergoes string formatting.
.. code-block:: python
.. code-block:: c
c_code = """
PyErr_Format(PyExc_ValueError,
......@@ -440,7 +440,7 @@ need to validate that the output storage has been allocated and has the same
shape as our vector input. If it is not the case, we allocate a new output
storage with the right shape and number of dimensions.
.. code-block:: python
.. testcode:: examples
import numpy
import theano
......@@ -565,7 +565,7 @@ the inputs (because of the upcast in the method ``make_node()``), the typenum
of the output has to be obtained in the Python code and then included in the
C code.
.. code-block:: python
.. testcode:: examples
class VectorTimesVector(gof.Op):
__props__ = ()
......@@ -705,7 +705,7 @@ implemented using the ``COp`` class.
The new op is defined inside a Python file with the following code :
.. code-block:: python
.. testcode::
import theano
from theano import gof
......@@ -917,8 +917,8 @@ In addition to these macros, the ``init_code_struct``, ``code``, and
.. code-block:: c
if (error) {
// Set python exception
FAIL
// Set python exception
FAIL
}
You can add a semicolon after the macro if it makes your editor
......
......@@ -21,36 +21,50 @@ should be written:
Defining a shared variable for the lookup table
>>> lookup_table = theano.shared(matrix_ndarray).
.. code-block:: python
lookup_table = theano.shared(matrix_ndarray)
Getting a subset of the table (some rows or some columns) by passing
an integer vector of indices corresponding to those rows or columns.
>>> subset = lookup_table[vector_of_indices]
.. code-block:: python
subset = lookup_table[vector_of_indices]
From now on, use only 'subset'. Do not call lookup_table[vector_of_indices]
again. This causes problems with grad as this will create new variables.
Defining cost which depends only on subset and not the entire lookup_table
>>> cost = something that depends on subset
>>> g = theano.grad(cost, subset)
.. code-block:: python
cost = something that depends on subset
g = theano.grad(cost, subset)
There are two ways for updating the parameters:
Either use inc_subtensor or set_subtensor. It is recommended to use
inc_subtensor. Some theano optimizations do the conversion between
the two functions, but not in all cases.
>>> updates = inc_subtensor(subset, g*lr)
.. code-block:: python
updates = inc_subtensor(subset, g*lr)
OR
>>> updates = set_subtensor(subset, subset + g*lr)
.. code-block:: python
updates = set_subtensor(subset, subset + g*lr)
Currently we just cover the case here,
not if you use inc_subtensor or set_subtensor with other types of indexing.
Defining the theano function
>>> f=theano.function(..., updates=updates)
.. code-block:: python
f = theano.function(..., updates=updates)
Note that you can compute the gradient of the cost function w.r.t.
the entire lookup_table, and the gradient will have nonzero rows only
......
......@@ -23,17 +23,19 @@ Here is the code to compute this gradient:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_examples.test_examples_4
>>> from theano import pp
>>> import theano
>>> import theano.tensor as T
>>> from theano import pp
>>> x = T.dscalar('x')
>>> y = x ** 2
>>> gy = T.grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization
'((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))'
>>> f = function([x], gy)
'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
>>> f = theano.function([x], gy)
>>> f(4)
array(8.0)
>>> f(94.2)
array(188.40000000000001)
array(188.4)
In this example, we can see from ``pp(gy)`` that we are computing
the correct symbolic gradient.
......@@ -44,7 +46,7 @@ the correct symbolic gradient.
The optimizer simplifies the symbolic gradient expression. You can see
this by digging inside the internal properties of the compiled function.
.. code-block:: python
.. testcode::
pp(f.maker.fgraph.outputs[0])
'(2.0 * x)'
......@@ -68,7 +70,7 @@ logistic is: :math:`ds(x)/dx = s(x) \cdot (1 - s(x))`.
>>> x = T.dmatrix('x')
>>> s = T.sum(1 / (1 + T.exp(-x)))
>>> gs = T.grad(s, x)
>>> dlogistic = function([x], gs)
>>> dlogistic = theano.function([x], gs)
>>> dlogistic([[0, 1], [-1, -2]])
array([[ 0.25 , 0.19661193],
[ 0.19661193, 0.10499359]])
......@@ -117,10 +119,12 @@ do is to loop over the entries in *y* and compute the gradient of
effort is being done for improving the performance of ``scan``. We
shall return to :ref:`scan<tutloop>` later in this tutorial.
>>> import theano
>>> import theano.tensor as T
>>> x = T.dvector('x')
>>> y = x ** 2
>>> J, updates = theano.scan(lambda i, y,x : T.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y,x])
>>> f = function([x], J, updates=updates)
>>> f = theano.function([x], J, updates=updates)
>>> f([4, 4])
array([[ 8., 0.],
[ 0., 8.]])
......@@ -154,13 +158,12 @@ difference is that now, instead of computing the Jacobian of some expression
*y*, we compute the Jacobian of ``T.grad(cost,x)``, where *cost* is some
scalar.
>>> x = T.dvector('x')
>>> y = x ** 2
>>> cost = y.sum()
>>> gy = T.grad(cost, x)
>>> H, updates = theano.scan(lambda i, gy,x : T.grad(gy[i], x), sequences=T.arange(gy.shape[0]), non_sequences=[gy, x])
>>> f = function([x], H, updates=updates)
>>> f = theano.function([x], H, updates=updates)
>>> f([4, 4])
array([[ 2., 0.],
[ 0., 2.]])
......@@ -196,7 +199,6 @@ form of the operation. In order to evaluate the *R-operation* of
expression *y*, with respect to *x*, multiplying the Jacobian with *v*
you need to do something similar to this:
>>> W = T.dmatrix('W')
>>> V = T.dmatrix('V')
>>> x = T.dvector('x')
......@@ -247,7 +249,6 @@ Hessian matrix, you have two options that will
give you the same result, though these options might exhibit differing performances.
Hence, we suggest profiling the methods before using either one of the two:
>>> x = T.dvector('x')
>>> v = T.dvector('v')
>>> y = T.sum(x ** 2)
......
......@@ -38,6 +38,10 @@ The two modules ``pickle`` and ``cPickle`` have the same functionalities, but
You can serialize (or *save*, or *pickle*) objects to a file with
``cPickle.dump``:
.. testsetup::
my_obj = object()
>>> f = file('obj.save', 'wb')
>>> cPickle.dump(my_obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
>>> f.close()
......@@ -64,9 +68,15 @@ To de-serialize (or *load*, or *unpickle*) a pickled file, use
You can pickle several objects into the same file, and load them all (in the
same order):
.. testsetup::
obj1 = object()
obj2 = object()
obj3 = object()
>>> f = file('objects.save', 'wb')
>>> for obj in [obj1, obj2, obj3]:
>>> cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
... cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
>>> f.close()
Then:
......@@ -74,7 +84,7 @@ Then:
>>> f = file('objects.save', 'rb')
>>> loaded_objects = []
>>> for i in range(3):
>>> loaded_objects.append(cPickle.load(f))
... loaded_objects.append(cPickle.load(f))
>>> f.close()
For more details about pickle's usage, see
......@@ -102,7 +112,7 @@ along every instance of your model.
For instance, you can define functions along the lines of:
.. code-block:: python
.. testcode::
def __getstate__(self):
state = dict(self.__dict__)
......@@ -129,6 +139,7 @@ just load the parameters manually with `numpy`.
.. code-block:: python
import numpy
numpy.load('model.zip')
This approach could be beneficial if you are sharing your model with people who
......@@ -153,7 +164,7 @@ don't.
For instance, if the only parameters you want to save are a weight
matrix *W* and a bias *b*, you can define:
.. code-block:: python
.. testcode::
def __getstate__(self):
return (self.W, self.b)
......@@ -167,7 +178,7 @@ If at some point in time *W* is renamed to *weights* and *b* to
*bias*, the older pickled files will still be usable, if you update these
functions to reflect the change in name:
.. code-block:: python
.. testcode::
def __getstate__(self):
return (self.weights, self.bias)
......
......@@ -26,7 +26,7 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
**Scan Example: Computing tanh(x(t).dot(W) + b) elementwise**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -46,15 +46,21 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
b = np.ones((2), dtype=theano.config.floatX)
b[1] = 2
print compute_elementwise(x, w, b)[0]
print(compute_elementwise(x, w, b)[0])
# comparison with numpy
print np.tanh(x.dot(w) + b)
print(np.tanh(x.dot(w) + b))
.. testoutput::
[[ 0.96402758 0.99505475]
[ 0.96402758 0.99505475]]
[[ 0.96402758 0.99505475]
[ 0.96402758 0.99505475]]
**Scan Example: Computing the sequence x(t) = tanh(x(t - 1).dot(W) + y(t).dot(U) + p(T - t).dot(V))**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -84,18 +90,31 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
p[0, :] = 3
v = np.ones((2, 2), dtype=theano.config.floatX)
print compute_seq(x, w, y, u, p, v)[0]
print(compute_seq(x, w, y, u, p, v)[0])
# comparison with numpy
x_res = np.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1, 5):
x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
print x_res
print(x_res)
.. testoutput::
[[-0.99505475 -0.99505475]
[ 0.96471973 0.96471973]
[ 0.99998585 0.99998585]
[ 0.99998771 0.99998771]
[ 1. 1. ]]
[[-0.99505475 -0.99505475]
[ 0.96471973 0.96471973]
[ 0.99998585 0.99998585]
[ 0.99998771 0.99998771]
[ 1. 1. ]]
**Scan Example: Computing norms of lines of X**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -108,14 +127,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
# test value
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_lines(x)[0]
print(compute_norm_lines(x)[0])
# comparison with numpy
print np.sqrt((x ** 2).sum(1))
print(np.sqrt((x ** 2).sum(1)))
.. testoutput::
[ 1. 2. 3. 4. 5. 0.]
[ 1. 2. 3. 4. 5. 0.]
**Scan Example: Computing norms of columns of X**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -128,14 +152,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
# test value
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_cols(x)[0]
print(compute_norm_cols(x)[0])
# comparison with numpy
print np.sqrt((x ** 2).sum(0))
print(np.sqrt((x ** 2).sum(0)))
.. testoutput::
[ 0. 1. 2. 3. 4. 5.]
[ 0. 1. 2. 3. 4. 5.]
**Scan Example: Computing trace of X**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -153,14 +182,20 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
# test value
x = np.eye(5, dtype=theano.config.floatX)
x[0] = np.arange(5, dtype=theano.config.floatX)
print compute_trace(x)[0]
print(compute_trace(x)[0])
# comparison with numpy
print np.diagonal(x).sum()
print(np.diagonal(x).sum())
.. testoutput::
4.0
4.0
**Scan Example: Computing the sequence x(t) = x(t - 2).dot(U) + x(t - 1).dot(V) + tanh(x(t - 1).dot(W) + b)**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -187,7 +222,7 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
n = 10
b = np.ones((2), dtype=theano.config.floatX)
print compute_seq2(x, u, v, w, b, n)
print(compute_seq2(x, u, v, w, b, n))
# comparison with numpy
x_res = np.zeros((10, 2))
......@@ -197,11 +232,35 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
for i in range(2, 10):
x_res[i] = (x_res[i - 2].dot(u) + x_res[i - 1].dot(v) +
np.tanh(x_res[i - 1].dot(w) + b))
print x_res
print(x_res)
.. testoutput::
[array([[ 1.40514825, 1.40514825],
[ 2.88898899, 2.38898899],
[ 4.34018291, 4.34018291],
[ 6.53463142, 6.78463142],
[ 9.82972243, 9.82972243],
[ 14.22203814, 14.09703814],
[ 20.07439936, 20.07439936],
[ 28.12291843, 28.18541843],
[ 39.1913681 , 39.1913681 ],
[ 54.28407732, 54.25282732]])]
[[ 1.40514825 1.40514825]
[ 2.88898899 2.38898899]
[ 4.34018291 4.34018291]
[ 6.53463142 6.78463142]
[ 9.82972243 9.82972243]
[ 14.22203814 14.09703814]
[ 20.07439936 20.07439936]
[ 28.12291843 28.18541843]
[ 39.1913681 39.1913681 ]
[ 54.28407732 54.25282732]]
**Scan Example: Computing the Jacobian of y = tanh(v.dot(A)) wrt x**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -221,13 +280,22 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
print compute_jac_t(w, x)[0]
# compare with numpy
print ((1 - np.tanh(x.dot(w)) ** 2) * w).T
print(((1 - np.tanh(x.dot(w)) ** 2) * w).T)
.. testoutput::
[[ 0.41997434 0. 0.41997434 0. 0. ]
[ 0. 1. 1. 0. 0. ]
[ 0. 0. 1. 0. 0. ]]
[[ 0.41997434 0. 0.41997434 0. 0. ]
[ 0. 1. 1. 0. 0. ]
[ 0. 0. 1. 0. 0. ]]
Note that we need to iterate over the indices of ``y`` and not over the elements of ``y``. The reason is that scan creates a placeholder variable for its internal function, and this placeholder variable does not have the same dependencies as the variables that will replace it.
**Scan Example: Accumulate number of loop during a scan**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -246,7 +314,7 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
**Scan Example: Computing tanh(v.dot(W) + b) * d where d is binomial**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -268,13 +336,26 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)
print compute_with_bnoise(x, w, b)
print(compute_with_bnoise(x, w, b))
.. testoutput::
[array([[ 0.96402758, 0. ],
[ 0. , 0.96402758],
[ 0. , 0. ],
[ 0.76159416, 0.76159416],
[ 0.76159416, 0. ],
[ 0. , 0.76159416],
[ 0. , 0.76159416],
[ 0. , 0.76159416],
[ 0. , 0. ],
[ 0.76159416, 0.76159416]])]
Note that if you want to use a random variable ``d`` that will not be updated through scan loops, you should pass this variable as a ``non_sequences`` argument.
**Scan Example: Computing pow(A, k)**
.. code-block:: python
.. testcode::
import theano
import theano.tensor as T
......@@ -298,13 +379,16 @@ Note that if you want to use a random variable ``d`` that will not be updated th
power = theano.function(inputs=[A, k], outputs=final_result,
updates=updates)
print power(range(10), 2)
#[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
print(power(range(10), 2))
.. testoutput::
[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
**Scan Example: Calculating a Polynomial**
.. code-block:: python
.. testcode::
import numpy
import theano
......@@ -329,7 +413,10 @@ Note that if you want to use a random variable ``d`` that will not be updated th
test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32)
print calculate_polynomial(test_coeff, 3)
# 19.0
.. testoutput::
19.0
......
......@@ -43,7 +43,7 @@ Exercise
Consider the logistic regression:
.. code-block:: python
.. testcode::
import numpy
import theano
......@@ -63,8 +63,6 @@ Consider the logistic regression:
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
#print "Initial model:"
#print w.get_value(), b.get_value()
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
......@@ -77,33 +75,40 @@ Consider the logistic regression:
train = theano.function(
inputs=[x,y],
outputs=[prediction, xent],
updates={w:w-0.01*gw, b:b-0.01*gb},
updates=[(w, w-0.01*gw), (b, b-0.01*gb)],
name = "train")
predict = theano.function(inputs=[x], outputs=prediction,
name = "predict")
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in
train.maker.fgraph.toposort()]):
print 'Used the cpu'
print('Used the cpu')
elif any([x.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for x in
train.maker.fgraph.toposort()]):
print 'Used the gpu'
print('Used the gpu')
else:
print 'ERROR, not able to tell if theano used the cpu or the gpu'
print train.maker.fgraph.toposort()
print('ERROR, not able to tell if theano used the cpu or the gpu')
print(train.maker.fgraph.toposort())
for i in range(training_steps):
pred, err = train(D[0], D[1])
#print "Final model:"
#print w.get_value(), b.get_value()
print "target values for D"
print D[1]
print("target values for D")
print(D[1])
print("prediction on D")
print(predict(D[0]))
print "prediction on D"
print predict(D[0])
.. testoutput::
:hide:
:options: +ELLIPSIS
Used the cpu
target values for D
...
prediction on D
...
Modify and execute this example to run on CPU (the default) with floatX=float32 and
time the execution using the command line ``time python file.py``. Save your code
as it will be useful later on.
......@@ -215,7 +220,7 @@ cluster!).
DebugMode is used as follows:
.. code-block:: python
.. testcode::
x = T.dvector('x')
......@@ -296,8 +301,9 @@ Compiling your Graph with ProfileMode
Once the ProfileMode instance is created, simply compile your graph as you
would normally, by specifying the mode parameter.
>>> # with functions
>>> f = theano.function([input1,input2],[output1], mode=profmode)
>>> v1, v2 = T.vectors(2)
>>> o = v1 + v2
>>> f = theano.function([v1,v2],[o], mode=profmode)
Retrieving Timing Information
-----------------------------
......
.. _numpy:
.. testsetup::
import numpy
***************
NumPy refresher
......@@ -59,7 +62,7 @@ compatible shapes. The example below shows an instance of
>>> a = numpy.asarray([1.0, 2.0, 3.0])
>>> b = 2.0
>>> a * b
array([2., 4., 6.])
array([ 2., 4., 6.])
The smaller array ``b`` (actually a scalar here, which works like a 0-d array) in this case is *broadcasted* to the same size
as ``a`` during the multiplication. This trick is often useful in
......
......@@ -67,40 +67,39 @@ Debug Print
The pre-compilation graph:
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE
Elemwise{gt,no_inplace} [@A] ''
|Elemwise{true_div,no_inplace} [@B] ''
| |DimShuffle{x} [@C] ''
| | |TensorConstant{1} [@D]
| |Elemwise{add,no_inplace} [@E] ''
| |DimShuffle{x} [@F] ''
| | |TensorConstant{1} [@D]
| |Elemwise{exp,no_inplace} [@G] ''
| |Elemwise{sub,no_inplace} [@H] ''
| |Elemwise{neg,no_inplace} [@I] ''
| | |dot [@J] ''
| | |x [@K]
| | |w [@L]
| |DimShuffle{x} [@M] ''
| |b [@N]
|DimShuffle{x} [@O] ''
|TensorConstant{0.5} [@P]
Elemwise{gt,no_inplace} [@A] ''
|Elemwise{true_div,no_inplace} [@B] ''
| |DimShuffle{x} [@C] ''
| | |TensorConstant{1} [@D]
| |Elemwise{add,no_inplace} [@E] ''
| |DimShuffle{x} [@F] ''
| | |TensorConstant{1} [@D]
| |Elemwise{exp,no_inplace} [@G] ''
| |Elemwise{sub,no_inplace} [@H] ''
| |Elemwise{neg,no_inplace} [@I] ''
| | |dot [@J] ''
| | |x [@K]
| | |w [@L]
| |DimShuffle{x} [@M] ''
| |b [@N]
|DimShuffle{x} [@O] ''
|TensorConstant{0.5} [@P]
The post-compilation graph:
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE
Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4
|CGemv{inplace} [@B] '' 3
| |Alloc [@C] '' 2
| | |TensorConstant{0.0} [@D]
| | |Shape_i{0} [@E] '' 1
| | |x [@F]
| |TensorConstant{1.0} [@G]
| |x [@F]
| |w [@H]
| |TensorConstant{0.0} [@D]
|InplaceDimShuffle{x} [@I] '' 0
| |b [@J]
|TensorConstant{(1,) of 0.5} [@K]
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE
Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4
|CGemv{inplace} [@B] '' 3
| |AllocEmpty{dtype='float64'} [@C] '' 2
| | |Shape_i{0} [@D] '' 1
| | |x [@E]
| |TensorConstant{1.0} [@F]
| |x [@E]
| |w [@G]
| |TensorConstant{0.0} [@H]
|InplaceDimShuffle{x} [@I] '' 0
| |b [@J]
|TensorConstant{(1,) of 0.5} [@K]
Picture Printing of Graphs
......@@ -108,7 +107,7 @@ Picture Printing of Graphs
The pre-compilation graph:
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True)
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_prediction.png
.. image:: ./pics/logreg_pydotprint_prediction.png
......@@ -116,7 +115,7 @@ The output file is available at pics/logreg_pydotprint_prediction.png
The post-compilation graph:
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True)
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_predict.png
.. image:: ./pics/logreg_pydotprint_predict.png
......@@ -124,7 +123,7 @@ The output file is available at pics/logreg_pydotprint_predict.png
The optimized training graph:
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True)
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_train.png
.. image:: ./pics/logreg_pydotprint_train.png
......
......@@ -24,7 +24,7 @@ Currently, information regarding shape is used in two ways in Theano:
>>> x = theano.tensor.matrix('x')
>>> f = theano.function([x], (x ** 2).shape)
>>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
MakeVector [@A] '' 2
MakeVector{dtype='int64'} [@A] '' 2
|Shape_i{0} [@B] '' 1
| |x [@C]
|Shape_i{1} [@D] '' 0
......@@ -49,9 +49,9 @@ can lead to errors. Consider this example:
>>> xv = numpy.random.rand(5, 4)
>>> yv = numpy.random.rand(3, 3)
>>> f = theano.function([x,y], z.shape)
>>> f = theano.function([x, y], z.shape)
>>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
MakeVector [@A] '' 4
MakeVector{dtype='int64'} [@A] '' 4
|Elemwise{Add}[(0, 0)] [@B] '' 3
| |Shape_i{0} [@C] '' 1
| | |x [@D]
......@@ -60,8 +60,8 @@ MakeVector [@A] '' 4
|Shape_i{1} [@G] '' 0
|x [@D]
print f(xv,yv)# DOES NOT RAISE AN ERROR AS SHOULD BE.
[8, 4]
>>> f(xv, yv) # DOES NOT RAISE AN ERROR AS SHOULD BE.
array([8, 4])
>>> f = theano.function([x,y], z)# Do not take the shape.
>>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
......@@ -70,8 +70,10 @@ Join [@A] '' 0
|x [@C]
|y [@D]
>>> f(xv,yv) # doctest: +SKIP
>>> # Raises a dimensions mismatch error.
>>> f(xv, yv) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: ...
As you can see, when asking only for the shape of some computation (``join`` in the
example), an inferred shape is computed directly, without executing
......
......@@ -104,7 +104,7 @@ does not provide any way to handle a number of dimensions different from two.
The set of all accepted ``dtype`` for the sparse matrices can be found in
``sparse.all_dtypes``.
>>> sparse.all_dtypes
>>> sparse.all_dtypes # doctest: +SKIP
set(['int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64',
'float32', 'float64', 'complex64', 'complex128'])
......
......@@ -34,8 +34,9 @@ detail about these building blocks refer to :ref:`variable`, :ref:`op`,
**Code**
.. code-block:: python
.. testcode::
import theano.tensor as T
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
......@@ -159,9 +160,9 @@ as we apply it. Consider the following example of optimization:
>>> f = theano.function([a], b) # compile function
>>> print f([0, 1, 2]) # prints `array([0,2,1026])`
[ 0. 2. 1026.]
>>> theano.printing.pydotprint(b, outfile="./pics/symbolic_graph_unopt.png", var_with_name_simple=True)
>>> theano.printing.pydotprint(b, outfile="./pics/symbolic_graph_unopt.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_unopt.png
>>> theano.printing.pydotprint(f, outfile="./pics/symbolic_graph_opt.png", var_with_name_simple=True)
>>> theano.printing.pydotprint(f, outfile="./pics/symbolic_graph_opt.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_opt.png
......
......@@ -33,10 +33,7 @@ Testing Theano with GPU
To see if your GPU is being used, cut and paste the following program into a
file and run it.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_using_gpu.test_using_gpu_1
.. code-block:: python
.. testcode::
from theano import function, config, shared, sandbox
import theano.tensor as T
......@@ -49,17 +46,17 @@ file and run it.
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], T.exp(x))
print f.maker.fgraph.toposort()
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in xrange(iters):
r = f()
t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
print 'Used the cpu'
print('Used the cpu')
else:
print 'Used the gpu'
print('Used the gpu')
The program just computes the ``exp()`` of a bunch of random numbers.
Note that we use the ``shared`` function to
......@@ -71,7 +68,16 @@ If I run this program (in check1.py) with ``device=cpu``, my computer takes a li
whereas on the GPU it takes just over 0.64 seconds. The GPU will not always produce the exact
same floating-point numbers as the CPU. As a benchmark, a loop that calls ``numpy.exp(x.get_value())`` takes about 46 seconds.
.. code-block:: text
.. testoutput::
:hide:
:options: +ELLIPSIS
[Elemwise{exp,no_inplace}(<TensorType(float64, vector)>)]
Looping 1000 times took ... seconds
Result is ...
Used the cpu
.. code-block:: none
$ THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 python check1.py
[Elemwise{exp,no_inplace}(<TensorType(float32, vector)>)]
......@@ -102,10 +108,7 @@ the graph to express a computation with a GPU-stored result. The ``gpu_from_hos
op means "copy the input from the host to the GPU" and it is optimized away
after the ``T.exp(x)`` is replaced by a GPU version of ``exp()``.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_using_gpu.test_using_gpu_2
.. code-block:: python
.. testcode::
from theano import function, config, shared, sandbox
import theano.sandbox.cuda.basic_ops
......@@ -117,24 +120,35 @@ after the ``T.exp(x)`` is replaced by a GPU version of ``exp()``.
iters = 1000
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
x = shared(numpy.asarray(rng.rand(vlen), 'float32'))
f = function([], sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)))
print f.maker.fgraph.toposort()
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in xrange(iters):
r = f()
t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r
print 'Numpy result is', numpy.asarray(r)
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))
print("Numpy result is %s" % (numpy.asarray(r),))
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
print 'Used the cpu'
print('Used the cpu')
else:
print 'Used the gpu'
print('Used the gpu')
The output from this program is
.. code-block:: text
.. testoutput::
:hide:
:options: +ELLIPSIS, +SKIP
Using gpu device 0: GeForce GTX 580
[GpuElemwise{exp,no_inplace}(<CudaNdarrayType(float32, vector)>)]
Looping 1000 times took ... seconds
Result is <CudaNdarray object at 0x...>
Numpy result is ...
Used the gpu
.. code-block:: none
$ THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python check2.py
Using gpu device 0: GeForce GTX 580
......@@ -253,7 +267,7 @@ Exercise
Consider again the logistic regression:
.. code-block:: python
.. testcode::
import numpy
import theano
......@@ -273,8 +287,6 @@ Consider again the logistic regression:
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
#print "Initial model:"
#print w.get_value(), b.get_value()
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
......@@ -287,33 +299,39 @@ Consider again the logistic regression:
train = theano.function(
inputs=[x,y],
outputs=[prediction, xent],
updates={w:w-0.01*gw, b:b-0.01*gb},
updates=[(w, w-0.01*gw), (b, b-0.01*gb)],
name = "train")
predict = theano.function(inputs=[x], outputs=prediction,
name = "predict")
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in
train.maker.fgraph.toposort()]):
print 'Used the cpu'
print('Used the cpu')
elif any([x.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for x in
train.maker.fgraph.toposort()]):
print 'Used the gpu'
print('Used the gpu')
else:
print 'ERROR, not able to tell if theano used the cpu or the gpu'
print train.maker.fgraph.toposort()
print('ERROR, not able to tell if theano used the cpu or the gpu')
print(train.maker.fgraph.toposort())
for i in range(training_steps):
pred, err = train(D[0], D[1])
#print "Final model:"
#print w.get_value(), b.get_value()
print "target values for D"
print D[1]
print "prediction on D"
print predict(D[0])
print("target values for D")
print(D[1])
print("prediction on D")
print(predict(D[0]))
.. testoutput::
:hide:
:options: +ELLIPSIS
Used the cpu
target values for D
...
prediction on D
...
Modify and execute this example to run on GPU with ``floatX=float32`` and
time it using the command line ``time python file.py``. (Of course, you may use some of your answer
......@@ -373,7 +391,7 @@ Testing Theano with GPU
To see if your GPU is being used, cut and paste the following program
into a file and run it.
.. code-block:: python
.. testcode::
from theano import function, config, shared, tensor, sandbox
import numpy
......@@ -385,25 +403,34 @@ into a file and run it.
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], tensor.exp(x))
print f.maker.fgraph.toposort()
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in xrange(iters):
r = f()
t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))
if numpy.any([isinstance(x.op, tensor.Elemwise) and
('Gpu' not in type(x.op).__name__)
for x in f.maker.fgraph.toposort()]):
print 'Used the cpu'
print('Used the cpu')
else:
print 'Used the gpu'
print('Used the gpu')
The program just computes ``exp()`` of a bunch of random numbers. Note
that we use the :func:`theano.shared` function to make sure that the
input *x* is stored on the GPU.
.. code-block:: text
.. testoutput::
:hide:
:options: +ELLIPSIS
[Elemwise{exp,no_inplace}(<TensorType(float64, vector)>)]
Looping 1000 times took ... seconds
Result is ...
Used the cpu
.. code-block:: none
$ THEANO_FLAGS=device=cpu python check1.py
[Elemwise{exp,no_inplace}(<TensorType(float64, vector)>)]
......@@ -432,8 +459,7 @@ the value of the ``device`` flag without touching the code.
If you don't mind a loss of flexibility, you can ask theano to return
the GPU object directly. The following code is modified to do just that.
.. code-block:: python
:emphasize-lines: 10,17
.. testcode::
from theano import function, config, shared, tensor, sandbox
import numpy
......@@ -445,19 +471,19 @@ the GPU object directly. The following code is modifed to do just that.
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], sandbox.gpuarray.basic_ops.gpu_from_host(tensor.exp(x)))
print f.maker.fgraph.toposort()
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in xrange(iters):
r = f()
t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', numpy.asarray(r)
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (numpy.asarray(r),))
if numpy.any([isinstance(x.op, tensor.Elemwise) and
('Gpu' not in type(x.op).__name__)
for x in f.maker.fgraph.toposort()]):
print 'Used the cpu'
print('Used the cpu')
else:
print 'Used the gpu'
print('Used the gpu')
Here the :func:`theano.sandbox.gpuarray.basic.gpu_from_host` call
means "copy input to the GPU". However during the optimization phase,
......@@ -466,7 +492,17 @@ used here to tell theano that we want the result on the GPU.
The output is
.. code-block:: text
.. testoutput::
:hide:
:options: +ELLIPSIS, +SKIP
Using device cuda0: ...
[GpuElemwise{exp,no_inplace}(<GpuArray<float64>>)]
Looping 1000 times took ... seconds
Result is ...
Used the gpu
.. code-block:: none
$ THEANO_FLAGS=device=cuda0 python check2.py
Using device cuda0: GeForce GTX 275
......@@ -663,7 +699,7 @@ you feel competent enough, you may try yourself on the corresponding exercises.
block=(400,1,1), grid=(1,1))
assert numpy.allclose(dest, a*b)
print dest
print(dest)
Exercise
......@@ -722,10 +758,10 @@ Modify and execute to work for a matrix of shape (20, 10).
Use this code to test it:
>>> x = theano.tensor.fmatrix()
>>> f = theano.function([x], PyCUDADoubleOp()(x))
>>> f = theano.function([x], PyCUDADoubleOp()(x)) # doctest: +SKIP
>>> xv = numpy.ones((4, 5), dtype="float32")
>>> assert numpy.allclose(f(xv), xv*2)
>>> print numpy.asarray(f(xv))
>>> assert numpy.allclose(f(xv), xv*2) # doctest: +SKIP
>>> print(numpy.asarray(f(xv))) # doctest: +SKIP
Exercise
......
......@@ -46,8 +46,8 @@ def function_dump(filename, inputs, outputs=None, mode=None, updates=None,
To load such a dump and do the compilation:
>>> import cPickle, theano
>>> d=cPickle.load(open("func_dump.bin", "rb"))
>>> f=theano.function(**d)
>>> d = cPickle.load(open("func_dump.bin", "rb")) # doctest: +SKIP
>>> f = theano.function(**d) # doctest: +SKIP
"""
assert isinstance(filename, string_types)
......
......@@ -456,7 +456,6 @@ def remove(predicate, coll):
Examples
--------
>>> from itertoolz import remove
>>> def even(x):
... return x % 2 == 0
>>> remove(even, [1, 2, 3, 4])
......
......@@ -1525,8 +1525,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
Example:
>>> verify_grad(theano.tensor.tanh,
(numpy.asarray([[2,3,4], [-1, 3.3, 9.9]]),),
rng=numpy.random)
... (numpy.asarray([[2,3,4], [-1, 3.3, 9.9]]),),
... rng=numpy.random)
Raises an Exception if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) of a
......
......@@ -1092,6 +1092,7 @@ class Unique(theano.Op):
Examples
--------
>>> import numpy as np
>>> import theano
>>> x = theano.tensor.vector()
>>> f = theano.function([x], Unique(True, True, False)(x))
......
......@@ -83,7 +83,7 @@ def load(path, dtype, broadcastable, mmap_mode=None):
>>> x = tensor.load(path, 'int64', (False,))
>>> y = x*2
>>> fn = function([path], y)
>>> fn("stored-array.npy")
>>> fn("stored-array.npy") # doctest: +SKIP
array([0, 2, 4, 6, 8], dtype=int64)
"""
......
......@@ -55,9 +55,11 @@ def shape_of_variables(fgraph, input_shapes):
>>> x = theano.tensor.matrix('x')
>>> y = x[512:]; y.name = 'y'
>>> fgraph = theano.FunctionGraph([x], [y], clone=False)
>>> shape_of_variables(fgraph, {x: (1024, 1024)})
{y: (512, 1024), x: (1024, 1024)}
>>> d = shape_of_variables(fgraph, {x: (1024, 1024)})
>>> d[y]
(array(512), array(1024))
>>> d[x]
(array(1024), array(1024))
"""
if not hasattr(fgraph, 'shape_feature'):
......
......@@ -40,7 +40,6 @@ whitelist_flake8 = [
"tests/test_pickle_unpickle_theano_fn.py",
"tests/test_determinism.py",
"tests/record.py",
"tests/test_tutorial.py",
"tests/unittest_tools.py",
"compile/__init__.py",
"compile/profiling.py",
......
""" test code snippet in the Theano tutorials.
"""
from __future__ import print_function
import os
import shutil
import unittest
from nose.plugins.attrib import attr
from nose.plugins.skip import SkipTest
import numpy
from numpy import array
import theano
import theano.tensor as T
from theano import function, compat
from six.moves import xrange
from theano import config
from theano.tests import unittest_tools as utt
from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.tensor.shared_randomstreams import RandomStreams
class T_extending(unittest.TestCase):
# All tests here belong to files in
# http://deeplearning.net/software/theano/extending
# Theano/doc/extending/*.txt
# Any change you do here also add it to the tutorial!
# This belongs to an entire folder since code-snippets are connected
# from one file to another .. and they do not make sense on their
# own.
def test_extending_1(self):
    """Exercise the 'Extending Theano' tutorial snippets on Types and Ops.

    Builds a ``double`` Type three ways (duck-typed ``gof.Type`` instance,
    then two ``Double`` subclasses), then a ``mul`` Op two ways (a patched
    ``gof.Op`` instance, then a reusable ``BinaryDoubleOp`` class), checking
    compiled functions along the way.  The re-bindings of ``double`` and
    ``mul.make_node`` are intentional: each stage mirrors one tutorial
    snippet, so statement order matters.
    """
    # Note that we shadow Python's builtin ``filter`` with this
    # definition (intentional: it becomes the Type's filter method).
    def filter(x, strict=False, allow_downcast=None):
        if strict:
            if isinstance(x, float):
                return x
            else:
                raise TypeError('Expected a float!')
        else:
            return float(x)

    # Relative-error comparison used by DebugMode to accept values.
    def values_eq_approx(x, y, tolerance=1e-4):
        return abs(x - y) / (abs(x) + abs(y)) < tolerance

    from theano import gof

    # Stage 1: a Type built by attaching methods to a bare gof.Type().
    double = gof.Type()
    double.filter = filter
    double.values_eq_approx = values_eq_approx

    from theano import gof

    # Stage 2: same Type as a proper subclass.
    class Double(gof.Type):

        def filter(self, x, strict=False):
            if strict and not isinstance(x, float):
                raise TypeError('Expected a float!')
            return float(x)

        def values_eq_approx(self, x, y, tolerance=1e-4):
            return abs(x - y) / (abs(x) + abs(y)) < tolerance

        # Added to make those tests pass in DebugMode
        @staticmethod
        def may_share_memory(a, b):
            return a is b

    double = Double()

    # NOTE(review): this function is defined but never attached to Double;
    # it matches the tutorial text, which discusses Type equality.
    def __eq__(self, other):
        return type(self) is Double and type(other) is Double

    from theano import gof

    # Stage 3: final Double with allow_downcast and a printable name.
    class Double(gof.Type):

        def filter(self, x, strict=False, allow_downcast=None):
            if strict and not isinstance(x, float):
                raise TypeError('Expected a float!')
            return float(x)

        def values_eq_approx(self, x, y, tolerance=1e-4):
            return abs(x - y) / (abs(x) + abs(y)) < tolerance

        def __str__(self):
            return "double"

        # Added to make those tests pass in DebugMode
        @staticmethod
        def may_share_memory(a, b):
            return a is b

    double = Double()

    from theano import gof

    # A multiplication Op built by patching methods onto a gof.Op instance.
    mul = gof.Op()

    def make_node(x, y):
        if x.type != double or y.type != double:
            raise TypeError('mul only works on doubles')
        return gof.Apply(mul, [x, y], [double()])
    mul.make_node = make_node

    def perform(node, inputs, output_storage):
        x, y = inputs[0], inputs[1]
        z = output_storage[0]
        # Outputs are written through one-element storage cells.
        z[0] = x * y
    mul.perform = perform

    x, y = double('x'), double('y')
    z = mul(x, y)
    f = theano.function([x, y], z)
    assert f(5, 6) == 30.0
    # Exact float expected: 5.6 * 6.7 in IEEE-754 double precision.
    assert f(5.6, 6.7) == 37.519999999999996

    # Without constant promotion, passing a raw int must fail
    # (2 has no ``.type`` attribute).
    x = double('x')
    self.assertRaises(AttributeError, mul, x, 2)

    # Upgraded make_node: promote Python numbers to Constants first.
    def make_node(x, y):
        if isinstance(x, (int, float)):
            x = gof.Constant(double, x)
        if isinstance(y, (int, float)):
            y = gof.Constant(double, y)
        if x.type != double or y.type != double:
            raise TypeError('mul only works on doubles')
        return gof.Apply(mul, [x, y], [double()])
    mul.make_node = make_node

    x = double('x')
    z = mul(x, 2)
    f = theano.function([x], z)
    assert f(10) == 20.0
    # Exact float expected: 3.4 * 2 in IEEE-754 double precision.
    assert f(3.4) == 6.7999999999999998

    from theano import gof

    # Stage 4: a parameterized Op class covering all four operators.
    class BinaryDoubleOp(gof.Op):
        # name/fn identify the Op for hashing and equality.
        __props__ = ("name", "fn")

        def __init__(self, name, fn):
            self.name = name
            self.fn = fn

        def make_node(self, x, y):
            if isinstance(x, (int, float)):
                x = gof.Constant(double, x)
            if isinstance(y, (int, float)):
                y = gof.Constant(double, y)
            if x.type != double or y.type != double:
                raise TypeError('%s only works on doubles' % self.name)
            return gof.Apply(self, [x, y], [double()])

        def perform(self, node, inp, out):
            x, y = inp
            z, = out
            z[0] = self.fn(x, y)

        def __str__(self):
            return self.name

    add = BinaryDoubleOp(name='add',
                         fn=lambda x, y: x + y)
    sub = BinaryDoubleOp(name='sub',
                         fn=lambda x, y: x - y)
    mul = BinaryDoubleOp(name='mul',
                         fn=lambda x, y: x * y)
    div = BinaryDoubleOp(name='div',
                         fn=lambda x, y: x / y)
def test_extending_2(self):
'''
This test fails in DebugMode for the same reasons the test in
tensor/tests/test_basic.py:T_scalarfromtensor.test0
fails on debug mode ( as much as I could tell - Razvan )
'''
from theano import gof
class Double(gof.Type):
def filter(self, x, strict=False, allow_downcast=None):
if strict and not isinstance(x, float):
raise TypeError('Expected a float!')
return float(x)
def values_eq_approx(self, x, y, tolerance=1e-4):
return abs(x - y) / (abs(x) + abs(y)) < tolerance
def __str__(self):
return "double"
# Added to make those tests pass in DebugMode
@staticmethod
def may_share_memory(a, b):
return a is b
double = Double()
class BinaryDoubleOp(gof.Op):
__props__ = ("name", "fn")
def __init__(self, name, fn):
self.name = name
self.fn = fn
def make_node(self, x, y):
if isinstance(x, (int, float)):
x = gof.Constant(double, x)
if isinstance(y, (int, float)):
y = gof.Constant(double, y)
if x.type != double or y.type != double:
raise TypeError('%s only works on doubles' % self.name)
return gof.Apply(self, [x, y], [double()])
def perform(self, node, inp, out):
x, y = inp
z, = out
z[0] = self.fn(x, y)
def __str__(self):
return self.name
add = BinaryDoubleOp(name='add',
fn=lambda x, y: x + y)
sub = BinaryDoubleOp(name='sub',
fn=lambda x, y: x - y)
mul = BinaryDoubleOp(name='mul',
fn=lambda x, y: x * y)
div = BinaryDoubleOp(name='div',
fn=lambda x, y: x / y)
def c_declare(name, sub, check_input=True):
return """
double %(name)s;
""" % dict(name=name)
double.c_declare = c_declare
def c_init(name, sub):
return """
%(name)s = 0.0;
""" % dict(name=name)
double.c_init = c_init
def c_extract(name, sub, check_input=True):
if(check_input):
pre = """
if (!PyFloat_Check(py_%(name)s)) {
PyErr_SetString(PyExc_TypeError, "expected a float");
%(fail)s
}""" % dict(name=name, fail=sub['fail'])
else:
pre = ""
return pre + """
%(name)s = PyFloat_AsDouble(py_%(name)s);
""" % dict(name=name, fail=sub['fail'])
double.c_extract = c_extract
def c_sync( name, sub):
return """
Py_XDECREF(py_%(name)s);
py_%(name)s = PyFloat_FromDouble(%(name)s);
if (!py_%(name)s) {
printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
Py_XINCREF(Py_None);
py_%(name)s = Py_None;
}
""" % dict(name=name)
double.c_sync = c_sync
def c_cleanup(name, sub):
return ""
double.c_cleanup = c_cleanup
from theano import function
x, y, z = double('x'), double('y'), double('z')
a = add(x, y)
b = mul(a, z)
f = function([x, y, z], b)
assert f(1.0, 2.0, 3.0) == 9.0
from theano import gof
class Double(gof.Type):
def filter(self, x, strict=False, allow_downcast=None):
if strict and not isinstance(x, float):
raise TypeError('Expected a float!')
return float(x)
def values_eq_approx(self, x, y, tolerance=1e-4):
return abs(x - y) / (x + y) < tolerance
def __str__(self):
return "double"
def c_declare(self, name, sub, check_input=True):
return """
double %(name)s;
""" % dict(name=name)
def c_init(self, name, sub):
return """
%(name)s = 0.0;
""" % dict(name=name)
def c_extract(self, name, sub, check_input=True):
if(check_input):
pre = """
if (!PyFloat_Check(py_%(name)s)) {
PyErr_SetString(PyExc_TypeError, "expected a float");
%(fail)s
}
""" % dict(sub, name=name)
else:
pre = ""
return pre + """
%(name)s = PyFloat_AsDouble(py_%(name)s);
""" % dict(sub, name=name)
def c_sync(self, name, sub):
return """
Py_XDECREF(py_%(name)s);
py_%(name)s = PyFloat_FromDouble(%(name)s);
if (!py_%(name)s) {
printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
Py_XINCREF(Py_None);
py_%(name)s = Py_None;
}
""" % dict(name=name)
def c_cleanup(self, name, sub):
return ""
# Added to make those tests pass in DebugMode
@staticmethod
def may_share_memory(a, b):
return a is b
double = Double()
def c_code(node, name, input_names, output_names, sub):
x_name, y_name = input_names[0], input_names[1]
output_name = output_names[0]
return """
%(output_name)s = %(x_name)s * %(y_name)s;
""" % locals()
mul.c_code = c_code
from theano import gof
class BinaryDoubleOp(gof.Op):
__props__ = ("name", "fn", "ccode")
def __init__(self, name, fn, ccode):
self.name = name
self.fn = fn
self.ccode = ccode
def make_node(self, x, y):
if isinstance(x, (int, float)):
x = gof.Constant(double, x)
if isinstance(y, (int, float)):
y = gof.Constant(double, y)
if x.type != double or y.type != double:
raise TypeError('%s only works on doubles' % self.name)
return gof.Apply(self, [x, y], [double()])
def perform(self, node, inp, out):
x, y = inp
z, = out
z[0] = self.fn(x, y)
def __str__(self):
return self.name
def c_code(self, node, name, inp, out, sub):
x, y = inp
z, = out
return self.ccode % locals()
add = BinaryDoubleOp(name='add',
fn=lambda x, y: x + y,
ccode="%(z)s = %(x)s + %(y)s;")
sub = BinaryDoubleOp(name='sub',
fn=lambda x, y: x - y,
ccode="%(z)s = %(x)s - %(y)s;")
mul = BinaryDoubleOp(name='mul',
fn=lambda x, y: x * y,
ccode="%(z)s = %(x)s * %(y)s;")
div = BinaryDoubleOp(name='div',
fn=lambda x, y: x / y,
ccode="%(z)s = %(x)s / %(y)s;")
from theano.gof import toolbox
class Simplify(gof.Optimizer):
    """Global optimizer rewriting div(mul(a, b), a) -> b and the
    symmetric case div(mul(a, b), b) -> a."""
    def add_requirements(self, fgraph):
        # ReplaceValidate provides fgraph.replace_validate used below.
        fgraph.attach_feature(toolbox.ReplaceValidate())

    def apply(self, fgraph):
        for node in fgraph.toposort():
            if node.op == div:
                x, y = node.inputs
                z = node.outputs[0]
                if x.owner and x.owner.op == mul:
                    a, b = x.owner.inputs
                    if y == a:
                        fgraph.replace_validate(z, b)
                    elif y == b:
                        fgraph.replace_validate(z, a)
# Build a small example graph and apply the global simplification.
simplify = Simplify()
x = double('x')
y = double('y')
z = double('z')
a = add(z, mul(div(mul(y, x), y), div(z, x)))
e = gof.FunctionGraph([x, y, z], [a])
simplify.optimize(e)
class LocalSimplify(gof.LocalOptimizer):
    """Node-level version of Simplify: div(mul(a, b), a) -> b."""
    def transform(self, node):
        if node.op == div:
            x, y = node.inputs
            if x.owner and x.owner.op == mul:
                a, b = x.owner.inputs
                if y == a:
                    return [b]
                elif y == b:
                    return [a]
        # Returning False means "no replacement for this node".
        return False

    def tracks(self):
        # This should be needed for the EquilibriumOptimizer
        # but it isn't now
        # TODO: do this and explain it
        return []  # that's not what you should do
# Same example graph, simplified through a TopoOptimizer wrapping
# the local optimizer.
local_simplify = LocalSimplify()
x = double('x')
y = double('y')
z = double('z')
a = add(z, mul(div(mul(y, x), y), div(z, x)))
e = gof.FunctionGraph([x, y, z], [a])
simplify = gof.TopoOptimizer(local_simplify)
simplify.optimize(e)
def test_as_op(self):
    """Check the `as_op` decorator examples from the extending docs:
    wrap numpy functions as ops with explicit input/output types and
    shape-inference helpers."""
    import theano
    import numpy
    from theano.compile.ops import as_op

    def infer_shape_numpy_dot(node, input_shapes):
        # Matrix-product output shape: a.shape[:-1] + b.shape[-1:]
        ashp, bshp = input_shapes
        return [ashp[:-1] + bshp[-1:]]

    # BUG FIX: this op was previously misnamed `numpy_add`, which both
    # shadowed the real numpy_add below and paired numpy.add with the
    # dot-product shape inference.  Per the extending tutorial this is
    # the numpy_dot example.
    @as_op(itypes=[theano.tensor.fmatrix, theano.tensor.fmatrix],
           otypes=[theano.tensor.fmatrix],
           infer_shape=infer_shape_numpy_dot)
    def numpy_dot(a, b):
        return numpy.dot(a, b)

    def infer_shape_numpy_add_sub(node, input_shapes):
        ashp, bshp = input_shapes
        # Both inputs should have that same shape, so we just
        # return one of them.
        return [ashp[0]]

    @as_op(itypes=[theano.tensor.fmatrix, theano.tensor.fmatrix],
           otypes=[theano.tensor.fmatrix],
           infer_shape=infer_shape_numpy_add_sub)
    def numpy_add(a, b):
        return numpy.add(a, b)

    @as_op(itypes=[theano.tensor.fmatrix, theano.tensor.fmatrix],
           otypes=[theano.tensor.fmatrix],
           infer_shape=infer_shape_numpy_add_sub)
    def numpy_sub(a, b):
        # BUG FIX: numpy has no `sub` function; the element-wise
        # subtraction ufunc is numpy.subtract.
        return numpy.subtract(a, b)
class T_introduction(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/introduction.html
    # Theano/doc/tutorial/introduction.txt
    # Any change you do here also add it to the tutorial !
    def test_introduction_1(self):
        """Build and evaluate the scalar-addition example."""
        import theano
        from theano import tensor
        # declare two symbolic floating-point scalars
        a = tensor.dscalar()
        b = tensor.dscalar()
        # create a simple expression
        c = a + b
        # convert the expression into a callable object that takes (a,b)
        # values as input and computes a value for c
        f = theano.function([a, b], c)
        # bind 1.5 to 'a', 2.5 to 'b', and evaluate 'c'
        assert 4.0 == f(1.5, 2.5)
class T_adding(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/adding.html
    # Theano/doc/tutorial/adding.txt
    # Any change you do here also add it to the tutorial !
    def test_adding_1(self):
        """Add two scalars."""
        import theano.tensor as T
        from theano import function
        x = T.dscalar('x')
        y = T.dscalar('y')
        z = x + y
        f = function([x, y], z)
        assert f(2, 3) == numpy.array(5.0)
        assert f(16.3, 12.1) == numpy.array(28.4)

    def test_adding_2(self):
        """Add two matrices, from both list and ndarray inputs."""
        x = T.dmatrix('x')
        y = T.dmatrix('y')
        z = x + y
        f = function([x, y], z)
        assert numpy.all(f([[1, 2], [3, 4]], [[10, 20], [30, 40]]) ==
                         numpy.array([[ 11., 22.], [ 33., 44.]]))
        assert numpy.all(f(numpy.array([[1, 2], [3, 4]])
                           , numpy.array([[10, 20], [30, 40]])) ==
                         numpy.array([[ 11., 22.], [ 33., 44.]]))
class T_examples(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/examples.html
    # Theano/doc/tutorial/examples.txt
    # Any change you do here also add it to the tutorial !
    def test_examples_1(self):
        """Element-wise logistic function on a matrix."""
        x = T.dmatrix('x')
        s = 1 / (1 + T.exp(-x))
        logistic = function([x], s)
        assert numpy.allclose( logistic([[0, 1], [-1, -2]]),
                               array([[ 0.5 , 0.73105858],
                                      [ 0.26894142, 0.11920292]]))

    def test_examples_2(self):
        """Same logistic function expressed via tanh."""
        x = T.dmatrix('x')
        s2 = (1 + T.tanh(x / 2)) / 2
        logistic2 = function([x], s2)
        assert numpy.allclose(logistic2([[0, 1], [-1, -2]]),
                              array([[ 0.5 , 0.73105858],
                                     [ 0.26894142, 0.11920292]]))

    def test_examples_3(self):
        """A function computing several outputs at once."""
        a, b = T.dmatrices('a', 'b')
        diff = a - b
        abs_diff = abs(diff)
        diff_squared = diff**2
        f = function([a, b], [diff, abs_diff, diff_squared])
        elems = f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
        assert numpy.all( elems[0] == array([[ 1., 0.], [-1., -2.]]))
        assert numpy.all( elems[1] == array([[ 1., 0.], [ 1., 2.]]))
        assert numpy.all( elems[2] == array([[ 1., 0.], [ 1., 4.]]))

    def test_examples_4(self):
        """Symbolic gradient of x**2."""
        from theano import pp
        x = T.dscalar('x')
        y = x**2
        gy = T.grad(y, x)
        pp(gy)  # print out the gradient prior to optimization
        # Expected pretty-printed form, kept verbatim from the tutorial
        # (a no-op string statement here):
        '((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))'
        f = function([x], gy)
        assert f(4) == array(8.0)
        assert f(94.2) == array(188.40000000000001)

    def test_examples_5(self):
        """Gradient of the summed logistic function."""
        x = T.dmatrix('x')
        s = T.sum(1 / (1 + T.exp(-x)))
        gs = T.grad(s, x)
        dlogistic = function([x], gs)
        assert numpy.allclose( dlogistic([[0, 1], [-1, -2]]),
                               array([[ 0.25 , 0.19661193],
                                      [ 0.19661193, 0.10499359]]))

    def test_examples_6(self):
        """Default value for an input via Param."""
        from theano import Param
        x, y = T.dscalars('x', 'y')
        z = x + y
        f = function([x, Param(y, default=1)], z)
        assert f(33) == array(34.0)
        assert f(33, 2) == array(35.0)

    def test_examples_7(self):
        """Defaults plus a named parameter via Param(name=...)."""
        from theano import Param
        x, y, w = T.dscalars('x', 'y', 'w')
        z = (x + y) * w
        f = function([x, Param(y, default=1), Param(w, default=2, name='w_by_name')], z)
        assert f(33) == array(68.0)
        assert f(33, 2) == array(70.0)
        assert f(33, 0, 1) == array(33.0)
        assert f(33, w_by_name=1) == array(34.0)
        assert f(33, w_by_name=1, y=0) == array(33.0)

    def test_examples_8(self):
        """Shared-variable state with updates and givens."""
        from theano import shared
        # Force the dtype to int64 to work correctly on 32 bit computer.
        # Otherwise, it creates by default an int32 on 32 bit computer.
        state = shared(0)
        inc = T.iscalar('inc')
        accumulator = function([inc], state, updates=[(state, state+inc)])
        assert state.get_value() == array(0)
        assert accumulator(1) == array(0)
        assert state.get_value() == array(1)
        assert accumulator(300) == array(1)
        assert state.get_value() == array(301)
        state.set_value(-1)
        assert accumulator(3) == array(-1)
        assert state.get_value() == array(2)
        decrementor = function([inc], state, updates=[(state, state-inc)])
        assert decrementor(2) == array(2)
        assert state.get_value() == array(0)
        fn_of_state = state * 2 + inc
        # The type of foo must match the shared variable we are replacing
        # with the ``givens``
        foo = T.scalar(dtype=state.dtype)
        skip_shared = function([inc, foo], fn_of_state,
                               givens=[(state, foo)])
        assert skip_shared(1, 3) == array(7)
        assert state.get_value() == array(0)

    def test_examples_9(self):
        """Shared random streams: updates, seeding, state copy."""
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        rv_u = srng.uniform((2, 2))
        rv_n = srng.normal((2, 2))
        f = function([], rv_u)
        g = function([], rv_n, no_default_updates=True)  # Not updating rv_n.rng
        nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)
        f_val0 = f()
        f_val1 = f()  # different numbers from f_val0
        assert numpy.all(f_val0 != f_val1)
        g_val0 = g()  # different numbers from f_val0 and f_val1
        g_val1 = g()  # same numbers as g_val0 !!!
        assert numpy.all(g_val0 == g_val1)
        assert numpy.all(g_val0 != f_val0)
        assert numpy.all(g_val0 != f_val1)
        nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)
        assert numpy.allclose(nearly_zeros(), [[0., 0.], [0., 0.]])
        rng_val = rv_u.rng.get_value(borrow=True)   # Get the rng for rv_u
        rng_val.seed(89234)  # seeds the generator
        rv_u.rng.set_value(rng_val, borrow=True)    # Assign back seeded rng
        srng.seed(902340)  # seeds rv_u and rv_n with different seeds each
        state_after_v0 = rv_u.rng.get_value().get_state()
        nearly_zeros()       # this affects rv_u's generator
        v1 = f()
        rng = rv_u.rng.get_value(borrow=True)
        rng.set_state(state_after_v0)
        rv_u.rng.set_value(rng, borrow=True)
        v2 = f()             # v2 != v1
        v3 = f()             # v3 == v1
        assert numpy.all(v1 != v2)
        assert numpy.all(v1 == v3)

    def test_copy_random_state(self):
        """Copying the random-stream state between two graphs makes
        their functions produce the same draws."""
        class Graph():
            def __init__(self, seed=123):
                self.rng = RandomStreams(seed)
                self.y = self.rng.uniform(size=(1,))
        g1 = Graph(seed=123)
        f1 = theano.function([], g1.y)
        g2 = Graph(seed=987)
        f2 = theano.function([], g2.y)
        # print 'By default, the two functions are out of sync.'
        v1 = f1()
        v2 = f2()
        def copy_random_state(g1, g2):
            # NOTE(review): MRG_RandomStreams is presumably imported at
            # module top — verify.
            if isinstance(g1.rng, MRG_RandomStreams):
                g2.rng.rstate = g1.rng.rstate
            for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
                su2[0].set_value(su1[0].get_value())
        # print 'We now copy the state of the theano random number generators.'
        copy_random_state(g1, g2)
        v3 = f1()
        v4 = f2()
        assert numpy.allclose(v1, 0.72803009)
        assert numpy.allclose(v2, 0.55056769)
        assert numpy.allclose(v3, 0.59044123)
        assert numpy.allclose(v4, 0.59044123)

    @attr('slow')
    def test_examples_real_example(self):
        """Full logistic-regression training loop from the tutorial."""
        rng = numpy.random
        N = 400
        feats = 784
        D = (rng.randn(N, feats).astype(config.floatX),
             rng.randint(size=N, low=0, high=2).astype(config.floatX))
        training_steps = 10000
        if config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
            training_steps = 10
        # Declare Theano symbolic variables
        x = T.matrix("x")
        y = T.vector("y")
        # The *.03 have been added so that DebugMode does not complain
        w = theano.shared(rng.randn(feats).astype(config.floatX) * .03,
                          name="w")
        b = theano.shared(numpy.asarray(0., dtype=config.floatX),
                          name="b")
        print("Initial model:")
        print(w.get_value(), b.get_value())
        # Construct Theano expression graph
        p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))  # Probability that target = 1
        prediction = p_1 > 0.5  # The prediction thresholded
        xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1)  # Cross-entropy loss function
        cost = xent.mean() + 0.01 * (w ** 2).sum()  # The cost to minimize
        gw, gb = T.grad(cost, [w, b])  # Compute the gradient of the cost
        # (we shall return to this in a
        # following section of this tutorial)
        # Compile
        train = theano.function(
            inputs=[x, y],
            outputs=[prediction, xent],
            updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
        predict = theano.function(inputs=[x], outputs=prediction)
        # Train
        for i in range(training_steps):
            pred, err = train(D[0], D[1])
        print("Final model:")
        print(w.get_value(), b.get_value())
        print("target values for D:", D[1])
        print("prediction on D:", predict(D[0]))
        # A user reported that this happened on the mailing list.
        assert not numpy.isnan(b.get_value()).any()
        assert not numpy.isnan(w.get_value()).any()
class T_aliasing(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/aliasing.html
    # Theano/doc/tutorial/aliasing.txt
    # Any change you do here also add it to the tutorial !
    def test_aliasing_1(self):
        """borrow=True shares memory with the original ndarray;
        borrow=False (and the default) copies it."""
        import numpy, theano
        np_array = numpy.ones(2, dtype='float32')
        s_default = theano.shared(np_array)
        s_false = theano.shared(np_array, borrow=False)
        s_true = theano.shared(np_array, borrow=True)
        np_array += 1  # now it is an array of 2.0 s
        assert numpy.all(s_default.get_value() == array([1.0, 1.0]))
        assert numpy.all(s_false.get_value() == array([1.0, 1.0]))
        assert numpy.all(s_true.get_value() == array([2.0, 2.0]))

    def test_aliasing_2(self):
        """get_value/set_value borrow and return_internal_type flags."""
        import numpy, theano
        np_array = numpy.ones(2, dtype='float32')
        s = theano.shared(np_array)
        v_false = s.get_value(borrow=False)  # N.B. borrow default is False
        v_true = s.get_value(borrow=True)
        v_internal = s.get_value(borrow=True, return_internal_type=True)
        s.set_value(
            # some_inplace_fn
            s.get_value(borrow=True).__imul__(2),
            borrow=True)

    def test_aliasing_3(self):
        """Borrowed function inputs/outputs via theano.In / theano.Out."""
        import theano, theano.tensor
        x = theano.tensor.matrix()
        y = 2*x
        f = theano.function([theano.In(x, borrow=True)], theano.Out(y, borrow=True))
class T_loading_and_saving(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/loading_and_saving.html
    # Theano/doc/tutorial/loading_and_saving.txt
    # Any change you do here also add it to the tutorial !
    def test_loading_and_saving_1(self):
        """Pickle a compiled function and unpickle it again (once on
        its own, then three objects appended to the same file)."""
        import six.moves.cPickle as pickle
        import theano, theano.tensor
        x = theano.tensor.matrix()
        y = 2*x
        my_obj = theano.function([theano.In(x, borrow=True)]
                                 , theano.Out(y, borrow=True))
        mode_instance = theano.compile.mode.get_mode(None)
        if not isinstance(mode_instance, theano.compile.debugmode.DebugMode):
            # Here, we work in a temporary directory in order not to clutter
            # the Theano repository. Code relative to creating that dir and
            # removing it afterwards should _not_ be backported to the tutorial.
            from tempfile import mkdtemp
            origdir = os.getcwd()
            tmpdir = None
            try:
                tmpdir = mkdtemp()
                os.chdir(tmpdir)
                f = open('obj.save', 'wb')
                pickle.dump(my_obj, f, protocol=pickle.HIGHEST_PROTOCOL)
                f.close()
                f = open('obj.save', 'rb')
                loaded_obj = pickle.load(f)
                f.close()
                obj1 = my_obj
                obj2 = my_obj
                obj3 = my_obj
                f = open('objects.save', 'wb')
                for obj in [obj1, obj2, obj3]:
                    pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
                f.close()
                f = open('objects.save', 'rb')
                loaded_objects = []
                for i in range(3):
                    loaded_objects.append(pickle.load(f))
                f.close()
            finally:
                # Get back to the original dir, and delete the temporary one.
                os.chdir(origdir)
                if tmpdir is not None:
                    shutil.rmtree(tmpdir)
class T_modes(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/modes.html
    # Theano/doc/tutorial/modes.txt
    # Any change you do here also add it to the tutorial !
    def test_modes_1(self):
        """Compile and run a trivial function in DEBUG_MODE."""
        x = T.dvector('x')
        f = theano.function([x], 10*x, mode='DEBUG_MODE')
        assert numpy.all(f([5]) == [50.])
        assert numpy.all(f([0]) == [0.])
        assert numpy.all(f([7]) == [70.])
class T_using_gpu(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/using_gpu.html
    # Theano/doc/tutorial/using_gpu.txt
    # Any change you do here also add it to the tutorial !
    def test_using_gpu_1(self):
        # I'm checking if this compiles and runs
        from theano import function, config, shared, sandbox
        import theano.tensor as T
        import numpy
        import time
        vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
        iters = 10
        rng = numpy.random.RandomState(22)
        x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
        f = function([], T.exp(x))
        # print f.maker.fgraph.toposort()
        t0 = time.time()
        # NOTE(review): xrange is Python 2 only; presumably imported
        # from six.moves at module top — verify.
        for i in xrange(iters):
            r = f()
        t1 = time.time()
        print('Looping %d times took' % iters, t1 - t0, 'seconds')
        print('Result is', r)
        # An Elemwise node in the graph means the computation stayed
        # on the CPU.
        if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
            print('Used the cpu')
        else:
            print('Used the gpu')
        if theano.config.device.find('gpu') > -1:
            assert not numpy.any( [isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()])
        else:
            assert numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()])

    def test_using_gpu_2(self):
        """Same loop, explicitly keeping the result on the GPU via
        gpu_from_host; only runs when a GPU device is configured."""
        if theano.config.device.find('gpu') > -1:
            from theano import function, config, shared, sandbox
            import theano.tensor as T
            import numpy
            import time
            vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
            iters = 10
            rng = numpy.random.RandomState(22)
            x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
            f = function([], sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)))
            # print f.maker.fgraph.toposort()
            t0 = time.time()
            for i in xrange(iters):
                r = f()
            t1 = time.time()
            print('Looping %d times took' % iters, t1 - t0, 'seconds')
            print('Result is', r)
            print('Numpy result is', numpy.asarray(r))
            if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
                print('Used the cpu')
            else:
                print('Used the gpu')
            assert not numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()])

    def test_using_gpu_3(self):
        """As test_using_gpu_2, additionally borrowing the output."""
        if theano.config.device.find('gpu') > -1:
            from theano import function, config, shared, sandbox, Out
            import theano.tensor as T
            import numpy
            import time
            vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
            iters = 10
            rng = numpy.random.RandomState(22)
            x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
            f = function([],
                         Out(sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)),
                             borrow=True))
            # print f.maker.fgraph.toposort()
            t0 = time.time()
            for i in xrange(iters):
                r = f()
            t1 = time.time()
            print('Looping %d times took' % iters, t1 - t0, 'seconds')
            print('Result is', r)
            print('Numpy result is', numpy.asarray(r))
            if numpy.any([isinstance(x.op, T.Elemwise)
                          for x in f.maker.fgraph.toposort()]):
                print('Used the cpu')
            else:
                print('Used the gpu')
            assert not numpy.any([isinstance(x.op, T.Elemwise)
                                  for x in f.maker.fgraph.toposort()])

    def test_using_gpu_pycudaop(self):
        """Define a Theano op whose thunk runs a PyCUDA kernel that
        doubles its float32 input."""
        import theano.misc.pycuda_init
        if not theano.misc.pycuda_init.pycuda_available:
            raise SkipTest("Pycuda not installed. Skip test of theano op"
                           " with pycuda code.")
        from pycuda.compiler import SourceModule
        import theano.sandbox.cuda as cuda
        # Same module under a second alias, matching the tutorial text.
        import theano.sandbox.cuda as cuda_ndarray
        if not cuda_ndarray.cuda_available:
            raise SkipTest('Optional package cuda disabled')
        class PyCUDADoubleOp(theano.Op):
            __props__ = ()
            def make_node(self, inp):
                inp = cuda.basic_ops.gpu_contiguous(
                    cuda.basic_ops.as_cuda_ndarray_variable(inp))
                assert inp.dtype == "float32"
                return theano.Apply(self, [inp], [inp.type()])
            def make_thunk(self, node, storage_map, _, _2):
                mod = SourceModule("""
__global__ void my_fct(float * i0, float * o0, int size) {
int i = blockIdx.x*blockDim.x + threadIdx.x;
if(i<size){
o0[i] = i0[i]*2;
}
}""")
                pycuda_fct = mod.get_function("my_fct")
                inputs = [storage_map[v] for v in node.inputs]
                outputs = [storage_map[v] for v in node.outputs]
                def thunk():
                    z = outputs[0]
                    # (Re)allocate the output only when needed.
                    if z[0] is None or z[0].shape != inputs[0][0].shape:
                        z[0] = cuda.CudaNdarray.zeros(inputs[0][0].shape)
                    grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1)
                    pycuda_fct(inputs[0][0], z[0],
                               numpy.intc(inputs[0][0].size),
                               block=(512, 1, 1), grid=grid)
                return thunk
        x = theano.tensor.fmatrix()
        f = theano.function([x], PyCUDADoubleOp()(x))
        xv = numpy.ones((4, 5), dtype="float32")
        assert numpy.allclose(f(xv), xv*2)
        # print numpy.asarray(f(xv))
# Used in T_fibby
class Fibby(theano.Op):
    """
    An arbitrarily generalized Fibonacci sequence:
    y[i] = y[i - 1] * y[i - 2] + x[i] for i >= 2, with y[:2] = x[:2].
    """
    __props__ = ()

    def make_node(self, x):
        x_ = theano.tensor.as_tensor_variable(x)
        assert x_.ndim == 1
        return theano.Apply(self,
                            inputs=[x_],
                            outputs=[x_.type()])
        # using x_.type() is dangerous, it copies x's broadcasting
        # behaviour

    def perform(self, node, inputs, output_storage):
        # Python implementation: copy x, then accumulate in place.
        x, = inputs
        y = output_storage[0][0] = x.copy()
        for i in range(2, len(x)):
            y[i] = y[i - 1] * y[i - 2] + x[i]

    def c_code(self, node, name, inames, onames, sub):
        # C implementation: same recurrence over a forced copy of x.
        x, = inames
        y, = onames
        fail = sub['fail']
        return """
Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray(
%(x)s, 0, NPY_ARRAY_ENSURECOPY);
if (!%(y)s)
%(fail)s;
{//New scope needed to make compilation work
dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s);
for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i];
}
""" % locals()

    def c_code_cache_version(self):
        # Bump this tuple whenever the C code above changes.
        return (1,)
class T_fibby(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/extending/fibby.html
    # Theano/doc/extending/fibby.txt
    # Any change you do here also add it to the tutorial !
    def test_fibby_1(self):
        """Register a local optimization removing fibby(zeros(...))
        and check that it fires only for the all-zeros input."""
        # The definition of class Fibby is done outside of the test,
        # so the object can be pickled.
        fibby = Fibby()
        from theano.tensor.opt import (get_scalar_constant_value,
                                       NotScalarConstantError)
        # Remove any fibby(zeros(...))
        @theano.tensor.opt.register_specialize
        @theano.gof.local_optimizer([fibby])
        def fibby_of_zero(node):
            if node.op == fibby:
                x = node.inputs[0]
                try:
                    if numpy.all(0 == get_scalar_constant_value(x)):
                        return [x]
                except NotScalarConstantError:
                    pass
        # Test it does not apply when not needed
        x = T.dvector()
        f = function([x], fibby(x))
        # theano.printing.debugprint(f)
        # We call the function to make sure it runs.
        # If you run in DebugMode, it will compare the C and Python outputs.
        f(numpy.random.rand(5))
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert isinstance(topo[0].op, Fibby)
        # Test that the optimization gets applied.
        f_zero = function([], fibby(T.zeros([5])))
        # theano.printing.debugprint(f_zero)
        # If you run in DebugMode, it will compare the output before
        # and after the optimization.
        f_zero()
        # Check that the optimization removes the Fibby Op.
        # For security, the Theano memory interface ensures that the output
        # of the function is always memory not aliased to the input.
        # That is why there is a DeepCopyOp op.
        topo = f_zero.maker.fgraph.toposort()
        assert len(topo) == 1
        assert isinstance(topo[0].op, theano.compile.ops.DeepCopyOp)
class T_graphstructures(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/extending/graphstructures.html
    # Theano/doc/extending/graphstructures.txt
    # Any change you do here also add it to the tutorial !
    def test_graphstructures_1(self):
        """Build x+(y*z) first with operators, then manually with
        Variable/Apply, and check the owner/index wiring."""
        x = T.dmatrix('x')
        y = T.dmatrix('y')
        z = x + y
        x = T.matrix('x')
        y = T.matrix('y')
        z = T.matrix('z')
        # create 2 Variables (one for 'e', one intermediate for y*z)
        # create 2 Apply instances (one for '+', one for '*')
        e = x + y * z
        from theano.tensor import add, mul, Apply, Variable, TensorType
        # Instantiate a type that represents a matrix of doubles
        float64_matrix = TensorType(dtype='float64',          # double
                                    broadcastable=(False, False))  # matrix
        # We make the Variable instances we need.
        x = Variable(type=float64_matrix, name='x')
        y = Variable(type=float64_matrix, name='y')
        z = Variable(type=float64_matrix, name='z')
        # This is the Variable that we want to symbolically represents y*z
        mul_variable = Variable(type=float64_matrix)
        assert mul_variable.owner is None
        # Instantiate a symbolic multiplication
        node_mul = Apply(op=mul,
                         inputs=[y, z],
                         outputs=[mul_variable])
        # Fields 'owner' and 'index' are set by Apply
        assert mul_variable.owner is node_mul
        # 'index' is the position of mul_variable in node_mul's outputs
        assert mul_variable.index == 0
        # This is the Variable that we want to symbolically represents x+(y*z)
        add_variable = Variable(type=float64_matrix)
        assert add_variable.owner is None
        # Instantiate a symbolic addition
        node_add = Apply(op=add,
                         inputs=[x, mul_variable],
                         outputs=[add_variable])
        # Fields 'owner' and 'index' are set by Apply
        assert add_variable.owner is node_add
        assert add_variable.index == 0
        e = add_variable
        # We have access to x, y and z through pointers
        assert e.owner.inputs[0] is x
        assert e.owner.inputs[1] is mul_variable
        assert e.owner.inputs[1].owner.inputs[0] is y
        assert e.owner.inputs[1].owner.inputs[1] is z
class T_scan(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/loop.html
    # Theano/doc/tutorial/loop.txt
    # Any change you do here also add it to the tutorial !
    def test_elemwise(self):
        """scan over the rows of X, computing tanh(v.W + b) per row."""
        # defining the tensor variables
        X = T.matrix("X")
        W = T.matrix("W")
        b_sym = T.vector("b_sym")
        results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym),
                                       sequences=X)
        compute_elementwise = theano.function(inputs=[X, W, b_sym],
                                              outputs=[results])
        # test values
        x = numpy.eye(2, dtype=theano.config.floatX)
        w = numpy.ones((2, 2), dtype=theano.config.floatX)
        b = numpy.ones((2), dtype=theano.config.floatX)
        b[1] = 2
        print("Scan results:", compute_elementwise(x, w, b)[0])
        # comparison with numpy
        print("Numpy results:", numpy.tanh(x.dot(w) + b))

    def test_sequence(self):
        """scan with two sequences (one reversed) and a recurrent
        output initialized from X."""
        # define tensor variables
        X = T.vector("X")
        W = T.matrix("W")
        b_sym = T.vector("b_sym")
        U = T.matrix("U")
        Y = T.matrix("Y")
        V = T.matrix("V")
        P = T.matrix("P")
        results, updates = theano.scan(
            lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) +
                                       T.dot(y, U) + T.dot(p, V)),
            sequences=[Y, P[::-1]], outputs_info=[X])
        compute_seq = theano.function(inputs=[X, W, Y, U, P, V],
                                      outputs=[results])
        # test values
        x = numpy.zeros((2), dtype=theano.config.floatX)
        x[1] = 1
        w = numpy.ones((2, 2), dtype=theano.config.floatX)
        y = numpy.ones((5, 2), dtype=theano.config.floatX)
        y[0, :] = -3
        u = numpy.ones((2, 2), dtype=theano.config.floatX)
        p = numpy.ones((5, 2), dtype=theano.config.floatX)
        p[0, :] = 3
        v = numpy.ones((2, 2), dtype=theano.config.floatX)
        print("Scan results", compute_seq(x, w, y, u, p, v)[0])
        # comparison with numpy
        x_res = numpy.zeros((5, 2), dtype=theano.config.floatX)
        x_res[0] = numpy.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
        for i in range(1, 5):
            x_res[i] = numpy.tanh(x_res[i-1].dot(w) +
                                  y[i].dot(u) + p[4-i].dot(v))
        print("Numpy results:", x_res)

    def test_norm(self):
        """Row- and column-wise Euclidean norms via scan."""
        # define tensor variable
        X = T.matrix("X")
        results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
                                       sequences=[X])
        compute_norm_lines = theano.function(inputs=[X], outputs=[results])
        results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
                                       sequences=[X.T])
        compute_norm_cols = theano.function(inputs=[X], outputs=[results])
        # test value
        x = numpy.diag(numpy.arange(1, 6, dtype=theano.config.floatX), 1)
        print("Scan results:", compute_norm_lines(x)[0], \
              compute_norm_cols(x)[0])
        # comparison with numpy
        print("Numpy results:", numpy.sqrt((x**2).sum(1)), \
              numpy.sqrt((x**2).sum(0)))

    def test_trace(self):
        """Trace of X accumulated with scan over diagonal indices."""
        # define tensor variable
        X = T.matrix("X")
        results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i, j] +
                                                                t_f, theano.config.floatX),
                                       sequences=[T.arange(X.shape[0]),
                                                  T.arange(X.shape[1])],
                                       outputs_info=numpy.asarray(
                                           0., dtype=theano.config.floatX))
        result = results[-1]
        compute_trace = theano.function(inputs=[X], outputs=[result])
        # test value
        x = numpy.eye(5, dtype=theano.config.floatX)
        x[0] = numpy.arange(5, dtype=theano.config.floatX)
        print("Scan results:", compute_trace(x)[0])
        # comparison with numpy
        print("Numpy results:", numpy.diagonal(x).sum())

    def test_taps(self):
        """Recurrence using the two previous outputs (taps=[-2, -1])."""
        # define tensor variables
        X = T.matrix("X")
        W = T.matrix("W")
        b_sym = T.vector("b_sym")
        U = T.matrix("U")
        V = T.matrix("V")
        n_sym = T.iscalar("n_sym")
        results, updates = theano.scan(
            lambda x_tm2, x_tm1: T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T.dot(x_tm1, W) + b_sym),
            n_steps=n_sym,
            outputs_info=[dict(initial=X, taps=[-2, -1])])
        compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym],
                                       outputs=[results])
        # test values
        x = numpy.zeros((2, 2), dtype=theano.config.floatX)
        # the initial value must be able to return x[-2]
        x[1, 1] = 1
        w = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
        u = 0.5 * (numpy.ones((2, 2), dtype=theano.config.floatX) -
                   numpy.eye(2, dtype=theano.config.floatX))
        v = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
        n = 10
        b = numpy.ones((2), dtype=theano.config.floatX)
        print("Scan results:", compute_seq2(x, u, v, w, b, n))
        # comparison with numpy
        x_res = numpy.zeros((10, 2), dtype=theano.config.floatX)
        x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b)
        x_res[1] = x[1].dot(u) + x_res[0].dot(v) \
                   + numpy.tanh(x_res[0].dot(w) + b)
        x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \
                   + numpy.tanh(x_res[1].dot(w) + b)
        for i in range(2, 10):
            x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) +
                        numpy.tanh(x_res[i-1].dot(w) + b))
        print("Numpy results:", x_res)

    def test_jacobian(self):
        """Jacobian of tanh(v.A) w.r.t. v, one row of y at a time."""
        # define tensor variables
        v = T.vector()
        A = T.matrix()
        y = T.tanh(T.dot(v, A))
        results, updates = theano.scan(lambda i: T.grad(y[i], v),
                                       sequences=[T.arange(y.shape[0])])
        compute_jac_t = theano.function([A, v], [results],
                                        allow_input_downcast=True)  # shape (d_out, d_in)
        # test values
        x = numpy.eye(5)[0]
        w = numpy.eye(5, 3)
        w[2] = numpy.ones((3))
        print("Scan results:", compute_jac_t(w, x)[0])
        # compare with numpy
        print("Numpy results:", ((1 - numpy.tanh(x.dot(w))**2)*w).T)

    def test_accumulator(self):
        """Update a shared counter n_sym times inside scan."""
        # define shared variables
        k = theano.shared(0)
        n_sym = T.iscalar("n_sym")
        results, updates = theano.scan(lambda: {k: (k + 1)}, n_steps=n_sym)
        accumulator = theano.function([n_sym], [], updates=updates,
                                      allow_input_downcast=True)
        print("Before 5 steps:", k.get_value())
        accumulator(5)
        print("After 5 steps:", k.get_value())

    def test_random(self):
        """scan over X with multiplicative binomial noise drawn from a
        shared random stream."""
        # define tensor variables
        X = T.matrix("X")
        W = T.matrix("W")
        b_sym = T.vector("b_sym")
        # define shared random stream
        trng = T.shared_randomstreams.RandomStreams(1234)
        d = trng.binomial(size=W[1].shape)
        results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d,
                                       sequences=X)
        compute_with_bnoise = theano.function(inputs=[X, W, b_sym],
                                              outputs=[results],
                                              updates=updates,
                                              allow_input_downcast=True)
        x = numpy.eye(10, 2)
        w = numpy.ones((2, 2))
        b = numpy.ones((2))
        print(compute_with_bnoise(x, w, b))
class T_typedlist(unittest.TestCase):
    # All tests here belong to
    # http://deeplearning.net/software/theano/library/typed_list.html
    # Theano/doc/library/typed_list.txt
    # Any change you do here must also be done in the documentation !
    def test_typedlist_basic(self):
        """Append a vector to a typed list of fvectors."""
        import theano.typed_list
        tl = theano.typed_list.TypedListType(theano.tensor.fvector)()
        v = theano.tensor.fvector()
        o = theano.typed_list.append(tl, v)
        f = theano.function([tl, v], o)
        output = f([[1, 2, 3], [4, 5]], [2])
        # Validate output is as expected
        expected_output = [numpy.array([1, 2, 3], dtype="float32"),
                           numpy.array([4, 5], dtype="float32"),
                           numpy.array([2], dtype="float32")]
        assert len(output) == len(expected_output)
        for i in range(len(output)):
            utt.assert_allclose(output[i], expected_output[i])

    def test_typedlist_with_scan(self):
        """Use a typed list as a non-sequence inside scan."""
        import theano.typed_list
        a = theano.typed_list.TypedListType(theano.tensor.fvector)()
        l = theano.typed_list.length(a)
        s, _ = theano.scan(fn=lambda i, tl: tl[i].sum(),
                           non_sequences=[a],
                           sequences=[theano.tensor.arange(l, dtype='int64')])
        f = theano.function([a], s)
        output = f([[1, 2, 3], [4, 5]])
        # Validate output is as expected
        expected_output = numpy.array([6, 9], dtype="float32")
        utt.assert_allclose(output, expected_output)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论