提交 752f1f73 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #3284 from abergeron/doctest

Use the testcode facility to test code examples directly
...@@ -312,8 +312,7 @@ Pretty Printing ...@@ -312,8 +312,7 @@ Pretty Printing
~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~
>>> theano.printing.pprint(prediction) # doctest: +NORMALIZE_WHITESPACE >>> theano.printing.pprint(prediction) # doctest: +NORMALIZE_WHITESPACE
'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))), 'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))), TensorConstant{0.5})'
TensorConstant{0.5})'
Debug Print Debug Print
...@@ -321,7 +320,7 @@ Debug Print ...@@ -321,7 +320,7 @@ Debug Print
The graph before optimization: The graph before optimization:
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE >>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE, +SKIP
Elemwise{gt,no_inplace} [@A] '' Elemwise{gt,no_inplace} [@A] ''
|Elemwise{true_div,no_inplace} [@B] '' |Elemwise{true_div,no_inplace} [@B] ''
| |DimShuffle{x} [@C] '' | |DimShuffle{x} [@C] ''
...@@ -342,7 +341,7 @@ The graph before optimization: ...@@ -342,7 +341,7 @@ The graph before optimization:
The graph after optimization: The graph after optimization:
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE >>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE, +SKIP
Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4 Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4
|CGemv{inplace} [@B] '' 3 |CGemv{inplace} [@B] '' 3
| |Alloc [@C] '' 2 | |Alloc [@C] '' 2
...@@ -364,7 +363,7 @@ Picture Printing of Graphs ...@@ -364,7 +363,7 @@ Picture Printing of Graphs
The graph before optimization: The graph before optimization:
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True) >>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_prediction.png The output file is available at pics/logreg_pydotprint_prediction.png
.. image:: ./pics/logreg_pydotprint_prediction.png .. image:: ./pics/logreg_pydotprint_prediction.png
...@@ -372,7 +371,7 @@ The output file is available at pics/logreg_pydotprint_prediction.png ...@@ -372,7 +371,7 @@ The output file is available at pics/logreg_pydotprint_prediction.png
The graph after optimization: The graph after optimization:
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True) >>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_predict.png The output file is available at pics/logreg_pydotprint_predict.png
.. image:: ./pics/logreg_pydotprint_predict.png .. image:: ./pics/logreg_pydotprint_predict.png
...@@ -380,7 +379,7 @@ The output file is available at pics/logreg_pydotprint_predict.png ...@@ -380,7 +379,7 @@ The output file is available at pics/logreg_pydotprint_predict.png
The optimized training graph: The optimized training graph:
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True) >>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_train.png The output file is available at pics/logreg_pydotprint_train.png
.. image:: ./pics/logreg_pydotprint_train.png .. image:: ./pics/logreg_pydotprint_train.png
......
...@@ -56,7 +56,8 @@ Simple example ...@@ -56,7 +56,8 @@ Simple example
>>> a = theano.tensor.vector("a") # declare symbolic variable >>> a = theano.tensor.vector("a") # declare symbolic variable
>>> b = a + a**10 # build symbolic expression >>> b = a + a**10 # build symbolic expression
>>> f = theano.function([a], b) # compile function >>> f = theano.function([a], b) # compile function
>>> print f([0,1,2]) # prints `array([0,2,1026])` >>> f([0,1,2])
array([ 0., 2., 1026.])
====================================================== ===================================================== ====================================================== =====================================================
...@@ -332,7 +333,7 @@ Details regarding symbolic broadcasting... ...@@ -332,7 +333,7 @@ Details regarding symbolic broadcasting...
Differentiation details Differentiation details
----------------------- -----------------------
>>> gw,gb = T.grad(cost, [w,b]) >>> gw,gb = T.grad(cost, [w,b]) # doctest: +SKIP
* T.grad works symbolically: takes and returns a Theano variable * T.grad works symbolically: takes and returns a Theano variable
......
...@@ -148,8 +148,7 @@ Pretty Printing ...@@ -148,8 +148,7 @@ Pretty Printing
~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~
>>> theano.printing.pprint(prediction) # doctest: +NORMALIZE_WHITESPACE >>> theano.printing.pprint(prediction) # doctest: +NORMALIZE_WHITESPACE
'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))), 'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))), TensorConstant{0.5})'
TensorConstant{0.5})'
Debug Print Debug Print
...@@ -157,8 +156,11 @@ Debug Print ...@@ -157,8 +156,11 @@ Debug Print
The graph before optimization: The graph before optimization:
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE .. doctest::
Elemwise{gt,no_inplace} [@A] '' :options: +SKIP
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE
Elemwise{gt,no_inplace} [@A] ''
|Elemwise{true_div,no_inplace} [@B] '' |Elemwise{true_div,no_inplace} [@B] ''
| |DimShuffle{x} [@C] '' | |DimShuffle{x} [@C] ''
| | |TensorConstant{1} [@D] | | |TensorConstant{1} [@D]
...@@ -178,20 +180,23 @@ The graph before optimization: ...@@ -178,20 +180,23 @@ The graph before optimization:
The graph after optimization: The graph after optimization:
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE .. doctest::
Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4 :options: +SKIP
|CGemv{inplace} [@B] '' 3
| |Alloc [@C] '' 2 >>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE
| | |TensorConstant{0.0} [@D] Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4
| | |Shape_i{0} [@E] '' 1 |CGemv{inplace} [@B] '' 3
| | |x [@F] | |Alloc [@C] '' 2
| |TensorConstant{1.0} [@G] | | |TensorConstant{0.0} [@D]
| |x [@F] | | |Shape_i{0} [@E] '' 1
| |w [@H] | | |x [@F]
| |TensorConstant{0.0} [@D] | |TensorConstant{1.0} [@G]
|InplaceDimShuffle{x} [@I] '' 0 | |x [@F]
| |b [@J] | |w [@H]
|TensorConstant{(1,) of 0.5} [@K] | |TensorConstant{0.0} [@D]
|InplaceDimShuffle{x} [@I] '' 0
| |b [@J]
|TensorConstant{(1,) of 0.5} [@K]
Picture Printing of Graphs Picture Printing of Graphs
...@@ -201,24 +206,33 @@ Picture Printing of Graphs ...@@ -201,24 +206,33 @@ Picture Printing of Graphs
The graph before optimization: The graph before optimization:
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True) .. doctest::
The output file is available at pics/logreg_pydotprint_prediction.png :options: +SKIP
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_prediction.png
.. image:: ./pics/logreg_pydotprint_prediction.png .. image:: ./pics/logreg_pydotprint_prediction.png
:width: 800 px :width: 800 px
The graph after optimization: The graph after optimization:
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True) .. doctest::
The output file is available at pics/logreg_pydotprint_predict.png :options: +SKIP
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_predict.png
.. image:: ./pics/logreg_pydotprint_predict.png .. image:: ./pics/logreg_pydotprint_predict.png
:width: 800 px :width: 800 px
The optimized training graph: The optimized training graph:
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True) .. doctest::
The output file is available at pics/logreg_pydotprint_train.png :options: +SKIP
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True)
The output file is available at pics/logreg_pydotprint_train.png
.. image:: ./pics/logreg_pydotprint_train.png .. image:: ./pics/logreg_pydotprint_train.png
:width: 1500 px :width: 1500 px
......
...@@ -54,8 +54,8 @@ Simple example ...@@ -54,8 +54,8 @@ Simple example
>>> a = theano.tensor.vector("a") # declare symbolic variable >>> a = theano.tensor.vector("a") # declare symbolic variable
>>> b = a + a ** 10 # build symbolic expression >>> b = a + a ** 10 # build symbolic expression
>>> f = theano.function([a], b) # compile function >>> f = theano.function([a], b) # compile function
>>> print f([0, 1, 2]) # prints `array([0, 2, 1026])` >>> f([0, 1, 2])
array([ 0., 2., 1026.])
====================================================== ===================================================== ====================================================== =====================================================
Unoptimized graph Optimized graph Unoptimized graph Optimized graph
...@@ -118,7 +118,7 @@ Where are those optimization applied? ...@@ -118,7 +118,7 @@ Where are those optimization applied?
# Log(1-sigmoid(var)) -> -sigmoid(var) # Log(1-sigmoid(var)) -> -sigmoid(var)
prediction = p_1 > 0.5 prediction = p_1 > 0.5
cost = xent.mean() + 0.01 * (w ** 2).sum() cost = xent.mean() + 0.01 * (w ** 2).sum()
gw,gb = tt.grad(cost, [w, b]) gw, gb = tt.grad(cost, [w, b])
train = theano.function( train = theano.function(
inputs=[x, y], inputs=[x, y],
...@@ -294,7 +294,7 @@ Details regarding symbolic broadcasting... ...@@ -294,7 +294,7 @@ Details regarding symbolic broadcasting...
Differentiation details Differentiation details
----------------------- -----------------------
>>> gw,gb = tt.grad(cost, [w,b]) >>> gw, gb = tt.grad(cost, [w,b]) # doctest: +SKIP
* tt.grad works symbolically: takes and returns a Theano variable * tt.grad works symbolically: takes and returns a Theano variable
......
...@@ -253,10 +253,12 @@ We will be defining C code for the multiplication Op on doubles. ...@@ -253,10 +253,12 @@ We will be defining C code for the multiplication Op on doubles.
**c_code** **c_code**
.. If you modify this code, also change : .. testsetup::
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python from theano import Op
mul = Op()
.. testcode::
def c_code(node, name, input_names, output_names, sub): def c_code(node, name, input_names, output_names, sub):
x_name, y_name = input_names[0], input_names[1] x_name, y_name = input_names[0], input_names[1]
...@@ -298,11 +300,7 @@ As before, I tried to organize the code in order to minimize ...@@ -298,11 +300,7 @@ As before, I tried to organize the code in order to minimize
repetition. You can check that mul produces the same C code in this repetition. You can check that mul produces the same C code in this
version that it produces in the code I gave above. version that it produces in the code I gave above.
.. testcode::
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python
from theano import gof from theano import gof
......
...@@ -156,12 +156,14 @@ out: ...@@ -156,12 +156,14 @@ out:
Defining the methods Defining the methods
==================== ====================
.. If you modify this code, also change : .. testsetup::
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
import theano
double = theano.Type()
**c_declare** **c_declare**
.. code-block:: python .. testcode::
def c_declare(name, sub): def c_declare(name, sub):
return """ return """
...@@ -189,12 +191,9 @@ your Type. If you wish people to develop operations that make use of ...@@ -189,12 +191,9 @@ your Type. If you wish people to develop operations that make use of
it, it's best to publish it somewhere. it, it's best to publish it somewhere.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
**c_init** **c_init**
.. code-block:: python .. testcode::
def c_init(name, sub): def c_init(name, sub):
return """ return """
...@@ -218,12 +217,9 @@ you should only assume that either ``c_init`` or ``c_extract`` has been ...@@ -218,12 +217,9 @@ you should only assume that either ``c_init`` or ``c_extract`` has been
called, without knowing for sure which of the two. called, without knowing for sure which of the two.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
**c_extract** **c_extract**
.. code-block:: python .. testcode::
def c_extract(name, sub): def c_extract(name, sub):
return """ return """
...@@ -257,12 +253,9 @@ using the ``PyFloat_AsDouble`` function (yet again provided by CPython's C ...@@ -257,12 +253,9 @@ using the ``PyFloat_AsDouble`` function (yet again provided by CPython's C
API) and we put it in our double variable that we declared previously. API) and we put it in our double variable that we declared previously.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
**c_sync** **c_sync**
.. code-block:: python .. testcode::
def c_sync(name, sub): def c_sync(name, sub):
return """ return """
...@@ -319,12 +312,9 @@ than sorry. ...@@ -319,12 +312,9 @@ than sorry.
do *NOT* decrease its reference count! do *NOT* decrease its reference count!
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
**c_cleanup** **c_cleanup**
.. code-block:: python .. testcode::
def c_cleanup(name, sub): def c_cleanup(name, sub):
return "" return ""
...@@ -370,14 +360,8 @@ depends on the the relationship between Python and C with respect to ...@@ -370,14 +360,8 @@ depends on the the relationship between Python and C with respect to
that Variable. For instance, imagine you define the following function that Variable. For instance, imagine you define the following function
and call it: and call it:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python .. code-block:: python
from theano import function
from theano.tensor import double
x, y, z = double('x'), double('y'), double('z') x, y, z = double('x'), double('y'), double('z')
a = add(x, y) a = add(x, y)
b = mul(a, z) b = mul(a, z)
...@@ -459,10 +443,7 @@ multiplication block. ...@@ -459,10 +443,7 @@ multiplication block.
Final version Final version
============= =============
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python
from theano import gof from theano import gof
......
...@@ -7,7 +7,7 @@ So suppose you have looked through the library documentation and you don't see a ...@@ -7,7 +7,7 @@ So suppose you have looked through the library documentation and you don't see a
function that does what you want. function that does what you want.
If you can implement something in terms of existing Ops, you should do that. If you can implement something in terms of existing Ops, you should do that.
Odds are your function that uses existing Theano expressions is short, Odds are your function that uses existing Theano expressions is short,
has no bugs, and potentially profits from optimizations that have already been has no bugs, and potentially profits from optimizations that have already been
implemented. implemented.
...@@ -18,7 +18,7 @@ Theano was designed to make it easy to add new Ops, Types, and Optimizations. ...@@ -18,7 +18,7 @@ Theano was designed to make it easy to add new Ops, Types, and Optimizations.
This section walks through a non-trivial example Op that does something pretty This section walks through a non-trivial example Op that does something pretty
weird and unrealistic, that is hard to express with existing Ops. weird and unrealistic, that is hard to express with existing Ops.
(Technically, we could use ``Scan`` to implement the Op we're about to describe, (Technically, we could use ``Scan`` to implement the Op we're about to describe,
but we ignore that possibility for the sake of example.) but we ignore that possibility for the sake of example.)
The following code works, but important error-checking has been omitted for The following code works, but important error-checking has been omitted for
...@@ -26,53 +26,52 @@ clarity. For example, when you write C code that assumes memory is contiguous, ...@@ -26,53 +26,52 @@ clarity. For example, when you write C code that assumes memory is contiguous,
you should check the strides and alignment. you should check the strides and alignment.
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_fibby.test_fibby_1
import theano
.. code-block:: python
class Fibby(theano.Op):
class Fibby(theano.Op): """
""" An arbitrarily generalized Fibbonacci sequence
An arbitrarily generalized Fibbonacci sequence """
""" __props__ = ()
__props__ = ()
def make_node(self, x):
def make_node(self, x): x_ = tensor.as_tensor_variable(x)
x_ = tensor.as_tensor_variable(x) assert x_.ndim == 1
assert x_.ndim == 1 return theano.Apply(self,
return theano.Apply(self, inputs=[x_],
inputs=[x_], outputs=[x_.type()])
outputs=[x_.type()]) # using x_.type() is dangerous, it copies x's broadcasting behaviour
# using x_.type() is dangerous, it copies x's broadcasting behaviour
def perform(self, node, inputs, output_storage):
def perform(self, node, inputs, output_storage): x, = inputs
x, = inputs y = output_storage[0][0] = x.copy()
y = output_storage[0][0] = x.copy() for i in range(2, len(x)):
for i in range(2, len(x)): y[i] = y[i-1] * y[i-2] + x[i]
y[i] = y[i-1] * y[i-2] + x[i]
def c_code(self, node, name, inames, onames, sub):
def c_code(self, node, name, inames, onames, sub): x, = inames
x, = inames y, = onames
y, = onames fail = sub['fail']
fail = sub['fail'] return """
return """ Py_XDECREF(%(y)s);
Py_XDECREF(%(y)s); %(y)s = (PyArrayObject*)PyArray_FromArray(
%(y)s = (PyArrayObject*)PyArray_FromArray( %(x)s, 0, NPY_ARRAY_ENSURECOPY);
%(x)s, 0, NPY_ARRAY_ENSURECOPY); if (!%(y)s)
if (!%(y)s) %(fail)s;
%(fail)s; {//New scope needed to make compilation work
{//New scope needed to make compilation work dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s); dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s);
dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s); for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i)
for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i) y[i] = y[i-1]*y[i-2] + x[i];
y[i] = y[i-1]*y[i-2] + x[i]; }
} """ % locals()
""" % locals()
def c_code_cache_version(self):
def c_code_cache_version(self): return (1,)
return (1,)
fibby = Fibby()
fibby = Fibby()
At a high level, the code fragment declares a class (``Fibby``) and then At a high level, the code fragment declares a class (``Fibby``) and then
creates one instance of it (``fibby``). creates one instance of it (``fibby``).
...@@ -80,7 +79,7 @@ We often gloss over this distinction, but will be precise here: ...@@ -80,7 +79,7 @@ We often gloss over this distinction, but will be precise here:
``fibby`` (the instance) is an Op, not ``Fibby`` (the class which is a subclass of ``theano.Op``). ``fibby`` (the instance) is an Op, not ``Fibby`` (the class which is a subclass of ``theano.Op``).
You can call ``fibby(tensor.vector())`` on a Variable to build an You can call ``fibby(tensor.vector())`` on a Variable to build an
expression, and in the expression there will be a ``.op`` attribute that refers expression, and in the expression there will be a ``.op`` attribute that refers
to ``fibby``. to ``fibby``.
The first two methods in the Op are relatively boilerplate: ``__eq__`` and ``__hash__``. The first two methods in the Op are relatively boilerplate: ``__eq__`` and ``__hash__``.
When two Ops are equal, Theano will merge their outputs if they are applied to the same inputs. When two Ops are equal, Theano will merge their outputs if they are applied to the same inputs.
...@@ -108,14 +107,14 @@ see wrong calculation. ...@@ -108,14 +107,14 @@ see wrong calculation.
The ``make_node`` method creates a node to be included in the expression graph. The ``make_node`` method creates a node to be included in the expression graph.
It runs when we apply our Op (``fibby``) to Variable (``x``), as in ``fibby(tensor.vector())``. It runs when we apply our Op (``fibby``) to Variable (``x``), as in ``fibby(tensor.vector())``.
When an Op has multiple inputs, their order in the inputs argument to ``Apply`` When an Op has multiple inputs, their order in the inputs argument to ``Apply``
is important: Theano will call ``make_node(*inputs)`` to copy the graph, is important: Theano will call ``make_node(*inputs)`` to copy the graph,
so it is important not to change the semantics of the expression by changing the argument order. so it is important not to change the semantics of the expression by changing the argument order.
All the ``inputs`` and ``outputs`` arguments to ``Apply`` must be Variables. All the ``inputs`` and ``outputs`` arguments to ``Apply`` must be Variables.
A common and easy way to ensure inputs are variables is to run them through A common and easy way to ensure inputs are variables is to run them through
``as_tensor_variable``. ``as_tensor_variable``.
This function leaves TensorType variables alone, raises an This function leaves TensorType variables alone, raises an
error for non-TensorType variables, and copies any ``numpy.ndarray`` into the error for non-TensorType variables, and copies any ``numpy.ndarray`` into the
storage for a TensorType Constant. storage for a TensorType Constant.
...@@ -123,7 +122,7 @@ The ``make_node`` method dictates the appropriate Type for all output ...@@ -123,7 +122,7 @@ The ``make_node`` method dictates the appropriate Type for all output
variables. variables.
The ``perform`` method implements the Op's mathematical logic in Python. The ``perform`` method implements the Op's mathematical logic in Python.
The inputs (here ``x``) are passed by value, The inputs (here ``x``) are passed by value,
but a single output is returned indirectly as the first element of but a single output is returned indirectly as the first element of
single-element lists. If ``fibby`` had a second output, it would be stored single-element lists. If ``fibby`` had a second output, it would be stored
in ``output_storage[1][0]``. in ``output_storage[1][0]``.
...@@ -143,7 +142,7 @@ the correct size for the output. This is essentially simulating the line ...@@ -143,7 +142,7 @@ the correct size for the output. This is essentially simulating the line
``y = x.copy()``. ``y = x.copy()``.
.. code-block:: python .. code-block:: c
Py_XDECREF(%(y)s); Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray( %(y)s = (PyArrayObject*)PyArray_FromArray(
...@@ -153,7 +152,7 @@ The first line reduces the reference count of the data that y originally ...@@ -153,7 +152,7 @@ The first line reduces the reference count of the data that y originally
pointed to. The second line allocates the new data and makes y point to it. pointed to. The second line allocates the new data and makes y point to it.
In C code for a theano op, numpy arrays are represented as ``PyArrayObject`` C In C code for a theano op, numpy arrays are represented as ``PyArrayObject`` C
structs. This is part of the numpy/scipy C API documented at structs. This is part of the numpy/scipy C API documented at
http://docs.scipy.org/doc/numpy/reference/c-api.types-and-structures.html http://docs.scipy.org/doc/numpy/reference/c-api.types-and-structures.html
TODO: NEEDS MORE EXPLANATION. TODO: NEEDS MORE EXPLANATION.
...@@ -161,7 +160,7 @@ TODO: NEEDS MORE EXPLANATION. ...@@ -161,7 +160,7 @@ TODO: NEEDS MORE EXPLANATION.
There are some important restrictions to remember when implementing an Op. There are some important restrictions to remember when implementing an Op.
Unless your Op correctly defines a ``view_map`` attribute, the ``perform`` and ``c_code`` must not Unless your Op correctly defines a ``view_map`` attribute, the ``perform`` and ``c_code`` must not
produce outputs whose memory is aliased to any input (technically, if changing the produce outputs whose memory is aliased to any input (technically, if changing the
output could change the input object in some sense, they are aliased). output could change the input object in some sense, they are aliased).
Unless your Op correctly defines a ``destroy_map`` attribute, ``perform`` and ``c_code`` must Unless your Op correctly defines a ``destroy_map`` attribute, ``perform`` and ``c_code`` must
not modify any of the inputs. not modify any of the inputs.
...@@ -206,21 +205,21 @@ TODO: talk about OPTIMIZATION STAGES ...@@ -206,21 +205,21 @@ TODO: talk about OPTIMIZATION STAGES
.. If you modify this code, also change : .. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_fibby.test_fibby_1 .. theano/tests/test_tutorial.py:T_fibby.test_fibby_1
.. code-block:: python .. testcode::
from theano.tensor.opt import get_scalar_constant_value, NotScalarConstantError from theano.tensor.opt import get_scalar_constant_value, NotScalarConstantError
# Remove any fibby(zeros(...)) # Remove any fibby(zeros(...))
@theano.tensor.opt.register_specialize @theano.tensor.opt.register_specialize
@theano.gof.local_optimizer([fibby]) @theano.gof.local_optimizer([fibby])
def fibby_of_zero(node): def fibby_of_zero(node):
if node.op == fibby: if node.op == fibby:
x = node.inputs[0] x = node.inputs[0]
try: try:
if numpy.all(0 == get_scalar_constant_value(x)): if numpy.all(0 == get_scalar_constant_value(x)):
return [x] return [x]
except NotScalarConstantError: except NotScalarConstantError:
pass pass
The ``register_specialize`` decorator is what activates our optimization, and The ``register_specialize`` decorator is what activates our optimization, and
tells Theano to use it in the specialization stage. tells Theano to use it in the specialization stage.
...@@ -237,32 +236,35 @@ Test the optimization ...@@ -237,32 +236,35 @@ Test the optimization
Here is some code to test that the optimization is applied only when needed. Here is some code to test that the optimization is applied only when needed.
.. code-block:: python .. testcode::
# Test it does not apply when not needed import numpy
x = T.dvector() import theano.tensor as T
f = function([x], fibby(x)) from theano import function
#theano.printing.debugprint(f) from theano import tensor
# We call the function to make sure it runs. # Test it does not apply when not needed
# If you run in DebugMode, it will compare the C and Python outputs. x = T.dvector()
f(numpy.random.rand(5)) f = function([x], fibby(x))
topo = f.maker.fgraph.toposort()
assert len(topo) == 1 # We call the function to make sure it runs.
assert isinstance(topo[0].op, Fibby) # If you run in DebugMode, it will compare the C and Python outputs.
f(numpy.random.rand(5))
# Test that the optimization gets applied. topo = f.maker.fgraph.toposort()
f_zero = function([], fibby(T.zeros([5]))) assert len(topo) == 1
#theano.printing.debugprint(f_zero) assert isinstance(topo[0].op, Fibby)
# If you run in DebugMode, it will compare the output before # Test that the optimization gets applied.
# and after the optimization. f_zero = function([], fibby(T.zeros([5])))
f_zero()
# If you run in DebugMode, it will compare the output before
# Check that the optimization removes the Fibby Op. # and after the optimization.
# For security, the Theano memory interface ensures that the output f_zero()
# of the function is always memory not aliased to the input.
# That is why there is a DeepCopyOp op. # Check that the optimization removes the Fibby Op.
topo = f_zero.maker.fgraph.toposort() # For security, the Theano memory interface ensures that the output
assert len(topo) == 1 # of the function is always memory not aliased to the input.
assert isinstance(topo[0].op, theano.compile.ops.DeepCopyOp) # That is why there is a DeepCopyOp op.
topo = f_zero.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, theano.compile.ops.DeepCopyOp)
...@@ -18,11 +18,13 @@ should help you understand how these pieces fit together: ...@@ -18,11 +18,13 @@ should help you understand how these pieces fit together:
**Code** **Code**
.. code-block:: python .. testcode::
x = dmatrix('x') import theano.tensor as T
y = dmatrix('y')
z = x + y x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
**Diagram** **Diagram**
...@@ -69,73 +71,67 @@ without any shortcuts, that will make the graph construction very explicit. ...@@ -69,73 +71,67 @@ without any shortcuts, that will make the graph construction very explicit.
This is what you would normally type: This is what you would normally type:
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_graphstructures.test_graphstructures_1
.. code-block:: python # create 3 Variables with owner = None
x = T.matrix('x')
y = T.matrix('y')
z = T.matrix('z')
# create 3 Variables with owner = None # create 2 Variables (one for 'e', one intermediate for y*z)
x = T.matrix('x') # create 2 Apply instances (one for '+', one for '*')
y = T.matrix('y') e = x + y * z
z = T.matrix('z')
# create 2 Variables (one for 'e', one intermediate for y*z)
# create 2 Apply instances (one for '+', one for '*')
e = x + y * z
**Long example** **Long example**
This is what you would type to build the graph explicitly: This is what you would type to build the graph explicitly:
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_graphstructures.test_graphstructures_1
.. code-block:: python
from theano.tensor import add, mul, Apply, Variable, TensorType from theano.tensor import add, mul, Apply, Variable, Constant, TensorType
# Instantiate a type that represents a matrix of doubles # Instantiate a type that represents a matrix of doubles
float64_matrix = TensorType(dtype = 'float64', # double float64_matrix = TensorType(dtype='float64', # double
broadcastable = (False, False)) # matrix broadcastable=(False, False)) # matrix
# We make the Variable instances we need. # We make the Variable instances we need.
x = Variable(type = float64_matrix, name = 'x') x = Variable(type=float64_matrix, name='x')
y = Variable(type = float64_matrix, name = 'y') y = Variable(type=float64_matrix, name='y')
z = Variable(type = float64_matrix, name = 'z') z = Variable(type=float64_matrix, name='z')
# This is the Variable that we want to symbolically represents y*z # This is the Variable that we want to symbolically represents y*z
mul_variable = Variable(type = float64_matrix) mul_variable = Variable(type=float64_matrix)
assert mul_variable.owner is None assert mul_variable.owner is None
# Instantiate a symbolic multiplication # Instantiate a symbolic multiplication
node_mul = Apply(op = mul, node_mul = Apply(op=mul,
inputs = [y, z], inputs=[y, z],
outputs = [mul_variable]) outputs=[mul_variable])
# Fields 'owner' and 'index' are set by Apply # Fields 'owner' and 'index' are set by Apply
assert mul_variable.owner is node_mul assert mul_variable.owner is node_mul
# 'index' is the position of mul_variable in mode_mul's outputs # 'index' is the position of mul_variable in mode_mul's outputs
assert mul_variable.index == 0 assert mul_variable.index == 0
# This is the Variable that we want to symbolically represents x+(y*z) # This is the Variable that we want to symbolically represents x+(y*z)
add_variable = Variable(type = float64_matrix) add_variable = Variable(type=float64_matrix)
assert add_variable.owner is None assert add_variable.owner is None
# Instantiate a symbolic addition # Instantiate a symbolic addition
node_add = Apply(op = add, node_add = Apply(op=add,
inputs = [x, mul_variable], inputs=[x, mul_variable],
outputs = [add_variable]) outputs=[add_variable])
# Fields 'owner' and 'index' are set by Apply # Fields 'owner' and 'index' are set by Apply
assert add_variable.owner is node_add assert add_variable.owner is node_add
assert add_variable.index == 0 assert add_variable.index == 0
e = add_variable e = add_variable
# We have access to x, y and z through pointers # We have access to x, y and z through pointers
assert e.owner.inputs[0] is x assert e.owner.inputs[0] is x
assert e.owner.inputs[1] is mul_variable assert e.owner.inputs[1] is mul_variable
assert e.owner.inputs[1].owner.inputs[0] is y assert e.owner.inputs[1].owner.inputs[0] is y
assert e.owner.inputs[1].owner.inputs[1] is z assert e.owner.inputs[1].owner.inputs[1] is z
Note how the call to ``Apply`` modifies the ``owner`` and ``index``
...@@ -153,20 +149,19 @@ All nodes in the graph must be instances of ``Apply`` or ``Result``, but ...@@ -153,20 +149,19 @@ All nodes in the graph must be instances of ``Apply`` or ``Result``, but
constraints. For example, the :func:`tensor.add` constraints. For example, the :func:`tensor.add`
Op instance is written so that: Op instance is written so that:
.. code-block:: python .. testcode::
e = dscalar('x') + 1 e = T.dscalar('x') + 1
builds the following graph:
.. code-block:: python .. testcode::
node = Apply(op = add,
inputs = [Variable(type = dscalar, name = 'x'),
Constant(type = lscalar, data = 1)],
outputs = [Variable(type = dscalar)])
e = node.outputs[0]
node = Apply(op=add,
inputs=[Variable(type=T.dscalar, name='x'),
Constant(type=T.lscalar, data=1)],
outputs=[Variable(type=T.dscalar)])
e = node.outputs[0]
Graph Structures Graph Structures
...@@ -311,6 +306,7 @@ Theano. The symbolic inputs that you operate on are Variables and what ...@@ -311,6 +306,7 @@ Theano. The symbolic inputs that you operate on are Variables and what
you get from applying various Ops to these inputs are also you get from applying various Ops to these inputs are also
Variables. For example, when I type Variables. For example, when I type
>>> import theano
>>> x = theano.tensor.ivector() >>> x = theano.tensor.ivector()
>>> y = -x >>> y = -x
...@@ -399,31 +395,34 @@ In both types of pairs, the second element of the tuple is an index, ...@@ -399,31 +395,34 @@ In both types of pairs, the second element of the tuple is an index,
such that: ``var.clients[*][0].inputs[index]`` or such that: ``var.clients[*][0].inputs[index]`` or
``fgraph.outputs[index]`` is that variable. ``fgraph.outputs[index]`` is that variable.
.. code-block:: python
>>> import theano
import theano >>> v = theano.tensor.vector()
v = theano.tensor.vector() >>> f = theano.function([v], (v+1).sum())
f = theano.function([v], (v+1).sum()) >>> theano.printing.debugprint(f)
theano.printing.debugprint(f) Sum{acc_dtype=float64} [@A] '' 1
# Sorted list of all nodes in the compiled graph. |Elemwise{add,no_inplace} [@B] '' 0
topo = f.maker.fgraph.toposort() |TensorConstant{(1,) of 1.0} [@C]
topo[0].outputs[0].clients |<TensorType(float64, vector)> [@D]
# [(Sum(Elemwise{add,no_inplace}.0), 0)] >>> # Sorted list of all nodes in the compiled graph.
topo[1].outputs[0].clients >>> topo = f.maker.fgraph.toposort()
# [('output', 0)] >>> topo[0].outputs[0].clients
[(Sum{acc_dtype=float64}(Elemwise{add,no_inplace}.0), 0)]
# An internal variable >>> topo[1].outputs[0].clients
var = topo[0].outputs[0] [('output', 0)]
client = var.clients[0]
client >>> # An internal variable
# (Sum(Elemwise{add,no_inplace}.0), 0) >>> var = topo[0].outputs[0]
type(client[0]) >>> client = var.clients[0]
# <class 'theano.gof.graph.Apply'> >>> client
assert client[0].inputs[client[1]] is var (Sum{acc_dtype=float64}(Elemwise{add,no_inplace}.0), 0)
>>> type(client[0])
# An output of the graph <class 'theano.gof.graph.Apply'>
var = topo[1].outputs[0] >>> assert client[0].inputs[client[1]] is var
client = var.clients[0]
client >>> # An output of the graph
# ('output', 0) >>> var = topo[1].outputs[0]
assert f.maker.fgraph.outputs[client[1]] is var >>> client = var.clients[0]
>>> client
('output', 0)
>>> assert f.maker.fgraph.outputs[client[1]] is var
...@@ -55,7 +55,12 @@ Suppose you had an Op which took ``x`` as input and returned ...@@ -55,7 +55,12 @@ Suppose you had an Op which took ``x`` as input and returned
purpose, you would set the ``view_map`` field as follows: purpose, you would set the ``view_map`` field as follows:
.. code-block:: python .. testsetup::
from theano import Op
myop = Op()
.. testcode::
myop.view_map = {0: [0]} myop.view_map = {0: [0]}
...@@ -66,7 +71,7 @@ inputs that are viewed by a given output, this feature is currently ...@@ -66,7 +71,7 @@ inputs that are viewed by a given output, this feature is currently
unsupported. Here are more examples: unsupported. Here are more examples:
.. code-block:: python .. testcode::
myop.view_map = {0: [0]} # first output is a view of first input myop.view_map = {0: [0]} # first output is a view of first input
myop.view_map = {0: [1]} # first output is a view of second input myop.view_map = {0: [1]} # first output is a view of second input
...@@ -101,8 +106,11 @@ operation on ``x``. ...@@ -101,8 +106,11 @@ operation on ``x``.
modified. Therefore, code using inplace operations would look like modified. Therefore, code using inplace operations would look like
this: this:
.. code-block:: python .. testcode::
from theano.tensor import dscalars, log
from theano.tensor.inplace import add_inplace
x, y = dscalars('x', 'y') x, y = dscalars('x', 'y')
r1 = log(x) r1 = log(x)
...@@ -144,7 +152,7 @@ Theano needs to be notified of this fact. The syntax is similar to ...@@ -144,7 +152,7 @@ Theano needs to be notified of this fact. The syntax is similar to
that of ``view_map``: that of ``view_map``:
.. code-block:: python .. testcode::
myop.destroy_map = {0: [0]} myop.destroy_map = {0: [0]}
...@@ -153,7 +161,7 @@ What this means is that the first output (position 0) operates inplace on the ...@@ -153,7 +161,7 @@ What this means is that the first output (position 0) operates inplace on the
first input (position 0). first input (position 0).
.. code-block:: python .. testcode::
myop.destroy_map = {0: [0]} # first output operates inplace on first input myop.destroy_map = {0: [0]} # first output operates inplace on first input
myop.destroy_map = {0: [1]} # first output operates inplace on second input myop.destroy_map = {0: [1]} # first output operates inplace on second input
......
...@@ -3,6 +3,39 @@ ...@@ -3,6 +3,39 @@
Making arithmetic Ops on double Making arithmetic Ops on double
=============================== ===============================
.. testsetup:: *
from theano import gof
class Double(gof.Type):
    """A Theano Type whose values are plain Python floats ("doubles")."""

    def filter(self, x, strict=False, allow_downcast=None):
        """Return `x` as a float, enforcing this Type's casting policy.

        In strict mode only genuine floats pass through unchanged; with
        ``allow_downcast`` any value is coerced via ``float()``; otherwise
        (False or None) coercion is accepted only when it is lossless.
        """
        if strict:
            # Strict mode: reject anything that is not already a float.
            if not isinstance(x, float):
                raise TypeError('Expected a float!')
            return x
        if allow_downcast:
            # Lossy conversion has been explicitly permitted by the caller.
            return float(x)
        # Covers both the False and None cases: cast only when exact.
        x_float = float(x)
        if x_float != x:
            raise TypeError('The double type cannot accurately represent '
                            'value %s (of type %s): you must explicitly '
                            'allow downcasting if you want to do this.'
                            % (x, type(x)))
        return x_float

    def values_eq_approx(self, x, y, tolerance=1e-4):
        """Approximate equality: relative difference below `tolerance`."""
        return abs(x - y) / (abs(x) + abs(y)) < tolerance

    def __str__(self):
        return "double"

double = Double()
Now that we have a ``double`` type, we have yet to use it to perform
computations. We'll start by defining multiplication.
...@@ -508,10 +541,7 @@ multiplication Op could take an arbitrary number of arguments. ...@@ -508,10 +541,7 @@ multiplication Op could take an arbitrary number of arguments.
First, we'll instantiate a ``mul`` Op: First, we'll instantiate a ``mul`` Op:
.. If you modify this code, also change : .. testcode:: mul
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
from theano import gof from theano import gof
mul = gof.Op() mul = gof.Op()
...@@ -525,10 +555,7 @@ two. This function ensures that both inputs have the ``double`` type. ...@@ -525,10 +555,7 @@ two. This function ensures that both inputs have the ``double`` type.
Since multiplying two doubles yields a double, this function makes an Since multiplying two doubles yields a double, this function makes an
Apply node with an output Variable of type ``double``. Apply node with an output Variable of type ``double``.
.. If you modify this code, also change : .. testcode:: mul
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
def make_node(x, y): def make_node(x, y):
if x.type != double or y.type != double: if x.type != double or y.type != double:
...@@ -561,9 +588,7 @@ built-in type ``float`` because this is the type that ``double.filter()`` ...@@ -561,9 +588,7 @@ built-in type ``float`` because this is the type that ``double.filter()``
will always return, per our own definition. ``output_storage`` will will always return, per our own definition. ``output_storage`` will
contain a single storage cell for the multiplication's variable. contain a single storage cell for the multiplication's variable.
.. If you modify this code, also change : .. testcode:: mul
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
def perform(node, inputs, output_storage): def perform(node, inputs, output_storage):
x, y = inputs[0], inputs[1] x, y = inputs[0], inputs[1]
...@@ -593,30 +618,32 @@ Here, ``z`` is a list of one element. By default, ``z == [None]``. ...@@ -593,30 +618,32 @@ Here, ``z`` is a list of one element. By default, ``z == [None]``.
Trying out our new Op Trying out our new Op
===================== =====================
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
In the following code, we use our new Op:
>>> x, y = double('x'), double('y') .. doctest:: mul
>>> z = mul(x, y)
>>> f = theano.function([x, y], z) >>> import theano
>>> f(5, 6) >>> x, y = double('x'), double('y')
30.0 >>> z = mul(x, y)
>>> f(5.6, 6.7) >>> f = theano.function([x, y], z)
37.519999999999996 >>> f(5, 6)
30.0
>>> f(5.6, 6.7)
37.519999999999996
Note that there is an implicit call to
``double.filter()`` on each argument, so if we give integers as inputs
they are magically cast to the right type. Now, what if we try this?
>>> x = double('x') .. doctest:: mul
>>> z = mul(x, 2)
Traceback (most recent call last): >>> x = double('x')
File "<stdin>", line 1, in <module> >>> z = mul(x, 2)
File "/u/breuleuo/hg/theano/theano/gof/op.py", line 207, in __call__ Traceback (most recent call last):
File "<stdin>", line 2, in make_node File "<stdin>", line 1, in <module>
AttributeError: 'int' object has no attribute 'type' File "/u/breuleuo/hg/theano/theano/gof/op.py", line 207, in __call__
File "<stdin>", line 2, in make_node
AttributeError: 'int' object has no attribute 'type'
Automatic Constant Wrapping Automatic Constant Wrapping
--------------------------- ---------------------------
...@@ -625,9 +652,7 @@ Well, OK. We'd like our Op to be a bit more flexible. This can be done ...@@ -625,9 +652,7 @@ Well, OK. We'd like our Op to be a bit more flexible. This can be done
by modifying ``make_node`` to accept Python ``int`` or ``float`` as by modifying ``make_node`` to accept Python ``int`` or ``float`` as
``x`` and/or ``y``: ``x`` and/or ``y``:
.. If you modify this code, also change : .. testcode:: mul
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
def make_node(x, y): def make_node(x, y):
if isinstance(x, (int, float)): if isinstance(x, (int, float)):
...@@ -643,16 +668,15 @@ Whenever we pass a Python int or float instead of a Variable as ``x`` or ...@@ -643,16 +668,15 @@ Whenever we pass a Python int or float instead of a Variable as ``x`` or
``y``, ``make_node`` will convert it to :ref:`constant` for us. ``gof.Constant`` ``y``, ``make_node`` will convert it to :ref:`constant` for us. ``gof.Constant``
is a :ref:`variable` we statically know the value of. is a :ref:`variable` we statically know the value of.
.. If you modify this code, also change : .. doctest:: mul
.. theano/tests/test_tutorial.py:T_op.test_op_1
>>> x = double('x') >>> x = double('x')
>>> z = mul(x, 2) >>> z = mul(x, 2)
>>> f = theano.function([x], z) >>> f = theano.function([x], z)
>>> f(10) >>> f(10)
20.0 20.0
>>> f(3.4) >>> f(3.4)
6.7999999999999998 6.8
Now the code works the way we want it to. Now the code works the way we want it to.
...@@ -673,10 +697,7 @@ operations ``add``, ``sub`` and ``div``, code for ``make_node`` can be ...@@ -673,10 +697,7 @@ operations ``add``, ``sub`` and ``div``, code for ``make_node`` can be
shared between these Ops. Here is revised implementation of these four shared between these Ops. Here is revised implementation of these four
arithmetic operators: arithmetic operators:
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
from theano import gof from theano import gof
......
...@@ -113,18 +113,16 @@ We will implement it in three ways: using a global optimization, a ...@@ -113,18 +113,16 @@ We will implement it in three ways: using a global optimization, a
local optimization with a Navigator and then using the PatternSub local optimization with a Navigator and then using the PatternSub
facility. facility.
Global optimization Global optimization
------------------- -------------------
Here is the code for a global optimization implementing the Here is the code for a global optimization implementing the
simplification described above: simplification described above:
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python
import theano
from theano import gof
from theano.gof import toolbox from theano.gof import toolbox
class Simplify(gof.Optimizer): class Simplify(gof.Optimizer):
...@@ -132,7 +130,7 @@ simplification described above: ...@@ -132,7 +130,7 @@ simplification described above:
fgraph.attach_feature(toolbox.ReplaceValidate()) fgraph.attach_feature(toolbox.ReplaceValidate())
def apply(self, fgraph): def apply(self, fgraph):
for node in fgraph.toposort(): for node in fgraph.toposort():
if node.op == div: if node.op == true_div:
x, y = node.inputs x, y = node.inputs
z = node.outputs[0] z = node.outputs[0]
if x.owner and x.owner.op == mul: if x.owner and x.owner.op == mul:
...@@ -181,37 +179,35 @@ pointer-following game you need to get ahold of the nodes of interest ...@@ -181,37 +179,35 @@ pointer-following game you need to get ahold of the nodes of interest
for the simplification (``x``, ``y``, ``z``, ``a``, ``b``, etc.). for the simplification (``x``, ``y``, ``z``, ``a``, ``b``, etc.).
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
Test time: Test time:
>>> x = double('x') >>> from theano.scalar import float64, add, mul, true_div
>>> y = double('y') >>> x = float64('x')
>>> z = double('z') >>> y = float64('y')
>>> a = add(z, mul(div(mul(y, x), y), div(z, x))) >>> z = float64('z')
>>> a = add(z, mul(true_div(mul(y, x), y), true_div(z, x)))
>>> e = gof.FunctionGraph([x, y, z], [a]) >>> e = gof.FunctionGraph([x, y, z], [a])
>>> e >>> e
[add(z, mul(div(mul(y, x), y), div(z, x)))] [add(z, mul(true_div(mul(y, x), y), true_div(z, x)))]
>>> simplify.optimize(e) >>> simplify.optimize(e)
>>> e >>> e
[add(z, mul(x, div(z, x)))] [add(z, mul(x, true_div(z, x)))]
Cool! It seems to work. You can check what happens if you put many Cool! It seems to work. You can check what happens if you put many
instances of :math:`\frac{xy}{y}` in the graph. Note that it sometimes instances of :math:`\frac{xy}{y}` in the graph. Note that it sometimes
won't work for reasons that have nothing to do with the quality of the won't work for reasons that have nothing to do with the quality of the
optimization you wrote. For example, consider the following: optimization you wrote. For example, consider the following:
>>> x = double('x') >>> x = float64('x')
>>> y = double('y') >>> y = float64('y')
>>> z = double('z') >>> z = float64('z')
>>> a = div(mul(add(y, z), x), add(y, z)) >>> a = true_div(mul(add(y, z), x), add(y, z))
>>> e = gof.FunctionGraph([x, y, z], [a]) >>> e = gof.FunctionGraph([x, y, z], [a])
>>> e >>> e
[div(mul(add(y, z), x), add(y, z))] [true_div(mul(add(y, z), x), add(y, z))]
>>> simplify.optimize(e) >>> simplify.optimize(e)
>>> e >>> e
[div(mul(add(y, z), x), add(y, z))] [true_div(mul(add(y, z), x), add(y, z))]
Nothing happened here. The reason is: ``add(y, z) != add(y,
z)``. That is the case for efficiency reasons. To fix this problem we
...@@ -220,9 +216,10 @@ computation, using the ``merge_optimizer`` defined in ...@@ -220,9 +216,10 @@ computation, using the ``merge_optimizer`` defined in
``theano.gof.opt``. ``theano.gof.opt``.
>>> from theano.gof.opt import merge_optimizer >>> from theano.gof.opt import merge_optimizer
>>> merge_optimizer.optimize(e) >>> merge_optimizer.optimize(e) # doctest: +ELLIPSIS
(0, ..., None, None, {}, 1, 0)
>>> e >>> e
[div(mul(*1 -> add(y, z), x), *1)] [true_div(mul(*1 -> add(y, z), x), *1)]
>>> simplify.optimize(e) >>> simplify.optimize(e)
>>> e >>> e
[x] [x]
...@@ -251,15 +248,12 @@ Local optimization ...@@ -251,15 +248,12 @@ Local optimization
The local version of the above code would be the following: The local version of the above code would be the following:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_2
.. code-block:: python .. testcode::
class LocalSimplify(gof.LocalOptimizer): class LocalSimplify(gof.LocalOptimizer):
def transform(self, node): def transform(self, node):
if node.op == div: if node.op == true_div:
x, y = node.inputs x, y = node.inputs
if x.owner and x.owner.op == mul: if x.owner and x.owner.op == mul:
a, b = x.owner.inputs a, b = x.owner.inputs
...@@ -292,21 +286,18 @@ with a :ref:`navigator`. Basically, a :ref:`navigator` is a global ...@@ -292,21 +286,18 @@ with a :ref:`navigator`. Basically, a :ref:`navigator` is a global
optimizer that loops through all nodes in the graph (or a well-defined optimizer that loops through all nodes in the graph (or a well-defined
subset of them) and applies one or several local optimizers on them. subset of them) and applies one or several local optimizers on them.
.. If you modify this code, also change : >>> x = float64('x')
.. theano/tests/test_tutorial.py:T_extending.test_extending_2 >>> y = float64('y')
>>> z = float64('z')
>>> x = double('x') >>> a = add(z, mul(true_div(mul(y, x), y), true_div(z, x)))
>>> y = double('y')
>>> z = double('z')
>>> a = add(z, mul(div(mul(y, x), y), div(z, x)))
>>> e = gof.FunctionGraph([x, y, z], [a]) >>> e = gof.FunctionGraph([x, y, z], [a])
>>> e >>> e
[add(z, mul(div(mul(y, x), y), div(z, x)))] [add(z, mul(true_div(mul(y, x), y), true_div(z, x)))]
>>> simplify = gof.TopoOptimizer(local_simplify) >>> simplify = gof.TopoOptimizer(local_simplify)
>>> simplify.optimize(e) >>> simplify.optimize(e)
(<theano.gof.opt.TopoOptimizer object at 0x...>, 1, 5, 3, ..., ..., ...)
>>> e >>> e
[add(z, mul(x, div(z, x)))] [add(z, mul(x, true_div(z, x)))]
OpSub, OpRemove, PatternSub OpSub, OpRemove, PatternSub
+++++++++++++++++++++++++++ +++++++++++++++++++++++++++
...@@ -331,8 +322,11 @@ Theano defines some shortcuts to make LocalOptimizers: ...@@ -331,8 +322,11 @@ Theano defines some shortcuts to make LocalOptimizers:
Replaces all occurrences of the first pattern by the second pattern. Replaces all occurrences of the first pattern by the second pattern.
See :class:`PatternSub`. See :class:`PatternSub`.
.. testsetup::
.. code-block:: python from theano.scalar import identity
.. testcode::
from theano.gof.opt import OpSub, OpRemove, PatternSub from theano.gof.opt import OpSub, OpRemove, PatternSub
...@@ -346,9 +340,9 @@ Theano defines some shortcuts to make LocalOptimizers: ...@@ -346,9 +340,9 @@ Theano defines some shortcuts to make LocalOptimizers:
# The "simplify" operation we've been defining in the past few # The "simplify" operation we've been defining in the past few
# sections. Note that we need two patterns to account for the # sections. Note that we need two patterns to account for the
# permutations of the arguments to mul. # permutations of the arguments to mul.
local_simplify_1 = PatternSub((div, (mul, 'x', 'y'), 'y'), local_simplify_1 = PatternSub((true_div, (mul, 'x', 'y'), 'y'),
'x') 'x')
local_simplify_2 = PatternSub((div, (mul, 'x', 'y'), 'x'), local_simplify_2 = PatternSub((true_div, (mul, 'x', 'y'), 'x'),
'y') 'y')
.. note:: .. note::
...@@ -437,7 +431,7 @@ A Query is built by the following call: ...@@ -437,7 +431,7 @@ A Query is built by the following call:
.. code-block:: python .. code-block:: python
theano.gof.Query(include, require = None, exclude = None, subquery = None) theano.gof.Query(include, require=None, exclude=None, subquery=None)
.. class:: Query .. class:: Query
...@@ -476,22 +470,23 @@ Examples ...@@ -476,22 +470,23 @@ Examples
Here are a few examples of how to use a Query on optdb to produce an Here are a few examples of how to use a Query on optdb to produce an
Optimizer: Optimizer:
.. code-block:: python .. testcode::
from theano.gof import Query
from theano.compile import optdb from theano.compile import optdb
# This is how the optimizer for the fast_run mode is defined # This is how the optimizer for the fast_run mode is defined
fast_run = optdb.query(Query(include = ['fast_run'])) fast_run = optdb.query(Query(include=['fast_run']))
# This is how the optimizer for the fast_compile mode is defined # This is how the optimizer for the fast_compile mode is defined
fast_compile = optdb.query(Query(include = ['fast_compile'])) fast_compile = optdb.query(Query(include=['fast_compile']))
# This is the same as fast_run but no optimizations will replace # This is the same as fast_run but no optimizations will replace
# any operation by an inplace version. This assumes, of course, # any operation by an inplace version. This assumes, of course,
# that all inplace operations are tagged as 'inplace' (as they # that all inplace operations are tagged as 'inplace' (as they
# should!) # should!)
fast_run_no_inplace = optdb.query(Query(include = ['fast_run'], exclude = ['inplace'])) fast_run_no_inplace = optdb.query(Query(include=['fast_run'],
fast_run_no_inplace = fast_run.excluding('inplace') exclude=['inplace']))
Registering an Optimizer Registering an Optimizer
...@@ -500,7 +495,7 @@ Registering an Optimizer ...@@ -500,7 +495,7 @@ Registering an Optimizer
Let's say we have a global optimizer called ``simplify``. We can add Let's say we have a global optimizer called ``simplify``. We can add
it to ``optdb`` as follows: it to ``optdb`` as follows:
.. code-block:: python .. testcode::
# optdb.register(name, optimizer, order, *tags) # optdb.register(name, optimizer, order, *tags)
optdb.register('simplify', simplify, 0.5, 'fast_run') optdb.register('simplify', simplify, 0.5, 'fast_run')
......
...@@ -19,7 +19,7 @@ implemented using other already existing Ops. For example, instead of ...@@ -19,7 +19,7 @@ implemented using other already existing Ops. For example, instead of
writing a "sum_square_difference" Op, you should probably just write a writing a "sum_square_difference" Op, you should probably just write a
simple function: simple function:
.. code-block:: python .. testcode::
from theano import tensor as T from theano import tensor as T
......
...@@ -176,9 +176,7 @@ must define ``filter`` and shall override ``values_eq_approx``. ...@@ -176,9 +176,7 @@ must define ``filter`` and shall override ``values_eq_approx``.
**filter** **filter**
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
# Note that we shadow Python's function ``filter`` with this # Note that we shadow Python's function ``filter`` with this
# definition. # definition.
...@@ -215,7 +213,7 @@ when ``allow_downcast`` is False, i.e. no precision loss is allowed. ...@@ -215,7 +213,7 @@ when ``allow_downcast`` is False, i.e. no precision loss is allowed.
**values_eq_approx** **values_eq_approx**
.. code-block:: python .. testcode::
def values_eq_approx(x, y, tolerance=1e-4): def values_eq_approx(x, y, tolerance=1e-4):
return abs(x - y) / (abs(x) + abs(y)) < tolerance return abs(x - y) / (abs(x) + abs(y)) < tolerance
...@@ -246,9 +244,7 @@ contract. Recall that Type defines default implementations for all ...@@ -246,9 +244,7 @@ contract. Recall that Type defines default implementations for all
required methods of the interface, except ``filter``. One way to make required methods of the interface, except ``filter``. One way to make
the Type is to instantiate a plain Type and set the needed fields: the Type is to instantiate a plain Type and set the needed fields:
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
from theano import gof from theano import gof
...@@ -260,8 +256,6 @@ the Type is to instantiate a plain Type and set the needed fields: ...@@ -260,8 +256,6 @@ the Type is to instantiate a plain Type and set the needed fields:
Another way to make this Type is to make a subclass of ``gof.Type`` Another way to make this Type is to make a subclass of ``gof.Type``
and define ``filter`` and ``values_eq_approx`` in the subclass: and define ``filter`` and ``values_eq_approx`` in the subclass:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python .. code-block:: python
from theano import gof from theano import gof
...@@ -285,6 +279,38 @@ There is a small issue with defining ``double`` this way. All ...@@ -285,6 +279,38 @@ There is a small issue with defining ``double`` this way. All
instances of ``Double`` are technically the same Type. However, different instances of ``Double`` are technically the same Type. However, different
``Double`` Type instances do not compare the same: ``Double`` Type instances do not compare the same:
.. testsetup::
from theano import gof
class Double(gof.Type):
    """Theano Type for scalar double values, backed by Python's float."""

    def filter(self, x, strict=False, allow_downcast=None):
        """Validate/coerce `x` into a float according to the casting flags.

        Strict mode accepts only real floats; ``allow_downcast`` permits any
        lossy ``float()`` conversion; the default (False/None) converts only
        when no precision is lost.
        """
        if strict:
            if isinstance(x, float):
                return x
            # Anything other than a real float is an error in strict mode.
            raise TypeError('Expected a float!')
        if allow_downcast:
            # Caller opted in to possibly lossy conversion.
            return float(x)
        # Covers both the False and None cases.
        x_float = float(x)
        if x_float == x:
            return x_float
        raise TypeError('The double type cannot accurately represent '
                        'value %s (of type %s): you must explicitly '
                        'allow downcasting if you want to do this.'
                        % (x, type(x)))

    def values_eq_approx(self, x, y, tolerance=1e-4):
        """True when x and y differ by less than `tolerance`, relatively."""
        return abs(x - y) / (abs(x) + abs(y)) < tolerance

    def __str__(self):
        return "double"

double = Double()
>>> double1 = Double() >>> double1 = Double()
>>> double2 = Double() >>> double2 = Double()
>>> double1 == double2 >>> double1 == double2
...@@ -299,10 +325,7 @@ There are several ways to make sure that equality testing works properly: ...@@ -299,10 +325,7 @@ There are several ways to make sure that equality testing works properly:
#. Define ``Double.__eq__`` so that instances of type Double #. Define ``Double.__eq__`` so that instances of type Double
are equal. For example: are equal. For example:
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
def __eq__(self, other): def __eq__(self, other):
return type(self) is Double and type(other) is Double return type(self) is Double and type(other) is Double
...@@ -355,9 +378,7 @@ attempt to clear up the confusion: ...@@ -355,9 +378,7 @@ attempt to clear up the confusion:
Final version Final version
============= =============
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_extending.test_extending_1
.. code-block:: python
from theano import gof from theano import gof
......
...@@ -39,7 +39,9 @@ A unittest is a subclass of ``unittest.TestCase``, with member ...@@ -39,7 +39,9 @@ A unittest is a subclass of ``unittest.TestCase``, with member
functions with names that start with the string ``test``. For functions with names that start with the string ``test``. For
example: example:
.. code-block:: python .. testcode::
import unittest
class MyTestCase(unittest.TestCase): class MyTestCase(unittest.TestCase):
def test0(self): def test0(self):
...@@ -115,7 +117,7 @@ built-in unittest module uses metaclasses to know about all the ...@@ -115,7 +117,7 @@ built-in unittest module uses metaclasses to know about all the
them all, printing '.' for passed tests, and a stack trace for them all, printing '.' for passed tests, and a stack trace for
exceptions. The standard footer code in theano's test files is: exceptions. The standard footer code in theano's test files is:
.. code-block:: python .. testcode::
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -134,7 +136,7 @@ To run all the tests in one or more ``TestCase`` subclasses: ...@@ -134,7 +136,7 @@ To run all the tests in one or more ``TestCase`` subclasses:
To run just a single ``MyTestCase`` member test function called ``test0``: To run just a single ``MyTestCase`` member test function called ``test0``:
.. code-block:: python .. testcode::
MyTestCase('test0').debug() MyTestCase('test0').debug()
...@@ -186,6 +188,7 @@ Example: ...@@ -186,6 +188,7 @@ Example:
.. code-block:: python .. code-block:: python
import unittest import unittest
class TestTensorDot(unittest.TestCase): class TestTensorDot(unittest.TestCase):
def test_validity(self): def test_validity(self):
# do stuff # do stuff
...@@ -201,8 +204,10 @@ functionality which is shared amongst all test methods in the test ...@@ -201,8 +204,10 @@ functionality which is shared amongst all test methods in the test
case (i.e. initializing data, parameters, seeding random number case (i.e. initializing data, parameters, seeding random number
generators -- more on this later) generators -- more on this later)
.. code-block:: python .. testcode:: writeUnitest
import unittest
class TestTensorDot(unittest.TestCase): class TestTensorDot(unittest.TestCase):
def setUp(self): def setUp(self):
# data which will be used in various test methods # data which will be used in various test methods
...@@ -231,16 +236,16 @@ Example: ...@@ -231,16 +236,16 @@ Example:
def test_validity(self): def test_validity(self):
a = T.dmatrix('a') a = T.dmatrix('a')
b = T.dmatrix('b') b = T.dmatrix('b')
c = T.dot(a,b) c = T.dot(a, b)
f = theano.function([a,b],[c]) f = theano.function([a, b], [c])
cmp = f(self.avals,self.bvals) == numpy.dot(self.avals,self.bvals) cmp = f(self.avals, self.bvals) == numpy.dot(self.avals, self.bvals)
self.assertTrue(numpy.all(cmp)) self.assertTrue(numpy.all(cmp))
Avoid hard-coding variables, as in the following case: Avoid hard-coding variables, as in the following case:
.. code-block:: python .. code-block:: python
self.assertTrue(numpy.all(f(self.avals,self.bvals)==numpy.array([[25,25,30,28],[21,18,14,25]]))) self.assertTrue(numpy.all(f(self.avals, self.bvals) == numpy.array([[25, 25, 30, 28], [21, 18, 14, 25]])))
This makes the test case less manageable and forces the user to update This makes the test case less manageable and forces the user to update
the variables each time the input is changed or possibly when the the variables each time the input is changed or possibly when the
...@@ -275,6 +280,8 @@ Example: ...@@ -275,6 +280,8 @@ Example:
.. code-block:: python .. code-block:: python
import unittest
class TestTensorDot(unittest.TestCase): class TestTensorDot(unittest.TestCase):
... ...
def test_3D_dot_fail(self): def test_3D_dot_fail(self):
...@@ -300,7 +307,9 @@ Example: ...@@ -300,7 +307,9 @@ Example:
.. code-block:: python .. code-block:: python
f = T.function([a,b],[c],mode='FAST_RUN') from theano import function
f = function([a,b],[c],mode='FAST_RUN')
Whenever possible, unit tests should omit this parameter. Leaving Whenever possible, unit tests should omit this parameter. Leaving
out the mode will ensure that unit tests use the default mode. out the mode will ensure that unit tests use the default mode.
...@@ -334,7 +343,7 @@ another (i.e always pass or always fail). ...@@ -334,7 +343,7 @@ another (i.e always pass or always fail).
Instead of using ``numpy.random.seed`` to do this, we encourage users to Instead of using ``numpy.random.seed`` to do this, we encourage users to
do the following: do the following:
.. code-block:: python .. testcode::
from theano.tests import unittest_tools from theano.tests import unittest_tools
...@@ -367,8 +376,10 @@ machine) can simply set ``config.unittests.rseed`` to 'random' (see ...@@ -367,8 +376,10 @@ machine) can simply set ``config.unittests.rseed`` to 'random' (see
Similarly, to provide a seed to numpy.random.RandomState, simply use: Similarly, to provide a seed to numpy.random.RandomState, simply use:
.. code-block:: python .. testcode::
import numpy
rng = numpy.random.RandomState(unittest_tools.fetch_seed()) rng = numpy.random.RandomState(unittest_tools.fetch_seed())
# OR providing an explicit seed # OR providing an explicit seed
rng = numpy.random.RandomState(unittest_tools.fetch_seed(1231)) #again not recommended rng = numpy.random.RandomState(unittest_tools.fetch_seed(1231)) #again not recommended
...@@ -413,7 +424,9 @@ at point ``x`` is approximated as: ...@@ -413,7 +424,9 @@ at point ``x`` is approximated as:
Here is the prototype for the verify_grad function. Here is the prototype for the verify_grad function.
>>> def verify_grad(fun, pt, n_tests=2, rng=None, eps=1.0e-7, abs_tol=0.0001, rel_tol=0.0001): .. code-block:: python
def verify_grad(fun, pt, n_tests=2, rng=None, eps=1.0e-7, abs_tol=0.0001, rel_tol=0.0001):
``verify_grad`` raises an Exception if the difference between the analytic gradient and ``verify_grad`` raises an Exception if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) of a random numerical gradient (computed through the Finite Difference Method) of a random
...@@ -445,7 +458,7 @@ In the general case, you can define ``fun`` as you want, as long as it ...@@ -445,7 +458,7 @@ In the general case, you can define ``fun`` as you want, as long as it
takes as inputs Theano symbolic variables and returns a single Theano takes as inputs Theano symbolic variables and returns a single Theano
symbolic variable: symbolic variable:
.. code-block:: python .. testcode::
def test_verify_exprgrad(): def test_verify_exprgrad():
def fun(x,y,z): def fun(x,y,z):
...@@ -460,7 +473,7 @@ symbolic variable: ...@@ -460,7 +473,7 @@ symbolic variable:
Here is an example showing how to use ``verify_grad`` on an Op instance: Here is an example showing how to use ``verify_grad`` on an Op instance:
.. code-block:: python .. testcode::
def test_flatten_outdimNone(): def test_flatten_outdimNone():
# Testing gradient w.r.t. all inputs of an op (in this example the op # Testing gradient w.r.t. all inputs of an op (in this example the op
...@@ -474,7 +487,7 @@ an Op's inputs. This is useful in particular when the gradient w.r.t. some of ...@@ -474,7 +487,7 @@ an Op's inputs. This is useful in particular when the gradient w.r.t. some of
the inputs cannot be computed by finite difference (e.g. for discrete inputs), the inputs cannot be computed by finite difference (e.g. for discrete inputs),
which would cause ``verify_grad`` to crash. which would cause ``verify_grad`` to crash.
.. code-block:: python .. testcode::
def test_crossentropy_softmax_grad(): def test_crossentropy_softmax_grad():
op = tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias op = tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias
...@@ -511,8 +524,13 @@ this is common, two helper functions exist to make your lives easier: ...@@ -511,8 +524,13 @@ this is common, two helper functions exist to make your lives easier:
Here is an example of ``makeTester`` generating testcases for the Dot Here is an example of ``makeTester`` generating testcases for the Dot
product op: product op:
.. code-block:: python .. testcode::
from numpy import dot
from numpy.random import rand
from theano.tensor.tests.test_basic import makeTester
DotTester = makeTester(name = 'DotTester', DotTester = makeTester(name = 'DotTester',
op = dot, op = dot,
expected = lambda x, y: numpy.dot(x, y), expected = lambda x, y: numpy.dot(x, y),
......
...@@ -120,7 +120,10 @@ the function was compiled. ...@@ -120,7 +120,10 @@ the function was compiled.
For example, replace the following For example, replace the following
.. code-block:: python .. testcode:: faster
import theano
from theano import function
x = theano.tensor.scalar('x') x = theano.tensor.scalar('x')
f = function([x], x + 1.) f = function([x], x + 1.)
...@@ -128,7 +131,11 @@ For example, replace the following ...@@ -128,7 +131,11 @@ For example, replace the following
with with
.. code-block:: python .. testcode:: faster
import numpy
import theano
from theano import function
x = theano.tensor.scalar('x') x = theano.tensor.scalar('x')
f = function([x], x + 1.) f = function([x], x + 1.)
......
...@@ -3,10 +3,10 @@ ...@@ -3,10 +3,10 @@
Glossary Glossary
======== ========
.. .. testsetup::
# This is for the doctests in the file
>>> import theano import theano
>>> from theano import tensor from theano import tensor
.. glossary:: .. glossary::
......
...@@ -330,8 +330,8 @@ a Python (or IPython) interpreter, ...@@ -330,8 +330,8 @@ a Python (or IPython) interpreter,
.. code-block:: python .. code-block:: python
>>> import theano import theano
>>> theano.test() # doctest: +SKIP theano.test()
You can also run them in-place from the Git checkout directory by typing You can also run them in-place from the Git checkout directory by typing
......
...@@ -405,7 +405,7 @@ compile C code for CPU execution. ...@@ -405,7 +405,7 @@ compile C code for CPU execution.
Create a test file containing: Create a test file containing:
.. code-block:: python .. testcode::
import numpy as np import numpy as np
import time import time
...@@ -423,6 +423,18 @@ Create a test file containing: ...@@ -423,6 +423,18 @@ Create a test file containing:
print "NP time: %f[s], theano time: %f[s] (times should be close when run on CPU!)" %( print "NP time: %f[s], theano time: %f[s] (times should be close when run on CPU!)" %(
np_end-np_start, t_end-t_start) np_end-np_start, t_end-t_start)
print "Result difference: %f" % (np.abs(AB-tAB).max(), ) print "Result difference: %f" % (np.abs(AB-tAB).max(), )
.. testoutput::
:hide:
:options: +ELLIPSIS
NP time: ...[s], theano time: ...[s] (times should be close when run on CPU!)
Result difference: ...
.. code-block:: none
NP time: 1.480863[s], theano time: 1.475381[s] (times should be close when run on CPU!)
Result difference: 0.000000
Then run it. It should execute without problems and the Theano function Then run it. It should execute without problems and the Theano function
should run at a speed similar to the regular NumPy should run at a speed similar to the regular NumPy
......
...@@ -10,7 +10,9 @@ To run Theano on the Mammouth cluster, follow these simple steps: ...@@ -10,7 +10,9 @@ To run Theano on the Mammouth cluster, follow these simple steps:
the goodies for using the latest and greatest (optimized) libraries the goodies for using the latest and greatest (optimized) libraries
(numpy, scipy, etc.) (numpy, scipy, etc.)
>>> source /home/bastienf/.local.bashrc .. code-block:: sh
source /home/bastienf/.local.bashrc
Perhaps even put this in your ``.bashrc`` Perhaps even put this in your ``.bashrc``
......
...@@ -37,12 +37,11 @@ Theano doesn't use your grandfather's python. ...@@ -37,12 +37,11 @@ Theano doesn't use your grandfather's python.
* functions (function objects) can have attributes too. This technique * functions (function objects) can have attributes too. This technique
is often used to define a function's error messages. is often used to define a function's error messages.
.. code-block:: python >>> def f(): return f.a
>>> f.a = 5
def f(): return f.a >>> f()
f.a = 5 5
f() # returns 5
* Warning about mutual imports: * Warning about mutual imports:
* script a.py file defined a class A. * script a.py file defined a class A.
......
...@@ -25,8 +25,12 @@ a cluster. ...@@ -25,8 +25,12 @@ a cluster.
DebugMode can be used as follows: DebugMode can be used as follows:
.. code-block:: python .. testcode::
import theano
from theano import tensor
from theano.compile.debugmode import DebugMode
x = tensor.dscalar('x') x = tensor.dscalar('x')
f = theano.function([x], 10*x, mode='DebugMode') f = theano.function([x], 10*x, mode='DebugMode')
......
...@@ -18,9 +18,11 @@ the interface for compiling graphs into callable objects. ...@@ -18,9 +18,11 @@ the interface for compiling graphs into callable objects.
You've already seen example usage in the basic tutorial... something like this: You've already seen example usage in the basic tutorial... something like this:
>>> import theano
>>> x = theano.tensor.dscalar() >>> x = theano.tensor.dscalar()
>>> f = theano.function([x], 2*x) >>> f = theano.function([x], 2*x)
>>> print f(4) # prints 8.0 >>> f(4)
array(8.0)
The idea here is that we've compiled the symbolic graph (``2*x``) into a function that can be called on a number and will do some computations. The idea here is that we've compiled the symbolic graph (``2*x``) into a function that can be called on a number and will do some computations.
......
...@@ -80,6 +80,9 @@ A non-None `value` argument makes an In() instance an optional parameter ...@@ -80,6 +80,9 @@ A non-None `value` argument makes an In() instance an optional parameter
of the compiled function. For example, in the following code we are of the compiled function. For example, in the following code we are
defining an arity-2 function ``inc``. defining an arity-2 function ``inc``.
>>> import theano.tensor as T
>>> from theano import function
>>> from theano.compile.io import In
>>> u, x, s = T.scalars('u', 'x', 's') >>> u, x, s = T.scalars('u', 'x', 's')
>>> inc = function([u, In(x, value=3), In(s, update=(s+x*u), value=10.0)], []) >>> inc = function([u, In(x, value=3), In(s, update=(s+x*u), value=10.0)], [])
...@@ -179,27 +182,38 @@ method to access values by indexing a Function directly by typing ...@@ -179,27 +182,38 @@ method to access values by indexing a Function directly by typing
To show some examples of these access methods... To show some examples of these access methods...
.. code-block:: python
a, b, c = T.scalars('xys') # set the internal names of graph nodes >>> from theano import tensor as T, function
# Note that the name of c is 's', not 'c'! >>> a, b, c = T.scalars('xys') # set the internal names of graph nodes
fn = function([a, b, ((c, c+a+b), 10.0)], []) >>> # Note that the name of c is 's', not 'c'!
>>> fn = function([a, b, ((c, c+a+b), 10.0)], [])
#the value associated with c is accessible in 3 ways >>> # the value associated with c is accessible in 3 ways
assert fn['s'] is fn.value[c] >>> fn['s'] is fn.value[c]
assert fn['s'] is fn.container[c].value True
>>> fn['s'] is fn.container[c].value
True
assert fn['s'] == 10.0 >>> fn['s']
fn(1, 2) array(10.0)
assert fn['s'] == 13.0 >>> fn(1, 2)
fn.s = 99.0 []
fn(1, 0) >>> fn['s']
assert fn['s'] == 100.0 array(13.0)
fn.value[c] = 99.0 >>> fn['s'] = 99.0
fn(1,0) >>> fn(1, 0)
assert fn['s'] == 100.0 []
assert fn['s'] == fn.value[c] >>> fn['s']
assert fn['s'] == fn.container[c].value array(100.0)
>>> fn.value[c] = 99.0
>>> fn(1,0)
[]
>>> fn['s']
array(100.0)
>>> fn['s'] == fn.value[c]
True
>>> fn['s'] == fn.container[c].value
True
Input Shortcuts Input Shortcuts
...@@ -221,31 +235,41 @@ Every element of the inputs list will be upgraded to an In instance if necessary ...@@ -221,31 +235,41 @@ Every element of the inputs list will be upgraded to an In instance if necessary
Example: Example:
.. code-block:: python >>> import theano
>>> from theano import tensor as T
import theano >>> from theano.compile.io import In
from theano import tensor as T >>> x = T.scalar()
from theano.compile.io import In >>> y = T.scalar('y')
x = T.scalar() >>> z = T.scalar('z')
y = T.scalar('y') >>> w = T.scalar('w')
z = T.scalar('z')
w = T.scalar('w') >>> fn = theano.function(inputs=[x, y, In(z, value=42), ((w, w+x), 0)],
... outputs=x + y + z)
fn = theano.function(inputs = [x, y, In(z, value=42), ((w, w+x), 0)], >>> # the first two arguments are required and the last two are
outputs = x + y + z) >>> # optional and initialized to 42 and 0, respectively.
# the first two arguments are required and the last two are >>> # The last argument, w, is updated with w + x each time the
# optional and initialized to 42 and 0, respectively. >>> # function is called.
# The last argument, w, is updated with w + x each time the
# function is called. >>> fn(1) # illegal because there are two required arguments # doctest: +ELLIPSIS
Traceback (most recent call last):
fn(1) # illegal because there are two required arguments ...
fn(1, 2) # legal, z is 42, w goes 0 -> 1 (because w <- w + x), returns array(45.0) TypeError: Missing required input: y
fn(1, y = 2) # legal, z is 42, w goes 1 -> 2, returns array(45.0) >>> fn(1, 2) # legal, z is 42, w goes 0 -> 1 (because w <- w + x)
fn(x = 1, y = 2) # illegal because x was not named array(45.0)
fn(1, 2, 3) # legal, z is 3, w goes 2 -> 3, returns array(6.0) >>> fn(1, y=2) # legal, z is 42, w goes 1 -> 2
fn(1, z = 3, y = 2) # legal, z is 3, w goes 3 -> 4, returns array(6.0) array(45.0)
fn(1, 2, w = 400) # legal, z is 42 again, w goes 400 -> 401, returns array(45.0) >>> fn(x=1, y=2) # illegal because x was not named # doctest: +ELLIPSIS
fn(1, 2) # legal, z is 42, w goes 401 -> 402, returns array(45.0) Traceback (most recent call last):
...
TypeError: Unknown input or state: x. The function has 3 named inputs (y, z, w), and 1 unnamed input which thus cannot be accessed through keyword argument (use 'name=...' in a variable's constructor to give it a name).
>>> fn(1, 2, 3) # legal, z is 3, w goes 2 -> 3
array(6.0)
>>> fn(1, z=3, y=2) # legal, z is 3, w goes 3 -> 4
array(6.0)
>>> fn(1, 2, w=400) # legal, z is 42 again, w goes 400 -> 401
array(45.0)
>>> fn(1, 2) # legal, z is 42, w goes 401 -> 402
array(45.0)
In the example above, ``z`` has value 42 when no value is explicitly given. In the example above, ``z`` has value 42 when no value is explicitly given.
This default value is potentially used at every function invocation, because This default value is potentially used at every function invocation, because
...@@ -282,20 +306,25 @@ If a single ``Variable`` or ``Out`` instance is given as argument, then the comp ...@@ -282,20 +306,25 @@ If a single ``Variable`` or ``Out`` instance is given as argument, then the comp
If a list of ``Variable`` or ``Out`` instances is given as argument, then the compiled function will return a list of their values. If a list of ``Variable`` or ``Out`` instances is given as argument, then the compiled function will return a list of their values.
.. code-block:: python >>> import numpy
>>> from theano.compile.io import Out
x, y, s = T.matrices('xys') >>> x, y, s = T.matrices('xys')
# print a list of 2 ndarrays >>> # print a list of 2 ndarrays
fn1 = theano.function([x], [x+x, Out((x+x).T, borrow=True)]) >>> fn1 = theano.function([x], [x+x, Out((x+x).T, borrow=True)])
print fn1(numpy.asarray([[1,0],[0,1]])) >>> fn1(numpy.asarray([[1,0],[0,1]]))
[array([[ 2., 0.],
[ 0., 2.]]), array([[ 2., 0.],
# print a list of 1 ndarray [ 0., 2.]])]
fn2 = theano.function([x], [x+x])
print fn2(numpy.asarray([[1,0],[0,1]])) >>> # print a list of 1 ndarray
>>> fn2 = theano.function([x], [x+x])
# print an ndarray >>> fn2(numpy.asarray([[1,0],[0,1]]))
fn3 = theano.function([x], outputs=x+x) [array([[ 2., 0.],
print fn3(numpy.asarray([[1,0],[0,1]])) [ 0., 2.]])]
>>> # print an ndarray
>>> fn3 = theano.function([x], outputs=x+x)
>>> fn3(numpy.asarray([[1,0],[0,1]]))
array([[ 2., 0.],
[ 0., 2.]])
...@@ -21,8 +21,13 @@ of abnormal values: NaNs, Infs, and abnormally big values. ...@@ -21,8 +21,13 @@ of abnormal values: NaNs, Infs, and abnormally big values.
NanGuardMode can be used as follows: NanGuardMode can be used as follows:
.. code-block:: python .. testcode::
import numpy
import theano
import theano.tensor as T
from theano.compile.nanguardmode import NanGuardMode
x = T.matrix() x = T.matrix()
w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX)) w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
y = T.dot(x, w) y = T.dot(x, w)
...@@ -36,12 +41,20 @@ input and output variable of each node. When abnormal values are ...@@ -36,12 +41,20 @@ input and output variable of each node. When abnormal values are
detected, it raises an error to indicate which node yields the NaNs. For detected, it raises an error to indicate which node yields the NaNs. For
example, if we pass the following values to ``fun``: example, if we pass the following values to ``fun``:
.. code-block:: python .. testcode::
infa = numpy.tile( infa = numpy.tile(
(numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5)) (numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5))
fun(infa) fun(infa)
.. testoutput::
:hide:
:options: +ELLIPSIS
Traceback (most recent call last):
...
AssertionError: ...
It will raise an AssertionError indicating that Inf value is detected while It will raise an AssertionError indicating that Inf value is detected while
executing the function. executing the function.
......
...@@ -25,8 +25,9 @@ process. ...@@ -25,8 +25,9 @@ process.
Creating a ProfileMode Instance Creating a ProfileMode Instance
------------------------------- -------------------------------
First create a ProfileMode instance. First create a ProfileMode instance.
>>> import theano
>>> from theano import ProfileMode >>> from theano import ProfileMode
>>> profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker()) >>> profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
...@@ -62,6 +63,12 @@ Compiling your Graph with ProfileMode ...@@ -62,6 +63,12 @@ Compiling your Graph with ProfileMode
Once the ProfileMode instance is created, simply compile your graph as you Once the ProfileMode instance is created, simply compile your graph as you
would normally, by specifying the mode parameter. would normally, by specifying the mode parameter.
.. testsetup::
import theano
input1, input2 = theano.tensor.scalars(2)
output1 = input1+input2
>>> # with functions >>> # with functions
>>> f = theano.function([input1,input2],[output1], mode=profmode) >>> f = theano.function([input1,input2],[output1], mode=profmode)
...@@ -76,13 +83,13 @@ of its time. ...@@ -76,13 +83,13 @@ of its time.
This is best shown through an example. This is best shown through an example.
Lets use the example of logistic Lets use the example of logistic
regression. (Code for this example is in the file regression. (Code for this example is in the file
``benchmark/regression/regression.py``.) ``benchmark/regression/regression.py``.)
Compiling the module with ProfileMode and calling ``profmode.print_summary()`` Compiling the module with ProfileMode and calling ``profmode.print_summary()``
generates the following output: generates the following output:
.. code-block:: python .. code-block:: python
""" """
ProfileMode.print_summary() ProfileMode.print_summary()
--------------------------- ---------------------------
...@@ -141,7 +148,7 @@ generates the following output: ...@@ -141,7 +148,7 @@ generates the following output:
The Apply-wise summary prints the timing information for the worst The Apply-wise summary prints the timing information for the worst
offending Apply nodes. This corresponds to individual Op applications offending Apply nodes. This corresponds to individual Op applications
within your graph which take the longest to execute (so if you use dot within your graph which take the longest to execute (so if you use dot
twice, you will see two entries there). twice, you will see two entries there).
In the Op-wise summary, all Apply nodes In the Op-wise summary, all Apply nodes
executing the same Op are grouped together and the total execution executing the same Op are grouped together and the total execution
...@@ -186,7 +193,7 @@ Reference ...@@ -186,7 +193,7 @@ Reference
Print three summaries to stdout that show where cpu time is spent during theano function executions (for all functions using this object instance). Print three summaries to stdout that show where cpu time is spent during theano function executions (for all functions using this object instance).
:param n_apply_to_print: the number of apply nodes to print. :param n_apply_to_print: the number of apply nodes to print.
The default 15, but can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`. The default 15, but can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`.
:param n_ops_to_print: the number of ops to print. :param n_ops_to_print: the number of ops to print.
...@@ -198,10 +205,10 @@ Reference ...@@ -198,10 +205,10 @@ Reference
""" As print_summary, but print the difference on two different profile mode. """ As print_summary, but print the difference on two different profile mode.
TODO: Also we don't print the Apply-wise summary as it doesn't work for now. TODO: Also we don't print the Apply-wise summary as it doesn't work for now.
TODO: make comparison with gpu code. TODO: make comparison with gpu code.
:param other: the other instance of ProfileMode that we want to be compared to. :param other: the other instance of ProfileMode that we want to be compared to.
:param n_apply_to_print: the number of apply nodes to print. :param n_apply_to_print: the number of apply nodes to print.
The default 15, but can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`. The default 15, but can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`.
:param n_ops_to_print: the number of ops to print. :param n_ops_to_print: the number of ops to print.
......
...@@ -4,6 +4,10 @@ ...@@ -4,6 +4,10 @@
:mod:`utils` -- Utilities functions operating on the graph :mod:`utils` -- Utilities functions operating on the graph
========================================================== ==========================================================
.. testsetup:: *
from theano.gof.utils import *
.. module:: utils .. module:: utils
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: Utilities functions operating on the graph :synopsis: Utilities functions operating on the graph
......
...@@ -9,6 +9,10 @@ ...@@ -9,6 +9,10 @@
:synopsis: low-level automatic differentiation :synopsis: low-level automatic differentiation
.. moduleauthor:: LISA .. moduleauthor:: LISA
.. testsetup:: *
from theano.gradient import *
Symbolic gradient is usually computed from :func:`gradient.grad`, which offers a Symbolic gradient is usually computed from :func:`gradient.grad`, which offers a
more convenient syntax for the common case of wanting the gradient in some more convenient syntax for the common case of wanting the gradient in some
expressions with respect to a scalar cost. The :func:`grad_sources_inputs` expressions with respect to a scalar cost. The :func:`grad_sources_inputs`
......
...@@ -5,6 +5,10 @@ ...@@ -5,6 +5,10 @@
:mod:`misc.pkl_utils` - Tools for serialization. :mod:`misc.pkl_utils` - Tools for serialization.
================================================ ================================================
.. testsetup:: *
from theano.misc.pkl_utils import *
.. autofunction:: theano.misc.pkl_utils.dump .. autofunction:: theano.misc.pkl_utils.dump
.. autofunction:: theano.misc.pkl_utils.load .. autofunction:: theano.misc.pkl_utils.load
......
...@@ -9,6 +9,10 @@ ...@@ -9,6 +9,10 @@
:synopsis: Provides the Print Op and graph-printing routines. :synopsis: Provides the Print Op and graph-printing routines.
.. moduleauthor:: LISA .. moduleauthor:: LISA
.. testsetup::
import theano
Guide Guide
====== ======
...@@ -19,12 +23,13 @@ Intermediate values in a computation cannot be printed in ...@@ -19,12 +23,13 @@ Intermediate values in a computation cannot be printed in
the normal python way with the print statement, because Theano has no *statements*. the normal python way with the print statement, because Theano has no *statements*.
Instead there is the :class:`Print` Op. Instead there is the :class:`Print` Op.
>>> from theano import tensor as T, function, printing
>>> x = T.dvector() >>> x = T.dvector()
>>> hello_world_op = printing.Print('hello world') >>> hello_world_op = printing.Print('hello world')
>>> printed_x = hello_world_op(x) >>> printed_x = hello_world_op(x)
>>> f = function([x], printed_x) >>> f = function([x], printed_x)
>>> f([1, 2, 3]) >>> r = f([1, 2, 3])
>>> # output: "hello world __str__ = [ 1. 2. 3.]" hello world __str__ = [ 1. 2. 3.]
If you print more than one thing in a function like `f`, they will not If you print more than one thing in a function like `f`, they will not
necessarily be printed in the order that you think. The order might even depend necessarily be printed in the order that you think. The order might even depend
...@@ -46,14 +51,15 @@ Theano also provides :func:`theano.printing.pydotprint` that creates a png image ...@@ -46,14 +51,15 @@ Theano also provides :func:`theano.printing.pydotprint` that creates a png image
1) The first is :func:`theano.pp`. 1) The first is :func:`theano.pp`.
>>> from theano import pp, tensor as T
>>> x = T.dscalar('x') >>> x = T.dscalar('x')
>>> y = x ** 2 >>> y = x ** 2
>>> gy = T.grad(y, x) >>> gy = T.grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization >>> pp(gy) # print out the gradient prior to optimization
'((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))' '((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
>>> f = function([x], gy) >>> f = function([x], gy)
>>> pp(f.maker.fgraph.outputs[0]) >>> pp(f.maker.fgraph.outputs[0])
'(2.0 * x)' '(TensorConstant{2.0} * x)'
The parameter in T.dscalar('x') in the first line is the name of this variable The parameter in T.dscalar('x') in the first line is the name of this variable
in the graph. This name is used when printing the graph to make it more readable. in the graph. This name is used when printing the graph to make it more readable.
...@@ -74,8 +80,7 @@ iteration number or other kinds of information in the name. ...@@ -74,8 +80,7 @@ iteration number or other kinds of information in the name.
2) The second function to print a graph is :func:`theano.printing.debugprint` 2) The second function to print a graph is :func:`theano.printing.debugprint`
>>> theano.printing.debugprint(f.maker.fgraph.outputs[0]) # doctest: +NORMALIZE_WHITESPACE
>>> theano.printing.debugprint(f.maker.fgraph.outputs[0])
Elemwise{mul,no_inplace} [@A] '' Elemwise{mul,no_inplace} [@A] ''
|TensorConstant{2.0} [@B] |TensorConstant{2.0} [@B]
|x [@C] |x [@C]
...@@ -100,7 +105,7 @@ happen when that Variable has already been printed. Where else has it been ...@@ -100,7 +105,7 @@ happen when that Variable has already been printed. Where else has it been
printed? Look for debugprint identifier using the Find feature of your text printed? Look for debugprint identifier using the Find feature of your text
editor. editor.
>>> theano.printing.debugprint(gy) >>> theano.printing.debugprint(gy) # doctest: +NORMALIZE_WHITESPACE
Elemwise{mul} [@A] '' Elemwise{mul} [@A] ''
|Elemwise{mul} [@B] '' |Elemwise{mul} [@B] ''
| |Elemwise{second,no_inplace} [@C] '' | |Elemwise{second,no_inplace} [@C] ''
...@@ -113,10 +118,10 @@ Elemwise{mul} [@A] '' ...@@ -113,10 +118,10 @@ Elemwise{mul} [@A] ''
|x [@E] |x [@E]
|Elemwise{sub} [@I] '' |Elemwise{sub} [@I] ''
|TensorConstant{2} [@F] |TensorConstant{2} [@F]
|InplaceDimShuffle{} [@J] '' |DimShuffle{} [@J] ''
|TensorConstant{1} [@K] |TensorConstant{1} [@K]
>>> theano.printing.debugprint(gy, depth=2) >>> theano.printing.debugprint(gy, depth=2) # doctest: +NORMALIZE_WHITESPACE
Elemwise{mul} [@A] '' Elemwise{mul} [@A] ''
|Elemwise{mul} [@B] '' |Elemwise{mul} [@B] ''
|Elemwise{pow} [@C] '' |Elemwise{pow} [@C] ''
......
...@@ -35,7 +35,10 @@ happens automatically. ...@@ -35,7 +35,10 @@ happens automatically.
The equivalent Theano code would be: The equivalent Theano code would be:
.. code-block:: python .. testcode::
import theano
import theano.tensor as T
k = T.iscalar("k") k = T.iscalar("k")
A = T.vector("A") A = T.vector("A")
...@@ -57,6 +60,13 @@ The equivalent Theano code would be: ...@@ -57,6 +60,13 @@ The equivalent Theano code would be:
print power(range(10),2) print power(range(10),2)
print power(range(10),4) print power(range(10),4)
.. testoutput::
[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
[ 0.00000000e+00 1.00000000e+00 1.60000000e+01 8.10000000e+01
2.56000000e+02 6.25000000e+02 1.29600000e+03 2.40100000e+03
4.09600000e+03 6.56100000e+03]
Let us go through the example line by line. What we did is first to Let us go through the example line by line. What we did is first to
construct a function (using a lambda expression) that given ``prior_result`` and construct a function (using a lambda expression) that given ``prior_result`` and
``A`` returns ``prior_result * A``. The order of parameters is fixed by scan: ``A`` returns ``prior_result * A``. The order of parameters is fixed by scan:
...@@ -88,7 +98,9 @@ The tensor(s) to be looped over should be provided to scan using the ...@@ -88,7 +98,9 @@ The tensor(s) to be looped over should be provided to scan using the
Here's an example that builds a symbolic calculation of a polynomial Here's an example that builds a symbolic calculation of a polynomial
from a list of its coefficients: from a list of its coefficients:
.. code-block:: python .. testcode::
import numpy
coefficients = theano.tensor.vector("coefficients") coefficients = theano.tensor.vector("coefficients")
x = T.scalar("x") x = T.scalar("x")
...@@ -112,6 +124,11 @@ from a list of its coefficients: ...@@ -112,6 +124,11 @@ from a list of its coefficients:
print calculate_polynomial(test_coefficients, test_value) print calculate_polynomial(test_coefficients, test_value)
print 1.0 * (3 ** 0) + 0.0 * (3 ** 1) + 2.0 * (3 ** 2) print 1.0 * (3 ** 0) + 0.0 * (3 ** 1) + 2.0 * (3 ** 2)
.. testoutput::
19.0
19.0
There are a few things to note here. There are a few things to note here.
First, we calculate the polynomial by first generating each of the coefficients, and First, we calculate the polynomial by first generating each of the coefficients, and
...@@ -142,7 +159,7 @@ pitfall to be careful of: the initial output state that is supplied, that is ...@@ -142,7 +159,7 @@ pitfall to be careful of: the initial output state that is supplied, that is
generated at each iteration and moreover, it **must not involve an implicit generated at each iteration and moreover, it **must not involve an implicit
downcast** of the latter. downcast** of the latter.
.. code-block:: python .. testcode::
import numpy as np import numpy as np
...@@ -169,9 +186,13 @@ downcast** of the latter. ...@@ -169,9 +186,13 @@ downcast** of the latter.
# test # test
some_num = 15 some_num = 15
print triangular_sequence(some_num) print(triangular_sequence(some_num))
print [n * (n + 1) // 2 for n in xrange(some_num)] print([n * (n + 1) // 2 for n in xrange(some_num)])
.. testoutput::
[ 0 1 3 6 10 15 21 28 36 45 55 66 78 91 105]
[0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105]
Another simple example Another simple example
---------------------- ----------------------
...@@ -183,7 +204,7 @@ and a "model" output array (whose shape and dtype will be mimicked), ...@@ -183,7 +204,7 @@ and a "model" output array (whose shape and dtype will be mimicked),
and produces a sequence of arrays with the shape and dtype of the model, and produces a sequence of arrays with the shape and dtype of the model,
with all values set to zero except at the provided array indices. with all values set to zero except at the provided array indices.
.. code-block:: python .. testcode::
location = T.imatrix("location") location = T.imatrix("location")
values = T.vector("values") values = T.vector("values")
...@@ -205,7 +226,21 @@ with all values set to zero except at the provided array indices. ...@@ -205,7 +226,21 @@ with all values set to zero except at the provided array indices.
test_locations = numpy.asarray([[1, 1], [2, 3]], dtype=numpy.int32) test_locations = numpy.asarray([[1, 1], [2, 3]], dtype=numpy.int32)
test_values = numpy.asarray([42, 50], dtype=numpy.float32) test_values = numpy.asarray([42, 50], dtype=numpy.float32)
test_output_model = numpy.zeros((5, 5), dtype=numpy.float32) test_output_model = numpy.zeros((5, 5), dtype=numpy.float32)
print assign_values_at_positions(test_locations, test_values, test_output_model) print(assign_values_at_positions(test_locations, test_values, test_output_model))
.. testoutput::
[[[ 0. 0. 0. 0. 0.]
[ 0. 42. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]]
[[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 50. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]]]
This demonstrates that you can introduce new Theano variables into a scan function. This demonstrates that you can introduce new Theano variables into a scan function.
...@@ -219,28 +254,39 @@ Another useful feature of scan, is that it can handle shared variables. ...@@ -219,28 +254,39 @@ Another useful feature of scan, is that it can handle shared variables.
For example, if we want to implement a Gibbs chain of length 10 we would do For example, if we want to implement a Gibbs chain of length 10 we would do
the following: the following:
.. code-block:: python .. testsetup:: scan1
W = theano.shared(W_values) # we assume that ``W_values`` contains the import theano
# initial values of your weight matrix import numpy
W_values = numpy.random.random((2, 2))
bvis_values = numpy.random.random((2,))
bhid_values = numpy.random.random((2,))
bvis = theano.shared(bvis_values) .. testcode:: scan1
bhid = theano.shared(bhid_values)
trng = T.shared_randomstreams.RandomStreams(1234) import theano
from theano import tensor as T
def OneStep(vsample) : W = theano.shared(W_values) # we assume that ``W_values`` contains the
hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid) # initial values of your weight matrix
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
return trng.binomial(size=vsample.shape, n=1, p=vmean,
dtype=theano.config.floatX)
sample = theano.tensor.vector() bvis = theano.shared(bvis_values)
bhid = theano.shared(bhid_values)
values, updates = theano.scan(OneStep, outputs_info=sample, n_steps=10) trng = T.shared_randomstreams.RandomStreams(1234)
gibbs10 = theano.function([sample], values[-1], updates=updates) def OneStep(vsample) :
hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid)
hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
return trng.binomial(size=vsample.shape, n=1, p=vmean,
dtype=theano.config.floatX)
sample = theano.tensor.vector()
values, updates = theano.scan(OneStep, outputs_info=sample, n_steps=10)
gibbs10 = theano.function([sample], values[-1], updates=updates)
The first, and probably most crucial observation is that the updates The first, and probably most crucial observation is that the updates
...@@ -251,7 +297,11 @@ update dictionary to your function, you will always get the same 10 ...@@ -251,7 +297,11 @@ update dictionary to your function, you will always get the same 10
sets of random numbers. You can even use the ``updates`` dictionary sets of random numbers. You can even use the ``updates`` dictionary
afterwards. Look at this example : afterwards. Look at this example :
.. code-block:: python .. testsetup:: scan2
import theano
.. testcode:: scan2
a = theano.shared(1) a = theano.shared(1)
values, updates = theano.scan(lambda: {a: a+1}, n_steps=10) values, updates = theano.scan(lambda: {a: a+1}, n_steps=10)
...@@ -260,15 +310,22 @@ In this case the lambda expression does not require any input parameters ...@@ -260,15 +310,22 @@ In this case the lambda expression does not require any input parameters
and returns an update dictionary which tells how ``a`` should be updated and returns an update dictionary which tells how ``a`` should be updated
after each step of scan. If we write : after each step of scan. If we write :
.. code-block:: python .. testcode:: scan2
b = a + 1 b = a + 1
c = updates[a] + 1 c = updates[a] + 1
f = theano.function([], [b, c], updates=updates) f = theano.function([], [b, c], updates=updates)
print b print(b)
print c print(c)
print a.value print(a.get_value())
.. testoutput:: scan2
:hide:
Elemwise{add,no_inplace}.0
Elemwise{add,no_inplace}.0
1
We will see that because ``b`` does not use the updated version of We will see that because ``b`` does not use the updated version of
``a``, it will be 2, ``c`` will be 12, while ``a.value`` is ``11``. ``a``, it will be 2, ``c`` will be 12, while ``a.value`` is ``11``.
...@@ -289,7 +346,7 @@ execution. To pass the shared variables to Scan you need to put them in a list ...@@ -289,7 +346,7 @@ execution. To pass the shared variables to Scan you need to put them in a list
and give it to the ``non_sequences`` argument. Here is the Gibbs sampling code and give it to the ``non_sequences`` argument. Here is the Gibbs sampling code
updated: updated:
.. code-block:: python .. testcode:: scan1
W = theano.shared(W_values) # we assume that ``W_values`` contains the W = theano.shared(W_values) # we assume that ``W_values`` contains the
# initial values of your weight matrix # initial values of your weight matrix
...@@ -332,7 +389,7 @@ to be ensured by the user. Otherwise, it will result in an error. ...@@ -332,7 +389,7 @@ to be ensured by the user. Otherwise, it will result in an error.
Using the previous Gibbs sampling example: Using the previous Gibbs sampling example:
.. code-block:: python .. testcode:: scan1
# The new scan, using strict=True # The new scan, using strict=True
values, updates = theano.scan(fn=OneStep, values, updates = theano.scan(fn=OneStep,
...@@ -369,7 +426,12 @@ In this case we have a sequence over which we need to iterate ``u``, ...@@ -369,7 +426,12 @@ In this case we have a sequence over which we need to iterate ``u``,
and two outputs ``x`` and ``y``. To implement this with scan we first and two outputs ``x`` and ``y``. To implement this with scan we first
construct a function that computes one iteration step : construct a function that computes one iteration step :
.. code-block:: python .. testsetup:: scan3
import theano
from theano import tensor as T
.. testcode:: scan3
def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out): def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out):
...@@ -392,9 +454,15 @@ an order, but also variables, since this is how scan figures out what should ...@@ -392,9 +454,15 @@ an order, but also variables, since this is how scan figures out what should
be represented by what. Given that we have all be represented by what. Given that we have all
the Theano variables needed we construct our RNN as follows : the Theano variables needed we construct our RNN as follows :
.. code-block:: python .. testcode:: scan3
W = T.matrix()
W_in_1 = T.matrix()
W_in_2 = T.matrix()
W_feedback = T.matrix()
W_out = T.matrix()
u = T.matrix() # it is a sequence of vectors u = T.matrix() # it is a sequence of vectors
x0 = T.matrix() # initial state of x has to be a matrix, since x0 = T.matrix() # initial state of x has to be a matrix, since
# it has to cover x[-3] # it has to cover x[-3]
y0 = T.vector() # y0 is just a vector since scan has only to provide y0 = T.vector() # y0 is just a vector since scan has only to provide
...@@ -432,7 +500,7 @@ provided condition evaluates to True. ...@@ -432,7 +500,7 @@ provided condition evaluates to True.
For an example, we will compute all powers of two smaller then some provided For an example, we will compute all powers of two smaller then some provided
value ``max_value``. value ``max_value``.
.. code-block:: python .. testcode::
def power_of_2(previous_power, max_value): def power_of_2(previous_power, max_value):
return previous_power*2, theano.scan_module.until(previous_power*2 > max_value) return previous_power*2, theano.scan_module.until(previous_power*2 > max_value)
...@@ -446,6 +514,10 @@ value ``max_value``. ...@@ -446,6 +514,10 @@ value ``max_value``.
f = theano.function([max_value], values) f = theano.function([max_value], values)
print f(45) print f(45)
.. testoutput::
[ 2. 4. 8. 16. 32. 64.]
As you can see, in order to terminate on condition, the only thing required As you can see, in order to terminate on condition, the only thing required
is that the inner function ``power_of_2`` to return also the condition is that the inner function ``power_of_2`` to return also the condition
......
...@@ -63,23 +63,25 @@ The following example builds a matrix and returns its columns. It ...@@ -63,23 +63,25 @@ The following example builds a matrix and returns its columns. It
prints the i-th column, i.e. a list of indices in the column and their prints the i-th column, i.e. a list of indices in the column and their
corresponding value in the second list. corresponding value in the second list.
>>> import numpy as np
>>> import scipy.sparse as sp
>>> data = np.asarray([7, 8, 9]) >>> data = np.asarray([7, 8, 9])
>>> indices = np.asarray([0, 1, 2]) >>> indices = np.asarray([0, 1, 2])
>>> indptr = np.asarray([0, 2, 3, 3]) >>> indptr = np.asarray([0, 2, 3, 3])
>>> m = sp.csc_matrix((data, indices, indptr), shape=(3, 3)) >>> m = sp.csc_matrix((data, indices, indptr), shape=(3, 3))
>>> print m.toarray() >>> m.toarray()
[[7 0 0] array([[7, 0, 0],
[8 0 0] [8, 0, 0],
[0 9 0]] [0, 9, 0]])
>>> i = 0 >>> i = 0
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]] >>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[0, 1] [7, 8] (array([0, 1], dtype=int32), array([7, 8]))
>>> i = 1 >>> i = 1
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]] >>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[2] [9] (array([2], dtype=int32), array([9]))
>>> i = 2 >>> i = 2
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]] >>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[] [] (array([], dtype=int32), array([], dtype=int64))
CSR Matrix CSR Matrix
---------- ----------
...@@ -97,23 +99,25 @@ The following example builds a matrix and returns its rows. It prints ...@@ -97,23 +99,25 @@ The following example builds a matrix and returns its rows. It prints
the i-th row, i.e. a list of indices in the row and their the i-th row, i.e. a list of indices in the row and their
corresponding value in the second list. corresponding value in the second list.
>>> import numpy as np
>>> import scipy.sparse as sp
>>> data = np.asarray([7, 8, 9]) >>> data = np.asarray([7, 8, 9])
>>> indices = np.asarray([0, 1, 2]) >>> indices = np.asarray([0, 1, 2])
>>> indptr = np.asarray([0, 2, 3, 3]) >>> indptr = np.asarray([0, 2, 3, 3])
>>> m = sp.csr_matrix((data, indices, indptr), shape=(3, 3)) >>> m = sp.csr_matrix((data, indices, indptr), shape=(3, 3))
>>> print m.toarray() >>> m.toarray()
[[7 8 0] array([[7, 8, 0],
[0 0 9] [0, 0, 9],
[0 0 0]] [0, 0, 0]])
>>> i = 0 >>> i = 0
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]] >>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[0, 1] [7, 8] (array([0, 1], dtype=int32), array([7, 8]))
>>> i = 1 >>> i = 1
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]] >>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[2] [9] (array([2], dtype=int32), array([9]))
>>> i = 2 >>> i = 2
>>> print m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]] >>> m.indices[m.indptr[i]:m.indptr[i+1]], m.data[m.indptr[i]:m.indptr[i+1]]
[] [] (array([], dtype=int32), array([], dtype=int64))
List of Implemented Operations List of Implemented Operations
============================== ==============================
......
...@@ -1665,8 +1665,8 @@ Linear Algebra ...@@ -1665,8 +1665,8 @@ Linear Algebra
[0, 1, 2], [0, 1, 2],
[0, 1, 2], [0, 1, 2],
[0, 1, 2], [0, 1, 2],
[0, 1, 2]], dtype=int8) [0, 1, 2]], dtype=int8)
.. function:: ogrid .. function:: ogrid
:returns: an instance which returns an open (i.e. not fleshed out) mesh-grid :returns: an instance which returns an open (i.e. not fleshed out) mesh-grid
...@@ -1685,8 +1685,8 @@ Linear Algebra ...@@ -1685,8 +1685,8 @@ Linear Algebra
[3], [3],
[4]], dtype=int8) [4]], dtype=int8)
>>> b[1].eval() >>> b[1].eval()
array([[0, 1, 2, 3]], dtype=int8) array([[0, 1, 2]], dtype=int8)
Gradient / Differentiation Gradient / Differentiation
========================== ==========================
......
...@@ -2,6 +2,10 @@ ...@@ -2,6 +2,10 @@
:mod:`tensor.extra_ops` -- Tensor Extra Ops :mod:`tensor.extra_ops` -- Tensor Extra Ops
=================================================================== ===================================================================
.. testsetup:: *
from theano.tensor.extra_ops import *
.. module:: tensor.extra_ops .. module:: tensor.extra_ops
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: Tensor Extra Ops :synopsis: Tensor Extra Ops
......
...@@ -42,11 +42,13 @@ ...@@ -42,11 +42,13 @@
Example: Example:
.. code-block:: python .. testcode::
x,y,b = T.dvectors('x','y','b') import theano.tensor as T
x, y, b = T.dvectors('x', 'y', 'b')
W = T.dmatrix('W') W = T.dmatrix('W')
y = T.nnet.sigmoid(T.dot(W,x) + b) y = T.nnet.sigmoid(T.dot(W, x) + b)
.. note:: The underlying code will return an exact 0 or 1 if an .. note:: The underlying code will return an exact 0 or 1 if an
element of x is too small or too big. element of x is too small or too big.
...@@ -102,7 +104,7 @@ ...@@ -102,7 +104,7 @@
.. note:: The underlying code will return an exact 0 if an element of x is too small. .. note:: The underlying code will return an exact 0 if an element of x is too small.
.. code-block:: python .. testcode::
x,y,b = T.dvectors('x','y','b') x,y,b = T.dvectors('x','y','b')
W = T.dmatrix('W') W = T.dmatrix('W')
...@@ -131,7 +133,7 @@ ...@@ -131,7 +133,7 @@
Example of use: Example of use:
.. code-block:: python .. testcode::
x,y,b = T.dvectors('x','y','b') x,y,b = T.dvectors('x','y','b')
W = T.dmatrix('W') W = T.dmatrix('W')
...@@ -155,10 +157,11 @@ ...@@ -155,10 +157,11 @@
to the binary cross-entropy (note that this assumes that x will to the binary cross-entropy (note that this assumes that x will
contain values between 0 and 1): contain values between 0 and 1):
.. code-block:: python .. testcode::
x, y, b = T.dvectors('x', 'y', 'b') x, y, b, c = T.dvectors('x', 'y', 'b', 'c')
W = T.dmatrix('W') W = T.dmatrix('W')
V = T.dmatrix('V')
h = T.nnet.sigmoid(T.dot(W, x) + b) h = T.nnet.sigmoid(T.dot(W, x) + b)
x_recons = T.nnet.sigmoid(T.dot(V, h) + c) x_recons = T.nnet.sigmoid(T.dot(V, h) + c)
recon_cost = T.nnet.binary_crossentropy(x_recons, x).mean() recon_cost = T.nnet.binary_crossentropy(x_recons, x).mean()
...@@ -191,7 +194,12 @@ ...@@ -191,7 +194,12 @@
correct class (which is typically the training criterion in correct class (which is typically the training criterion in
classification settings). classification settings).
.. code-block:: python .. testsetup::
import theano
o = theano.tensor.ivector()
.. testcode::
y = T.nnet.softmax(T.dot(W, x) + b) y = T.nnet.softmax(T.dot(W, x) + b)
cost = T.nnet.categorical_crossentropy(y, o) cost = T.nnet.categorical_crossentropy(y, o)
......
...@@ -2,6 +2,10 @@ ...@@ -2,6 +2,10 @@
:mod:`tensor.utils` -- Tensor Utils :mod:`tensor.utils` -- Tensor Utils
=================================================================== ===================================================================
.. testsetup::
from theano.tensor.utils import *
.. module:: tensor.utils .. module:: tensor.utils
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: Tensor Utils :synopsis: Tensor Utils
......
...@@ -15,32 +15,29 @@ ...@@ -15,32 +15,29 @@
tensor. tensor.
This is a type that represents a list in Theano. All elements must have This is a type that represents a list in Theano. All elements must have
the same Theano type. Here is an example:: the same Theano type. Here is an example:
import theano.typed_list >>> import theano.typed_list
>>> tl = theano.typed_list.TypedListType(theano.tensor.fvector)()
tl = theano.typed_list.TypedListType(theano.tensor.fvector)() >>> v = theano.tensor.fvector()
v = theano.tensor.fvector() >>> o = theano.typed_list.append(tl, v)
o = theano.typed_list.append(tl, v) >>> f = theano.function([tl, v], o)
f = theano.function([tl, v], o) >>> f([[1, 2, 3], [4, 5]], [2])
print f([[1, 2, 3], [4, 5]], [2]) [array([ 1., 2., 3.], dtype=float32), array([ 4., 5.], dtype=float32), array([ 2.], dtype=float32)]
#[array([ 1., 2., 3.], dtype=float32), array([ 4., 5.], dtype=float32), array([ 2.], dtype=float32)]
A second example with Scan. Scan doesn't yet have direct support of A second example with Scan. Scan doesn't yet have direct support of
TypedList, so you can only use it as non_sequences (not in sequences or TypedList, so you can only use it as non_sequences (not in sequences or
as outputs):: as outputs):
import theano.typed_list >>> import theano.typed_list
>>> a = theano.typed_list.TypedListType(theano.tensor.fvector)()
a = theano.typed_list.TypedListType(theano.tensor.fvector)() >>> l = theano.typed_list.length(a)
l = theano.typed_list.length(a) >>> s, _ = theano.scan(fn=lambda i, tl: tl[i].sum(),
s, _ = theano.scan(fn=lambda i, tl: tl[i].sum(), ... non_sequences=[a],
non_sequences=[a], ... sequences=[theano.tensor.arange(l, dtype='int64')])
sequences=[theano.tensor.arange(l, dtype='int64')]) >>> f = theano.function([a], s)
>>> f([[1, 2, 3], [4, 5]])
f = theano.function([a], s) array([ 6., 9.], dtype=float32)
f([[1, 2, 3], [4, 5]])
#array([ 6., 9.], dtype=float32)
.. automodule:: theano.typed_list.basic .. automodule:: theano.typed_list.basic
:members: :members:
=================
Automatic updates
=================
.. note:
Proposed 2010 01 13
Done 2010 04 ??
The Module version of RandomStreams could arrange for the automatic update of
certain inputs (such as the random number generators) at the time of make(), so
that certain *obvious* patterns would work:
>>> rs = RandomStreams()
>>> u = rs.uniform(...)
>>> f = theano.function([], u)
>>> assert not numpy.all(f() == f())
Unfortunately, with shared variables this does not work! Function needs to be
told which shared variables to update. The current workaround is to do this:
>>> theano.function([], u, updates=rs.updates())
or this:
>>> theano.function([], u, updates=[u.update])
But it is all too easy to forget to do either of these workarounds, and
accidentally run a program whose random numbers are the same in every call.
Proposal
========
Add an optional `default_update` attribute to Shared variables. This will be
consulted by function. If no update expression is given for this variable in
the updates list, then this default will be inserted. Note well: a value of None for the
default_update means to update with a value of None! To have no default update,
make sure that the default_update attribute is not defined.
Add an optional argument to function: `no_default_updates`. This argument defaults to
False, which results in the current semantics.
A True value here would mean "ignore all default_update expressions", and this
would be useful for disabling implicit behaviour.
A list of shared variables here would mean to ignore the
default_update_expressions in these specific variables.
Alternatives
============
Consider a singleton 'NOUPDATE' object that can be used as a pseudo-expression
in the update list. This doesn't introduce a new keyword argument, which makes
it slightly more awkward to document in theano.function. Really though, I have
no strong feelings between this and the no_updates paramter.
...@@ -22,17 +22,20 @@ max. The third argument is an array into which the result can be ...@@ -22,17 +22,20 @@ max. The third argument is an array into which the result can be
written. written.
So for example: So for example:
.. code-block:: python
.. doctest::
>>> max(3, 4) :options: +SKIP
4
>>> numpy.max(3, 4) >>> import numpy
3 >>> max(3, 4)
>>> a,b,c = [numpy.asarray(i) for i in [0,1,2]] 4
>>> numpy.max(a,b,c) >>> numpy.max(3, 4) # This is an error
0 3
>>> c >>> a, b, c = [numpy.asarray(i) for i in [0, 1, 2]]
array(0) >>> numpy.max(a, b, c) # This is an error
0
>>> c
array(0)
Be careful! Be careful!
......
...@@ -63,12 +63,13 @@ if __name__ == '__main__': ...@@ -63,12 +63,13 @@ if __name__ == '__main__':
os.path.join(sys.path[0], os.pardir, os.pardir)) os.path.join(sys.path[0], os.pardir, os.pardir))
options = defaultdict(bool) options = defaultdict(bool)
options.update(dict([x, y or True] for x, y in opts, args = getopt.getopt(
getopt.getopt(sys.argv[1:], sys.argv[1:],
'o:', 'o:f:',
['epydoc', 'rst', 'help', 'nopdf', 'cache', 'test'])[0])) ['epydoc', 'rst', 'help', 'nopdf', 'cache', 'test'])
options.update(dict([x, y or True] for x, y in opts))
if options['--help']: if options['--help']:
print('Usage: %s [OPTIONS]' % sys.argv[0]) print('Usage: %s [OPTIONS] [files...]' % sys.argv[0])
print(' -o <dir>: output the html files in the specified dir') print(' -o <dir>: output the html files in the specified dir')
print(' --cache: use the doctree cache') print(' --cache: use the doctree cache')
print(' --rst: only compile the doc (requires sphinx)') print(' --rst: only compile the doc (requires sphinx)')
...@@ -77,6 +78,9 @@ if __name__ == '__main__': ...@@ -77,6 +78,9 @@ if __name__ == '__main__':
print('(requires epydoc)') print('(requires epydoc)')
print(' --test: run all the code samples in the documentaton') print(' --test: run all the code samples in the documentaton')
print(' --help: this help') print(' --help: this help')
print('If one or more files are specified after the options then only '
'those files will be built. Otherwise the whole tree is '
'processed. Specifying files will implies --cache.')
sys.exit(0) sys.exit(0)
if not (options['--epydoc'] or options['--rst'] or options['--test']): if not (options['--epydoc'] or options['--rst'] or options['--test']):
...@@ -90,6 +94,9 @@ if __name__ == '__main__': ...@@ -90,6 +94,9 @@ if __name__ == '__main__':
pass pass
outdir = options['-o'] or (throot + '/html') outdir = options['-o'] or (throot + '/html')
files = None
if len(args) != 0:
files = [os.path.abspath(f) for f in args]
mkdir(outdir) mkdir(outdir)
os.chdir(outdir) os.chdir(outdir)
...@@ -100,7 +107,6 @@ if __name__ == '__main__': ...@@ -100,7 +107,6 @@ if __name__ == '__main__':
if options['--all'] or options['--epydoc']: if options['--all'] or options['--epydoc']:
mkdir("api") mkdir("api")
sys.path[0:0] = [throot]
#Generate HTML doc #Generate HTML doc
...@@ -119,10 +125,13 @@ if __name__ == '__main__': ...@@ -119,10 +125,13 @@ if __name__ == '__main__':
import sphinx import sphinx
if extraopts is None: if extraopts is None:
extraopts = [] extraopts = []
if not options['--cache']: if not options['--cache'] and files is None:
extraopts.append('-E') extraopts.append('-E')
sphinx.main(['', '-b', builder] + extraopts + docpath = os.path.join(throot, 'doc')
[os.path.join(throot, 'doc'), workdir]) inopt = [docpath, workdir]
if files is not None:
inopt.extend(files)
sphinx.main(['', '-b', builder] + extraopts + inopt)
if options['--all'] or options['--rst']: if options['--all'] or options['--rst']:
mkdir("doc") mkdir("doc")
......
...@@ -11,9 +11,6 @@ To get us started with Theano and get a feel of what we're working with, ...@@ -11,9 +11,6 @@ To get us started with Theano and get a feel of what we're working with,
let's make a simple function: add two numbers together. Here is how you do let's make a simple function: add two numbers together. Here is how you do
it: it:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_adding.test_adding_1
>>> import theano.tensor as T >>> import theano.tensor as T
>>> from theano import function >>> from theano import function
>>> x = T.dscalar('x') >>> x = T.dscalar('x')
...@@ -72,7 +69,7 @@ are, however, assigned the theano Type ``dscalar`` in their ``type`` ...@@ -72,7 +69,7 @@ are, however, assigned the theano Type ``dscalar`` in their ``type``
field, as you can see here: field, as you can see here:
>>> type(x) >>> type(x)
<class 'theano.tensor.basic.TensorVariable'> <class 'theano.tensor.var.TensorVariable'>
>>> x.type >>> x.type
TensorType(float64, scalar) TensorType(float64, scalar)
>>> T.dscalar >>> T.dscalar
...@@ -150,9 +147,6 @@ You might already have guessed how to do this. Indeed, the only change ...@@ -150,9 +147,6 @@ You might already have guessed how to do this. Indeed, the only change
from the previous example is that you need to instantiate *x* and from the previous example is that you need to instantiate *x* and
*y* using the matrix Types: *y* using the matrix Types:
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_adding.test_adding_2
>>> x = T.dmatrix('x') >>> x = T.dmatrix('x')
>>> y = T.dmatrix('y') >>> y = T.dmatrix('y')
>>> z = x + y >>> z = x + y
...@@ -201,14 +195,19 @@ with NumPy arrays may be found here: :ref:`tensor creation<libdoc_tensor_creatio ...@@ -201,14 +195,19 @@ with NumPy arrays may be found here: :ref:`tensor creation<libdoc_tensor_creatio
Exercise Exercise
======== ========
.. code-block:: python .. testcode::
import theano
a = theano.tensor.vector() # declare variable
out = a + a ** 10 # build symbolic expression
f = theano.function([a], out) # compile function
print(f([0, 1, 2]))
.. testoutput::
[ 0. 2. 1026.]
import theano
a = theano.tensor.vector() # declare variable
out = a + a ** 10 # build symbolic expression
f = theano.function([a], out) # compile function
print f([0, 1, 2]) # prints `array([0, 2, 1026])`
Modify and execute this code to compute this expression: a ** 2 + b ** 2 + 2 * a * b. Modify and execute this code to compute this expression: a ** 2 + b ** 2 + 2 * a * b.
......
...@@ -55,30 +55,33 @@ Borrowing when Creating Shared Variables ...@@ -55,30 +55,33 @@ Borrowing when Creating Shared Variables
A ``borrow`` argument can be provided to the shared-variable constructor. A ``borrow`` argument can be provided to the shared-variable constructor.
.. testcode:: borrow
.. If you modify this code, also change : import numpy, theano
.. theano/tests/test_tutorial.py:T_aliasing.test_aliasing_1 np_array = numpy.ones(2, dtype='float32')
.. code-block:: python s_default = theano.shared(np_array)
s_false = theano.shared(np_array, borrow=False)
import numpy, theano s_true = theano.shared(np_array, borrow=True)
np_array = numpy.ones(2, dtype='float32')
s_default = theano.shared(np_array)
s_false = theano.shared(np_array, borrow=False)
s_true = theano.shared(np_array, borrow=True)
By default (*s_default*) and when explicitly setting ``borrow=False``, the By default (*s_default*) and when explicitly setting ``borrow=False``, the
shared variable we construct gets a [deep] copy of *np_array*. So changes we shared variable we construct gets a [deep] copy of *np_array*. So changes we
subsequently make to *np_array* have no effect on our shared variable. subsequently make to *np_array* have no effect on our shared variable.
.. code-block:: python .. testcode:: borrow
np_array += 1 # now it is an array of 2.0 s np_array += 1 # now it is an array of 2.0 s
print(s_default.get_value())
print(s_false.get_value())
print(s_true.get_value())
.. testoutput:: borrow
[ 1. 1.]
[ 1. 1.]
[ 2. 2.]
s_default.get_value() # -> array([1.0, 1.0])
s_false.get_value() # -> array([1.0, 1.0])
s_true.get_value() # -> array([2.0, 2.0])
If we are running this with the CPU as the device, If we are running this with the CPU as the device,
then changes we make to *np_array* *right away* will show up in then changes we make to *np_array* *right away* will show up in
...@@ -117,15 +120,12 @@ A ``borrow`` argument can also be used to control how a ``shared`` variable's va ...@@ -117,15 +120,12 @@ A ``borrow`` argument can also be used to control how a ``shared`` variable's va
retrieved. retrieved.
.. If you modify this code, also change : .. testcode:: borrow
.. theano/tests/test_tutorial.py:T_aliasing.test_aliasing_2
.. code-block:: python s = theano.shared(np_array)
s = theano.shared(np_array) v_false = s.get_value(borrow=False) # N.B. borrow default is False
v_true = s.get_value(borrow=True)
v_false = s.get_value(borrow=False) # N.B. borrow default is False
v_true = s.get_value(borrow=True)
When ``borrow=False`` is passed to ``get_value``, it means that the return value When ``borrow=False`` is passed to ``get_value``, it means that the return value
...@@ -146,7 +146,7 @@ then you should use the ``return_internal_type=True`` argument to ...@@ -146,7 +146,7 @@ then you should use the ``return_internal_type=True`` argument to
constant time), but might return various datatypes depending on contextual constant time), but might return various datatypes depending on contextual
factors (e.g. the compute device, the dtype of the NumPy array). factors (e.g. the compute device, the dtype of the NumPy array).
.. code-block:: python .. testcode:: borrow
v_internal = s.get_value(borrow=True, return_internal_type=True) v_internal = s.get_value(borrow=True, return_internal_type=True)
...@@ -178,7 +178,12 @@ that Theano *may* reuse the buffer you provide as the internal storage for the v ...@@ -178,7 +178,12 @@ that Theano *may* reuse the buffer you provide as the internal storage for the v
A standard pattern for manually updating the value of a ``shared`` variable is as A standard pattern for manually updating the value of a ``shared`` variable is as
follows: follows:
.. code-block:: python .. testsetup:: borrow
def some_inplace_fn(v):
return v
.. testcode:: borrow
s.set_value( s.set_value(
some_inplace_fn(s.get_value(borrow=True)), some_inplace_fn(s.get_value(borrow=True)),
...@@ -224,10 +229,7 @@ Borrowing when Constructing Function Objects ...@@ -224,10 +229,7 @@ Borrowing when Constructing Function Objects
A ``borrow`` argument can also be provided to the ``In`` and ``Out`` objects A ``borrow`` argument can also be provided to the ``In`` and ``Out`` objects
that control how ``theano.function`` handles its argument[s] and return value[s]. that control how ``theano.function`` handles its argument[s] and return value[s].
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_aliasing.test_aliasing_3
.. code-block:: python
import theano, theano.tensor import theano, theano.tensor
...@@ -263,40 +265,40 @@ For GPU graphs, this borrowing can have a major speed impact. See the following ...@@ -263,40 +265,40 @@ For GPU graphs, this borrowing can have a major speed impact. See the following
.. code-block:: python .. code-block:: python
from theano import function, config, shared, sandbox, tensor, Out from theano import function, config, shared, sandbox, tensor, Out
import numpy import numpy
import time import time
vlen = 10 * 30 * 768 # 10 x # cores x # threads per core vlen = 10 * 30 * 768 # 10 x # cores x # threads per core
iters = 1000 iters = 1000
rng = numpy.random.RandomState(22) rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX)) x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f1 = function([], sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x))) f1 = function([], sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x)))
f2 = function([], f2 = function([],
Out(sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x)), Out(sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x)),
borrow=True)) borrow=True))
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f1() r = f1()
t1 = time.time() t1 = time.time()
no_borrow = t1 - t0 no_borrow = t1 - t0
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f2() r = f2()
t1 = time.time() t1 = time.time()
print 'Looping', iters, 'times took', no_borrow, 'seconds without borrow', print 'Looping', iters, 'times took', no_borrow, 'seconds without borrow',
print 'and', t1 - t0, 'seconds with borrow.' print 'and', t1 - t0, 'seconds with borrow.'
if numpy.any([isinstance(x.op, tensor.Elemwise) and if numpy.any([isinstance(x.op, tensor.Elemwise) and
('Gpu' not in type(x.op).__name__) ('Gpu' not in type(x.op).__name__)
for x in f1.maker.fgraph.toposort()]): for x in f1.maker.fgraph.toposort()]):
print 'Used the cpu' print 'Used the cpu'
else: else:
print 'Used the gpu' print 'Used the gpu'
Which produces this output: Which produces this output:
.. code-block:: text .. code-block:: none
$ THEANO_FLAGS=device=gpu0,floatX=float32 python test1.py $ THEANO_FLAGS=device=gpu0,floatX=float32 python test1.py
Using gpu device 0: GeForce GTX 275 Using gpu device 0: GeForce GTX 275
......
...@@ -18,49 +18,55 @@ IfElse vs Switch ...@@ -18,49 +18,55 @@ IfElse vs Switch
**Example** **Example**
.. code-block:: python .. testcode::
from theano import tensor as T from theano import tensor as T
from theano.ifelse import ifelse from theano.ifelse import ifelse
import theano, time, numpy import theano, time, numpy
a,b = T.scalars('a', 'b') a,b = T.scalars('a', 'b')
x,y = T.matrices('x', 'y') x,y = T.matrices('x', 'y')
z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y)) z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))
z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y)) z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y))
f_switch = theano.function([a, b, x, y], z_switch, f_switch = theano.function([a, b, x, y], z_switch,
mode=theano.Mode(linker='vm')) mode=theano.Mode(linker='vm'))
f_lazyifelse = theano.function([a, b, x, y], z_lazy, f_lazyifelse = theano.function([a, b, x, y], z_lazy,
mode=theano.Mode(linker='vm')) mode=theano.Mode(linker='vm'))
val1 = 0. val1 = 0.
val2 = 1. val2 = 1.
big_mat1 = numpy.ones((10000, 1000)) big_mat1 = numpy.ones((10000, 1000))
big_mat2 = numpy.ones((10000, 1000)) big_mat2 = numpy.ones((10000, 1000))
n_times = 10 n_times = 10
tic = time.clock() tic = time.clock()
for i in xrange(n_times): for i in xrange(n_times):
f_switch(val1, val2, big_mat1, big_mat2) f_switch(val1, val2, big_mat1, big_mat2)
print 'time spent evaluating both values %f sec' % (time.clock() - tic) print 'time spent evaluating both values %f sec' % (time.clock() - tic)
tic = time.clock() tic = time.clock()
for i in xrange(n_times): for i in xrange(n_times):
f_lazyifelse(val1, val2, big_mat1, big_mat2) f_lazyifelse(val1, val2, big_mat1, big_mat2)
print 'time spent evaluating one value %f sec' % (time.clock() - tic) print 'time spent evaluating one value %f sec' % (time.clock() - tic)
.. testoutput::
:hide:
:options: +ELLIPSIS
time spent evaluating both values ... sec
time spent evaluating one value ... sec
In this example, the ``IfElse`` op spends less time (about half as much) than ``Switch`` In this example, the ``IfElse`` op spends less time (about half as much) than ``Switch``
since it computes only one variable out of the two. since it computes only one variable out of the two.
.. code-block:: python .. code-block:: none
>>> python ifelse_switch.py
time spent evaluating both values 0.6700 sec
time spent evaluating one value 0.3500 sec
$ python ifelse_switch.py
time spent evaluating both values 0.6700 sec
time spent evaluating one value 0.3500 sec
Unless ``linker='vm'`` or ``linker='cvm'`` are used, ``ifelse`` will compute both Unless ``linker='vm'`` or ``linker='cvm'`` are used, ``ifelse`` will compute both
variables and take the same computation time as ``switch``. Although the linker variables and take the same computation time as ``switch``. Although the linker
......
...@@ -23,7 +23,7 @@ Interpreting Error Messages ...@@ -23,7 +23,7 @@ Interpreting Error Messages
Even in its default configuration, Theano tries to display useful error Even in its default configuration, Theano tries to display useful error
messages. Consider the following faulty code. messages. Consider the following faulty code.
.. code-block:: python .. testcode::
import numpy as np import numpy as np
import theano import theano
...@@ -38,24 +38,20 @@ messages. Consider the following faulty code. ...@@ -38,24 +38,20 @@ messages. Consider the following faulty code.
Running the code above we see: Running the code above we see:
.. code-block:: bash .. testoutput::
:options: +ELLIPSIS
Traceback (most recent call last): Traceback (most recent call last):
File "test0.py", line 10, in <module> ...
f(np.ones((2,)), np.ones((3,))) ValueError: Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 2)
File "/PATH_TO_THEANO/theano/compile/function_module.py", line 605, in __call__ Apply node that caused the error: Elemwise{add,no_inplace}(<TensorType(float64, vector)>, <TensorType(float64, vector)>, <TensorType(float64, vector)>)
self.fn.thunks[self.fn.position_of_error]) Inputs types: [TensorType(float64, vector), TensorType(float64, vector), TensorType(float64, vector)]
File "/PATH_TO_THEANO/theano/compile/function_module.py", line 595, in __call__ Inputs shapes: [(3,), (2,), (2,)]
outputs = self.fn() Inputs strides: [(8,), (8,), (8,)]
ValueError: Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 2) Inputs scalar values: ['not scalar', 'not scalar', 'not scalar']
Apply node that caused the error: Elemwise{add,no_inplace}(<TensorType(float64, vector)>, <TensorType(float64, vector)>, <TensorType(float64, vector)>)
Inputs types: [TensorType(float64, vector), TensorType(float64, vector), TensorType(float64, vector)]
Inputs shapes: [(3,), (2,), (2,)]
Inputs strides: [(8,), (8,), (8,)]
Inputs scalar values: ['not scalar', 'not scalar', 'not scalar']
HINT: Re-running with most Theano optimization disabled could give you a back-traces when this node was created. This can be done with by setting the Theano flags 'optimizer=fast_compile'. If that does not work, Theano optimization can be disabled with 'optimizer=None'. HINT: Re-running with most Theano optimization disabled could give you a back-traces when this node was created. This can be done with by setting the Theano flags 'optimizer=fast_compile'. If that does not work, Theano optimization can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint of this apply node. HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint of this apply node.
Arguably the most useful information is approximately half-way through Arguably the most useful information is approximately half-way through
the error message, where the kind of error is displayed along with its the error message, where the kind of error is displayed along with its
...@@ -71,7 +67,7 @@ the faulty line, while ``exception_verbosity=high`` will display a ...@@ -71,7 +67,7 @@ the faulty line, while ``exception_verbosity=high`` will display a
debugprint of the apply node. Using these hints, the end of the error debugprint of the apply node. Using these hints, the end of the error
message becomes : message becomes :
.. code-block:: bash .. code-block:: none
Backtrace when the node is created: Backtrace when the node is created:
File "test0.py", line 8, in <module> File "test0.py", line 8, in <module>
...@@ -101,7 +97,7 @@ following example. Here, we use ``exception_verbosity=high`` and ...@@ -101,7 +97,7 @@ following example. Here, we use ``exception_verbosity=high`` and
``optimizer=None`` would and it could therefore be used instead of test values. ``optimizer=None`` would and it could therefore be used instead of test values.
.. code-block:: python .. testcode:: testvalue
import numpy import numpy
import theano import theano
...@@ -137,7 +133,7 @@ following example. Here, we use ``exception_verbosity=high`` and ...@@ -137,7 +133,7 @@ following example. Here, we use ``exception_verbosity=high`` and
Running the above code generates the following error message: Running the above code generates the following error message:
.. code-block:: bash .. testoutput:: testvalue
Traceback (most recent call last): Traceback (most recent call last):
File "test1.py", line 31, in <module> File "test1.py", line 31, in <module>
...@@ -185,7 +181,7 @@ of error can thus be identified with much more precision and much earlier in ...@@ -185,7 +181,7 @@ of error can thus be identified with much more precision and much earlier in
the compilation pipeline. For example, running the above code yields the the compilation pipeline. For example, running the above code yields the
following error message, which properly identifies *line 24* as the culprit. following error message, which properly identifies *line 24* as the culprit.
.. code-block:: bash .. code-block:: none
Traceback (most recent call last): Traceback (most recent call last):
File "test2.py", line 24, in <module> File "test2.py", line 24, in <module>
...@@ -228,7 +224,10 @@ The ``compute_test_value`` mechanism works as follows: ...@@ -228,7 +224,10 @@ The ``compute_test_value`` mechanism works as follows:
Theano provides a 'Print' op to do this. Theano provides a 'Print' op to do this.
.. code-block:: python .. testcode::
import numpy
import theano
x = theano.tensor.dvector('x') x = theano.tensor.dvector('x')
...@@ -243,6 +242,9 @@ Theano provides a 'Print' op to do this. ...@@ -243,6 +242,9 @@ Theano provides a 'Print' op to do this.
#this runs the graph with the message, and value printed #this runs the graph with the message, and value printed
assert numpy.all( f_with_print([1, 2, 3]) == [5, 10, 15]) assert numpy.all( f_with_print([1, 2, 3]) == [5, 10, 15])
.. testoutput::
this is a very important value __str__ = [ 1. 2. 3.]
Since Theano runs your program in a topological order, you won't have precise Since Theano runs your program in a topological order, you won't have precise
control over the order in which multiple ``Print()`` ops are evaluated. For a more control over the order in which multiple ``Print()`` ops are evaluated. For a more
...@@ -324,7 +326,7 @@ You can use ``MonitorMode`` to inspect the inputs and outputs of each ...@@ -324,7 +326,7 @@ You can use ``MonitorMode`` to inspect the inputs and outputs of each
node being executed when the function is called. The code snippet below node being executed when the function is called. The code snippet below
shows how to print all inputs and outputs: shows how to print all inputs and outputs:
.. code-block:: python .. testcode::
import theano import theano
...@@ -341,8 +343,9 @@ shows how to print all inputs and outputs: ...@@ -341,8 +343,9 @@ shows how to print all inputs and outputs:
post_func=inspect_outputs)) post_func=inspect_outputs))
f(3) f(3)
# The code will print the following: .. testoutput::
# 0 Elemwise{mul,no_inplace}(TensorConstant{5.0}, x) input(s) value(s): [array(5.0), array(3.0)] output(s) value(s): [array(15.0)]
0 Elemwise{mul,no_inplace}(TensorConstant{5.0}, x) input(s) value(s): [array(5.0), array(3.0)] output(s) value(s): [array(15.0)]
When using these ``inspect_inputs`` and ``inspect_outputs`` functions When using these ``inspect_inputs`` and ``inspect_outputs`` functions
with ``MonitorMode``, you should see [potentially a lot of] printed output. with ``MonitorMode``, you should see [potentially a lot of] printed output.
...@@ -357,7 +360,7 @@ position, or only if a particular value showed up in one of the inputs or output ...@@ -357,7 +360,7 @@ position, or only if a particular value showed up in one of the inputs or output
A typical example is to detect when NaN values are added into computations, which A typical example is to detect when NaN values are added into computations, which
can be achieved as follows: can be achieved as follows:
.. code-block:: python .. testcode:: compiled
import numpy import numpy
...@@ -385,12 +388,14 @@ can be achieved as follows: ...@@ -385,12 +388,14 @@ can be achieved as follows:
post_func=detect_nan)) post_func=detect_nan))
f(0) # log(0) * 0 = -inf * 0 = NaN f(0) # log(0) * 0 = -inf * 0 = NaN
# The code above will print: .. testoutput:: compiled
# *** NaN detected *** :options: +NORMALIZE_WHITESPACE
# Elemwise{Composite{[mul(log(i0), i0)]}} [@A] ''
# |x [@B] *** NaN detected ***
# Inputs : [array(0.0)] Elemwise{Composite{(log(i0) * i0)}} [@A] ''
# Outputs: [array(nan)] |x [@B]
Inputs : [array(0.0)]
Outputs: [array(nan)]
To help understand what is happening in your graph, you can To help understand what is happening in your graph, you can
disable the ``local_elemwise_fusion`` and all ``inplace`` disable the ``local_elemwise_fusion`` and all ``inplace``
...@@ -402,12 +407,12 @@ will not be able to see the input that was overwritten in the ``post_func`` will not be able to see the input that was overwritten in the ``post_func``
function. To disable those optimizations (with a Theano version after function. To disable those optimizations (with a Theano version after
0.6rc3), define the MonitorMode like this: 0.6rc3), define the MonitorMode like this:
.. code-block:: python .. testcode:: compiled
mode = theano.compile.MonitorMode(post_func=detect_nan).excluding( mode = theano.compile.MonitorMode(post_func=detect_nan).excluding(
'local_elemwise_fusion', 'inplace) 'local_elemwise_fusion', 'inplace')
f = theano.function([x], [theano.tensor.log(x) * x], f = theano.function([x], [theano.tensor.log(x) * x],
mode=mode) mode=mode)
.. note:: .. note::
...@@ -422,12 +427,11 @@ the execution of the node can garbage collect its inputs that aren't ...@@ -422,12 +427,11 @@ the execution of the node can garbage collect its inputs that aren't
needed anymore by the Theano function. This can be done with the Theano needed anymore by the Theano function. This can be done with the Theano
flag: flag:
.. code-block:: cfg .. code-block:: python
allow_gc=False allow_gc=False
.. TODO: documentation for link.WrapLinkerMany .. TODO: documentation for link.WrapLinkerMany
...@@ -443,28 +447,49 @@ functions. ...@@ -443,28 +447,49 @@ functions.
Consider this example script ("ex.py"): Consider this example script ("ex.py"):
.. code-block:: python .. testcode::
import theano
import numpy
import theano.tensor as T
a = T.dmatrix('a')
b = T.dmatrix('b')
import theano f = theano.function([a, b], [a * b])
import numpy
import theano.tensor as T
a = T.dmatrix('a') # matrices chosen so dimensions are unsuitable for multiplication
b = T.dmatrix('b') mat1 = numpy.arange(12).reshape((3, 4))
mat2 = numpy.arange(25).reshape((5, 5))
f = theano.function([a, b], [a * b]) f(mat1, mat2)
# matrices chosen so dimensions are unsuitable for multiplication .. testoutput::
mat1 = numpy.arange(12).reshape((3, 4)) :hide:
mat2 = numpy.arange(25).reshape((5, 5)) :options: +ELLIPSIS
f(mat1, mat2) Traceback (most recent call last):
...
ValueError: Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 5)
Apply node that caused the error: Elemwise{mul,no_inplace}(a, b)
Toposort index: 0
Inputs types: [TensorType(float64, matrix), TensorType(float64, matrix)]
Inputs shapes: [(3, 4), (5, 5)]
Inputs strides: [(32, 8), (40, 8)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [['output']]
Backtrace when the node is created:
File "<doctest default[0]>", line 8, in <module>
f = theano.function([a, b], [a * b])
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
This is actually so simple the debugging could be done easily, but it's for This is actually so simple the debugging could be done easily, but it's for
illustrative purposes. As the matrices can't be multiplied element-wise illustrative purposes. As the matrices can't be multiplied element-wise
(unsuitable shapes), we get the following exception: (unsuitable shapes), we get the following exception:
.. code-block:: text .. code-block:: none
File "ex.py", line 14, in <module> File "ex.py", line 14, in <module>
f(mat1, mat2) f(mat1, mat2)
......
...@@ -40,9 +40,11 @@ Well, what you do is this: ...@@ -40,9 +40,11 @@ Well, what you do is this:
.. If you modify this code, also change : .. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_examples.test_examples_1 .. theano/tests/test_tutorial.py:T_examples.test_examples_1
>>> import theano
>>> import theano.tensor as T
>>> x = T.dmatrix('x') >>> x = T.dmatrix('x')
>>> s = 1 / (1 + T.exp(-x)) >>> s = 1 / (1 + T.exp(-x))
>>> logistic = function([x], s) >>> logistic = theano.function([x], s)
>>> logistic([[0, 1], [-1, -2]]) >>> logistic([[0, 1], [-1, -2]])
array([[ 0.5 , 0.73105858], array([[ 0.5 , 0.73105858],
[ 0.26894142, 0.11920292]]) [ 0.26894142, 0.11920292]])
...@@ -63,7 +65,7 @@ We can verify that this alternate form produces the same values: ...@@ -63,7 +65,7 @@ We can verify that this alternate form produces the same values:
.. theano/tests/test_tutorial.py:T_examples.test_examples_2 .. theano/tests/test_tutorial.py:T_examples.test_examples_2
>>> s2 = (1 + T.tanh(x / 2)) / 2 >>> s2 = (1 + T.tanh(x / 2)) / 2
>>> logistic2 = function([x], s2) >>> logistic2 = theano.function([x], s2)
>>> logistic2([[0, 1], [-1, -2]]) >>> logistic2([[0, 1], [-1, -2]])
array([[ 0.5 , 0.73105858], array([[ 0.5 , 0.73105858],
[ 0.26894142, 0.11920292]]) [ 0.26894142, 0.11920292]])
...@@ -83,7 +85,7 @@ squared difference between two matrices *a* and *b* at the same time: ...@@ -83,7 +85,7 @@ squared difference between two matrices *a* and *b* at the same time:
>>> diff = a - b >>> diff = a - b
>>> abs_diff = abs(diff) >>> abs_diff = abs(diff)
>>> diff_squared = diff**2 >>> diff_squared = diff**2
>>> f = function([a, b], [diff, abs_diff, diff_squared]) >>> f = theano.function([a, b], [diff, abs_diff, diff_squared])
.. note:: .. note::
`dmatrices` produces as many outputs as names that you provide. It is a `dmatrices` produces as many outputs as names that you provide. It is a
...@@ -95,11 +97,9 @@ was reformatted for readability): ...@@ -95,11 +97,9 @@ was reformatted for readability):
>>> f([[1, 1], [1, 1]], [[0, 1], [2, 3]]) >>> f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
[array([[ 1., 0.], [array([[ 1., 0.],
[-1., -2.]]), [-1., -2.]]), array([[ 1., 0.],
array([[ 1., 0.], [ 1., 2.]]), array([[ 1., 0.],
[ 1., 2.]]), [ 1., 4.]])]
array([[ 1., 0.],
[ 1., 4.]])]
Setting a Default Value for an Argument Setting a Default Value for an Argument
...@@ -113,6 +113,7 @@ one. You can do it like this: ...@@ -113,6 +113,7 @@ one. You can do it like this:
.. theano/tests/test_tutorial.py:T_examples.test_examples_6 .. theano/tests/test_tutorial.py:T_examples.test_examples_6
>>> from theano import Param >>> from theano import Param
>>> from theano import function
>>> x, y = T.dscalars('x', 'y') >>> x, y = T.dscalars('x', 'y')
>>> z = x + y >>> z = x + y
>>> f = function([x, Param(y, default=1)], z) >>> f = function([x, Param(y, default=1)], z)
...@@ -257,8 +258,7 @@ for the purpose of one particular function. ...@@ -257,8 +258,7 @@ for the purpose of one particular function.
>>> # The type of foo must match the shared variable we are replacing >>> # The type of foo must match the shared variable we are replacing
>>> # with the ``givens`` >>> # with the ``givens``
>>> foo = T.scalar(dtype=state.dtype) >>> foo = T.scalar(dtype=state.dtype)
>>> skip_shared = function([inc, foo], fn_of_state, >>> skip_shared = function([inc, foo], fn_of_state, givens=[(state, foo)])
givens=[(state, foo)])
>>> skip_shared(1, 3) # we're using 3 for the state, not state.value >>> skip_shared(1, 3) # we're using 3 for the state, not state.value
array(7) array(7)
>>> state.get_value() # old state still there, but we didn't use it >>> state.get_value() # old state still there, but we didn't use it
...@@ -311,7 +311,7 @@ Here's a brief example. The setup code is: ...@@ -311,7 +311,7 @@ Here's a brief example. The setup code is:
.. If you modify this code, also change : .. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_examples.test_examples_9 .. theano/tests/test_tutorial.py:T_examples.test_examples_9
.. code-block:: python .. testcode::
from theano.tensor.shared_randomstreams import RandomStreams from theano.tensor.shared_randomstreams import RandomStreams
from theano import function from theano import function
...@@ -382,6 +382,8 @@ For example: ...@@ -382,6 +382,8 @@ For example:
>>> state_after_v0 = rv_u.rng.get_value().get_state() >>> state_after_v0 = rv_u.rng.get_value().get_state()
>>> nearly_zeros() # this affects rv_u's generator >>> nearly_zeros() # this affects rv_u's generator
array([[ 0., 0.],
[ 0., 0.]])
>>> v1 = f() >>> v1 = f()
>>> rng = rv_u.rng.get_value(borrow=True) >>> rng = rv_u.rng.get_value(borrow=True)
>>> rng.set_state(state_after_v0) >>> rng.set_state(state_after_v0)
...@@ -410,53 +412,46 @@ corresponding to the random number generation process (i.e. RandomFunction{unifo ...@@ -410,53 +412,46 @@ corresponding to the random number generation process (i.e. RandomFunction{unifo
An example of how "random states" can be transferred from one theano function An example of how "random states" can be transferred from one theano function
to another is shown below. to another is shown below.
.. code-block:: python >>> from __future__ import print_function
>>> import theano
>>> import numpy
>>> import theano.tensor as T
>>> from theano.sandbox.rng_mrg import MRG_RandomStreams
>>> from theano.tensor.shared_randomstreams import RandomStreams
import theano >>> class Graph():
import numpy ... def __init__(self, seed=123):
import theano.tensor as T ... self.rng = RandomStreams(seed)
from theano.sandbox.rng_mrg import MRG_RandomStreams ... self.y = self.rng.uniform(size=(1,))
from theano.tensor.shared_randomstreams import RandomStreams
class Graph():
def __init__(self, seed=123):
self.rng = RandomStreams(seed)
self.y = self.rng.uniform(size=(1,))
g1 = Graph(seed=123)
f1 = theano.function([], g1.y)
g2 = Graph(seed=987)
f2 = theano.function([], g2.y)
print 'By default, the two functions are out of sync.' >>> g1 = Graph(seed=123)
print 'f1() returns ', f1() >>> f1 = theano.function([], g1.y)
print 'f2() returns ', f2()
def copy_random_state(g1, g2): >>> g2 = Graph(seed=987)
if isinstance(g1.rng, MRG_RandomStreams): >>> f2 = theano.function([], g2.y)
g2.rng.rstate = g1.rng.rstate
for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
su2[0].set_value(su1[0].get_value())
print 'We now copy the state of the theano random number generators.' >>> # By default, the two functions are out of sync.
copy_random_state(g1, g2) >>> f1()
print 'f1() returns ', f1() array([ 0.72803009])
print 'f2() returns ', f2() >>> f2()
array([ 0.55056769])
This gives the following output: >>> def copy_random_state(g1, g2):
... if isinstance(g1.rng, MRG_RandomStreams):
... g2.rng.rstate = g1.rng.rstate
... for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
... su2[0].set_value(su1[0].get_value())
.. code-block:: bash >>> # We now copy the state of the theano random number generators.
>>> copy_random_state(g1, g2)
>>> f1()
array([ 0.59044123])
>>> f2()
array([ 0.59044123])
# By default, the two functions are out of sync.
f1() returns [ 0.72803009]
f2() returns [ 0.55056769]
# We now copy the state of the theano random number generators.
f1() returns [ 0.59044123]
f2() returns [ 0.59044123]
Other Random Distributions Other Random Distributions
--------------------------- --------------------------
There are :ref:`other distributions implemented <libdoc_tensor_raw_random>`. There are :ref:`other distributions implemented <libdoc_tensor_raw_random>`.
...@@ -487,50 +482,65 @@ A Real Example: Logistic Regression ...@@ -487,50 +482,65 @@ A Real Example: Logistic Regression
The preceding elements are featured in this more realistic example. The preceding elements are featured in this more realistic example.
It will be used repeatedly. It will be used repeatedly.
.. code-block:: python .. testcode::
import numpy import numpy
import theano import theano
import theano.tensor as T import theano.tensor as T
rng = numpy.random rng = numpy.random
N = 400 N = 400
feats = 784 feats = 784
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2)) D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000 training_steps = 10000
# Declare Theano symbolic variables # Declare Theano symbolic variables
x = T.dmatrix("x") x = T.matrix("x")
y = T.dvector("y") y = T.vector("y")
w = theano.shared(rng.randn(feats), name="w") w = theano.shared(rng.randn(feats), name="w")
b = theano.shared(0., name="b") b = theano.shared(0., name="b")
print "Initial model:" print("Initial model:")
print w.get_value(), b.get_value() print(w.get_value())
print(b.get_value())
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) # Probability that target = 1 # Construct Theano expression graph
prediction = p_1 > 0.5 # The prediction thresholded p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) # Probability that target = 1
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function prediction = p_1 > 0.5 # The prediction thresholded
cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
gw, gb = T.grad(cost, [w, b]) # Compute the gradient of the cost cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
# (we shall return to this in a gw, gb = T.grad(cost, [w, b]) # Compute the gradient of the cost
# following section of this tutorial) # (we shall return to this in a
# following section of this tutorial)
# Compile
train = theano.function( # Compile
inputs=[x,y], train = theano.function(
outputs=[prediction, xent], inputs=[x,y],
updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb))) outputs=[prediction, xent],
predict = theano.function(inputs=[x], outputs=prediction) updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
predict = theano.function(inputs=[x], outputs=prediction)
# Train
for i in range(training_steps): # Train
pred, err = train(D[0], D[1]) for i in range(training_steps):
pred, err = train(D[0], D[1])
print "Final model:"
print w.get_value(), b.get_value() print("Final model:")
print "target values for D:", D[1] print(w.get_value())
print "prediction on D:", predict(D[0]) print(b.get_value())
print("target values for D:")
print(D[1])
print("prediction on D:")
print(predict(D[0]))
.. testoutput::
:hide:
:options: +ELLIPSIS
Initial model:
...
0.0
Final model:
...
target values for D:
...
prediction on D:
...
...@@ -56,7 +56,7 @@ This section provides an overview of the methods you typically have to implement ...@@ -56,7 +56,7 @@ This section provides an overview of the methods you typically have to implement
possibilities you may encounter or need. For that refer to possibilities you may encounter or need. For that refer to
:ref:`op_contract`. :ref:`op_contract`.
.. code-block:: python .. testcode::
import theano import theano
...@@ -73,9 +73,9 @@ possibilities you may encounter or need. For that refer to ...@@ -73,9 +73,9 @@ possibilities you may encounter or need. For that refer to
# Other type of implementation # Other type of implementation
# C implementation: [see theano web site for other functions] # C implementation: [see theano web site for other functions]
def c_code(...): def c_code(self, node, inputs, outputs, sub):
# ...
pass pass
# Other implementations (pycuda, ...): # Other implementations (pycuda, ...):
def make_thunk(self, node, storage_map, _, _2): def make_thunk(self, node, storage_map, _, _2):
pass pass
...@@ -83,7 +83,7 @@ possibilities you may encounter or need. For that refer to ...@@ -83,7 +83,7 @@ possibilities you may encounter or need. For that refer to
# optional: # optional:
check_input = True check_input = True
def __init__(self, ...): def __init__(self, *args):
pass pass
def grad(self, inputs, g): def grad(self, inputs, g):
...@@ -92,7 +92,7 @@ possibilities you may encounter or need. For that refer to ...@@ -92,7 +92,7 @@ possibilities you may encounter or need. For that refer to
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
pass pass
def infer_shape(node, (i0_shapes, ...)): def infer_shape(node, input_shapes):
pass pass
.. ../extending/op.txt .. ../extending/op.txt
...@@ -250,7 +250,7 @@ Other methods can be optionally defined by the op. ...@@ -250,7 +250,7 @@ Other methods can be optionally defined by the op.
Op Example Op Example
========== ==========
.. code-block:: python .. testcode:: example
import theano import theano
...@@ -288,7 +288,7 @@ Op Example ...@@ -288,7 +288,7 @@ Op Example
You can try it as follows: You can try it as follows:
.. code-block:: python .. testcode:: example
x = theano.tensor.matrix() x = theano.tensor.matrix()
f = theano.function([x], DoubleOp()(x)) f = theano.function([x], DoubleOp()(x))
...@@ -296,8 +296,28 @@ You can try it as follows: ...@@ -296,8 +296,28 @@ You can try it as follows:
inp = numpy.random.rand(5, 4) inp = numpy.random.rand(5, 4)
out = f(inp) out = f(inp)
assert numpy.allclose(inp * 2, out) assert numpy.allclose(inp * 2, out)
print inp print(inp)
print out print(out)
.. testoutput:: example
:hide:
:options: +ELLIPSIS
...
...
.. code-block:: none
[[ 0.02443785 0.67833979 0.91954769 0.95444365]
[ 0.60853382 0.7770539 0.78163219 0.92838837]
[ 0.04427765 0.37895602 0.23155797 0.4934699 ]
[ 0.20551517 0.7419955 0.34500905 0.49347629]
[ 0.24082769 0.49321452 0.24566545 0.15351132]]
[[ 0.04887571 1.35667957 1.83909538 1.90888731]
[ 1.21706764 1.55410779 1.56326439 1.85677674]
[ 0.08855531 0.75791203 0.46311594 0.9869398 ]
[ 0.41103034 1.48399101 0.69001811 0.98695258]
[ 0.48165539 0.98642904 0.4913309 0.30702264]]
Example for properties of an Op Example for properties of an Op
...@@ -310,7 +330,7 @@ We create an Op that takes a variable ``x`` and returns ``a*x+b``. ...@@ -310,7 +330,7 @@ We create an Op that takes a variable ``x`` and returns ``a*x+b``.
We want to say that two such ops are equal when their values of ``a`` We want to say that two such ops are equal when their values of ``a``
and ``b`` are equal. and ``b`` are equal.
.. code-block:: python .. testcode:: properties
import theano import theano
...@@ -349,7 +369,7 @@ It also generates a default :func:`__str__` method that prints the attribute nam ...@@ -349,7 +369,7 @@ It also generates a default :func:`__str__` method that prints the attribute nam
We can test this by running the following segment: We can test this by running the following segment:
.. code-block:: python .. testcode:: properties
mult4plus5op = AXPBOp(4, 5) mult4plus5op = AXPBOp(4, 5)
another_mult4plus5op = AXPBOp(4, 5) another_mult4plus5op = AXPBOp(4, 5)
...@@ -383,7 +403,10 @@ returns the right answer. If you detect an error, you must raise an ...@@ -383,7 +403,10 @@ returns the right answer. If you detect an error, you must raise an
*exception*. You can use the ``assert`` keyword to automatically raise an *exception*. You can use the ``assert`` keyword to automatically raise an
``AssertionError``. ``AssertionError``.
.. code-block:: python .. testcode:: tests
import numpy
import theano
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano import config from theano import config
...@@ -439,7 +462,7 @@ square matrices will not detect the problem. This is why the ...@@ -439,7 +462,7 @@ square matrices will not detect the problem. This is why the
your op works only with such matrices, you can disable the warning with the your op works only with such matrices, you can disable the warning with the
``warn=False`` parameter. ``warn=False`` parameter.
.. code-block:: python .. testcode:: tests
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano import config from theano import config
...@@ -468,7 +491,7 @@ If there is an error, the function raises an exception. If you want to ...@@ -468,7 +491,7 @@ If there is an error, the function raises an exception. If you want to
see it fail, you can implement an incorrect gradient (for instance, by removing see it fail, you can implement an incorrect gradient (for instance, by removing
the multiplication by 2). the multiplication by 2).
.. code-block:: python .. testcode:: tests
def test_grad(self): def test_grad(self):
theano.tests.unittest_tools.verify_grad(self.op, theano.tests.unittest_tools.verify_grad(self.op,
...@@ -486,7 +509,7 @@ implementation of the Rop method of a particular op. ...@@ -486,7 +509,7 @@ implementation of the Rop method of a particular op.
For instance, to verify the Rop method of the DoubleOp, you can use this: For instance, to verify the Rop method of the DoubleOp, you can use this:
.. code-block:: python .. testcode:: tests
import numpy import numpy
import theano.tests import theano.tests
...@@ -562,7 +585,7 @@ of the file containing a specific test of interest and run the ...@@ -562,7 +585,7 @@ of the file containing a specific test of interest and run the
file. In this example, the test *test_DoubleRop* in the class file. In this example, the test *test_DoubleRop* in the class
*test_double_op* would be performed. *test_double_op* would be performed.
.. code-block:: python .. testcode:: tests
if __name__ == '__main__': if __name__ == '__main__':
t = test_DoubleRop("test_double_rop") t = test_DoubleRop("test_double_rop")
...@@ -572,7 +595,7 @@ file. In this example, the test *test_DoubleRop* in the class ...@@ -572,7 +595,7 @@ file. In this example, the test *test_DoubleRop* in the class
We recommend that when we execute a file, we run all tests in that We recommend that when we execute a file, we run all tests in that
file. This can be done by adding this at the end of your test files: file. This can be done by adding this at the end of your test files:
.. code-block:: python .. testcode:: tests
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -638,10 +661,11 @@ signature: ...@@ -638,10 +661,11 @@ signature:
as_op Example as_op Example
------------- -------------
.. code-block:: python .. testcode:: asop
import theano import theano
import numpy import numpy
from theano import function
from theano.compile.ops import as_op from theano.compile.ops import as_op
def infer_shape_numpy_dot(node, input_shapes): def infer_shape_numpy_dot(node, input_shapes):
...@@ -655,13 +679,13 @@ as_op Example ...@@ -655,13 +679,13 @@ as_op Example
You can try it as follows: You can try it as follows:
.. code-block:: python .. testcode:: asop
x = theano.tensor.fmatrix() x = theano.tensor.fmatrix()
y = theano.tensor.fmatrix() y = theano.tensor.fmatrix()
f = function([x, y], numpy_dot(x, y)) f = function([x, y], numpy_dot(x, y))
inp1 = numpy.random.rand(5, 4) inp1 = numpy.random.rand(5, 4).astype('float32')
inp2 = numpy.random.rand(4, 7) inp2 = numpy.random.rand(4, 7).astype('float32')
out = f(inp1, inp2) out = f(inp1, inp2)
...@@ -701,27 +725,27 @@ the documentation. ...@@ -701,27 +725,27 @@ the documentation.
Here is an example how to add docstring to a class. Here is an example how to add docstring to a class.
.. code-block:: python .. testcode::
import theano import theano
class DoubleOp(theano.Op): class DoubleOp(theano.Op):
""" Double each element of a tensor. """ Double each element of a tensor.
:param x: input tensor. :param x: input tensor.
:return: a tensor of the same shape and dtype as the input with all :return: a tensor of the same shape and dtype as the input with all
values doubled. values doubled.
:note: :note:
this is a test note this is a test note
:seealso: :seealso:
You can use the elemwise op to replace this example. You can use the elemwise op to replace this example.
Just execute `x * 2` with x being a Theano variable. Just execute `x * 2` with x being a Theano variable.
.. versionadded:: 0.6 .. versionadded:: 0.6
""" """
This is how it will show up for files that we auto-list in the library This is how it will show up for files that we auto-list in the library
documentation: documentation:
......
...@@ -129,7 +129,7 @@ that the data is not only contiguous in memory but also that it is organized ...@@ -129,7 +129,7 @@ that the data is not only contiguous in memory but also that it is organized
such that the index of the latest dimension changes the fastest. If the such that the index of the latest dimension changes the fastest. If the
following array following array
.. code-block:: python .. testcode::
x = [[1, 2, 3], x = [[1, 2, 3],
[4, 5, 6]] [4, 5, 6]]
...@@ -337,7 +337,7 @@ commonly used. ...@@ -337,7 +337,7 @@ commonly used.
of C code that you should include in your C code (after ensuring that a of C code that you should include in your C code (after ensuring that a
Python exception is set) if it needs to raise an exception. Ex: Python exception is set) if it needs to raise an exception. Ex:
.. code-block:: python .. code-block:: c
c_code = """ c_code = """
PyErr_Format(PyExc_ValueError, "X does not have the right value"); PyErr_Format(PyExc_ValueError, "X does not have the right value");
...@@ -354,7 +354,7 @@ commonly used. ...@@ -354,7 +354,7 @@ commonly used.
``%`` characters in the format characters need to be escaped since the C ``%`` characters in the format characters need to be escaped since the C
code itself is defined in a string which undergoes string formatting. code itself is defined in a string which undergoes string formatting.
.. code-block:: python .. code-block:: c
c_code = """ c_code = """
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
...@@ -440,7 +440,7 @@ need to validate that the output storage has been allocated and has the same ...@@ -440,7 +440,7 @@ need to validate that the output storage has been allocated and has the same
shape as our vector input. If it is not the case, we allocate a new output shape as our vector input. If it is not the case, we allocate a new output
storage with the right shape and number of dimensions. storage with the right shape and number of dimensions.
.. code-block:: python .. testcode:: examples
import numpy import numpy
import theano import theano
...@@ -565,7 +565,7 @@ the inputs (because of the upcast in the method ``make_node()``), the typenum ...@@ -565,7 +565,7 @@ the inputs (because of the upcast in the method ``make_node()``), the typenum
of the output has to be obtained in the Python code and then included in the of the output has to be obtained in the Python code and then included in the
C code. C code.
.. code-block:: python .. testcode:: examples
class VectorTimesVector(gof.Op): class VectorTimesVector(gof.Op):
__props__ = () __props__ = ()
...@@ -705,7 +705,7 @@ implemented using the ``COp`` class. ...@@ -705,7 +705,7 @@ implemented using the ``COp`` class.
The new op is defined inside a Python file with the following code : The new op is defined inside a Python file with the following code :
.. code-block:: python .. testcode::
import theano import theano
from theano import gof from theano import gof
...@@ -917,8 +917,8 @@ In addition to these macros, the ``init_code_struct``, ``code``, and ...@@ -917,8 +917,8 @@ In addition to these macros, the ``init_code_struct``, ``code``, and
.. code-block:: c .. code-block:: c
if (error) { if (error) {
// Set python exception // Set python exception
FAIL FAIL
} }
You can add a semicolon after the macro if it makes your editor You can add a semicolon after the macro if it makes your editor
......
...@@ -21,36 +21,50 @@ should be written: ...@@ -21,36 +21,50 @@ should be written:
Defining a shared variable for the lookup table Defining a shared variable for the lookup table
>>> lookup_table = theano.shared(matrix_ndarray). .. code-block:: python
lookup_table = theano.shared(matrix_ndarray)
Getting a subset of the table (some rows or some columns) by passing Getting a subset of the table (some rows or some columns) by passing
an integer vector of indices corresponding to those rows or columns. an integer vector of indices corresponding to those rows or columns.
>>> subset = lookup_table[vector_of_indices] .. code-block:: python
subset = lookup_table[vector_of_indices]
From now on, use only 'subset'. Do not call lookup_table[vector_of_indices] From now on, use only 'subset'. Do not call lookup_table[vector_of_indices]
again. This causes problems with grad as this will create new variables. again. This causes problems with grad as this will create new variables.
Defining cost which depends only on subset and not the entire lookup_table Defining cost which depends only on subset and not the entire lookup_table
>>> cost = something that depends on subset .. code-block:: python
>>> g = theano.grad(cost, subset)
cost = something that depends on subset
g = theano.grad(cost, subset)
There are two ways for updating the parameters: There are two ways for updating the parameters:
Either use inc_subtensor or set_subtensor. It is recommended to use Either use inc_subtensor or set_subtensor. It is recommended to use
inc_subtensor. Some theano optimizations do the conversion between inc_subtensor. Some theano optimizations do the conversion between
the two functions, but not in all cases. the two functions, but not in all cases.
>>> updates = inc_subtensor(subset, g*lr) .. code-block:: python
updates = inc_subtensor(subset, g*lr)
OR OR
>>> updates = set_subtensor(subset, subset + g*lr)
.. code-block:: python
updates = set_subtensor(subset, subset + g*lr)
Currently we just cover the case here, Currently we just cover the case here,
not if you use inc_subtensor or set_subtensor with other types of indexing. not if you use inc_subtensor or set_subtensor with other types of indexing.
Defining the theano function Defining the theano function
>>> f=theano.function(..., updates=updates) .. code-block:: python
f = theano.function(..., updates=updates)
Note that you can compute the gradient of the cost function w.r.t. Note that you can compute the gradient of the cost function w.r.t.
the entire lookup_table, and the gradient will have nonzero rows only the entire lookup_table, and the gradient will have nonzero rows only
......
...@@ -23,17 +23,19 @@ Here is the code to compute this gradient: ...@@ -23,17 +23,19 @@ Here is the code to compute this gradient:
.. If you modify this code, also change : .. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_examples.test_examples_4 .. theano/tests/test_tutorial.py:T_examples.test_examples_4
>>> from theano import pp >>> import theano
>>> import theano.tensor as T
>>> from theano import pp
>>> x = T.dscalar('x') >>> x = T.dscalar('x')
>>> y = x ** 2 >>> y = x ** 2
>>> gy = T.grad(y, x) >>> gy = T.grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization >>> pp(gy) # print out the gradient prior to optimization
'((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))' '((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
>>> f = function([x], gy) >>> f = theano.function([x], gy)
>>> f(4) >>> f(4)
array(8.0) array(8.0)
>>> f(94.2) >>> f(94.2)
array(188.40000000000001) array(188.4)
In this example, we can see from ``pp(gy)`` that we are computing In this example, we can see from ``pp(gy)`` that we are computing
the correct symbolic gradient. the correct symbolic gradient.
...@@ -44,7 +46,7 @@ the correct symbolic gradient. ...@@ -44,7 +46,7 @@ the correct symbolic gradient.
The optimizer simplifies the symbolic gradient expression. You can see The optimizer simplifies the symbolic gradient expression. You can see
this by digging inside the internal properties of the compiled function. this by digging inside the internal properties of the compiled function.
.. code-block:: python .. testcode::
pp(f.maker.fgraph.outputs[0]) pp(f.maker.fgraph.outputs[0])
'(2.0 * x)' '(2.0 * x)'
...@@ -68,7 +70,7 @@ logistic is: :math:`ds(x)/dx = s(x) \cdot (1 - s(x))`. ...@@ -68,7 +70,7 @@ logistic is: :math:`ds(x)/dx = s(x) \cdot (1 - s(x))`.
>>> x = T.dmatrix('x') >>> x = T.dmatrix('x')
>>> s = T.sum(1 / (1 + T.exp(-x))) >>> s = T.sum(1 / (1 + T.exp(-x)))
>>> gs = T.grad(s, x) >>> gs = T.grad(s, x)
>>> dlogistic = function([x], gs) >>> dlogistic = theano.function([x], gs)
>>> dlogistic([[0, 1], [-1, -2]]) >>> dlogistic([[0, 1], [-1, -2]])
array([[ 0.25 , 0.19661193], array([[ 0.25 , 0.19661193],
[ 0.19661193, 0.10499359]]) [ 0.19661193, 0.10499359]])
...@@ -117,10 +119,12 @@ do is to loop over the entries in *y* and compute the gradient of ...@@ -117,10 +119,12 @@ do is to loop over the entries in *y* and compute the gradient of
effort is being done for improving the performance of ``scan``. We effort is being done for improving the performance of ``scan``. We
shall return to :ref:`scan<tutloop>` later in this tutorial. shall return to :ref:`scan<tutloop>` later in this tutorial.
>>> import theano
>>> import theano.tensor as T
>>> x = T.dvector('x') >>> x = T.dvector('x')
>>> y = x ** 2 >>> y = x ** 2
>>> J, updates = theano.scan(lambda i, y,x : T.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y,x]) >>> J, updates = theano.scan(lambda i, y,x : T.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y,x])
>>> f = function([x], J, updates=updates) >>> f = theano.function([x], J, updates=updates)
>>> f([4, 4]) >>> f([4, 4])
array([[ 8., 0.], array([[ 8., 0.],
[ 0., 8.]]) [ 0., 8.]])
...@@ -154,13 +158,12 @@ difference is that now, instead of computing the Jacobian of some expression ...@@ -154,13 +158,12 @@ difference is that now, instead of computing the Jacobian of some expression
*y*, we compute the Jacobian of ``T.grad(cost,x)``, where *cost* is some *y*, we compute the Jacobian of ``T.grad(cost,x)``, where *cost* is some
scalar. scalar.
>>> x = T.dvector('x') >>> x = T.dvector('x')
>>> y = x ** 2 >>> y = x ** 2
>>> cost = y.sum() >>> cost = y.sum()
>>> gy = T.grad(cost, x) >>> gy = T.grad(cost, x)
>>> H, updates = theano.scan(lambda i, gy,x : T.grad(gy[i], x), sequences=T.arange(gy.shape[0]), non_sequences=[gy, x]) >>> H, updates = theano.scan(lambda i, gy,x : T.grad(gy[i], x), sequences=T.arange(gy.shape[0]), non_sequences=[gy, x])
>>> f = function([x], H, updates=updates) >>> f = theano.function([x], H, updates=updates)
>>> f([4, 4]) >>> f([4, 4])
array([[ 2., 0.], array([[ 2., 0.],
[ 0., 2.]]) [ 0., 2.]])
...@@ -196,7 +199,6 @@ form of the operation. In order to evaluate the *R-operation* of ...@@ -196,7 +199,6 @@ form of the operation. In order to evaluate the *R-operation* of
expression *y*, with respect to *x*, multiplying the Jacobian with *v* expression *y*, with respect to *x*, multiplying the Jacobian with *v*
you need to do something similar to this: you need to do something similar to this:
>>> W = T.dmatrix('W') >>> W = T.dmatrix('W')
>>> V = T.dmatrix('V') >>> V = T.dmatrix('V')
>>> x = T.dvector('x') >>> x = T.dvector('x')
...@@ -247,7 +249,6 @@ Hessian matrix, you have two options that will ...@@ -247,7 +249,6 @@ Hessian matrix, you have two options that will
give you the same result, though these options might exhibit differing performances. give you the same result, though these options might exhibit differing performances.
Hence, we suggest profiling the methods before using either one of the two: Hence, we suggest profiling the methods before using either one of the two:
>>> x = T.dvector('x') >>> x = T.dvector('x')
>>> v = T.dvector('v') >>> v = T.dvector('v')
>>> y = T.sum(x ** 2) >>> y = T.sum(x ** 2)
......
...@@ -38,6 +38,10 @@ The two modules ``pickle`` and ``cPickle`` have the same functionalities, but ...@@ -38,6 +38,10 @@ The two modules ``pickle`` and ``cPickle`` have the same functionalities, but
You can serialize (or *save*, or *pickle*) objects to a file with You can serialize (or *save*, or *pickle*) objects to a file with
``cPickle.dump``: ``cPickle.dump``:
.. testsetup::
my_obj = object()
>>> f = file('obj.save', 'wb') >>> f = file('obj.save', 'wb')
>>> cPickle.dump(my_obj, f, protocol=cPickle.HIGHEST_PROTOCOL) >>> cPickle.dump(my_obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
>>> f.close() >>> f.close()
...@@ -64,9 +68,15 @@ To de-serialize (or *load*, or *unpickle*) a pickled file, use ...@@ -64,9 +68,15 @@ To de-serialize (or *load*, or *unpickle*) a pickled file, use
You can pickle several objects into the same file, and load them all (in the You can pickle several objects into the same file, and load them all (in the
same order): same order):
.. testsetup::
obj1 = object()
obj2 = object()
obj3 = object()
>>> f = file('objects.save', 'wb') >>> f = file('objects.save', 'wb')
>>> for obj in [obj1, obj2, obj3]: >>> for obj in [obj1, obj2, obj3]:
>>> cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL) ... cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
>>> f.close() >>> f.close()
Then: Then:
...@@ -74,7 +84,7 @@ Then: ...@@ -74,7 +84,7 @@ Then:
>>> f = file('objects.save', 'rb') >>> f = file('objects.save', 'rb')
>>> loaded_objects = [] >>> loaded_objects = []
>>> for i in range(3): >>> for i in range(3):
>>> loaded_objects.append(cPickle.load(f)) ... loaded_objects.append(cPickle.load(f))
>>> f.close() >>> f.close()
For more details about pickle's usage, see For more details about pickle's usage, see
...@@ -102,7 +112,7 @@ along every instance of your model. ...@@ -102,7 +112,7 @@ along every instance of your model.
For instance, you can define functions along the lines of: For instance, you can define functions along the lines of:
.. code-block:: python .. testcode::
def __getstate__(self): def __getstate__(self):
state = dict(self.__dict__) state = dict(self.__dict__)
...@@ -129,6 +139,7 @@ just load the parameters manually with `numpy`. ...@@ -129,6 +139,7 @@ just load the parameters manually with `numpy`.
.. code-block:: python .. code-block:: python
import numpy
numpy.load('model.zip') numpy.load('model.zip')
This approach could be beneficial if you are sharing your model with people who This approach could be beneficial if you are sharing your model with people who
...@@ -153,7 +164,7 @@ don't. ...@@ -153,7 +164,7 @@ don't.
For instance, if the only parameters you want to save are a weight For instance, if the only parameters you want to save are a weight
matrix *W* and a bias *b*, you can define: matrix *W* and a bias *b*, you can define:
.. code-block:: python .. testcode::
def __getstate__(self): def __getstate__(self):
return (self.W, self.b) return (self.W, self.b)
...@@ -167,7 +178,7 @@ If at some point in time *W* is renamed to *weights* and *b* to ...@@ -167,7 +178,7 @@ If at some point in time *W* is renamed to *weights* and *b* to
*bias*, the older pickled files will still be usable, if you update these *bias*, the older pickled files will still be usable, if you update these
functions to reflect the change in name: functions to reflect the change in name:
.. code-block:: python .. testcode::
def __getstate__(self): def __getstate__(self):
return (self.weights, self.bias) return (self.weights, self.bias)
......
...@@ -26,7 +26,7 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -26,7 +26,7 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
**Scan Example: Computing tanh(x(t).dot(W) + b) elementwise** **Scan Example: Computing tanh(x(t).dot(W) + b) elementwise**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -46,15 +46,21 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -46,15 +46,21 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
b = np.ones((2), dtype=theano.config.floatX) b = np.ones((2), dtype=theano.config.floatX)
b[1] = 2 b[1] = 2
print compute_elementwise(x, w, b)[0] print(compute_elementwise(x, w, b)[0])
# comparison with numpy # comparison with numpy
print np.tanh(x.dot(w) + b) print(np.tanh(x.dot(w) + b))
.. testoutput::
[[ 0.96402758 0.99505475]
[ 0.96402758 0.99505475]]
[[ 0.96402758 0.99505475]
[ 0.96402758 0.99505475]]
**Scan Example: Computing the sequence x(t) = tanh(x(t - 1).dot(W) + y(t).dot(U) + p(T - t).dot(V))** **Scan Example: Computing the sequence x(t) = tanh(x(t - 1).dot(W) + y(t).dot(U) + p(T - t).dot(V))**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -84,18 +90,31 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -84,18 +90,31 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
p[0, :] = 3 p[0, :] = 3
v = np.ones((2, 2), dtype=theano.config.floatX) v = np.ones((2, 2), dtype=theano.config.floatX)
print compute_seq(x, w, y, u, p, v)[0] print(compute_seq(x, w, y, u, p, v)[0])
# comparison with numpy # comparison with numpy
x_res = np.zeros((5, 2), dtype=theano.config.floatX) x_res = np.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v)) x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1, 5): for i in range(1, 5):
x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4-i].dot(v)) x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
print x_res print(x_res)
.. testoutput::
[[-0.99505475 -0.99505475]
[ 0.96471973 0.96471973]
[ 0.99998585 0.99998585]
[ 0.99998771 0.99998771]
[ 1. 1. ]]
[[-0.99505475 -0.99505475]
[ 0.96471973 0.96471973]
[ 0.99998585 0.99998585]
[ 0.99998771 0.99998771]
[ 1. 1. ]]
**Scan Example: Computing norms of lines of X** **Scan Example: Computing norms of lines of X**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -108,14 +127,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -108,14 +127,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
# test value # test value
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1) x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_lines(x)[0] print(compute_norm_lines(x)[0])
# comparison with numpy # comparison with numpy
print np.sqrt((x ** 2).sum(1)) print(np.sqrt((x ** 2).sum(1)))
.. testoutput::
[ 1. 2. 3. 4. 5. 0.]
[ 1. 2. 3. 4. 5. 0.]
**Scan Example: Computing norms of columns of X** **Scan Example: Computing norms of columns of X**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -128,14 +152,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -128,14 +152,19 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
# test value # test value
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1) x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_cols(x)[0] print(compute_norm_cols(x)[0])
# comparison with numpy # comparison with numpy
print np.sqrt((x ** 2).sum(0)) print(np.sqrt((x ** 2).sum(0)))
.. testoutput::
[ 0. 1. 2. 3. 4. 5.]
[ 0. 1. 2. 3. 4. 5.]
**Scan Example: Computing trace of X** **Scan Example: Computing trace of X**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -153,14 +182,20 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -153,14 +182,20 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
# test value # test value
x = np.eye(5, dtype=theano.config.floatX) x = np.eye(5, dtype=theano.config.floatX)
x[0] = np.arange(5, dtype=theano.config.floatX) x[0] = np.arange(5, dtype=theano.config.floatX)
print compute_trace(x)[0] print(compute_trace(x)[0])
# comparison with numpy # comparison with numpy
print np.diagonal(x).sum() print(np.diagonal(x).sum())
.. testoutput::
4.0
4.0
**Scan Example: Computing the sequence x(t) = x(t - 2).dot(U) + x(t - 1).dot(V) + tanh(x(t - 1).dot(W) + b)** **Scan Example: Computing the sequence x(t) = x(t - 2).dot(U) + x(t - 1).dot(V) + tanh(x(t - 1).dot(W) + b)**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -187,7 +222,7 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -187,7 +222,7 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
n = 10 n = 10
b = np.ones((2), dtype=theano.config.floatX) b = np.ones((2), dtype=theano.config.floatX)
print compute_seq2(x, u, v, w, b, n) print(compute_seq2(x, u, v, w, b, n))
# comparison with numpy # comparison with numpy
x_res = np.zeros((10, 2)) x_res = np.zeros((10, 2))
...@@ -197,11 +232,35 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -197,11 +232,35 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
for i in range(2, 10): for i in range(2, 10):
x_res[i] = (x_res[i - 2].dot(u) + x_res[i - 1].dot(v) + x_res[i] = (x_res[i - 2].dot(u) + x_res[i - 1].dot(v) +
np.tanh(x_res[i - 1].dot(w) + b)) np.tanh(x_res[i - 1].dot(w) + b))
print x_res print(x_res)
.. testoutput::
[array([[ 1.40514825, 1.40514825],
[ 2.88898899, 2.38898899],
[ 4.34018291, 4.34018291],
[ 6.53463142, 6.78463142],
[ 9.82972243, 9.82972243],
[ 14.22203814, 14.09703814],
[ 20.07439936, 20.07439936],
[ 28.12291843, 28.18541843],
[ 39.1913681 , 39.1913681 ],
[ 54.28407732, 54.25282732]])]
[[ 1.40514825 1.40514825]
[ 2.88898899 2.38898899]
[ 4.34018291 4.34018291]
[ 6.53463142 6.78463142]
[ 9.82972243 9.82972243]
[ 14.22203814 14.09703814]
[ 20.07439936 20.07439936]
[ 28.12291843 28.18541843]
[ 39.1913681 39.1913681 ]
[ 54.28407732 54.25282732]]
**Scan Example: Computing the Jacobian of y = tanh(v.dot(A)) wrt x** **Scan Example: Computing the Jacobian of y = tanh(v.dot(A)) wrt x**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -221,13 +280,22 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`. ...@@ -221,13 +280,22 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
print compute_jac_t(w, x)[0] print(compute_jac_t(w, x)[0])
# compare with numpy # compare with numpy
print ((1 - np.tanh(x.dot(w)) ** 2) * w).T print(((1 - np.tanh(x.dot(w)) ** 2) * w).T)
.. testoutput::
[[ 0.41997434 0. 0.41997434 0. 0. ]
[ 0. 1. 1. 0. 0. ]
[ 0. 0. 1. 0. 0. ]]
[[ 0.41997434 0. 0.41997434 0. 0. ]
[ 0. 1. 1. 0. 0. ]
[ 0. 0. 1. 0. 0. ]]
Note that we need to iterate over the indices of ``y`` and not over the elements of ``y``. The reason is that scan creates a placeholder variable for its internal function and this placeholder variable does not have the same dependencies as the variables that will replace it. Note that we need to iterate over the indices of ``y`` and not over the elements of ``y``. The reason is that scan creates a placeholder variable for its internal function and this placeholder variable does not have the same dependencies as the variables that will replace it.
**Scan Example: Accumulate number of loop during a scan** **Scan Example: Accumulate number of loop during a scan**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -246,7 +314,7 @@ Note that we need to iterate over the indices of ``y`` and not over the elements ...@@ -246,7 +314,7 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
**Scan Example: Computing tanh(v.dot(W) + b) * d where d is binomial** **Scan Example: Computing tanh(v.dot(W) + b) * d where d is binomial**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -268,13 +336,26 @@ Note that we need to iterate over the indices of ``y`` and not over the elements ...@@ -268,13 +336,26 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
w = np.ones((2, 2), dtype=theano.config.floatX) w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX) b = np.ones((2), dtype=theano.config.floatX)
print compute_with_bnoise(x, w, b) print(compute_with_bnoise(x, w, b))
.. testoutput::
[array([[ 0.96402758, 0. ],
[ 0. , 0.96402758],
[ 0. , 0. ],
[ 0.76159416, 0.76159416],
[ 0.76159416, 0. ],
[ 0. , 0.76159416],
[ 0. , 0.76159416],
[ 0. , 0.76159416],
[ 0. , 0. ],
[ 0.76159416, 0.76159416]])]
Note that if you want to use a random variable ``d`` that will not be updated through scan loops, you should pass this variable as a ``non_sequences`` argument. Note that if you want to use a random variable ``d`` that will not be updated through scan loops, you should pass this variable as a ``non_sequences`` argument.
**Scan Example: Computing pow(A, k)** **Scan Example: Computing pow(A, k)**
.. code-block:: python .. testcode::
import theano import theano
import theano.tensor as T import theano.tensor as T
...@@ -298,13 +379,16 @@ Note that if you want to use a random variable ``d`` that will not be updated th ...@@ -298,13 +379,16 @@ Note that if you want to use a random variable ``d`` that will not be updated th
power = theano.function(inputs=[A, k], outputs=final_result, power = theano.function(inputs=[A, k], outputs=final_result,
updates=updates) updates=updates)
print power(range(10), 2) print(power(range(10), 2))
#[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
.. testoutput::
[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
**Scan Example: Calculating a Polynomial** **Scan Example: Calculating a Polynomial**
.. code-block:: python .. testcode::
import numpy import numpy
import theano import theano
...@@ -329,7 +413,10 @@ Note that if you want to use a random variable ``d`` that will not be updated th ...@@ -329,7 +413,10 @@ Note that if you want to use a random variable ``d`` that will not be updated th
test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32) test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32)
print calculate_polynomial(test_coeff, 3) print(calculate_polynomial(test_coeff, 3))
# 19.0
.. testoutput::
19.0
......
...@@ -43,7 +43,7 @@ Exercise ...@@ -43,7 +43,7 @@ Exercise
Consider the logistic regression: Consider the logistic regression:
.. code-block:: python .. testcode::
import numpy import numpy
import theano import theano
...@@ -63,8 +63,6 @@ Consider the logistic regression: ...@@ -63,8 +63,6 @@ Consider the logistic regression:
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b") b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0] x.tag.test_value = D[0]
y.tag.test_value = D[1] y.tag.test_value = D[1]
#print "Initial model:"
#print w.get_value(), b.get_value()
# Construct Theano expression graph # Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
...@@ -77,33 +75,40 @@ Consider the logistic regression: ...@@ -77,33 +75,40 @@ Consider the logistic regression:
train = theano.function( train = theano.function(
inputs=[x,y], inputs=[x,y],
outputs=[prediction, xent], outputs=[prediction, xent],
updates={w:w-0.01*gw, b:b-0.01*gb}, updates=[(w, w-0.01*gw), (b, b-0.01*gb)],
name = "train") name = "train")
predict = theano.function(inputs=[x], outputs=prediction, predict = theano.function(inputs=[x], outputs=prediction,
name = "predict") name = "predict")
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in
train.maker.fgraph.toposort()]): train.maker.fgraph.toposort()]):
print 'Used the cpu' print('Used the cpu')
elif any([x.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for x in elif any([x.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for x in
train.maker.fgraph.toposort()]): train.maker.fgraph.toposort()]):
print 'Used the gpu' print('Used the gpu')
else: else:
print 'ERROR, not able to tell if theano used the cpu or the gpu' print('ERROR, not able to tell if theano used the cpu or the gpu')
print train.maker.fgraph.toposort() print(train.maker.fgraph.toposort())
for i in range(training_steps): for i in range(training_steps):
pred, err = train(D[0], D[1]) pred, err = train(D[0], D[1])
#print "Final model:"
#print w.get_value(), b.get_value()
print "target values for D" print("target values for D")
print D[1] print(D[1])
print("prediction on D")
print(predict(D[0]))
print "prediction on D" .. testoutput::
print predict(D[0]) :hide:
:options: +ELLIPSIS
Used the cpu
target values for D
...
prediction on D
...
Modify and execute this example to run on CPU (the default) with floatX=float32 and Modify and execute this example to run on CPU (the default) with floatX=float32 and
time the execution using the command line ``time python file.py``. Save your code time the execution using the command line ``time python file.py``. Save your code
as it will be useful later on. as it will be useful later on.
...@@ -215,7 +220,7 @@ cluster!). ...@@ -215,7 +220,7 @@ cluster!).
DebugMode is used as follows: DebugMode is used as follows:
.. code-block:: python .. testcode::
x = T.dvector('x') x = T.dvector('x')
...@@ -296,8 +301,9 @@ Compiling your Graph with ProfileMode ...@@ -296,8 +301,9 @@ Compiling your Graph with ProfileMode
Once the ProfileMode instance is created, simply compile your graph as you Once the ProfileMode instance is created, simply compile your graph as you
would normally, by specifying the mode parameter. would normally, by specifying the mode parameter.
>>> # with functions >>> v1, v2 = T.vectors(2)
>>> f = theano.function([input1,input2],[output1], mode=profmode) >>> o = v1 + v2
>>> f = theano.function([v1,v2],[o], mode=profmode)
Retrieving Timing Information Retrieving Timing Information
----------------------------- -----------------------------
......
.. _numpy: .. _numpy:
.. testsetup::
import numpy
*************** ***************
NumPy refresher NumPy refresher
...@@ -59,7 +62,7 @@ compatible shapes. The example below shows an instance of ...@@ -59,7 +62,7 @@ compatible shapes. The example below shows an instance of
>>> a = numpy.asarray([1.0, 2.0, 3.0]) >>> a = numpy.asarray([1.0, 2.0, 3.0])
>>> b = 2.0 >>> b = 2.0
>>> a * b >>> a * b
array([2., 4., 6.]) array([ 2., 4., 6.])
The smaller array ``b`` (actually a scalar here, which works like a 0-d array) in this case is *broadcasted* to the same size The smaller array ``b`` (actually a scalar here, which works like a 0-d array) in this case is *broadcasted* to the same size
as ``a`` during the multiplication. This trick is often useful in as ``a`` during the multiplication. This trick is often useful in
......
...@@ -67,40 +67,39 @@ Debug Print ...@@ -67,40 +67,39 @@ Debug Print
The pre-compilation graph: The pre-compilation graph:
>>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE >>> theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE
Elemwise{gt,no_inplace} [@A] '' Elemwise{gt,no_inplace} [@A] ''
|Elemwise{true_div,no_inplace} [@B] '' |Elemwise{true_div,no_inplace} [@B] ''
| |DimShuffle{x} [@C] '' | |DimShuffle{x} [@C] ''
| | |TensorConstant{1} [@D] | | |TensorConstant{1} [@D]
| |Elemwise{add,no_inplace} [@E] '' | |Elemwise{add,no_inplace} [@E] ''
| |DimShuffle{x} [@F] '' | |DimShuffle{x} [@F] ''
| | |TensorConstant{1} [@D] | | |TensorConstant{1} [@D]
| |Elemwise{exp,no_inplace} [@G] '' | |Elemwise{exp,no_inplace} [@G] ''
| |Elemwise{sub,no_inplace} [@H] '' | |Elemwise{sub,no_inplace} [@H] ''
| |Elemwise{neg,no_inplace} [@I] '' | |Elemwise{neg,no_inplace} [@I] ''
| | |dot [@J] '' | | |dot [@J] ''
| | |x [@K] | | |x [@K]
| | |w [@L] | | |w [@L]
| |DimShuffle{x} [@M] '' | |DimShuffle{x} [@M] ''
| |b [@N] | |b [@N]
|DimShuffle{x} [@O] '' |DimShuffle{x} [@O] ''
|TensorConstant{0.5} [@P] |TensorConstant{0.5} [@P]
The post-compilation graph: The post-compilation graph:
>>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE >>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE
Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4 Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [@A] '' 4
|CGemv{inplace} [@B] '' 3 |CGemv{inplace} [@B] '' 3
| |Alloc [@C] '' 2 | |AllocEmpty{dtype='float64'} [@C] '' 2
| | |TensorConstant{0.0} [@D] | | |Shape_i{0} [@D] '' 1
| | |Shape_i{0} [@E] '' 1 | | |x [@E]
| | |x [@F] | |TensorConstant{1.0} [@F]
| |TensorConstant{1.0} [@G] | |x [@E]
| |x [@F] | |w [@G]
| |w [@H] | |TensorConstant{0.0} [@H]
| |TensorConstant{0.0} [@D] |InplaceDimShuffle{x} [@I] '' 0
|InplaceDimShuffle{x} [@I] '' 0 | |b [@J]
| |b [@J] |TensorConstant{(1,) of 0.5} [@K]
|TensorConstant{(1,) of 0.5} [@K]
Picture Printing of Graphs Picture Printing of Graphs
...@@ -108,7 +107,7 @@ Picture Printing of Graphs ...@@ -108,7 +107,7 @@ Picture Printing of Graphs
The pre-compilation graph: The pre-compilation graph:
>>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True) >>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_prediction.png The output file is available at pics/logreg_pydotprint_prediction.png
.. image:: ./pics/logreg_pydotprint_prediction.png .. image:: ./pics/logreg_pydotprint_prediction.png
...@@ -116,7 +115,7 @@ The output file is available at pics/logreg_pydotprint_prediction.png ...@@ -116,7 +115,7 @@ The output file is available at pics/logreg_pydotprint_prediction.png
The post-compilation graph: The post-compilation graph:
>>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True) >>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_predict.png The output file is available at pics/logreg_pydotprint_predict.png
.. image:: ./pics/logreg_pydotprint_predict.png .. image:: ./pics/logreg_pydotprint_predict.png
...@@ -124,7 +123,7 @@ The output file is available at pics/logreg_pydotprint_predict.png ...@@ -124,7 +123,7 @@ The output file is available at pics/logreg_pydotprint_predict.png
The optimized training graph: The optimized training graph:
>>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True) >>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at pics/logreg_pydotprint_train.png The output file is available at pics/logreg_pydotprint_train.png
.. image:: ./pics/logreg_pydotprint_train.png .. image:: ./pics/logreg_pydotprint_train.png
......
...@@ -24,7 +24,7 @@ Currently, information regarding shape is used in two ways in Theano: ...@@ -24,7 +24,7 @@ Currently, information regarding shape is used in two ways in Theano:
>>> x = theano.tensor.matrix('x') >>> x = theano.tensor.matrix('x')
>>> f = theano.function([x], (x ** 2).shape) >>> f = theano.function([x], (x ** 2).shape)
>>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE >>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
MakeVector [@A] '' 2 MakeVector{dtype='int64'} [@A] '' 2
|Shape_i{0} [@B] '' 1 |Shape_i{0} [@B] '' 1
| |x [@C] | |x [@C]
|Shape_i{1} [@D] '' 0 |Shape_i{1} [@D] '' 0
...@@ -49,9 +49,9 @@ can lead to errors. Consider this example: ...@@ -49,9 +49,9 @@ can lead to errors. Consider this example:
>>> xv = numpy.random.rand(5, 4) >>> xv = numpy.random.rand(5, 4)
>>> yv = numpy.random.rand(3, 3) >>> yv = numpy.random.rand(3, 3)
>>> f = theano.function([x,y], z.shape) >>> f = theano.function([x, y], z.shape)
>>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE >>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
MakeVector [@A] '' 4 MakeVector{dtype='int64'} [@A] '' 4
|Elemwise{Add}[(0, 0)] [@B] '' 3 |Elemwise{Add}[(0, 0)] [@B] '' 3
| |Shape_i{0} [@C] '' 1 | |Shape_i{0} [@C] '' 1
| | |x [@D] | | |x [@D]
...@@ -60,8 +60,8 @@ MakeVector [@A] '' 4 ...@@ -60,8 +60,8 @@ MakeVector [@A] '' 4
|Shape_i{1} [@G] '' 0 |Shape_i{1} [@G] '' 0
|x [@D] |x [@D]
print f(xv,yv)# DOES NOT RAISE AN ERROR AS SHOULD BE. >>> f(xv, yv) # DOES NOT RAISE AN ERROR AS SHOULD BE.
[8, 4] array([8, 4])
>>> f = theano.function([x,y], z)# Do not take the shape. >>> f = theano.function([x,y], z)# Do not take the shape.
>>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE >>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
...@@ -70,8 +70,10 @@ Join [@A] '' 0 ...@@ -70,8 +70,10 @@ Join [@A] '' 0
|x [@C] |x [@C]
|y [@D] |y [@D]
>>> f(xv,yv) # doctest: +SKIP >>> f(xv, yv) # doctest: +ELLIPSIS
>>> # Raises a dimensions mismatch error. Traceback (most recent call last):
...
ValueError: ...
As you can see, when asking only for the shape of some computation (``join`` in the As you can see, when asking only for the shape of some computation (``join`` in the
example), an inferred shape is computed directly, without executing example), an inferred shape is computed directly, without executing
......
...@@ -104,7 +104,7 @@ does not provide any way to handle a number of dimensions different from two. ...@@ -104,7 +104,7 @@ does not provide any way to handle a number of dimensions different from two.
The set of all accepted ``dtype`` for the sparse matrices can be found in The set of all accepted ``dtype`` for the sparse matrices can be found in
``sparse.all_dtypes``. ``sparse.all_dtypes``.
>>> sparse.all_dtypes >>> sparse.all_dtypes # doctest: +SKIP
set(['int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64', set(['int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64',
'float32', 'float64', 'complex64', 'complex128']) 'float32', 'float64', 'complex64', 'complex128'])
......
...@@ -34,8 +34,9 @@ detail about these building blocks refer to :ref:`variable`, :ref:`op`, ...@@ -34,8 +34,9 @@ detail about these building blocks refer to :ref:`variable`, :ref:`op`,
**Code** **Code**
.. code-block:: python .. testcode::
import theano.tensor as T
x = T.dmatrix('x') x = T.dmatrix('x')
y = T.dmatrix('y') y = T.dmatrix('y')
z = x + y z = x + y
...@@ -159,9 +160,9 @@ as we apply it. Consider the following example of optimization: ...@@ -159,9 +160,9 @@ as we apply it. Consider the following example of optimization:
>>> f = theano.function([a], b) # compile function >>> f = theano.function([a], b) # compile function
>>> print f([0, 1, 2]) # prints `array([0,2,1026])` >>> print f([0, 1, 2]) # prints `array([0,2,1026])`
[ 0. 2. 1026.] [ 0. 2. 1026.]
>>> theano.printing.pydotprint(b, outfile="./pics/symbolic_graph_unopt.png", var_with_name_simple=True) >>> theano.printing.pydotprint(b, outfile="./pics/symbolic_graph_unopt.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_unopt.png The output file is available at ./pics/symbolic_graph_unopt.png
>>> theano.printing.pydotprint(f, outfile="./pics/symbolic_graph_opt.png", var_with_name_simple=True) >>> theano.printing.pydotprint(f, outfile="./pics/symbolic_graph_opt.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_opt.png The output file is available at ./pics/symbolic_graph_opt.png
......
...@@ -33,10 +33,7 @@ Testing Theano with GPU ...@@ -33,10 +33,7 @@ Testing Theano with GPU
To see if your GPU is being used, cut and paste the following program into a To see if your GPU is being used, cut and paste the following program into a
file and run it. file and run it.
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_using_gpu.test_using_gpu_1
.. code-block:: python
from theano import function, config, shared, sandbox from theano import function, config, shared, sandbox
import theano.tensor as T import theano.tensor as T
...@@ -49,17 +46,17 @@ file and run it. ...@@ -49,17 +46,17 @@ file and run it.
rng = numpy.random.RandomState(22) rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX)) x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], T.exp(x)) f = function([], T.exp(x))
print f.maker.fgraph.toposort() print(f.maker.fgraph.toposort())
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
t1 = time.time() t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds' print("Looping %d times took %f seconds" % (iters, t1 - t0))
print 'Result is', r print("Result is %s" % (r,))
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]): if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
print 'Used the cpu' print('Used the cpu')
else: else:
print 'Used the gpu' print('Used the gpu')
The program just computes the ``exp()`` of a bunch of random numbers. The program just computes the ``exp()`` of a bunch of random numbers.
Note that we use the ``shared`` function to Note that we use the ``shared`` function to
...@@ -71,7 +68,16 @@ If I run this program (in check1.py) with ``device=cpu``, my computer takes a li ...@@ -71,7 +68,16 @@ If I run this program (in check1.py) with ``device=cpu``, my computer takes a li
whereas on the GPU it takes just over 0.64 seconds. The GPU will not always produce the exact whereas on the GPU it takes just over 0.64 seconds. The GPU will not always produce the exact
same floating-point numbers as the CPU. As a benchmark, a loop that calls ``numpy.exp(x.get_value())`` takes about 46 seconds. same floating-point numbers as the CPU. As a benchmark, a loop that calls ``numpy.exp(x.get_value())`` takes about 46 seconds.
.. code-block:: text .. testoutput::
:hide:
:options: +ELLIPSIS
[Elemwise{exp,no_inplace}(<TensorType(float64, vector)>)]
Looping 1000 times took ... seconds
Result is ...
Used the cpu
.. code-block:: none
$ THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 python check1.py $ THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 python check1.py
[Elemwise{exp,no_inplace}(<TensorType(float32, vector)>)] [Elemwise{exp,no_inplace}(<TensorType(float32, vector)>)]
...@@ -102,10 +108,7 @@ the graph to express a computation with a GPU-stored result. The ``gpu_from_hos ...@@ -102,10 +108,7 @@ the graph to express a computation with a GPU-stored result. The ``gpu_from_hos
op means "copy the input from the host to the GPU" and it is optimized away op means "copy the input from the host to the GPU" and it is optimized away
after the ``T.exp(x)`` is replaced by a GPU version of ``exp()``. after the ``T.exp(x)`` is replaced by a GPU version of ``exp()``.
.. If you modify this code, also change : .. testcode::
.. theano/tests/test_tutorial.py:T_using_gpu.test_using_gpu_2
.. code-block:: python
from theano import function, config, shared, sandbox from theano import function, config, shared, sandbox
import theano.sandbox.cuda.basic_ops import theano.sandbox.cuda.basic_ops
...@@ -117,24 +120,35 @@ after the ``T.exp(x)`` is replaced by a GPU version of ``exp()``. ...@@ -117,24 +120,35 @@ after the ``T.exp(x)`` is replaced by a GPU version of ``exp()``.
iters = 1000 iters = 1000
rng = numpy.random.RandomState(22) rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX)) x = shared(numpy.asarray(rng.rand(vlen), 'float32'))
f = function([], sandbox.cuda.basic_ops.gpu_from_host(T.exp(x))) f = function([], sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)))
print f.maker.fgraph.toposort() print(f.maker.fgraph.toposort())
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
t1 = time.time() t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds' print("Looping %d times took %f seconds" % (iters, t1 - t0))
print 'Result is', r print("Result is %s" % (r,))
print 'Numpy result is', numpy.asarray(r) print("Numpy result is %s" % (numpy.asarray(r),))
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]): if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
print 'Used the cpu' print('Used the cpu')
else: else:
print 'Used the gpu' print('Used the gpu')
The output from this program is The output from this program is
.. code-block:: text .. testoutput::
:hide:
:options: +ELLIPSIS, +SKIP
Using gpu device 0: GeForce GTX 580
[GpuElemwise{exp,no_inplace}(<CudaNdarrayType(float32, vector)>)]
Looping 1000 times took ... seconds
Result is <CudaNdarray object at 0x...>
Numpy result is ...
Used the gpu
.. code-block:: none
$ THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python check2.py $ THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python check2.py
Using gpu device 0: GeForce GTX 580 Using gpu device 0: GeForce GTX 580
...@@ -253,7 +267,7 @@ Exercise ...@@ -253,7 +267,7 @@ Exercise
Consider again the logistic regression: Consider again the logistic regression:
.. code-block:: python .. testcode::
import numpy import numpy
import theano import theano
...@@ -273,8 +287,6 @@ Consider again the logistic regression: ...@@ -273,8 +287,6 @@ Consider again the logistic regression:
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b") b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0] x.tag.test_value = D[0]
y.tag.test_value = D[1] y.tag.test_value = D[1]
#print "Initial model:"
#print w.get_value(), b.get_value()
# Construct Theano expression graph # Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one p_1 = 1 / (1 + T.exp(-T.dot(x, w)-b)) # Probability of having a one
...@@ -287,33 +299,39 @@ Consider again the logistic regression: ...@@ -287,33 +299,39 @@ Consider again the logistic regression:
train = theano.function( train = theano.function(
inputs=[x,y], inputs=[x,y],
outputs=[prediction, xent], outputs=[prediction, xent],
updates={w:w-0.01*gw, b:b-0.01*gb}, updates=[(w, w-0.01*gw), (b, b-0.01*gb)],
name = "train") name = "train")
predict = theano.function(inputs=[x], outputs=prediction, predict = theano.function(inputs=[x], outputs=prediction,
name = "predict") name = "predict")
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in
train.maker.fgraph.toposort()]): train.maker.fgraph.toposort()]):
print 'Used the cpu' print('Used the cpu')
elif any([x.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for x in elif any([x.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for x in
train.maker.fgraph.toposort()]): train.maker.fgraph.toposort()]):
print 'Used the gpu' print('Used the gpu')
else: else:
print 'ERROR, not able to tell if theano used the cpu or the gpu' print('ERROR, not able to tell if theano used the cpu or the gpu')
print train.maker.fgraph.toposort() print(train.maker.fgraph.toposort())
for i in range(training_steps): for i in range(training_steps):
pred, err = train(D[0], D[1]) pred, err = train(D[0], D[1])
#print "Final model:"
#print w.get_value(), b.get_value()
print "target values for D" print("target values for D")
print D[1] print(D[1])
print "prediction on D"
print predict(D[0])
print("prediction on D")
print(predict(D[0]))
.. testoutput::
:hide:
:options: +ELLIPSIS
Used the cpu
target values for D
...
prediction on D
...
Modify and execute this example to run on GPU with ``floatX=float32`` and Modify and execute this example to run on GPU with ``floatX=float32`` and
time it using the command line ``time python file.py``. (Of course, you may use some of your answer time it using the command line ``time python file.py``. (Of course, you may use some of your answer
...@@ -373,7 +391,7 @@ Testing Theano with GPU ...@@ -373,7 +391,7 @@ Testing Theano with GPU
To see if your GPU is being used, cut and paste the following program To see if your GPU is being used, cut and paste the following program
into a file and run it. into a file and run it.
.. code-block:: python .. testcode::
from theano import function, config, shared, tensor, sandbox from theano import function, config, shared, tensor, sandbox
import numpy import numpy
...@@ -385,25 +403,34 @@ into a file and run it. ...@@ -385,25 +403,34 @@ into a file and run it.
rng = numpy.random.RandomState(22) rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX)) x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], tensor.exp(x)) f = function([], tensor.exp(x))
print f.maker.fgraph.toposort() print(f.maker.fgraph.toposort())
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
t1 = time.time() t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds' print("Looping %d times took %f seconds" % (iters, t1 - t0))
print 'Result is', r print("Result is %s" % (r,))
if numpy.any([isinstance(x.op, tensor.Elemwise) and if numpy.any([isinstance(x.op, tensor.Elemwise) and
('Gpu' not in type(x.op).__name__) ('Gpu' not in type(x.op).__name__)
for x in f.maker.fgraph.toposort()]): for x in f.maker.fgraph.toposort()]):
print 'Used the cpu' print('Used the cpu')
else: else:
print 'Used the gpu' print('Used the gpu')
The program just computes ``exp()`` of a bunch of random numbers. Note The program just computes ``exp()`` of a bunch of random numbers. Note
that we use the :func:`theano.shared` function to make sure that the that we use the :func:`theano.shared` function to make sure that the
input *x* is stored on the GPU. input *x* is stored on the GPU.
.. code-block:: text .. testoutput::
:hide:
:options: +ELLIPSIS
[Elemwise{exp,no_inplace}(<TensorType(float64, vector)>)]
Looping 1000 times took ... seconds
Result is ...
Used the cpu
.. code-block:: none
$ THEANO_FLAGS=device=cpu python check1.py $ THEANO_FLAGS=device=cpu python check1.py
[Elemwise{exp,no_inplace}(<TensorType(float64, vector)>)] [Elemwise{exp,no_inplace}(<TensorType(float64, vector)>)]
...@@ -432,8 +459,7 @@ the value of the ``device`` flag without touching the code. ...@@ -432,8 +459,7 @@ the value of the ``device`` flag without touching the code.
If you don't mind a loss of flexibility, you can ask theano to return If you don't mind a loss of flexibility, you can ask theano to return
the GPU object directly. The following code is modified to do just that. the GPU object directly. The following code is modified to do just that.
.. code-block:: python .. testcode::
:emphasize-lines: 10,17
from theano import function, config, shared, tensor, sandbox from theano import function, config, shared, tensor, sandbox
import numpy import numpy
...@@ -445,19 +471,19 @@ the GPU object directly. The following code is modifed to do just that. ...@@ -445,19 +471,19 @@ the GPU object directly. The following code is modifed to do just that.
rng = numpy.random.RandomState(22) rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX)) x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], sandbox.gpuarray.basic_ops.gpu_from_host(tensor.exp(x))) f = function([], sandbox.gpuarray.basic_ops.gpu_from_host(tensor.exp(x)))
print f.maker.fgraph.toposort() print(f.maker.fgraph.toposort())
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
t1 = time.time() t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds' print("Looping %d times took %f seconds" % (iters, t1 - t0))
print 'Result is', numpy.asarray(r) print("Result is %s" % (numpy.asarray(r),))
if numpy.any([isinstance(x.op, tensor.Elemwise) and if numpy.any([isinstance(x.op, tensor.Elemwise) and
('Gpu' not in type(x.op).__name__) ('Gpu' not in type(x.op).__name__)
for x in f.maker.fgraph.toposort()]): for x in f.maker.fgraph.toposort()]):
print 'Used the cpu' print('Used the cpu')
else: else:
print 'Used the gpu' print('Used the gpu')
Here the :func:`theano.sandbox.gpuarray.basic.gpu_from_host` call Here the :func:`theano.sandbox.gpuarray.basic.gpu_from_host` call
means "copy input to the GPU". However during the optimization phase, means "copy input to the GPU". However during the optimization phase,
...@@ -466,7 +492,17 @@ used here to tell theano that we want the result on the GPU. ...@@ -466,7 +492,17 @@ used here to tell theano that we want the result on the GPU.
The output is The output is
.. code-block:: text .. testoutput::
:hide:
:options: +ELLIPSIS, +SKIP
Using device cuda0: ...
[GpuElemwise{exp,no_inplace}(<GpuArray<float64>>)]
Looping 1000 times took ... seconds
Result is ...
Used the gpu
.. code-block:: none
$ THEANO_FLAGS=device=cuda0 python check2.py $ THEANO_FLAGS=device=cuda0 python check2.py
Using device cuda0: GeForce GTX 275 Using device cuda0: GeForce GTX 275
...@@ -663,7 +699,7 @@ you feel competent enough, you may try yourself on the corresponding exercises. ...@@ -663,7 +699,7 @@ you feel competent enough, you may try yourself on the corresponding exercises.
block=(400,1,1), grid=(1,1)) block=(400,1,1), grid=(1,1))
assert numpy.allclose(dest, a*b) assert numpy.allclose(dest, a*b)
print dest print(dest)
Exercise Exercise
...@@ -722,10 +758,10 @@ Modify and execute to work for a matrix of shape (20, 10). ...@@ -722,10 +758,10 @@ Modify and execute to work for a matrix of shape (20, 10).
Use this code to test it: Use this code to test it:
>>> x = theano.tensor.fmatrix() >>> x = theano.tensor.fmatrix()
>>> f = theano.function([x], PyCUDADoubleOp()(x)) >>> f = theano.function([x], PyCUDADoubleOp()(x)) # doctest: +SKIP
>>> xv = numpy.ones((4, 5), dtype="float32") >>> xv = numpy.ones((4, 5), dtype="float32")
>>> assert numpy.allclose(f(xv), xv*2) >>> assert numpy.allclose(f(xv), xv*2) # doctest: +SKIP
>>> print numpy.asarray(f(xv)) >>> print(numpy.asarray(f(xv))) # doctest: +SKIP
Exercise Exercise
......
...@@ -46,8 +46,8 @@ def function_dump(filename, inputs, outputs=None, mode=None, updates=None, ...@@ -46,8 +46,8 @@ def function_dump(filename, inputs, outputs=None, mode=None, updates=None,
To load such a dump and do the compilation: To load such a dump and do the compilation:
>>> import cPickle, theano >>> import cPickle, theano
>>> d=cPickle.load(open("func_dump.bin", "rb")) >>> d = cPickle.load(open("func_dump.bin", "rb")) # doctest: +SKIP
>>> f=theano.function(**d) >>> f = theano.function(**d) # doctest: +SKIP
""" """
assert isinstance(filename, string_types) assert isinstance(filename, string_types)
......
...@@ -456,7 +456,6 @@ def remove(predicate, coll): ...@@ -456,7 +456,6 @@ def remove(predicate, coll):
Examples Examples
-------- --------
>>> from itertoolz import remove
>>> def even(x): >>> def even(x):
... return x % 2 == 0 ... return x % 2 == 0
>>> remove(even, [1, 2, 3, 4]) >>> remove(even, [1, 2, 3, 4])
......
...@@ -1525,8 +1525,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, ...@@ -1525,8 +1525,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
Example: Example:
>>> verify_grad(theano.tensor.tanh, >>> verify_grad(theano.tensor.tanh,
(numpy.asarray([[2,3,4], [-1, 3.3, 9.9]]),), ... (numpy.asarray([[2,3,4], [-1, 3.3, 9.9]]),),
rng=numpy.random) ... rng=numpy.random)
Raises an Exception if the difference between the analytic gradient and Raises an Exception if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) of a numerical gradient (computed through the Finite Difference Method) of a
......
...@@ -1092,6 +1092,7 @@ class Unique(theano.Op): ...@@ -1092,6 +1092,7 @@ class Unique(theano.Op):
Examples Examples
-------- --------
>>> import numpy as np >>> import numpy as np
>>> import theano
>>> x = theano.tensor.vector() >>> x = theano.tensor.vector()
>>> f = theano.function([x], Unique(True, True, False)(x)) >>> f = theano.function([x], Unique(True, True, False)(x))
......
...@@ -83,7 +83,7 @@ def load(path, dtype, broadcastable, mmap_mode=None): ...@@ -83,7 +83,7 @@ def load(path, dtype, broadcastable, mmap_mode=None):
>>> x = tensor.load(path, 'int64', (False,)) >>> x = tensor.load(path, 'int64', (False,))
>>> y = x*2 >>> y = x*2
>>> fn = function([path], y) >>> fn = function([path], y)
>>> fn("stored-array.npy") >>> fn("stored-array.npy") # doctest: +SKIP
array([0, 2, 4, 6, 8], dtype=int64) array([0, 2, 4, 6, 8], dtype=int64)
""" """
......
...@@ -55,9 +55,11 @@ def shape_of_variables(fgraph, input_shapes): ...@@ -55,9 +55,11 @@ def shape_of_variables(fgraph, input_shapes):
>>> x = theano.tensor.matrix('x') >>> x = theano.tensor.matrix('x')
>>> y = x[512:]; y.name = 'y' >>> y = x[512:]; y.name = 'y'
>>> fgraph = theano.FunctionGraph([x], [y], clone=False) >>> fgraph = theano.FunctionGraph([x], [y], clone=False)
>>> shape_of_variables(fgraph, {x: (1024, 1024)}) >>> d = shape_of_variables(fgraph, {x: (1024, 1024)})
{y: (512, 1024), x: (1024, 1024)} >>> d[y]
(array(512), array(1024))
>>> d[x]
(array(1024), array(1024))
""" """
if not hasattr(fgraph, 'shape_feature'): if not hasattr(fgraph, 'shape_feature'):
......
...@@ -40,7 +40,6 @@ whitelist_flake8 = [ ...@@ -40,7 +40,6 @@ whitelist_flake8 = [
"tests/test_pickle_unpickle_theano_fn.py", "tests/test_pickle_unpickle_theano_fn.py",
"tests/test_determinism.py", "tests/test_determinism.py",
"tests/record.py", "tests/record.py",
"tests/test_tutorial.py",
"tests/unittest_tools.py", "tests/unittest_tools.py",
"compile/__init__.py", "compile/__init__.py",
"compile/profiling.py", "compile/profiling.py",
......
""" test code snippet in the Theano tutorials.
"""
from __future__ import print_function
import os
import shutil
import unittest
from nose.plugins.attrib import attr
from nose.plugins.skip import SkipTest
import numpy
from numpy import array
import theano
import theano.tensor as T
from theano import function, compat
from six.moves import xrange
from theano import config
from theano.tests import unittest_tools as utt
from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.tensor.shared_randomstreams import RandomStreams
class T_extending(unittest.TestCase):
    """
    Tests mirroring the code snippets in the "Extending Theano" docs.

    All tests here belong to files in
    http://deeplearning.net/software/theano/extending
    (Theano/doc/extending/*.txt).  Any change you do here must also be
    made to the tutorial!  The snippets are kept in one test class
    because they are connected from one file to another and do not make
    sense on their own.
    """

    def test_extending_1(self):
        # First version: a plain ``gof.Type`` instance whose behaviour
        # is supplied by monkey-patched free functions.
        # Note that we shadow Python's function ``filter`` with this
        # definition.
        def filter(x, strict=False, allow_downcast=None):
            if strict:
                if isinstance(x, float):
                    return x
                else:
                    raise TypeError('Expected a float!')
            else:
                return float(x)

        def values_eq_approx(x, y, tolerance=1e-4):
            return abs(x - y) / (abs(x) + abs(y)) < tolerance

        from theano import gof

        double = gof.Type()
        double.filter = filter
        double.values_eq_approx = values_eq_approx

        # Second version: the same type written as a ``gof.Type``
        # subclass.
        from theano import gof

        class Double(gof.Type):

            def filter(self, x, strict=False):
                if strict and not isinstance(x, float):
                    raise TypeError('Expected a float!')
                return float(x)

            def values_eq_approx(self, x, y, tolerance=1e-4):
                return abs(x - y) / (abs(x) + abs(y)) < tolerance

            # Added to make those tests pass in DebugMode
            @staticmethod
            def may_share_memory(a, b):
                return a is b

        double = Double()

        def __eq__(self, other):
            return type(self) is Double and type(other) is Double

        # Third version: adds ``allow_downcast`` and ``__str__``, as in
        # the docs.
        from theano import gof

        class Double(gof.Type):

            def filter(self, x, strict=False, allow_downcast=None):
                if strict and not isinstance(x, float):
                    raise TypeError('Expected a float!')
                return float(x)

            def values_eq_approx(self, x, y, tolerance=1e-4):
                return abs(x - y) / (abs(x) + abs(y)) < tolerance

            def __str__(self):
                return "double"

            # Added to make those tests pass in DebugMode
            @staticmethod
            def may_share_memory(a, b):
                return a is b

        double = Double()

        # A bare ``gof.Op`` whose make_node/perform are patched in.
        from theano import gof

        mul = gof.Op()

        def make_node(x, y):
            if x.type != double or y.type != double:
                raise TypeError('mul only works on doubles')
            return gof.Apply(mul, [x, y], [double()])
        mul.make_node = make_node

        def perform(node, inputs, output_storage):
            x, y = inputs[0], inputs[1]
            z = output_storage[0]
            z[0] = x * y
        mul.perform = perform

        x, y = double('x'), double('y')
        z = mul(x, y)
        f = theano.function([x, y], z)
        assert f(5, 6) == 30.0
        assert f(5.6, 6.7) == 37.519999999999996

        # Without constant promotion, passing a raw Python number must
        # fail (2 has no ``.type``).
        x = double('x')
        self.assertRaises(AttributeError, mul, x, 2)

        # A make_node that promotes Python numbers to Constants.
        def make_node(x, y):
            if isinstance(x, (int, float)):
                x = gof.Constant(double, x)
            if isinstance(y, (int, float)):
                y = gof.Constant(double, y)
            if x.type != double or y.type != double:
                raise TypeError('mul only works on doubles')
            return gof.Apply(mul, [x, y], [double()])
        mul.make_node = make_node

        x = double('x')
        z = mul(x, 2)
        f = theano.function([x], z)
        assert f(10) == 20.0
        assert f(3.4) == 6.7999999999999998

        # Generic binary Op parameterized by a name and a Python
        # function.
        from theano import gof

        class BinaryDoubleOp(gof.Op):

            __props__ = ("name", "fn")

            def __init__(self, name, fn):
                self.name = name
                self.fn = fn

            def make_node(self, x, y):
                if isinstance(x, (int, float)):
                    x = gof.Constant(double, x)
                if isinstance(y, (int, float)):
                    y = gof.Constant(double, y)
                if x.type != double or y.type != double:
                    raise TypeError('%s only works on doubles' % self.name)
                return gof.Apply(self, [x, y], [double()])

            def perform(self, node, inp, out):
                x, y = inp
                z, = out
                z[0] = self.fn(x, y)

            def __str__(self):
                return self.name

        add = BinaryDoubleOp(name='add',
                             fn=lambda x, y: x + y)
        sub = BinaryDoubleOp(name='sub',
                             fn=lambda x, y: x - y)
        mul = BinaryDoubleOp(name='mul',
                             fn=lambda x, y: x * y)
        div = BinaryDoubleOp(name='div',
                             fn=lambda x, y: x / y)

    def test_extending_2(self):
        '''
        This test fails in DebugMode for the same reasons the test in
        tensor/tests/test_basic.py:T_scalarfromtensor.test0
        fails on debug mode ( as much as I could tell - Razvan )
        '''
        from theano import gof

        class Double(gof.Type):

            def filter(self, x, strict=False, allow_downcast=None):
                if strict and not isinstance(x, float):
                    raise TypeError('Expected a float!')
                return float(x)

            def values_eq_approx(self, x, y, tolerance=1e-4):
                return abs(x - y) / (abs(x) + abs(y)) < tolerance

            def __str__(self):
                return "double"

            # Added to make those tests pass in DebugMode
            @staticmethod
            def may_share_memory(a, b):
                return a is b

        double = Double()

        class BinaryDoubleOp(gof.Op):

            __props__ = ("name", "fn")

            def __init__(self, name, fn):
                self.name = name
                self.fn = fn

            def make_node(self, x, y):
                if isinstance(x, (int, float)):
                    x = gof.Constant(double, x)
                if isinstance(y, (int, float)):
                    y = gof.Constant(double, y)
                if x.type != double or y.type != double:
                    raise TypeError('%s only works on doubles' % self.name)
                return gof.Apply(self, [x, y], [double()])

            def perform(self, node, inp, out):
                x, y = inp
                z, = out
                z[0] = self.fn(x, y)

            def __str__(self):
                return self.name

        add = BinaryDoubleOp(name='add',
                             fn=lambda x, y: x + y)
        sub = BinaryDoubleOp(name='sub',
                             fn=lambda x, y: x - y)
        mul = BinaryDoubleOp(name='mul',
                             fn=lambda x, y: x * y)
        div = BinaryDoubleOp(name='div',
                             fn=lambda x, y: x / y)

        # C support monkey-patched onto the ``double`` instance, as in
        # the docs.
        def c_declare(name, sub, check_input=True):
            return """
            double %(name)s;
            """ % dict(name=name)
        double.c_declare = c_declare

        def c_init(name, sub):
            return """
            %(name)s = 0.0;
            """ % dict(name=name)
        double.c_init = c_init

        def c_extract(name, sub, check_input=True):
            if(check_input):
                pre = """
                if (!PyFloat_Check(py_%(name)s)) {
                    PyErr_SetString(PyExc_TypeError, "expected a float");
                    %(fail)s
                }""" % dict(name=name, fail=sub['fail'])
            else:
                pre = ""
            return pre + """
            %(name)s = PyFloat_AsDouble(py_%(name)s);
            """ % dict(name=name, fail=sub['fail'])
        double.c_extract = c_extract

        def c_sync(name, sub):
            return """
            Py_XDECREF(py_%(name)s);
            py_%(name)s = PyFloat_FromDouble(%(name)s);
            if (!py_%(name)s) {
                printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
                Py_XINCREF(Py_None);
                py_%(name)s = Py_None;
            }
            """ % dict(name=name)
        double.c_sync = c_sync

        def c_cleanup(name, sub):
            return ""
        double.c_cleanup = c_cleanup

        from theano import function
        x, y, z = double('x'), double('y'), double('z')
        a = add(x, y)
        b = mul(a, z)
        f = function([x, y, z], b)
        assert f(1.0, 2.0, 3.0) == 9.0

        # The same type with the C support written as methods.
        from theano import gof

        class Double(gof.Type):

            def filter(self, x, strict=False, allow_downcast=None):
                if strict and not isinstance(x, float):
                    raise TypeError('Expected a float!')
                return float(x)

            def values_eq_approx(self, x, y, tolerance=1e-4):
                # NOTE(review): unlike the earlier versions this divides
                # by (x + y) without abs() — kept as-is to match the doc
                # snippet.
                return abs(x - y) / (x + y) < tolerance

            def __str__(self):
                return "double"

            def c_declare(self, name, sub, check_input=True):
                return """
                double %(name)s;
                """ % dict(name=name)

            def c_init(self, name, sub):
                return """
                %(name)s = 0.0;
                """ % dict(name=name)

            def c_extract(self, name, sub, check_input=True):
                if(check_input):
                    pre = """
                    if (!PyFloat_Check(py_%(name)s)) {
                        PyErr_SetString(PyExc_TypeError, "expected a float");
                        %(fail)s
                    }
                    """ % dict(sub, name=name)
                else:
                    pre = ""
                return pre + """
                %(name)s = PyFloat_AsDouble(py_%(name)s);
                """ % dict(sub, name=name)

            def c_sync(self, name, sub):
                return """
                Py_XDECREF(py_%(name)s);
                py_%(name)s = PyFloat_FromDouble(%(name)s);
                if (!py_%(name)s) {
                    printf("PyFloat_FromDouble failed on: %%f\\n", %(name)s);
                    Py_XINCREF(Py_None);
                    py_%(name)s = Py_None;
                }
                """ % dict(name=name)

            def c_cleanup(self, name, sub):
                return ""

            # Added to make those tests pass in DebugMode
            @staticmethod
            def may_share_memory(a, b):
                return a is b

        double = Double()

        # C implementation patched onto the ``mul`` BinaryDoubleOp
        # instance created above.
        def c_code(node, name, input_names, output_names, sub):
            x_name, y_name = input_names[0], input_names[1]
            output_name = output_names[0]
            return """
            %(output_name)s = %(x_name)s * %(y_name)s;
            """ % locals()
        mul.c_code = c_code

        # The binary Op with a C implementation supplied as a template.
        from theano import gof

        class BinaryDoubleOp(gof.Op):

            __props__ = ("name", "fn", "ccode")

            def __init__(self, name, fn, ccode):
                self.name = name
                self.fn = fn
                self.ccode = ccode

            def make_node(self, x, y):
                if isinstance(x, (int, float)):
                    x = gof.Constant(double, x)
                if isinstance(y, (int, float)):
                    y = gof.Constant(double, y)
                if x.type != double or y.type != double:
                    raise TypeError('%s only works on doubles' % self.name)
                return gof.Apply(self, [x, y], [double()])

            def perform(self, node, inp, out):
                x, y = inp
                z, = out
                z[0] = self.fn(x, y)

            def __str__(self):
                return self.name

            def c_code(self, node, name, inp, out, sub):
                x, y = inp
                z, = out
                return self.ccode % locals()

        add = BinaryDoubleOp(name='add',
                             fn=lambda x, y: x + y,
                             ccode="%(z)s = %(x)s + %(y)s;")
        sub = BinaryDoubleOp(name='sub',
                             fn=lambda x, y: x - y,
                             ccode="%(z)s = %(x)s - %(y)s;")
        mul = BinaryDoubleOp(name='mul',
                             fn=lambda x, y: x * y,
                             ccode="%(z)s = %(x)s * %(y)s;")
        div = BinaryDoubleOp(name='div',
                             fn=lambda x, y: x / y,
                             ccode="%(z)s = %(x)s / %(y)s;")

        # Global optimizer that rewrites div(mul(a, b), y) into the
        # other mul operand when y matches one of them.
        from theano.gof import toolbox

        class Simplify(gof.Optimizer):

            def add_requirements(self, fgraph):
                fgraph.attach_feature(toolbox.ReplaceValidate())

            def apply(self, fgraph):
                for node in fgraph.toposort():
                    if node.op == div:
                        x, y = node.inputs
                        z = node.outputs[0]
                        if x.owner and x.owner.op == mul:
                            a, b = x.owner.inputs
                            if y == a:
                                fgraph.replace_validate(z, b)
                            elif y == b:
                                fgraph.replace_validate(z, a)

        simplify = Simplify()
        x = double('x')
        y = double('y')
        z = double('z')
        a = add(z, mul(div(mul(y, x), y), div(z, x)))
        e = gof.FunctionGraph([x, y, z], [a])
        simplify.optimize(e)

        # The same simplification expressed as a LocalOptimizer.
        class LocalSimplify(gof.LocalOptimizer):

            def transform(self, node):
                if node.op == div:
                    x, y = node.inputs
                    if x.owner and x.owner.op == mul:
                        a, b = x.owner.inputs
                        if y == a:
                            return [b]
                        elif y == b:
                            return [a]
                return False

            def tracks(self):
                # This should be needed for the EquilibriumOptimizer
                # but it isn't now
                # TODO: do this and explain it
                return []  # that's not what you should do

        local_simplify = LocalSimplify()

        x = double('x')
        y = double('y')
        z = double('z')
        a = add(z, mul(div(mul(y, x), y), div(z, x)))
        e = gof.FunctionGraph([x, y, z], [a])
        simplify = gof.TopoOptimizer(local_simplify)
        simplify.optimize(e)

    def test_as_op(self):
        import theano
        import numpy
        from theano.compile.ops import as_op

        def infer_shape_numpy_dot(node, input_shapes):
            ashp, bshp = input_shapes
            return [ashp[:-1] + bshp[-1:]]

        @as_op(itypes=[theano.tensor.fmatrix, theano.tensor.fmatrix],
               otypes=[theano.tensor.fmatrix],
               infer_shape=infer_shape_numpy_dot)
        def numpy_add(a, b):
            return numpy.add(a, b)

        def infer_shape_numpy_add_sub(node, input_shapes):
            ashp, bshp = input_shapes
            # Both inputs should have that same shape, so we just
            # return one of them.
            return [ashp[0]]

        @as_op(itypes=[theano.tensor.fmatrix, theano.tensor.fmatrix],
               otypes=[theano.tensor.fmatrix],
               infer_shape=infer_shape_numpy_add_sub)
        def numpy_add(a, b):
            return numpy.add(a, b)

        @as_op(itypes=[theano.tensor.fmatrix, theano.tensor.fmatrix],
               otypes=[theano.tensor.fmatrix],
               infer_shape=infer_shape_numpy_add_sub)
        def numpy_sub(a, b):
            # BUG FIX: ``numpy.sub`` does not exist; the subtraction
            # ufunc is ``numpy.subtract``.  The original line would
            # raise AttributeError if this Op were ever executed.
            return numpy.subtract(a, b)
class T_introduction(unittest.TestCase):
    """
    Tests mirroring the code snippets in the tutorial introduction.
    """
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/introduction.html
    # Theano/doc/tutorial/introduction.txt
    # Any change you do here also add it to the tutorial !

    def test_introduction_1(self):
        # Build a + b symbolically, compile it, and evaluate once.
        import theano
        from theano import tensor

        # declare two symbolic floating-point scalars
        a = tensor.dscalar()
        b = tensor.dscalar()

        # create a simple expression
        c = a + b

        # convert the expression into a callable object that takes (a,b)
        # values as input and computes a value for c
        f = theano.function([a, b], c)

        # bind 1.5 to 'a', 2.5 to 'b', and evaluate 'c'
        assert 4.0 == f(1.5, 2.5)
class T_adding(unittest.TestCase):
    """
    Tests mirroring the code snippets in the "Adding two Scalars"
    tutorial.
    """
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/adding.html
    # Theano/doc/tutorial/adding.txt
    # Any change you do here also add it to the tutorial !

    def test_adding_1(self):
        # Scalar addition, checked on two concrete input pairs.
        import theano.tensor as T
        from theano import function
        x = T.dscalar('x')
        y = T.dscalar('y')
        z = x + y
        f = function([x, y], z)
        assert f(2, 3) == numpy.array(5.0)
        assert f(16.3, 12.1) == numpy.array(28.4)

    def test_adding_2(self):
        # Matrix addition accepts both nested lists and numpy arrays.
        x = T.dmatrix('x')
        y = T.dmatrix('y')
        z = x + y
        f = function([x, y], z)
        assert numpy.all(f([[1, 2], [3, 4]], [[10, 20], [30, 40]]) ==
                         numpy.array([[ 11.,  22.], [ 33.,  44.]]))
        assert numpy.all(f(numpy.array([[1, 2], [3, 4]])
                           , numpy.array([[10, 20], [30, 40]])) ==
                         numpy.array([[ 11.,  22.], [ 33.,  44.]]))
class T_examples(unittest.TestCase):
    """
    Tests mirroring the code snippets in the "More Examples" tutorial.

    All tests here belong to
    http://deeplearning.net/software/theano/tutorial/examples.html
    (Theano/doc/tutorial/examples.txt).  Any change you do here must
    also be made to the tutorial!
    """

    def test_examples_1(self):
        # Logistic function computed elementwise on a matrix.
        x = T.dmatrix('x')
        s = 1 / (1 + T.exp(-x))
        logistic = function([x], s)
        assert numpy.allclose(logistic([[0, 1], [-1, -2]]),
                              array([[0.5, 0.73105858],
                                     [0.26894142, 0.11920292]]))

    def test_examples_2(self):
        # Same values from the tanh formulation of the logistic
        # function.
        x = T.dmatrix('x')
        s2 = (1 + T.tanh(x / 2)) / 2
        logistic2 = function([x], s2)
        assert numpy.allclose(logistic2([[0, 1], [-1, -2]]),
                              array([[0.5, 0.73105858],
                                     [0.26894142, 0.11920292]]))

    def test_examples_3(self):
        # A compiled function may return several outputs at once.
        a, b = T.dmatrices('a', 'b')
        diff = a - b
        abs_diff = abs(diff)
        diff_squared = diff**2
        f = function([a, b], [diff, abs_diff, diff_squared])
        elems = f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
        assert numpy.all(elems[0] == array([[1., 0.], [-1., -2.]]))
        assert numpy.all(elems[1] == array([[1., 0.], [1., 2.]]))
        assert numpy.all(elems[2] == array([[1., 0.], [1., 4.]]))

    def test_examples_4(self):
        # Symbolic gradient of x**2, pretty-printed and evaluated.
        from theano import pp
        x = T.dscalar('x')
        y = x**2
        gy = T.grad(y, x)
        # BUG FIX: the expected doctest output had been left as a bare
        # string literal (a no-op statement), so the pretty-printed
        # gradient prior to optimization was never actually checked.
        assert pp(gy) == '((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))'
        f = function([x], gy)
        assert f(4) == array(8.0)
        assert f(94.2) == array(188.40000000000001)

    def test_examples_5(self):
        # Gradient of the summed logistic, checked against known values.
        x = T.dmatrix('x')
        s = T.sum(1 / (1 + T.exp(-x)))
        gs = T.grad(s, x)
        dlogistic = function([x], gs)
        assert numpy.allclose(dlogistic([[0, 1], [-1, -2]]),
                              array([[0.25, 0.19661193],
                                     [0.19661193, 0.10499359]]))

    def test_examples_6(self):
        # Default value for an input, via Param.
        from theano import Param
        x, y = T.dscalars('x', 'y')
        z = x + y
        f = function([x, Param(y, default=1)], z)
        assert f(33) == array(34.0)
        assert f(33, 2) == array(35.0)

    def test_examples_7(self):
        # Defaults plus naming an input so it can be passed by keyword.
        from theano import Param
        x, y, w = T.dscalars('x', 'y', 'w')
        z = (x + y) * w
        f = function([x, Param(y, default=1), Param(w, default=2, name='w_by_name')], z)
        assert f(33) == array(68.0)
        assert f(33, 2) == array(70.0)
        assert f(33, 0, 1) == array(33.0)
        assert f(33, w_by_name=1) == array(34.0)
        assert f(33, w_by_name=1, y=0) == array(33.0)

    def test_examples_8(self):
        # Shared-variable state updated by a compiled function, plus
        # ``givens`` to substitute another input for the state.
        from theano import shared
        # Force the dtype to int64 to work correctly on 32 bit computer.
        # Otherwise, it create by default a int32 on 32 bit computer.
        state = shared(0)
        inc = T.iscalar('inc')
        accumulator = function([inc], state, updates=[(state, state+inc)])
        # The function returns the state *before* the update is applied.
        assert state.get_value() == array(0)
        assert accumulator(1) == array(0)
        assert state.get_value() == array(1)
        assert accumulator(300) == array(1)
        assert state.get_value() == array(301)
        state.set_value(-1)
        assert accumulator(3) == array(-1)
        assert state.get_value() == array(2)

        decrementor = function([inc], state, updates=[(state, state-inc)])
        assert decrementor(2) == array(2)
        assert state.get_value() == array(0)

        fn_of_state = state * 2 + inc
        # The type of foo must match the shared variable we are replacing
        # with the ``givens``
        foo = T.scalar(dtype=state.dtype)
        skip_shared = function([inc, foo], fn_of_state,
                               givens=[(state, foo)])
        # The substituted computation does not touch the real state.
        assert skip_shared(1, 3) == array(7)
        assert state.get_value() == array(0)

    def test_examples_9(self):
        # Random streams: per-variable update control, reseeding, and
        # saving/restoring an rng state.
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        rv_u = srng.uniform((2, 2))
        rv_n = srng.normal((2, 2))
        f = function([], rv_u)
        g = function([], rv_n, no_default_updates=True)  # Not updating rv_n.rng
        nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)

        f_val0 = f()
        f_val1 = f()  # different numbers from f_val0
        assert numpy.all(f_val0 != f_val1)

        g_val0 = g()  # different numbers from f_val0 and f_val1
        g_val1 = g()  # same numbers as g_val0 !!!
        assert numpy.all(g_val0 == g_val1)
        assert numpy.all(g_val0 != f_val0)
        assert numpy.all(g_val0 != f_val1)

        # A random variable is drawn only once per function call, so the
        # expression below is exactly zero.
        nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)
        assert numpy.allclose(nearly_zeros(), [[0., 0.], [0., 0.]])

        rng_val = rv_u.rng.get_value(borrow=True)  # Get the rng for rv_u
        rng_val.seed(89234)  # seeds the generator
        rv_u.rng.set_value(rng_val, borrow=True)  # Assign back seeded rng

        srng.seed(902340)  # seeds rv_u and rv_n with different seeds each

        # Save the rng state, consume some draws, then restore it and
        # check the stream replays.
        state_after_v0 = rv_u.rng.get_value().get_state()
        nearly_zeros()  # this affects rv_u's generator
        v1 = f()
        rng = rv_u.rng.get_value(borrow=True)
        rng.set_state(state_after_v0)
        rv_u.rng.set_value(rng, borrow=True)
        v2 = f()  # v2 != v1
        v3 = f()  # v3 == v1
        assert numpy.all(v1 != v2)
        assert numpy.all(v1 == v3)

    def test_copy_random_state(self):
        # Copying the random-number state from one graph's streams to
        # another makes the two functions produce the same draws.
        class Graph():
            def __init__(self, seed=123):
                self.rng = RandomStreams(seed)
                self.y = self.rng.uniform(size=(1,))

        g1 = Graph(seed=123)
        f1 = theano.function([], g1.y)
        g2 = Graph(seed=987)
        f2 = theano.function([], g2.y)

        # print 'By default, the two functions are out of sync.'
        v1 = f1()
        v2 = f2()

        def copy_random_state(g1, g2):
            if isinstance(g1.rng, MRG_RandomStreams):
                g2.rng.rstate = g1.rng.rstate
            for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
                su2[0].set_value(su1[0].get_value())

        # print 'We now copy the state of the theano random number generators.'
        copy_random_state(g1, g2)
        v3 = f1()
        v4 = f2()
        assert numpy.allclose(v1, 0.72803009)
        assert numpy.allclose(v2, 0.55056769)
        assert numpy.allclose(v3, 0.59044123)
        assert numpy.allclose(v4, 0.59044123)

    @attr('slow')
    def test_examples_real_example(self):
        # Full logistic-regression training loop from the tutorial.
        rng = numpy.random

        N = 400
        feats = 784
        D = (rng.randn(N, feats).astype(config.floatX),
             rng.randint(size=N, low=0, high=2).astype(config.floatX))
        training_steps = 10000
        # Keep the run short under slow compilation/checking modes.
        if config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
            training_steps = 10

        # Declare Theano symbolic variables
        x = T.matrix("x")
        y = T.vector("y")
        # The *.03 have been added to have DebugMode don't complain
        w = theano.shared(rng.randn(feats).astype(config.floatX) * .03,
                          name="w")
        b = theano.shared(numpy.asarray(0., dtype=config.floatX),
                          name="b")
        print("Initial model:")
        print(w.get_value(), b.get_value())

        # Construct Theano expression graph
        p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))  # Probability that target = 1
        prediction = p_1 > 0.5  # The prediction thresholded
        xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1)  # Cross-entropy loss function
        cost = xent.mean() + 0.01 * (w ** 2).sum()  # The cost to minimize
        gw, gb = T.grad(cost, [w, b])  # Compute the gradient of the cost
                                       # (we shall return to this in a
                                       # following section of this tutorial)

        # Compile
        train = theano.function(
            inputs=[x, y],
            outputs=[prediction, xent],
            updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
        predict = theano.function(inputs=[x], outputs=prediction)

        # Train
        for i in range(training_steps):
            pred, err = train(D[0], D[1])

        print("Final model:")
        print(w.get_value(), b.get_value())
        print("target values for D:", D[1])
        print("prediction on D:", predict(D[0]))

        # A user reported that this happened on the mailing list.
        assert not numpy.isnan(b.get_value()).any()
        assert not numpy.isnan(w.get_value()).any()
class T_aliasing(unittest.TestCase):
    """
    Tests mirroring the code snippets in the "Memory aliasing" tutorial.
    """
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/aliasing.html
    # Theano/doc/tutorial/aliasing.txt
    # Any change you do here also add it to the tutorial !

    def test_aliasing_1(self):
        # With borrow=True the shared variable aliases the numpy array,
        # so an in-place change to the array is visible through it.
        import numpy, theano
        np_array = numpy.ones(2, dtype='float32')

        s_default = theano.shared(np_array)
        s_false = theano.shared(np_array, borrow=False)
        s_true = theano.shared(np_array, borrow=True)

        np_array += 1  # now it is an array of 2.0 s

        assert numpy.all(s_default.get_value() == array([1.0, 1.0]))
        assert numpy.all(s_false.get_value() == array([1.0, 1.0]))
        assert numpy.all(s_true.get_value() == array([2.0, 2.0]))

    def test_aliasing_2(self):
        # get_value with/without borrow, and with the internal type.
        import numpy, theano
        np_array = numpy.ones(2, dtype='float32')
        s = theano.shared(np_array)

        v_false = s.get_value(borrow=False)  # N.B. borrow default is False
        v_true = s.get_value(borrow=True)
        v_internal = s.get_value(borrow=True, return_internal_type=True)

        s.set_value(
            # some_inplace_fn
            s.get_value(borrow=True).__imul__(2),
            borrow=True)

    def test_aliasing_3(self):
        # Borrowed input and output declared at compile time.
        import theano, theano.tensor
        x = theano.tensor.matrix()
        y = 2*x
        f = theano.function([theano.In(x, borrow=True)], theano.Out(y, borrow=True))
class T_loading_and_saving(unittest.TestCase):
    """
    Tests mirroring the code snippets in the "Loading and Saving"
    tutorial.
    """
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/loading_and_saving.html
    # Theano/doc/tutorial/loading_and_saving.txt
    # Any change you do here also add it to the tutorial !

    def test_loading_and_saving_1(self):
        # Pickle a compiled function, reload it, then pickle and
        # unpickle several objects through a single file handle.
        import six.moves.cPickle as pickle
        import theano, theano.tensor

        x = theano.tensor.matrix()
        y = 2*x
        my_obj = theano.function([theano.In(x, borrow=True)]
                                 , theano.Out(y, borrow=True))

        # NOTE(review): the body is skipped under DebugMode — presumably
        # because pickling a DebugMode-compiled function is not
        # supported; confirm against theano.compile.debugmode.
        mode_instance = theano.compile.mode.get_mode(None)
        if not isinstance(mode_instance, theano.compile.debugmode.DebugMode):

            # Here, we work in a temporary directory in order not to clutter
            # the Theano repository. Code relative to creating that dir and
            # removing it afterwards should _not_ be backported to the tutorial.
            from tempfile import mkdtemp
            origdir = os.getcwd()
            tmpdir = None
            try:
                tmpdir = mkdtemp()
                os.chdir(tmpdir)

                f = open('obj.save', 'wb')
                pickle.dump(my_obj, f, protocol=pickle.HIGHEST_PROTOCOL)
                f.close()

                f = open('obj.save', 'rb')
                loaded_obj = pickle.load(f)
                f.close()

                obj1 = my_obj
                obj2 = my_obj
                obj3 = my_obj

                f = open('objects.save', 'wb')
                for obj in [obj1, obj2, obj3]:
                    pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
                f.close()

                f = open('objects.save', 'rb')
                loaded_objects = []
                for i in range(3):
                    loaded_objects.append(pickle.load(f))
                f.close()
            finally:
                # Get back to the original dir, and delete the temporary one.
                os.chdir(origdir)
                if tmpdir is not None:
                    shutil.rmtree(tmpdir)
class T_modes(unittest.TestCase):
    """
    Tests mirroring the code snippets in the "Configuration Settings and
    Compiling Modes" tutorial.
    """
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/modes.html
    # Theano/doc/tutorial/modes.txt
    # Any change you do here also add it to the tutorial !

    def test_modes_1(self):
        # Compile with an explicit mode name and sanity-check outputs.
        x = T.dvector('x')

        f = theano.function([x], 10*x, mode='DEBUG_MODE')

        assert numpy.all(f([5]) == [50.])
        assert numpy.all(f([0]) == [0.])
        assert numpy.all(f([7]) == [70.])
class T_using_gpu(unittest.TestCase):
    """
    Tests mirroring the code snippets in the "Using the GPU" tutorial.
    """
    # All tests here belong to
    # http://deeplearning.net/software/theano/tutorial/using_gpu.html
    # Theano/doc/tutorial/using_gpu.txt
    # Any change you do here also add it to the tutorial !

    def test_using_gpu_1(self):
        # I'm checking if this compiles and runs
        from theano import function, config, shared, sandbox
        import theano.tensor as T
        import numpy
        import time

        vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
        iters = 10

        rng = numpy.random.RandomState(22)
        x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
        f = function([], T.exp(x))
        # print f.maker.fgraph.toposort()
        t0 = time.time()
        for i in xrange(iters):
            r = f()
        t1 = time.time()
        print('Looping %d times took' % iters, t1 - t0, 'seconds')
        print('Result is', r)
        # An Elemwise node in the compiled graph is used here as the
        # marker that the computation stayed on the CPU.
        if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
            print('Used the cpu')
        else:
            print('Used the gpu')
        if theano.config.device.find('gpu') > -1:
            assert not numpy.any( [isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()])
        else:
            assert numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()])

    def test_using_gpu_2(self):
        # Explicitly transfer the result with gpu_from_host; the body
        # only runs when a GPU device is configured.
        if theano.config.device.find('gpu') > -1:
            from theano import function, config, shared, sandbox
            import theano.tensor as T
            import numpy
            import time

            vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
            iters = 10

            rng = numpy.random.RandomState(22)
            x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
            f = function([], sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)))
            # print f.maker.fgraph.toposort()
            t0 = time.time()
            for i in xrange(iters):
                r = f()
            t1 = time.time()
            print('Looping %d times took' % iters, t1 - t0, 'seconds')
            print('Result is', r)
            print('Numpy result is', numpy.asarray(r))
            if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
                print('Used the cpu')
            else:
                print('Used the gpu')
            assert not numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()])

    def test_using_gpu_3(self):
        # Same as above, but also borrow the output (theano.Out) to
        # avoid a copy back; GPU-only.
        if theano.config.device.find('gpu') > -1:
            from theano import function, config, shared, sandbox, Out
            import theano.tensor as T
            import numpy
            import time
            vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
            iters = 10

            rng = numpy.random.RandomState(22)
            x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
            f = function([],
                         Out(sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)),
                             borrow=True))
            # print f.maker.fgraph.toposort()
            t0 = time.time()
            for i in xrange(iters):
                r = f()
            t1 = time.time()
            print('Looping %d times took' % iters, t1 - t0, 'seconds')
            print('Result is', r)
            print('Numpy result is', numpy.asarray(r))
            if numpy.any([isinstance(x.op, T.Elemwise)
                          for x in f.maker.fgraph.toposort()]):
                print('Used the cpu')
            else:
                print('Used the gpu')
            assert not numpy.any([isinstance(x.op, T.Elemwise)
                                  for x in f.maker.fgraph.toposort()])

    def test_using_gpu_pycudaop(self):
        # A custom Op whose thunk is implemented with PyCUDA; skipped
        # unless both PyCUDA and CUDA support are available.
        import theano.misc.pycuda_init
        if not theano.misc.pycuda_init.pycuda_available:
            raise SkipTest("Pycuda not installed. Skip test of theano op"
                           " with pycuda code.")
        from pycuda.compiler import SourceModule
        import theano.sandbox.cuda as cuda
        import theano.sandbox.cuda as cuda_ndarray
        if not cuda_ndarray.cuda_available:
            raise SkipTest('Optional package cuda disabled')

        class PyCUDADoubleOp(theano.Op):
            # Doubles a float32 CUDA ndarray via a hand-written kernel.
            __props__ = ()

            def make_node(self, inp):
                inp = cuda.basic_ops.gpu_contiguous(
                    cuda.basic_ops.as_cuda_ndarray_variable(inp))
                assert inp.dtype == "float32"
                return theano.Apply(self, [inp], [inp.type()])

            def make_thunk(self, node, storage_map, _, _2):
                mod = SourceModule("""
                __global__ void my_fct(float * i0, float * o0, int size) {
                    int i = blockIdx.x*blockDim.x + threadIdx.x;
                    if(i<size){
                        o0[i] = i0[i]*2;
                    }
                }""")
                pycuda_fct = mod.get_function("my_fct")
                inputs = [storage_map[v] for v in node.inputs]
                outputs = [storage_map[v] for v in node.outputs]

                def thunk():
                    # (Re)allocate the output only when its shape changed.
                    z = outputs[0]
                    if z[0] is None or z[0].shape != inputs[0][0].shape:
                        z[0] = cuda.CudaNdarray.zeros(inputs[0][0].shape)
                    grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1)
                    pycuda_fct(inputs[0][0], z[0],
                               numpy.intc(inputs[0][0].size),
                               block=(512, 1, 1), grid=grid)
                return thunk

        x = theano.tensor.fmatrix()
        f = theano.function([x], PyCUDADoubleOp()(x))
        xv = numpy.ones((4, 5), dtype="float32")
        assert numpy.allclose(f(xv), xv*2)
        # print numpy.asarray(f(xv))
# Used in T_fibby
class Fibby(theano.Op):
    """
    An arbitrarily generalized Fibonacci sequence.

    For a 1-d input vector ``x``, the output ``y`` starts as a copy of
    ``x`` and, for i >= 2, applies ``y[i] = y[i - 1] * y[i - 2] + x[i]``.
    Defined at module level (not inside the test) so instances can be
    pickled.
    """
    __props__ = ()

    def make_node(self, x):
        # Accept anything convertible to a 1-d tensor variable.
        x_ = theano.tensor.as_tensor_variable(x)
        assert x_.ndim == 1
        return theano.Apply(self,
                            inputs=[x_],
                            outputs=[x_.type()])
        # using x_.type() is dangerous, it copies x's broadcasting
        # behaviour

    def perform(self, node, inputs, output_storage):
        # Python fallback: start from a copy of x, then apply the
        # recurrence in place.
        x, = inputs
        y = output_storage[0][0] = x.copy()
        for i in range(2, len(x)):
            y[i] = y[i - 1] * y[i - 2] + x[i]

    def c_code(self, node, name, inames, onames, sub):
        # C implementation: copy x into y, then run the same recurrence.
        x, = inames
        y, = onames
        fail = sub['fail']
        return """
            Py_XDECREF(%(y)s);
            %(y)s = (PyArrayObject*)PyArray_FromArray(
                %(x)s, 0, NPY_ARRAY_ENSURECOPY);
            if (!%(y)s)
                %(fail)s;
            {//New scope needed to make compilation work
                dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s);
                dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s);
                for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i)
                    y[i] = y[i-1]*y[i-2] + x[i];
            }
        """ % locals()

    def c_code_cache_version(self):
        return (1,)
class T_fibby(unittest.TestCase):
    """
    Tests mirroring the code snippets in the "fibby" extending example.
    """
    # All tests here belong to
    # http://deeplearning.net/software/theano/extending/fibby.html
    # Theano/doc/extending/fibby.txt
    # Any change you do here also add it to the tutorial !

    def test_fibby_1(self):
        # The definition of class Fibby is done outside of the test,
        # so the object can be pickled.
        fibby = Fibby()

        from theano.tensor.opt import (get_scalar_constant_value,
                                       NotScalarConstantError)

        # Remove any fibby(zeros(...))
        @theano.tensor.opt.register_specialize
        @theano.gof.local_optimizer([fibby])
        def fibby_of_zero(node):
            if node.op == fibby:
                x = node.inputs[0]
                try:
                    if numpy.all(0 == get_scalar_constant_value(x)):
                        return [x]
                except NotScalarConstantError:
                    pass

        # Test it does not apply when not needed
        x = T.dvector()
        f = function([x], fibby(x))
        # theano.printing.debugprint(f)

        # We call the function to make sure it runs.
        # If you run in DebugMode, it will compare the C and Python outputs.
        f(numpy.random.rand(5))
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert isinstance(topo[0].op, Fibby)

        # Test that the optimization gets applied.
        f_zero = function([], fibby(T.zeros([5])))
        # theano.printing.debugprint(f_zero)

        # If you run in DebugMode, it will compare the output before
        # and after the optimization.
        f_zero()

        # Check that the optimization removes the Fibby Op.
        # For security, the Theano memory interface ensures that the output
        # of the function is always memory not aliased to the input.
        # That is why there is a DeepCopyOp op.
        topo = f_zero.maker.fgraph.toposort()
        assert len(topo) == 1
        assert isinstance(topo[0].op, theano.compile.ops.DeepCopyOp)
class T_graphstructures(unittest.TestCase):
    """
    Tests mirroring the code snippets in the "Graph Structures" docs.
    """
    # All tests here belong to
    # http://deeplearning.net/software/theano/extending/graphstructures.html
    # Theano/doc/extending/graphstructures.txt
    # Any change you do here also add it to the tutorial !

    def test_graphstructures_1(self):
        # High-level API: graphs built with operator overloading.
        x = T.dmatrix('x')
        y = T.dmatrix('y')
        z = x + y

        x = T.matrix('x')
        y = T.matrix('y')
        z = T.matrix('z')

        # create 2 Variables (one for 'e', one intermediate for y*z)
        # create 2 Apply instances (one for '+', one for '*')
        e = x + y * z

        # Low-level API: the same graph assembled by hand from
        # Variable/Apply instances.
        from theano.tensor import add, mul, Apply, Variable, TensorType

        # Instantiate a type that represents a matrix of doubles
        float64_matrix = TensorType(dtype='float64',  # double
                                    broadcastable=(False, False))  # matrix

        # We make the Variable instances we need.
        x = Variable(type=float64_matrix, name='x')
        y = Variable(type=float64_matrix, name='y')
        z = Variable(type=float64_matrix, name='z')

        # This is the Variable that we want to symbolically represents y*z
        mul_variable = Variable(type=float64_matrix)
        assert mul_variable.owner is None

        # Instantiate a symbolic multiplication
        node_mul = Apply(op=mul,
                         inputs=[y, z],
                         outputs=[mul_variable])
        # Fields 'owner' and 'index' are set by Apply
        assert mul_variable.owner is node_mul
        # 'index' is the position of mul_variable in mode_mul's outputs
        assert mul_variable.index == 0

        # This is the Variable that we want to symbolically represents x+(y*z)
        add_variable = Variable(type=float64_matrix)
        assert add_variable.owner is None

        # Instantiate a symbolic addition
        node_add = Apply(op=add,
                         inputs=[x, mul_variable],
                         outputs=[add_variable])
        # Fields 'owner' and 'index' are set by Apply
        assert add_variable.owner is node_add
        assert add_variable.index == 0

        e = add_variable

        # We have access to x, y and z through pointers
        assert e.owner.inputs[0] is x
        assert e.owner.inputs[1] is mul_variable
        assert e.owner.inputs[1].owner.inputs[0] is y
        assert e.owner.inputs[1].owner.inputs[1] is z
class T_scan(unittest.TestCase):
# All tests here belong to
# http://deeplearning.net/software/theano/tutorial/loop.html
# Theano/doc/tutorial/loop.txt
# Any change you do here also add it to the tutorial !
    def test_elemwise(self):
        # Apply tanh(v.W + b) to every row of X with scan, and print the
        # straight numpy computation next to it for comparison.
        # defining the tensor variables
        X = T.matrix("X")
        W = T.matrix("W")
        b_sym = T.vector("b_sym")

        results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym),
                                       sequences=X)
        compute_elementwise = theano.function(inputs=[X, W, b_sym],
                                              outputs=[results])

        # test values
        x = numpy.eye(2, dtype=theano.config.floatX)
        w = numpy.ones((2, 2), dtype=theano.config.floatX)
        b = numpy.ones((2), dtype=theano.config.floatX)
        b[1] = 2

        print("Scan results:", compute_elementwise(x, w, b)[0])

        # comparison with numpy
        print("Numpy results:", numpy.tanh(x.dot(w) + b))
    def test_sequence(self):
        # Recurrence over two sequences (Y and reversed P) with an
        # initial state X, replayed step by step in numpy below.
        # define tensor variables
        X = T.vector("X")
        W = T.matrix("W")
        b_sym = T.vector("b_sym")
        U = T.matrix("U")
        Y = T.matrix("Y")
        V = T.matrix("V")
        P = T.matrix("P")

        results, updates = theano.scan(
            lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) +
                                       T.dot(y, U) + T.dot(p, V)),
            sequences=[Y, P[::-1]], outputs_info=[X])
        compute_seq = theano.function(inputs=[X, W, Y, U, P, V],
                                      outputs=[results])

        # test values
        x = numpy.zeros((2), dtype=theano.config.floatX)
        x[1] = 1
        w = numpy.ones((2, 2), dtype=theano.config.floatX)
        y = numpy.ones((5, 2), dtype=theano.config.floatX)
        y[0, :] = -3
        u = numpy.ones((2, 2), dtype=theano.config.floatX)
        p = numpy.ones((5, 2), dtype=theano.config.floatX)
        p[0, :] = 3
        v = numpy.ones((2, 2), dtype=theano.config.floatX)

        print("Scan results", compute_seq(x, w, y, u, p, v)[0])

        # comparison with numpy
        x_res = numpy.zeros((5, 2), dtype=theano.config.floatX)
        x_res[0] = numpy.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
        for i in range(1, 5):
            x_res[i] = numpy.tanh(x_res[i-1].dot(w) +
                                  y[i].dot(u) + p[4-i].dot(v))
        print("Numpy results:", x_res)
def test_norm(self):
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
sequences=[X])
compute_norm_lines = theano.function(inputs=[X], outputs=[results])
results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
sequences=[X.T])
compute_norm_cols = theano.function(inputs=[X], outputs=[results])
# test value
x = numpy.diag(numpy.arange(1, 6, dtype=theano.config.floatX), 1)
print("Scan results:", compute_norm_lines(x)[0], \
compute_norm_cols(x)[0])
# comparison with numpy
print("Numpy results:", numpy.sqrt((x**2).sum(1)), \
numpy.sqrt((x**2).sum(0)))
def test_trace(self):
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i, j] +
t_f, theano.config.floatX),
sequences=[T.arange(X.shape[0]),
T.arange(X.shape[1])],
outputs_info=numpy.asarray(
0., dtype=theano.config.floatX))
result = results[-1]
compute_trace = theano.function(inputs=[X], outputs=[result])
# test value
x = numpy.eye(5, dtype=theano.config.floatX)
x[0] = numpy.arange(5, dtype=theano.config.floatX)
print("Scan results:", compute_trace(x)[0])
# comparison with numpy
print("Numpy results:", numpy.diagonal(x).sum())
def test_taps(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
V = T.matrix("V")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(
lambda x_tm2, x_tm1: T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T.dot(x_tm1, W) + b_sym),
n_steps=n_sym,
outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym],
outputs=[results])
# test values
x = numpy.zeros((2, 2), dtype=theano.config.floatX)
# the initial value must be able to return x[-2]
x[1, 1] = 1
w = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
u = 0.5 * (numpy.ones((2, 2), dtype=theano.config.floatX) -
numpy.eye(2, dtype=theano.config.floatX))
v = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
n = 10
b = numpy.ones((2), dtype=theano.config.floatX)
print("Scan results:", compute_seq2(x, u, v, w, b, n))
# comparison with numpy
x_res = numpy.zeros((10, 2), dtype=theano.config.floatX)
x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) \
+ numpy.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \
+ numpy.tanh(x_res[1].dot(w) + b)
for i in range(2, 10):
x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) +
numpy.tanh(x_res[i-1].dot(w) + b))
print("Numpy results:", x_res)
def test_jacobian(self):
# define tensor variables
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v, A))
results, updates = theano.scan(lambda i: T.grad(y[i], v),
sequences=[T.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], [results],
allow_input_downcast=True) # shape (d_out, d_in)
# test values
x = numpy.eye(5)[0]
w = numpy.eye(5, 3)
w[2] = numpy.ones((3))
print("Scan results:", compute_jac_t(w, x)[0])
# compare with numpy
print("Numpy results:", ((1 - numpy.tanh(x.dot(w))**2)*w).T)
def test_accumulator(self):
# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda: {k: (k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates,
allow_input_downcast=True)
print("Before 5 steps:", k.get_value())
accumulator(5)
print("After 5 steps:", k.get_value())
def test_random(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d = trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d,
sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym],
outputs=[results],
updates=updates,
allow_input_downcast=True)
x = numpy.eye(10, 2)
w = numpy.ones((2, 2))
b = numpy.ones((2))
print(compute_with_bnoise(x, w, b))
class T_typedlist(unittest.TestCase):
    """Tests mirroring the code examples of the typed_list documentation."""
    # All tests here belong to
    # http://deeplearning.net/software/theano/library/typed_list.html
    # Theano/doc/library/typed_list.txt
    # Any change you do here must also be done in the documentation !
    def test_typedlist_basic(self):
        """Append a vector to a typed list and check the combined output."""
        import theano.typed_list

        # a symbolic list whose elements are float32 vectors
        tl = theano.typed_list.TypedListType(theano.tensor.fvector)()
        v = theano.tensor.fvector()
        o = theano.typed_list.append(tl, v)
        f = theano.function([tl, v], o)

        output = f([[1, 2, 3], [4, 5]], [2])

        # Validate ouput is as expected: the original two vectors plus the
        # appended one, each cast to float32
        expected_output = [numpy.array([1, 2, 3], dtype="float32"),
                           numpy.array([4, 5], dtype="float32"),
                           numpy.array([2], dtype="float32")]

        # compare element by element since the list holds ragged arrays
        assert len(output) == len(expected_output)
        for i in range(len(output)):
            utt.assert_allclose(output[i], expected_output[i])

    def test_typedlist_with_scan(self):
        """Use a typed list as a scan non-sequence: sum each element."""
        import theano.typed_list

        a = theano.typed_list.TypedListType(theano.tensor.fvector)()
        l = theano.typed_list.length(a)
        # iterate over indices 0..len(a)-1 and sum the vector at each index
        s, _ = theano.scan(fn=lambda i, tl: tl[i].sum(),
                           non_sequences=[a],
                           sequences=[theano.tensor.arange(l, dtype='int64')])
        f = theano.function([a], s)

        output = f([[1, 2, 3], [4, 5]])

        # Validate ouput is as expected: per-element sums of the input list
        expected_output = numpy.array([6, 9], dtype="float32")

        utt.assert_allclose(output, expected_output)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论