提交 0ffa6ba5 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

merge

...@@ -23,7 +23,9 @@ import sys, os ...@@ -23,7 +23,9 @@ import sys, os
# Add any Sphinx extension module names here, as strings. They can be extensions # Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'ext'] extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'ext']
todo_include_todos = True
try: try:
from sphinx.ext import pngmath from sphinx.ext import pngmath
...@@ -166,7 +168,7 @@ latex_font_size = '11pt' ...@@ -166,7 +168,7 @@ latex_font_size = '11pt'
# Grouping the document tree into LaTeX files. List of tuples # Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, document class [howto/manual]). # (source start file, target name, title, author, document class [howto/manual]).
latex_documents = [ latex_documents = [
('contents', 'theano.tex', 'theano Documentation', ('index', 'theano.tex', 'theano Documentation',
'LISA lab, University of Montreal', 'manual'), 'LISA lab, University of Montreal', 'manual'),
] ]
......
...@@ -26,26 +26,28 @@ What needs to be defined ...@@ -26,26 +26,28 @@ What needs to be defined
There are fewer methods to define for an Op than for a Type: There are fewer methods to define for an Op than for a Type:
.. function:: c_code(node, name, input_names, output_names, sub) .. class:: Op
This must return C code that carries the computation we want to do. .. method:: c_code(node, name, input_names, output_names, sub)
.. function:: c_code_cleanup(node, name, input_names, output_names, sub) This must return C code that carries the computation we want to do.
This must return C code that cleans up whatever c_code allocated and .. method:: c_code_cleanup(node, name, input_names, output_names, sub)
that we must free.
*Default:* The default behavior is to do nothing. This must return C code that cleans up whatever c_code allocated and
that we must free.
.. function:: c_compile_args() *Default:* The default behavior is to do nothing.
c_no_compile_args()
c_headers()
c_libraries()
c_support_code()
Allows you to specify headers, libraries, .. method:: c_compile_args()
special g++ arguments to add/exclude or .. method:: c_no_compile_args()
helper functions/structs that the type needs. See :ref:`op`. .. method:: c_headers()
.. method:: c_libraries()
.. method:: c_support_code()
Allows you to specify headers, libraries,
special g++ arguments to add/exclude or
helper functions/structs that the type needs. See :ref:`op`.
The ``name`` argument is currently given an invalid value, so steer The ``name`` argument is currently given an invalid value, so steer
......
...@@ -46,43 +46,45 @@ be found in the documentation for :api:`gof.type.Type`. Here, we'll focus on ...@@ -46,43 +46,45 @@ be found in the documentation for :api:`gof.type.Type`. Here, we'll focus on
the most important ones: the most important ones:
.. function:: c_declare(name, sub) .. class:: CLinkerType
This must return C code which declares variables. These variables .. method:: c_declare(name, sub)
will be available to operations defined in C. You may also write
typedefs.
.. function:: c_init(name, sub) This must return C code which declares variables. These variables
will be available to operations defined in C. You may also write
typedefs.
This must return C code which initializes the variables declared in .. method:: c_init(name, sub)
``c_declare``. Either this or ``c_extract`` will be called.
.. function:: c_extract(name, sub) This must return C code which initializes the variables declared in
``c_declare``. Either this or ``c_extract`` will be called.
This must return C code which takes a reference to a Python object .. method:: c_extract(name, sub)
and initializes the variables declared in ``c_declare`` to match the
Python object's data. Either this or ``c_init`` will be called.
.. function:: c_sync(name, sub) This must return C code which takes a reference to a Python object
and initializes the variables declared in ``c_declare`` to match the
Python object's data. Either this or ``c_init`` will be called.
When the computations are done, transfer the variables from the C .. method:: c_sync(name, sub)
structure we put them in to the destination Python object. This will
only be called for the outputs.
.. function:: c_cleanup(name, sub) When the computations are done, transfer the variables from the C
structure we put them in to the destination Python object. This will
only be called for the outputs.
When we are done using the data, clean up whatever we allocated and .. method:: c_cleanup(name, sub)
decrease the appropriate reference counts.
.. function:: c_compile_args() When we are done using the data, clean up whatever we allocated and
c_no_compile_args() decrease the appropriate reference counts.
c_headers()
c_libraries()
c_support_code()
Allows you to specify headers, libraries, .. method:: c_compile_args()
special g++ arguments to add/exclude or c_no_compile_args()
helper functions/structs that the type needs. See :ref:`type`. c_headers()
c_libraries()
c_support_code()
Allows you to specify headers, libraries,
special g++ arguments to add/exclude or
helper functions/structs that the type needs. See :ref:`type`.
Each of these functions take two arguments, ``name`` and ``sub`` which Each of these functions take two arguments, ``name`` and ``sub`` which
...@@ -391,7 +393,7 @@ done. Note which variables get extracted (the three inputs ``x``, ``y`` and ...@@ -391,7 +393,7 @@ done. Note which variables get extracted (the three inputs ``x``, ``y`` and
output ``b``) and which one is synced (the final output ``b``). output ``b``) and which one is synced (the final output ``b``).
The C code above is a single C block for the whole graph. Depending on The C code above is a single C block for the whole graph. Depending on
which :ref:`linker` is used to process the computation graph, it is which :term:`linker` is used to process the computation graph, it is
possible that one such block is generated for each operation and that possible that one such block is generated for each operation and that
we transit through Python after each operation. In that situation, we transit through Python after each operation. In that situation,
``a`` would be synced by the addition block and extracted by the ``a`` would be synced by the addition block and extracted by the
......
.. _debug_faq:
=========================================
Debugging Theano: FAQ and Troubleshooting
=========================================
There are many kinds of bugs that might come up in a computer program.
This page is structured as an FAQ. It should provide recipes to tackle common
problems, and introduce some of the tools that we use to find problems in our
Theano code, and even (it happens) in Theano's internals, such as
:ref:`using_debugmode`.
How do I print an intermediate value in a Function/Method?
----------------------------------------------------------
Theano provides a 'Print' Op to do this.
.. code-block:: python
x = theano.tensor.dvector('x')
x_printed = theano.Print('this is a very important value')(x)
f = theano.function([x], x * 5)
f_with_print = theano.function([x], x_printed * 5)
#this runs the graph without any printing
assert numpy.all( f([1,2,3]) == [5, 10, 15])
#this runs the graph with the message, and value printed
assert numpy.all( f_with_print([1,2,3]) == [5, 10, 15])
Since Theano runs your program in a topological order, you won't have precise
control over the order in which multiple Print() Ops are evaluated. For a more
precise inspection of what's being computed where, when, and how, see the
:ref:`faq_wraplinker`.
I wrote a new Op/Type, and weird stuff is happening...
------------------------------------------------------
First, check the :ref:`op_contract` and the :ref:`type_contract`
and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might catch
something that you're not seeing.
I wrote a new optimization, but it's not getting used...
---------------------------------------------------------
Remember that you have to register optimizations with the :ref:`optdb`
for them to get used by the normal modes like FAST_COMPILE, FAST_RUN,
and DEBUG_MODE.
I wrote a new optimization, and it changed my results even though I'm pretty sure it is correct.
------------------------------------------------------------------------------------------------
First, check the :ref:`op_contract` and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might
catch something that you're not seeing.
The function I compiled is too slow, what's up?
-----------------------------------------------
First, make sure you're running in FAST_RUN mode, by passing
``mode='FAST_RUN'`` to ``theano.function`` or ``theano.make``. Some
operations have excruciatingly slow Python implementations and that
can negatively affect the performance of FAST_COMPILE.
Second, try the theano :ref:`using_profilemode`. This will tell you which
Apply nodes, and which Ops are eating up your CPU cycles.
.. _faq_wraplinker:
How do I step through a compiled function with the WrapLinker?
--------------------------------------------------------------
This is not exactly an FAQ, but the doc is here for now...
It's pretty easy to roll-your-own evaluation mode.
Check out this one:
.. code-block:: python
class PrintEverythingMode(Mode):
def __init__(self):
def print_eval(i, node, fn):
print i, node, [input[0] for input in fn.inputs],
fn()
print [output[0] for output in fn.outputs]
wrap_linker = theano.gof.WrapLinkerMany([theano.gof.OpWiseCLinker()], [print_eval])
super(PrintEverythingMode, self).__init__(wrap_linker, optimizer='fast_run')
When you use ``mode=PrintEverythingMode()`` as the mode for Function or Method,
then you should see a lot of output. Every Apply node will be printed out,
along with its position in the graph, the arguments to the ``perform`` or
``c_code`` and the output it computed. Admittedly, this is a huge amount of
output to read through if you are using big tensors... but you can choose to
put logic inside of the print_eval function that would, for example, only
print something out if a certain kind of Op was used, at a certain program
position, or if a particular value shows up in one of the inputs or outputs.
.. TODO: documentation for link.WrapLinkerMany
This can be a really powerful debugging tool. Note the call to ``fn`` inside the call to ``print_eval``; without it, the graph wouldn't get computed at all!
.. _extend_faq:
=========================================
Extending Theano: FAQ and Troubleshooting
=========================================
I wrote a new Op/Type, and weird stuff is happening...
------------------------------------------------------
First, check the :ref:`op_contract` and the :ref:`type_contract`
and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might catch
something that you're not seeing.
I wrote a new optimization, but it's not getting used...
---------------------------------------------------------
Remember that you have to register optimizations with the :ref:`optdb`
for them to get used by the normal modes like FAST_COMPILE, FAST_RUN,
and DEBUG_MODE.
I wrote a new optimization, and it changed my results even though I'm pretty sure it is correct.
------------------------------------------------------------------------------------------------
First, check the :ref:`op_contract` and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might
catch something that you're not seeing.
...@@ -146,7 +146,7 @@ Automatic wrapping ...@@ -146,7 +146,7 @@ Automatic wrapping
All nodes in the graph must be instances of ``Apply`` or ``Result``, but All nodes in the graph must be instances of ``Apply`` or ``Result``, but
``<Op subclass>.make_node()`` typically wraps constants to satisfy those ``<Op subclass>.make_node()`` typically wraps constants to satisfy those
constraints. For example, the :api:`tensor.add <theano.tensor.basic.add>` constraints. For example, the :func:`tensor.add`
Op instance is written so that: Op instance is written so that:
.. code-block:: python .. code-block:: python
...@@ -189,8 +189,8 @@ An *Apply node* is a type of internal node used to represent a ...@@ -189,8 +189,8 @@ An *Apply node* is a type of internal node used to represent a
manipulated directly by the end user. They may be accessed via manipulated directly by the end user. They may be accessed via
a Variable's ``owner`` field. a Variable's ``owner`` field.
An Apply node is typically an instance of the :api:`Apply An Apply node is typically an instance of the :class:`Apply`
<theano.gof.graph.Apply>` class. It represents the application class. It represents the application
of an :ref:`op` on one or more inputs, where each input is a of an :ref:`op` on one or more inputs, where each input is a
:ref:`variable`. By convention, each Op is responsible for :ref:`variable`. By convention, each Op is responsible for
knowing how to build an Apply node from a list of knowing how to build an Apply node from a list of
...@@ -215,8 +215,7 @@ An Apply instance has three important fields: ...@@ -215,8 +215,7 @@ An Apply instance has three important fields:
A list of :ref:`Variables <variable>` that represent the return values A list of :ref:`Variables <variable>` that represent the return values
of the function. of the function.
An Apply instance can be created by calling ``gof.Apply(op, inputs, An Apply instance can be created by calling ``gof.Apply(op, inputs, outputs)``.
outputs)``.
...@@ -260,7 +259,7 @@ Type ...@@ -260,7 +259,7 @@ Type
A :ref:`type` in Theano represents a set of constraints on potential A :ref:`type` in Theano represents a set of constraints on potential
data objects. These constraints allow Theano to tailor C code to handle data objects. These constraints allow Theano to tailor C code to handle
them and to statically optimize the computation graph. For instance, them and to statically optimize the computation graph. For instance,
the :ref:`irow <predefinedtypes>` type in the ``theano.tensor`` package the :ref:`irow <libdoc_tensor_creation>` type in the ``theano.tensor`` package
gives the following constraints on the data the Variables of type ``irow`` gives the following constraints on the data the Variables of type ``irow``
may contain: may contain:
...@@ -273,8 +272,8 @@ that declares the right data types and that contains the right number ...@@ -273,8 +272,8 @@ that declares the right data types and that contains the right number
of loops over the dimensions. of loops over the dimensions.
Note that a Theano :ref:`type` is not equivalent to a Python type or Note that a Theano :ref:`type` is not equivalent to a Python type or
class. Indeed, in Theano, :ref:`irow <predefinedtypes>` and :ref:`dmatrix class. Indeed, in Theano, :ref:`irow <libdoc_tensor_creation>` and :ref:`dmatrix
<predefinedtypes>` both use ``numpy.ndarray`` as the underlying type <libdoc_tensor_creation>` both use ``numpy.ndarray`` as the underlying type
for doing computations and storing data, yet they are different Theano for doing computations and storing data, yet they are different Theano
Types. Indeed, the constraints set by ``dmatrix`` are: Types. Indeed, the constraints set by ``dmatrix`` are:
...@@ -311,8 +310,7 @@ Variables. For example, when I type ...@@ -311,8 +310,7 @@ Variables. For example, when I type
>>> x = theano.tensor.ivector() >>> x = theano.tensor.ivector()
>>> y = -x >>> y = -x
``x`` and ``y`` are both Variables, i.e. instances of the :api:`Variable ``x`` and ``y`` are both Variables, i.e. instances of the :class:`Variable` class. The :ref:`type` of both ``x`` and
<theano.gof.graph.Variable>` class. The :ref:`type` of both ``x`` and
``y`` is ``theano.tensor.ivector``. ``y`` is ``theano.tensor.ivector``.
Unlike ``x``, ``y`` is a Variable produced by a computation (in this Unlike ``x``, ``y`` is a Variable produced by a computation (in this
...@@ -324,7 +322,7 @@ through ``y.owner``. ...@@ -324,7 +322,7 @@ through ``y.owner``.
More specifically, a Variable is a basic structure in Theano that More specifically, a Variable is a basic structure in Theano that
represents a datum at a certain point in computation. It is typically represents a datum at a certain point in computation. It is typically
an instance of the class :api:`Variable <theano.gof.graph.Variable>` or an instance of the class :class:`Variable` or
one of its subclasses. one of its subclasses.
A Variable ``r`` contains four important fields: A Variable ``r`` contains four important fields:
...@@ -365,6 +363,7 @@ any circumstances modify the input. This means that a constant is ...@@ -365,6 +363,7 @@ any circumstances modify the input. This means that a constant is
eligible to participate in numerous optimizations: constant inlining eligible to participate in numerous optimizations: constant inlining
in C code, constant folding, etc. in C code, constant folding, etc.
A constant does not need to be specified in a :ref:`function`'s list A constant does not need to be specified in a :func:`function
<function.function>`'s list
of inputs. In fact, doing so will raise an exception. of inputs. In fact, doing so will raise an exception.
...@@ -14,7 +14,7 @@ also good for you if you are interested in getting more under the hood with ...@@ -14,7 +14,7 @@ also good for you if you are interested in getting more under the hood with
Theano itself. Theano itself.
Before tackling this tutorial, it is highly recommended to read the Before tackling this tutorial, it is highly recommended to read the
:ref:`basictutorial`. :ref:`tutorial`.
The first few pages will walk you through the definition of a new :ref:`type`, The first few pages will walk you through the definition of a new :ref:`type`,
``double``, and a basic arithmetic :ref:`operations <op>` on that Type. We ``double``, and a basic arithmetic :ref:`operations <op>` on that Type. We
...@@ -34,5 +34,6 @@ a C implementation. ...@@ -34,5 +34,6 @@ a C implementation.
optimization optimization
tips tips
unittest unittest
extending_faq
...@@ -12,7 +12,7 @@ computations. We'll start by defining multiplication. ...@@ -12,7 +12,7 @@ computations. We'll start by defining multiplication.
Op's contract Op's contract
============= =============
An Op (:api:`gof.op.Op`) is any object which defines the An Op (:class:`gof.Op`) is any object which defines the
following methods: following methods:
...@@ -134,9 +134,7 @@ following methods: ...@@ -134,9 +134,7 @@ following methods:
includes this Op. includes this Op.
For each method, the *default* is what :api:`theano.gof.op.Op` defines At a bare minimum, a new Op must define ``make_node`` and ``perform``, which have no defaults.
for you. At a bare minimum, a new Op must define ``make_node`` and
``perform``, which have no defaults.
For more details, including the interface for providing a C For more details, including the interface for providing a C
implementation of ``perform()``, refer to the documentation for :ref:`op`. implementation of ``perform()``, refer to the documentation for :ref:`op`.
......
...@@ -26,9 +26,9 @@ Global and local optimizations ...@@ -26,9 +26,9 @@ Global and local optimizations
First, let's lay out the way optimizations work in Theano. There are First, let's lay out the way optimizations work in Theano. There are
two types of optimizations: *global* optimizations and *local* two types of optimizations: *global* optimizations and *local*
optimizations. A global optimization takes an :ref:`env` object (an optimizations. A global optimization takes an ``Env`` object (an
Env is a wrapper around a whole computation graph, you can see its Env is a wrapper around a whole computation graph, you can see its
:ref:`documentation <env>` for more details) and navigates through it :class:`documentation <Env>` for more details) and navigates through it
in a suitable way, replacing some Variables by others in the process. A in a suitable way, replacing some Variables by others in the process. A
local optimization, on the other hand, is defined as a function on a local optimization, on the other hand, is defined as a function on a
*single* :ref:`apply` node and must return either ``False`` (to mean that *single* :ref:`apply` node and must return either ``False`` (to mean that
...@@ -52,26 +52,28 @@ Global optimization ...@@ -52,26 +52,28 @@ Global optimization
A global optimization (or optimizer) is an object which defines the following A global optimization (or optimizer) is an object which defines the following
methods: methods:
.. function:: apply(env) .. class:: Optimizer
This method takes an Env object which contains the computation graph .. method:: apply(env)
and does modifications in line with what the optimization is meant
to do. This is the main method of the optimizer.
.. function:: add_requirements(env) This method takes an Env object which contains the computation graph
and does modifications in line with what the optimization is meant
to do. This is the main method of the optimizer.
This method takes an Env object and adds :ref:`features .. method:: add_requirements(env)
<envfeature>` to it. These features are "plugins" that are needed
for the ``apply`` method to do its job properly.
.. function:: optimize(env) This method takes an Env object and adds :ref:`features
<libdoc_gof_envfeature>` to it. These features are "plugins" that are needed
for the ``apply`` method to do its job properly.
This is the interface function called by Theano. .. method:: optimize(env)
*Default:* this is defined by Optimizer as ``add_requirement(env); This is the interface function called by Theano.
apply(env)``.
See the section about :ref:`env` to understand how to define these *Default:* this is defined by Optimizer as ``add_requirement(env);
apply(env)``.
See the section about :class:`Env` to understand how to define these
methods. methods.
...@@ -80,14 +82,16 @@ Local optimization ...@@ -80,14 +82,16 @@ Local optimization
A local optimization is an object which defines the following methods: A local optimization is an object which defines the following methods:
.. function:: transform(node) .. class:: LocalOptimizer
.. method:: transform(node)
This method takes an :ref:`apply` node and returns either ``False`` to This method takes an :ref:`apply` node and returns either ``False`` to
signify that no changes are to be done or a list of Variables which signify that no changes are to be done or a list of Variables which
matches the length of the node's ``outputs`` list. When the matches the length of the node's ``outputs`` list. When the
LocalOptimizer is applied by a Navigator, the outputs of the node LocalOptimizer is applied by a Navigator, the outputs of the node
passed as argument to the LocalOptimizer will be replaced by the passed as argument to the LocalOptimizer will be replaced by the
list returned. list returned.
...@@ -138,8 +142,8 @@ simplification described above: ...@@ -138,8 +142,8 @@ simplification described above:
requirements we might want to know about? requirements we might want to know about?
Here's how it works: first, in ``add_requirements``, we add the Here's how it works: first, in ``add_requirements``, we add the
``ReplaceValidate`` :ref:`envfeature` located in ``ReplaceValidate`` :ref:`libdoc_gof_envfeature` located in
:api:`theano.gof.toolbox`. This feature adds the ``replace_validate`` :ref:`libdoc_gof_toolbox`. This feature adds the ``replace_validate``
method to ``env``, which is an enhanced version of ``replace`` that method to ``env``, which is an enhanced version of ``replace`` that
does additional checks to ensure that we are not messing up the does additional checks to ensure that we are not messing up the
computation graph (note: if ``ReplaceValidate`` was already added by computation graph (note: if ``ReplaceValidate`` was already added by
...@@ -147,9 +151,9 @@ another optimizer, ``extend`` will do nothing). In a nutshell, ...@@ -147,9 +151,9 @@ another optimizer, ``extend`` will do nothing). In a nutshell,
``toolbox.ReplaceValidate`` grants access to ``env.replace_validate``, ``toolbox.ReplaceValidate`` grants access to ``env.replace_validate``,
and ``env.replace_validate`` allows us to replace a Variable with and ``env.replace_validate`` allows us to replace a Variable with
another while respecting certain validation constraints. You can another while respecting certain validation constraints. You can
browse the list of :ref:`features <envfeaturelist>` and see if some of browse the list of :ref:`libdoc_gof_envfeaturelist` and see if some of
them might be useful to write optimizations with. For example, as an them might be useful to write optimizations with. For example, as an
exercise, try to rewrite Simplify using :ref:`nodefinder`. (Hint: you exercise, try to rewrite Simplify using :class:`NodeFinder`. (Hint: you
want to use the method it publishes instead of the call to toposort!) want to use the method it publishes instead of the call to toposort!)
Then, in ``apply`` we do the actual job of simplification. We start by Then, in ``apply`` we do the actual job of simplification. We start by
...@@ -222,12 +226,12 @@ arithmetics that your Ops implement. Theano might provide facilities ...@@ -222,12 +226,12 @@ arithmetics that your Ops implement. Theano might provide facilities
for this somewhere in the future. for this somewhere in the future.
.. note:: .. note::
:ref:`env` is a Theano structure intended for the optimization :class:`Env` is a Theano structure intended for the optimization
phase. It is used internally by function and Module and is rarely phase. It is used internally by function and Module and is rarely
exposed to the end user. You can use it to test out optimizations, exposed to the end user. You can use it to test out optimizations,
etc. if you are comfortable with it, but it is recommended to use etc. if you are comfortable with it, but it is recommended to use
the function/Module frontends and to interface optimizations with the function/Module frontends and to interface optimizations with
:ref:`optdb <optdb>` (we'll see how to do that soon). :class:`optdb` (we'll see how to do that soon).
Local optimization Local optimization
...@@ -305,7 +309,7 @@ Theano defines some shortcuts to make LocalOptimizers: ...@@ -305,7 +309,7 @@ Theano defines some shortcuts to make LocalOptimizers:
.. function:: PatternSub(pattern1, pattern2) .. function:: PatternSub(pattern1, pattern2)
Replaces all occurrences of the first pattern by the second pattern. Replaces all occurrences of the first pattern by the second pattern.
See :api:`theano.gof.opt.PatternSub`. See :class:`PatternSub`.
.. code-block:: python .. code-block:: python
...@@ -342,7 +346,7 @@ or ``PatternSub``, it is highly recommended to use them. ...@@ -342,7 +346,7 @@ or ``PatternSub``, it is highly recommended to use them.
WRITEME: more about using PatternSub (syntax for the patterns, how to WRITEME: more about using PatternSub (syntax for the patterns, how to
use constraints, etc. - there's some decent doc at use constraints, etc. - there's some decent doc at
:api:`theano.gof.opt.PatternSub` for those interested) :class:`PatternSub` for those interested)
...@@ -376,8 +380,8 @@ Definition of optdb ...@@ -376,8 +380,8 @@ Definition of optdb
------------------- -------------------
optdb is an object which is an instance of optdb is an object which is an instance of
:api:`theano.gof.SequenceDB <theano.gof.optdb.SequenceDB>`, :class:`SequenceDB <optdb.SequenceDB>`,
itself a subclass of :api:`theano.gof.DB <theano.gof.optdb.DB>`. itself a subclass of :class:`DB <optdb.DB>`.
There exist (for now) two types of DB, SequenceDB and EquilibriumDB. There exist (for now) two types of DB, SequenceDB and EquilibriumDB.
When given an appropriate Query, DB objects build an Optimizer matching When given an appropriate Query, DB objects build an Optimizer matching
the query. the query.
...@@ -399,7 +403,7 @@ well and the LocalOptimizers they return will be put in their places ...@@ -399,7 +403,7 @@ well and the LocalOptimizers they return will be put in their places
(note that as of yet no DB can produce LocalOptimizer objects, so this (note that as of yet no DB can produce LocalOptimizer objects, so this
is a moot point). is a moot point).
Theano contains one principal DB object, :api:`theano.gof.optdb`, which Theano contains one principal DB object, :class:`optdb`, which
contains all of Theano's optimizers with proper tags. It is contains all of Theano's optimizers with proper tags. It is
recommended to insert new Optimizers in it. As mentioned previously, recommended to insert new Optimizers in it. As mentioned previously,
optdb is a SequenceDB, so, at the top level, Theano applies a sequence optdb is a SequenceDB, so, at the top level, Theano applies a sequence
...@@ -411,33 +415,35 @@ Query ...@@ -411,33 +415,35 @@ Query
A Query is built by the following call: A Query is built by the following call:
:: .. code-block:: python
theano.gof.Query(include, require = None, exclude = None, subquery = None) theano.gof.Query(include, require = None, exclude = None, subquery = None)
.. attribute:: include .. class:: Query
.. attribute:: include
A set of tags (a tag being a string) such that every A set of tags (a tag being a string) such that every
optimization obtained through this Query must have **one** of the tags optimization obtained through this Query must have **one** of the tags
listed. This field is required and basically acts as a starting point listed. This field is required and basically acts as a starting point
for the search. for the search.
.. attribute:: require .. attribute:: require
A set of tags such that every optimization obtained A set of tags such that every optimization obtained
through this Query must have **all** of these tags. through this Query must have **all** of these tags.
.. attribute:: exclude .. attribute:: exclude
A set of tags such that every optimization obtained A set of tags such that every optimization obtained
through this Query must have **none** of these tags. through this Query must have **none** of these tags.
.. attribute:: subquery .. attribute:: subquery
optdb can contain sub-databases; subquery is a optdb can contain sub-databases; subquery is a
dictionary mapping the name of a sub-database to a special Query. dictionary mapping the name of a sub-database to a special Query.
If no subquery is given for a sub-database, the original Query will be If no subquery is given for a sub-database, the original Query will be
used again. used again.
Furthermore, a Query object includes three methods, ``including``, Furthermore, a Query object includes three methods, ``including``,
``requiring`` and ``excluding`` which each produce a new Query object ``requiring`` and ``excluding`` which each produce a new Query object
......
...@@ -40,17 +40,17 @@ Step 1 - Create an Env ...@@ -40,17 +40,17 @@ Step 1 - Create an Env
^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^
The subgraph given by the end user is wrapped in a structure called The subgraph given by the end user is wrapped in a structure called
:ref:`env`. That structure defines several hooks on adding and *Env*. That structure defines several hooks on adding and
removing (pruning) nodes as well as on modifying links between nodes removing (pruning) nodes as well as on modifying links between nodes
(for example, modifying an input of an :ref:`apply` node) (see the (for example, modifying an input of an :ref:`apply` node) (see the
article about :ref:`env` for more information). article about :ref:`libdoc_gof_env` for more information).
Env provides a method to change the input of an Apply node from one Env provides a method to change the input of an Apply node from one
Variable to another and a more high-level method to replace a Variable Variable to another and a more high-level method to replace a Variable
with another. This is the structure that :ref:`Optimizers with another. This is the structure that :ref:`Optimizers
<optimization>` work on. <optimization>` work on.
Some relevant :ref:`Features <envfeature>` are typically added to the Some relevant :ref:`Features <libdoc_gof_envfeature>` are typically added to the
Env, namely to prevent any optimization from operating inplace on Env, namely to prevent any optimization from operating inplace on
inputs declared as immutable. inputs declared as immutable.
...@@ -58,19 +58,19 @@ inputs declared as immutable. ...@@ -58,19 +58,19 @@ inputs declared as immutable.
Step 2 - Execute main Optimizer Step 2 - Execute main Optimizer
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once the Env is made, an :ref:`optimizer <optimization>` is produced Once the Env is made, an :term:`optimizer` is produced
by the :ref:`function_mode` passed to ``function`` or to the Method/Module's by the :term:`mode` passed to ``function`` or to the Method/Module's
``make`` (the Mode basically has two important fields, ``linker`` and ``make`` (the Mode basically has two important fields, ``linker`` and
``optimizer``). That optimizer is applied on the Env using its ``optimizer``). That optimizer is applied on the Env using its
optimize() method. optimize() method.
The optimizer is typically obtained through :ref:`optdb <optdb>`. The optimizer is typically obtained through :attr:`optdb`.
Step 3 - Execute linker to obtain a thunk Step 3 - Execute linker to obtain a thunk
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once the computation graph is optimized, the :ref:`linker` is Once the computation graph is optimized, the :term:`linker` is
extracted from the Mode. It is then called with the Env as argument to extracted from the Mode. It is then called with the Env as argument to
produce a ``thunk``, which is a function with no arguments that produce a ``thunk``, which is a function with no arguments that
returns nothing. Along with the thunk, one list of input containers (a returns nothing. Along with the thunk, one list of input containers (a
......
...@@ -40,8 +40,7 @@ Theano provides some generic Op classes which allow you to generate a ...@@ -40,8 +40,7 @@ Theano provides some generic Op classes which allow you to generate a
lot of Ops at a lesser effort. For instance, Elemwise can be used to lot of Ops at a lesser effort. For instance, Elemwise can be used to
make :term:`elementwise` operations easily whereas DimShuffle can be make :term:`elementwise` operations easily whereas DimShuffle can be
used to make transpose-like transformations. These higher order Ops used to make transpose-like transformations. These higher order Ops
are mostly Tensor-related, as this is Theano's specialty. An exposé of are mostly Tensor-related, as this is Theano's specialty.
them can therefore be found in :ref:`tensoroptools`.
.. _opchecklist: .. _opchecklist:
......
...@@ -22,69 +22,71 @@ i.e. the same default argument names and values. If you wish to add ...@@ -22,69 +22,71 @@ i.e. the same default argument names and values. If you wish to add
extra arguments to any of these methods, these extra arguments must have extra arguments to any of these methods, these extra arguments must have
default values. default values.
.. function:: filter(value, strict=False) .. class:: PureType
This casts a value to match the Type and returns the .. method:: filter(value, strict=False)
casted value. If ``value`` is incompatible with the Type,
the method must raise an exception. If ``strict`` is True, ``filter`` must return a
reference to ``value`` (i.e. casting prohibited)
We need to define ``filter`` with two arguments. The second argument This casts a value to match the Type and returns the
must be called ``strict`` (Theano often calls it by keyword) and must casted value. If ``value`` is incompatible with the Type,
have a default value of ``False``. the method must raise an exception. If ``strict`` is True, ``filter`` must return a
reference to ``value`` (i.e. casting prohibited)
.. function:: is_valid_value(value) We need to define ``filter`` with two arguments. The second argument
must be called ``strict`` (Theano often calls it by keyword) and must
have a default value of ``False``.
Returns True iff the value is compatible with the Type. If .. method:: is_valid_value(value)
``filter(value, strict = True)`` does not raise an exception, the
value is compatible with the Type.
*Default:* True iff ``filter(value, strict = True)`` does not raise Returns True iff the value is compatible with the Type. If
an exception. ``filter(value, strict = True)`` does not raise an exception, the
value is compatible with the Type.
.. function:: values_eq(a, b) *Default:* True iff ``filter(value, strict = True)`` does not raise
an exception.
Returns True iff ``a`` and ``b`` are equal. .. method:: values_eq(a, b)
*Default:* ``a == b`` Returns True iff ``a`` and ``b`` are equal.
.. function:: values_eq_approx(a, b) *Default:* ``a == b``
Returns True iff ``a`` and ``b`` are approximately equal, for a .. method:: values_eq_approx(a, b)
definition of "approximately" which varies from Type to Type.
*Default:* ``values_eq(a, b)`` Returns True iff ``a`` and ``b`` are approximately equal, for a
definition of "approximately" which varies from Type to Type.
.. function:: make_variable(name=None) *Default:* ``values_eq(a, b)``
Makes a :term:`Variable` of this Type with the specified name, if .. method:: make_variable(name=None)
``name`` is not ``None``. If ``name`` is ``None``, then the Variable does
not have a name. The Variable will have its ``type`` field set to
the Type object.
*Default:* there is a generic definition of this in Type. The Makes a :term:`Variable` of this Type with the specified name, if
Variable's ``type`` will be the object that defines this method (in ``name`` is not ``None``. If ``name`` is ``None``, then the Variable does
other words, ``self``). not have a name. The Variable will have its ``type`` field set to
the Type object.
.. function:: __call__(name=None) *Default:* there is a generic definition of this in Type. The
Variable's ``type`` will be the object that defines this method (in
other words, ``self``).
Syntactic shortcut to ``make_variable``. .. method:: __call__(name=None)
*Default:* ``make_variable`` Syntactic shortcut to ``make_variable``.
.. function:: __eq__(other) *Default:* ``make_variable``
Used to compare Type instances themselves .. method:: __eq__(other)
*Default:* ``object.__eq__`` Used to compare Type instances themselves
.. function:: __hash__() *Default:* ``object.__eq__``
Types should not be mutable, so it should be OK to define a hash .. method:: __hash__()
function. Typically this function should hash all of the terms
involved in ``__eq__``.
*Default:* ``id(self)`` Types should not be mutable, so it should be OK to define a hash
function. Typically this function should hash all of the terms
involved in ``__eq__``.
*Default:* ``id(self)``
For each method, the *default* is what ``Type`` defines For each method, the *default* is what ``Type`` defines
for you. So, if you create an instance of ``Type`` or an for you. So, if you create an instance of ``Type`` or an
...@@ -249,7 +251,7 @@ attempt to clear up the confusion: ...@@ -249,7 +251,7 @@ attempt to clear up the confusion:
there is actually only one Type in that set, therefore the subclass there is actually only one Type in that set, therefore the subclass
doesn't represent anything that one of its instances doesn't. In this doesn't represent anything that one of its instances doesn't. In this
case it is a singleton, a set with one element. However, the case it is a singleton, a set with one element. However, the
:api:`TensorType` :class:`TensorType`
class in Theano (which is a subclass of Type) class in Theano (which is a subclass of Type)
represents a set of types of tensors represents a set of types of tensors
parametrized by their data type or number of dimensions. We could say parametrized by their data type or number of dimensions. We could say
......
差异被折叠。
...@@ -37,7 +37,7 @@ Roughly in order of what you'll want to check out: ...@@ -37,7 +37,7 @@ Roughly in order of what you'll want to check out:
* :ref:`extending` -- Learn to add a Type, Op, or graph optimization. * :ref:`extending` -- Learn to add a Type, Op, or graph optimization.
* :ref:`internal` -- How to maintain Theano, LISA-specific tips, and more... * :ref:`internal` -- How to maintain Theano, LISA-specific tips, and more...
You can download the latest `PDF documentation <http://pylearn.org/theano/theano.pdf>`_, rather than reading it online. You can download the latest `PDF documentation <http://deeplearning.net/theanodoc/theano.pdf>`_, rather than reading it online.
Community Community
========= =========
...@@ -60,7 +60,6 @@ Community ...@@ -60,7 +60,6 @@ Community
tutorial/index tutorial/index
library/index library/index
extending/index extending/index
indexes/index
glossary glossary
links links
internal/index internal/index
......
...@@ -20,7 +20,7 @@ to be installed: ...@@ -20,7 +20,7 @@ to be installed:
We develop mainly on 64-bit Linux machines. 32-bit architectures are We develop mainly on 64-bit Linux machines. 32-bit architectures are
not well-tested. not well-tested.
python >= 2.5 python >= 2.5 (2.4 should be supported as well)
`numpy <http://numpy.scipy.org/>`_ >= 1.2 `numpy <http://numpy.scipy.org/>`_ >= 1.2
Earlier versions have memory leaks. Earlier versions have memory leaks.
...@@ -30,6 +30,8 @@ to be installed: ...@@ -30,6 +30,8 @@ to be installed:
is buggy in 0.6. (scipy.csc_matrix dot has a bug with singleton is buggy in 0.6. (scipy.csc_matrix dot has a bug with singleton
dimensions. There may be more bugs.) dimensions. There may be more bugs.)
A BLAS installation (with Level 3 functionality)
The following libraries and software are optional: The following libraries and software are optional:
g++, python-dev g++, python-dev
...@@ -42,41 +44,49 @@ The following libraries and software are optional: ...@@ -42,41 +44,49 @@ The following libraries and software are optional:
`mercurial <http://www.selenic.com/mercurial/>`_ `mercurial <http://www.selenic.com/mercurial/>`_
To download bleeding-edge version of Theano. To download bleeding-edge version of Theano.
.. _install_bleeding_edge:
Getting the code
-----------------
Easy install If you are a developer of Theano, then check out the :ref:`dev_start_guide` guide.
------------
The following command will install the latest release of Theano The following are general instructions that will set you up with the bleeding-edge
on your system: version of Theano. First, get the code using `mercurial <http://www.selenic.com/mercurial/wiki/>`__:
.. code-block:: bash .. code-block:: bash
easy_install Theano hg clone http://hg.assembla.com/theano Theano
Manual install Configuring PYTHONPATH
-------------- ---------------------------
The subdirectory Theano/theano has to be located in a path
mentioned in your PYTHONPATH. In order to do that, you can either
create a symbolic link to Theano/theano in a directory already
mentioned in your PYTHONPATH environment variable, or modify the
PYTHONPATH so that it mentions Theano.
To install the latest release of Theano from source, visit the `downloads To create a symbolic link:
<http://pylearn.org/theano/downloads/>`_ page and download the release you
want. Unpack the release, and type:
.. code-block:: bash .. code-block:: bash
python setup.py build ln -s Theano/theano <someplace on your PYTHONPATH>/theano
python setup.py test
python setup.py install
.. _install_bleeding_edge: To modify the environment variable PYTHONPATH in bash, you may do this:
Bleeding Edge .. code-block:: bash
--------------
Feeling lucky and want to run bleeding-edge code? export PYTHONPATH=<path to Theano's parent dir>/Theano:$PYTHONPATH
Then check out the :ref:`dev_start_guide` guide.
In csh:
Configuring the environment .. code-block:: csh
---------------------------
setenv PYTHONPATH <path to Theano's parent dir>/Theano:$PYTHONPATH
Configuring Theano's environmental variables
---------------------------------------------
Two environment variables are used to control automatic code Two environment variables are used to control automatic code
generation. It is possible to use Theano in a way which avoids all generation. It is possible to use Theano in a way which avoids all
...@@ -118,6 +128,33 @@ automatic code generation, but that way is much, much slower. ...@@ -118,6 +128,33 @@ automatic code generation, but that way is much, much slower.
Omitting this variable defaults the mode to ``'FAST_RUN'``. Omitting this variable defaults the mode to ``'FAST_RUN'``.
Testing your installation
---------------------------
Once you have completed these steps, you should run the theano test suite like this:
.. code-block:: bash
cd Theano
nosetests #execute all the tests
All tests should pass. If some test fails on your machine, you are
encouraged to tell us what went wrong on the ``theano-users@googlegroups.com``
mailing list.
Updating
-------------
To update your library to the latest revision, change directory (``cd``)
to your ``Theano`` folder and execute the following command:
.. code-block:: bash
hg pull -u
You should update frequently, bugs are fixed on a very regular basis.
Mac Mac
--- ---
...@@ -126,20 +163,21 @@ Mac ...@@ -126,20 +163,21 @@ Mac
- -
.. code-block:: bash .. code-block:: bash
$ sudo port install gcc42 py25-zlib py25-numpy py25-scipy mercurial $ sudo port install gcc44 py25-zlib py25-numpy py25-scipy mercurial
Note that compiling gcc42 takes a significant time (hours) so it is probably Note that compiling gcc takes a significant time (hours) so it is probably
not the best solution if you are in a rush! It may happen that SciPy not the best solution if you are in a rush! It may happen that SciPy
fails to compile the first time and still compiles just fine on a second fails to compile the first time and still compiles just fine on a second
try. Same thing with py25-zlib. try. Same thing with py25-zlib.
- Install some kind of BLAS library (TODO: how?) - scipy depends on ATLAS (a BLAS library), which will be installed by MacPorts.
- Set ``THEANO_BLAS_LDFLAGS`` to something which will link against said BLAS - Set ``THEANO_BLAS_LDFLAGS`` to something which will link against said BLAS
library. E.g., ``THEANO_BLAS_LDFLAGS='-lcblas -latlas -lgfortran'``. library. E.g., ``THEANO_BLAS_LDFLAGS='-lcblas -latlas -lgfortran'``.
This advice has not been tested recently, so please inform us of your results. These installation instructions have not been tested recently, please inform us of your results!
We would be especially interested in dependencies that we missed listing, as well as tests
that fail on your platform (use the ``theano-users@googlegroups.com`` mailing list).
Windows Windows
...@@ -216,7 +254,8 @@ but this has not been tested yet. ...@@ -216,7 +254,8 @@ but this has not been tested yet.
tar zxvf lapack.tgz tar zxvf lapack.tgz
cd lapack-3.2.1 cd lapack-3.2.1
gfortran -shared -O3 -o libblas.dll BLAS/SRC/*.f gfortran -shared -O3 -o libblas.dll BLAS/SRC/*.f
mv libblas.dll /mingw/lib cp libblas.dll /mingw/lib
mv libblas.dll /mingw/bin
- Install `Mercurial <http://mercurial.selenic.com/downloads/>`__ - Install `Mercurial <http://mercurial.selenic.com/downloads/>`__
(you can use the regular Windows release, you do not need TortoiseHg). (you can use the regular Windows release, you do not need TortoiseHg).
...@@ -246,9 +285,9 @@ Generating the documentation ...@@ -246,9 +285,9 @@ Generating the documentation
---------------------------- ----------------------------
You can read the latest HTML documentation `here You can read the latest HTML documentation `here
<http://pylearn.org/theano/contents.html>`__. <http://deeplearning.net/theanodoc>`__.
You can download the latest PDF documentation `here You can download the latest PDF documentation `here
<http://pylearn.org/theano/theano.pdf>`__. <http://deeplearning.net/theanodoc/theano.pdf>`__.
We recommend you look at the documentation on the website, since it We recommend you look at the documentation on the website, since it
will be more current than the documentation included with the package. will be more current than the documentation included with the package.
......
...@@ -21,11 +21,10 @@ Developer Start Guide ...@@ -21,11 +21,10 @@ Developer Start Guide
Accounts Accounts
======== ========
To obtain developer access: send an email to an admin with an username and To obtain developer access: register with `Assembla
temporary password. Pending approval, this will give you access to both the <http://www.assembla.com/>`_ and add yourself as a watcher on the `Theano space
repository and Trac. You should then change your password in the <http://www.assembla.com/spaces/theano>`_. Then send an email to an admin asking
`<http://pylearn.org/theano/prefs preferences>` tab - do *NOT* use a good to be promoted to a member of the project.
password! We are using plain text http which is not secure.
Theano code Theano code
...@@ -34,10 +33,9 @@ Theano code ...@@ -34,10 +33,9 @@ Theano code
*To get the source via mercurial,* you must have `mercurial *To get the source via mercurial,* you must have `mercurial
<http://www.selenic.com/mercurial/wiki/>`__ installed. <http://www.selenic.com/mercurial/wiki/>`__ installed.
The code that makes up Theano is in a single repository available in The code that makes up Theano is in a `single repository
`<http://pylearn.org/hg/Theano>`__. <http://www.assembla.com/spaces/theano/trac_mercurial_tool>`__. As a developer,
you should clone this repository like this:
As a developer, you should clone this repository like this:
.. code-block:: bash .. code-block:: bash
......
...@@ -5,43 +5,40 @@ ...@@ -5,43 +5,40 @@
Theano at a Glance Theano at a Glance
================== ==================
Theano is a Python library that allows you to define, optimize, and evaluate Theano is a Python library that lets you define, optimize, and evaluate
mathematical expressions involving multi-dimensional arrays. Using Theano it is mathematical expressions, especially ones with multi-dimensional arrays
(numpy.ndarray). Using Theano it is
possible to attain speeds rivaling hand-crafted C implementations for problems possible to attain speeds rivaling hand-crafted C implementations for problems
involving large amounts of data. It can also surpass C on a CPU by many orders involving large amounts of data. It can also surpass C on a CPU by many orders
of magnitude by taking advantage of recent GPUs. of magnitude by taking advantage of recent GPUs.
Theano melds some aspects of a computer algebra system (CAS) with Theano combines aspects of a computer algebra system (CAS) with aspects of an
aspects of an optimizing compiler. It can even transform some or all optimizing compiler. It can also generate customized C code for many
of the mathematical expression into C code and compile it into native mathematical operations. This combination of CAS with optimizing compilation
machine instructions. This combination of CAS with optimizing is particularly useful for tasks in which complicated mathematical expressions
compilation is particularly useful for tasks in which complicated are evaluated repeatedly and evaluation speed is critical. For situations
mathematical expressions are evaluated repeatedly and evaluation speed where many different expressions are each evaluated once Theano can minimize
is critical. the amount of compilation/analysis overhead, but still provide symbolic
features such as automatic differentiation.
Theano supports a range of numerical types in multiple dimensions and
a number of well-tested operations. It also allows you to compute the
gradient of an expression with respect to another. Symbolic
expressions may be compiled into functions, which work on the same
data structures as numpy_, allowing for easy interoperability.
Theano's compiler applies many optimizations of varying complexity to Theano's compiler applies many optimizations of varying complexity to
these symbolic expressions. These optimizations include, but are not these symbolic expressions. These optimizations include, but are not
limited to: limited to:
* use of GPU for computations
* constant folding * constant folding
* merging of similar subgraphs, to avoid calculating the same values * merging of similar subgraphs, to avoid redundant calculation
more than once * arithmetic simplification (e.g. ``x*y/x -> y``, ``--x -> x``)
* arithmetic simplification (``x*y/x -> y``) * inserting efficient BLAS_ operations (e.g. ``GEMM``) in a variety of
* inserting efficient BLAS_ operations contexts
* using inplace operations wherever it is safe to do so. * using memory aliasing to avoid calculation
* using inplace operations wherever it does not interfere with aliasing
Theano defines several optimizations which improve the numerical * loop fusion for elementwise sub-expressions
stability of computations. * improvements to numerical stability (e.g. :math:`\log(1+\exp(x))` and :math:`\log(\sum_i \exp(x[i]))`)
* for a complete list, see :ref:`optimizations`
Theano was written at the LISA_ lab to support the development of
efficient machine learning algorithms while minimizing human time. We Theano was written at the LISA_ lab to support rapid development of
use it especially in gradient-based learning techniques. Theano is efficient machine learning algorithms. Theano is
named after the `Greek mathematician`_, who may have been Pythagoras' named after the `Greek mathematician`_, who may have been Pythagoras'
wife. Theano is released under a BSD license (:ref:`link <license>`). wife. Theano is released under a BSD license (:ref:`link <license>`).
...@@ -92,30 +89,28 @@ machine instructions. ...@@ -92,30 +89,28 @@ machine instructions.
What does it do that they don't? What does it do that they don't?
================================ ================================
Theano is a python library and optimizing compiler for manipulating Theano is a Python library and optimizing compiler for manipulating
and evaluating expressions, especially matrix-valued and evaluating expressions, especially matrix-valued
ones. Manipulation of matrices is typically done using the numpy ones. Manipulation of matrices is typically done using the numpy
package, so what does Theano do that Python and numpy do not? package, so what does Theano do that Python and numpy do not?
- *execution speed optimizations*: Theano can use `g++` to compile - *execution speed optimizations*: Theano can use `g++` or `nvcc` to compile
parts of your expression graph into native machine code, which runs parts of your expression graph into CPU or GPU instructions, which run
much faster than python. much faster than pure Python.
- *symbolic differentiation*: Theano can automatically build symbolic graphs - *symbolic differentiation*: Theano can automatically build symbolic graphs
for computing gradients. for computing gradients.
- *stability optimizations*: Theano can recognize numerically unstable - *stability optimizations*: Theano can recognize [some] numerically unstable
expressions and compute them with more stable algorithms. expressions and compute them with more stable algorithms.
There exist another symbolic package in Python, namely sympy_. Theano The closest Python package to Theano is sympy_.
is different from sympy in the sense that while Theano allows symbolic Theano focuses more on tensor expressions than Sympy, and has more machinery
manipulation it puts more emphasis on the evaluation of these expressions for compilation. Sympy has more sophisticated algebra rules and can
and being able to repeatedly evaluate them on many different inputs. Theano handle a wider variety of mathematical operations (such as series, limits, and integrals).
is also better suited to handling large tensors which have no
assumed structures.
If numpy_ is to be compared to MATLAB_ and sympy_ to Mathematica_, If numpy_ is to be compared to MATLAB_ and sympy_ to Mathematica_,
Theano is a sort of hybrid of the two which tries to make the best of Theano is a sort of hybrid of the two which tries to combine the best of
both worlds. both worlds.
...@@ -134,7 +129,8 @@ Getting started ...@@ -134,7 +129,8 @@ Getting started
the :ref:`tutorial` first though. the :ref:`tutorial` first though.
A PDF version of the online documentation may be found `here <theano.pdf>`_. A PDF version of the online documentation may be found `here
<http://deeplearning.net/theanodoc/theano.pdf>`_.
Contact us Contact us
......
...@@ -102,7 +102,7 @@ Reference ...@@ -102,7 +102,7 @@ Reference
:type updates: iterable over pairs (shared_variable, new_expression). :type updates: iterable over pairs (shared_variable, new_expression).
List, tuple or dict. List, tuple or dict.
:param updates: expressions for new SharedVariable values :param updates: expressions for new :class:`SharedVariable` values
:type givens: iterable over pairs (Var1, Var2) of Variables. :type givens: iterable over pairs (Var1, Var2) of Variables.
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
shared
function function
io io
mode mode
......
...@@ -122,7 +122,7 @@ array(10.0) ...@@ -122,7 +122,7 @@ array(10.0)
Advanced: Sharing Storage Between Functions Advanced: Sharing Storage Between Functions
------------------------------------------- -------------------------------------------
``value`` can be a :api:`theano.gof.link.Container` as well as a literal. ``value`` can be a :class:`Container` as well as a literal.
This permits linking a value of a Variable in one function to the value of a Variable in another function. This permits linking a value of a Variable in one function to the value of a Variable in another function.
By using a ``Container`` as a value we can implement shared variables between functions. By using a ``Container`` as a value we can implement shared variables between functions.
......
...@@ -26,8 +26,10 @@ environment variable 'THEANO_DEFAULT_MODE', which can in turn be overridden by ...@@ -26,8 +26,10 @@ environment variable 'THEANO_DEFAULT_MODE', which can in turn be overridden by
setting ``theano.compile.mode.default_mode`` directly, which can in turn be setting ``theano.compile.mode.default_mode`` directly, which can in turn be
overridden by passing the keyword argument to ``theano.function``. overridden by passing the keyword argument to ``theano.function``.
For a finer level of control over which optimizations are applied, and whether .. TODO::
C or python implementations are used, read :api:`compile.mode.Mode`.
For a finer level of control over which optimizations are applied, and whether
C or Python implementations are used, read.... what exactly?
Reference Reference
......
...@@ -175,7 +175,7 @@ Using Inheritance ...@@ -175,7 +175,7 @@ Using Inheritance
A friendlier way to use Module is to implement your functionality as a A friendlier way to use Module is to implement your functionality as a
subclass of Module: subclass of Module:
.. literalinclude:: ../examples/module/accumulator.py .. literalinclude:: ../../examples/module/accumulator.py
This is just like the previous example except slightly fancier. This is just like the previous example except slightly fancier.
......
.. _libdoc_compile_shared:
===========================================
:mod:`shared` - defines theano.shared
===========================================
.. module:: shared
:platform: Unix, Windows
:synopsis: defines theano.shared and related classes
.. moduleauthor:: LISA
.. class:: SharedVariable
Variable with Storage that is shared between functions that it appears in.
These variables are meant to be created by registered *shared constructors*
(see :func:`shared_constructor`).
The user-friendly constructor is :func:`shared`
.. attribute:: value
Read/write access to the [non-symbolic] value/data associated with this SharedVariable.
Changes to this value will be visible to all functions using this SharedVariable.
.. method:: __init__(self, name, type, value, strict, container=None)
:param name: The name for this variable.
:type name: None or str
:param type: The :term:`Type` for this Variable.
:param value: A value to associate with this variable (a new container will be created).
:param strict: True -> assignments to ``self.value`` will not be cast
or copied, so they must have the correct type or an exception will be
raised.
:param container: The container to use for this variable. This should be used
instead of the `value` parameter. Using both is an error.
.. attribute:: container
A container to use for this SharedVariable when it is an implicit function parameter.
:type: class:`Container`
.. function:: shared(value, name=None, strict=False, **kwargs)
Return a :class:`SharedVariable` Variable, initialized with a copy or reference of `value`.
This function iterates over constructor functions (see `shared_constructor`) to find a
suitable SharedVariable subclass. The suitable one is the first constructor
that doesn't raise an exception.
This function is meant as a convenient default. If you want to use a
specific shared variable constructor, consider calling it directly.
.. note::
By passing `kwargs`, you effectively limit the set of potential constructors to those that
can accept those kwargs.
Each registered constructor ``ctor`` will be called like this:
.. code-block:: python
ctor(value, name=name, strict=strict, **kwargs)
.. attribute:: constructors
A list of shared variable constructors that will be tried in reverse
order.
.. function:: shared_constructor(ctor)
Append `ctor` to the list of shared constructors (see :func:`shared`).
.. _libdoc_gof_env:
================================================
:mod:`env` -- Graph Container [doc TODO]
================================================
.. module:: env
:platform: Unix, Windows
:synopsis: Theano Internals
.. moduleauthor:: LISA
Guide
=====
Env
---
.. _libdoc_gof_envfeature:
Env Features
-------------
.. _libdoc_gof_envfeaturelist:
Env Feature List
^^^^^^^^^^^^^^^^
* ReplaceValidate
* DestroyHandler
Reference
=========
.. class:: Env
***TODO***
...@@ -4,3 +4,17 @@ ...@@ -4,3 +4,17 @@
================================================ ================================================
:mod:`gof` -- Theano Internals [doc TODO] :mod:`gof` -- Theano Internals [doc TODO]
================================================ ================================================
.. module:: gof
:platform: Unix, Windows
:synopsis: Theano Internals
.. moduleauthor:: LISA
.. toctree::
:maxdepth: 1
env
toolbox
.. _libdoc_gof_toolbox:
================================================
:mod:`toolbox` -- [doc TODO]
================================================
.. module:: toolbox
:platform: Unix, Windows
:synopsis: Theano Internals
.. moduleauthor:: LISA
Guide
=====
.. class:: Bookkeeper(object)
.. class:: History(object)
.. method:: revert(env, checkpoint)
Reverts the graph to whatever it was at the provided
checkpoint (undoes all replacements). A checkpoint at any
given time can be obtained using self.checkpoint().
.. class:: Validator(object)
.. class:: ReplaceValidate(History, Validator)
.. method:: replace_validate(env, var, new_var, reason=None)
.. class:: NodeFinder(Bookkeeper)
.. class:: PrintListener(object)
.. _libdoc_tensor: .. _libdoc_tensor:
================================================== ==================================================
:mod:`tensor` -- Types and Ops for Symbolic numpy :mod:`tensor` -- Types and Ops for Symbolic numpy
================================================== ==================================================
......
...@@ -109,9 +109,10 @@ Reference ...@@ -109,9 +109,10 @@ Reference
.. method:: updates() .. method:: updates()
:returns: a list of all the (state, new_state) update pairs from the :returns: a list of all the (state, new_state) update pairs from the
random variables it has returned. This can be a convenient shortcut random variables it has returned.
to enumerating all the random variables in a large graph in the
``update`` paramter of function. This can be a convenient shortcut to enumerating all the random
variables in a large graph in the ``update`` parameter of function.
.. method:: seed(meta_seed) .. method:: seed(meta_seed)
......
...@@ -17,3 +17,6 @@ TODO: Give examples for how to use these things! They are pretty complicated. ...@@ -17,3 +17,6 @@ TODO: Give examples for how to use these things! They are pretty complicated.
.. function:: downsample2D(*todo) .. function:: downsample2D(*todo)
.. function:: fft(*todo) .. function:: fft(*todo)
[James has some code for this, but hasn't gotten it into the source tree yet.]
...@@ -110,7 +110,7 @@ and giving ``z`` as output: ...@@ -110,7 +110,7 @@ and giving ``z`` as output:
>>> f = function([x, y], z) >>> f = function([x, y], z)
The first argument to :ref:`function <libdoc_compile_function>` is a list of Variables The first argument to :func:`function <function.function>` is a list of Variables
that will be provided as inputs to the function. The second argument that will be provided as inputs to the function. The second argument
is a single Variable *or* a list of Variables. For either case, the second is a single Variable *or* a list of Variables. For either case, the second
argument is what we want to see as output when we apply the function. argument is what we want to see as output when we apply the function.
......
差异被折叠。
...@@ -97,3 +97,88 @@ Use your imagination :) ...@@ -97,3 +97,88 @@ Use your imagination :)
This can be a really powerful debugging tool. This can be a really powerful debugging tool.
Note the call to ``fn`` inside the call to ``print_eval``; without it, the graph wouldn't get computed at all! Note the call to ``fn`` inside the call to ``print_eval``; without it, the graph wouldn't get computed at all!
How to use pdb ?
----------------
In the majority of cases, you won't be executing from the interactive shell
but from a set of Python scripts. In such cases, the use of the Python
debugger can come in handy, especially as your models become more complex.
Intermediate results don't necessarily have a clear name and you can get
exceptions which are hard to decipher, due to the "compiled" nature of
functions.
Consider this example script ("ex.py"):
.. code-block:: python
import theano
import numpy
import theano.tensor as T
a = T.dmatrix('a')
b = T.dmatrix('b')
f = theano.function([a,b], [a*b])
# matrices chosen so dimensions are unsuitable for multiplication
mat1 = numpy.arange(12).reshape((3,4))
mat2 = numpy.arange(25).reshape((5,5))
f(mat1, mat2)
This is actually so simple the debugging could be done easily, but it's for
illustrative purposes. As the matrices can't be element-wise multiplied
(unsuitable shapes), we get the following exception:
.. code-block:: text
File "ex.py", line 14, in <module>
f(mat1, mat2)
File "/u/username/Theano/theano/compile/function_module.py", line 451, in __call__
File "/u/username/Theano/theano/gof/link.py", line 271, in streamline_default_f
File "/u/username/Theano/theano/gof/link.py", line 267, in streamline_default_f
File "/u/username/Theano/theano/gof/cc.py", line 1049, in execute ValueError: ('Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 5)', Elemwise{mul,no_inplace}(a, b), Elemwise{mul,no_inplace}(a, b))
The call stack contains some useful information to trace back the source
of the error. There's the script where the compiled function was called --
but if you're using (improperly parameterized) prebuilt modules, the error
might originate from ops in these modules, not this script. The last line
tells us about the Op that caused the exception. In this case it's a "mul"
involving Variables named "a" and "b". But suppose we instead had an
intermediate result to which we hadn't given a name.
After learning a few things about the graph structure in Theano, we can use
the Python debugger to explore the graph, and then we can get runtime
information about the error. Matrix dimensions, especially, are useful to
pinpoint the source of the error. In the printout, there are also 2 of the 4
dimensions of the matrices involved, but for the sake of example say we'd
need the other dimensions to pinpoint the error. First, we re-launch with
the debugger module and run the program with "c":
.. code-block:: text
python -m pdb ex.py
> /u/username/experiments/doctmp1/ex.py(1)<module>()
-> import theano
(Pdb) c
Then we get back the above error printout, but the interpreter breaks in
that state. Useful commands here are
* "up" and "down" (to move up and down the call stack),
* "l" (to print code around the line in the current stack position),
* "p variable_name" (to print the string representation of 'variable_name'),
* "p dir(object_name)", using the Python dir() function to print the list of an object's members
Here, for example, I do "up", and a simple "l" shows me there's a local
variable "node". This is the "node" from the computation graph, so by
following the "node.inputs", "node.owner" and "node.outputs" links I can
explore around the graph.
That graph is purely symbolic (no data, just symbols to manipulate it
abstractly). To get information about the actual parameters, you explore the
"thunks" objects, which bind the storage for the inputs (and outputs) with
the function itself (a "thunk" is a concept related to closures). Here, to
get the current node's first input's shape, you'd therefore do "p
thunk.inputs[0][0].shape", which prints out "(3, 4)".
...@@ -197,8 +197,8 @@ array(33.0) ...@@ -197,8 +197,8 @@ array(33.0)
.. _functionstateexample: .. _functionstateexample:
Including values in a symbolic graph Using shared variables
==================================== ======================
It is also possible to make a function with an internal state. For It is also possible to make a function with an internal state. For
example, let's say we want to make an accumulator: at the beginning, example, let's say we want to make an accumulator: at the beginning,
...@@ -214,7 +214,7 @@ internal state, and returns the old state value. ...@@ -214,7 +214,7 @@ internal state, and returns the old state value.
>>> accumulator = function([inc], state, updates=[(state, state+inc)]) >>> accumulator = function([inc], state, updates=[(state, state+inc)])
This code introduces a few new concepts. The ``shared`` function constructs This code introduces a few new concepts. The ``shared`` function constructs
so-called *shared variables*. These are hybrid symbolic and non-symbolic so-called :term:`shared variables`. These are hybrid symbolic and non-symbolic
variables. Shared variables can be used in symbolic expressions just like variables. Shared variables can be used in symbolic expressions just like
the objects returned by ``dmatrices(...)`` but they also have a ``.value`` the objects returned by ``dmatrices(...)`` but they also have a ``.value``
property that defines the value taken by this symbolic variable in *all* the property that defines the value taken by this symbolic variable in *all* the
...@@ -268,8 +268,8 @@ updates). Also, theano has more control over where and how shared variables are ...@@ -268,8 +268,8 @@ updates). Also, theano has more control over where and how shared variables are
allocated, which is one of the important elements of getting good performance allocated, which is one of the important elements of getting good performance
on the GPU. on the GPU.
It may happen that you have constructed a symbolic graph on top of a It may happen that you expressed some formula using a shared variable, but
shared variable, but you do *not* want to use its value. In this case, you can use the you do *not* want to use its value. In this case, you can use the
``givens`` parameter of ``function`` which replaces a particular node in a graph ``givens`` parameter of ``function`` which replaces a particular node in a graph
for the purpose of one particular function. for the purpose of one particular function.
...@@ -290,5 +290,94 @@ substitution to be co-dependent, the order of substitution is not defined, so ...@@ -290,5 +290,94 @@ substitution to be co-dependent, the order of substitution is not defined, so
the substitutions have to work in any order. the substitutions have to work in any order.
Using Random Numbers
====================
Because in Theano you first express everything symbolically and
afterwards compile this expression to get functions,
using pseudo-random numbers is not as straightforward as it is in
numpy, though also not too complicated.
The way to think about putting randomness into Theano's computations is
to put random variables in your graph. Theano will allocate a numpy
RandomStream object (a random number generator) for each such
variable, and draw from it as necessary. I'll call this sort of
sequence of random numbers a *random stream*. *Random streams* are at
their core shared variables, so the observations on shared variables
hold here as well.
Brief example
-------------
Here's a brief example. The setup code is:
.. code-block:: python
from theano.tensor.shared_randomstreams import RandomStreams
srng = RandomStreams(seed=234)
rv_u = srng.uniform((2,2))
rv_n = srng.normal((2,2))
f = function([], rv_u, updates=[rv_u.update])
g = function([], rv_n) #omitting rv_n.update
nearly_zeros = function([], rv_u + rv_u - 2 * rv_u, updates=[rv_u.update])
Here, 'rv_u' represents a random stream of 2x2 matrices of draws from a uniform
distribution. Likewise, 'rv_n' represents a random stream of 2x2 matrices of
draws from a normal distribution. The distributions that are implemented are
defined in :class:`RandomStreams`.
Now let's use these things. If we call f(), we get random uniform numbers.
Since we are updating the internal state of the random number generator (via
the ``updates`` argument), we get different random numbers every time.
>>> f_val0 = f()
>>> f_val1 = f() #different numbers from f_val0
When we omit the updates argument (as in ``g``) to ``function``, then the
random number generator state is not affected by calling the returned function. So for example,
calling ``g`` multiple times will return the same numbers.
>>> g_val0 = g() # different numbers from f_val0 and f_val1
>>> g_val0 = g() # same numbers as g_val0 !!!
An important remark is that a random variable is drawn at most once during any
single function execution. So the ``nearly_zeros`` function is guaranteed to
return approximately 0 (except for rounding error) even though the ``rv_u``
random variable appears three times in the output expression.
>>> nearly_zeros = function([], rv_u + rv_u - 2 * rv_u, updates=[rv_u.update])
Seeding Streams
----------------
Random variables can be seeded individually or collectively.
You can seed just one random variable by seeding or assigning to the
``.rng.value`` attribute.
>>> rv_u.rng.value.seed(89234) # seeds the generator for rv_u
You can also seed *all* of the random variables allocated by a :class:`RandomStreams`
object by that object's ``seed`` method. This seed will be used to seed a
temporary random number generator, that will in turn generate seeds for each
of the random variables.
>>> srng.seed(902340) # seeds rv_u and rv_n with different seeds each
Sharing Streams between Functions
---------------------------------
As usual for shared variables, the random number generators used for random
variables are common between functions. So our ``nearly_zeros`` function will
update the state of the generators used in function ``f`` above.
For example:
>>> state_after_v0 = rv_u.rng.value.get_state()
>>> nearly_zeros() # this affects rv_u's generator
>>> v1 = f()
>>> rv_u.rng.value.set_state(state_after_v0)
>>> v2 = f() # v2 != v1
...@@ -18,12 +18,16 @@ of Theano. Let's import that subpackage under a handy name. I like ...@@ -18,12 +18,16 @@ of Theano. Let's import that subpackage under a handy name. I like
If that worked you're ready for the tutorial, otherwise check your If that worked you're ready for the tutorial, otherwise check your
installation (see :ref:`install`). installation (see :ref:`install`).
Throughout the tutorial, bear in mind that there is a :ref:`glossary` to help
you out.
.. toctree:: .. toctree::
numpy numpy
adding adding
examples examples
loading_and_saving loading_and_saving
symbolic_graphs
modes modes
remarks remarks
debug_faq debug_faq
......
.. tutorial_loadsave: .. _tutorial_loadsave:
================== ==================
Loading and Saving Loading and Saving
================== ==================
......
...@@ -8,7 +8,7 @@ Using different compiling modes ...@@ -8,7 +8,7 @@ Using different compiling modes
Mode Mode
==== ====
Everytime :ref:`theano.function <libdoc_compile_function>` is called Everytime :func:`theano.function <function.function>` is called
the symbolic relationships between the input and output Theano *variables* the symbolic relationships between the input and output Theano *variables*
are optimized and compiled. The way this compilation occurs are optimized and compiled. The way this compilation occurs
is controlled by the value of the ``mode`` parameter. is controlled by the value of the ``mode`` parameter.
...@@ -25,7 +25,7 @@ The default mode is typically ``FAST_RUN``, but it can be controlled via ...@@ -25,7 +25,7 @@ The default mode is typically ``FAST_RUN``, but it can be controlled via
the environment variable ``THEANO_DEFAULT_MODE``, which can in turn be the environment variable ``THEANO_DEFAULT_MODE``, which can in turn be
overridden by setting `theano.compile.mode.default_mode` directly, overridden by setting `theano.compile.mode.default_mode` directly,
which can in turn be overridden by passing the keyword argument to which can in turn be overridden by passing the keyword argument to
:ref:`theano.function <libdoc_compile_function>`. :func:`theano.function <function.function>`.
================= =============================================================== =============================================================================== ================= =============================================================== ===============================================================================
short name Full constructor What does it do? short name Full constructor What does it do?
...@@ -91,7 +91,7 @@ ProfileMode ...@@ -91,7 +91,7 @@ ProfileMode
Beside checking for errors, another important task is to profile your Beside checking for errors, another important task is to profile your
code. For this Theano uses a special mode called ProfileMode which has code. For this Theano uses a special mode called ProfileMode which has
to be passed as an argument to :ref:`theano.function <libdoc_compile_function>`. Using the ProfileMode is a three-step process. to be passed as an argument to :func:`theano.function <function.function>`. Using the ProfileMode is a three-step process.
Creating a ProfileMode Instance Creating a ProfileMode Instance
......
.. _tutorial_graphstructures:
================
Graph Structures
================
Debugging or profiling code written in Theano is not that simple if you
do not know what goes on under the hood. This chapter is meant to
introduce you to a required minimum of the inner workings of Theano,
for more details see :ref:`extending`.
The first step in writing Theano code is to write down all mathematical
relations using symbolic placeholders (**variables**). When writing down
these expressions you use operations like ``+``, ``-``, ``**``,
``sum()``, ``tanh()``. All these are represented internally as **ops**.
An **op** represents a certain computation on some type of inputs
producing some type of output. You can see it as a function definition
in most programming languages.
Theano builds internally a graph structure composed of interconnected
**variable** nodes, **op** nodes and **apply** nodes. An
**apply** node represents the application of an **op** to some
**variables**. It is important to make the difference between the
definition of a computation represented by an **op** and its application
to some actual data which is represented by the **apply** node. For more
details about these building blocks see :ref:`variable`, :ref:`op`,
:ref:`apply`. A graph example is the following:
**Code**
.. code-block:: python
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
**Diagram**
.. figure:: apply.png
:align: center
Arrows represent references to the Python objects pointed at. The blue
box is an :ref:`apply` node. Red boxes are :ref:`variable` nodes. Green
circles are :ref:`Ops <op>`. Purple boxes are :ref:`Types <type>`.
The graph can be traversed starting from outputs (the result of some
computation) down to its inputs using the owner field.
Take for example the following code:
.. code-block:: python
x = T.dmatrix('x')
y = x*2.
If you print ``type(y.owner)`` you get ``<class 'theano.gof.graph.Apply'>``,
which is the apply node that connects the op and the inputs to get this
output. You can now print the name of the op that is applied to get
``y``:
>>> y.owner.op.name
'Elemwise{mul,no_inplace}'
So an elementwise multiplication is used to compute ``y``. This
multiplication is done between the inputs
>>> len(y.owner.inputs)
2
>>> y.owner.inputs[0]
x
>>> y.owner.inputs[1]
InplaceDimShuffle{x,x}.0
Note that the second input is not 2 as we would have expected. This is
because 2 was first :term:`broadcasted <broadcasting>` to a matrix of
same shape as x. This is done by using the op ``DimShuffle`` :
>>> type(y.owner.inputs[1])
<class 'theano.tensor.basic.TensorVariable'>
>>> type(y.owner.inputs[1].owner)
<class 'theano.gof.graph.Apply'>
>>> y.owner.inputs[1].owner.op
<class 'theano.tensor.elemwise.DimShuffle object at 0x14675f0'>
>>> y.owner.inputs[1].owner.inputs
[2.0]
Starting from this graph structure, it is easy to understand how
*automatic differentiation* is done, or how the symbolic relations
can be optimized for performance or stability.
Automatic Differentiation
=========================
Having the graph structure, computing automatic differentiation is
simple. The only thing :func:`tensor.grad` has to do is to traverse the
graph from the outputs back towards the inputs through all :ref:`apply`
nodes ( :ref:`apply` nodes are those who define what computations the
graph does). For each such :ref:`apply` node, its :ref:`op` defines
how to compute the gradient of the node's outputs with respect to its
inputs. Note that if an :ref:`op` does not provide this information,
it is assumed that the gradient is not defined.
Using the
`chain rule <http://en.wikipedia.org/wiki/Chain_rule>`_
these gradients can be composed in order to obtain the expression of the
gradient of the graph's output with respect to the graph's inputs.
Optimizations
=============
When compiling a Theano function, what you give to the
:func:`theano.function <function.function>` is actually a graph
(starting from the outputs variables you can traverse the graph up to
the input variables). While this graph structure shows how to compute
the output from the input, it also offers the possibility to improve
the way this computation is carried out. The way optimizations work in
Theano is by identifying and replacing certain patterns in the graph
with other specialized patterns that produce the same results but are either
faster or more stable. Optimizations can also detect
identical subgraphs and ensure that the same values are not computed
twice or reformulate parts of the graph to a GPU specific version.
For example, one (simple) optimization that Theano uses is to replace
the pattern :math:`\frac{xy}{y}` by :math:`x`.
=============================
Basic Tutorial Mini-Reference
=============================
.. miniref_mode:
Mode
====
================= =============================================================== ===============================================================================
short name Full constructor What does it do?
================= =============================================================== ===============================================================================
(default) ``compile.mode.Mode(linker='py', optimizer=None)`` Python implementations with zero graph modifications.
FAST_COMPILE ``compile.mode.Mode(linker='c|py', optimizer='fast_compile')`` C implementations where available, quick and cheap graph transformations
FAST_RUN ``compile.mode.Mode(linker='c|py', optimizer='fast_run')`` C implementations where available, all available graph transformations.
DEBUG_MODE ``compile.debugmode.DebugMode()`` Both implementations where available, all available graph transformations.
================= =============================================================== ===============================================================================
.. _tensortypes:
Types
=====
.. _predefinedtypes:
Predefined types
----------------
Predefined types are
located in the :ref:`theano.tensor <libdoc_tensor>` package. The name of the types follow
a recipe:
``<dtype><dimensionality>``
Where ``<dtype>`` is one of:
==== ======== ============== ====
code type domain bits
==== ======== ============== ====
b byte signed integer 8
w word signed integer 16
i integer signed integer 32
l long signed integer 64
f float floating point 32
d double floating point 64
==== ======== ============== ====
Dimensionality is one of:

* ``scalar`` -- 0-dimensional
* ``vector`` -- 1-dimensional
* ``row`` -- 2-dimensional, with exactly one row
* ``col`` -- 2-dimensional, with exactly one column
* ``matrix`` -- 2-dimensional
So, if you want a row of 32-bit floats, it is available
as :ref:`theano.tensor.frow <libdoc_tensor_type>`.
If you want a matrix of signed 32-bit integers it is available as
:ref:`theano.tensor.imatrix <libdoc_tensor_type>`.
Each of the types described above can be constructed by two methods:
a singular version (e.g., :ref:`dmatrix <libdoc_tensor_creation>`)
and a plural version (:ref:`dmatrices <libdoc_tensor_creation>`).
When called, the singular version takes a single
argument which is the name of the *Variable* we want to make and it
makes a single Variable of that type. The plural version can either take
an integer or several strings. If an integer is provided, the method
will return that many Variables and if strings are provided, it will
create one Variable for each string, using the string as the Variable's
name. For example:
.. code-block:: python
from theano.tensor import *
x = dmatrix() # creates one Variable with no name
x = dmatrix('x') # creates one Variable with name 'x'
xyz = dmatrix('xyz') # creates one Variable with name 'xyz'
x, y, z = dmatrices(3) # creates three Variables with no names
x, y, z = dmatrices('x', 'y', 'z') # creates three Variables named 'x', 'y' and 'z'
Custom tensor types
-------------------
If you wish to use a type of tensor which is not already available here
(for example, a 3D tensor) you can build an appropriate type using
:ref:`theano.tensor.TensorType <libdoc_tensor_type>`.
The first argument you pass is the `dtype` and the second is the
`broadcastable pattern`.
Where `dtype` is one of:
=========== ================ =================
dtype domain bits
=========== ================ =================
int8 signed integer 8
int16 signed integer 16
int32 signed integer 32
int64 signed integer 64
uint8 unsigned integer 8
uint16 unsigned integer 16
uint32 unsigned integer 32
uint64 unsigned integer 64
float32 floating point 32
float64 floating point 64
complex64 complex 64 (two float32)
complex128 complex 128 (two float64)
=========== ================ =================
.. note::
Even though :ref:`theano.tensor <libdoc_tensor>` does not define any type
using ``complex`` dtypes (``complex64`` or ``complex128``),
you can define them explicitly with
:ref:`TensorType <libdoc_tensor_type>` (see example
below). However, few operations are fully supported for complex
types: as of version 0.1, only elementary operations (``+-*/``)
have C implementations. Additionally, complex types have received
little testing.
The broadcastable pattern indicates both the number of dimensions and
whether a particular dimension must have length 1.
Here is a table mapping the :ref:`broadcastable <libdoc_tensor_broadcastable>` pattern to what kind of tensor it encodes:
===================== =================================
pattern interpretation
===================== =================================
[] scalar
[True] 1D scalar (vector of length 1)
[True, True] 2D scalar (1x1 matrix)
[False] vector
[False, False] matrix
[False] * n nD tensor
[True, False] row (1xN matrix)
[False, True] column (Mx1 matrix)
[False, True, False] A Mx1xP tensor (a)
[True, False, False] A 1xNxP tensor (b)
[False, False, False] A MxNxP tensor (pattern of a + b)
===================== =================================
For dimensions in which broadcasting is False, the length of this
dimension can be 1 or more. For dimensions in which broadcasting is True,
the length of this dimension must be 1.
When two tensors have a different number of dimensions, the broadcastable
pattern is *expanded to the left*, by padding with ``True``. For example,
a vector's pattern, ``[False]``, could be expanded to ``[True, False]``, and
would behave like a row (1xN matrix). In the same way, a matrix (``[False,
False]``) would behave like a 1xNxP tensor (``[True, False, False]``).
If we wanted to create a type representing a 3D array of unsigned
bytes, we would do:
.. code-block:: python
# 3D tensor of unsigned bytes
mytype = theano.tensor.TensorType('uint8', [False]*3)
# complex types (based on complex64)
my_cscalar = theano.tensor.TensorType('complex64', [])
my_cmatrix = theano.tensor.TensorType('complex64', [False, False])
...@@ -60,8 +60,8 @@ import scalar ...@@ -60,8 +60,8 @@ import scalar
import sparse import sparse
import gradient import gradient
import gof import gof
import floatx import floatX
floatx.set_floatX() floatX.set_floatX()
## import scalar_opt ## import scalar_opt
......
...@@ -150,6 +150,10 @@ def lock(tmp_dir, timeout=120, min_wait=5, max_wait=10, verbosity=1): ...@@ -150,6 +150,10 @@ def lock(tmp_dir, timeout=120, min_wait=5, max_wait=10, verbosity=1):
while os.path.isdir(tmp_dir): while os.path.isdir(tmp_dir):
try: try:
read_owner = open(lock_file).readlines()[0].strip() read_owner = open(lock_file).readlines()[0].strip()
# The following line does nothing but raise an exception
# if somehow something is wrong in the owner format, to
# avoid crashing later on.
read_owner.split('_')[0]
except: except:
read_owner = 'failure' read_owner = 'failure'
if last_owner == read_owner: if last_owner == read_owner:
...@@ -163,9 +167,13 @@ def lock(tmp_dir, timeout=120, min_wait=5, max_wait=10, verbosity=1): ...@@ -163,9 +167,13 @@ def lock(tmp_dir, timeout=120, min_wait=5, max_wait=10, verbosity=1):
time_start = time.time() time_start = time.time()
no_display = (verbosity == 0) no_display = (verbosity == 0)
if not no_display: if not no_display:
info('Waiting for existing lock by %s (I am %s)' % ( if read_owner == 'failure':
read_owner, my_pid)) msg = 'unknown process'
info("To manually release the lock, delete", lock_file) else:
msg = "process '%s'" % read_owner.split('_')[0]
info("Waiting for existing lock by %s (I am "
"process '%s')" % (msg, my_pid))
info("To manually release the lock, delete", tmp_dir)
if verbosity <= 1: if verbosity <= 1:
no_display = True no_display = True
time.sleep(random.uniform(min_wait, max_wait)) time.sleep(random.uniform(min_wait, max_wait))
......
#!/usr/bin/env python
import sys
def filter_output(fd_in):
    """Return only the interesting lines of a test-run log.

    Keeps lines whose first token is "File" and whose last token starts
    with 'test' (traceback frames pointing into test files), and lines
    whose first token starts with "ImportError".

    :param fd_in: iterable of text lines (e.g. an open file or sys.stdin)
    :return: the selected lines concatenated into one string
    """
    # Collect into a list and join once: repeated `s += line` string
    # concatenation is quadratic in the number of kept lines.
    kept = []
    for line in fd_in:
        toks = line.split()
        if not toks:
            continue
        if toks[0] == "File" and toks[-1].startswith('test'):
            kept.append(line)
        if toks[0].startswith("ImportError"):
            kept.append(line)
    return "".join(kept)
if __name__ == "__main__":
import pdb;pdb.set_trace()
if len(sys.argv)>1:
print filter_output(open(sys.argv[1]))
else:
print filter_output(sys.stdin)
差异被折叠。
import numpy
import theano
import theano.sandbox.scan
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
def test_1():
    """Scan as a pure generator: x_t = 2 * x_{t-1}."""
    def f_pow2(x_tm1):
        return (2*x_tm1, {})
    s = theano.tensor.dvector()
    n_steps = theano.tensor.dscalar()
    Y = theano.sandbox.scan.scan(f_pow2, [], s, [], n_steps=n_steps)
    f1 = theano.function([s, n_steps], Y)
    # BUG FIX: numpy.any only required ONE element to match, so the test
    # could pass on wrong output; compare every element with a tolerance.
    assert numpy.allclose(f1([1], 3), [2, 4, 8])
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2():
    """Simple RNN step x_t = u_t*W_in + x_{t-1}*W with symbolic weights."""
    def f_rnn(u_t, x_tm1, W_in, W):
        return (u_t*W_in + x_tm1*W, {})
    u = theano.tensor.dvector()
    x0 = theano.tensor.dvector()
    W_in = theano.tensor.dscalar()
    W = theano.tensor.dscalar()
    Y = theano.sandbox.scan.scan(f_rnn, u, x0, [W_in, W])
    f2 = theano.function([u, x0, W_in, W], Y)
    # BUG FIX: numpy.any only required ONE element to match; check the
    # whole sequence within a tolerance instead.
    assert numpy.allclose(f2([1, 2, 3, 4], [1], .1, 1),
                          numpy.array([1.1, 1.3, 1.6, 2.]))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3():
    """Same RNN as test_2 but with the weights as shared variables."""
    u = theano.tensor.dvector()
    x0 = theano.tensor.dvector()
    W_in = theano.shared(.1, name='w_in')
    W = theano.shared(1., name='w')
    def f_rnn_shared(u_t, x_tm1):
        return (u_t*W_in + x_tm1*W, {})
    Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0, [])
    f3 = theano.function([u, x0], Y)
    # BUG FIX: numpy.any only required ONE element to match; check the
    # whole sequence within a tolerance instead.
    assert numpy.allclose(f3([1, 2, 3, 4], [1]),
                          numpy.array([1.1, 1.3, 1.6, 2.]))
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4():
    """RNN with two inputs and two outputs; matrix/vector dimensions."""
    W_in2 = theano.shared(numpy.array([1., 2.]), name='win2')
    W = theano.shared(numpy.array([[2., 1.], [1., 1.]]), name='w')
    W_out = theano.shared(numpy.array([.5, 1.]), name='wout')
    W_in1 = theano.tensor.dmatrix('win')
    u1 = theano.tensor.dmatrix('u1')
    u2 = theano.tensor.dvector('u2')
    x0 = theano.tensor.dmatrix('x0')
    y0 = theano.tensor.dvector('y0')
    def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
        return ({}, [theano.dot(u1_t, W_in1) + u2_t * W_in2 +
                     theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)])
    Y = theano.sandbox.scan.scan(f_rnn_cmpl, [u1, u2], [x0, y0], W_in1)
    f4 = theano.function([u1, u2, x0, y0, W_in1], Y)
    (x, y) = f4(numpy.array([[1, 2], [1, 2], [1, 2]]),
                numpy.array([1, 2, 3]),
                numpy.array([[0, 0]]),
                numpy.array([1]),
                numpy.array([[1, 1], [1, 1]]))
    # Use a tolerance-based comparison: exact float equality is fragile.
    assert numpy.allclose(x, numpy.array([[4., 5.], [18., 16.], [58., 43.]]))
    assert numpy.allclose(y, numpy.array([0., 7., 25.]))
# basic ESN using updates
def test_5():
    """Echo-state network whose reservoir state is carried via an update."""
    W_in = theano.shared(numpy.array([1., 1.]), name='win')
    W = theano.shared(numpy.array([[.1, 0.], [.0, .1]]), name='w')
    W_out = theano.shared(numpy.array([.5, 1.]), name='wout')
    u = theano.tensor.dvector('u')
    x = theano.shared(numpy.array([0., 0.]), 'x')
    y0 = theano.tensor.dvector('y0')
    def f_ESN(u_t):
        return (theano.dot(x, W_out),
                {x: W_in*u_t + theano.dot(x, W)})
    Y = theano.sandbox.scan.scan(f_ESN, u, y0, [], outputs_taps={0: []})
    f5 = theano.function([u, y0], Y)
    out = f5(numpy.array([1, 2, 3]), numpy.array([0]))
    # BUG FIX (twofold): `assert` on a multi-element comparison array
    # raises "truth value ambiguous"; and the expected middle value was
    # wrong: x goes [0,0] -> [1,1] -> [2.1,2.1], so
    # y = dot(x, W_out) = [0, 1.5, 3.15] (not 1.4).
    assert numpy.allclose(out, numpy.array([0., 1.5, 3.15]))
# basic ESN using updates ; moving backwards
def test_6():
    """Same ESN as test_5 but scanning the sequence backwards."""
    W_in = theano.shared(numpy.array([1., 1.]), name='win')
    W = theano.shared(numpy.array([[.1, 0.], [.0, .1]]), name='w')
    W_out = theano.shared(numpy.array([.5, 1.]), name='wout')
    u = theano.tensor.dvector('u')
    x = theano.shared(numpy.array([0., 0.]), 'x')
    y0 = theano.tensor.dvector('y0')
    def f_ESN(u_t):
        return (theano.dot(x, W_out),
                {x: W_in*u_t + theano.dot(x, W)})
    Y = theano.sandbox.scan.scan(f_ESN, u, y0, [], outputs_taps={0: []},
                                 go_backwards=True)
    f6 = theano.function([u, y0], Y)
    out = f6(numpy.array([1, 2, 3]), numpy.array([0]))
    # BUG FIX: `assert` on a multi-element comparison array raises
    # "truth value ambiguous"; use an elementwise tolerance check.
    assert numpy.allclose(out, numpy.array([0., 4.5, 3.45]))
'''
TO TEST:
- test taps (for sequences and outputs )
- test gradient (one output)
- test gradient (multiple outputs)
 - test gradient (go_backwards)
- test gradient (multiple outputs / some uncomputable )
- test gradient (truncate_gradient)
- test gradient (force_gradient)
- test inplace map
'''
if __name__ == '__main__':
    # Run every test in order when executed as a script.
    for _test in (test_1, test_2, test_3, test_4, test_5, test_6):
        _test()
差异被折叠。
from scan import Scan
import unittest import unittest
import theano import theano
import theano.sandbox.scan
import random import random
import numpy.random import numpy.random
...@@ -74,20 +75,214 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None, ...@@ -74,20 +75,214 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None,
def compareArrays(a, b):
    """Return True iff `a` and `b` are elementwise equal within 1e-5.

    Generalized to accept any array-like (list, tuple, ndarray, scalar)
    via numpy.asarray, instead of only converting lists and tuples.
    The two arguments must be broadcast-compatible.
    """
    a = numpy.asarray(a)
    b = numpy.asarray(b)
    return numpy.all(numpy.abs(a - b) < 1e-5)
# Naming convention :
# u_1,u_2,.. -> sequences
# s_1,s_2,.. -> initial states
# w_1,w_2,.. -> non-sequences
###################################
class T_Scan(unittest.TestCase): class T_Scan(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
    def test_one(self):
        # Placeholder so the suite is never empty; performs no checks.
        pass
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
def test_1(self):
def f_pow2(x_tm1):
return (2*x_tm1, {})
s = theano.tensor.dvector()
n_steps = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_pow2, [],s, [],n_steps = n_steps)
f1 = theano.function([s,n_steps], Y)
assert(compareArrays(f1([1],3), [2,4,8]))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2(self):
def f_rnn(u_t,x_tm1,W_in, W):
return (u_t*W_in+x_tm1*W, {})
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_rnn, u,x0,[W_in,W])
f2 = theano.function([u,x0,W_in,W], Y)
v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array([1])
v_out = numpy.array([1.1,1.3,1.6,2.])
assert(compareArrays( f2(v_u,v_x0,.1,1), v_out ) )
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_t,x_tm1):
return (u_t*W_in+x_tm1*W, {})
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0,[])
f3 = theano.function([u,x0], Y)
v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array([1.])
v_out = numpy.array([1.1,1.3,1.6,2.])
assert(compareArrays(f3(v_u,v_x0),v_out))
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4(self):
W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
W_out = theano.shared(numpy.array([.5,1.]), name = 'wout')
W_in1 = theano.tensor.dmatrix('win')
u1 = theano.tensor.dmatrix('u1')
u2 = theano.tensor.dvector('u2')
x0 = theano.tensor.dmatrix('x0')
y0 = theano.tensor.dvector('y0')
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
return ({}, [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)])
Y = theano.sandbox.scan.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1)
f4 = theano.function([u1,u2,x0,y0,W_in1], Y)
v_u1 = numpy.array([[1.,2.],[1.,2.],[1.,2.]])
v_u2 = numpy.array([1.,2.,3.])
v_x0 = numpy.array([[0.,0.]])
v_y0 = numpy.array([1])
v_Win1 = numpy.array([[1.,1.],[1.,1.]])
v_x = numpy.array([[4.,5.],[18.,16.],[58.,43.]])
v_y = numpy.array([0.,7.,25.])
(x,y) = f4( v_u1, v_u2, v_x0, v_y0, v_Win1)
assert( compareArrays(x,v_x))
assert( compareArrays(y,v_y))
# basic ESN using updates
def test_5(self):
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]})
f5 = theano.function([u,y0],Y)
v_u = numpy.array([1.,2.,3.])
v_y0 = numpy.array([0.])
v_out = numpy.array([0.,1.5,3.15])
out = f5( v_u, v_y0 )
assert( compareArrays(v_out, out))
# basic ESN using updates ; moving backwards
def test_6(self):
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]}, \
go_backwards = True)
f6 = theano.function([u,y0],Y)
v_u = numpy.array([1.,2.,3.])
v_y0 = numpy.array([0])
v_out = numpy.array([0.,4.5,3.45])
out = f6(v_u, v_y0)
assert( compareArrays(out, v_out))
    # simple rnn, one input, one state, weights for each; input/state are
    # vectors, weights are scalars; using shared variables and past
    # taps (sequences and outputs)
    def test_7(self):
        # NOTE(review): this test only builds and compiles the scan graph;
        # it never calls f7 nor asserts anything, so it cannot fail on
        # wrong output values -- TODO add an expected-value check.
        u = theano.tensor.dvector()
        x0 = theano.tensor.dvector()
        W_in = theano.shared(.1, name = 'w_in')
        W = theano.shared(1., name ='w')
        # Each step reads the sequence element two positions back (u_tm2)
        # and the two previous outputs (x_tm1, x_tm2), as configured via
        # sequences_taps / outputs_taps below.
        def f_rnn_shared(u_tm2, x_tm1, x_tm2):
            return (u_tm2*W_in+x_tm1*W+x_tm2, {})
        Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0, [], \
            sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]})
        f7 = theano.function([u,x0], Y)
        #print f7([1,2,3,4],[1,2])
    # simple rnn, one input, one state, weights for each; input/state are
    # vectors, weights are scalars; using shared variables and past
    # taps (sequences and outputs) and future taps for sequences
    def test_8(self):
        # NOTE(review): like test_7 this only builds and compiles the
        # graph; it never calls f8 nor asserts anything -- TODO add an
        # expected-value check.
        u = theano.tensor.dvector()
        x0 = theano.tensor.dvector()
        W_in = theano.shared(.1, name = 'w_in')
        W = theano.shared(1., name ='w')
        # Each step reads the sequence both two positions back (u_tm2)
        # and two positions ahead (u_tp2), plus the two previous outputs,
        # per the taps {0:[-2,2]} / {0:[-1,-2]} below.
        def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2):
            return ((u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2, {})
        Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0, [], \
            sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]})
        f8 = theano.function([u,x0], Y)
        #print f8([1,2,3,4,5,6],[1,2])
'''
TO TEST:
- test taps (for sequences and outputs )
- test gradient (one output)
- test gradient (multiple outputs)
 - test gradient (go_backwards)
- test gradient (multiple outputs / some uncomputable )
- test gradient (truncate_gradient)
- test gradient (force_gradient)
- test_gradient (taps past/future)
- test inplace map
'''
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -1423,7 +1423,7 @@ def neg(a): ...@@ -1423,7 +1423,7 @@ def neg(a):
@_scal_elemwise @_scal_elemwise
def inv(a): def inv(a):
"""1.0/a (inplace on a)""" """1.0/a"""
@_scal_elemwise @_scal_elemwise
def log(a): def log(a):
......
...@@ -461,6 +461,10 @@ def test_asymptotic_32(): ...@@ -461,6 +461,10 @@ def test_asymptotic_32():
""" """
This test makes sure that our functions behave sensibly when huge values are present This test makes sure that our functions behave sensibly when huge values are present
""" """
#TODO: consider adding the optimization of crossentropy into the current mode for the
# purpose of running this test
for dtype in 'float32', 'float64': for dtype in 'float32', 'float64':
if dtype == 'float32': if dtype == 'float32':
x = tensor.fmatrix() x = tensor.fmatrix()
...@@ -471,7 +475,7 @@ def test_asymptotic_32(): ...@@ -471,7 +475,7 @@ def test_asymptotic_32():
y = tensor.lvector() y = tensor.lvector()
c = categorical_crossentropy(softmax(x+x2), y) c = categorical_crossentropy(softmax(x+x2), y)
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c, x)]) f = theano.function([x,y,x2], [c.sum(), tensor.grad(c.sum(), x)], mode='FAST_RUN')
if 0: if 0:
for i, n in enumerate( f.maker.env.toposort()): for i, n in enumerate( f.maker.env.toposort()):
print i, n print i, n
......
...@@ -2,7 +2,7 @@ from theano.tensor import * ...@@ -2,7 +2,7 @@ from theano.tensor import *
import theano.config as config import theano.config as config
from theano import function from theano import function
#from theano.floatx import set_floatX, xscalar, xmatrix, xrow, xcol, xvector, xtensor3, xtensor4 #from theano.floatx import set_floatX, xscalar, xmatrix, xrow, xcol, xvector, xtensor3, xtensor4
import theano.floatx as FX import theano.floatX as FX
def test_floatX(): def test_floatX():
def test(): def test():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论