Commit a77a0ef4 authored by Kumar Krishna Agrawal

Updated implementation, merge conflicts, added test

#!/bin/bash
# Script for Jenkins continuous integration pre-testing.
#
# Runs the fast "gatekeeper" checks before the heavier test jobs:
#   1. flake8 style checks (run through the test suite wrapper),
#   2. documentation build,
#   3. documentation code snippets.
#
# Exits non-zero as soon as any check fails.

# Abort on any unchecked failure (belt-and-braces with the explicit
# `|| exit 1` below) and print commands as they are executed.
set -e
set -x

# Anaconda python
export PATH="/usr/local/miniconda2/bin:$PATH"

# Test flake8
echo "===== Testing flake8"
bin/theano-nose theano/tests/test_flake8.py || exit 1

# Test documentation
echo "===== Testing documentation build"
python doc/scripts/docgen.py --nopdf --check || exit 1

echo "===== Testing documentation code snippets"
python doc/scripts/docgen.py --test --check || exit 1
#!/bin/bash
# Script for Jenkins continuous integration testing of theano base.
#
# Runs the core test suite, excluding the GPU backends (cuda and
# gpuarray), which are exercised by a separate Jenkins job.
# The script's exit status is that of the test run itself.

# Abort if any setup step fails, and print commands as they are executed.
set -e
set -x

# Anaconda python
export PATH="/usr/local/miniconda2/bin:$PATH"

echo "===== Testing theano core"
# Test theano core.  Arrays (instead of whitespace-split strings) keep
# each argument intact without relying on unquoted expansion.
PARTS=(theano -e cuda -e gpuarray)
THEANO_PARAM=("${PARTS[@]}" --with-timer --timer-top-n 10)
FLAGS="mode=FAST_RUN,floatX=float32"
THEANO_FLAGS="${FLAGS}" bin/theano-nose "${THEANO_PARAM[@]}"
#!/bin/bash
# Script for Jenkins continuous integration testing of gpu backends.
#
# Runs two GPU test suites in sequence:
#   1. the old theano.sandbox.cuda backend,
#   2. the new gpuarray backend (building libgpuarray from source first).
#
# Both suites always run, but the script exits non-zero if EITHER of
# them fails.  (Previously the exit status of the first suite was
# silently discarded because later commands overwrote $?.)

# Print commands as they are executed.
set -x

# Anaconda python
export PATH="/usr/local/miniconda2/bin:$PATH"

# CUDA toolchain
export PATH="/usr/local/cuda/bin:$PATH"
export LD_LIBRARY_PATH="/usr/local/cuda/lib64:$LD_LIBRARY_PATH"
export LIBRARY_PATH="/usr/local/cuda/lib64:$LIBRARY_PATH"

# Overall exit status: set to 1 if any test suite below fails.
STATUS=0

echo "===== Testing old theano.sandbox.cuda backend"
THEANO_CUDA_TESTS=(
    theano/sandbox/cuda/tests
    theano/misc/tests/test_pycuda_example.py
    theano/misc/tests/test_pycuda_theano_simple.py
    theano/misc/tests/test_pycuda_utils.py
    theano/tensor/tests/test_opt.py:TestCompositeCodegen
    theano/tensor/tests/test_opt.py:test_shapeoptimizer
    theano/tensor/tests/test_opt.py:test_fusion
    theano/compile/tests/test_debugmode.py:Test_preallocated_output
    theano/sparse/tests/test_basic.py:DotTests
    theano/sandbox/tests/test_multinomial.py:test_gpu_opt
    theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPU_serial
    theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPU_parallel
    theano/sandbox/tests/test_rng_mrg.py:test_GPU_nstreams_limit
    theano/sandbox/tests/test_rng_mrg.py:test_overflow_gpu_old_backend
    theano/scan_module/tests/test_scan.py:T_Scan_Cuda
)
THEANO_PARAM=("${THEANO_CUDA_TESTS[@]}" --with-timer --timer-top-n 10)
FLAGS="mode=FAST_RUN,init_gpu_device=gpu,floatX=float32"
# Record the failure but keep going so the gpuarray suite still runs.
THEANO_FLAGS="${FLAGS}" bin/theano-nose "${THEANO_PARAM[@]}" || STATUS=1

echo "===== Testing gpuarray backend"
GPUARRAY_CONFIG="Release"
DEVICE=cuda0
LIBDIR=~/tmp/local

# Make a fresh clone of libgpuarray (with no history since we don't need it).
rm -rf libgpuarray
git clone --depth 1 "https://github.com/Theano/libgpuarray.git" || exit 1

# Clean up previous installs (to make sure no old files are left).
rm -rf "$LIBDIR"
mkdir -p "$LIBDIR"

# Build libgpuarray.  A build failure makes the rest of the suite
# meaningless, so abort immediately.
mkdir libgpuarray/build
(cd libgpuarray/build && cmake .. -DCMAKE_BUILD_TYPE="${GPUARRAY_CONFIG}" -DCMAKE_INSTALL_PREFIX="$LIBDIR" && make) || exit 1

# Finally install.
(cd libgpuarray/build && make install) || exit 1

# Export paths (both lib64/ and lib/ are added because the install
# layout differs between platforms).
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$LIBDIR/lib64/"
export LIBRARY_PATH="$LIBRARY_PATH:$LIBDIR/lib64/"
export CPATH="$CPATH:$LIBDIR/include"
export LIBRARY_PATH="$LIBRARY_PATH:$LIBDIR/lib"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$LIBDIR/lib"

# Build the pygpu modules.
(cd libgpuarray && python setup.py build_ext --inplace -I"$LIBDIR/include" -L"$LIBDIR/lib") || exit 1
ls "$LIBDIR"
mkdir -p "$LIBDIR/lib/python"
export PYTHONPATH="${PYTHONPATH}:$LIBDIR/lib/python"

# Then install.
(cd libgpuarray && python setup.py install --home="$LIBDIR") || exit 1

# Testing theano (the gpuarray parts).
THEANO_GPUARRAY_TESTS=(
    theano/gpuarray/tests
    theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_serial
    theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel
    theano/scan_module/tests/test_scan.py:T_Scan_Gpuarray
)
FLAGS="init_gpu_device=$DEVICE,gpuarray.preallocate=1000,mode=FAST_RUN"
THEANO_FLAGS="${FLAGS}" time nosetests -v "${THEANO_GPUARRAY_TESTS[@]}" || STATUS=1

# Fail the Jenkins job if either test suite failed.
exit "$STATUS"
...@@ -33,7 +33,8 @@ install: ...@@ -33,7 +33,8 @@ install:
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then conda create --yes -q -n pyenv mkl python=2.6 numpy=1.7.1 scipy=0.11 nose=1.3.0 pip flake8=2.3 six=1.9.0 pep8=1.6.2 pyflakes=0.8.1 sphinx; fi - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then conda create --yes -q -n pyenv mkl python=2.6 numpy=1.7.1 scipy=0.11 nose=1.3.0 pip flake8=2.3 six=1.9.0 pep8=1.6.2 pyflakes=0.8.1 sphinx; fi
- if [[ $TRAVIS_PYTHON_VERSION == '3.3' ]]; then conda create --yes -q -n pyenv mkl python=3.3 numpy=1.9.1 scipy=0.14.0 nose=1.3.4 pip flake8=2.3 six=1.9.0 pep8=1.6.2 pyflakes=0.8.1 sphinx; fi - if [[ $TRAVIS_PYTHON_VERSION == '3.3' ]]; then conda create --yes -q -n pyenv mkl python=3.3 numpy=1.9.1 scipy=0.14.0 nose=1.3.4 pip flake8=2.3 six=1.9.0 pep8=1.6.2 pyflakes=0.8.1 sphinx; fi
- source activate pyenv - source activate pyenv
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; else pip install pydot-ng; fi # pydot 1.2 broke support of python 2.6. They won't try to maintain it.
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot==1.1.0; else pip install pydot-ng; fi
- pip install . --no-deps - pip install . --no-deps
- pip install flake8-future-import nose-parameterized==0.5.0 - pip install flake8-future-import nose-parameterized==0.5.0
......
...@@ -30,10 +30,14 @@ ...@@ -30,10 +30,14 @@
function build_vswitch() { function build_vswitch() {
// Build HTML string for version selector, based on ReadTheDocs theme's versions.html // Build HTML string for version selector, based on ReadTheDocs theme's versions.html
var vlabel = current_version.replace("theano_versions/", "");
if (vlabel == 'theano') {
vlabel = 'release';
}
var vswitch = ['<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions" align=left>']; var vswitch = ['<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions" align=left>'];
vswitch.push('<span class="rst-current-version" data-toggle="rst-current-version">'); vswitch.push('<span class="rst-current-version" data-toggle="rst-current-version">');
vswitch.push('<span class="fa fa-book"></span>'); vswitch.push('<span class="fa fa-book"></span>');
vswitch.push('v: ', current_version.replace("theano_versions/", ""), ' '); vswitch.push('v: ', vlabel, ' ');
vswitch.push('<span class="fa fa-caret-down"></span>'); vswitch.push('<span class="fa fa-caret-down"></span>');
vswitch.push('</span>'); vswitch.push('</span>');
......
.. _css:
.. raw:: html
<style> .black {color:black} </style>
<style> .blue {color:blue} </style>
<style> .red {color:red} </style>
<style> .green {color:green} </style>
<style> .pink {color:pink} </style>
.. role:: blue
.. role:: red
.. role:: green
.. role:: pink
.. role:: black
...@@ -10,12 +10,15 @@ Contributing ...@@ -10,12 +10,15 @@ Contributing
You want to contribute to Theano? That is great! This page explain our You want to contribute to Theano? That is great! This page explain our
workflow and some resource for doing so. workflow and some resource for doing so.
Looking for an idea for a first contribution? Check `github issue Looking for an idea for a first contribution? Check the `github issues
<https://github.com/Theano/Theano/issues?q=is%3Aopen+is%3Aissue+label%3A%22Easy+fix%22>` <https://github.com/Theano/Theano/issues?q=is%3Aopen+is%3Aissue+label%3A%22Easy+fix%22>`_
with a label ``easy fix``. They are good starters. It is recommended with a label ``easy fix``. They are good starters. It is recommended
that you write on the issue you want to work on it. This helps make that you write on the issue you want to work on it. This helps make
sure it is up to date and see if nobody else is working on it. Also, sure it is up to date and see if nobody else is working on it. Also,
we can sometimes provide more information about it. we can sometimes provide more information about it. There is also
the label `NeedSomeoneToFinish
<https://github.com/Theano/Theano/labels/NeedSomeoneToFinish>`_ that is
interesting to check. The difficulty level is variable.
Resources Resources
========= =========
...@@ -82,8 +85,8 @@ make sure there are no global impacts. ...@@ -82,8 +85,8 @@ make sure there are no global impacts.
Also, if you are changing GPU code, travis doesn't test that, because Also, if you are changing GPU code, travis doesn't test that, because
there are no GPUs on the test nodes. there are no GPUs on the test nodes.
To run the test suite with the default options, you can follow the To run the test suite with the default options, see
instructions of :ref:`testing_installation`. :ref:`test_theano`.
Each night we execute all the unit tests automatically, with several Each night we execute all the unit tests automatically, with several
sets of options. The result is sent by email to the `theano-buildbot`_ sets of options. The result is sent by email to the `theano-buildbot`_
...@@ -123,7 +126,11 @@ To setup VIM: ...@@ -123,7 +126,11 @@ To setup VIM:
#. Install flake8 (if not already installed) with:: #. Install flake8 (if not already installed) with::
pip install flake8 pip install "flake8<3"
.. warning:: Starting with version 3.0.0, flake8 changed its dependencies and
moved its Python API to a legacy module, breaking Theano's flake8 tests.
We recommend using a version prior to 3.
.. note:: You can use ``easy_install`` instead of ``pip``, and ``pep8`` .. note:: You can use ``easy_install`` instead of ``pip``, and ``pep8``
instead of ``flake8`` if you prefer. The important thing is that the instead of ``flake8`` if you prefer. The important thing is that the
...@@ -357,7 +364,7 @@ You can choose another name than "central" to reference Theano/Theano ...@@ -357,7 +364,7 @@ You can choose another name than "central" to reference Theano/Theano
to "central." to "central."
You can then test your installation of Theano by following the steps of You can then test your installation of Theano by following the steps of
:ref:`testing_installation`. :ref:`test_theano`.
Using your local copy Using your local copy
......
...@@ -10,21 +10,6 @@ Does Theano support Python 3? ...@@ -10,21 +10,6 @@ Does Theano support Python 3?
------------------------------ ------------------------------
We support both Python 2 >= 2.6 and Python 3 >= 3.3. We support both Python 2 >= 2.6 and Python 3 >= 3.3.
TypeError: object of type 'TensorVariable' has no len()
-------------------------------------------------------
If you receive the following error, it is because the Python function *__len__* cannot
be implemented on Theano variables:
.. code-block:: python
TypeError: object of type 'TensorVariable' has no len()
Python requires that *__len__* returns an integer, yet it cannot be done as Theano's variables are symbolic. However, `var.shape[0]` can be used as a workaround.
This error message cannot be made more explicit because the relevant aspects of Python's
internals cannot be modified.
Output slight numerical difference Output slight numerical difference
---------------------------------- ----------------------------------
...@@ -39,7 +24,6 @@ Every Computer Scientist Should Know About Floating-Point Arithmetic ...@@ -39,7 +24,6 @@ Every Computer Scientist Should Know About Floating-Point Arithmetic
<https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html>`_. <https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html>`_.
Faster gcc optimization Faster gcc optimization
----------------------- -----------------------
...@@ -179,33 +163,6 @@ but requires that all nodes in the graph have a C implementation: ...@@ -179,33 +163,6 @@ but requires that all nodes in the graph have a C implementation:
f(10.) f(10.)
Out of memory... but not really
-------------------------------
Occasionally Theano may fail to allocate memory when there appears to be more
than enough reporting:
Error allocating X bytes of device memory (out of memory). Driver report Y
bytes free and Z total.
where X is far less than Y and Z (i.e. X << Y < Z).
This scenario arises when an operation requires allocation of a large contiguous
block of memory but no blocks of sufficient size are available.
GPUs do not have virtual memory and as such all allocations must be assigned to
a contiguous memory region. CPUs do not have this limitation because of their
support for virtual memory. Multiple allocations on a GPU can result in memory
fragmentation which can make it more difficult to find contiguous regions
of memory of sufficient size during subsequent memory allocations.
A known example is related to writing data to shared variables. When updating a
shared variable Theano will allocate new space if the size of the data does not
match the size of the space already assigned to the variable. This can lead to
memory fragmentation which means that a contiguous block of memory of
sufficient capacity may not be available even if the free memory overall is
large enough.
Related Projects Related Projects
---------------- ----------------
...@@ -226,55 +183,3 @@ Here is a list of some of the known limitations: ...@@ -226,55 +183,3 @@ Here is a list of some of the known limitations:
interact with the rest of the graph). interact with the rest of the graph).
- Neither *goto* nor *recursion* is supported or planned within expression graphs. - Neither *goto* nor *recursion* is supported or planned within expression graphs.
"float32 / int{32, 64} gives float64"
-------------------------------------
It should be noted that using float32 and int{32, 64} together
inside a function would provide float64 as output.
Since the GPU can't compute this kind of output, it would be
preferable not to use those dtypes together.
To help you find where float64 are created, see the
:attr:`warn_float64` Theano flag.
Theano memory/speed trade-off
-----------------------------
There is a few things you can easily do to change the trade-off
between speed and memory usage. If nothing is said, this affects both the
CPU and GPU memory usage.
Could speed up and lower memory usage:
- :ref:`cuDNN <libdoc_cuda_dnn>` default cuDNN convolution use less
memory than the Theano version. But some flags allow it to use more
memory. GPU only.
- Shortly avail, multi-GPU.
Could raise memory usage but speed up computation:
- :attr:`config.lib.cnmem` =1 # Do not raise much memory usage, but if you are at the limit of GPU memory available. GPU only.
- :attr:`config.allow_gc` =False
- :attr:`config.optimizer_excluding` =low_memory , GPU only for now.
Could lower the memory usage, but raise computation time:
- :attr:`config.scan.allow_gc` =True # Probably not significant slowdown if config.lib.cnmem is used.
- :attr:`config.scan.allow_output_prealloc` =False
- Use :func:`batch_normalization()
  <theano.tensor.nnet.bn.batch_normalization>`. It uses less memory
  than building a corresponding Theano graph.
- Disable one or more scan optimizations:
- ``optimizer_excluding=scanOp_pushout_seqs_ops``
- ``optimizer_excluding=scan_pushout_dot1``
- ``optimizer_excluding=scanOp_pushout_output``
- Disable all optimization tagged as raising memory usage:
``optimizer_excluding=more_mem`` (currently only the 3 scan optimizations above)
- `float16 <https://github.com/Theano/Theano/issues/2908>`_.
If you want to analyze the memory usage during computation, the
simplest is to let the memory error happen during Theano execution and
use the Theano flags :attr:`exception_verbosity=high`.
...@@ -17,17 +17,18 @@ shapes = [ ...@@ -17,17 +17,18 @@ shapes = [
('col', (False, True)), ('col', (False, True)),
('matrix', (False,False)), ('matrix', (False,False)),
('tensor3', (False,False,False)), ('tensor3', (False,False,False)),
('tensor4', (False,False,False,False)),] ('tensor4', (False,False,False,False)),
('tensor5', (False,False,False,False,False)),]
hdr = '============ =========== ==== =========== =================================' hdr = '============ =========== ==== ============ ==================================='
print(hdr) print(hdr)
print('Constructor dtype ndim shape broadcastable') print('Constructor dtype ndim shape broadcastable')
print(hdr) print(hdr)
for letter in letters: for letter in letters:
for shape in shapes: for shape in shapes:
suff = ',)' if len(shape[1])==1 else ')' suff = ',)' if len(shape[1])==1 else ')'
s = '(' + ','.join('1' if b else '?' for b in shape[1]) + suff s = '(' + ','.join('1' if b else '?' for b in shape[1]) + suff
print('%s%-10s %-10s %-4s %-10s %-20s' %( print('%s%-10s %-10s %-4s %-11s %-20s' %(
letter[0], shape[0], letter[1], len(shape[1]), s, shape[1] letter[0], shape[0], letter[1], len(shape[1]), s, shape[1]
)) ))
print(hdr) print(hdr)
...@@ -125,6 +125,7 @@ Roughly in order of what you'll want to check out: ...@@ -125,6 +125,7 @@ Roughly in order of what you'll want to check out:
* :ref:`install` -- How to install Theano. * :ref:`install` -- How to install Theano.
* :ref:`introduction` -- What is Theano? * :ref:`introduction` -- What is Theano?
* :ref:`tutorial` -- Learn the basics. * :ref:`tutorial` -- Learn the basics.
* :ref:`troubleshooting` -- Tips and tricks for common debugging.
* :ref:`libdoc` -- Theano's functionality, module by module. * :ref:`libdoc` -- Theano's functionality, module by module.
* :ref:`faq` -- A set of commonly asked questions. * :ref:`faq` -- A set of commonly asked questions.
* :ref:`optimizations` -- Guide to Theano's graph optimizations. * :ref:`optimizations` -- Guide to Theano's graph optimizations.
...@@ -237,12 +238,15 @@ StackOverflow, follow their guidance for `answering questions <http://stackoverf ...@@ -237,12 +238,15 @@ StackOverflow, follow their guidance for `answering questions <http://stackoverf
NEWS NEWS
introduction introduction
requirements
install install
updating
tutorial/index tutorial/index
extending/index extending/index
dev_start_guide dev_start_guide
optimizations optimizations
library/index library/index
troubleshooting
glossary glossary
links links
internal/index internal/index
......
Diff is collapsed.
:orphan: .. include:: css.inc
.. _install_centos6: .. _install_centos6:
CentOS 6 Installation Instructions
##################################
Easy Installation of an optimized Theano on CentOS 6 .. warning::
==================================================== If you want to install the bleeding-edge or development version of Theano
from GitHub, please make sure you are reading `the latest version of this
page <http://deeplearning.net/software/theano_versions/dev/install_centos6.html>`_.
.. note:: .. include:: requirements.txt
It is possible to have a faster installation of Theano than the one these .. include:: install_generic.inc
instructions will provide, but this will make the installation more :start-line: 5
complicated and/or may require that you buy software. This is a simple set
of installation instructions that will leave you with a relatively
well-optimized version that uses only free software. With more work or by
investing money (i.e. buying a license to a proprietary BLAS
implementation), it is possible to gain further performance.
.. note:: Requirements through System Packages (not recommended)
------------------------------------------------------
If you are behind a proxy, you must do some extra configuration steps
before starting the installation. You must set the environment
variable ``http_proxy`` to the proxy address. Using bash this is
accomplished with the command
``export http_proxy="http://user:pass@my.site:port/"``
You can also provide the ``--proxy=[user:pass@]url:port`` parameter
to pip. The ``[user:pass@]`` portion is optional.
.. note::
We use ``pip`` for 2 reasons. First, it allows "``import module;
module.test()``" to work correctly. Second, the installation of NumPy
1.6 or 1.6.1 with ``easy_install`` raises an ImportError at the end of
the installation. To my knowledge we can ignore this error, but
this is not completely safe. ``easy_install`` with NumPy 1.5.1 does not
raise this error.
Installation steps
~~~~~~~~~~~~~~~~~~
1) ``sudo yum install python-devel python-nose python-setuptools gcc
gcc-gfortran gcc-c++ blas-devel lapack-devel atlas-devel``
2) ``sudo easy_install pip``
3) ``sudo pip install numpy==1.6.1``
4) ``sudo pip install scipy==0.10.1``
5) ``sudo pip install Theano``
Test the newly installed packages
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1) NumPy (~30s): ``python -c "import numpy; numpy.test()"``
2) SciPy (~1m): ``python -c "import scipy; scipy.test()"``
3) Theano (~30m): ``python -c "import theano; theano.test()"``
Speed test Theano/BLAS
~~~~~~~~~~~~~~~~~~~~~~
It is recommended to test your Theano/BLAS integration. There are many versions
of BLAS that exist and there can be up to 10x speed difference between them.
Also, having Theano link directly against BLAS instead of using NumPy/SciPy as
an intermediate layer reduces the computational overhead. This is
important for BLAS calls to ``ger``, ``gemv`` and small ``gemm`` operations
(automatically called when needed when you use ``dot()``). To run the
Theano/BLAS speed test:
.. code-block:: bash .. code-block:: bash
python /usr/lib/python2.*/site-packages/theano/misc/check_blas.py sudo yum install python-devel python-nose python-setuptools gcc gcc-gfortran gcc-c++ blas-devel lapack-devel atlas-devel
sudo easy_install pip
This will print a table with different versions of BLAS/numbers of
threads on multiple CPUs and GPUs. It will also print some Theano/NumPy
configuration information. Then, it will print the running time of the same
benchmarks for your installation. Try to find a CPU similar to yours in
the table, and check that the single-threaded timings are roughly the same.
Updating Theano
~~~~~~~~~~~~~~~
If you followed these installation instructions, you can execute this command
to update only Theano:
.. code-block:: bash
sudo pip install --upgrade --no-deps theano
If you want to also update NumPy/SciPy, you can run this:
.. code-block:: bash
sudo pip install --upgrade theano
Bleeding edge
~~~~~~~~~~~~~
Do like in the section "Updating Theano", but use
``git+git://github.com/Theano/Theano.git`` instead of ``theano``.
.. include:: css.inc
.. _install_generic:
Installation
============
Stable Installation
-------------------
Install the latest stable version of Theano with:
.. raw:: html
<div class="highlight"><pre><span class="red">&lt;sudo&gt;</span> pip install <span class="blue">&lt;--user&gt;</span> Theano[test, doc]</pre></div>
- Any argument between <...> is optional.
- Use :red:`sudo` for a root installation.
- Use :blue:`user` for a user installation without admin rights. It will install Theano in your local site-packages.
- [test] will install the requirements for testing.
- [doc] will install the requirements in order to generate the documentation.
If you encountered any trouble, head to the :ref:`troubleshooting` page.
libgpuarray
^^^^^^^^^^^
For the stable version of Theano you need a specific version of libgpuarray,
that has been tagged ``v-9998``.
Download it with:
.. raw:: html
<div class='highlight'><pre>
git clone https://github.com/Theano/libgpuarray.git --tags
git checkout origin/v-9998
cd libgpuarray
</pre></div>
and then follow the `Step-by-step instructions <http://deeplearning.net/software/libgpuarray/installation.html#step-by-step-install>`__.
Bleeding-Edge Installation (recommended)
----------------------------------------
Install the latest, bleeding-edge, development version of Theano with:
.. raw:: html
<div class='highlight'><pre><span class="red">&lt;sudo&gt;</span> pip install <span class="blue">&lt;--user&gt;</span> <span class="pink">&lt;--no-deps&gt;</span> git+https://github.com/Theano/Theano.git#egg=Theano</pre></div>
- Any argument between <...> is optional.
- Use :red:`sudo` for a root installation.
- Use :blue:`user` for a user installation without admin rights. It will install Theano in your local site-packages.
- Use :pink:`no-deps` when you don't want the dependencies of Theano to be installed through pip. This is important when they have already been installed as system packages.
If you encountered any trouble, head to the :ref:`troubleshooting` page.
libgpuarray
^^^^^^^^^^^
Install the latest, development version of libgpuarray following the
`Step-by-step instructions <http://deeplearning.net/software/libgpuarray/installation.html#step-by-step-install>`__.
Developer Installation
----------------------
Install the developer version of Theano with:
.. raw:: html
<div class="highlight"><pre>git clone git://github.com/Theano/Theano.git
cd Theano
<span class="red">&lt;sudo&gt;</span> pip install <span class="blue">&lt;--user&gt;</span> <span class="pink">&lt;--no-deps&gt;</span> <span class="green">-e .</span></pre></div>
- Any argument between <...> is optional.
- Use :red:`sudo` for a root installation.
- Use :blue:`user` for a user installation without admin rights. It will install Theano in your local site-packages.
- Use :pink:`no-deps` when you don't want the dependencies of Theano to be installed through pip. This is important when they have already been installed as system packages.
- :green:`-e` makes your installation *editable*, i.e., it links it to your
source directory.
If you encountered any trouble, head to the :ref:`troubleshooting` page.
libgpuarray
^^^^^^^^^^^
Install the latest, development version of libgpuarray following the
`Step-by-step instructions <http://deeplearning.net/software/libgpuarray/installation.html#step-by-step-install>`__.
.. include:: css.inc
.. _install_macos:
Mac OS Installation Instructions
################################
.. warning::
If you want to install the bleeding-edge or development version of Theano
from GitHub, please make sure you are reading `the latest version of this
page <http://deeplearning.net/software/theano_versions/dev/install_macos.html>`_.
There are various ways to install Theano dependencies on a Mac. Here
we describe the process in detail with Anaconda, Homebrew or MacPorts
but if you did it differently and it worked, please let us know the
details on the `theano-users`_ mailing-list, so that we can add
alternative instructions here.
.. _theano-users: http://groups.google.com/group/theano-users?pli=1
.. include:: requirements.txt
.. _gpu_macos:
.. attention::
For MacOS you should be able to follow the above instructions to
setup CUDA, but be aware of the following caveats:
* If you want to compile the CUDA SDK code, you may need to temporarily
revert back to Apple's gcc (``sudo port select gcc``) as their Makefiles
are not compatible with MacPort's gcc.
* If CUDA seems unable to find a CUDA-capable GPU, you may need to manually
toggle your GPU on, which can be done with
`gfxCardStatus <http://codykrieger.com/gfxCardStatus>`__.
.. attention::
Theano officially supports only clang on OS X. This can be installed
by getting XCode from the App Store and running it once to install the
command-line tools.
.. include:: install_generic.inc
:start-line: 5
Requirements through Homebrew (not recommended)
-----------------------------------------------
Install python with homebrew:
.. code-block:: bash
$ brew install python # or python3 if you prefer
This will install pip. Then use pip to install numpy, scipy:
.. code-block:: bash
$ pip install numpy scipy
If you want to use openblas instead of Accelerate, you have to install
numpy and scipy with hombrew:
.. code-block:: bash
$ brew tap homebrew/python
$ brew install numpy --with-openblas
$ brew install scipy --with-openblas
Requirements through MacPorts (not recommended)
-----------------------------------------------
Using `MacPorts <http://www.macports.org/>`__ to install all required
Theano dependencies is easy, but be aware that it will take a long time
(a few hours) to build and install everything.
- MacPorts requires installing XCode first (which can be found in the
Mac App Store), if you do not have it already.
If you can't install it from the App Store, look in your MacOS X installation
DVD for an old version. Then update your Mac to update XCode.
- Download and install `MacPorts <http://www.macports.org/>`__, then
ensure its package list is up-to-date with ``sudo port selfupdate``.
- Then, in order to install one or more of the required libraries, use
``port install``, e.g. as follows:
.. code-block:: bash
$ sudo port install py27-numpy +atlas py27-scipy +atlas py27-pip
This will install all the required Theano dependencies. gcc will
be automatically installed (since it is a SciPy dependency), but be
aware that it takes a long time to compile (hours)!
Having NumPy and SciPy linked with ATLAS (an optimized BLAS
implementation) is not mandatory, but recommended if you care about
performance.
- You might have some different versions of gcc, SciPy, NumPy, Python installed
on your system, perhaps via Xcode. It is a good idea to use **either** the
MacPorts version of everything **or** some other set of compatible versions
(e.g. provided by Xcode or Fink). The advantages of MacPorts are the
transparency with which everything can be installed and the fact that
packages are updated quite frequently. The following steps describe how to
make sure you are using the MacPorts version of these packages.
- In order to use the MacPorts version of Python, you will probably
need to explicitly select it with ``sudo port select python python27``. The
reason this is necessary is because you may have an Apple-provided Python
(via, for example, an Xcode installation). After performing this step, you
should check that the symbolic link provided by ``which python`` points to
the MacPorts python. For instance, on MacOS X Lion with MacPorts 2.0.3,
the output of ``which python`` is ``/opt/local/bin/python`` and this symbolic
link points to ``/opt/local/bin/python2.7``. When executing ``sudo
port select python python27-apple`` (which you should **not** do), the link
points to ``/usr/bin/python2.7``.
- Similarly, make sure that you are using the MacPorts-provided gcc:
use ``sudo port select gcc`` to see which gcc installs you have on the
system. Then execute for instance ``sudo port select gcc mp-gcc44``
to create a symlink that points to the correct (MacPorts) gcc (version 4.4
in this case).
- At this point, if you have not done so already, it may be a good idea to
close and restart your terminal, to make sure all configuration changes
are properly taken into account.
- Afterwards, please check that the ``scipy`` module that is imported in
Python is the right one (and is a recent one). For instance, ``import
scipy`` followed by ``print scipy.__version__`` and ``print scipy.__path__``
should result in a version number of at least 0.7.0 and a path that starts
with ``/opt/local`` (the path where MacPorts installs its packages). If this
is not the case, then you might have some old installation of ``scipy`` in your
``PYTHONPATH`` so you should edit ``PYTHONPATH`` accordingly.
- Please follow the same procedure with ``numpy``.
- This is covered in the MacPorts installation process, but make sure that
your ``PATH`` environment variable contains ``/opt/local/bin`` and
``/opt/local/sbin`` before any other paths (to ensure that the Python and
gcc binaries that you installed with MacPorts are visible first).
- MacPorts does not create automatically ``nosetests`` and ``pip`` symlinks
pointing to the MacPorts version, so you can add them yourself with
.. code-block:: bash
$ sudo ln -s /opt/local/bin/nosetests-2.7 /opt/local/bin/nosetests
$ sudo ln -s /opt/local/bin/pip-2.7 /opt/local/bin/pip
.. _install_others:
Other Platform-specific Installations
=====================================
.. warning::
These instructions are not kept up to date.
NVIDIA Jetson TX1 embedded platform
-----------------------------------
.. code-block:: bash
sudo apt-get install python-numpy python-scipy python-dev python-pip python-nose g++ libblas-dev git
pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git --user # Need Theano 0.8 or more recent
Gentoo
------
Brian Vandenberg emailed `installation instructions on Gentoo
<http://groups.google.com/d/msg/theano-dev/-8WCMn2FMR0/bJPasoZXaqoJ>`_,
focusing on how to install the appropriate dependencies.
Nicolas Pinto provides `ebuild scripts <https://github.com/npinto/sekyfsr-gentoo-overlay/tree/master/sci-libs/Theano>`_.
Docker images
-------------
Builds of Theano are available as `Docker <https://www.docker.com/whatisdocker>`_ images:
`Theano Docker (CPU) <https://hub.docker.com/r/kaixhin/theano/>`_ or `Theano Docker (CUDA) <https://hub.docker.com/r/kaixhin/cuda-theano/>`_.
These are updated on a weekly basis with bleeding-edge builds of Theano. Examples of running bash in a Docker container
are as follows:
.. code-block:: bash
sudo docker run -it kaixhin/theano
sudo docker run -it --device /dev/nvidiactl --device /dev/nvidia-uvm --device /dev/nvidia0 kaixhin/cuda-theano:7.0
For a guide to Docker, see the `official docs <https://docs.docker.com/userguide/>`_. For more details on how to use the
Theano Docker images, including requirements for CUDA support, consult the `source project <https://github.com/Kaixhin/dockerfiles>`_.
Diff is collapsed.
:orphan:
.. _install_windows: .. _install_windows:
Windows Installation Instructions
=================================
.. warning::
If you want to install the bleeding-edge or development version of Theano
from GitHub, please make sure you are reading `the latest version of this
page <http://deeplearning.net/software/theano_versions/dev/install_windows.html>`_.
.. warning::
Installation of Theano on Windows Theano is mainly developed and tested on Linux Machines.
==================================
These instructions show step-by-step how to install Theano and These instructions show step-by-step how to install Theano and
required dependencies on a 32- or 64-bit system using freely available required dependencies on a 32- or 64-bit system using freely available
...@@ -26,6 +32,8 @@ C/C++ (for Python 2.7 family this has to be Microsoft Visual Studio ...@@ -26,6 +32,8 @@ C/C++ (for Python 2.7 family this has to be Microsoft Visual Studio
version supporting Visual Studio 2008), and GCC (for non-CUDA C code version supporting Visual Studio 2008), and GCC (for non-CUDA C code
generated by Theano). generated by Theano).
.. _gpu_windows:
Visual Studio and CUDA Visual Studio and CUDA
###################### ######################
...@@ -37,7 +45,6 @@ Studio installation to proceed). Afterwards, the Visual Studio 2010 ...@@ -37,7 +45,6 @@ Studio installation to proceed). Afterwards, the Visual Studio 2010
can be safely removed. If someone knows how to install CUDA 5.5 can be safely removed. If someone knows how to install CUDA 5.5
without a proper Visual Studio installation, please let us know. without a proper Visual Studio installation, please let us know.
First we need to install Microsoft Visual Studio 2010 Express, which First we need to install Microsoft Visual Studio 2010 Express, which
is required to install CUDA. You can download it from is required to install CUDA. You can download it from
`Visual Studio Express `Visual Studio Express
...@@ -79,7 +86,7 @@ The package will be installed to ``C:\Program Files ...@@ -79,7 +86,7 @@ The package will be installed to ``C:\Program Files
(x86)\Common Files\Microsoft\Visual C++ for Python\9.0``. (x86)\Common Files\Microsoft\Visual C++ for Python\9.0``.
Finally download the ``stdint.h`` header from Finally download the ``stdint.h`` header from
`here <http://msinttypes.googlecode.com/svn/trunk/stdint.h>`_ and save it as `here <https://sourceforge.net/p/mspgcc/msp430-libc/ci/master/tree/include/stdint.h>`_ and save it as
``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for ``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for
Python\9.0\VC\include\stdint.h``. Python\9.0\VC\include\stdint.h``.
...@@ -619,6 +626,3 @@ follows: ...@@ -619,6 +626,3 @@ follows:
dependencies. In the case where it is a dependency, you can use the dependencies. In the case where it is a dependency, you can use the
`Dependency Walker <http://www.dependencywalker.com/>`__ utility to figure out `Dependency Walker <http://www.dependencywalker.com/>`__ utility to figure out
which one. which one.
.. _gpu_windows:
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
:mod:`debugmode` :mod:`debugmode`
================= =================
.. module:: debugmode .. module:: theano.compile.debugmode
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: defines DebugMode :synopsis: defines DebugMode
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
:mod:`function` - defines theano.function :mod:`function` - defines theano.function
=========================================== ===========================================
.. module:: function .. module:: theano.compile.function
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: defines theano.function and related classes :synopsis: defines theano.function and related classes
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
:mod:`io` - defines theano.function [TODO] :mod:`io` - defines theano.function [TODO]
=========================================== ===========================================
.. module:: io .. module:: theano.compile.io
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: defines In and Out :synopsis: defines In and Out
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
:mod:`mode` -- controlling compilation :mod:`mode` -- controlling compilation
====================================== ======================================
.. module:: mode .. module:: theano.compile.mode
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: controlling compilation :synopsis: controlling compilation
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
:mod:`nanguardmode` :mod:`nanguardmode`
=================== ===================
.. module:: nanguardmode .. module:: theano.compile.nanguardmode
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: defines NanGuardMode :synopsis: defines NanGuardMode
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
================================================ ================================================
.. module:: profilemode .. module:: theano.compile.profilemode
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: profiling Theano functions with ProfileMode :synopsis: profiling Theano functions with ProfileMode
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -487,6 +487,21 @@ import theano and print the config variable, as in: ...@@ -487,6 +487,21 @@ import theano and print the config variable, as in:
automatically to get more memory. But this can cause automatically to get more memory. But this can cause
fragmentation, see note above. fragmentation, see note above.
.. attribute:: config.gpuarray.sched
String value: ``'default'``, ``'multi'``, ``'single'``
Default: ``'default'``
Control the stream mode of contexts.
The sched parameter passed for context creation to pygpu. With
CUDA, using "multi" mean using the parameter
cudaDeviceScheduleYield. This is useful to lower the CPU overhead
when waiting for the GPU. One user found that it speeds up their other
processes that were doing data augmentation.
.. attribute:: config.gpuarray.single_stream .. attribute:: config.gpuarray.single_stream
Boolean value Boolean value
......
...@@ -2,26 +2,40 @@ ...@@ -2,26 +2,40 @@
.. _libdoc_gof_fgraph: .. _libdoc_gof_fgraph:
================================================ ================================================
:mod:`fgraph` -- Graph Container [doc TODO] :mod:`fg` -- Graph Container [doc TODO]
================================================ ================================================
.. module:: fgraph .. module:: theano.gof.fg
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: Theano Internals :synopsis: Theano Internals
.. moduleauthor:: LISA .. moduleauthor:: LISA
Guide .. _fgraph:
=====
FunctionGraph FunctionGraph
------------- -------------
.. autoclass:: theano.gof.FunctionGraph
:members:
***TODO***
.. note:: FunctionGraph(inputs, outputs) clones the inputs by
default. To avoid this behavior, add the parameter
clone=False. This is needed as we do not want cached constants
in fgraph.
.. _libdoc_gof_fgraphfeature: .. _libdoc_gof_fgraphfeature:
.. _fgraphfeature:
FunctionGraph Features FunctionGraph Features
---------------------- ----------------------
.. autoclass:: theano.gof.toolbox.Feature
:members:
.. _libdoc_gof_fgraphfeaturelist: .. _libdoc_gof_fgraphfeaturelist:
FunctionGraph Feature List FunctionGraph Feature List
...@@ -29,14 +43,3 @@ FunctionGraph Feature List ...@@ -29,14 +43,3 @@ FunctionGraph Feature List
* ReplaceValidate * ReplaceValidate
* DestroyHandler * DestroyHandler
Reference
=========
.. class:: FunctionGraph
***TODO***
.. note:: FunctionGraph(inputs, outputs) clones the inputs by
default. To avoid this behavior, add the parameter
clone=False. This is needed as we do not want cached constants
in fgraph.
...@@ -4,14 +4,12 @@ ...@@ -4,14 +4,12 @@
:mod:`graph` -- Interface for the Theano graph :mod:`graph` -- Interface for the Theano graph
============================================== ==============================================
.. module:: graph
:platform: Unix, Windows
:synopsis: Interface for types of symbolic variables
.. moduleauthor:: LISA
--------- ---------
Reference Reference
--------- ---------
.. automodule:: theano.gof.graph .. automodule:: theano.gof.graph
:members: :platform: Unix, Windows
:synopsis: Interface for types of symbolic variables
:members:
.. moduleauthor:: LISA
...@@ -4,14 +4,12 @@ ...@@ -4,14 +4,12 @@
:mod:`type` -- Interface for types of variables :mod:`type` -- Interface for types of variables
================================================ ================================================
.. module:: type
:platform: Unix, Windows
:synopsis: Interface for types of symbolic variables
.. moduleauthor:: LISA
--------- ---------
Reference Reference
--------- ---------
.. automodule:: theano.gof.type .. automodule:: theano.gof.type
:members: :platform: Unix, Windows
:synopsis: Interface for types of symbolic variables
:members:
.. moduleauthor:: LISA
...@@ -8,14 +8,12 @@ ...@@ -8,14 +8,12 @@
from theano.gof.utils import * from theano.gof.utils import *
.. module:: utils
:platform: Unix, Windows
:synopsis: Utilities functions operating on the graph
.. moduleauthor:: LISA
--------- ---------
Reference Reference
--------- ---------
.. automodule:: theano.gof.utils .. automodule:: theano.gof.utils
:members: :platform: Unix, Windows
:synopsis: Utilities functions operating on the graph
:members:
.. moduleauthor:: LISA
...@@ -61,7 +61,7 @@ To get an error if Theano can not use cuDNN, use this Theano flag: ...@@ -61,7 +61,7 @@ To get an error if Theano can not use cuDNN, use this Theano flag:
usage usage
* ``none`` : use a slower implementation with minimal memory usage * ``none`` : use a slower implementation with minimal memory usage
* ``large`` : use a sometimes faster implementation with large memory usage * ``large`` : use a sometimes faster implementation with large memory usage
* ``fft`` : use the Fast Fourrier Transform implementation of convolution * ``fft`` : use the Fast Fourier Transform implementation of convolution
(very high memory usage) (very high memory usage)
* ``guess_once`` : the first time a convolution is executed, the * ``guess_once`` : the first time a convolution is executed, the
implementation to use is chosen according to cuDNN's heuristics and reused implementation to use is chosen according to cuDNN's heuristics and reused
...@@ -83,7 +83,7 @@ To get an error if Theano can not use cuDNN, use this Theano flag: ...@@ -83,7 +83,7 @@ To get an error if Theano can not use cuDNN, use this Theano flag:
* ``none`` (default) : use the default non-deterministic convolution * ``none`` (default) : use the default non-deterministic convolution
implementation implementation
* ``deterministic`` : use a slower but deterministic implementation * ``deterministic`` : use a slower but deterministic implementation
* ``fft`` : use the Fast Fourrier Transform implementation of convolution * ``fft`` : use the Fast Fourier Transform implementation of convolution
(very high memory usage) (very high memory usage)
* ``guess_once`` : the first time a convolution is executed, the * ``guess_once`` : the first time a convolution is executed, the
implementation to use is chosen according to cuDNN's heuristics and reused implementation to use is chosen according to cuDNN's heuristics and reused
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
:mod:`printing` -- Graph Printing and Symbolic Print Statement :mod:`printing` -- Graph Printing and Symbolic Print Statement
=============================================================== ===============================================================
.. module:: printing .. module:: theano.printing
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: Provides the Print Op and graph-printing routines. :synopsis: Provides the Print Op and graph-printing routines.
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -64,9 +64,9 @@ get an error when cuDNN can not be used with them, use this flag: ...@@ -64,9 +64,9 @@ get an error when cuDNN can not be used with them, use this flag:
usage usage
* ``none`` : use a slower implementation with minimal memory usage * ``none`` : use a slower implementation with minimal memory usage
* ``large`` : use a sometimes faster implementation with large memory usage * ``large`` : use a sometimes faster implementation with large memory usage
* ``fft`` : use the Fast Fourrier Transform implementation of convolution * ``fft`` : use the Fast Fourier Transform implementation of convolution
(very high memory usage) (very high memory usage)
* ``fft_tiling`` : use the Fast Fourrier Transform implementation of convolution * ``fft_tiling`` : use the Fast Fourier Transform implementation of convolution
with tiling (high memory usage, but less than fft) with tiling (high memory usage, but less than fft)
* ``guess_once`` : the first time a convolution is executed, the * ``guess_once`` : the first time a convolution is executed, the
implementation to use is chosen according to cuDNN's heuristics and reused implementation to use is chosen according to cuDNN's heuristics and reused
...@@ -89,7 +89,7 @@ get an error when cuDNN can not be used with them, use this flag: ...@@ -89,7 +89,7 @@ get an error when cuDNN can not be used with them, use this flag:
* ``none`` (default) : use the default non-deterministic convolution * ``none`` (default) : use the default non-deterministic convolution
implementation implementation
* ``deterministic`` : use a slower but deterministic implementation * ``deterministic`` : use a slower but deterministic implementation
* ``fft`` : use the Fast Fourrier Transform implementation of convolution * ``fft`` : use the Fast Fourier Transform implementation of convolution
(very high memory usage) (very high memory usage)
* ``guess_once`` : the first time a convolution is executed, the * ``guess_once`` : the first time a convolution is executed, the
implementation to use is chosen according to cuDNN's heuristics and reused implementation to use is chosen according to cuDNN's heuristics and reused
...@@ -104,7 +104,7 @@ get an error when cuDNN can not be used with them, use this flag: ...@@ -104,7 +104,7 @@ get an error when cuDNN can not be used with them, use this flag:
implementation selected every time the shapes of the inputs and kernels implementation selected every time the shapes of the inputs and kernels
don't match the shapes from the last execution. don't match the shapes from the last execution.
* (algo_bwd_data only) ``fft_tiling`` : use the Fast Fourrier * (algo_bwd_data only) ``fft_tiling`` : use the Fast Fourier
Transform implementation of convolution with tiling (high memory Transform implementation of convolution with tiling (high memory
usage, but less than fft) usage, but less than fft)
...@@ -173,3 +173,14 @@ Softmax Ops ...@@ -173,3 +173,14 @@ Softmax Ops
.. automodule:: theano.sandbox.cuda.dnn .. automodule:: theano.sandbox.cuda.dnn
:noindex: :noindex:
:members: GpuDnnSoftmax, GpuDnnSoftmaxGrad :members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
.. _libdoc_cuda_dnn_bn:
Batch Normalization
===================
.. automodule:: theano.sandbox.cuda.dnn
:noindex:
:members: dnn_batch_normalization_train, dnn_batch_normalization_test
...@@ -85,6 +85,10 @@ floating-point precision. ...@@ -85,6 +85,10 @@ floating-point precision.
Return a Variable for a 4-dimensional ndarray Return a Variable for a 4-dimensional ndarray
.. function:: tensor5(name=None, dtype=config.floatX)
Return a Variable for a 5-dimensional ndarray
.. #COMMENT .. #COMMENT
Each of the types described above can be constructed by two methods: Each of the types described above can be constructed by two methods:
a singular version (e.g., :ref:`dmatrix <libdoc_tensor_creation>`) a singular version (e.g., :ref:`dmatrix <libdoc_tensor_creation>`)
...@@ -112,66 +116,74 @@ They are all callable, and accept an optional ``name`` argument. So for example ...@@ -112,66 +116,74 @@ They are all callable, and accept an optional ``name`` argument. So for example
table generated by table generated by
$ python Theano/doc/generate_dtype_tensor_table.py $ python Theano/doc/generate_dtype_tensor_table.py
============ =========== ==== =========== ================================= ============ =========== ==== ============ ===================================
Constructor dtype ndim shape broadcastable Constructor dtype ndim shape broadcastable
============ =========== ==== =========== ================================= ============ =========== ==== ============ ===================================
bscalar int8 0 () () bscalar int8 0 () ()
bvector int8 1 (?,) (False,) bvector int8 1 (?,) (False,)
brow int8 2 (1,?) (True, False) brow int8 2 (1,?) (True, False)
bcol int8 2 (?,1) (False, True) bcol int8 2 (?,1) (False, True)
bmatrix int8 2 (?,?) (False, False) bmatrix int8 2 (?,?) (False, False)
btensor3 int8 3 (?,?,?) (False, False, False) btensor3 int8 3 (?,?,?) (False, False, False)
btensor4 int8 4 (?,?,?,?) (False, False, False, False) btensor4 int8 4 (?,?,?,?) (False, False, False, False)
wscalar int16 0 () () btensor5 int8 5 (?,?,?,?,?) (False, False, False, False, False)
wvector int16 1 (?,) (False,) wscalar int16 0 () ()
wrow int16 2 (1,?) (True, False) wvector int16 1 (?,) (False,)
wcol int16 2 (?,1) (False, True) wrow int16 2 (1,?) (True, False)
wmatrix int16 2 (?,?) (False, False) wcol int16 2 (?,1) (False, True)
wtensor3 int16 3 (?,?,?) (False, False, False) wmatrix int16 2 (?,?) (False, False)
wtensor4 int16 4 (?,?,?,?) (False, False, False, False) wtensor3 int16 3 (?,?,?) (False, False, False)
iscalar int32 0 () () wtensor4 int16 4 (?,?,?,?) (False, False, False, False)
ivector int32 1 (?,) (False,) wtensor5 int16 5 (?,?,?,?,?) (False, False, False, False, False)
irow int32 2 (1,?) (True, False) iscalar int32 0 () ()
icol int32 2 (?,1) (False, True) ivector int32 1 (?,) (False,)
imatrix int32 2 (?,?) (False, False) irow int32 2 (1,?) (True, False)
itensor3 int32 3 (?,?,?) (False, False, False) icol int32 2 (?,1) (False, True)
itensor4 int32 4 (?,?,?,?) (False, False, False, False) imatrix int32 2 (?,?) (False, False)
lscalar int64 0 () () itensor3 int32 3 (?,?,?) (False, False, False)
lvector int64 1 (?,) (False,) itensor4 int32 4 (?,?,?,?) (False, False, False, False)
lrow int64 2 (1,?) (True, False) itensor5 int32 5 (?,?,?,?,?) (False, False, False, False, False)
lcol int64 2 (?,1) (False, True) lscalar int64 0 () ()
lmatrix int64 2 (?,?) (False, False) lvector int64 1 (?,) (False,)
ltensor3 int64 3 (?,?,?) (False, False, False) lrow int64 2 (1,?) (True, False)
ltensor4 int64 4 (?,?,?,?) (False, False, False, False) lcol int64 2 (?,1) (False, True)
dscalar float64 0 () () lmatrix int64 2 (?,?) (False, False)
dvector float64 1 (?,) (False,) ltensor3 int64 3 (?,?,?) (False, False, False)
drow float64 2 (1,?) (True, False) ltensor4 int64 4 (?,?,?,?) (False, False, False, False)
dcol float64 2 (?,1) (False, True) ltensor5 int64 5 (?,?,?,?,?) (False, False, False, False, False)
dmatrix float64 2 (?,?) (False, False) dscalar float64 0 () ()
dtensor3 float64 3 (?,?,?) (False, False, False) dvector float64 1 (?,) (False,)
dtensor4 float64 4 (?,?,?,?) (False, False, False, False) drow float64 2 (1,?) (True, False)
fscalar float32 0 () () dcol float64 2 (?,1) (False, True)
fvector float32 1 (?,) (False,) dmatrix float64 2 (?,?) (False, False)
frow float32 2 (1,?) (True, False) dtensor3 float64 3 (?,?,?) (False, False, False)
fcol float32 2 (?,1) (False, True) dtensor4 float64 4 (?,?,?,?) (False, False, False, False)
fmatrix float32 2 (?,?) (False, False) dtensor5 float64 5 (?,?,?,?,?) (False, False, False, False, False)
ftensor3 float32 3 (?,?,?) (False, False, False) fscalar float32 0 () ()
ftensor4 float32 4 (?,?,?,?) (False, False, False, False) fvector float32 1 (?,) (False,)
cscalar complex64 0 () () frow float32 2 (1,?) (True, False)
cvector complex64 1 (?,) (False,) fcol float32 2 (?,1) (False, True)
crow complex64 2 (1,?) (True, False) fmatrix float32 2 (?,?) (False, False)
ccol complex64 2 (?,1) (False, True) ftensor3 float32 3 (?,?,?) (False, False, False)
cmatrix complex64 2 (?,?) (False, False) ftensor4 float32 4 (?,?,?,?) (False, False, False, False)
ctensor3 complex64 3 (?,?,?) (False, False, False) ftensor5 float32 5 (?,?,?,?,?) (False, False, False, False, False)
ctensor4 complex64 4 (?,?,?,?) (False, False, False, False) cscalar complex64 0 () ()
zscalar complex128 0 () () cvector complex64 1 (?,) (False,)
zvector complex128 1 (?,) (False,) crow complex64 2 (1,?) (True, False)
zrow complex128 2 (1,?) (True, False) ccol complex64 2 (?,1) (False, True)
zcol complex128 2 (?,1) (False, True) cmatrix complex64 2 (?,?) (False, False)
zmatrix complex128 2 (?,?) (False, False) ctensor3 complex64 3 (?,?,?) (False, False, False)
ztensor3 complex128 3 (?,?,?) (False, False, False) ctensor4 complex64 4 (?,?,?,?) (False, False, False, False)
ztensor4 complex128 4 (?,?,?,?) (False, False, False, False) ctensor5 complex64 5 (?,?,?,?,?) (False, False, False, False, False)
============ =========== ==== =========== ================================= zscalar complex128 0 () ()
zvector complex128 1 (?,) (False,)
zrow complex128 2 (1,?) (True, False)
zcol complex128 2 (?,1) (False, True)
zmatrix complex128 2 (?,?) (False, False)
ztensor3 complex128 3 (?,?,?) (False, False, False)
ztensor4 complex128 4 (?,?,?,?) (False, False, False, False)
ztensor5 complex128 5 (?,?,?,?,?) (False, False, False, False, False)
============ =========== ==== ============ ===================================
Plural Constructors Plural Constructors
-------------------------- --------------------------
...@@ -220,11 +232,11 @@ If you would like to construct a tensor variable with a non-standard ...@@ -220,11 +232,11 @@ If you would like to construct a tensor variable with a non-standard
broadcasting pattern, or a larger number of dimensions you'll need to create broadcasting pattern, or a larger number of dimensions you'll need to create
your own :class:`TensorType` instance. You create such an instance by passing your own :class:`TensorType` instance. You create such an instance by passing
the dtype and broadcasting pattern to the constructor. For example, you the dtype and broadcasting pattern to the constructor. For example, you
can create your own 5-dimensional tensor type can create your own 6-dimensional tensor type
>>> dtensor5 = TensorType('float64', (False,)*5) >>> dtensor6 = TensorType('float64', (False,)*6)
>>> x = dtensor5() >>> x = dtensor6()
>>> z = dtensor5('z') >>> z = dtensor6('z')
You can also redefine some of the provided types and they will interact You can also redefine some of the provided types and they will interact
correctly: correctly:
...@@ -1095,13 +1107,11 @@ Indexing ...@@ -1095,13 +1107,11 @@ Indexing
Like NumPy, Theano distinguishes between *basic* and *advanced* indexing. Like NumPy, Theano distinguishes between *basic* and *advanced* indexing.
Theano fully supports basic indexing Theano fully supports basic indexing
(see `NumPy's indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html>`_). (see `NumPy's indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html>`_)
and `integer advanced indexing
`Integer advanced indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#integer>`_. We do not
<http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#integer>`_ support boolean masks, as Theano does not have a boolean type (we use int8 for the output of
will be supported in 0.6rc4 (or the development version). We do not logic operators).
support boolean masks, as Theano does not have a boolean type (we use
int8 for the output of logic operators).
.. testsetup:: indexing .. testsetup:: indexing
......
...@@ -10,4 +10,7 @@ ...@@ -10,4 +10,7 @@
.. moduleauthor:: LISA .. moduleauthor:: LISA
.. seealso:: :ref:`cuDNN batch normalization <libdoc_cuda_dnn_bn>`
must be added manually.
.. autofunction:: theano.tensor.nnet.bn.batch_normalization .. autofunction:: theano.tensor.nnet.bn.batch_normalization
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
:mod:`nnet` -- Ops related to neural networks :mod:`nnet` -- Ops related to neural networks
================================================== ==================================================
.. module:: nnet .. module:: theano.tensor.nnet
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: various ops relating to neural networks :synopsis: various ops relating to neural networks
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
:mod:`nnet` -- Ops for neural networks :mod:`nnet` -- Ops for neural networks
====================================================== ======================================================
.. module:: tensor.nnet .. module:: theano.tensor.nnet.nnet
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: Ops for neural networks :synopsis: Ops for neural networks
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
:mod:`raw_random` -- Low-level random numbers :mod:`raw_random` -- Low-level random numbers
============================================= =============================================
.. module:: raw_random .. module:: theano.tensor.raw_random
:synopsis: symbolic random variables :synopsis: symbolic random variables
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
:mod:`shared_randomstreams` -- Friendly random numbers :mod:`shared_randomstreams` -- Friendly random numbers
====================================================== ======================================================
.. module:: shared_randomstreams .. module:: theano.tensor.shared_randomstreams
:platform: Unix, Windows :platform: Unix, Windows
:synopsis: symbolic random variables :synopsis: symbolic random variables
.. moduleauthor:: LISA .. moduleauthor:: LISA
......
Requirements
============
.. note::
We only support the installation of the requirements through conda.
.. _BLAS: http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms
.. _Python: http://www.python.org/
Python_ >= 2.6 or >= 3.3
The development package (python-dev or python-devel on most Linux distributions) is recommended (see just below). Python 2.4 was supported up to and including the release 0.6. Python 3 is supported past the 3.3 release.
`NumPy <http://numpy.scipy.org/>`_ >= 1.7.1 < 1.11.1
Earlier versions could work, but we do not test them.
`SciPy <http://scipy.org>`_ >= 0.11 < 0.17.1
Only currently required for sparse matrix and special functions support, but highly recommended. SciPy >=0.8 could work, but earlier versions have known bugs with sparse matrices.
`BLAS`_ installation (with Level 3 functionality)
* **Recommended**: MKL, which is free through Conda.
* Alternatively, we suggest installing OpenBLAS, with the development headers (``-dev``, ``-devel``, depending on your Linux distribution).
**Optional requirements**
``python-dev``, ``g++`` >= 4.2
**Highly recommended.** Theano can fall back on a NumPy-based Python execution model, but a C compiler allows for vastly faster execution.
`nose <http://nose.readthedocs.io/en/latest/>`_ >= 1.3.0
Recommended, to run Theano's test-suite.
`Sphinx <http://sphinx.pocoo.org/>`_ >= 0.5.1, `pygments <http://pygments.org/>`_
For building the documentation. LaTeX_ and dvipng_ are also necessary for math to show up as images.
`pydot-ng <https://github.com/pydot/pydot-ng>`_
To handle large picture for gif/images.
`NVIDIA CUDA drivers and SDK`_
**Highly recommended.** Required for GPU code generation/execution on NVIDIA GPUs. See instructions below.
`libgpuarray`_
Required for GPU/CPU code generation on CUDA and OpenCL devices (see: :ref:`gpuarray`.)
Requirements installation through Conda (recommended)
-----------------------------------------------------
Install Miniconda
^^^^^^^^^^^^^^^^^
Follow this `link <http://conda.pydata.org/miniconda.html>`__ to install Miniconda.
.. note::
If you want fast compiled code (recommended), make sure you have g++ (Windows/Linux) or Clang (OS X) installed.
Install requirements and optional packages
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: bash
conda install numpy scipy mkl <nose> <sphinx> <pydot-ng>
* Arguments between <...> are optional.
Install and configure the GPU drivers (recommended)
---------------------------------------------------
.. warning::
OpenCL support is still minimal for now.
1. Install CUDA drivers
* Follow `this link <https://developer.nvidia.com/cuda-downloads>`__
to install the CUDA driver and the CUDA Toolkit.
* You must reboot the computer after the driver installation.
* Test that it was loaded correctly after the reboot, executing the
command `nvidia-smi` from the command line.
.. note::
Sanity check: The *bin* subfolder should contain an *nvcc*
program. This folder is called the *cuda root* directory.
2. Fix 'lib' path
* Add the 'lib' subdirectory (and/or 'lib64' subdirectory if you have a
64-bit OS) to your ``$LD_LIBRARY_PATH`` environment
variable.
3. Set Theano's config flags
To use the GPU you need to define the *cuda root*. You can do it in one
of the following ways:
* Define a $CUDA_ROOT environment variable to equal the cuda root directory, as in ``CUDA_ROOT=/path/to/cuda/root``, or
* add a ``cuda.root`` flag to :envvar:`THEANO_FLAGS`, as in ``THEANO_FLAGS='cuda.root=/path/to/cuda/root'``, or
* add a [cuda] section to your .theanorc file containing the option ``root = /path/to/cuda/root``.
.. _LaTeX: http://www.latex-project.org/
.. _dvipng: http://savannah.nongnu.org/projects/dvipng/
.. _NVIDIA CUDA drivers and SDK: http://developer.nvidia.com/object/gpucomputing.html
.. _libgpuarray: http://deeplearning.net/software/libgpuarray/installation.html
.. _fgraph:
=============
FunctionGraph
=============
TODO: clean up/update the doc/check if complete
WRITEME
.. autoclass:: theano.gof.fg.FunctionGraph
.. _fgraphfeature:
Feature
=======
.. autoclass:: theano.gof.toolbox.Feature
:members:
...@@ -8,7 +8,6 @@ Advanced Topics (under construction) ...@@ -8,7 +8,6 @@ Advanced Topics (under construction)
.. toctree:: .. toctree::
:maxdepth: 2 :maxdepth: 2
fg
compilation compilation
ccodegen ccodegen
function function
......
...@@ -54,6 +54,10 @@ if __name__ == '__main__': ...@@ -54,6 +54,10 @@ if __name__ == '__main__':
pythonpath = os.pathsep.join([throot, pythonpath]) pythonpath = os.pathsep.join([throot, pythonpath])
sys.path[0:0] = [throot] # We must not use os.environ. sys.path[0:0] = [throot] # We must not use os.environ.
# Make sure we don't use gpu to compile documentation
env_th_flags = os.environ.get('THEANO_FLAGS', '')
os.environ['THEANO_FLAGS'] = 'device=cpu,force_device=True'
def call_sphinx(builder, workdir): def call_sphinx(builder, workdir):
import sphinx import sphinx
if options['--check']: if options['--check']:
...@@ -99,3 +103,6 @@ if __name__ == '__main__': ...@@ -99,3 +103,6 @@ if __name__ == '__main__':
# To go back to the original current directory. # To go back to the original current directory.
os.chdir(currentdir) os.chdir(currentdir)
# Reset THEANO_FLAGS
os.environ['THEANO_FLAGS'] = env_th_flags
差异被折叠。
...@@ -175,13 +175,13 @@ by :ref:`broadcasting <libdoc_tensor_broadcastable>`. ...@@ -175,13 +175,13 @@ by :ref:`broadcasting <libdoc_tensor_broadcastable>`.
The following types are available: The following types are available:
* **byte**: ``bscalar, bvector, bmatrix, brow, bcol, btensor3, btensor4`` * **byte**: ``bscalar, bvector, bmatrix, brow, bcol, btensor3, btensor4, btensor5``
* **16-bit integers**: ``wscalar, wvector, wmatrix, wrow, wcol, wtensor3, wtensor4`` * **16-bit integers**: ``wscalar, wvector, wmatrix, wrow, wcol, wtensor3, wtensor4, wtensor5``
* **32-bit integers**: ``iscalar, ivector, imatrix, irow, icol, itensor3, itensor4`` * **32-bit integers**: ``iscalar, ivector, imatrix, irow, icol, itensor3, itensor4, itensor5``
* **64-bit integers**: ``lscalar, lvector, lmatrix, lrow, lcol, ltensor3, ltensor4`` * **64-bit integers**: ``lscalar, lvector, lmatrix, lrow, lcol, ltensor3, ltensor4, ltensor5``
* **float**: ``fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4`` * **float**: ``fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4, ftensor5``
* **double**: ``dscalar, dvector, dmatrix, drow, dcol, dtensor3, dtensor4`` * **double**: ``dscalar, dvector, dmatrix, drow, dcol, dtensor3, dtensor4, dtensor5``
* **complex**: ``cscalar, cvector, cmatrix, crow, ccol, ctensor3, ctensor4`` * **complex**: ``cscalar, cvector, cmatrix, crow, ccol, ctensor3, ctensor4, ctensor5``
The previous list is not exhaustive and a guide to all types compatible The previous list is not exhaustive and a guide to all types compatible
with NumPy arrays may be found here: :ref:`tensor creation<libdoc_tensor_creation>`. with NumPy arrays may be found here: :ref:`tensor creation<libdoc_tensor_creation>`.
......
差异被折叠。
...@@ -49,6 +49,7 @@ Advanced ...@@ -49,6 +49,7 @@ Advanced
sparse sparse
using_gpu using_gpu
using_multi_gpu using_multi_gpu
conv_arithmetic
Advanced configuration and debugging Advanced configuration and debugging
------------------------------------ ------------------------------------
......
...@@ -4,6 +4,13 @@ ...@@ -4,6 +4,13 @@
Multi cores support in Theano Multi cores support in Theano
============================= =============================
Convolution and Pooling
=======================
Since Theano 0.9dev2, the convolution and pooling are parallelized on
CPU.
BLAS operation BLAS operation
============== ==============
......
.. _using_gpu: .. _using_gpu:
============= =============
...@@ -19,11 +18,33 @@ There are two ways currently to use a gpu, one that should support any OpenCL ...@@ -19,11 +18,33 @@ There are two ways currently to use a gpu, one that should support any OpenCL
device as well as NVIDIA cards (:ref:`gpuarray`), and the old backend that device as well as NVIDIA cards (:ref:`gpuarray`), and the old backend that
only supports NVIDIA cards (:ref:`cuda`). only supports NVIDIA cards (:ref:`cuda`).
Using the GPU in Theano is as simple as setting the ``device`` configuration
flag to ``device=cuda`` (or ``device=gpu`` for the old backend). You can optionally target a specific gpu by specifying
the number of the gpu as in e.g. ``device=cuda2``. You also need to set the
default floating point precision.
For example: ``THEANO_FLAGS='cuda.root=/path/to/cuda/root,device=cuda,floatX=float32'``.
You can also set these options in the .theanorc file's ``[global]`` section:
.. code-block:: cfg
[global]
device = cuda
floatX = float32
.. warning:: .. warning::
If you want to use the new GpuArray backend, make sure to have the The old CUDA backend will be deprecated soon, in favor of the new libgpuarray
development version of Theano installed. The 0.8.X releases have not backend.
been optimized to work correctly with the new backend.
.. note::
* If your computer has multiple GPUs and you use ``device=cuda``, the driver
selects the one to use (usually gpu0).
* You can use the program ``nvidia-smi`` to change this policy.
* By default, when ``device`` indicates preference for GPU computations,
Theano will fall back to the CPU if there is a problem with the GPU.
You can use the flag ``force_device=True`` to instead raise an error when
Theano cannot use the GPU.
.. _gpuarray: .. _gpuarray:
...@@ -31,19 +52,32 @@ GpuArray Backend ...@@ -31,19 +52,32 @@ GpuArray Backend
---------------- ----------------
If you have not done so already, you will need to install libgpuarray If you have not done so already, you will need to install libgpuarray
as well as at least one computing toolkit. Instructions for doing so as well as at least one computing toolkit (CUDA or OpenCL). Detailed
are provided at `libgpuarray <http://deeplearning.net/software/libgpuarray/installation.html>`_. instructions to accomplish that are provided at
`libgpuarray <http://deeplearning.net/software/libgpuarray/installation.html>`_.
To install Nvidia's GPU-programming toolchain (CUDA) and configure
Theano to use it, see the installation instructions for
:ref:`Linux <gpu_linux>`, :ref:`MacOS <gpu_macos>` and :ref:`Windows <gpu_windows>`.
While all types of devices are supported if using OpenCL, for the While all types of devices are supported if using OpenCL, for the
remainder of this section, whatever compute device you are using will remainder of this section, whatever compute device you are using will
be referred to as GPU. be referred to as GPU.
.. warning::
If you want to use the new GpuArray backend, make sure to have the
development version of Theano installed. The 0.8.X releases have not
been optimized to work correctly with the new backend.
.. warning:: .. warning::
The backend was designed to support OpenCL, however current support is The backend was designed to support OpenCL, however current support is
incomplete. A lot of very useful ops still do not support it because they incomplete. A lot of very useful ops still do not support it because they
were ported from the old backend with minimal change. were ported from the old backend with minimal change.
.. _testing_the_gpu:
Testing Theano with GPU Testing Theano with GPU
~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~
...@@ -150,7 +184,7 @@ the GPU object directly. The following code is modified to do just that. ...@@ -150,7 +184,7 @@ the GPU object directly. The following code is modified to do just that.
Here ``tensor.exp(x).transfer(None)`` means "copy ``exp(x)`` to the GPU", Here ``tensor.exp(x).transfer(None)`` means "copy ``exp(x)`` to the GPU",
with ``None`` the default GPU context when not explicitly given. with ``None`` the default GPU context when not explicitly given.
For information on how to set GPU contexts, see :ref:`tut_using_multi_gpu`. For information on how to set GPU contexts, see :ref:`tut_using_multi_gpu`.
The output is The output is
...@@ -227,10 +261,10 @@ Tips for Improving Performance on GPU ...@@ -227,10 +261,10 @@ Tips for Improving Performance on GPU
``.theanorc`` file if you plan to do a lot of GPU work. ``.theanorc`` file if you plan to do a lot of GPU work.
* The GPU backend supports *float64* variables, but they are still slower * The GPU backend supports *float64* variables, but they are still slower
to compute than *float32*. The more *float32*, the better GPU performance to compute than *float32*. The more *float32*, the better GPU performance
you will get. you will get.
* Prefer constructors like ``matrix``, ``vector`` and ``scalar`` (which * Prefer constructors like ``matrix``, ``vector`` and ``scalar`` (which
follow the type set in ``floatX``) to ``dmatrix``, ``dvector`` and follow the type set in ``floatX``) to ``dmatrix``, ``dvector`` and
``dscalar``. The latter enforce double precision (*float64* on most ``dscalar``. The latter enforce double precision (*float64* on most
machines), which slows down GPU computations on current hardware. machines), which slows down GPU computations on current hardware.
* Minimize transfers to the GPU device by using ``shared`` variables * Minimize transfers to the GPU device by using ``shared`` variables
to store frequently-accessed data (see :func:`shared()<shared.shared>`). to store frequently-accessed data (see :func:`shared()<shared.shared>`).
......
.. include:: css.inc
.. _updating:
Updating Theano
===============
Follow one of these three sections depending on how you installed Theano.
You should update frequently; bugs are fixed on a very regular basis, and features are
added even more frequently!
Stable Installation
-------------------
The following command will update only Theano:
.. raw:: html
<pre><span class="red">&#60;sudo&#62;</span> pip install <span class="blue">&#60;--user&#62;</span> <span class="pink">&#60;--no-deps&#62;</span> theano</pre>
- Use :red:`sudo` for a root installation.
- Use :blue:`user` for a user installation without admin rights. It will install Theano in your local site-packages.
- Use :pink:`no-deps` when you don't want the dependencies of Theano to be installed through pip. This is important when they have already been installed as system packages.
.. warning::
If you installed NumPy/SciPy with yum/apt-get, updating NumPy/SciPy
with pip/easy_install is not always a good idea. This can make Theano
crash due to problems with BLAS. The versions of
NumPy/SciPy in the distribution are sometimes linked against faster
versions of BLAS. Installing NumPy/SciPy with
yum/apt-get/pip/easy_install won't install the development package
needed to recompile it with the fast version.
To fix a possible crash, you can clear
the Theano cache like this:
.. code-block:: bash
theano-cache clear
Bleeding-Edge Installation
--------------------------
The following command will update your bleeding-edge version of Theano
.. raw:: html
<div style="width:100%"><pre><span class="red">&#60;sudo&#62;</span> pip install <span class="blue">&#60;--user&#62;</span> <span class="pink">&#60;--no-deps&#62;</span> git+https://github.com/Theano/Theano.git#egg=Theano</pre></div>
- Use :red:`sudo` for a root installation.
- Use :blue:`user` for a user installation without admin rights. It will install Theano in your local site-packages.
- Use :pink:`no-deps` when you don't want the dependencies of Theano to be installed through pip. This is important when they have already been installed as system packages.
.. warning::
If you installed NumPy/SciPy with yum/apt-get, updating NumPy/SciPy
with pip/easy_install is not always a good idea. This can make Theano
crash due to problems with BLAS. The versions of
NumPy/SciPy in the distribution are sometimes linked against faster
versions of BLAS. Installing NumPy/SciPy with
yum/apt-get/pip/easy_install won't install the development package
needed to recompile it with the fast version.
To fix a possible crash, you can clear
the Theano cache like this:
.. code-block:: bash
theano-cache clear
Developer Installation
----------------------
To update your library to the latest revision, change directory (``cd``)
to your ``Theano`` folder and execute the following command:
.. warning::
The following assumes you have knowledge of git and know how to do a rebase.
.. code-block:: bash
git pull --rebase
...@@ -54,7 +54,7 @@ PLATFORMS = ["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"] ...@@ -54,7 +54,7 @@ PLATFORMS = ["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"]
MAJOR = 0 MAJOR = 0
MINOR = 9 MINOR = 9
MICRO = 0 MICRO = 0
SUFFIX = "dev1" # Should be blank except for rc's, betas, etc. SUFFIX = "dev2" # Should be blank except for rc's, betas, etc.
ISRELEASED = False ISRELEASED = False
VERSION = '%d.%d.%d%s' % (MAJOR, MINOR, MICRO, SUFFIX) VERSION = '%d.%d.%d%s' % (MAJOR, MINOR, MICRO, SUFFIX)
...@@ -166,7 +166,7 @@ def do_setup(): ...@@ -166,7 +166,7 @@ def do_setup():
install_requires=['numpy>=1.7.1', 'scipy>=0.11', 'six>=1.9.0'], install_requires=['numpy>=1.7.1', 'scipy>=0.11', 'six>=1.9.0'],
# pygments is a dependency for Sphinx code highlight # pygments is a dependency for Sphinx code highlight
extras_require={ extras_require={
'test': ['nose>=1.3.0', 'nose-parameterized>=0.5.0'], 'test': ['nose>=1.3.0', 'nose-parameterized>=0.5.0', 'flake8<3'],
'doc': ['Sphinx>=0.5.1', 'pygments'] 'doc': ['Sphinx>=0.5.1', 'pygments']
}, },
package_data={ package_data={
......
...@@ -147,7 +147,7 @@ class BadThunkOutput(DebugModeError): ...@@ -147,7 +147,7 @@ class BadThunkOutput(DebugModeError):
print(" thunk2 :", self.thunk2, file=sio) print(" thunk2 :", self.thunk2, file=sio)
# Don't import it at the top of the file to prevent circular import. # Don't import it at the top of the file to prevent circular import.
utt = theano.tests.unittest_tools import theano.tests.unittest_tools as utt
print(utt.str_diagnostic(self.val1, self.val2, None, None), file=sio) print(utt.str_diagnostic(self.val1, self.val2, None, None), file=sio)
ret = sio.getvalue() ret = sio.getvalue()
return ret return ret
...@@ -1769,12 +1769,13 @@ class _Linker(gof.link.LocalLinker): ...@@ -1769,12 +1769,13 @@ class _Linker(gof.link.LocalLinker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
if no_recycling is None: if no_recycling is None:
no_recycling = [] no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph: if self.fgraph is not None and self.fgraph is not fgraph:
assert type(self) is _Linker assert type(self) is _Linker
return type(self)(maker=self.maker).accept(fgraph, no_recycling) return type(self)(maker=self.maker).accept(
fgraph, no_recycling, profile)
self.fgraph = fgraph self.fgraph = fgraph
self.no_recycling = no_recycling self.no_recycling = no_recycling
return self return self
......
...@@ -735,9 +735,13 @@ class Function(object): ...@@ -735,9 +735,13 @@ class Function(object):
kwargs : dict kwargs : dict
The function inputs can be passed as keyword argument. For this, use The function inputs can be passed as keyword argument. For this, use
the name of the input or the input instance as the key. the name of the input or the input instance as the key.
Keyword argument ``output_subset`` is a list of either indices of the Keyword argument ``output_subset`` is a list of either indices of the
function's outputs or the keys belonging to the `output_keys` dict function's outputs or the keys belonging to the `output_keys` dict
and represent outputs that are requested to be calculated. and represent outputs that are requested to be calculated. Regardless
of the presence of ``output_subset``, the updates are always calculated
and processed. To disable the updates, you should use the ``copy``
method with ``delete_updates=True``.
Returns Returns
------- -------
...@@ -1496,9 +1500,10 @@ class FunctionMaker(object): ...@@ -1496,9 +1500,10 @@ class FunctionMaker(object):
if not spec.borrow] if not spec.borrow]
if no_borrow: if no_borrow:
self.linker = linker.accept( self.linker = linker.accept(
fgraph, no_recycling=infer_reuse_pattern(fgraph, no_borrow)) fgraph, no_recycling=infer_reuse_pattern(fgraph, no_borrow),
profile=profile)
else: else:
self.linker = linker.accept(fgraph) self.linker = linker.accept(fgraph, profile=profile)
if hasattr(linker, 'accept_var_updates'): if hasattr(linker, 'accept_var_updates'):
# hacky thing so VMLinker knows about updates # hacky thing so VMLinker knows about updates
......
...@@ -41,7 +41,7 @@ def flatten(l): ...@@ -41,7 +41,7 @@ def flatten(l):
return rval return rval
def contains_nan(arr, node=None): def contains_nan(arr, node=None, var=None):
""" """
Test whether a numpy.ndarray contains any `np.nan` values. Test whether a numpy.ndarray contains any `np.nan` values.
...@@ -50,6 +50,7 @@ def contains_nan(arr, node=None): ...@@ -50,6 +50,7 @@ def contains_nan(arr, node=None):
arr : np.ndarray or output of any Theano op arr : np.ndarray or output of any Theano op
node : None or an Apply instance. node : None or an Apply instance.
If arr is the output of a Theano op, the node associated to it. If arr is the output of a Theano op, the node associated to it.
var : The Theano symbolic variable.
Returns Returns
------- -------
...@@ -68,6 +69,8 @@ def contains_nan(arr, node=None): ...@@ -68,6 +69,8 @@ def contains_nan(arr, node=None):
return False return False
elif isinstance(arr, np.random.mtrand.RandomState): elif isinstance(arr, np.random.mtrand.RandomState):
return False return False
elif var and getattr(var.tag, 'is_rng', False):
return False
elif isinstance(arr, slice): elif isinstance(arr, slice):
return False return False
elif arr.size == 0: elif arr.size == 0:
...@@ -86,7 +89,7 @@ def contains_nan(arr, node=None): ...@@ -86,7 +89,7 @@ def contains_nan(arr, node=None):
return np.isnan(np.min(arr)) return np.isnan(np.min(arr))
def contains_inf(arr, node=None): def contains_inf(arr, node=None, var=None):
""" """
Test whether a numpy.ndarray contains any `np.inf` values. Test whether a numpy.ndarray contains any `np.inf` values.
...@@ -95,6 +98,7 @@ def contains_inf(arr, node=None): ...@@ -95,6 +98,7 @@ def contains_inf(arr, node=None):
arr : np.ndarray or output of any Theano op arr : np.ndarray or output of any Theano op
node : None or an Apply instance. node : None or an Apply instance.
If the output of a Theano op, the node associated to it. If the output of a Theano op, the node associated to it.
var : The Theano symbolic variable.
Returns Returns
------- -------
...@@ -114,6 +118,8 @@ def contains_inf(arr, node=None): ...@@ -114,6 +118,8 @@ def contains_inf(arr, node=None):
return False return False
elif isinstance(arr, np.random.mtrand.RandomState): elif isinstance(arr, np.random.mtrand.RandomState):
return False return False
elif var and getattr(var.tag, 'is_rng', False):
return False
elif isinstance(arr, slice): elif isinstance(arr, slice):
return False return False
elif arr.size == 0: elif arr.size == 0:
...@@ -215,44 +221,47 @@ class NanGuardMode(Mode): ...@@ -215,44 +221,47 @@ class NanGuardMode(Mode):
assert nan_is_error or inf_is_error or big_is_error assert nan_is_error or inf_is_error or big_is_error
compile_gpu_func(nan_is_error, inf_is_error, big_is_error) compile_gpu_func(nan_is_error, inf_is_error, big_is_error)
def do_check_on(var, nd): def do_check_on(value, nd, var=None):
""" """
Checks `var` for NaNs / Infs. If detected, raises an exception Checks `value` for NaNs / Infs. If detected, raises an exception
and / or prints information about `nd`, `f`, and `is_input` to and / or prints information about `nd`, `f`, and `is_input` to
help the user determine the cause of the invalid values. help the user determine the cause of the invalid values.
Parameters Parameters
---------- ----------
var : numpy.ndarray value : numpy.ndarray
The value to be checked. The value to be checked.
nd : theano.gof.Apply nd : theano.gof.Apply
The Apply node being executed. The Apply node being executed.
var : theano.gof.Variable
Not used if nd is there. Otherwise, used to print the stack
trace for inputs of the graph.
""" """
error = False error = False
sio = StringIO() sio = StringIO()
if nan_is_error: if nan_is_error:
if contains_nan(var, nd): if contains_nan(value, nd, var):
print('NaN detected', file=sio) print('NaN detected', file=sio)
error = True error = True
if inf_is_error: if inf_is_error:
if contains_inf(var, nd): if contains_inf(value, nd, var):
print('Inf detected', file=sio) print('Inf detected', file=sio)
error = True error = True
if big_is_error: if big_is_error:
err = False err = False
if isinstance(var, theano.gof.type.CDataType._cdata_type): if isinstance(value, theano.gof.type.CDataType._cdata_type):
err = False err = False
elif isinstance(var, np.random.mtrand.RandomState): elif isinstance(value, np.random.mtrand.RandomState):
err = False err = False
elif isinstance(var, slice): elif isinstance(value, slice):
err = False err = False
elif var.size == 0: elif value.size == 0:
err = False err = False
elif cuda.cuda_available and isinstance(var, cuda.CudaNdarray): elif cuda.cuda_available and isinstance(value, cuda.CudaNdarray):
err = (f_gpuabsmax(var.reshape(var.size)) > 1e10) err = (f_gpuabsmax(value.reshape(value.size)) > 1e10)
else: else:
err = (np.abs(var).max() > 1e10) err = (np.abs(value).max() > 1e10)
if err: if err:
print('Big value detected', file=sio) print('Big value detected', file=sio)
error = True error = True
...@@ -264,6 +273,11 @@ class NanGuardMode(Mode): ...@@ -264,6 +273,11 @@ class NanGuardMode(Mode):
else: else:
print("NanGuardMode found an error in an input of the " print("NanGuardMode found an error in an input of the "
"graph.", file=sio) "graph.", file=sio)
# Add the stack trace
if nd:
var = nd.outputs[0]
print(theano.gof.utils.get_variable_trace_string(var),
file=sio)
msg = sio.getvalue() msg = sio.getvalue()
if config.NanGuardMode.action == 'raise': if config.NanGuardMode.action == 'raise':
raise AssertionError(msg) raise AssertionError(msg)
...@@ -281,7 +295,7 @@ class NanGuardMode(Mode): ...@@ -281,7 +295,7 @@ class NanGuardMode(Mode):
def nan_check_input(var, value): def nan_check_input(var, value):
if getattr(var.tag, 'nan_guard_mode_check', True): if getattr(var.tag, 'nan_guard_mode_check', True):
do_check_on(value, None) do_check_on(value, None, var=var)
wrap_linker = theano.gof.vm.VM_Linker(callback=nan_check, wrap_linker = theano.gof.vm.VM_Linker(callback=nan_check,
callback_input=nan_check_input) callback_input=nan_check_input)
......
...@@ -402,6 +402,14 @@ class Shape_i(gof.Op): ...@@ -402,6 +402,14 @@ class Shape_i(gof.Op):
def infer_shape(self, node, input_shapes): def infer_shape(self, node, input_shapes):
return [()] return [()]
def connection_pattern(self, node):
# the grad returns the gradient with respect to the
# elements of a tensor variable
# the elements of the tensor variable do not participate
# in the computation of the shape, so they are not really
# part of the graph
return [[False]]
def grad(self, inp, grads): def grad(self, inp, grads):
return [theano.gradient.grad_not_implemented( return [theano.gradient.grad_not_implemented(
op=self, x_pos=0, x=inp[0], op=self, x_pos=0, x=inp[0],
...@@ -455,6 +463,14 @@ def shape_i(var, i, fgraph=None): ...@@ -455,6 +463,14 @@ def shape_i(var, i, fgraph=None):
return var.shape[i] return var.shape[i]
def shape_i_op(i):
key = i
if key not in shape_i_op.cache:
shape_i_op.cache[key] = Shape_i(i)
return shape_i_op.cache[key]
shape_i_op.cache = {}
def register_shape_i_c_code(typ, code, check_input, version=()): def register_shape_i_c_code(typ, code, check_input, version=()):
""" """
Tell Shape_i how to generate C code for a Theano Type. Tell Shape_i how to generate C code for a Theano Type.
......
...@@ -54,7 +54,7 @@ def _atexit_print_fn(): ...@@ -54,7 +54,7 @@ def _atexit_print_fn():
destination_file = open(config.profiling.destination, 'w') destination_file = open(config.profiling.destination, 'w')
for ps in _atexit_print_list: for ps in _atexit_print_list:
if ps.fct_callcount or ps.compile_time > 0: if ps.fct_callcount >= 1 or ps.compile_time > 1:
ps.summary(file=destination_file, ps.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops, n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply) n_apply_to_print=config.profiling.n_apply)
...@@ -72,7 +72,8 @@ def _atexit_print_fn(): ...@@ -72,7 +72,8 @@ def _atexit_print_fn():
for ps in to_sum[1:]: for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount", for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time", "vm_call_time", "optimizer_time", "linker_time",
"validate_time", "import_time"]: "validate_time", "import_time",
"linker_node_make_thunks"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr)) setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictonary # merge dictonary
...@@ -190,6 +191,8 @@ class ProfileStats(object): ...@@ -190,6 +191,8 @@ class ProfileStats(object):
import_time = 0.0 import_time = 0.0
# time spent in importing compiled python module. # time spent in importing compiled python module.
linker_node_make_thunks = 0.0
line_width = config.profiling.output_line_width line_width = config.profiling.output_line_width
nb_nodes = -1 nb_nodes = -1
...@@ -665,6 +668,8 @@ class ProfileStats(object): ...@@ -665,6 +668,8 @@ class ProfileStats(object):
print(' Theano Linker time (includes C, CUDA code ' print(' Theano Linker time (includes C, CUDA code '
'generation/compiling): %es' % self.linker_time, file=file) 'generation/compiling): %es' % self.linker_time, file=file)
print(' Import time %es' % self.import_time, file=file) print(' Import time %es' % self.import_time, file=file)
print(' Node make_thunk time %es' % self.linker_node_make_thunks,
file=file)
print('', file=file) print('', file=file)
# The validation time is a subset of optimizer_time # The validation time is a subset of optimizer_time
......
...@@ -242,6 +242,15 @@ AddConfigVar('gpuarray.preallocate', ...@@ -242,6 +242,15 @@ AddConfigVar('gpuarray.preallocate',
FloatParam(0), FloatParam(0),
in_c_key=False) in_c_key=False)
AddConfigVar('gpuarray.sched',
"""The sched parameter passed for context creation to pygpu.
With CUDA, using "multi" is equivalent to using the parameter
cudaDeviceScheduleYield. This is useful to lower the
CPU overhead when waiting for GPU. One user found that it
speeds up his other processes that was doing data augmentation.
""",
EnumStr("default", "multi", "single"))
AddConfigVar('gpuarray.single_stream', AddConfigVar('gpuarray.single_stream',
""" """
If your computations are mostly lots of small elements, If your computations are mostly lots of small elements,
...@@ -345,8 +354,9 @@ AddConfigVar('dnn.conv.algo_bwd_filter', ...@@ -345,8 +354,9 @@ AddConfigVar('dnn.conv.algo_bwd_filter',
AddConfigVar('dnn.conv.precision', AddConfigVar('dnn.conv.precision',
"Default data precision to use for the computation in cuDNN " "Default data precision to use for the computation in cuDNN "
"convolutions (defaults to the same dtype as the inputs of the " "convolutions (defaults to the same dtype as the inputs of the "
"convolutions).", "convolutions, or float32 if inputs are float16).",
EnumStr('as_input', 'float16', 'float32', 'float64'), EnumStr('as_input_f32', 'as_input', 'float16', 'float32',
'float64'),
in_c_key=False) in_c_key=False)
...@@ -374,7 +384,7 @@ AddConfigVar('dnn.enabled', ...@@ -374,7 +384,7 @@ AddConfigVar('dnn.enabled',
" to not using it if not present." " to not using it if not present."
" If True and cuDNN can not be used, raise an error." " If True and cuDNN can not be used, raise an error."
" If False, disable cudnn", " If False, disable cudnn",
StrParam("auto", "True", "False"), EnumStr("auto", "True", "False"),
in_c_key=False) in_c_key=False)
# This flag determines whether or not to raise error/warning message if # This flag determines whether or not to raise error/warning message if
...@@ -1620,6 +1630,8 @@ def short_platform(r=None, p=None): ...@@ -1620,6 +1630,8 @@ def short_platform(r=None, p=None):
return p return p
compiledir_format_dict['short_platform'] = short_platform() compiledir_format_dict['short_platform'] = short_platform()
# Allow to have easily one compiledir per device.
compiledir_format_dict['device'] = config.device
compiledir_format_keys = ", ".join(sorted(compiledir_format_dict.keys())) compiledir_format_keys = ", ".join(sorted(compiledir_format_dict.keys()))
default_compiledir_format = ("compiledir_%(short_platform)s-%(processor)s-" default_compiledir_format = ("compiledir_%(short_platform)s-%(processor)s-"
"%(python_version)s-%(python_bitwidth)s") "%(python_version)s-%(python_bitwidth)s")
......
...@@ -8,6 +8,7 @@ import os ...@@ -8,6 +8,7 @@ import os
import shlex import shlex
import sys import sys
import warnings import warnings
from functools import wraps
from six import StringIO from six import StringIO
...@@ -96,6 +97,7 @@ def change_flags(**kwargs): ...@@ -96,6 +97,7 @@ def change_flags(**kwargs):
Useful during tests. Useful during tests.
""" """
def change_flags_exec(f): def change_flags_exec(f):
@wraps(f)
def inner(*args, **kwargs_): def inner(*args, **kwargs_):
old_val = {} old_val = {}
for k in kwargs: for k in kwargs:
...@@ -117,9 +119,6 @@ def change_flags(**kwargs): ...@@ -117,9 +119,6 @@ def change_flags(**kwargs):
assert len(l) == 1 assert len(l) == 1
l[0].__set__(None, old_val[k]) l[0].__set__(None, old_val[k])
# Make sure that the name of the decorated function remains the same.
inner.__name__ = f.__name__
return inner return inner
return change_flags_exec return change_flags_exec
......
...@@ -25,7 +25,11 @@ except ImportError: ...@@ -25,7 +25,11 @@ except ImportError:
try: try:
# fall back on pydot if necessary # fall back on pydot if necessary
import pydot as pd import pydot as pd
if pd.find_graphviz(): if hasattr(pd, 'find_graphviz'):
if pd.find_graphviz():
pydot_imported = True
else:
pd.Dot.create(pd.Dot())
pydot_imported = True pydot_imported = True
except ImportError: except ImportError:
pass # tests should not fail on optional dependency pass # tests should not fail on optional dependency
......
...@@ -548,7 +548,7 @@ class CLinker(link.Linker): ...@@ -548,7 +548,7 @@ class CLinker(link.Linker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """
Associate linker with fgraph Associate linker with fgraph
...@@ -557,7 +557,8 @@ class CLinker(link.Linker): ...@@ -557,7 +557,8 @@ class CLinker(link.Linker):
no_recycling = [] no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph: if self.fgraph is not None and self.fgraph is not fgraph:
# A linker can be tied to only one FunctionGraph. # A linker can be tied to only one FunctionGraph.
return type(self)(self.schedule).accept(fgraph, no_recycling) return type(self)(self.schedule).accept(
fgraph, no_recycling, profile)
self.fgraph = fgraph self.fgraph = fgraph
self.fetch_variables() self.fetch_variables()
self.no_recycling = no_recycling self.no_recycling = no_recycling
...@@ -1737,7 +1738,7 @@ class OpWiseCLinker(link.LocalLinker): ...@@ -1737,7 +1738,7 @@ class OpWiseCLinker(link.LocalLinker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """
Associate linker with fgraph Associate linker with fgraph
""" """
...@@ -1750,7 +1751,7 @@ class OpWiseCLinker(link.LocalLinker): ...@@ -1750,7 +1751,7 @@ class OpWiseCLinker(link.LocalLinker):
allow_gc=self.allow_gc, allow_gc=self.allow_gc,
nice_errors=self.nice_errors, nice_errors=self.nice_errors,
schedule=self.schedule, schedule=self.schedule,
).accept(fgraph, no_recycling) ).accept(fgraph, no_recycling, profile)
self.fgraph = fgraph self.fgraph = fgraph
self.no_recycling = no_recycling self.no_recycling = no_recycling
return self return self
...@@ -1897,7 +1898,7 @@ class DualLinker(link.Linker): ...@@ -1897,7 +1898,7 @@ class DualLinker(link.Linker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """
Update/tie self with fgraph Update/tie self with fgraph
""" """
...@@ -1905,7 +1906,7 @@ class DualLinker(link.Linker): ...@@ -1905,7 +1906,7 @@ class DualLinker(link.Linker):
no_recycling = [] no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph: if self.fgraph is not None and self.fgraph is not fgraph:
return type(self)(self.checker, self.schedule).accept( return type(self)(self.checker, self.schedule).accept(
fgraph, no_recycling) fgraph, no_recycling, profile)
self.fgraph = fgraph self.fgraph = fgraph
self.no_recycling = no_recycling self.no_recycling = no_recycling
return self return self
......
...@@ -1873,7 +1873,8 @@ class GCC_compiler(Compiler): ...@@ -1873,7 +1873,8 @@ class GCC_compiler(Compiler):
if ('g++' not in theano.config.cxx and if ('g++' not in theano.config.cxx and
'clang++' not in theano.config.cxx and 'clang++' not in theano.config.cxx and
'clang-omp++' not in theano.config.cxx): 'clang-omp++' not in theano.config.cxx and
'icpc' not in theano.config.cxx):
_logger.warn( _logger.warn(
"OPTIMIZATION WARNING: your Theano flag `cxx` seems not to be" "OPTIMIZATION WARNING: your Theano flag `cxx` seems not to be"
" the g++ compiler. So we disable the compiler optimization" " the g++ compiler. So we disable the compiler optimization"
......
...@@ -593,10 +593,10 @@ class FunctionGraph(utils.object2): ...@@ -593,10 +593,10 @@ class FunctionGraph(utils.object2):
# callback utils # # callback utils #
def execute_callbacks(self, name, *args, **kwargs): def execute_callbacks(self, name, *args, **kwargs):
""" """Execute callbacks
Calls
getattr(feature, name)(*args) Calls `getattr(feature, name)(*args)` for each feature which has
for each feature which has a method called after name. a method called after name.
""" """
t0 = time.time() t0 = time.time()
...@@ -614,11 +614,11 @@ class FunctionGraph(utils.object2): ...@@ -614,11 +614,11 @@ class FunctionGraph(utils.object2):
self.execute_callbacks_time += time.time() - t0 self.execute_callbacks_time += time.time() - t0
def collect_callbacks(self, name, *args): def collect_callbacks(self, name, *args):
""" """Collects callbacks
Returns a dictionary d such that:
d[feature] == getattr(feature, name)(*args)
For each feature which has a method called after name.
Returns a dictionary d such that
`d[feature] == getattr(feature, name)(*args)`
For each feature which has a method called after name.
""" """
d = {} d = {}
for feature in self._features: for feature in self._features:
...@@ -631,17 +631,18 @@ class FunctionGraph(utils.object2): ...@@ -631,17 +631,18 @@ class FunctionGraph(utils.object2):
# misc # # misc #
def toposort(self): def toposort(self):
""" """Toposort
Return an ordering of the graph's Apply nodes such that:
- All the nodes of the inputs of a node are before that node. Return an ordering of the graph's Apply nodes such that
- Satisfies the orderings provided by each feature that has
* All the nodes of the inputs of a node are before that node.
* Satisfies the orderings provided by each feature that has
an 'orderings' method. an 'orderings' method.
If a feature has an 'orderings' method, it will be called with If a feature has an 'orderings' method, it will be called with
this FunctionGraph as sole argument. It should return a dictionary of this FunctionGraph as sole argument. It should return a dictionary of
{node: predecessors} where predecessors is a list of nodes `{node: predecessors}` where predecessors is a list of nodes that
that should be computed before the key node. should be computed before the key node.
""" """
if len(self.apply_nodes) < 2: if len(self.apply_nodes) < 2:
# optimization # optimization
...@@ -760,17 +761,20 @@ class FunctionGraph(utils.object2): ...@@ -760,17 +761,20 @@ class FunctionGraph(utils.object2):
return self.clone_get_equiv(check_integrity)[0] return self.clone_get_equiv(check_integrity)[0]
def clone_get_equiv(self, check_integrity=True, attach_feature=True): def clone_get_equiv(self, check_integrity=True, attach_feature=True):
"""Clone the graph and get a memo( a dict )that map old node to new node """Clone the graph and get a dict that maps old nodes to new ones
----------------------------
Parameters: Parameters:
check_integrity - { bool } Whether to check integrity. check_integrity: bool
Default is True. Whether to check integrity. Default is True.
attach_feature - { bool } Whether to attach feature of origin graph to attach_feature: bool
cloned graph. Default is True. Whether to attach feature of origin graph to cloned graph.
---------------------------- Default is True.
Returns: Returns:
e - { FunctionGraph } Cloned fgraph. Every node in cloned graph is cloned. e: FunctionGraph
equiv - { dict } A dict that map old node to new node. Cloned fgraph. Every node in cloned graph is cloned.
equiv: dict
A dict that map old node to new node.
""" """
equiv = graph.clone_get_equiv(self.inputs, self.outputs) equiv = graph.clone_get_equiv(self.inputs, self.outputs)
......
""" """
Node classes (`Apply`, `Variable`) and expression graph algorithms. Node classes (`Apply`, `Variable`) and expression graph algorithms.
To read about what theano graphs are from a user perspective, have a look at
`graph.html <../doc/graph.html>`__.
""" """
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
...@@ -1246,9 +1242,9 @@ def as_string(i, o, ...@@ -1246,9 +1242,9 @@ def as_string(i, o,
Input `Variable` s. Input `Variable` s.
o : list o : list
Output `Variable` s. Output `Variable` s.
leaf_formatter : function leaf_formatter : callable
Takes a `Variable` and returns a string to describe it. Takes a `Variable` and returns a string to describe it.
node_formatter : function node_formatter : callable
Takes an `Op` and the list of strings corresponding to its arguments Takes an `Op` and the list of strings corresponding to its arguments
and returns a string to describe it. and returns a string to describe it.
......
...@@ -789,15 +789,47 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds) ...@@ -789,15 +789,47 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds)
{ {
CLazyLinker * self = (CLazyLinker*)_self; CLazyLinker * self = (CLazyLinker*)_self;
static char *kwlist[] = { static char *kwlist[] = {
(char*)"time_thunks", (char *)"time_thunks",
(char *)"n_calls", (char *)"n_calls",
(char *)"output_subset",
NULL}; NULL};
int n_calls=1; int n_calls=1;
if (! PyArg_ParseTupleAndKeywords(args, kwds, "|ii", kwlist, PyObject *output_subset_ptr = NULL;
if (! PyArg_ParseTupleAndKeywords(args, kwds, "|iiO", kwlist,
&self->do_timing, &self->do_timing,
&n_calls)) &n_calls,
&output_subset_ptr))
return NULL; return NULL;
int err = 0; int err = 0;
// parse an output_subset list
// it is stored as a bool list of length n_output_vars: calculate a var or not
char *output_subset = NULL;
int output_subset_size = -1;
if (output_subset_ptr != NULL)
{
if (! PyList_Check(output_subset_ptr))
{
err = 1;
PyErr_SetString(PyExc_RuntimeError, "Output_subset is not a list");
}
else
{
output_subset_size = PyList_Size(output_subset_ptr);
output_subset = (char*)calloc(self->n_output_vars, sizeof(char));
for (int it = 0; it < output_subset_size; ++it)
{
PyObject *elem = PyList_GetItem(output_subset_ptr, it);
if (! PyInt_Check(elem))
{
err = 1;
PyErr_SetString(PyExc_RuntimeError, "Some elements of output_subset list are not int");
}
output_subset[PyInt_AsLong(elem)] = 1;
}
}
}
self->position_of_error = -1; self->position_of_error = -1;
// create constants used to fill the var_compute_cells // create constants used to fill the var_compute_cells
PyObject * one = PyInt_FromLong(1); PyObject * one = PyInt_FromLong(1);
...@@ -833,9 +865,13 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds) ...@@ -833,9 +865,13 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds)
} }
} }
int first_updated = self->n_output_vars - self->n_updates;
for (int i = 0; i < self->n_output_vars && (!err); ++i) for (int i = 0; i < self->n_output_vars && (!err); ++i)
{ {
err = lazy_rec_eval(self, self->output_vars[i], one, zero); if (i >= first_updated || output_subset == NULL || output_subset[i] == 1)
{
err = lazy_rec_eval(self, self->output_vars[i], one, zero);
}
} }
if (!err) if (!err)
...@@ -848,7 +884,8 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds) ...@@ -848,7 +884,8 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds)
{ {
Py_ssize_t src = self->output_vars[i]; Py_ssize_t src = self->output_vars[i];
PyObject * item = PyList_GetItem(self->var_value_cells[src], 0); PyObject * item = PyList_GetItem(self->var_value_cells[src], 0);
if (self->var_computed[src] != 1) if ((output_subset == NULL || output_subset[i]) &&
self->var_computed[src] != 1)
{ {
err = 1; err = 1;
PyErr_Format(PyExc_AssertionError, PyErr_Format(PyExc_AssertionError,
...@@ -876,7 +913,7 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds) ...@@ -876,7 +913,7 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds)
} }
/* /*
Clear everything that is left and not an output. This is needed Clear everything that is left and not an output. This is needed
for lazy evaluation since the current GC algo is too conservative for lazy evaluation since the current GC algo is too conservative
with lazy graphs. with lazy graphs.
*/ */
...@@ -901,6 +938,9 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds) ...@@ -901,6 +938,9 @@ CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds)
PyList_SetItem(self->var_value_cells[i], 0, Py_None); PyList_SetItem(self->var_value_cells[i], 0, Py_None);
} }
} }
if (output_subset != NULL)
free(output_subset);
Py_DECREF(one); Py_DECREF(one);
Py_DECREF(zero); Py_DECREF(zero);
if (err) if (err)
...@@ -1014,7 +1054,7 @@ static PyTypeObject lazylinker_ext_CLazyLinkerType = { ...@@ -1014,7 +1054,7 @@ static PyTypeObject lazylinker_ext_CLazyLinkerType = {
static PyObject * get_version(PyObject *dummy, PyObject *args) static PyObject * get_version(PyObject *dummy, PyObject *args)
{ {
PyObject *result = PyFloat_FromDouble(0.21); PyObject *result = PyFloat_FromDouble(0.211);
return result; return result;
} }
......
...@@ -15,7 +15,7 @@ from theano.gof import cmodule ...@@ -15,7 +15,7 @@ from theano.gof import cmodule
_logger = logging.getLogger('theano.gof.lazylinker_c') _logger = logging.getLogger('theano.gof.lazylinker_c')
force_compile = False force_compile = False
version = 0.21 # must match constant returned in function get_version() version = 0.211 # must match constant returned in function get_version()
lazylinker_ext = None lazylinker_ext = None
...@@ -145,4 +145,4 @@ except ImportError: ...@@ -145,4 +145,4 @@ except ImportError:
release_lock() release_lock()
from lazylinker_ext.lazylinker_ext import * # noqa from lazylinker_ext.lazylinker_ext import * # noqa
assert force_compile or (version == get_version()) assert force_compile or (version == get_version()) # noqa
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论