提交 e40c1b29 authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Brandon T. Willard

Documentation formatting and NumPy usage updates

上级 8b7446e0
......@@ -146,7 +146,7 @@ class OpFromGraph(Op, HasInnerGraph):
from aesara.compile.builders import OpFromGraph
x, y, z = at.scalars('xyz')
s = aesara.shared(np.random.rand(2, 2).astype(config.floatX))
s = aesara.shared(np.random.random((2, 2)).astype(config.floatX))
e = x + y * z + s
op = OpFromGraph([x, y, z], [e])
# op behaves like a normal aesara op
......
......@@ -5,6 +5,7 @@ import time
import warnings
from collections import OrderedDict
from functools import partial, reduce
from typing import TYPE_CHECKING, Callable, List, Optional, Union
import numpy as np
......@@ -18,6 +19,10 @@ from aesara.graph.op import get_test_values
from aesara.graph.type import Type
if TYPE_CHECKING:
from aesara.compile.mode import Mode
__docformat__ = "restructuredtext en"
_logger = logging.getLogger("aesara.gradient")
......@@ -684,8 +689,8 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
.. code-block:: python
x, t = aesara.tensor.fvector('x'), aesara.tensor.fvector('t')
w1 = aesara.shared(np.random.randn(3,4))
w2 = aesara.shared(np.random.randn(4,2))
w1 = aesara.shared(np.random.standard_normal((3,4)))
w2 = aesara.shared(np.random.standard_normal((4,2)))
a1 = aesara.tensor.tanh(aesara.tensor.dot(x,w1))
a2 = aesara.tensor.tanh(aesara.tensor.dot(a1,w2))
cost2 = aesara.tensor.sqr(a2 - t).sum()
......@@ -1690,17 +1695,17 @@ def mode_not_slow(mode):
def verify_grad(
fun,
pt,
n_tests=2,
rng=None,
eps=None,
out_type=None,
abs_tol=None,
rel_tol=None,
mode=None,
cast_to_output_type=False,
no_debug_ref=True,
fun: Callable,
pt: List[np.ndarray],
n_tests: int = 2,
rng: Optional[Union[np.random.Generator, np.random.RandomState]] = None,
eps: Optional[float] = None,
out_type: Optional[str] = None,
abs_tol: Optional[float] = None,
rel_tol: Optional[float] = None,
mode: Optional[Union["Mode", str]] = None,
cast_to_output_type: bool = False,
no_debug_ref: bool = True,
):
"""Test a gradient by Finite Difference Method. Raise error on failure.
......@@ -1713,47 +1718,47 @@ def verify_grad(
--------
>>> verify_grad(aesara.tensor.tanh,
... (np.asarray([[2, 3, 4], [-1, 3.3, 9.9]]),),
... rng=np.random)
... rng=np.random.default_rng(23098))
Parameters
----------
fun : a Python function
fun
`fun` takes Aesara variables as inputs, and returns an Aesara variable.
For instance, an Op instance with a single output.
pt : list of numpy.ndarrays
For instance, an `Op` instance with a single output.
pt
Input values, points where the gradient is estimated.
These arrays must be either float16, float32, or float64 arrays.
n_tests : int
Number of times to run the test
rng : numpy.random.RandomState
n_tests
Number of times to run the test.
rng
Random number generator used to sample the output random projection `u`,
we test gradient of sum(u * fun) at `pt`
eps : float, optional
we test gradient of ``sum(u * fun)`` at `pt`.
eps
Step size used in the Finite Difference Method (Default
None is type-dependent).
Raising the value of eps can raise or lower the absolute
``None`` is type-dependent).
Raising the value of `eps` can raise or lower the absolute
and relative errors of the verification depending on the
Op. Raising eps does not lower the verification quality for
`Op`. Raising `eps` does not lower the verification quality for
linear operations. It is better to raise `eps` than raising
`abs_tol` or `rel_tol`.
out_type : string
Dtype of output, if complex (i.e., 'complex32' or 'complex64')
abs_tol : float
out_type
Dtype of output, if complex (i.e., ``'complex32'`` or ``'complex64'``)
abs_tol
Absolute tolerance used as threshold for gradient comparison
rel_tol : float
rel_tol
Relative tolerance used as threshold for gradient comparison
cast_to_output_type : bool
If the output is float32 and cast_to_output_type is True, cast
the random projection to float32. Otherwise it is float64.
cast_to_output_type
If the output is float32 and `cast_to_output_type` is ``True``, cast
the random projection to float32; otherwise, it is float64.
float16 is not handled here.
no_debug_ref : bool
Don't use DebugMode for the numerical gradient function.
no_debug_ref
Don't use `DebugMode` for the numerical gradient function.
Notes
-----
This function does not support multiple outputs. In
tests/scan/test_basic.py there is an experimental `verify_grad` that covers
that case as well by using random projections.
This function does not support multiple outputs. In `tests.scan.test_basic`
there is an experimental `verify_grad` that covers that case as well by
using random projections.
"""
from aesara.compile.function import function
......
......@@ -404,13 +404,14 @@ You can try the new :class:`Op` as follows:
.. testcode:: example
import numpy as np
import aesara
x = aesara.tensor.matrix()
f = aesara.function([x], DoubleOp1()(x))
import numpy
inp = numpy.random.rand(5, 4)
inp = np.random.random_sample((5, 4))
out = f(inp)
assert numpy.allclose(inp * 2, out)
assert np.allclose(inp * 2, out)
print(inp)
print(out)
......@@ -435,13 +436,14 @@ You can try the new :class:`Op` as follows:
.. testcode:: example
import numpy as np
import aesara
x = aesara.tensor.matrix()
f = aesara.function([x], DoubleOp2()(x))
import numpy
inp = numpy.random.rand(5, 4)
inp = np.random.random_sample((5, 4))
out = f(inp)
assert numpy.allclose(inp * 2, out)
assert np.allclose(inp * 2, out)
print(inp)
print(out)
......@@ -530,10 +532,9 @@ We can test this by running the following segment:
f = aesara.function([x], mult4plus5op(x))
g = aesara.function([x], mult2plus3op(x))
import numpy
inp = numpy.random.rand(5, 4).astype(numpy.float32)
assert numpy.allclose(4 * inp + 5, f(inp))
assert numpy.allclose(2 * inp + 3, g(inp))
inp = np.random.random_sample((5, 4)).astype(np.float32)
assert np.allclose(4 * inp + 5, f(inp))
assert np.allclose(2 * inp + 3, g(inp))
How To Test it
......@@ -553,11 +554,11 @@ returns the right answer. If you detect an error, you must raise an
.. testcode:: tests
import numpy
import numpy as np
import aesara
from tests import unittest_tools as utt
from aesara.configdefaults import config
class TestDouble(utt.InferShapeTester):
def setup_method(self):
super().setup_method()
......@@ -565,9 +566,12 @@ returns the right answer. If you detect an error, you must raise an
self.op = DoubleOp()
def test_basic(self):
rng = np.random.default_rng(utt.fetch_seed())
x = aesara.tensor.matrix()
f = aesara.function([x], self.op(x))
inp = numpy.asarray(numpy.random.rand(5, 4), dtype=config.floatX)
inp = np.asarray(rng.random((5, 4)), dtype=aesara.config.floatX)
out = f(inp)
# Compare the result computed to the expected value.
utt.assert_allclose(inp * 2, out)
......@@ -612,20 +616,26 @@ your :class:`Op` works only with such matrices, you can disable the warning with
.. testcode:: tests
from tests import unittest_tools as utt
from aesara.configdefaults import config
from tests import unittest_tools as utt
class TestDouble(utt.InferShapeTester):
# [...] as previous tests.
def test_infer_shape(self):
rng = np.random.default_rng(utt.fetch_seed())
x = aesara.tensor.matrix()
self._compile_and_check([x], # aesara.function inputs
[self.op(x)], # aesara.function outputs
# Always use not square matrix!
# inputs data
[numpy.asarray(numpy.random.rand(5, 4),
dtype=config.floatX)],
# Op that should be removed from the graph.
self.op_class)
self._compile_and_check(
[x], # aesara.function inputs
[self.op(x)], # aesara.function outputs
# Always use not square matrix!
# inputs data
[np.asarray(rng.random((5, 4)), dtype=config.floatX)],
# Op that should be removed from the graph.
self.op_class,
)
Testing the gradient
^^^^^^^^^^^^^^^^^^^^
......@@ -642,8 +652,11 @@ the multiplication by 2).
.. testcode:: tests
def test_grad(self):
tests.unittest_tools.verify_grad(self.op,
[numpy.random.rand(5, 7, 2)])
rng = np.random.default_rng(utt.fetch_seed())
tests.unittest_tools.verify_grad(
self.op,
[rng.random((5, 7, 2))]
)
Testing the Rop
^^^^^^^^^^^^^^^
......@@ -778,40 +791,34 @@ signature:
.. testcode:: asop
import aesara
import numpy
import aesara.tensor as at
import numpy as np
from aesara import function
from aesara.compile.ops import as_op
def infer_shape_numpy_dot(fgraph, node, input_shapes):
ashp, bshp = input_shapes
return [ashp[:-1] + bshp[-1:]]
@as_op(itypes=[aesara.tensor.fmatrix, aesara.tensor.fmatrix],
otypes=[aesara.tensor.fmatrix], infer_shape=infer_shape_numpy_dot)
@as_op(itypes=[at.matrix, at.matrix],
otypes=[at.matrix], infer_shape=infer_shape_numpy_dot)
def numpy_dot(a, b):
return numpy.dot(a, b)
return np.dot(a, b)
You can try it as follows:
.. testcode:: asop
x = aesara.tensor.fmatrix()
y = aesara.tensor.fmatrix()
x = at.matrix()
y = at.matrix()
f = function([x, y], numpy_dot(x, y))
inp1 = numpy.random.rand(5, 4).astype('float32')
inp2 = numpy.random.rand(4, 7).astype('float32')
inp1 = np.random.random_sample((5, 4))
inp2 = np.random.random_sample((4, 7))
out = f(inp1, inp2)
Exercise
^^^^^^^^
Run the code of the ``numpy_dot`` example above.
Modify and execute to compute: ``numpy.add`` and ``numpy.subtract``.
Modify and execute the example to return two outputs: ``x + y`` and ``x - y``.
.. _Documentation:
Documentation and Coding Style
......@@ -822,7 +829,7 @@ will not be accepted.
:class:`NanGuardMode` and :class:`AllocEmpty`
---------------------------------------------
:class:`NanGuardMode` help users find where in the graph NaN appear. But
:class:`NanGuardMode` helps users find where in the graph ``NaN``\s appear. But
sometimes, we want some variables to not be checked. For example, in
the old GPU back-end, we used a float32 :class:`CudaNdarray` to store the MRG
random number generator state (they are integers). So if :class:`NanGuardMode`
......
......@@ -81,60 +81,60 @@ from aesara.tensor.type import dmatrix, matrix
class TestProdOp(utt.InferShapeTester):
rng = np.random.RandomState(43)
def setup_method(self):
super().setup_method()
self.op_class = ProdOp # case 1
def test_perform(self):
rng = np.random.default_rng(43)
x = matrix()
y = matrix()
f = aesara.function([x, y], self.op_class()(x, y))
x_val = np.random.rand(5, 4)
y_val = np.random.rand(5, 4)
x_val = rng.random((5, 4))
y_val = rng.random((5, 4))
out = f(x_val, y_val)
assert np.allclose(x_val * y_val, out)
def test_gradient(self):
rng = np.random.default_rng(43)
utt.verify_grad(
self.op_class(),
[np.random.rand(5, 4), np.random.rand(5, 4)],
[rng.random((5, 4)), rng.random((5, 4))],
n_tests=1,
rng=TestProdOp.rng,
)
def test_infer_shape(self):
rng = np.random.default_rng(43)
x = dmatrix()
y = dmatrix()
self._compile_and_check(
[x, y],
[self.op_class()(x, y)],
[np.random.rand(5, 6), np.random.rand(5, 6)],
[rng.random((5, 6)), rng.random((5, 6))],
self.op_class,
)
class TestSumDiffOp(utt.InferShapeTester):
rng = np.random.RandomState(43)
def setup_method(self):
super().setup_method()
self.op_class = SumDiffOp
def test_perform(self):
rng = np.random.RandomState(43)
x = matrix()
y = matrix()
f = aesara.function([x, y], self.op_class()(x, y))
x_val = np.random.rand(5, 4)
y_val = np.random.rand(5, 4)
x_val = rng.random((5, 4))
y_val = rng.random((5, 4))
out = f(x_val, y_val)
assert np.allclose([x_val + y_val, x_val - y_val], out)
def test_gradient(self):
rng = np.random.RandomState(43)
def output_0(x, y):
return self.op_class()(x, y)[0]
......@@ -143,18 +143,20 @@ class TestSumDiffOp(utt.InferShapeTester):
utt.verify_grad(
output_0,
[np.random.rand(5, 4), np.random.rand(5, 4)],
[rng.random((5, 4)), rng.random((5, 4))],
n_tests=1,
rng=TestSumDiffOp.rng,
)
utt.verify_grad(
output_1,
[np.random.rand(5, 4), np.random.rand(5, 4)],
[rng.random((5, 4)), rng.random((5, 4))],
n_tests=1,
rng=TestSumDiffOp.rng,
)
def test_infer_shape(self):
rng = np.random.RandomState(43)
x = dmatrix()
y = dmatrix()
......@@ -163,7 +165,7 @@ class TestSumDiffOp(utt.InferShapeTester):
self._compile_and_check(
[x, y],
self.op_class()(x, y),
[np.random.rand(5, 6), np.random.rand(5, 6)],
[rng.random((5, 6)), rng.random((5, 6))],
self.op_class,
)
......
......@@ -97,12 +97,23 @@ Example:
.. code-block:: python
import numpy as np
import aesara.tensor as at
def test_dot_validity():
a = at.dmatrix('a')
b = at.dmatrix('b')
c = at.dot(a, b)
f = aesara.function([a, b], [c])
assert np.array_equal(f(self.avals, self.bvals), numpy.dot(self.avals, self.bvals))
c_fn = aesara.function([a, b], [c])
avals = ...
bvals = ...
res = c_fn(avals, bvals)
exp_res = np.dot(self.avals, self.bvals)
assert np.array_equal(res, exp_res)
Creating an :class:`Op` Unit Test
......@@ -117,16 +128,16 @@ unit tests for Aesara :class:`Op`\s.
Validating the Gradient
-----------------------
The :func:`verify_grad` function can be used to validate that the :meth:`Op.grad`
The :func:`aesara.gradient.verify_grad` function can be used to validate that the :meth:`Op.grad`
method of your :class:`Op` is properly implemented. :func:`verify_grad` is based
on the Finite Difference Method where the derivative of function ``f``
at point ``x`` is approximated as:
on the Finite Difference Method where the derivative of function :math:`f`
at point :math:`x` is approximated as:
.. math::
\frac{\partial{f}}{\partial{x}} = lim_{\Delta \rightarrow 0} \frac {f(x+\Delta) - f(x-\Delta)} {2\Delta}
``verify_grad`` performs the following steps:
:func:`verify_grad` performs the following steps:
* approximates the gradient numerically using the Finite Difference Method
......@@ -142,7 +153,7 @@ Here is the prototype for the :func:`verify_grad` function.
def verify_grad(fun, pt, n_tests=2, rng=None, eps=1.0e-7, abs_tol=0.0001, rel_tol=0.0001):
:func:`verify_grad` raises an ``Exception`` if the difference between the analytic gradient and
:func:`verify_grad` raises an :class:`Exception` if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) of a random
projection of the fun's output to a scalar exceeds both the given absolute and
relative tolerances.
......@@ -152,15 +163,15 @@ The parameters are as follows:
* ``fun``: a Python function that takes Aesara variables as inputs,
and returns an Aesara variable.
For instance, an :class:`Op` instance with a single output is such a function.
It can also be a Python function that calls an op with some of its
It can also be a Python function that calls an :class:`Op` with some of its
inputs being fixed to specific values, or that combine multiple :class:`Op`\s.
* ``pt``: the list of numpy.ndarrays to use as input values
* ``pt``: the list of `np.ndarray`\s to use as input values
* ``n_tests``: number of times to run the test
* ``rng``: random number generator used to generate a random vector u,
we check the gradient of sum(u*fn) at pt
* ``rng``: random number generator used to generate a random vector `u`,
we check the gradient of ``sum(u*fn)`` at ``pt``
* ``eps``: stepsize used in the Finite Difference Method
......@@ -176,12 +187,12 @@ symbolic variable:
def test_verify_exprgrad():
def fun(x,y,z):
return (x + tensor.cos(y)) / (4 * z)**2
return (x + at.cos(y)) / (4 * z)**2
x_val = numpy.asarray([[1], [1.1], [1.2]])
y_val = numpy.asarray([0.1, 0.2])
z_val = numpy.asarray(2)
rng = numpy.random.RandomState(42)
x_val = np.asarray([[1], [1.1], [1.2]])
y_val = np.asarray([0.1, 0.2])
z_val = np.asarray(2)
rng = np.random.default_rng(42)
aesara.gradient.verify_grad(fun, [x_val, y_val, z_val], rng=rng)
......@@ -190,11 +201,13 @@ Here is an example showing how to use :func:`verify_grad` on an :class:`Op` inst
.. testcode::
def test_flatten_outdimNone():
# Testing gradient w.r.t. all inputs of an op (in this example the op
# being used is Flatten(), which takes a single input).
a_val = numpy.asarray([[0,1,2],[3,4,5]], dtype='float64')
rng = numpy.random.RandomState(42)
aesara.gradient.verify_grad(tensor.Flatten(), [a_val], rng=rng)
"""
Testing gradient w.r.t. all inputs of an `Op` (in this example the `Op`
being used is `Flatten`, which takes a single input).
"""
a_val = np.asarray([[0,1,2],[3,4,5]], dtype='float64')
rng = np.random.default_rng(42)
aesara.gradient.verify_grad(at.Flatten(), [a_val], rng=rng)
Here is another example, showing how to verify the gradient w.r.t. a subset of
an :class:`Op`'s inputs. This is useful in particular when the gradient w.r.t. some of
......@@ -204,29 +217,30 @@ which would cause :func:`verify_grad` to crash.
.. testcode::
def test_crossentropy_softmax_grad():
op = tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias
op = at.nnet.crossentropy_softmax_argmax_1hot_with_bias
def op_with_fixed_y_idx(x, b):
# Input `y_idx` of this Op takes integer values, so we fix them
# Input `y_idx` of this `Op` takes integer values, so we fix them
# to some constant array.
# Although this op has multiple outputs, we can return only one.
# Although this `Op` has multiple outputs, we can return only one.
# Here, we return the first output only.
return op(x, b, y_idx=numpy.asarray([0, 2]))[0]
return op(x, b, y_idx=np.asarray([0, 2]))[0]
x_val = numpy.asarray([[-1, 0, 1], [3, 2, 1]], dtype='float64')
b_val = numpy.asarray([1, 2, 3], dtype='float64')
rng = numpy.random.RandomState(42)
x_val = np.asarray([[-1, 0, 1], [3, 2, 1]], dtype='float64')
b_val = np.asarray([1, 2, 3], dtype='float64')
rng = np.random.default_rng(42)
aesara.gradient.verify_grad(op_with_fixed_y_idx, [x_val, b_val], rng=rng)
.. note::
Although ``verify_grad`` is defined in ``aesara.tensor.basic``, unittests
should use the version of ``verify_grad`` defined in ``tests.unittest_tools``.
Although :func:`verify_grad` is defined in :mod:`aesara.gradient`, unittests
should use the version of :func:`verify_grad` defined in :mod:`tests.unittest_tools`.
This is simply a wrapper function which takes care of seeding the random
number generator appropriately before calling ``aesara.gradient.verify_grad``
number generator appropriately before calling :func:`aesara.gradient.verify_grad`.
makeTester and makeBroadcastTester
==================================
:func:`makeTester` and :func:`makeBroadcastTester`
==================================================
Most :class:`Op` unittests perform the same function. All such tests must
verify that the :class:`Op` generates the proper output, that the gradient is
......@@ -244,21 +258,23 @@ product :class:`Op`:
from tests.tensor.utils import makeTester
rng = np.random.default_rng(23098)
TestDot = makeTester(
name="DotTester",
op=np.dot,
expected=lambda x, y: numpy.dot(x, y),
expected=lambda x, y: np.dot(x, y),
checks={},
good=dict(
correct1=(rng.rand(5, 7), rng.rand(7, 5)),
correct2=(rng.rand(5, 7), rng.rand(7, 9)),
correct3=(rng.rand(5, 7), rng.rand(7)),
correct1=(rng.random((5, 7)), rng.random((7, 5))),
correct2=(rng.random((5, 7)), rng.random((7, 9))),
correct3=(rng.random((5, 7)), rng.random((7,))),
),
bad_build=dict(),
bad_runtime=dict(
bad1=(rng.rand(5, 7), rng.rand(5, 7)), bad2=(rng.rand(5, 7), rng.rand(8, 3))
bad1=(rng.random((5, 7)), rng.random((5, 7))),
bad2=(rng.random((5, 7)), rng.random((8, 3)))
),
grad=dict(),
)
......
......@@ -14,37 +14,36 @@ Guide
=====
The NanGuardMode aims to prevent the model from outputting NaNs or Infs. It has
a number of self-checks, which can help to find out which apply node is
generating those incorrect outputs. It provides automatic detection of 3 types
The :class:`NanGuardMode` aims to prevent the model from outputting NaNs or Infs. It has
a number of self-checks, which can help to find out which :class:`Apply` node is
generating those incorrect outputs. It provides automatic detection of three types
of abnormal values: NaNs, Infs, and abnormally big values.
NanGuardMode can be used as follows:
`NanGuardMode` can be used as follows:
.. testcode::
import numpy
import numpy as np
import aesara
import aesara.tensor as at
from aesara.compile.nanguardmode import NanGuardMode
x = at.matrix()
w = aesara.shared(numpy.random.randn(5, 7).astype(aesara.config.floatX))
w = aesara.shared(np.random.standard_normal((5, 7)).astype(aesara.config.floatX))
y = at.dot(x, w)
fun = aesara.function(
[x], y,
mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
)
While using the aesara function ``fun``, it will monitor the values of each
While using the Aesara function ``fun``, it will monitor the values of each
input and output variable of each node. When abnormal values are
detected, it raises an error to indicate which node yields the NaNs. For
example, if we pass the following values to ``fun``:
.. testcode::
infa = numpy.tile(
(numpy.asarray(100.) ** 1000000).astype(aesara.config.floatX), (3, 5))
infa = np.tile((np.asarray(100.) ** 1000000).astype(aesara.config.floatX), (3, 5))
fun(infa)
.. testoutput::
......@@ -55,17 +54,17 @@ example, if we pass the following values to ``fun``:
...
AssertionError: ...
It will raise an AssertionError indicating that Inf value is detected while
It will raise an `AssertionError` indicating that Inf value is detected while
executing the function.
You can also set the three parameters in ``NanGuardMode()`` to indicate which
You can also set the three parameters in `NanGuardMode` to indicate which
kind of abnormal values to monitor. ``nan_is_error`` and ``inf_is_error`` have
no default values, so they need to be set explicitly, but ``big_is_error`` is
set to be ``True`` by default.
.. note::
NanGuardMode significantly slows down computations; only
`NanGuardMode` significantly slows down computations; only
enable as needed.
Reference
......
......@@ -797,8 +797,7 @@ import ``aesara`` and print the config variable, as in:
Aesara will execute the graph using constants and/or shared variables
provided by the user. Purely symbolic variables (e.g. ``x =
aesara.tensor.dmatrix()``) can be augmented with test values, by writing to
their ``tag.test_value`` attribute (e.g. ``x.tag.test_value =
numpy.random.rand(5, 4)``).
their ``.tag.test_value`` attributes (e.g. ``x.tag.test_value = np.ones((5, 4))``).
When not ``'off'``, the value of this option dictates what happens when
an :class:`Op`'s inputs do not provide appropriate test values:
......
......@@ -8,29 +8,32 @@ Basic Tensor Functionality
.. testsetup::
import numpy as np
import aesara
import aesara.tensor as at
from aesara.tensor.type import scalar, iscalar, TensorType, dmatrix, ivector, fmatrix
from aesara.tensor import set_subtensor, inc_subtensor, batched_dot
from aesara import shared
import numpy
import numpy as np
Aesara supports any kind of Python object, but its focus is support for
symbolic matrix expressions. When you type,
Aesara supports symbolic tensor expressions. When you type,
>>> import aesara.tensor as at
>>> x = at.fmatrix()
the ``x`` is a :class:`TensorVariable` instance.
The ``at.fmatrix`` object itself is an instance of :class:`TensorType`.
Aesara knows what type of variable ``x`` is because ``x.type``
points back to ``at.fmatrix``.
This chapter explains the various ways of creating tensor variables,
This section explains the various ways in which a tensor variable can be created,
the attributes and methods of :class:`TensorVariable` and :class:`TensorType`,
and various basic symbolic math and arithmetic that Aesara supports for
tensor variables.
In general, Aesara's API tries to mirror NumPy's, so, in most cases, it's safe
to assume that the basic NumPy array functions and methods will be available.
.. _libdoc_tensor_creation:
Creation
......@@ -39,63 +42,65 @@ Creation
Aesara provides a list of predefined tensor types that can be used
to create a tensor variables. Variables can be named to facilitate debugging,
and all of these constructors accept an optional ``name`` argument.
For example, the following each produce a TensorVariable instance that stands
for a 0-dimensional ndarray of integers with the name ``'myvar'``:
For example, the following each produce a `TensorVariable` instance that stands
for a 0-dimensional `ndarray` of integers with the name ``'myvar'``:
>>> x = scalar('myvar', dtype='int32')
>>> x = iscalar('myvar')
>>> x = at.scalar('myvar', dtype='int32')
>>> x = at.iscalar('myvar')
>>> x = at.tensor(dtype='int32', shape=(), name='myvar')
>>> from aesara.tensor.type import TensorType
>>> x = TensorType(dtype='int32', shape=())('myvar')
Constructors with optional dtype
----------------------------------------
--------------------------------
These are the simplest and often-preferred methods for creating symbolic
variables in your code. By default, they produce floating-point variables
(with dtype determined by config.floatX, see :attr:`floatX`) so if you use
(with dtype determined by `aesara.config.floatX`) so if you use
these constructors it is easy to switch your code between different levels of
floating-point precision.
.. function:: scalar(name=None, dtype=config.floatX)
Return a Variable for a 0-dimensional ndarray
Return a `Variable` for a 0-dimensional `ndarray`
.. function:: vector(name=None, dtype=config.floatX)
Return a Variable for a 1-dimensional ndarray
Return a `Variable` for a 1-dimensional `ndarray`
.. function:: row(name=None, dtype=config.floatX)
Return a Variable for a 2-dimensional ndarray
Return a `Variable` for a 2-dimensional `ndarray`
in which the number of rows is guaranteed to be 1.
.. function:: col(name=None, dtype=config.floatX)
Return a Variable for a 2-dimensional ndarray
Return a `Variable` for a 2-dimensional `ndarray`
in which the number of columns is guaranteed to be 1.
.. function:: matrix(name=None, dtype=config.floatX)
Return a Variable for a 2-dimensional ndarray
Return a `Variable` for a 2-dimensional `ndarray`
.. function:: tensor3(name=None, dtype=config.floatX)
Return a Variable for a 3-dimensional ndarray
Return a `Variable` for a 3-dimensional `ndarray`
.. function:: tensor4(name=None, dtype=config.floatX)
Return a Variable for a 4-dimensional ndarray
Return a `Variable` for a 4-dimensional `ndarray`
.. function:: tensor5(name=None, dtype=config.floatX)
Return a Variable for a 5-dimensional ndarray
Return a `Variable` for a 5-dimensional `ndarray`
.. function:: tensor6(name=None, dtype=config.floatX)
Return a Variable for a 6-dimensional ndarray
Return a `Variable` for a 6-dimensional `ndarray`
.. function:: tensor7(name=None, dtype=config.floatX)
Return a Variable for a 7-dimensional ndarray
Return a `Variable` for a 7-dimensional `ndarray`
.. #COMMENT
Each of the types described above can be constructed by two methods:
......@@ -109,16 +114,14 @@ floating-point precision.
All Fully-Typed Constructors
----------------------------
The following TensorType instances are provided in the aesara.tensor module.
The following `TensorType` instances are provided in the `aesara.tensor` module.
They are all callable, and accept an optional ``name`` argument. So for example:
.. testcode:: constructors
from aesara.tensor import *
x = dmatrix() # creates one Variable with no name
x = dmatrix('x') # creates one Variable with name 'x'
xyz = dmatrix('xyz') # creates one Variable with name 'xyz'
x = at.dmatrix() # creates one Variable with no name
x = at.dmatrix('x') # creates one Variable with name 'x'
xyz = at.dmatrix('xyz') # creates one Variable with name 'xyz'
.. #COMMENT
table generated by
......@@ -210,7 +213,7 @@ ztensor7 complex128 7 (?,?,?,?,?,?,?) (False,) * 7
============ =========== ==== ================ ===================================
Plural Constructors
--------------------------
-------------------
There are several constructors that can produce multiple variables at once.
These are not frequently used in practice, but often used in tutorial examples to save space!
......@@ -237,16 +240,16 @@ These are not frequently used in practice, but often used in tutorial examples t
Each of these plural constructors accepts
an integer or several strings. If an integer is provided, the method
will return that many Variables and if strings are provided, it will
create one Variable for each string, using the string as the Variable's
will return that many `Variables` and if strings are provided, it will
create one `Variable` for each string, using the string as the `Variable`'s
name. For example:
.. testcode:: constructors
from aesara.tensor import *
x, y, z = dmatrices(3) # creates three matrix Variables with no names
x, y, z = dmatrices('x', 'y', 'z') # creates three matrix Variables named 'x', 'y' and 'z'
# Creates three matrix `Variable`s with no names
x, y, z = at.dmatrices(3)
# Creates three matrix `Variables` named 'x', 'y' and 'z'
x, y, z = at.dmatrices('x', 'y', 'z')
Custom tensor types
......@@ -258,110 +261,121 @@ your own :class:`TensorType` instance. You create such an instance by passing
the dtype and broadcasting pattern to the constructor. For example, you
can create your own 8-dimensional tensor type
>>> dtensor8 = TensorType('float64', (False,)*8)
>>> dtensor8 = TensorType(dtype='float64', shape=(None,)*8)
>>> x = dtensor8()
>>> z = dtensor8('z')
You can also redefine some of the provided types and they will interact
correctly:
>>> my_dmatrix = TensorType('float64', (False,)*2)
>>> x = my_dmatrix() # allocate a matrix variable
>>> my_dmatrix = TensorType('float64', shape=(None,)*2)
>>> x = my_dmatrix() # allocate a matrix variable
>>> my_dmatrix == dmatrix
True
See :class:`TensorType` for more information about creating new types of
Tensor.
tensors.
Converting from Python Objects
-------------------------------
Another way of creating a TensorVariable (a TensorSharedVariable to be
precise) is by calling :func:`shared()`
Another way of creating a `TensorVariable` (a `TensorSharedVariable` to be
precise) is by calling :func:`aesara.shared`
.. testcode::
x = shared(numpy.random.randn(3,4))
x = aesara.shared(np.random.standard_normal((3, 4)))
This will return a :term:`shared variable <shared variable>` whose ``.value`` is
a numpy ndarray. The number of dimensions and dtype of the Variable are
inferred from the ndarray argument. The argument to `shared` *will not be
a NumPy `ndarray`. The number of dimensions and dtype of the `Variable` are
inferred from the `ndarray` argument. The argument to `shared` *will not be
copied*, and subsequent changes will be reflected in ``x.value``.
For additional information, see the :func:`shared() <shared.shared>` documentation.
.. _libdoc_tensor_autocasting:
Finally, when you use a numpy ndarray or a Python number together with
Finally, when you use a NumPy `ndarray` or a Python number together with
:class:`TensorVariable` instances in arithmetic expressions, the result is a
:class:`TensorVariable`. What happens to the ndarray or the number?
Aesara requires that the inputs to all expressions be Variable instances, so
:class:`TensorVariable`. What happens to the `ndarray` or the number?
Aesara requires that the inputs to all expressions be `Variable` instances, so
Aesara automatically wraps them in a :class:`TensorConstant`.
.. note::
Aesara makes a copy of any ndarray that you use in an expression, so
subsequent
changes to that ndarray will not have any effect on the Aesara expression.
Aesara makes a copy of any `ndarray` that is used in an expression, so
subsequent changes to that `ndarray` will not have any effect on the Aesara
expression in which it is contained.
For numpy ndarrays the dtype is given, but the broadcastable pattern must be
inferred. The TensorConstant is given a type with a matching dtype,
and a broadcastable pattern with a ``True`` for every shape dimension that is 1.
For NumPy `ndarrays` the dtype is given, but the static shape/broadcastable pattern must be
inferred. The `TensorConstant` is given a type with a matching dtype,
and a static shape/broadcastable pattern with a ``1``\/``True`` for every shape
dimension that is one and ``None``\/``False`` for every dimension with an unknown
shape.
For python numbers, the broadcastable pattern is ``()`` but the dtype must be
For Python numbers, the static shape/broadcastable pattern is ``()`` but the dtype must be
inferred. Python integers are stored in the smallest dtype that can hold
them, so small constants like ``1`` are stored in a ``bscalar``.
Likewise, Python floats are stored in an fscalar if fscalar suffices to hold
them perfectly, but a dscalar otherwise.
them, so small constants like ``1`` are stored in a `bscalar`.
Likewise, Python floats are stored in an `fscalar` if `fscalar` suffices to hold
them perfectly, but a `dscalar` otherwise.
.. note::
When config.floatX==float32 (see :mod:`config`), then Python floats
When ``config.floatX == float32`` (see :mod:`config`), then Python floats
are stored instead as single-precision floats.
For fine control of this rounding policy, see
aesara.tensor.basic.autocast_float.
`aesara.tensor.basic.autocast_float`.
.. function:: as_tensor_variable(x, name=None, ndim=None)
Turn an argument `x` into a TensorVariable or TensorConstant.
Turn an argument `x` into a `TensorVariable` or `TensorConstant`.
Many tensor Ops run their arguments through this function as
pre-processing. It passes through TensorVariable instances, and tries to
wrap other objects into TensorConstant.
Many tensor `Op`\s run their arguments through this function as
pre-processing. It passes through `TensorVariable` instances, and tries to
wrap other objects into `TensorConstant`.
When `x` is a Python number, the dtype is inferred as described above.
When `x` is a `list` or `tuple` it is passed through numpy.asarray
When `x` is a `list` or `tuple` it is passed through `np.asarray`
If the `ndim` argument is not None, it must be an integer and the output
If the `ndim` argument is not ``None``, it must be an integer and the output
will be broadcasted if necessary in order to have this many dimensions.
:rtype: :class:`TensorVariable` or :class:`TensorConstant`
TensorType and TensorVariable
=============================
`TensorType` and `TensorVariable`
=================================
.. class:: TensorType(Type)
The Type class used to mark Variables that stand for `numpy.ndarray`
values (`numpy.memmap`, which is a subclass of `numpy.ndarray`, is also allowed).
Recalling to the tutorial, the purple box in
The `Type` class used to mark Variables that stand for `numpy.ndarray`
values. `numpy.memmap`, which is a subclass of `numpy.ndarray`, is also
allowed. Recall from the tutorial that the purple box in
:ref:`the tutorial's graph-structure figure <tutorial-graphfigure>` is an instance of this class.
.. attribute:: shape
A tuple of ``None`` and integer values representing the static shape associated with this
`Type`. ``None`` values represent unknown/non-fixed shape values.
.. note::
Broadcastable tuples/values are an old Theano construct that are
being phased-out in Aesara.
.. attribute:: broadcastable
A tuple of True/False values, one for each dimension. True in
position 'i' indicates that at evaluation-time, the ndarray will have
size 1 in that 'i'-th dimension. Such a dimension is called a
A tuple of ``True``\/``False`` values, one for each dimension. ``True`` in
position ``i`` indicates that at evaluation-time, the `ndarray` will have
size one in that ``i``-th dimension. Such a dimension is called a
*broadcastable dimension* (see :ref:`tutbroadcasting`).
The broadcastable pattern indicates both the number of dimensions and
whether a particular dimension must have length 1.
whether a particular dimension must have length one.
Here is a table mapping some `broadcastable` patterns to what they
Here is a table mapping some broadcastable patterns to what they
mean:
===================== =================================
......@@ -380,19 +394,18 @@ TensorType and TensorVariable
[False, False, False] A MxNxP tensor (pattern of a + b)
===================== =================================
For dimensions in which broadcasting is False, the length of this
dimension can be 1 or more. For dimensions in which broadcasting is True,
the length of this dimension must be 1.
For dimensions in which broadcasting is ``False``, the length of this
dimension can be one or more. For dimensions in which broadcasting is ``True``,
the length of this dimension must be one.
When two arguments to an element-wise operation (like addition or
subtraction) have a different
number of dimensions, the broadcastable
subtraction) have a different number of dimensions, the broadcastable
pattern is *expanded to the left*, by padding with ``True``. For example,
a vector's pattern, ``[False]``, could be expanded to ``[True, False]``, and
would behave like a row (1xN matrix). In the same way, a matrix (``[False,
False]``) would behave like a 1xNxP tensor (``[True, False, False]``).
If we wanted to create a type representing a matrix that would
If we wanted to create a `TensorType` representing a matrix that would
broadcast over the middle dimension of a 3-dimensional tensor when
adding them together, we would define it like this:
......@@ -400,19 +413,18 @@ TensorType and TensorVariable
.. attribute:: ndim
The number of dimensions that a Variable's value will have at
The number of dimensions that a `Variable`'s value will have at
evaluation-time. This must be known when we are building the
expression graph.
.. attribute:: dtype
A string indicating
the numerical type of the ndarray for which a Variable of this Type
is standing.
A string indicating the numerical type of the `ndarray` that a
`Variable` of this `Type` represents.
.. _dtype_list:
The dtype attribute of a TensorType instance can be any of the
The :attr:`dtype` attribute of a `TensorType` instance can be any of the
following strings.
================= =================== =================
......@@ -434,31 +446,31 @@ TensorType and TensorVariable
.. method:: __init__(self, dtype, broadcastable)
If you wish to use a type of tensor which is not already available
(for example, a 5D tensor) you can build an appropriate type by instantiating
If you wish to use a `Type` that is not already available (for example,
a 5D tensor), you can build an appropriate `Type` by instantiating
:class:`TensorType`.
TensorVariable
`TensorVariable`
----------------
.. class:: TensorVariable(Variable, _tensor_py_operators)
The result of symbolic operations typically have this type.
A `Variable` type that represents symbolic tensors.
See :class:`_tensor_py_operators` for most of the attributes and methods
you'll want to call.
.. class:: TensorConstant(Variable, _tensor_py_operators)
Python and numpy numbers are wrapped in this type.
Python and NumPy numbers are wrapped in this type.
See :class:`_tensor_py_operators` for most of the attributes and methods
you'll want to call.
.. class:: TensorSharedVariable(Variable, _tensor_py_operators)
This type is returned by :func:`shared` when the value to share is a numpy
This type is returned by :func:`shared` when the value to share is a NumPy
ndarray.
See :class:`_tensor_py_operators` for most of the attributes and methods
......@@ -469,7 +481,7 @@ TensorVariable
:members:
This mix-in class adds convenient attributes, methods, and support
to TensorVariable, TensorConstant and TensorSharedVariable for
to `TensorVariable`, `TensorConstant` and `TensorSharedVariable` for
Python operators (see :ref:`tensor_operator_support`).
.. attribute:: type
......@@ -493,7 +505,7 @@ TensorVariable
:noindex:
Returns a view of this tensor that has been reshaped as in
numpy.reshape. If the shape is a Variable argument, then you might
`numpy.reshape`. If the shape is a `Variable` argument, then you might
need to use the optional `ndim` parameter to declare how many elements
the shape has, and therefore how many dimensions the reshaped Variable
will have.
......@@ -504,32 +516,32 @@ TensorVariable
:noindex:
Returns a view of this tensor with permuted dimensions. Typically the
pattern will include the integers 0, 1, ... ndim-1, and any number of
'x' characters in dimensions where this tensor should be broadcasted.
pattern will include the integers ``0, 1, ... ndim-1``, and any number of
``'x'`` characters in dimensions where this tensor should be broadcasted.
A few examples of patterns and their effect:
* ('x') -> make a 0d (scalar) into a 1d vector
* (0, 1) -> identity for 2d vectors
* (1, 0) -> inverts the first and second dimensions
* ('x', 0) -> make a row out of a 1d vector (N to 1xN)
* (0, 'x') -> make a column out of a 1d vector (N to Nx1)
* (2, 0, 1) -> AxBxC to CxAxB
* (0, 'x', 1) -> AxB to Ax1xB
* (1, 'x', 0) -> AxB to Bx1xA
* (1,) -> This remove dimensions 0. It must be a broadcastable dimension (1xA to A)
* ``('x',)``: make a 0d (scalar) into a 1d vector
* ``(0, 1)``: identity for 2d vectors
* ``(1, 0)``: inverts the first and second dimensions
* ``('x', 0)``: make a row out of a 1d vector (N to 1xN)
* ``(0, 'x')``: make a column out of a 1d vector (N to Nx1)
* ``(2, 0, 1)``: AxBxC to CxAxB
* ``(0, 'x', 1)``: AxB to Ax1xB
* ``(1, 'x', 0)``: AxB to Bx1xA
* ``(1,)``: This removes the dimension at index 0. It must be a broadcastable dimension.
.. method:: flatten(ndim=1)
Returns a view of this tensor with `ndim` dimensions, whose shape for the first
`ndim-1` dimensions will be the same as `self`, and shape in the
remaining dimension will be expanded to fit in all the data from self.
``ndim-1`` dimensions will be the same as ``self``, and shape in the
remaining dimension will be expanded to fit in all the data from ``self``.
See :func:`flatten`.
.. method:: ravel()
return self.flatten(). For NumPy compatibility.
Returns ``self.flatten()``. Provided for NumPy compatibility.
.. attribute:: T
......@@ -538,19 +550,16 @@ TensorVariable
>>> x = at.zmatrix()
>>> y = 3+.2j * x.T
.. note::
In NumPy and in Aesara, the transpose of a vector is exactly the
same vector! Use `reshape` or `dimshuffle` to turn your vector
into a row or column matrix.
.. method:: {any,all}(axis=None, keepdims=False)
.. method:: {sum,prod,mean}(axis=None, dtype=None, keepdims=False, acc_dtype=None)
.. method:: {var,std,min,max,argmin,argmax}(axis=None, keepdims=False),
.. method:: diagonal(offset=0, axis1=0, axis2=1)
.. method:: astype(dtype)
.. method:: take(indices, axis=None, mode='raise')
.. method:: copy() Return a new symbolic variable that is a copy of the variable. Does not copy the tag.
.. method:: copy()
Return a new symbolic variable that is a copy of the variable. Does not copy the tag.
.. method:: norm(L, axis=None)
.. method:: nonzero(self, return_matrix=False)
:noindex:
......@@ -584,27 +593,27 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
.. function:: shape(x)
Returns an lvector representing the shape of `x`.
Returns an `lvector` representing the shape of `x`.
.. function:: reshape(x, newshape, ndim=None)
:noindex:
:type x: any TensorVariable (or compatible)
:type x: any `TensorVariable` (or compatible)
:param x: variable to be reshaped
:type newshape: lvector (or compatible)
:type newshape: `lvector` (or compatible)
:param newshape: the new shape for `x`
:param ndim: optional - the length that `newshape`'s value will have.
If this is ``None``, then `reshape()` will infer it from `newshape`.
If this is ``None``, then `reshape` will infer it from `newshape`.
:rtype: variable with x's dtype, but ndim dimensions
:rtype: variable with `x`'s dtype, but `ndim` dimensions
.. note::
This function can infer the length of a symbolic newshape in some
cases, but if it cannot and you do not provide the `ndim`, then this
function will raise an Exception.
This function can infer the length of a symbolic `newshape` value in
some cases, but if it cannot and you do not provide the `ndim`, then
this function will raise an Exception.
.. function:: shape_padleft(x, n_ones=1)
......@@ -614,7 +623,7 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
see the :func:`unbroadcast`.
:param x: variable to be reshaped
:type x: any TensorVariable (or compatible)
:type x: any `TensorVariable` (or compatible)
:type n_ones: int
:param n_ones: number of dimensions to be added to `x`
......@@ -623,7 +632,7 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
.. function:: shape_padright(x, n_ones=1)
Reshape `x` by right padding the shape with `n_ones` 1s. Note that all
Reshape `x` by right padding the shape with `n_ones` ones. Note that all
this new dimension will be broadcastable. To make them non-broadcastable
see the :func:`unbroadcast`.
......@@ -636,11 +645,11 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
.. function:: shape_padaxis(t, axis)
Reshape `t` by inserting 1 at the dimension `axis`. Note that this new
Reshape `t` by inserting ``1`` at the dimension `axis`. Note that this new
dimension will be broadcastable. To make it non-broadcastable
see the :func:`unbroadcast`.
:type x: any TensorVariable (or compatible)
:type x: any `TensorVariable` (or compatible)
:param x: variable to be reshaped
:type axis: int
......@@ -669,7 +678,7 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
Similar to :func:`reshape`, but the shape is inferred from the shape of `x`.
:param x: variable to be flattened
:type x: any TensorVariable (or compatible)
:type x: any `TensorVariable` (or compatible)
:type ndim: int
:param ndim: the number of dimensions in the returned variable
......@@ -679,10 +688,10 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
dimensions, but with all remaining dimensions of `x` collapsed into
the last dimension.
For example, if we flatten a tensor of shape (2, 3, 4, 5) with flatten(x,
ndim=2), then we'll have the same (2-1=1) leading dimensions (2,), and the
remaining dimensions are collapsed. So the output in this example would
have shape (2, 60).
For example, if we flatten a tensor of shape ``(2, 3, 4, 5)`` with ``flatten(x,
ndim=2)``, then we'll have the same (i.e. ``2-1=1``) leading dimensions
``(2,)``, and the remaining dimensions are collapsed, so the output in this
example would have shape ``(2, 60)``.
.. function:: tile(x, reps, ndim=None)
......@@ -702,13 +711,13 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
<aesara.tensor.extra_ops.repeat>`
:note: Currently, `reps` must be a constant, `x.ndim` and
`len(reps)` must be equal and, if specified, `ndim` must be
``len(reps)`` must be equal and, if specified, `ndim` must be
equal to both.
.. autofunction:: roll
Creating Tensor
===============
Creating Tensors
================
.. function:: zeros_like(x, dtype=None)
......@@ -717,7 +726,7 @@ Creating Tensor
:param dtype: data-type, optional
By default, it will be x.dtype.
Returns a tensor the shape of x filled with zeros of the type of dtype.
Returns a tensor the shape of `x` filled with zeros of the type of `dtype`.
.. function:: ones_like(x)
......@@ -725,31 +734,31 @@ Creating Tensor
:param x: tensor that has the same shape as output
:param dtype: data-type, optional
By default, it will be x.dtype.
By default, it will be `x.dtype`.
Returns a tensor the shape of x filled with ones of the type of dtype.
Returns a tensor the shape of `x` filled with ones of the type of `dtype`.
.. function:: zeros(shape, dtype=None)
:param shape: a tuple/list of scalars with the shape information.
:param dtype: the dtype of the new tensor. If None, will use floatX.
:param dtype: the dtype of the new tensor. If ``None``, will use ``"floatX"``.
Returns a tensor filled with 0s of the provided shape.
Returns a tensor filled with zeros of the provided shape.
.. function:: ones(shape, dtype=None)
:param shape: a tuple/list of scalars with the shape information.
:param dtype: the dtype of the new tensor. If None, will use floatX.
:param dtype: the dtype of the new tensor. If ``None``, will use ``"floatX"``.
Returns a tensor filled with 1s of the provided shape.
Returns a tensor filled with ones of the provided shape.
.. function:: fill(a,b)
:param a: tensor that has same shape as output
:param b: aesara scalar or value with which you want to fill the output
:param b: Aesara scalar or value with which you want to fill the output
Create a matrix by filling the shape of `a` with `b`
Create a matrix by filling the shape of `a` with `b`.
.. function:: alloc(value, *shape)
......@@ -759,9 +768,9 @@ Creating Tensor
.. function:: eye(n, m=None, k=0, dtype=aesara.config.floatX)
:param n: number of rows in output (value or aesara scalar)
:param m: number of columns in output (value or aesara scalar)
:param k: Index of the diagonal: 0 refers to the main diagonal,
:param n: number of rows in output (value or Aesara scalar)
:param m: number of columns in output (value or Aesara scalar)
:param k: Index of the diagonal: ``0`` refers to the main diagonal,
a positive value refers to an upper diagonal, and a
negative value to a lower diagonal. It can be an Aesara
scalar.
......@@ -771,21 +780,21 @@ Creating Tensor
.. function:: identity_like(x)
:param x: tensor
:returns: A tensor of same shape as `x` that is filled with 0s everywhere
:returns: A tensor of the same shape as `x` that is filled with zeros everywhere
except for the main diagonal, whose values are equal to one. The output
will have same dtype as `x`.
.. function:: stack(tensors, axis=0)
Stack tensors in sequence on given axis (default is 0).
Stack tensors in sequence on given axis (default is ``0``).
Take a sequence of tensors and stack them on given axis to make a single
tensor. The size in dimension `axis` of the result will be equal to the number
of tensors passed.
:param tensors: a list or a tuple of one or more tensors of the same rank.
:param axis: the axis along which the tensors will be stacked. Default value is 0.
:returns: A tensor such that rval[0] == tensors[0], rval[1] == tensors[1], etc.
:param axis: the axis along which the tensors will be stacked. Default value is ``0``.
:returns: A tensor such that ``rval[0] == tensors[0]``, ``rval[1] == tensors[1]``, etc.
Examples:
......@@ -805,7 +814,7 @@ Creating Tensor
>>> rval.shape # 3 tensors are stacked on axis 0
(3, 2, 2, 2, 2)
We can also specify different axis than default value 0
We can also specify a different axis than the default value ``0``:
>>> x = aesara.tensor.stack([a, b, c], axis=3)
>>> x.ndim
......@@ -834,7 +843,7 @@ Creating Tensor
tensor.
:param tensors: one or more tensors of the same rank
:returns: A tensor such that rval[0] == tensors[0], rval[1] == tensors[1], etc.
:returns: A tensor such that ``rval[0] == tensors[0]``, ``rval[1] == tensors[1]``, etc.
>>> x0 = at.scalar()
>>> x1 = at.scalar()
......@@ -906,7 +915,7 @@ Reductions
:Returns: maximum of *x* along *axis*
`axis` can be:
* *None* - in which case the maximum is computed along all axes (like numpy)
* *None* - in which case the maximum is computed along all axes (like NumPy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
......@@ -919,7 +928,7 @@ Reductions
will broadcast correctly against the original tensor.
:Returns: the index of the maximum value along a given axis
if axis=None, argmax over the flattened tensor (like numpy)
if ``axis`` is ``None``, `argmax` is performed over the flattened tensor (like NumPy)
.. function:: max_and_argmax(x, axis=None, keepdims=False)
......@@ -930,7 +939,7 @@ Reductions
will broadcast correctly against the original tensor.
:Returns: the maximum value along a given axis and its index.
if axis=None, max_and_argmax over the flattened tensor (like numpy)
if ``axis`` is ``None``, `max_and_argmax` is performed over the flattened tensor (like NumPy)
.. function:: min(x, axis=None, keepdims=False)
......@@ -941,8 +950,8 @@ Reductions
will broadcast correctly against the original tensor.
:Returns: minimum of *x* along *axis*
axis can be:
* *None* - in which case the minimum is computed along all axes (like numpy)
`axis` can be:
* ``None`` - in which case the minimum is computed along all axes (like NumPy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
......@@ -955,7 +964,7 @@ Reductions
will broadcast correctly against the original tensor.
:Returns: the index of the minimum value along a given axis
if axis=None, argmin over the flattened tensor (like numpy)
if ``axis`` is ``None``, `argmin` is performed over the flattened tensor (like NumPy)
.. function:: sum(x, axis=None, dtype=None, keepdims=False, acc_dtype=None)
......@@ -987,10 +996,10 @@ Reductions
:Returns: sum of *x* along *axis*
axis can be:
* *None* - in which case the sum is computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
`axis` can be:
* ``None`` - in which case the sum is computed along all axes (like NumPy)
* an int - computed along this axis
* a list of ints - computed along these axes
.. function:: prod(x, axis=None, dtype=None, keepdims=False, acc_dtype=None, no_zeros_in_input=False)
......@@ -1037,10 +1046,10 @@ Reductions
:Returns: product of every term in *x* along *axis*
axis can be:
* *None* - in which case the sum is computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
`axis` can be:
* ``None`` - in which case the sum is computed along all axes (like NumPy)
* an int - computed along this axis
* a list of ints - computed along these axes
.. function:: mean(x, axis=None, dtype=None, keepdims=False, acc_dtype=None)
......@@ -1060,10 +1069,10 @@ Reductions
rules as :func:`sum()`.
:Returns: mean value of *x* along *axis*
axis can be:
* *None* - in which case the mean is computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
`axis` can be:
* ``None`` - in which case the mean is computed along all axes (like NumPy)
* an int - computed along this axis
* a list of ints - computed along these axes
.. function:: var(x, axis=None, keepdims=False)
......@@ -1074,10 +1083,10 @@ Reductions
will broadcast correctly against the original tensor.
:Returns: variance of *x* along *axis*
axis can be:
* *None* - in which case the variance is computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
`axis` can be:
* ``None`` - in which case the variance is computed along all axes (like NumPy)
* an int - computed along this axis
* a list of ints - computed along these axes
.. function:: std(x, axis=None, keepdims=False)
......@@ -1088,10 +1097,10 @@ Reductions
will broadcast correctly against the original tensor.
:Returns: variance of *x* along *axis*
axis can be:
* *None* - in which case the standard deviation is computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
`axis` can be:
* ``None`` - in which case the standard deviation is computed along all axes (like NumPy)
* an int - computed along this axis
* a list of ints - computed along these axes
.. function:: all(x, axis=None, keepdims=False)
......@@ -1102,10 +1111,10 @@ Reductions
will broadcast correctly against the original tensor.
:Returns: bitwise and of *x* along *axis*
axis can be:
* *None* - in which case the 'bitwise and' is computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
`axis` can be:
* ``None`` - in which case the 'bitwise and' is computed along all axes (like NumPy)
* an int - computed along this axis
* a list of ints - computed along these axes
.. function:: any(x, axis=None, keepdims=False)
......@@ -1116,10 +1125,10 @@ Reductions
will broadcast correctly against the original tensor.
:Returns: bitwise or of *x* along *axis*
axis can be:
* *None* - in which case the 'bitwise or' is computed along all axes (like numpy)
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
`axis` can be:
* ``None`` - in which case the 'bitwise or' is computed along all axes (like NumPy)
* an int - computed along this axis
* a list of ints - computed along these axes
.. function:: ptp(x, axis = None)
......@@ -1205,30 +1214,30 @@ Casting
.. function:: cast(x, dtype)
Cast any tensor `x` to a Tensor of the same shape, but with a different
Cast any tensor `x` to a tensor of the same shape, but with a different
numerical type `dtype`.
This is not a reinterpret cast, but a coercion cast, similar to
This is not a reinterpret cast, but a coercion `cast`, similar to
``numpy.asarray(x, dtype=dtype)``.
.. testcode:: cast
import Aesara.tensor as at
import aesara.tensor as at
x = at.matrix()
x_as_int = at.cast(x, 'int32')
Attempting to casting a complex value to a real value is ambiguous and
will raise an exception. Use `real()`, `imag()`, `abs()`, or `angle()`.
will raise an exception. Use `real`, `imag`, `abs`, or `angle`.
.. function:: real(x)
Return the real (not imaginary) components of Tensor x.
For non-complex `x` this function returns x.
Return the real (not imaginary) components of tensor `x`.
For non-complex `x` this function returns `x`.
.. function:: imag(x)
Return the imaginary components of Tensor x.
For non-complex `x` this function returns zeros_like(x).
Return the imaginary components of tensor `x`.
For non-complex `x` this function returns ``zeros_like(x)``.
Comparisons
......@@ -1249,7 +1258,7 @@ The six usual equality and inequality operators share the same interface.
.. testcode:: oper
import Aesara.tensor as at
import aesara.tensor as at
x,y = at.dmatrices('x','y')
z = at.le(x,y)
......@@ -1332,8 +1341,8 @@ Condition
.. function:: switch(cond, ift, iff)
Returns a variable representing a switch between ift (iftrue) and iff (iffalse)
based on the condition cond. This is the Aesara equivalent of numpy.where.
Returns a variable representing a switch between `ift` (i.e. "if true") and `iff` (i.e. "if false")
based on the condition `cond`. This is the Aesara equivalent of `numpy.where`.
:Parameter: *cond* - symbolic Tensor (or compatible)
:Parameter: *ift* - symbolic Tensor (or compatible)
......@@ -1342,32 +1351,32 @@ Condition
.. testcode:: switch
import Aesara.tensor as at
import aesara.tensor as at
a,b = at.dmatrices('a','b')
x,y = at.dmatrices('x','y')
z = at.switch(at.lt(a,b), x, y)
.. function:: where(cond, ift, iff)
Alias for `switch`. where is the numpy name.
Alias for `switch`. ``where`` is the NumPy name.
.. function:: clip(x, min, max)
Return a variable representing x, but with all elements greater than
Return a variable representing `x`, but with all elements greater than
`max` clipped to `max` and all elements less than `min` clipped to `min`.
Normal broadcasting rules apply to each of `x`, `min`, and `max`.
Note that there is no warning for inputs that are the wrong way round
(`min > max`), and that results in this case may differ from ``numpy.clip``.
(`min > max`), and that results in this case may differ from `numpy.clip`.
Bit-wise
--------
The bitwise operators possess this interface:
:Parameter: *a* - symbolic Tensor of integer type.
:Parameter: *b* - symbolic Tensor of integer type.
:Parameter: *a* - symbolic tensor of integer type.
:Parameter: *b* - symbolic tensor of integer type.
.. note::
......@@ -1375,7 +1384,7 @@ The bitwise operators possess this interface:
The bit-wise not (invert) takes only one parameter.
:Return type: symbolic Tensor with corresponding dtype.
:Return type: symbolic tensor with corresponding dtype.
.. function:: and_(a, b)
......@@ -1395,25 +1404,25 @@ The bitwise operators possess this interface:
.. function:: bitwise_and(a, b)
Alias for `and_`. bitwise_and is the numpy name.
Alias for `and_`. ``bitwise_and`` is the NumPy name.
.. function:: bitwise_or(a, b)
Alias for `or_`. bitwise_or is the numpy name.
Alias for `or_`. ``bitwise_or`` is the NumPy name.
.. function:: bitwise_xor(a, b)
Alias for `xor_`. bitwise_xor is the numpy name.
Alias for `xor_`. ``bitwise_xor`` is the NumPy name.
.. function:: bitwise_not(a, b)
Alias for invert. invert is the numpy name.
Alias for `invert`. ``invert`` is the NumPy name.
Here is an example using the bit-wise ``and_`` via the ``&`` operator:
.. testcode:: bitwise
import Aesara.tensor as at
import aesara.tensor as at
x,y = at.imatrices('x','y')
z = x & y
......@@ -1518,7 +1527,7 @@ Mathematical
Returns a variable representing the survival function (1-cdf —
sometimes more accurate).
C code is provided in the Aesara_lgpl repository.
C code is provided in the Theano_lgpl repository.
This makes it faster.
https://github.com/Theano/Theano_lgpl.git
......@@ -1542,7 +1551,7 @@ Linear Algebra
:param Y: right term
:type X: symbolic tensor
:type Y: symbolic tensor
:rtype: `symbolic matrix or vector`
:rtype: symbolic matrix or vector
:return: the inner product of `X` and `Y`.
.. function:: outer(X, Y)
......@@ -1560,7 +1569,7 @@ Linear Algebra
Given two tensors a and b, tensordot computes a generalized dot product over
the provided axes. Aesara's implementation reduces all expressions to
matrix or vector dot products and is based on code from Tijmen Tieleman's
gnumpy (http://www.cs.toronto.edu/~tijmen/gnumpy.html).
`gnumpy` (http://www.cs.toronto.edu/~tijmen/gnumpy.html).
:param a: the first tensor variable
:type a: symbolic tensor
......@@ -1575,7 +1584,7 @@ Linear Algebra
Note that the default value of 2 is not guaranteed to work
for all values of a and b, and an error will be raised if
that is the case. The reason for keeping the default is to
maintain the same signature as numpy's tensordot function
maintain the same signature as NumPy's tensordot function
(and np.tensordot raises analogous errors for non-compatible
inputs).
......@@ -1612,21 +1621,17 @@ Linear Algebra
a = np.random.random((2,3,4))
b = np.random.random((5,6,4,3))
#tensordot
c = np.tensordot(a, b, [[1,2],[3,2]])
#loop replicating tensordot
a0, a1, a2 = a.shape
b0, b1, _, _ = b.shape
cloop = np.zeros((a0,b0,b1))
#loop over non-summed indices -- these exist
#in the tensor product.
# Loop over non-summed indices--these exist in the tensor product
for i in range(a0):
for j in range(b0):
for k in range(b1):
#loop over summed indices -- these don't exist
#in the tensor product.
# Loop over summed indices--these don't exist in the tensor product
for l in range(a1):
for m in range(a2):
cloop[i,j,k] += a[i,l,m] * b[j,k,m,l]
......@@ -1668,9 +1673,7 @@ Linear Algebra
>>> second = at.tensor3('second')
>>> result = batched_dot(first, second)
:note: This is a subset of numpy.einsum, but we do not provide it for now.
But numpy einsum is slower than dot or tensordot:
http://mail.scipy.org/pipermail/numpy-discussion/2012-October/064259.html
:note: This is a subset of `numpy.einsum`, but we do not provide it for now.
:param X: left term
:param Y: right term
......
......@@ -65,8 +65,8 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY?
up_fn, app_fn = build_logistic_regression_model(n_in=10, n_out=3, l2_coef=30.0)
x_data = numpy.random.randn(100, 10)
y_data = numpy.random.randn(100, 3)
x_data = numpy.random.standard_normal((100, 10))
y_data = numpy.random.standard_normal((100, 3))
y_data = _asarray(y_data == numpy.max(y_data, axis=1), dtype='int64')
print("Model Training ...")
......
......@@ -11,31 +11,36 @@ Note that you want SciPy >= 0.7.2
.. warning::
In SciPy 0.6, ``scipy.csc_matrix.dot`` has a bug with singleton
In SciPy 0.6, `scipy.csc_matrix.dot` has a bug with singleton
dimensions. There may be more bugs. It also has inconsistent
implementation of sparse matrices.
We do not test against SciPy versions below 0.7.2.
We describe the details of the compressed sparse matrix types.
``scipy.sparse.csc_matrix``
should be used if there are more rows than column (shape[0] > shape[1]).
``scipy.sparse.csr_matrix``
should be used if there are more columns than rows (shape[0] < shape[1]).
``scipy.sparse.lil_matrix``
`scipy.sparse.csc_matrix`
should be used if there are more rows than columns (``shape[0] > shape[1]``).
`scipy.sparse.csr_matrix`
should be used if there are more columns than rows (``shape[0] < shape[1]``).
`scipy.sparse.lil_matrix`
is faster if we are modifying the array. After initial inserts,
we can then convert to the appropriate sparse matrix format.
The following types also exist:
``dok_matrix``
`dok_matrix`
Dictionary of Keys format. From their doc: This is an efficient structure for constructing sparse matrices incrementally.
``coo_matrix``
`coo_matrix`
Coordinate format. From their lil doc: consider using the COO format when constructing large matrices.
There seems to be a new format planned for scipy 0.7.x:
``bsr_matrix``
Block Compressed Row (BSR). From their doc: The Block Compressed Row (BSR) format is very similar to the Compressed Sparse Row (CSR) format. BSR is appropriate for sparse matrices with dense sub matrices like the last example below. Block matrices often arise in vector-valued finite element discretizations. In such cases, BSR is considerably more efficient than CSR and CSC for many sparse arithmetic operations.
``dia_matrix``
There seems to be a new format planned for SciPy 0.7.x:
`bsr_matrix`
Block Compressed Row (BSR). From their doc: The Block Compressed Row
(BSR) format is very similar to the Compressed Sparse Row (CSR)
format. BSR is appropriate for sparse matrices with dense sub matrices
like the last example below. Block matrices often arise in vector-valued
finite element discretizations. In such cases, BSR is considerably more
efficient than CSR and CSC for many sparse arithmetic operations.
`dia_matrix`
Sparse matrix with DIAgonal storage
There are four member variables that comprise a compressed matrix ``sp`` (for at least csc, csr and bsr):
......@@ -52,9 +57,9 @@ There are four member variables that comprise a compressed matrix ``sp`` (for at
row location.
``sp.indptr``
gives the other location of the non-zero entry. For CSC, there are
as many values of indptr as there are columns + 1 in the matrix.
as many values of indptr as there are ``columns + 1`` in the matrix.
``sp.indptr[k] = x`` and ``indptr[k+1] = y`` means that column
k contains sp.data[x:y], i.e. the xth through the y-1th non-zero values.
``k`` contains ``sp.data[x:y]``, i.e. the ``x``-th through the ``(y-1)``-th non-zero values.
See the example below for details.
......@@ -63,7 +68,7 @@ See the example below for details.
>>> import scipy.sparse
>>> sp = scipy.sparse.csc_matrix((5, 10))
>>> sp[4, 0] = 20
/u/lisa/local/byhost/test_maggie46.iro.umontreal.ca/lib64/python2.5/site-packages/scipy/sparse/compressed.py:494: SparseEfficiencyWarning: changing the sparsity structure of a csc_matrix is expensive. lil_matrix is more efficient.
SparseEfficiencyWarning: changing the sparsity structure of a csc_matrix is expensive. lil_matrix is more efficient.
SparseEfficiencyWarning)
>>> sp[0, 0] = 10
>>> sp[2, 3] = 30
......@@ -91,13 +96,13 @@ Several things should be learned from the above example:
* We actually use the wrong sparse matrix type. In fact, it is the
*rows* that are sparse, not the columns. So, it would have been
better to use ``sp = scipy.sparse.csr_matrix((5, 10))``.
* We should have actually created the matrix as a ``lil_matrix``,
* We should have actually created the matrix as a `lil_matrix`,
which is more efficient for inserts. Afterwards, we should convert
to the appropriate compressed format.
* `sp.indptr[0] = 0` and `sp.indptr[1] = 2`, which means that
column 0 contains sp.data[0:2], i.e. the first two non-zero values.
* `sp.indptr[3] = 2` and `sp.indptr[4] = 3`, which means that column
3 contains sp.data[2:3], i.e. the third non-zero value.
* ``sp.indptr[0] = 0`` and ``sp.indptr[1] = 2``, which means that
column 0 contains ``sp.data[0:2]``, i.e. the first two non-zero values.
* ``sp.indptr[3] = 2`` and ``sp.indptr[4] = 3``, which means that column
three contains ``sp.data[2:3]``, i.e. the third non-zero value.
TODO: Rewrite this documentation to do things in a smarter way.
......@@ -112,7 +117,7 @@ For faster sparse code:
Misc
----
The sparse equivalent of dmatrix is csc_matrix and csr_matrix.
The sparse equivalent of `dmatrix` is `csc_matrix` and `csr_matrix`.
:class:`~aesara.sparse.basic.Dot` vs. :class:`~aesara.sparse.basic.StructuredDot`
---------------------------------------------------------------------------------
......@@ -121,22 +126,22 @@ Often when you use a sparse matrix it is because there is a meaning to the
structure of non-zeros. The gradient on terms outside that structure
has no meaning, so it is computationally efficient not to compute them.
StructuredDot is when you want the gradient to have zeroes corresponding to
`StructuredDot` is when you want the gradient to have zeroes corresponding to
the sparse entries in the matrix.
TrueDot and Structured dot have different gradients
`TrueDot` and `StructuredDot` have different gradients
but their perform functions should be the same.
The gradient of TrueDot can have non-zeros where the sparse matrix had zeros.
The gradient of StructuredDot can't.
The gradient of `TrueDot` can have non-zeros where the sparse matrix had zeros.
The gradient of `StructuredDot` can't.
Suppose you have ``dot(x,w)`` where ``x`` and ``w`` are square matrices.
If ``w`` is dense, like ``randn((5,5))`` and ``x`` is of full rank (though
potentially sparse, like a diagonal matrix of 1s) then the output will
be dense too. (But i guess the density of the output is a red herring.)
If ``w`` is dense, like ``standard_normal((5,5))`` and ``x`` is of full rank (though
potentially sparse, like a diagonal matrix of ones) then the output will
be dense too.
What's important is the density of the gradient on the output.
If the gradient on the output is dense, and ``w`` is dense (as we said it was)
then the True gradient on ``x`` will be dense.
If our dot is a TrueDot, then it will say that the gradient on ``x`` is dense.
If our dot is a StructuredDot, then it will say the gradient on ``x`` is only
then the true gradient on ``x`` will be dense.
If our dot is a `TrueDot`, then it will say that the gradient on ``x`` is dense.
If our dot is a `StructuredDot`, then it will say the gradient on ``x`` is only
defined on the diagonal and ignore the gradients on the off-diagonal.
......@@ -55,16 +55,15 @@ Running the code above we see:
Arguably the most useful information is approximately half-way through
the error message, where the kind of error is displayed along with its
cause (`ValueError: Input dimension mismatch. (input[0].shape[0] = 3,
input[1].shape[0] = 2`).
Below it, some other information is given, such as the apply node that
cause (e.g. ``ValueError: Input dimension mismatch. (input[0].shape[0] = 3, input[1].shape[0] = 2``).
Below it, some other information is given, such as the `Apply` node that
caused the error, as well as the input types, shapes, strides and
scalar values.
The two hints can also be helpful when debugging. Using the aesara flag
The two hints can also be helpful when debugging. Using the Aesara flag
``optimizer=fast_compile`` or ``optimizer=None`` can often tell you
the faulty line, while ``exception_verbosity=high`` will display a
debugprint of the apply node. Using these hints, the end of the error
debug print of the apply node. Using these hints, the end of the error
message becomes:
.. code-block:: none
......@@ -90,10 +89,10 @@ Using Test Values
-----------------
As of v.0.4.0, Aesara has a new mechanism by which graphs are executed
on-the-fly, before a ``aesara.function`` is ever compiled. Since optimizations
on-the-fly, before a :func:`aesara.function` is ever compiled. Since optimizations
haven't been applied at this stage, it is easier for the user to locate the
source of some bug. This functionality is enabled through the config flag
``aesara.config.compute_test_value``. Its use is best shown through the
`aesara.config.compute_test_value`. Its use is best shown through the
following example. Here, we use ``exception_verbosity=high`` and
``optimizer=fast_compile``, which would not tell you the line at fault.
``optimizer=None`` would and it could therefore be used instead of test values.
......@@ -101,7 +100,7 @@ following example. Here, we use ``exception_verbosity=high`` and
.. testcode:: testvalue
import numpy
import numpy as np
import aesara
import aesara.tensor as at
......@@ -109,15 +108,15 @@ following example. Here, we use ``exception_verbosity=high`` and
aesara.config.compute_test_value = 'off' # Use 'warn' to activate this feature
# configure shared variables
W1val = numpy.random.rand(2, 10, 10).astype(aesara.config.floatX)
W1val = np.random.random((2, 10, 10)).astype(aesara.config.floatX)
W1 = aesara.shared(W1val, 'W1')
W2val = numpy.random.rand(15, 20).astype(aesara.config.floatX)
W2val = np.random.random((15, 20)).astype(aesara.config.floatX)
W2 = aesara.shared(W2val, 'W2')
# input which will be of shape (5,10)
x = at.matrix('x')
# provide Aesara with a default test-value
#x.tag.test_value = numpy.random.rand(5, 10)
#x.tag.test_value = np.random.random((5, 10))
# transform the shared variable in some way. Aesara does not
# know off hand that the matrix func_of_W1 has shape (20, 10)
......@@ -131,7 +130,7 @@ following example. Here, we use ``exception_verbosity=high`` and
# compile and call the actual function
f = aesara.function([x], h2)
f(numpy.random.rand(5, 10))
f(np.random.random((5, 10)))
Running the above code generates the following error message:
......@@ -139,7 +138,7 @@ Running the above code generates the following error message:
Traceback (most recent call last):
File "test1.py", line 31, in <module>
f(numpy.random.rand(5, 10))
f(np.random.random((5, 10)))
File "PATH_TO_AESARA/aesara/compile/function/types.py", line 605, in __call__
self.fn.thunks[self.fn.position_of_error])
File "PATH_TO_AESARA/aesara/compile/function/types.py", line 595, in __call__
......@@ -171,10 +170,10 @@ so slightly, we can get Aesara to reveal the exact source of the error.
...
# input which will be of shape (5, 10)
# Input which will have the shape (5, 10)
x = at.matrix('x')
# provide Aesara with a default test-value
x.tag.test_value = numpy.random.rand(5, 10)
# Provide Aesara with a default test-value
x.tag.test_value = np.random.random((5, 10))
In the above, we are tagging the symbolic matrix *x* with a special test
value. This allows Aesara to evaluate symbolic expressions on-the-fly (by
......@@ -195,7 +194,7 @@ following error message, which properly identifies *line 24* as the culprit.
File "PATH_TO_AESARA/aesara/graph/op.py", line 752, in rval
r = p(n, [x[0] for x in i], o)
File "PATH_TO_AESARA/aesara/tensor/basic.py", line 4554, in perform
z[0] = numpy.asarray(numpy.dot(x, y))
z[0] = np.asarray(np.dot(x, y))
ValueError: matrices are not aligned
The ``compute_test_value`` mechanism works as follows:
......@@ -254,11 +253,11 @@ Running the code above returns the following output:
"How do I Print an Intermediate Value in a Function?"
-----------------------------------------------------
Aesara provides a 'Print' op to do this.
Aesara provides a :class:`Print`\ :class:`Op` to do this.
.. testcode::
import numpy
import numpy as np
import aesara
x = aesara.tensor.dvector('x')
......@@ -268,11 +267,11 @@ Aesara provides a 'Print' op to do this.
f = aesara.function([x], x * 5)
f_with_print = aesara.function([x], x_printed * 5)
#this runs the graph without any printing
assert numpy.all( f([1, 2, 3]) == [5, 10, 15])
# This runs the graph without any printing
assert np.array_equal(f([1, 2, 3]), [5, 10, 15])
#this runs the graph with the message, and value printed
assert numpy.all( f_with_print([1, 2, 3]) == [5, 10, 15])
# This runs the graph with the message, and value printed
assert np.array_equal(f_with_print([1, 2, 3]), [5, 10, 15])
.. testoutput::
......@@ -361,17 +360,16 @@ shows how to print all inputs and outputs:
0 Elemwise{mul,no_inplace}(TensorConstant{5.0}, x) input(s) value(s): [array(5.0), array(3.0)] output(s) value(s): [array(15.0)]
When using these ``inspect_inputs`` and ``inspect_outputs`` functions
with ``MonitorMode``, you should see [potentially a lot of] printed output.
Every ``Apply`` node will be printed out,
along with its position in the graph, the arguments to the functions ``perform`` or
``c_code`` and the output it computed.
Admittedly, this may be a huge amount of
output to read through if you are using big tensors... but you can choose to
add logic that would, for instance, print
with ``MonitorMode``, you should see (potentially a lot of) printed output.
Every ``Apply`` node will be printed out, along with its position in the graph,
the arguments to the functions ``perform`` or ``c_code`` and the output it
computed.
Admittedly, this may be a huge amount of output to read through if you are using
large tensors, but you can choose to add logic that would, for instance, print
something out only if a certain kind of op were used, at a certain program
position, or only if a particular value showed up in one of the inputs or outputs.
A typical example is to detect when NaN values are added into computations, which
can be achieved as follows:
position, or only if a particular value showed up in one of the inputs or
outputs. A typical example is to detect when NaN values are added into
computations, which can be achieved as follows:
.. testcode:: compiled
......@@ -382,13 +380,13 @@ can be achieved as follows:
# This is the current suggested detect_nan implementation to
# show you how it work. That way, you can modify it for your
# need. If you want exactly this method, you can use
# ``aesara.compile.monitormode.detect_nan`` that will always
# `aesara.compile.monitormode.detect_nan` that will always
# contain the current suggested version.
def detect_nan(fgraph, i, node, fn):
for output in fn.outputs:
if (not isinstance(output[0], numpy.random.RandomState) and
numpy.isnan(output[0]).any()):
if (isinstance(output[0], np.ndarray) and
np.isnan(output[0]).any()):
print('*** NaN detected ***')
aesara.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs])
......@@ -396,9 +394,11 @@ can be achieved as follows:
break
x = aesara.tensor.dscalar('x')
f = aesara.function([x], [aesara.tensor.log(x) * x],
mode=aesara.compile.MonitorMode(
post_func=detect_nan))
f = aesara.function(
[x], [aesara.tensor.log(x) * x],
mode=aesara.compile.MonitorMode(
post_func=detect_nan)
)
f(0) # log(0) * 0 = -inf * 0 = NaN
.. testoutput:: compiled
......@@ -458,12 +458,12 @@ Intermediate results don't necessarily have a clear name and you can get
exceptions which are hard to decipher, due to the "compiled" nature of the
functions.
Consider this example script ("ex.py"):
Consider this example script (``ex.py``):
.. testcode::
import numpy as np
import aesara
import numpy
import aesara.tensor as at
a = at.dmatrix('a')
......@@ -471,9 +471,9 @@ Consider this example script ("ex.py"):
f = aesara.function([a, b], [a * b])
# matrices chosen so dimensions are unsuitable for multiplication
mat1 = numpy.arange(12).reshape((3, 4))
mat2 = numpy.arange(25).reshape((5, 5))
# Matrices chosen so dimensions are unsuitable for multiplication
mat1 = np.arange(12).reshape((3, 4))
mat2 = np.arange(25).reshape((5, 5))
f(mat1, mat2)
......@@ -514,18 +514,18 @@ illustrative purposes. As the matrices can't be multiplied element-wise
The call stack contains some useful information to trace back the source
of the error. There's the script where the compiled function was called --
but if you're using (improperly parameterized) prebuilt modules, the error
might originate from ops in these modules, not this script. The last line
tells us about the op that caused the exception. In this case it's a "mul"
might originate from `Op`\s in these modules, not this script. The last line
tells us about the `Op` that caused the exception. In this case it's a "mul"
involving variables with names "a" and "b". But suppose we instead had an
intermediate result to which we hadn't given a name.
After learning a few things about the graph structure in Aesara, we can use
the Python debugger to explore the graph, and then we can get runtime
information about the error. Matrix dimensions, especially, are useful to
pinpoint the source of the error. In the printout, there are also 2 of the 4
dimensions of the matrices involved, but for the sake of example say we'd
need the other dimensions to pinpoint the error. First, we re-launch with
the debugger module and run the program with "c":
pinpoint the source of the error. In the printout, there are also two of the
four dimensions of the matrices involved, but for the sake of example say we'd
need the other dimensions to pinpoint the error. First, we re-launch with the
debugger module and run the program with "c":
.. code-block:: text
......@@ -537,22 +537,22 @@ the debugger module and run the program with "c":
Then we get back the above error printout, but the interpreter breaks in
that state. Useful commands here are
* "up" and "down" (to move up and down the call stack),
* "l" (to print code around the line in the current stack position),
* "p variable_name" (to print the string representation of 'variable_name'),
* "p dir(object_name)", using the Python dir() function to print the list of an object's members
* ``up`` and ``down`` (to move up and down the call stack),
* ``l`` (to print code around the line in the current stack position),
* ``p variable_name`` (to print the string representation of ``variable_name``),
* ``p dir(object_name)``, using the Python :func:`dir` function to print the list of an object's members
Here, for example, I do "up", and a simple "l" shows me there's a local
variable "node". This is the "node" from the computation graph, so by
following the "node.inputs", "node.owner" and "node.outputs" links I can
Here, for example, I do ``up``, and a simple ``l`` shows me there's a local
variable ``node``. This is the ``node`` from the computation graph, so by
following the ``node.inputs``, ``node.owner`` and ``node.outputs`` links I can
explore around the graph.
That graph is purely symbolic (no data, just symbols to manipulate it
abstractly). To get information about the actual parameters, you explore the
"thunk" objects, which bind the storage for the inputs (and outputs) with
the function itself (a "thunk" is a concept related to closures). Here, to
get the current node's first input's shape, you'd therefore do "p
thunk.inputs[0][0].shape", which prints out "(3, 4)".
"thunk" objects, which bind the storage for the inputs (and outputs) with the
function itself (a "thunk" is a concept related to closures). Here, to get the
current node's first input's shape, you'd therefore do
``p thunk.inputs[0][0].shape``, which prints out ``(3, 4)``.
.. _faq_dump_fct:
......@@ -562,14 +562,13 @@ Dumping a Function to help debug
If you are reading this, there is a high chance that you emailed our
mailing list and we asked you to read this section. This section
explains how to dump all the parameters passed to
``aesara.function()``. This is useful to help us reproduce a problem
:func:`aesara.function`. This is useful to help us reproduce a problem
during compilation and it doesn't require you to make a self-contained
example.
For this to work, we need to be able to import the code for all Op in
the graph. So if you create your own Op, we will need this
code. Otherwise, we won't be able to unpickle it. We already have all
the Ops from Aesara and Pylearn2.
For this to work, we need to be able to import the code for all `Op`\s in
the graph. So if you create your own `Op`, we will need this
code; otherwise, we won't be able to unpickle it.
.. code-block:: python
......@@ -577,9 +576,9 @@ the Ops from Aesara and Pylearn2.
aesara.function(...)
# with
aesara.function_dump(filename, ...)
# Where filename is a string to a file that we will write to.
# Where `filename` is a string to a file that we will write to.
Then send us filename.
Then send us ``filename``.
Breakpoint during Aesara function execution
......
......@@ -27,7 +27,7 @@ the logistic curve, which is given by:
.. figure:: logistic.png
A plot of the logistic function, with x on the x-axis and s(x) on the
A plot of the logistic function, with :math:`x` on the x-axis and :math:`s(x)` on the
y-axis.
You want to compute the function :ref:`element-wise
......@@ -49,9 +49,9 @@ Well, what you do is this:
array([[ 0.5 , 0.73105858],
[ 0.26894142, 0.11920292]])
The reason logistic is performed elementwise is because all of its
operations---division, addition, exponentiation, and division---are
themselves elementwise operations.
The reason the logistic is applied element-wise is because all of its
operations--negation, exponentiation, addition, and division--are
themselves element-wise operations.
It is also the case that:
......@@ -76,7 +76,7 @@ Computing More than one Thing at the Same Time
Aesara supports functions with multiple outputs. For example, we can
compute the :ref:`element-wise <libdoc_tensor_elemwise>` difference, absolute difference, and
squared difference between two matrices *a* and *b* at the same time:
squared difference between two matrices ``a`` and ``b`` at the same time:
.. If you modify this code, also change :
.. tests/test_tutorial.py:T_examples.test_examples_3
......@@ -92,7 +92,7 @@ squared difference between two matrices *a* and *b* at the same time:
shortcut for allocating symbolic variables that we will often use in the
tutorials.
When we use the function f, it returns the three variables (the printing
When we use the function ``f``, it returns the three variables (the printing
was reformatted for readability):
>>> f([[1, 1], [1, 1]], [[0, 1], [2, 3]])
......@@ -124,12 +124,12 @@ array(35.0)
This makes use of the :ref:`In <function_inputs>` class which allows
you to specify properties of your function's parameters with greater detail. Here we
give a default value of 1 for *y* by creating a ``In`` instance with
its ``value`` field set to 1.
give a default value of ``1`` for ``y`` by creating an :class:`In` instance with
its ``value`` field set to ``1``.
Inputs with default values must follow inputs without default
values (like Python's functions). There can be multiple inputs with default values. These parameters can
be set positionally or by name, as in standard Python:
Inputs with default values must follow inputs without default values (like
Python's functions). There can be multiple inputs with default values. These
parameters can be set positionally or by name, as in standard Python:
.. If you modify this code, also change :
......@@ -150,9 +150,9 @@ array(34.0)
array(33.0)
.. note::
``In`` does not know the name of the local variables *y* and *w*
`In` does not know the name of the local variables ``y`` and ``w``
that are passed as arguments. The symbolic variable objects have name
attributes (set by ``dscalars`` in the example above) and *these* are the
attributes (set by `dscalars` in the example above) and *these* are the
names of the keyword parameters in the functions that we build. This is
the mechanism at work in ``In(y, value=1)``. In the case of ``In(w,
value=2, name='w_by_name')``, we override the symbolic variable's name
......@@ -169,11 +169,11 @@ Using Shared Variables
It is also possible to make a function with an internal state. For
example, let's say we want to make an accumulator: at the beginning,
the state is initialized to zero. Then, on each function call, the state
the state is initialized to zero, then, on each function call, the state
is incremented by the function's argument.
First let's define the *accumulator* function. It adds its argument to the
internal state, and returns the old state value.
internal state and returns the old state value.
.. If you modify this code, also change :
.. tests/test_tutorial.py:T_examples.test_examples_8
......@@ -187,17 +187,17 @@ This code introduces a few new concepts. The ``shared`` function constructs
so-called :ref:`shared variables<libdoc_compile_shared>`.
These are hybrid symbolic and non-symbolic variables whose value may be shared
between multiple functions. Shared variables can be used in symbolic expressions just like
the objects returned by ``dmatrices(...)`` but they also have an internal
the objects returned by `dmatrices` but they also have an internal
value that defines the value taken by this symbolic variable in *all* the
functions that use it. It is called a *shared* variable because its value is
shared between many functions. The value can be accessed and modified by the
``.get_value()`` and ``.set_value()`` methods. We will come back to this soon.
:meth:`get_value` and :meth:`set_value` methods. We will come back to this soon.
The other new thing in this code is the ``updates`` parameter of ``function``.
The other new thing in this code is the ``updates`` parameter of :func:`aesara.function`.
``updates`` must be supplied with a list of pairs of the form (shared-variable, new expression).
It can also be a dictionary whose keys are shared-variables and values are
the new expressions. Either way, it means "whenever this function runs, it
will replace the ``.value`` of each shared variable with the result of the
will replace the :attr:`value` of each shared variable with the result of the
corresponding expression". Above, our accumulator replaces the ``state``'s value with the sum
of the state and the increment amount.
......@@ -246,7 +246,7 @@ updates).
It may happen that you expressed some formula using a shared variable, but
you do *not* want to use its value. In this case, you can use the
``givens`` parameter of ``function`` which replaces a particular node in a graph
``givens`` parameter of :func:`aesara.function` which replaces a particular node in a graph
for the purpose of one particular function.
.. If you modify this code, also change :
......@@ -274,18 +274,26 @@ expression that evaluates to a tensor of same shape and dtype.
.. note::
Aesara shared variable broadcast pattern default to False for each
Aesara shared variable broadcast patterns default to ``False`` for each
dimension. Shared variable sizes can change over time, so we can't
use the shape to find the broadcastable pattern. If you want a
different pattern, just pass it as a parameter
``aesara.shared(..., shape=(True, False))``
``aesara.shared(..., broadcastable=(True, False))``
.. note::
Use the ``shape`` parameter to specify tuples of static shapes instead;
the old broadcastable values are being phased-out. Unknown shape values
for dimensions take the value ``None``; otherwise, integers are used for
known static shape values.
For example, ``aesara.shared(..., shape=(1, None))``.
Copying functions
=================
Aesara functions can be copied, which can be useful for creating similar
functions but with different shared variables or updates. This is done using
the :func:`copy()<aesara.compile.function.types.Function.copy>` method of ``function`` objects. The optimized graph of the original function is copied,
so compilation only needs to be performed once.
the :func:`aesara.compile.function.types.Function.copy` method of :class:`Function` objects.
The optimized graph of the original function is copied, so compilation only
needs to be performed once.
Let's start from the accumulator defined above:
......@@ -302,7 +310,7 @@ array(0)
>>> print(state.get_value())
10
We can use ``copy()`` to create a similar accumulator but with its own internal state
We can use :meth:`copy` to create a similar accumulator but with its own internal state
using the ``swap`` parameter, which is a dictionary of shared variables to exchange:
>>> new_state = aesara.shared(0)
......@@ -361,11 +369,13 @@ Here's a brief example. The setup code is:
from aesara.tensor.random.utils import RandomStream
from aesara import function
srng = RandomStream(seed=234)
rv_u = srng.uniform(0, 1, size=(2,2))
rv_n = srng.normal(0, 1, size=(2,2))
f = function([], rv_u)
g = function([], rv_n, no_default_updates=True) #Not updating rv_n.rng
g = function([], rv_n, no_default_updates=True)
nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)
Here, ``rv_u`` represents a random stream of 2x2 matrices of draws from a uniform
......@@ -383,16 +393,16 @@ so we get different random numbers every time.
>>> f_val1 = f() #different numbers from f_val0
When we add the extra argument ``no_default_updates=True`` to
``function`` (as in *g*), then the random number generator state is
``function`` (as in ``g``), then the random number generator state is
not affected by calling the returned function. So, for example, calling
*g* multiple times will return the same numbers.
``g`` multiple times will return the same numbers.
>>> g_val0 = g() # different numbers from f_val0 and f_val1
>>> g_val1 = g() # same numbers as g_val0!
An important remark is that a random variable is drawn at most once during any
single function execution. So the *nearly_zeros* function is guaranteed to
return approximately 0 (except for rounding error) even though the *rv_u*
single function execution. So the `nearly_zeros` function is guaranteed to
return approximately 0 (except for rounding error) even though the ``rv_u``
random variable appears three times in the output expression.
>>> nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)
......@@ -400,17 +410,8 @@ random variable appears three times in the output expression.
Seeding Streams
---------------
Random variables can be seeded individually or collectively.
You can seed just one random variable by seeding or assigning to the
``.rng`` attribute, using ``.rng.set_value()``.
>>> rng_val = rv_u.rng.get_value(borrow=True) # Get the rng for rv_u
>>> rng_val.seed(89234) # seeds the generator
>>> rv_u.rng.set_value(rng_val, borrow=True) # Assign back seeded rng
You can also seed *all* of the random variables allocated by a :class:`RandomStream`
object by that object's ``seed`` method. This seed will be used to seed a
You can seed all of the random variables allocated by a :class:`RandomStream`
object by that object's :meth:`RandomStream.seed` method. This seed will be used to seed a
temporary random number generator, that will in turn generate seeds for each
of the random variables.
......@@ -420,28 +421,15 @@ Sharing Streams Between Functions
---------------------------------
As usual for shared variables, the random number generators used for random
variables are common between functions. So our *nearly_zeros* function will
update the state of the generators used in function *f* above.
For example:
>>> state_after_v0 = rv_u.rng.get_value().get_state()
>>> nearly_zeros() # this affects rv_u's generator
array([[ 0., 0.],
[ 0., 0.]])
>>> v1 = f()
>>> rng = rv_u.rng.get_value(borrow=True)
>>> rng.set_state(state_after_v0)
>>> rv_u.rng.set_value(rng, borrow=True)
>>> v2 = f() # v2 != v1
>>> v3 = f() # v3 == v1
variables are common between functions. So our ``nearly_zeros`` function will
update the state of the generators used in function ``f`` above.
Copying Random State Between Aesara Graphs
------------------------------------------
In some use cases, a user might want to transfer the "state" of all random
number generators associated with a given aesara graph (e.g. g1, with compiled
function f1 below) to a second graph (e.g. g2, with function f2). This might
number generators associated with a given Aesara graph (e.g. ``g1``, with compiled
function ``f1`` below) to a second graph (e.g. ``g2``, with function ``f2``). This might
arise for example if you are trying to initialize the state of a model, from
the parameters of a pickled version of a previous model. For
:class:`aesara.tensor.random.utils.RandomStream` and
......@@ -449,50 +437,10 @@ the parameters of a pickled version of a previous model. For
this can be achieved by copying elements of the `state_updates` parameter.
Each time a random variable is drawn from a `RandomStream` object, a tuple is
added to the `state_updates` list. The first element is a shared variable,
added to its :attr:`state_updates` list. The first element is a shared variable,
which represents the state of the random number generator associated with this
*particular* variable, while the second represents the aesara graph
corresponding to the random number generation process (i.e. RandomFunction{uniform}.0).
An example of how "random states" can be transferred from one aesara function
to another is shown below.
>>> import aesara
>>> import numpy
>>> import aesara.tensor as at
>>> from aesara.sandbox.rng_mrg import MRG_RandomStream
>>> from aesara.tensor.random.utils import RandomStream
>>> class Graph():
... def __init__(self, seed=123):
... self.rng = RandomStream(seed)
... self.y = self.rng.uniform(size=(1,))
>>> g1 = Graph(seed=123)
>>> f1 = aesara.function([], g1.y)
>>> g2 = Graph(seed=987)
>>> f2 = aesara.function([], g2.y)
>>> # By default, the two functions are out of sync.
>>> f1()
array([ 0.72803009])
>>> f2()
array([ 0.55056769])
>>> def copy_random_state(g1, g2):
... if isinstance(g1.rng, MRG_RandomStream):
... g2.rng.rstate = g1.rng.rstate
... for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
... su2[0].set_value(su1[0].get_value())
>>> # We now copy the state of the aesara random number generators.
>>> copy_random_state(g1, g2)
>>> f1()
array([ 0.59044123])
>>> f2()
array([ 0.59044123])
*particular* variable, while the second represents the Aesara graph
corresponding to the random number generation process.
Other Random Distributions
--------------------------
......@@ -511,16 +459,18 @@ It will be used repeatedly.
.. testcode::
import numpy
import numpy as np
import aesara
import aesara.tensor as at
rng = numpy.random
rng = np.random.default_rng(2882)
N = 400 # training sample size
feats = 784 # number of input variables
# generate a dataset: D = (input_values, target_class)
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
D = (rng.standard_normal((N, feats)), rng.integers(size=N, low=0, high=2))
training_steps = 10000
# Declare Aesara symbolic variables
......@@ -532,7 +482,7 @@ It will be used repeatedly.
# this and the following bias variable b
# are shared so they keep their values
# between training iterations (updates)
w = aesara.shared(rng.randn(feats), name="w")
w = aesara.shared(rng.standard_normal(feats), name="w")
# initialize the bias term
b = aesara.shared(0., name="b")
......@@ -542,7 +492,7 @@ It will be used repeatedly.
print(b.get_value())
# Construct Aesara expression graph
p_1 = 1 / (1 + at.exp(-at.dot(x, w) - b)) # Probability that target = 1
p_1 = 1 / (1 + at.exp(-at.dot(x, w) - b)) # Probability that target = 1
prediction = p_1 > 0.5 # The prediction thresholded
xent = -y * at.log(p_1) - (1-y) * at.log(1-p_1) # Cross-entropy loss function
cost = xent.mean() + 0.01 * (w ** 2).sum() # The cost to minimize
......
......@@ -9,11 +9,11 @@ Configuration Settings and Compiling Modes
Configuration
=============
The ``config`` module contains several *attributes* that modify Aesara's behavior. Many of these
attributes are examined during the import of the ``aesara`` module and several are assumed to be
The :mod:`aesara.config` module contains several *attributes* that modify Aesara's behavior. Many of these
attributes are examined during the import of the :mod:`aesara` module and several are assumed to be
read-only.
*As a rule, the attributes in the* ``config`` *module should not be modified inside the user code.*
*As a rule, the attributes in the* :mod:`aesara.config` *module should not be modified inside the user code.*
Aesara's code comes with default values for these attributes, but you can
override them from your ``.aesararc`` file, and override those values in turn by
......@@ -21,12 +21,12 @@ the :envvar:`AESARA_FLAGS` environment variable.
The order of precedence is:
1. an assignment to aesara.config.<property>
1. an assignment to ``aesara.config.<property>``
2. an assignment in :envvar:`AESARA_FLAGS`
3. an assignment in the .aesararc file (or the file indicated in :envvar:`AESARARC`)
3. an assignment in the ``.aesararc`` file (or the file indicated in :envvar:`AESARARC`)
You can display the current/effective configuration at any time by printing
aesara.config. For example, to see a list of all active configuration
`aesara.config`. For example, to see a list of all active configuration
variables, type this from the command-line:
.. code-block:: bash
......@@ -45,22 +45,24 @@ Consider the logistic regression:
.. testcode::
import numpy
import numpy as np
import aesara
import aesara.tensor as at
rng = numpy.random
rng = np.random.default_rng(2498)
N = 400
feats = 784
D = (rng.randn(N, feats).astype(aesara.config.floatX),
rng.randint(size=N,low=0, high=2).astype(aesara.config.floatX))
D = (rng.standard_normal((N, feats)).astype(aesara.config.floatX),
rng.integers(size=N,low=0, high=2).astype(aesara.config.floatX))
training_steps = 10000
# Declare Aesara symbolic variables
x = at.matrix("x")
y = at.vector("y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(numpy.asarray(0., dtype=aesara.config.floatX), name="b")
w = aesara.shared(rng.standard_normal(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
......@@ -73,15 +75,18 @@ Consider the logistic regression:
# Compile expressions to functions
train = aesara.function(
inputs=[x,y],
outputs=[prediction, xent],
updates=[(w, w-0.01*gw), (b, b-0.01*gb)],
name = "train")
predict = aesara.function(inputs=[x], outputs=prediction,
name = "predict")
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in
train.maker.fgraph.toposort()]):
inputs=[x,y],
outputs=[prediction, xent],
updates=[(w, w-0.01*gw), (b, b-0.01*gb)],
name = "train"
)
predict = aesara.function(
inputs=[x], outputs=prediction,
name = "predict"
)
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm']
for x in train.maker.fgraph.toposort()]):
print('Used the cpu')
else:
print('ERROR, not able to tell if aesara used the cpu or another device')
......@@ -106,7 +111,7 @@ Consider the logistic regression:
prediction on D
...
Modify and execute this example to run on CPU (the default) with floatX=float32 and
Modify and execute this example to run on CPU (the default) with ``floatX=float32`` and
time the execution using the command line ``time python file.py``. Save your code
as it will be useful later on.
......@@ -114,10 +119,10 @@ as it will be useful later on.
* Apply the Aesara flag ``floatX=float32`` (through ``aesara.config.floatX``) in your code.
* Cast inputs before storing them into a shared variable.
* Circumvent the automatic cast of *int32* with *float32* to *float64*:
* Circumvent the automatic cast of int32 with float32 to float64:
* Insert manual cast in your code or use *[u]int{8,16}*.
* Insert manual cast around the mean operator (this involves division by length, which is an *int64*).
* Insert manual cast in your code or use [u]int{8,16}.
* Insert manual cast around the mean operator (this involves division by length, which is an int64).
* Note that a new casting mechanism is being developed.
:download:`Solution<modes_solution_1.py>`
......@@ -156,7 +161,7 @@ short name Full constructor
.. Note::
For debugging purpose, there also exists a ``MonitorMode`` (which has no
For debugging purpose, there also exists a :class:`MonitorMode` (which has no
short name). It can be used to step through the execution of a function:
see :ref:`the debugging FAQ<faq_monitormode>` for details.
......@@ -165,8 +170,8 @@ Linkers
=======
A mode is composed of 2 things: an optimizer and a linker. Some modes,
like ``NanGuardMode`` and ``DebugMode``, add logic around the
optimizer and linker. ``DebugMode`` uses its own linker.
like `NanGuardMode` and `DebugMode`, add logic around the
optimizer and linker. `DebugMode` uses its own linker.
You can select which linker to use with the Aesara flag :attr:`config.linker`.
Here is a table to compare the different linkers.
......@@ -233,8 +238,8 @@ Using DebugMode
While normally you should use the ``FAST_RUN`` or ``FAST_COMPILE`` mode,
it is useful at first (especially when you are defining new kinds of
expressions or new optimizations) to run your code using the DebugMode
(available via ``mode='DebugMode'``). The DebugMode is designed to
expressions or new optimizations) to run your code using the `DebugMode`
(available via ``mode='DebugMode'``). The `DebugMode` is designed to
run several self-checks and assertions that can help diagnose
possible programming errors leading to incorrect output. Note that
``DebugMode`` is much slower than ``FAST_RUN`` or ``FAST_COMPILE`` so
......@@ -245,7 +250,7 @@ cluster!).
.. If you modify this code, also change :
.. tests/test_tutorial.py:T_modes.test_modes_1
DebugMode is used as follows:
`DebugMode` is used as follows:
.. testcode::
......@@ -258,21 +263,21 @@ DebugMode is used as follows:
f([7])
If any problem is detected, DebugMode will raise an exception according to
what went wrong, either at call time (*f(5)*) or compile time (
If any problem is detected, `DebugMode` will raise an exception according to
what went wrong, either at call time (e.g. ``f(5)``) or compile time (
``f = aesara.function([x], 10 * x, mode='DebugMode')``). These exceptions
should *not* be ignored; talk to your local Aesara guru or email the
users list if you cannot make the exception go away.
Some kinds of errors can only be detected for certain input value combinations.
In the example above, there is no way to guarantee that a future call to, say
*f(-1)*, won't cause a problem. DebugMode is not a silver bullet.
``f(-1)``, won't cause a problem. `DebugMode` is not a silver bullet.
.. TODO: repair the following link
If you instantiate DebugMode using the constructor (see :class:`DebugMode`)
rather than the keyword ``DebugMode`` you can configure its behaviour via
constructor arguments. The keyword version of DebugMode (which you get by using ``mode='DebugMode'``)
If you instantiate `DebugMode` using the constructor (see :class:`DebugMode`)
rather than the keyword `DebugMode` you can configure its behaviour via
constructor arguments. The keyword version of `DebugMode` (which you get by using ``mode='DebugMode'``)
is quite strict.
For more detail, see :ref:`DebugMode<debugmode>` in the library.
......@@ -2,59 +2,62 @@
# Aesara tutorial
# Solution to Exercise in section 'Configuration Settings and Compiling Modes'
import numpy as np
import aesara
import aesara.tensor as at
aesara.config.floatX = 'float32'
rng = np.random
aesara.config.floatX = "float32"
rng = np.random.default_rng(428)
N = 400
feats = 784
D = (rng.randn(N, feats).astype(aesara.config.floatX),
rng.randint(size=N, low=0, high=2).astype(aesara.config.floatX))
D = (
rng.standard_normal((N, feats)).astype(aesara.config.floatX),
rng.integers(size=N, low=0, high=2).astype(aesara.config.floatX),
)
training_steps = 10000
# Declare Aesara symbolic variables
x = at.matrix("x")
y = at.vector("y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b")
w = aesara.shared(rng.standard_normal(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0.0, dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
#print "Initial model:"
#print w.get_value(), b.get_value()
# print "Initial model:"
# print w.get_value(), b.get_value()
# Construct Aesara expression graph
p_1 = 1 / (1 + at.exp(-at.dot(x, w) - b)) # Probability of having a one
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
xent = -y * at.log(p_1) - (1 - y) * at.log(1 - p_1) # Cross-entropy
cost = at.cast(xent.mean(), 'float32') + \
0.01 * (w ** 2).sum() # The cost to optimize
cost = at.cast(xent.mean(), "float32") + 0.01 * (w**2).sum() # The cost to optimize
gw, gb = at.grad(cost, [w, b])
# Compile expressions to functions
train = aesara.function(
inputs=[x, y],
outputs=[prediction, xent],
updates={w: w - 0.01 * gw, b: b - 0.01 * gb},
name="train")
predict = aesara.function(inputs=[x], outputs=prediction,
name="predict")
if any(x.op.__class__.__name__ in ('Gemv', 'CGemv', 'Gemm', 'CGemm') for x in
train.maker.fgraph.toposort()):
print('Used the cpu')
inputs=[x, y],
outputs=[prediction, xent],
updates={w: w - 0.01 * gw, b: b - 0.01 * gb},
name="train",
)
predict = aesara.function(inputs=[x], outputs=prediction, name="predict")
if any(
x.op.__class__.__name__ in ("Gemv", "CGemv", "Gemm", "CGemm")
for x in train.maker.fgraph.toposort()
):
print("Used the cpu")
else:
print('ERROR, not able to tell if aesara used the cpu or another device')
print("ERROR, not able to tell if aesara used the cpu or another device")
print(train.maker.fgraph.toposort())
for i in range(training_steps):
pred, err = train(D[0], D[1])
#print "Final model:"
#print w.get_value(), b.get_value()
# print "Final model:"
# print w.get_value(), b.get_value()
print("target values for D")
print(D[1])
......
......@@ -25,20 +25,20 @@ that creates an image of the function. You can read about them in
Consider again the logistic regression example:
>>> import numpy
>>> import numpy as np
>>> import aesara
>>> import aesara.tensor as at
>>> rng = numpy.random
>>> rng = np.random.default_rng(2382)
>>> # Training data
>>> N = 400
>>> feats = 784
>>> D = (rng.randn(N, feats).astype(aesara.config.floatX), rng.randint(size=N,low=0, high=2).astype(aesara.config.floatX))
>>> D = (rng.standard_normal((N, feats)).astype(aesara.config.floatX), rng.integers(size=N, low=0, high=2).astype(aesara.config.floatX))
>>> training_steps = 10000
>>> # Declare Aesara symbolic variables
>>> x = at.matrix("x")
>>> y = at.vector("y")
>>> w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
>>> b = aesara.shared(numpy.asarray(0., dtype=aesara.config.floatX), name="b")
>>> w = aesara.shared(rng.standard_normal(feats).astype(aesara.config.floatX), name="w")
>>> b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b")
>>> x.tag.test_value = D[0]
>>> y.tag.test_value = D[1]
>>> # Construct Aesara expression graph
......
import numpy as np
import aesara
x, y, z = aesara.tensor.vectors('xyz')
x, y, z = aesara.tensor.vectors("xyz")
f = aesara.function([x, y, z], [(x + y + z) * 2])
xv = np.random.rand(10).astype(aesara.config.floatX)
yv = np.random.rand(10).astype(aesara.config.floatX)
zv = np.random.rand(10).astype(aesara.config.floatX)
xv = np.random.random((10,)).astype(aesara.config.floatX)
yv = np.random.random((10,)).astype(aesara.config.floatX)
zv = np.random.random((10,)).astype(aesara.config.floatX)
f(xv, yv, zv)
......@@ -49,7 +49,7 @@ upgrade. Here is the current state of what can be done:
aesara.tensor.nnet.conv2d(..., image_shape=(7, 3, 5, 5), filter_shape=(2, 3, 4, 4))
- You can use the ``SpecifyShape`` op to add shape information anywhere in the
- You can use the :class:`SpecifyShape` :class:`Op` to add shape information anywhere in the
graph. This allows to perform some optimizations. In the following example,
this makes it possible to precompute the Aesara function to a constant.
......@@ -67,13 +67,13 @@ Problems with Shape inference
Sometimes this can lead to errors. Consider this example:
>>> import numpy
>>> import numpy as np
>>> import aesara
>>> x = aesara.tensor.matrix('x')
>>> y = aesara.tensor.matrix('y')
>>> z = aesara.tensor.join(0, x, y)
>>> xv = numpy.random.rand(5, 4)
>>> yv = numpy.random.rand(3, 3)
>>> xv = np.random.random((5, 4))
>>> yv = np.random.random((3, 3))
>>> f = aesara.function([x, y], z.shape)
>>> aesara.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
......@@ -109,7 +109,7 @@ This makes the computation of the shape faster, but it can also hide errors. In
this example, the computation of the shape of the output of ``join`` is done only
based on the first input Aesara variable, which leads to an error.
This might happen with other ops such as ``elemwise`` and ``dot``, for example.
This might happen with other `Op`\s such as :class:`Elemwise` and :class:`Dot`, for example.
Indeed, to perform some optimizations (for speed or stability, for instance),
Aesara assumes that the computation is correct and consistent
in the first place, as it does here.
......@@ -118,5 +118,5 @@ You can detect those problems by running the code without this
optimization, using the Aesara flag
``optimizer_excluding=local_shape_to_shape_i``. You can also obtain the
same effect by running in the modes ``FAST_COMPILE`` (it will not apply this
optimization, nor most other optimizations) or ``DebugMode`` (it will test
before and after all optimizations (much slower)).
optimization, nor most other optimizations) or :class:`DebugMode` (it will test
before and after all optimizations).
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论