提交 e40c1b29 authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Brandon T. Willard

Documentation formatting and NumPy usage updates

上级 8b7446e0
......@@ -146,7 +146,7 @@ class OpFromGraph(Op, HasInnerGraph):
from aesara.compile.builders import OpFromGraph
x, y, z = at.scalars('xyz')
s = aesara.shared(np.random.rand(2, 2).astype(config.floatX))
s = aesara.shared(np.random.random((2, 2)).astype(config.floatX))
e = x + y * z + s
op = OpFromGraph([x, y, z], [e])
# op behaves like a normal aesara op
......
......@@ -5,6 +5,7 @@ import time
import warnings
from collections import OrderedDict
from functools import partial, reduce
from typing import TYPE_CHECKING, Callable, List, Optional, Union
import numpy as np
......@@ -18,6 +19,10 @@ from aesara.graph.op import get_test_values
from aesara.graph.type import Type
if TYPE_CHECKING:
from aesara.compile.mode import Mode
__docformat__ = "restructuredtext en"
_logger = logging.getLogger("aesara.gradient")
......@@ -684,8 +689,8 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
.. code-block:: python
x, t = aesara.tensor.fvector('x'), aesara.tensor.fvector('t')
w1 = aesara.shared(np.random.randn(3,4))
w2 = aesara.shared(np.random.randn(4,2))
w1 = aesara.shared(np.random.standard_normal((3,4)))
w2 = aesara.shared(np.random.standard_normal((4,2)))
a1 = aesara.tensor.tanh(aesara.tensor.dot(x,w1))
a2 = aesara.tensor.tanh(aesara.tensor.dot(a1,w2))
cost2 = aesara.tensor.sqr(a2 - t).sum()
......@@ -1690,17 +1695,17 @@ def mode_not_slow(mode):
def verify_grad(
fun,
pt,
n_tests=2,
rng=None,
eps=None,
out_type=None,
abs_tol=None,
rel_tol=None,
mode=None,
cast_to_output_type=False,
no_debug_ref=True,
fun: Callable,
pt: List[np.ndarray],
n_tests: int = 2,
rng: Optional[Union[np.random.Generator, np.random.RandomState]] = None,
eps: Optional[float] = None,
out_type: Optional[str] = None,
abs_tol: Optional[float] = None,
rel_tol: Optional[float] = None,
mode: Optional[Union["Mode", str]] = None,
cast_to_output_type: bool = False,
no_debug_ref: bool = True,
):
"""Test a gradient by Finite Difference Method. Raise error on failure.
......@@ -1713,47 +1718,47 @@ def verify_grad(
--------
>>> verify_grad(aesara.tensor.tanh,
... (np.asarray([[2, 3, 4], [-1, 3.3, 9.9]]),),
... rng=np.random)
... rng=np.random.default_rng(23098))
Parameters
----------
fun : a Python function
fun
`fun` takes Aesara variables as inputs, and returns an Aesara variable.
For instance, an Op instance with a single output.
pt : list of numpy.ndarrays
For instance, an `Op` instance with a single output.
pt
Input values, points where the gradient is estimated.
These arrays must be either float16, float32, or float64 arrays.
n_tests : int
Number of times to run the test
rng : numpy.random.RandomState
n_tests
Number of times to run the test.
rng
Random number generator used to sample the output random projection `u`,
we test gradient of sum(u * fun) at `pt`
eps : float, optional
we test gradient of ``sum(u * fun)`` at `pt`.
eps
Step size used in the Finite Difference Method (Default
None is type-dependent).
Raising the value of eps can raise or lower the absolute
``None`` is type-dependent).
Raising the value of `eps` can raise or lower the absolute
and relative errors of the verification depending on the
Op. Raising eps does not lower the verification quality for
`Op`. Raising `eps` does not lower the verification quality for
linear operations. It is better to raise `eps` than raising
`abs_tol` or `rel_tol`.
out_type : string
Dtype of output, if complex (i.e., 'complex32' or 'complex64')
abs_tol : float
out_type
Dtype of output, if complex (i.e., ``'complex32'`` or ``'complex64'``)
abs_tol
Absolute tolerance used as threshold for gradient comparison
rel_tol : float
rel_tol
Relative tolerance used as threshold for gradient comparison
cast_to_output_type : bool
If the output is float32 and cast_to_output_type is True, cast
the random projection to float32. Otherwise it is float64.
cast_to_output_type
If the output is float32 and `cast_to_output_type` is ``True``, cast
the random projection to float32; otherwise, it is float64.
float16 is not handled here.
no_debug_ref : bool
Don't use DebugMode for the numerical gradient function.
no_debug_ref
Don't use `DebugMode` for the numerical gradient function.
Notes
-----
This function does not support multiple outputs. In
tests/scan/test_basic.py there is an experimental `verify_grad` that covers
that case as well by using random projections.
This function does not support multiple outputs. In `tests.scan.test_basic`
there is an experimental `verify_grad` that covers that case as well by
using random projections.
"""
from aesara.compile.function import function
......
......@@ -404,13 +404,14 @@ You can try the new :class:`Op` as follows:
.. testcode:: example
import numpy as np
import aesara
x = aesara.tensor.matrix()
f = aesara.function([x], DoubleOp1()(x))
import numpy
inp = numpy.random.rand(5, 4)
inp = np.random.random_sample((5, 4))
out = f(inp)
assert numpy.allclose(inp * 2, out)
assert np.allclose(inp * 2, out)
print(inp)
print(out)
......@@ -435,13 +436,14 @@ You can try the new :class:`Op` as follows:
.. testcode:: example
import numpy as np
import aesara
x = aesara.tensor.matrix()
f = aesara.function([x], DoubleOp2()(x))
import numpy
inp = numpy.random.rand(5, 4)
inp = np.random.random_sample((5, 4))
out = f(inp)
assert numpy.allclose(inp * 2, out)
assert np.allclose(inp * 2, out)
print(inp)
print(out)
......@@ -530,10 +532,9 @@ We can test this by running the following segment:
f = aesara.function([x], mult4plus5op(x))
g = aesara.function([x], mult2plus3op(x))
import numpy
inp = numpy.random.rand(5, 4).astype(numpy.float32)
assert numpy.allclose(4 * inp + 5, f(inp))
assert numpy.allclose(2 * inp + 3, g(inp))
inp = np.random.random_sample((5, 4)).astype(np.float32)
assert np.allclose(4 * inp + 5, f(inp))
assert np.allclose(2 * inp + 3, g(inp))
How To Test it
......@@ -553,11 +554,11 @@ returns the right answer. If you detect an error, you must raise an
.. testcode:: tests
import numpy
import numpy as np
import aesara
from tests import unittest_tools as utt
from aesara.configdefaults import config
class TestDouble(utt.InferShapeTester):
def setup_method(self):
super().setup_method()
......@@ -565,9 +566,12 @@ returns the right answer. If you detect an error, you must raise an
self.op = DoubleOp()
def test_basic(self):
rng = np.random.default_rng(utt.fetch_seed())
x = aesara.tensor.matrix()
f = aesara.function([x], self.op(x))
inp = numpy.asarray(numpy.random.rand(5, 4), dtype=config.floatX)
inp = np.asarray(rng.random((5, 4)), dtype=aesara.config.floatX)
out = f(inp)
# Compare the result computed to the expected value.
utt.assert_allclose(inp * 2, out)
......@@ -612,20 +616,26 @@ your :class:`Op` works only with such matrices, you can disable the warning with
.. testcode:: tests
from tests import unittest_tools as utt
from aesara.configdefaults import config
from tests import unittest_tools as utt
class TestDouble(utt.InferShapeTester):
# [...] as previous tests.
def test_infer_shape(self):
rng = np.random.default_rng(utt.fetch_seed())
x = aesara.tensor.matrix()
self._compile_and_check([x], # aesara.function inputs
[self.op(x)], # aesara.function outputs
# Always use not square matrix!
# inputs data
[numpy.asarray(numpy.random.rand(5, 4),
dtype=config.floatX)],
# Op that should be removed from the graph.
self.op_class)
self._compile_and_check(
[x], # aesara.function inputs
[self.op(x)], # aesara.function outputs
# Always use not square matrix!
# inputs data
[np.asarray(rng.random((5, 4)), dtype=config.floatX)],
# Op that should be removed from the graph.
self.op_class,
)
Testing the gradient
^^^^^^^^^^^^^^^^^^^^
......@@ -642,8 +652,11 @@ the multiplication by 2).
.. testcode:: tests
def test_grad(self):
tests.unittest_tools.verify_grad(self.op,
[numpy.random.rand(5, 7, 2)])
rng = np.random.default_rng(utt.fetch_seed())
tests.unittest_tools.verify_grad(
self.op,
[rng.random((5, 7, 2))]
)
Testing the Rop
^^^^^^^^^^^^^^^
......@@ -778,40 +791,34 @@ signature:
.. testcode:: asop
import aesara
import numpy
import aesara.tensor as at
import numpy as np
from aesara import function
from aesara.compile.ops import as_op
def infer_shape_numpy_dot(fgraph, node, input_shapes):
ashp, bshp = input_shapes
return [ashp[:-1] + bshp[-1:]]
@as_op(itypes=[aesara.tensor.fmatrix, aesara.tensor.fmatrix],
otypes=[aesara.tensor.fmatrix], infer_shape=infer_shape_numpy_dot)
@as_op(itypes=[at.matrix, at.matrix],
otypes=[at.matrix], infer_shape=infer_shape_numpy_dot)
def numpy_dot(a, b):
return numpy.dot(a, b)
return np.dot(a, b)
You can try it as follows:
.. testcode:: asop
x = aesara.tensor.fmatrix()
y = aesara.tensor.fmatrix()
x = at.matrix()
y = at.matrix()
f = function([x, y], numpy_dot(x, y))
inp1 = numpy.random.rand(5, 4).astype('float32')
inp2 = numpy.random.rand(4, 7).astype('float32')
inp1 = np.random.random_sample((5, 4))
inp2 = np.random.random_sample((4, 7))
out = f(inp1, inp2)
Exercise
^^^^^^^^
Run the code of the ``numpy_dot`` example above.
Modify and execute to compute: ``numpy.add`` and ``numpy.subtract``.
Modify and execute the example to return two outputs: ``x + y`` and ``x - y``.
.. _Documentation:
Documentation and Coding Style
......@@ -822,7 +829,7 @@ will not be accepted.
:class:`NanGuardMode` and :class:`AllocEmpty`
---------------------------------------------
:class:`NanGuardMode` help users find where in the graph NaN appear. But
:class:`NanGuardMode` helps users find where ``NaN`` values appear in the graph. But
sometimes, we want some variables to not be checked. For example, in
the old GPU back-end, we used a float32 :class:`CudaNdarray` to store the MRG
random number generator state (they are integers). So if :class:`NanGuardMode`
......
......@@ -81,60 +81,60 @@ from aesara.tensor.type import dmatrix, matrix
class TestProdOp(utt.InferShapeTester):
rng = np.random.RandomState(43)
def setup_method(self):
super().setup_method()
self.op_class = ProdOp # case 1
def test_perform(self):
rng = np.random.default_rng(43)
x = matrix()
y = matrix()
f = aesara.function([x, y], self.op_class()(x, y))
x_val = np.random.rand(5, 4)
y_val = np.random.rand(5, 4)
x_val = rng.random((5, 4))
y_val = rng.random((5, 4))
out = f(x_val, y_val)
assert np.allclose(x_val * y_val, out)
def test_gradient(self):
rng = np.random.default_rng(43)
utt.verify_grad(
self.op_class(),
[np.random.rand(5, 4), np.random.rand(5, 4)],
[rng.random((5, 4)), rng.random((5, 4))],
n_tests=1,
rng=TestProdOp.rng,
)
def test_infer_shape(self):
rng = np.random.default_rng(43)
x = dmatrix()
y = dmatrix()
self._compile_and_check(
[x, y],
[self.op_class()(x, y)],
[np.random.rand(5, 6), np.random.rand(5, 6)],
[rng.random((5, 6)), rng.random((5, 6))],
self.op_class,
)
class TestSumDiffOp(utt.InferShapeTester):
rng = np.random.RandomState(43)
def setup_method(self):
super().setup_method()
self.op_class = SumDiffOp
def test_perform(self):
rng = np.random.RandomState(43)
x = matrix()
y = matrix()
f = aesara.function([x, y], self.op_class()(x, y))
x_val = np.random.rand(5, 4)
y_val = np.random.rand(5, 4)
x_val = rng.random((5, 4))
y_val = rng.random((5, 4))
out = f(x_val, y_val)
assert np.allclose([x_val + y_val, x_val - y_val], out)
def test_gradient(self):
rng = np.random.RandomState(43)
def output_0(x, y):
return self.op_class()(x, y)[0]
......@@ -143,18 +143,20 @@ class TestSumDiffOp(utt.InferShapeTester):
utt.verify_grad(
output_0,
[np.random.rand(5, 4), np.random.rand(5, 4)],
[rng.random((5, 4)), rng.random((5, 4))],
n_tests=1,
rng=TestSumDiffOp.rng,
)
utt.verify_grad(
output_1,
[np.random.rand(5, 4), np.random.rand(5, 4)],
[rng.random((5, 4)), rng.random((5, 4))],
n_tests=1,
rng=TestSumDiffOp.rng,
)
def test_infer_shape(self):
rng = np.random.RandomState(43)
x = dmatrix()
y = dmatrix()
......@@ -163,7 +165,7 @@ class TestSumDiffOp(utt.InferShapeTester):
self._compile_and_check(
[x, y],
self.op_class()(x, y),
[np.random.rand(5, 6), np.random.rand(5, 6)],
[rng.random((5, 6)), rng.random((5, 6))],
self.op_class,
)
......
......@@ -97,12 +97,23 @@ Example:
.. code-block:: python
import numpy as np
import aesara.tensor as at
def test_dot_validity():
a = at.dmatrix('a')
b = at.dmatrix('b')
c = at.dot(a, b)
f = aesara.function([a, b], [c])
assert np.array_equal(f(self.avals, self.bvals), numpy.dot(self.avals, self.bvals))
c_fn = aesara.function([a, b], [c])
avals = ...
bvals = ...
res = c_fn(avals, bvals)
exp_res = np.dot(avals, bvals)
assert np.array_equal(res, exp_res)
Creating an :class:`Op` Unit Test
......@@ -117,16 +128,16 @@ unit tests for Aesara :class:`Op`\s.
Validating the Gradient
-----------------------
The :func:`verify_grad` function can be used to validate that the :meth:`Op.grad`
The :func:`aesara.gradient.verify_grad` function can be used to validate that the :meth:`Op.grad`
method of your :class:`Op` is properly implemented. :func:`verify_grad` is based
on the Finite Difference Method where the derivative of function ``f``
at point ``x`` is approximated as:
on the Finite Difference Method where the derivative of function :math:`f`
at point :math:`x` is approximated as:
.. math::
\frac{\partial{f}}{\partial{x}} = lim_{\Delta \rightarrow 0} \frac {f(x+\Delta) - f(x-\Delta)} {2\Delta}
``verify_grad`` performs the following steps:
:func:`verify_grad` performs the following steps:
* approximates the gradient numerically using the Finite Difference Method
......@@ -142,7 +153,7 @@ Here is the prototype for the :func:`verify_grad` function.
def verify_grad(fun, pt, n_tests=2, rng=None, eps=1.0e-7, abs_tol=0.0001, rel_tol=0.0001):
:func:`verify_grad` raises an ``Exception`` if the difference between the analytic gradient and
:func:`verify_grad` raises an :class:`Exception` if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) of a random
projection of the fun's output to a scalar exceeds both the given absolute and
relative tolerances.
......@@ -152,15 +163,15 @@ The parameters are as follows:
* ``fun``: a Python function that takes Aesara variables as inputs,
and returns an Aesara variable.
For instance, an :class:`Op` instance with a single output is such a function.
It can also be a Python function that calls an op with some of its
It can also be a Python function that calls an :class:`Op` with some of its
inputs being fixed to specific values, or that combine multiple :class:`Op`\s.
* ``pt``: the list of numpy.ndarrays to use as input values
* ``pt``: the list of `np.ndarrays` to use as input values
* ``n_tests``: number of times to run the test
* ``rng``: random number generator used to generate a random vector u,
we check the gradient of sum(u*fn) at pt
* ``rng``: random number generator used to generate a random vector `u`,
we check the gradient of ``sum(u*fn)`` at ``pt``
* ``eps``: stepsize used in the Finite Difference Method
......@@ -176,12 +187,12 @@ symbolic variable:
def test_verify_exprgrad():
def fun(x,y,z):
return (x + tensor.cos(y)) / (4 * z)**2
return (x + at.cos(y)) / (4 * z)**2
x_val = numpy.asarray([[1], [1.1], [1.2]])
y_val = numpy.asarray([0.1, 0.2])
z_val = numpy.asarray(2)
rng = numpy.random.RandomState(42)
x_val = np.asarray([[1], [1.1], [1.2]])
y_val = np.asarray([0.1, 0.2])
z_val = np.asarray(2)
rng = np.random.default_rng(42)
aesara.gradient.verify_grad(fun, [x_val, y_val, z_val], rng=rng)
......@@ -190,11 +201,13 @@ Here is an example showing how to use :func:`verify_grad` on an :class:`Op` inst
.. testcode::
def test_flatten_outdimNone():
# Testing gradient w.r.t. all inputs of an op (in this example the op
# being used is Flatten(), which takes a single input).
a_val = numpy.asarray([[0,1,2],[3,4,5]], dtype='float64')
rng = numpy.random.RandomState(42)
aesara.gradient.verify_grad(tensor.Flatten(), [a_val], rng=rng)
"""
Testing gradient w.r.t. all inputs of an `Op` (in this example the `Op`
being used is `Flatten`, which takes a single input).
"""
a_val = np.asarray([[0,1,2],[3,4,5]], dtype='float64')
rng = np.random.default_rng(42)
aesara.gradient.verify_grad(at.Flatten(), [a_val], rng=rng)
Here is another example, showing how to verify the gradient w.r.t. a subset of
an :class:`Op`'s inputs. This is useful in particular when the gradient w.r.t. some of
......@@ -204,29 +217,30 @@ which would cause :func:`verify_grad` to crash.
.. testcode::
def test_crossentropy_softmax_grad():
op = tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias
op = at.nnet.crossentropy_softmax_argmax_1hot_with_bias
def op_with_fixed_y_idx(x, b):
# Input `y_idx` of this Op takes integer values, so we fix them
# Input `y_idx` of this `Op` takes integer values, so we fix them
# to some constant array.
# Although this op has multiple outputs, we can return only one.
# Although this `Op` has multiple outputs, we can return only one.
# Here, we return the first output only.
return op(x, b, y_idx=numpy.asarray([0, 2]))[0]
return op(x, b, y_idx=np.asarray([0, 2]))[0]
x_val = numpy.asarray([[-1, 0, 1], [3, 2, 1]], dtype='float64')
b_val = numpy.asarray([1, 2, 3], dtype='float64')
rng = numpy.random.RandomState(42)
x_val = np.asarray([[-1, 0, 1], [3, 2, 1]], dtype='float64')
b_val = np.asarray([1, 2, 3], dtype='float64')
rng = np.random.default_rng(42)
aesara.gradient.verify_grad(op_with_fixed_y_idx, [x_val, b_val], rng=rng)
.. note::
Although ``verify_grad`` is defined in ``aesara.tensor.basic``, unittests
should use the version of ``verify_grad`` defined in ``tests.unittest_tools``.
Although :func:`verify_grad` is defined in :mod:`aesara.gradient`, unittests
should use the version of :func:`verify_grad` defined in :mod:`tests.unittest_tools`.
This is simply a wrapper function which takes care of seeding the random
number generator appropriately before calling ``aesara.gradient.verify_grad``
number generator appropriately before calling :func:`aesara.gradient.verify_grad`
makeTester and makeBroadcastTester
==================================
:func:`makeTester` and :func:`makeBroadcastTester`
==================================================
Most :class:`Op` unittests perform the same function. All such tests must
verify that the :class:`Op` generates the proper output, that the gradient is
......@@ -244,21 +258,23 @@ product :class:`Op`:
from tests.tensor.utils import makeTester
rng = np.random.default_rng(23098)
TestDot = makeTester(
name="DotTester",
op=np.dot,
expected=lambda x, y: numpy.dot(x, y),
expected=lambda x, y: np.dot(x, y),
checks={},
good=dict(
correct1=(rng.rand(5, 7), rng.rand(7, 5)),
correct2=(rng.rand(5, 7), rng.rand(7, 9)),
correct3=(rng.rand(5, 7), rng.rand(7)),
correct1=(rng.random((5, 7)), rng.random((7, 5))),
correct2=(rng.random((5, 7)), rng.random((7, 9))),
correct3=(rng.random((5, 7)), rng.random((7,))),
),
bad_build=dict(),
bad_runtime=dict(
bad1=(rng.rand(5, 7), rng.rand(5, 7)), bad2=(rng.rand(5, 7), rng.rand(8, 3))
bad1=(rng.random((5, 7)), rng.random((5, 7))),
bad2=(rng.random((5, 7)), rng.random((8, 3)))
),
grad=dict(),
)
......
......@@ -14,37 +14,36 @@ Guide
=====
The NanGuardMode aims to prevent the model from outputting NaNs or Infs. It has
a number of self-checks, which can help to find out which apply node is
generating those incorrect outputs. It provides automatic detection of 3 types
The :class:`NanGuardMode` aims to prevent the model from outputting NaNs or Infs. It has
a number of self-checks, which can help to find out which :class:`Apply` node is
generating those incorrect outputs. It provides automatic detection of three types
of abnormal values: NaNs, Infs, and abnormally big values.
NanGuardMode can be used as follows:
`NanGuardMode` can be used as follows:
.. testcode::
import numpy
import numpy as np
import aesara
import aesara.tensor as at
from aesara.compile.nanguardmode import NanGuardMode
x = at.matrix()
w = aesara.shared(numpy.random.randn(5, 7).astype(aesara.config.floatX))
w = aesara.shared(np.random.standard_normal((5, 7)).astype(aesara.config.floatX))
y = at.dot(x, w)
fun = aesara.function(
[x], y,
mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
)
While using the aesara function ``fun``, it will monitor the values of each
While using the Aesara function ``fun``, it will monitor the values of each
input and output variable of each node. When abnormal values are
detected, it raises an error to indicate which node yields the NaNs. For
example, if we pass the following values to ``fun``:
.. testcode::
infa = numpy.tile(
(numpy.asarray(100.) ** 1000000).astype(aesara.config.floatX), (3, 5))
infa = np.tile((np.asarray(100.) ** 1000000).astype(aesara.config.floatX), (3, 5))
fun(infa)
.. testoutput::
......@@ -55,17 +54,17 @@ example, if we pass the following values to ``fun``:
...
AssertionError: ...
It will raise an AssertionError indicating that Inf value is detected while
It will raise an `AssertionError` indicating that an Inf value was detected while
executing the function.
You can also set the three parameters in ``NanGuardMode()`` to indicate which
You can also set the three parameters in `NanGuardMode` to indicate which
kind of abnormal values to monitor. ``nan_is_error`` and ``inf_is_error`` has
no default values, so they need to be set explicitly, but ``big_is_error`` is
set to be ``True`` by default.
.. note::
NanGuardMode significantly slows down computations; only
`NanGuardMode` significantly slows down computations; only
enable as needed.
Reference
......
......@@ -797,8 +797,7 @@ import ``aesara`` and print the config variable, as in:
Aesara will execute the graph using constants and/or shared variables
provided by the user. Purely symbolic variables (e.g. ``x =
aesara.tensor.dmatrix()``) can be augmented with test values, by writing to
their ``tag.test_value`` attribute (e.g. ``x.tag.test_value =
numpy.random.rand(5, 4)``).
their ``.tag.test_value`` attributes (e.g. ``x.tag.test_value = np.ones((5, 4))``).
When not ``'off'``, the value of this option dictates what happens when
an :class:`Op`'s inputs do not provide appropriate test values:
......
......@@ -65,8 +65,8 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY?
up_fn, app_fn = build_logistic_regression_model(n_in=10, n_out=3, l2_coef=30.0)
x_data = numpy.random.randn(100, 10)
y_data = numpy.random.randn(100, 3)
x_data = numpy.random.standard_normal((100, 10))
y_data = numpy.random.standard_normal((100, 3))
y_data = _asarray(y_data == numpy.max(y_data, axis=1), dtype='int64')
print "Model Training ..."
......
......@@ -11,31 +11,36 @@ Note that you want SciPy >= 0.7.2
.. warning::
In SciPy 0.6, ``scipy.csc_matrix.dot`` has a bug with singleton
In SciPy 0.6, `scipy.csc_matrix.dot` has a bug with singleton
dimensions. There may be more bugs. It also has inconsistent
implementation of sparse matrices.
We do not test against SciPy versions below 0.7.2.
We describe the details of the compressed sparse matrix types.
``scipy.sparse.csc_matrix``
should be used if there are more rows than column (shape[0] > shape[1]).
``scipy.sparse.csr_matrix``
should be used if there are more columns than rows (shape[0] < shape[1]).
``scipy.sparse.lil_matrix``
`scipy.sparse.csc_matrix`
should be used if there are more rows than columns (``shape[0] > shape[1]``).
`scipy.sparse.csr_matrix`
should be used if there are more columns than rows (``shape[0] < shape[1]``).
`scipy.sparse.lil_matrix`
is faster if we are modifying the array. After initial inserts,
we can then convert to the appropriate sparse matrix format.
The following types also exist:
``dok_matrix``
`dok_matrix`
Dictionary of Keys format. From their doc: This is an efficient structure for constructing sparse matrices incrementally.
``coo_matrix``
`coo_matrix`
Coordinate format. From their lil doc: consider using the COO format when constructing large matrices.
There seems to be a new format planned for scipy 0.7.x:
``bsr_matrix``
Block Compressed Row (BSR). From their doc: The Block Compressed Row (BSR) format is very similar to the Compressed Sparse Row (CSR) format. BSR is appropriate for sparse matrices with dense sub matrices like the last example below. Block matrices often arise in vector-valued finite element discretizations. In such cases, BSR is considerably more efficient than CSR and CSC for many sparse arithmetic operations.
``dia_matrix``
There seems to be a new format planned for SciPy 0.7.x:
`bsr_matrix`
Block Compressed Row (BSR). From their doc: The Block Compressed Row
(BSR) format is very similar to the Compressed Sparse Row (CSR)
format. BSR is appropriate for sparse matrices with dense sub matrices
like the last example below. Block matrices often arise in vector-valued
finite element discretizations. In such cases, BSR is considerably more
efficient than CSR and CSC for many sparse arithmetic operations.
`dia_matrix`
Sparse matrix with DIAgonal storage
There are four member variables that comprise a compressed matrix ``sp`` (for at least csc, csr and bsr):
......@@ -52,9 +57,9 @@ There are four member variables that comprise a compressed matrix ``sp`` (for at
row location.
``sp.indptr``
gives the other location of the non-zero entry. For CSC, there are
as many values of indptr as there are columns + 1 in the matrix.
as many values of indptr as there are ``columns + 1`` in the matrix.
``sp.indptr[k] = x`` and ``indptr[k+1] = y`` means that column
k contains sp.data[x:y], i.e. the xth through the y-1th non-zero values.
``k`` contains ``sp.data[x:y]``, i.e. the ``x``-th through the ``(y-1)``-th non-zero values.
See the example below for details.
......@@ -63,7 +68,7 @@ See the example below for details.
>>> import scipy.sparse
>>> sp = scipy.sparse.csc_matrix((5, 10))
>>> sp[4, 0] = 20
/u/lisa/local/byhost/test_maggie46.iro.umontreal.ca/lib64/python2.5/site-packages/scipy/sparse/compressed.py:494: SparseEfficiencyWarning: changing the sparsity structure of a csc_matrix is expensive. lil_matrix is more efficient.
SparseEfficiencyWarning: changing the sparsity structure of a csc_matrix is expensive. lil_matrix is more efficient.
SparseEfficiencyWarning)
>>> sp[0, 0] = 10
>>> sp[2, 3] = 30
......@@ -91,13 +96,13 @@ Several things should be learned from the above example:
* We actually use the wrong sparse matrix type. In fact, it is the
*rows* that are sparse, not the columns. So, it would have been
better to use ``sp = scipy.sparse.csr_matrix((5, 10))``.
* We should have actually created the matrix as a ``lil_matrix``,
* We should have actually created the matrix as a `lil_matrix`,
which is more efficient for inserts. Afterwards, we should convert
to the appropriate compressed format.
* `sp.indptr[0] = 0` and `sp.indptr[1] = 2`, which means that
column 0 contains sp.data[0:2], i.e. the first two non-zero values.
* `sp.indptr[3] = 2` and `sp.indptr[4] = 3`, which means that column
3 contains sp.data[2:3], i.e. the third non-zero value.
* ``sp.indptr[0] = 0`` and ``sp.indptr[1] = 2``, which means that
column 0 contains ``sp.data[0:2]``, i.e. the first two non-zero values.
* ``sp.indptr[3] = 2`` and ``sp.indptr[4] = 3``, which means that column
3 contains ``sp.data[2:3]``, i.e. the third non-zero value.
TODO: Rewrite this documentation to do things in a smarter way.
......@@ -112,7 +117,7 @@ For faster sparse code:
Misc
----
The sparse equivalent of dmatrix is csc_matrix and csr_matrix.
The sparse equivalent of `dmatrix` is `csc_matrix` and `csr_matrix`.
:class:`~aesara.sparse.basic.Dot` vs. :class:`~aesara.sparse.basic.StructuredDot`
---------------------------------------------------------------------------------
......@@ -121,22 +126,22 @@ Often when you use a sparse matrix it is because there is a meaning to the
structure of non-zeros. The gradient on terms outside that structure
has no meaning, so it is computationally efficient not to compute them.
StructuredDot is when you want the gradient to have zeroes corresponding to
`StructuredDot` is when you want the gradient to have zeroes corresponding to
the sparse entries in the matrix.
TrueDot and Structured dot have different gradients
`TrueDot` and `StructuredDot` have different gradients
but their perform functions should be the same.
The gradient of TrueDot can have non-zeros where the sparse matrix had zeros.
The gradient of StructuredDot can't.
The gradient of `TrueDot` can have non-zeros where the sparse matrix had zeros.
The gradient of `StructuredDot` can't.
Suppose you have ``dot(x,w)`` where ``x`` and ``w`` are square matrices.
If ``w`` is dense, like ``randn((5,5))`` and ``x`` is of full rank (though
potentially sparse, like a diagonal matrix of 1s) then the output will
be dense too. (But i guess the density of the output is a red herring.)
If ``w`` is dense, like ``standard_normal((5,5))`` and ``x`` is of full rank (though
potentially sparse, like a diagonal matrix of ones) then the output will
be dense too.
What's important is the density of the gradient on the output.
If the gradient on the output is dense, and ``w`` is dense (as we said it was)
then the True gradient on ``x`` will be dense.
If our dot is a TrueDot, then it will say that the gradient on ``x`` is dense.
If our dot is a StructuredDot, then it will say the gradient on ``x`` is only
then the true gradient on ``x`` will be dense.
If our dot is a `TrueDot`, then it will say that the gradient on ``x`` is dense.
If our dot is a `StructuredDot`, then it will say the gradient on ``x`` is only
defined on the diagonal and ignore the gradients on the off-diagonal.
差异被折叠。
差异被折叠。
......@@ -9,11 +9,11 @@ Configuration Settings and Compiling Modes
Configuration
=============
The ``config`` module contains several *attributes* that modify Aesara's behavior. Many of these
attributes are examined during the import of the ``aesara`` module and several are assumed to be
The :mod:`aesara.config` module contains several *attributes* that modify Aesara's behavior. Many of these
attributes are examined during the import of the :mod:`aesara` module and several are assumed to be
read-only.
*As a rule, the attributes in the* ``config`` *module should not be modified inside the user code.*
*As a rule, the attributes in the* :mod:`aesara.config` *module should not be modified inside the user code.*
Aesara's code comes with default values for these attributes, but you can
override them from your ``.aesararc`` file, and override those values in turn by
......@@ -21,12 +21,12 @@ the :envvar:`AESARA_FLAGS` environment variable.
The order of precedence is:
1. an assignment to aesara.config.<property>
1. an assignment to ``aesara.config.<property>``
2. an assignment in :envvar:`AESARA_FLAGS`
3. an assignment in the .aesararc file (or the file indicated in :envvar:`AESARARC`)
3. an assignment in the ``.aesararc`` file (or the file indicated in :envvar:`AESARARC`)
You can display the current/effective configuration at any time by printing
aesara.config. For example, to see a list of all active configuration
`aesara.config`. For example, to see a list of all active configuration
variables, type this from the command-line:
.. code-block:: bash
......@@ -45,22 +45,24 @@ Consider the logistic regression:
.. testcode::
import numpy
import numpy as np
import aesara
import aesara.tensor as at
rng = numpy.random
rng = np.random.default_rng(2498)
N = 400
feats = 784
D = (rng.randn(N, feats).astype(aesara.config.floatX),
rng.randint(size=N,low=0, high=2).astype(aesara.config.floatX))
D = (rng.standard_normal((N, feats)).astype(aesara.config.floatX),
rng.integers(size=N, low=0, high=2).astype(aesara.config.floatX))
training_steps = 10000
# Declare Aesara symbolic variables
x = at.matrix("x")
y = at.vector("y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(numpy.asarray(0., dtype=aesara.config.floatX), name="b")
w = aesara.shared(rng.standard_normal(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
......@@ -73,15 +75,18 @@ Consider the logistic regression:
# Compile expressions to functions
train = aesara.function(
inputs=[x,y],
outputs=[prediction, xent],
updates=[(w, w-0.01*gw), (b, b-0.01*gb)],
name = "train")
predict = aesara.function(inputs=[x], outputs=prediction,
name = "predict")
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in
train.maker.fgraph.toposort()]):
inputs=[x,y],
outputs=[prediction, xent],
updates=[(w, w-0.01*gw), (b, b-0.01*gb)],
name = "train"
)
predict = aesara.function(
inputs=[x], outputs=prediction,
name = "predict"
)
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm']
for x in train.maker.fgraph.toposort()]):
print('Used the cpu')
else:
print('ERROR, not able to tell if aesara used the cpu or another device')
......@@ -106,7 +111,7 @@ Consider the logistic regression:
prediction on D
...
Modify and execute this example to run on CPU (the default) with floatX=float32 and
Modify and execute this example to run on CPU (the default) with ``floatX=float32`` and
time the execution using the command line ``time python file.py``. Save your code
as it will be useful later on.
......@@ -114,10 +119,10 @@ as it will be useful later on.
* Apply the Aesara flag ``floatX=float32`` (through ``aesara.config.floatX``) in your code.
* Cast inputs before storing them into a shared variable.
* Circumvent the automatic cast of *int32* with *float32* to *float64*:
* Circumvent the automatic cast of int32 with float32 to float64:
* Insert manual cast in your code or use *[u]int{8,16}*.
* Insert manual cast around the mean operator (this involves division by length, which is an *int64*).
* Insert manual cast in your code or use [u]int{8,16}.
* Insert manual cast around the mean operator (this involves division by length, which is an int64).
* Note that a new casting mechanism is being developed.
:download:`Solution<modes_solution_1.py>`
......@@ -156,7 +161,7 @@ short name Full constructor
.. Note::
For debugging purpose, there also exists a ``MonitorMode`` (which has no
For debugging purpose, there also exists a :class:`MonitorMode` (which has no
short name). It can be used to step through the execution of a function:
see :ref:`the debugging FAQ<faq_monitormode>` for details.
......@@ -165,8 +170,8 @@ Linkers
=======
A mode is composed of 2 things: an optimizer and a linker. Some modes,
like ``NanGuardMode`` and ``DebugMode``, add logic around the
optimizer and linker. ``DebugMode`` uses its own linker.
like `NanGuardMode` and `DebugMode`, add logic around the
optimizer and linker. `DebugMode` uses its own linker.
You can select which linker to use with the Aesara flag :attr:`config.linker`.
Here is a table to compare the different linkers.
......@@ -233,8 +238,8 @@ Using DebugMode
While normally you should use the ``FAST_RUN`` or ``FAST_COMPILE`` mode,
it is useful at first (especially when you are defining new kinds of
expressions or new optimizations) to run your code using the DebugMode
(available via ``mode='DebugMode``). The DebugMode is designed to
expressions or new optimizations) to run your code using the `DebugMode`
(available via ``mode='DebugMode'``). The `DebugMode` is designed to
run several self-checks and assertions that can help diagnose
possible programming errors leading to incorrect output. Note that
``DebugMode`` is much slower than ``FAST_RUN`` or ``FAST_COMPILE`` so
......@@ -245,7 +250,7 @@ cluster!).
.. If you modify this code, also change :
.. tests/test_tutorial.py:T_modes.test_modes_1
DebugMode is used as follows:
`DebugMode` is used as follows:
.. testcode::
......@@ -258,21 +263,21 @@ DebugMode is used as follows:
f([7])
If any problem is detected, DebugMode will raise an exception according to
what went wrong, either at call time (*f(5)*) or compile time (
If any problem is detected, `DebugMode` will raise an exception according to
what went wrong, either at call time (e.g. ``f(5)``) or compile time (
``f = aesara.function(x, 10 * x, mode='DebugMode')``). These exceptions
should *not* be ignored; talk to your local Aesara guru or email the
users list if you cannot make the exception go away.
Some kinds of errors can only be detected for certain input value combinations.
In the example above, there is no way to guarantee that a future call to, say
*f(-1)*, won't cause a problem. DebugMode is not a silver bullet.
``f(-1)``, won't cause a problem. `DebugMode` is not a silver bullet.
.. TODO: repair the following link
If you instantiate DebugMode using the constructor (see :class:`DebugMode`)
rather than the keyword ``DebugMode`` you can configure its behaviour via
constructor arguments. The keyword version of DebugMode (which you get by using ``mode='DebugMode'``)
If you instantiate `DebugMode` using the constructor (see :class:`DebugMode`)
rather than the keyword `DebugMode` you can configure its behaviour via
constructor arguments. The keyword version of `DebugMode` (which you get by using ``mode='DebugMode'``)
is quite strict.
For more detail, see :ref:`DebugMode<debugmode>` in the library.
......@@ -2,59 +2,62 @@
# Aesara tutorial
# Solution to Exercise in section 'Configuration Settings and Compiling Modes'
import numpy as np
import aesara
import aesara.tensor as at
aesara.config.floatX = 'float32'
rng = np.random
aesara.config.floatX = "float32"
rng = np.random.default_rng(428)
N = 400
feats = 784
D = (rng.randn(N, feats).astype(aesara.config.floatX),
rng.randint(size=N, low=0, high=2).astype(aesara.config.floatX))
D = (
rng.standard_normal((N, feats)).astype(aesara.config.floatX),
rng.integers(size=N, low=0, high=2).astype(aesara.config.floatX),
)
training_steps = 10000
# Declare Aesara symbolic variables
x = at.matrix("x")
y = at.vector("y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b")
w = aesara.shared(rng.standard_normal(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0.0, dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
#print "Initial model:"
#print w.get_value(), b.get_value()
# print "Initial model:"
# print w.get_value(), b.get_value()
# Construct Aesara expression graph
p_1 = 1 / (1 + at.exp(-at.dot(x, w) - b)) # Probability of having a one
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
xent = -y * at.log(p_1) - (1 - y) * at.log(1 - p_1) # Cross-entropy
cost = at.cast(xent.mean(), 'float32') + \
0.01 * (w ** 2).sum() # The cost to optimize
cost = at.cast(xent.mean(), "float32") + 0.01 * (w**2).sum() # The cost to optimize
gw, gb = at.grad(cost, [w, b])
# Compile expressions to functions
train = aesara.function(
inputs=[x, y],
outputs=[prediction, xent],
updates={w: w - 0.01 * gw, b: b - 0.01 * gb},
name="train")
predict = aesara.function(inputs=[x], outputs=prediction,
name="predict")
if any(x.op.__class__.__name__ in ('Gemv', 'CGemv', 'Gemm', 'CGemm') for x in
train.maker.fgraph.toposort()):
print('Used the cpu')
inputs=[x, y],
outputs=[prediction, xent],
updates={w: w - 0.01 * gw, b: b - 0.01 * gb},
name="train",
)
predict = aesara.function(inputs=[x], outputs=prediction, name="predict")
if any(
x.op.__class__.__name__ in ("Gemv", "CGemv", "Gemm", "CGemm")
for x in train.maker.fgraph.toposort()
):
print("Used the cpu")
else:
print('ERROR, not able to tell if aesara used the cpu or another device')
print("ERROR, not able to tell if aesara used the cpu or another device")
print(train.maker.fgraph.toposort())
for i in range(training_steps):
pred, err = train(D[0], D[1])
#print "Final model:"
#print w.get_value(), b.get_value()
# print "Final model:"
# print w.get_value(), b.get_value()
print("target values for D")
print(D[1])
......
......@@ -25,20 +25,20 @@ that creates an image of the function. You can read about them in
Consider again the logistic regression example:
>>> import numpy
>>> import numpy as np
>>> import aesara
>>> import aesara.tensor as at
>>> rng = numpy.random
>>> rng = np.random.default_rng(2382)
>>> # Training data
>>> N = 400
>>> feats = 784
>>> D = (rng.randn(N, feats).astype(aesara.config.floatX), rng.randint(size=N,low=0, high=2).astype(aesara.config.floatX))
>>> D = (rng.standard_normal((N, feats)).astype(aesara.config.floatX), rng.integers(size=N,low=0, high=2).astype(aesara.config.floatX))
>>> training_steps = 10000
>>> # Declare Aesara symbolic variables
>>> x = at.matrix("x")
>>> y = at.vector("y")
>>> w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
>>> b = aesara.shared(numpy.asarray(0., dtype=aesara.config.floatX), name="b")
>>> w = aesara.shared(rng.standard_normal(feats).astype(aesara.config.floatX), name="w")
>>> b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b")
>>> x.tag.test_value = D[0]
>>> y.tag.test_value = D[1]
>>> # Construct Aesara expression graph
......
import numpy as np
import aesara
x, y, z = aesara.tensor.vectors('xyz')
x, y, z = aesara.tensor.vectors("xyz")
f = aesara.function([x, y, z], [(x + y + z) * 2])
xv = np.random.rand(10).astype(aesara.config.floatX)
yv = np.random.rand(10).astype(aesara.config.floatX)
zv = np.random.rand(10).astype(aesara.config.floatX)
xv = np.random.random((10,)).astype(aesara.config.floatX)
yv = np.random.random((10,)).astype(aesara.config.floatX)
zv = np.random.random((10,)).astype(aesara.config.floatX)
f(xv, yv, zv)
......@@ -49,7 +49,7 @@ upgrade. Here is the current state of what can be done:
aesara.tensor.nnet.conv2d(..., image_shape=(7, 3, 5, 5), filter_shape=(2, 3, 4, 4))
- You can use the ``SpecifyShape`` op to add shape information anywhere in the
- You can use the :class:`SpecifyShape`\ :class:`Op` to add shape information anywhere in the
graph. This allows to perform some optimizations. In the following example,
this makes it possible to precompute the Aesara function to a constant.
......@@ -67,13 +67,13 @@ Problems with Shape inference
Sometimes this can lead to errors. Consider this example:
>>> import numpy
>>> import numpy as np
>>> import aesara
>>> x = aesara.tensor.matrix('x')
>>> y = aesara.tensor.matrix('y')
>>> z = aesara.tensor.join(0, x, y)
>>> xv = numpy.random.rand(5, 4)
>>> yv = numpy.random.rand(3, 3)
>>> xv = np.random.random((5, 4))
>>> yv = np.random.random((3, 3))
>>> f = aesara.function([x, y], z.shape)
>>> aesara.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
......@@ -109,7 +109,7 @@ This makes the computation of the shape faster, but it can also hide errors. In
this example, the computation of the shape of the output of ``join`` is done only
based on the first input Aesara variable, which leads to an error.
This might happen with other ops such as ``elemwise`` and ``dot``, for example.
This might happen with other `Op`\s such as :class:`Elemwise` and :class:`Dot`, for example.
Indeed, to perform some optimizations (for speed or stability, for instance),
Aesara assumes that the computation is correct and consistent
in the first place, as it does here.
......@@ -118,5 +118,5 @@ You can detect those problems by running the code without this
optimization, using the Aesara flag
``optimizer_excluding=local_shape_to_shape_i``. You can also obtain the
same effect by running in the modes ``FAST_COMPILE`` (it will not apply this
optimization, nor most other optimizations) or ``DebugMode`` (it will test
before and after all optimizations (much slower)).
optimization, nor most other optimizations) or :class:`DebugMode` (it will test
before and after all optimizations).
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论