提交 e40c1b29 authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Brandon T. Willard

Documentation formatting and NumPy usage updates

上级 8b7446e0
...@@ -146,7 +146,7 @@ class OpFromGraph(Op, HasInnerGraph): ...@@ -146,7 +146,7 @@ class OpFromGraph(Op, HasInnerGraph):
from aesara.compile.builders import OpFromGraph from aesara.compile.builders import OpFromGraph
x, y, z = at.scalars('xyz') x, y, z = at.scalars('xyz')
s = aesara.shared(np.random.rand(2, 2).astype(config.floatX)) s = aesara.shared(np.random.random((2, 2)).astype(config.floatX))
e = x + y * z + s e = x + y * z + s
op = OpFromGraph([x, y, z], [e]) op = OpFromGraph([x, y, z], [e])
# op behaves like a normal aesara op # op behaves like a normal aesara op
......
...@@ -5,6 +5,7 @@ import time ...@@ -5,6 +5,7 @@ import time
import warnings import warnings
from collections import OrderedDict from collections import OrderedDict
from functools import partial, reduce from functools import partial, reduce
from typing import TYPE_CHECKING, Callable, List, Optional, Union
import numpy as np import numpy as np
...@@ -18,6 +19,10 @@ from aesara.graph.op import get_test_values ...@@ -18,6 +19,10 @@ from aesara.graph.op import get_test_values
from aesara.graph.type import Type from aesara.graph.type import Type
if TYPE_CHECKING:
from aesara.compile.mode import Mode
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
_logger = logging.getLogger("aesara.gradient") _logger = logging.getLogger("aesara.gradient")
...@@ -684,8 +689,8 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): ...@@ -684,8 +689,8 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False):
.. code-block:: python .. code-block:: python
x, t = aesara.tensor.fvector('x'), aesara.tensor.fvector('t') x, t = aesara.tensor.fvector('x'), aesara.tensor.fvector('t')
w1 = aesara.shared(np.random.randn(3,4)) w1 = aesara.shared(np.random.standard_normal((3,4)))
w2 = aesara.shared(np.random.randn(4,2)) w2 = aesara.shared(np.random.standard_normal((4,2)))
a1 = aesara.tensor.tanh(aesara.tensor.dot(x,w1)) a1 = aesara.tensor.tanh(aesara.tensor.dot(x,w1))
a2 = aesara.tensor.tanh(aesara.tensor.dot(a1,w2)) a2 = aesara.tensor.tanh(aesara.tensor.dot(a1,w2))
cost2 = aesara.tensor.sqr(a2 - t).sum() cost2 = aesara.tensor.sqr(a2 - t).sum()
...@@ -1690,17 +1695,17 @@ def mode_not_slow(mode): ...@@ -1690,17 +1695,17 @@ def mode_not_slow(mode):
def verify_grad( def verify_grad(
fun, fun: Callable,
pt, pt: List[np.ndarray],
n_tests=2, n_tests: int = 2,
rng=None, rng: Optional[Union[np.random.Generator, np.random.RandomState]] = None,
eps=None, eps: Optional[float] = None,
out_type=None, out_type: Optional[str] = None,
abs_tol=None, abs_tol: Optional[float] = None,
rel_tol=None, rel_tol: Optional[float] = None,
mode=None, mode: Optional[Union["Mode", str]] = None,
cast_to_output_type=False, cast_to_output_type: bool = False,
no_debug_ref=True, no_debug_ref: bool = True,
): ):
"""Test a gradient by Finite Difference Method. Raise error on failure. """Test a gradient by Finite Difference Method. Raise error on failure.
...@@ -1713,47 +1718,47 @@ def verify_grad( ...@@ -1713,47 +1718,47 @@ def verify_grad(
-------- --------
>>> verify_grad(aesara.tensor.tanh, >>> verify_grad(aesara.tensor.tanh,
... (np.asarray([[2, 3, 4], [-1, 3.3, 9.9]]),), ... (np.asarray([[2, 3, 4], [-1, 3.3, 9.9]]),),
... rng=np.random) ... rng=np.random.default_rng(23098))
Parameters Parameters
---------- ----------
fun : a Python function fun
`fun` takes Aesara variables as inputs, and returns an Aesara variable. `fun` takes Aesara variables as inputs, and returns an Aesara variable.
For instance, an Op instance with a single output. For instance, an `Op` instance with a single output.
pt : list of numpy.ndarrays pt
Input values, points where the gradient is estimated. Input values, points where the gradient is estimated.
These arrays must be either float16, float32, or float64 arrays. These arrays must be either float16, float32, or float64 arrays.
n_tests : int n_tests
Number of times to run the test Number of times to run the test.
rng : numpy.random.RandomState rng
Random number generator used to sample the output random projection `u`, Random number generator used to sample the output random projection `u`,
we test gradient of sum(u * fun) at `pt` we test gradient of ``sum(u * fun)`` at `pt`.
eps : float, optional eps
Step size used in the Finite Difference Method (Default Step size used in the Finite Difference Method (Default
None is type-dependent). ``None`` is type-dependent).
Raising the value of eps can raise or lower the absolute Raising the value of `eps` can raise or lower the absolute
and relative errors of the verification depending on the and relative errors of the verification depending on the
Op. Raising eps does not lower the verification quality for `Op`. Raising `eps` does not lower the verification quality for
linear operations. It is better to raise `eps` than raising linear operations. It is better to raise `eps` than raising
`abs_tol` or `rel_tol`. `abs_tol` or `rel_tol`.
out_type : string out_type
Dtype of output, if complex (i.e., 'complex32' or 'complex64') Dtype of output, if complex (i.e., ``'complex32'`` or ``'complex64'``)
abs_tol : float abs_tol
Absolute tolerance used as threshold for gradient comparison Absolute tolerance used as threshold for gradient comparison
rel_tol : float rel_tol
Relative tolerance used as threshold for gradient comparison Relative tolerance used as threshold for gradient comparison
cast_to_output_type : bool cast_to_output_type
If the output is float32 and cast_to_output_type is True, cast If the output is float32 and `cast_to_output_type` is ``True``, cast
the random projection to float32. Otherwise it is float64. the random projection to float32; otherwise, it is float64.
float16 is not handled here. float16 is not handled here.
no_debug_ref : bool no_debug_ref
Don't use DebugMode for the numerical gradient function. Don't use `DebugMode` for the numerical gradient function.
Notes Notes
----- -----
This function does not support multiple outputs. In This function does not support multiple outputs. In `tests.scan.test_basic`
tests/scan/test_basic.py there is an experimental `verify_grad` that covers there is an experimental `verify_grad` that covers that case as well by
that case as well by using random projections. using random projections.
""" """
from aesara.compile.function import function from aesara.compile.function import function
......
...@@ -404,13 +404,14 @@ You can try the new :class:`Op` as follows: ...@@ -404,13 +404,14 @@ You can try the new :class:`Op` as follows:
.. testcode:: example .. testcode:: example
import numpy as np
import aesara import aesara
x = aesara.tensor.matrix() x = aesara.tensor.matrix()
f = aesara.function([x], DoubleOp1()(x)) f = aesara.function([x], DoubleOp1()(x))
import numpy inp = np.random.random_sample((5, 4))
inp = numpy.random.rand(5, 4)
out = f(inp) out = f(inp)
assert numpy.allclose(inp * 2, out) assert np.allclose(inp * 2, out)
print(inp) print(inp)
print(out) print(out)
...@@ -435,13 +436,14 @@ You can try the new :class:`Op` as follows: ...@@ -435,13 +436,14 @@ You can try the new :class:`Op` as follows:
.. testcode:: example .. testcode:: example
import numpy as np
import aesara import aesara
x = aesara.tensor.matrix() x = aesara.tensor.matrix()
f = aesara.function([x], DoubleOp2()(x)) f = aesara.function([x], DoubleOp2()(x))
import numpy inp = np.random.random_sample((5, 4))
inp = numpy.random.rand(5, 4)
out = f(inp) out = f(inp)
assert numpy.allclose(inp * 2, out) assert np.allclose(inp * 2, out)
print(inp) print(inp)
print(out) print(out)
...@@ -530,10 +532,9 @@ We can test this by running the following segment: ...@@ -530,10 +532,9 @@ We can test this by running the following segment:
f = aesara.function([x], mult4plus5op(x)) f = aesara.function([x], mult4plus5op(x))
g = aesara.function([x], mult2plus3op(x)) g = aesara.function([x], mult2plus3op(x))
import numpy inp = np.random.random_sample((5, 4)).astype(np.float32)
inp = numpy.random.rand(5, 4).astype(numpy.float32) assert np.allclose(4 * inp + 5, f(inp))
assert numpy.allclose(4 * inp + 5, f(inp)) assert np.allclose(2 * inp + 3, g(inp))
assert numpy.allclose(2 * inp + 3, g(inp))
How To Test it How To Test it
...@@ -553,11 +554,11 @@ returns the right answer. If you detect an error, you must raise an ...@@ -553,11 +554,11 @@ returns the right answer. If you detect an error, you must raise an
.. testcode:: tests .. testcode:: tests
import numpy import numpy as np
import aesara import aesara
from tests import unittest_tools as utt from tests import unittest_tools as utt
from aesara.configdefaults import config
class TestDouble(utt.InferShapeTester): class TestDouble(utt.InferShapeTester):
def setup_method(self): def setup_method(self):
super().setup_method() super().setup_method()
...@@ -565,9 +566,12 @@ returns the right answer. If you detect an error, you must raise an ...@@ -565,9 +566,12 @@ returns the right answer. If you detect an error, you must raise an
self.op = DoubleOp() self.op = DoubleOp()
def test_basic(self): def test_basic(self):
rng = np.random.default_rng(utt.fetch_seed())
x = aesara.tensor.matrix() x = aesara.tensor.matrix()
f = aesara.function([x], self.op(x)) f = aesara.function([x], self.op(x))
inp = numpy.asarray(numpy.random.rand(5, 4), dtype=config.floatX)
inp = np.asarray(rng.random((5, 4)), dtype=aesara.config.floatX)
out = f(inp) out = f(inp)
# Compare the result computed to the expected value. # Compare the result computed to the expected value.
utt.assert_allclose(inp * 2, out) utt.assert_allclose(inp * 2, out)
...@@ -612,20 +616,26 @@ your :class:`Op` works only with such matrices, you can disable the warning with ...@@ -612,20 +616,26 @@ your :class:`Op` works only with such matrices, you can disable the warning with
.. testcode:: tests .. testcode:: tests
from tests import unittest_tools as utt
from aesara.configdefaults import config from aesara.configdefaults import config
from tests import unittest_tools as utt
class TestDouble(utt.InferShapeTester): class TestDouble(utt.InferShapeTester):
# [...] as previous tests. # [...] as previous tests.
def test_infer_shape(self): def test_infer_shape(self):
rng = np.random.default_rng(utt.fetch_seed())
x = aesara.tensor.matrix() x = aesara.tensor.matrix()
self._compile_and_check([x], # aesara.function inputs self._compile_and_check(
[self.op(x)], # aesara.function outputs [x], # aesara.function inputs
# Always use not square matrix! [self.op(x)], # aesara.function outputs
# inputs data # Always use not square matrix!
[numpy.asarray(numpy.random.rand(5, 4), # inputs data
dtype=config.floatX)], [np.asarray(rng.random((5, 4)), dtype=config.floatX)],
# Op that should be removed from the graph. # Op that should be removed from the graph.
self.op_class) self.op_class,
)
Testing the gradient Testing the gradient
^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^
...@@ -642,8 +652,11 @@ the multiplication by 2). ...@@ -642,8 +652,11 @@ the multiplication by 2).
.. testcode:: tests .. testcode:: tests
def test_grad(self): def test_grad(self):
tests.unittest_tools.verify_grad(self.op, rng = np.random.default_rng(utt.fetch_seed())
[numpy.random.rand(5, 7, 2)]) tests.unittest_tools.verify_grad(
self.op,
[rng.random((5, 7, 2))]
)
Testing the Rop Testing the Rop
^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^
...@@ -778,40 +791,34 @@ signature: ...@@ -778,40 +791,34 @@ signature:
.. testcode:: asop .. testcode:: asop
import aesara import aesara
import numpy import aesara.tensor as at
import numpy as np
from aesara import function from aesara import function
from aesara.compile.ops import as_op from aesara.compile.ops import as_op
def infer_shape_numpy_dot(fgraph, node, input_shapes): def infer_shape_numpy_dot(fgraph, node, input_shapes):
ashp, bshp = input_shapes ashp, bshp = input_shapes
return [ashp[:-1] + bshp[-1:]] return [ashp[:-1] + bshp[-1:]]
@as_op(itypes=[aesara.tensor.fmatrix, aesara.tensor.fmatrix],
otypes=[aesara.tensor.fmatrix], infer_shape=infer_shape_numpy_dot) @as_op(itypes=[at.matrix, at.matrix],
otypes=[at.matrix], infer_shape=infer_shape_numpy_dot)
def numpy_dot(a, b): def numpy_dot(a, b):
return numpy.dot(a, b) return np.dot(a, b)
You can try it as follows: You can try it as follows:
.. testcode:: asop .. testcode:: asop
x = aesara.tensor.fmatrix() x = at.matrix()
y = aesara.tensor.fmatrix() y = at.matrix()
f = function([x, y], numpy_dot(x, y)) f = function([x, y], numpy_dot(x, y))
inp1 = numpy.random.rand(5, 4).astype('float32') inp1 = np.random.random_sample((5, 4))
inp2 = numpy.random.rand(4, 7).astype('float32') inp2 = np.random.random_sample((4, 7))
out = f(inp1, inp2) out = f(inp1, inp2)
Exercise
^^^^^^^^
Run the code of the ``numpy_dot`` example above.
Modify and execute to compute: ``numpy.add`` and ``numpy.subtract``.
Modify and execute the example to return two outputs: ``x + y`` and ``x - y``.
.. _Documentation: .. _Documentation:
Documentation and Coding Style Documentation and Coding Style
...@@ -822,7 +829,7 @@ will not be accepted. ...@@ -822,7 +829,7 @@ will not be accepted.
:class:`NanGuardMode` and :class:`AllocEmpty` :class:`NanGuardMode` and :class:`AllocEmpty`
--------------------------------------------- ---------------------------------------------
:class:`NanGuardMode` help users find where in the graph NaN appear. But :class:`NanGuardMode` help users find where in the graph ``NaN`` appear. But
sometimes, we want some variables to not be checked. For example, in sometimes, we want some variables to not be checked. For example, in
the old GPU back-end, we used a float32 :class:`CudaNdarray` to store the MRG the old GPU back-end, we used a float32 :class:`CudaNdarray` to store the MRG
random number generator state (they are integers). So if :class:`NanGuardMode` random number generator state (they are integers). So if :class:`NanGuardMode`
......
...@@ -81,60 +81,60 @@ from aesara.tensor.type import dmatrix, matrix ...@@ -81,60 +81,60 @@ from aesara.tensor.type import dmatrix, matrix
class TestProdOp(utt.InferShapeTester): class TestProdOp(utt.InferShapeTester):
rng = np.random.RandomState(43)
def setup_method(self): def setup_method(self):
super().setup_method() super().setup_method()
self.op_class = ProdOp # case 1 self.op_class = ProdOp # case 1
def test_perform(self): def test_perform(self):
rng = np.random.default_rng(43)
x = matrix() x = matrix()
y = matrix() y = matrix()
f = aesara.function([x, y], self.op_class()(x, y)) f = aesara.function([x, y], self.op_class()(x, y))
x_val = np.random.rand(5, 4) x_val = rng.random((5, 4))
y_val = np.random.rand(5, 4) y_val = rng.random((5, 4))
out = f(x_val, y_val) out = f(x_val, y_val)
assert np.allclose(x_val * y_val, out) assert np.allclose(x_val * y_val, out)
def test_gradient(self): def test_gradient(self):
rng = np.random.default_rng(43)
utt.verify_grad( utt.verify_grad(
self.op_class(), self.op_class(),
[np.random.rand(5, 4), np.random.rand(5, 4)], [rng.random((5, 4)), rng.random((5, 4))],
n_tests=1, n_tests=1,
rng=TestProdOp.rng, rng=rng,
) )
def test_infer_shape(self): def test_infer_shape(self):
rng = np.random.default_rng(43)
x = dmatrix() x = dmatrix()
y = dmatrix() y = dmatrix()
self._compile_and_check( self._compile_and_check(
[x, y], [x, y],
[self.op_class()(x, y)], [self.op_class()(x, y)],
[np.random.rand(5, 6), np.random.rand(5, 6)], [rng.random((5, 6)), rng.random((5, 6))],
self.op_class, self.op_class,
) )
class TestSumDiffOp(utt.InferShapeTester): class TestSumDiffOp(utt.InferShapeTester):
rng = np.random.RandomState(43)
def setup_method(self): def setup_method(self):
super().setup_method() super().setup_method()
self.op_class = SumDiffOp self.op_class = SumDiffOp
def test_perform(self): def test_perform(self):
rng = np.random.RandomState(43)
x = matrix() x = matrix()
y = matrix() y = matrix()
f = aesara.function([x, y], self.op_class()(x, y)) f = aesara.function([x, y], self.op_class()(x, y))
x_val = np.random.rand(5, 4) x_val = rng.random((5, 4))
y_val = np.random.rand(5, 4) y_val = rng.random((5, 4))
out = f(x_val, y_val) out = f(x_val, y_val)
assert np.allclose([x_val + y_val, x_val - y_val], out) assert np.allclose([x_val + y_val, x_val - y_val], out)
def test_gradient(self): def test_gradient(self):
rng = np.random.RandomState(43)
def output_0(x, y): def output_0(x, y):
return self.op_class()(x, y)[0] return self.op_class()(x, y)[0]
...@@ -143,18 +143,20 @@ class TestSumDiffOp(utt.InferShapeTester): ...@@ -143,18 +143,20 @@ class TestSumDiffOp(utt.InferShapeTester):
utt.verify_grad( utt.verify_grad(
output_0, output_0,
[np.random.rand(5, 4), np.random.rand(5, 4)], [rng.random((5, 4)), rng.random((5, 4))],
n_tests=1, n_tests=1,
rng=TestSumDiffOp.rng, rng=rng,
) )
utt.verify_grad( utt.verify_grad(
output_1, output_1,
[np.random.rand(5, 4), np.random.rand(5, 4)], [rng.random((5, 4)), rng.random((5, 4))],
n_tests=1, n_tests=1,
rng=TestSumDiffOp.rng, rng=rng,
) )
def test_infer_shape(self): def test_infer_shape(self):
rng = np.random.RandomState(43)
x = dmatrix() x = dmatrix()
y = dmatrix() y = dmatrix()
...@@ -163,7 +165,7 @@ class TestSumDiffOp(utt.InferShapeTester): ...@@ -163,7 +165,7 @@ class TestSumDiffOp(utt.InferShapeTester):
self._compile_and_check( self._compile_and_check(
[x, y], [x, y],
self.op_class()(x, y), self.op_class()(x, y),
[np.random.rand(5, 6), np.random.rand(5, 6)], [rng.random((5, 6)), rng.random((5, 6))],
self.op_class, self.op_class,
) )
......
...@@ -97,12 +97,23 @@ Example: ...@@ -97,12 +97,23 @@ Example:
.. code-block:: python .. code-block:: python
import numpy as np
import aesara.tensor as at
def test_dot_validity(): def test_dot_validity():
a = at.dmatrix('a') a = at.dmatrix('a')
b = at.dmatrix('b') b = at.dmatrix('b')
c = at.dot(a, b) c = at.dot(a, b)
f = aesara.function([a, b], [c])
assert np.array_equal(f(self.avals, self.bvals), numpy.dot(self.avals, self.bvals)) c_fn = aesara.function([a, b], [c])
avals = ...
bvals = ...
res = c_fn(avals, bvals)
exp_res = np.dot(avals, bvals)
assert np.array_equal(res, exp_res)
Creating an :class:`Op` Unit Test Creating an :class:`Op` Unit Test
...@@ -117,16 +128,16 @@ unit tests for Aesara :class:`Op`\s. ...@@ -117,16 +128,16 @@ unit tests for Aesara :class:`Op`\s.
Validating the Gradient Validating the Gradient
----------------------- -----------------------
The :func:`verify_grad` function can be used to validate that the :meth:`Op.grad` The :func:`aesara.gradient.verify_grad` function can be used to validate that the :meth:`Op.grad`
method of your :class:`Op` is properly implemented. :func:`verify_grad` is based method of your :class:`Op` is properly implemented. :func:`verify_grad` is based
on the Finite Difference Method where the derivative of function ``f`` on the Finite Difference Method where the derivative of function :math:`f`
at point ``x`` is approximated as: at point :math:`x` is approximated as:
.. math:: .. math::
\frac{\partial{f}}{\partial{x}} = lim_{\Delta \rightarrow 0} \frac {f(x+\Delta) - f(x-\Delta)} {2\Delta} \frac{\partial{f}}{\partial{x}} = lim_{\Delta \rightarrow 0} \frac {f(x+\Delta) - f(x-\Delta)} {2\Delta}
``verify_grad`` performs the following steps: :func:`verify_grad` performs the following steps:
* approximates the gradient numerically using the Finite Difference Method * approximates the gradient numerically using the Finite Difference Method
...@@ -142,7 +153,7 @@ Here is the prototype for the :func:`verify_grad` function. ...@@ -142,7 +153,7 @@ Here is the prototype for the :func:`verify_grad` function.
def verify_grad(fun, pt, n_tests=2, rng=None, eps=1.0e-7, abs_tol=0.0001, rel_tol=0.0001): def verify_grad(fun, pt, n_tests=2, rng=None, eps=1.0e-7, abs_tol=0.0001, rel_tol=0.0001):
:func:`verify_grad` raises an ``Exception`` if the difference between the analytic gradient and :func:`verify_grad` raises an :class:`Exception` if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) of a random numerical gradient (computed through the Finite Difference Method) of a random
projection of the fun's output to a scalar exceeds both the given absolute and projection of the fun's output to a scalar exceeds both the given absolute and
relative tolerances. relative tolerances.
...@@ -152,15 +163,15 @@ The parameters are as follows: ...@@ -152,15 +163,15 @@ The parameters are as follows:
* ``fun``: a Python function that takes Aesara variables as inputs, * ``fun``: a Python function that takes Aesara variables as inputs,
and returns an Aesara variable. and returns an Aesara variable.
For instance, an :class:`Op` instance with a single output is such a function. For instance, an :class:`Op` instance with a single output is such a function.
It can also be a Python function that calls an op with some of its It can also be a Python function that calls an :class:`Op` with some of its
inputs being fixed to specific values, or that combine multiple :class:`Op`\s. inputs being fixed to specific values, or that combine multiple :class:`Op`\s.
* ``pt``: the list of numpy.ndarrays to use as input values * ``pt``: the list of `np.ndarrays` to use as input values
* ``n_tests``: number of times to run the test * ``n_tests``: number of times to run the test
* ``rng``: random number generator used to generate a random vector u, * ``rng``: random number generator used to generate a random vector `u`,
we check the gradient of sum(u*fn) at pt we check the gradient of ``sum(u*fn)`` at ``pt``
* ``eps``: stepsize used in the Finite Difference Method * ``eps``: stepsize used in the Finite Difference Method
...@@ -176,12 +187,12 @@ symbolic variable: ...@@ -176,12 +187,12 @@ symbolic variable:
def test_verify_exprgrad(): def test_verify_exprgrad():
def fun(x,y,z): def fun(x,y,z):
return (x + tensor.cos(y)) / (4 * z)**2 return (x + at.cos(y)) / (4 * z)**2
x_val = numpy.asarray([[1], [1.1], [1.2]]) x_val = np.asarray([[1], [1.1], [1.2]])
y_val = numpy.asarray([0.1, 0.2]) y_val = np.asarray([0.1, 0.2])
z_val = numpy.asarray(2) z_val = np.asarray(2)
rng = numpy.random.RandomState(42) rng = np.random.default_rng(42)
aesara.gradient.verify_grad(fun, [x_val, y_val, z_val], rng=rng) aesara.gradient.verify_grad(fun, [x_val, y_val, z_val], rng=rng)
...@@ -190,11 +201,13 @@ Here is an example showing how to use :func:`verify_grad` on an :class:`Op` inst ...@@ -190,11 +201,13 @@ Here is an example showing how to use :func:`verify_grad` on an :class:`Op` inst
.. testcode:: .. testcode::
def test_flatten_outdimNone(): def test_flatten_outdimNone():
# Testing gradient w.r.t. all inputs of an op (in this example the op """
# being used is Flatten(), which takes a single input). Testing gradient w.r.t. all inputs of an `Op` (in this example the `Op`
a_val = numpy.asarray([[0,1,2],[3,4,5]], dtype='float64') being used is `Flatten`, which takes a single input).
rng = numpy.random.RandomState(42) """
aesara.gradient.verify_grad(tensor.Flatten(), [a_val], rng=rng) a_val = np.asarray([[0,1,2],[3,4,5]], dtype='float64')
rng = np.random.default_rng(42)
aesara.gradient.verify_grad(at.Flatten(), [a_val], rng=rng)
Here is another example, showing how to verify the gradient w.r.t. a subset of Here is another example, showing how to verify the gradient w.r.t. a subset of
an :class:`Op`'s inputs. This is useful in particular when the gradient w.r.t. some of an :class:`Op`'s inputs. This is useful in particular when the gradient w.r.t. some of
...@@ -204,29 +217,30 @@ which would cause :func:`verify_grad` to crash. ...@@ -204,29 +217,30 @@ which would cause :func:`verify_grad` to crash.
.. testcode:: .. testcode::
def test_crossentropy_softmax_grad(): def test_crossentropy_softmax_grad():
op = tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias op = at.nnet.crossentropy_softmax_argmax_1hot_with_bias
def op_with_fixed_y_idx(x, b): def op_with_fixed_y_idx(x, b):
# Input `y_idx` of this Op takes integer values, so we fix them # Input `y_idx` of this `Op` takes integer values, so we fix them
# to some constant array. # to some constant array.
# Although this op has multiple outputs, we can return only one. # Although this `Op` has multiple outputs, we can return only one.
# Here, we return the first output only. # Here, we return the first output only.
return op(x, b, y_idx=numpy.asarray([0, 2]))[0] return op(x, b, y_idx=np.asarray([0, 2]))[0]
x_val = numpy.asarray([[-1, 0, 1], [3, 2, 1]], dtype='float64') x_val = np.asarray([[-1, 0, 1], [3, 2, 1]], dtype='float64')
b_val = numpy.asarray([1, 2, 3], dtype='float64') b_val = np.asarray([1, 2, 3], dtype='float64')
rng = numpy.random.RandomState(42) rng = np.random.default_rng(42)
aesara.gradient.verify_grad(op_with_fixed_y_idx, [x_val, b_val], rng=rng) aesara.gradient.verify_grad(op_with_fixed_y_idx, [x_val, b_val], rng=rng)
.. note:: .. note::
Although ``verify_grad`` is defined in ``aesara.tensor.basic``, unittests Although :func:`verify_grad` is defined in :mod:`aesara.gradient`, unittests
should use the version of ``verify_grad`` defined in ``tests.unittest_tools``. should use the version of :func:`verify_grad` defined in :mod:`tests.unittest_tools`.
This is simply a wrapper function which takes care of seeding the random This is simply a wrapper function which takes care of seeding the random
number generator appropriately before calling ``aesara.gradient.verify_grad`` number generator appropriately before calling :func:`aesara.gradient.verify_grad`
makeTester and makeBroadcastTester :func:`makeTester` and :func:`makeBroadcastTester`
================================== ==================================================
Most :class:`Op` unittests perform the same function. All such tests must Most :class:`Op` unittests perform the same function. All such tests must
verify that the :class:`Op` generates the proper output, that the gradient is verify that the :class:`Op` generates the proper output, that the gradient is
...@@ -244,21 +258,23 @@ product :class:`Op`: ...@@ -244,21 +258,23 @@ product :class:`Op`:
from tests.tensor.utils import makeTester from tests.tensor.utils import makeTester
rng = np.random.default_rng(23098) rng = np.random.default_rng(23098)
TestDot = makeTester( TestDot = makeTester(
name="DotTester", name="DotTester",
op=np.dot, op=np.dot,
expected=lambda x, y: numpy.dot(x, y), expected=lambda x, y: np.dot(x, y),
checks={}, checks={},
good=dict( good=dict(
correct1=(rng.rand(5, 7), rng.rand(7, 5)), correct1=(rng.random((5, 7)), rng.random((7, 5))),
correct2=(rng.rand(5, 7), rng.rand(7, 9)), correct2=(rng.random((5, 7)), rng.random((7, 9))),
correct3=(rng.rand(5, 7), rng.rand(7)), correct3=(rng.random((5, 7)), rng.random((7,))),
), ),
bad_build=dict(), bad_build=dict(),
bad_runtime=dict( bad_runtime=dict(
bad1=(rng.rand(5, 7), rng.rand(5, 7)), bad2=(rng.rand(5, 7), rng.rand(8, 3)) bad1=(rng.random((5, 7)), rng.random((5, 7))),
bad2=(rng.random((5, 7)), rng.random((8, 3)))
), ),
grad=dict(), grad=dict(),
) )
......
...@@ -14,37 +14,36 @@ Guide ...@@ -14,37 +14,36 @@ Guide
===== =====
The NanGuardMode aims to prevent the model from outputting NaNs or Infs. It has The :class:`NanGuardMode` aims to prevent the model from outputting NaNs or Infs. It has
a number of self-checks, which can help to find out which apply node is a number of self-checks, which can help to find out which :class:`Apply` node is
generating those incorrect outputs. It provides automatic detection of 3 types generating those incorrect outputs. It provides automatic detection of three types
of abnormal values: NaNs, Infs, and abnormally big values. of abnormal values: NaNs, Infs, and abnormally big values.
NanGuardMode can be used as follows: `NanGuardMode` can be used as follows:
.. testcode:: .. testcode::
import numpy import numpy as np
import aesara import aesara
import aesara.tensor as at import aesara.tensor as at
from aesara.compile.nanguardmode import NanGuardMode from aesara.compile.nanguardmode import NanGuardMode
x = at.matrix() x = at.matrix()
w = aesara.shared(numpy.random.randn(5, 7).astype(aesara.config.floatX)) w = aesara.shared(np.random.standard_normal((5, 7)).astype(aesara.config.floatX))
y = at.dot(x, w) y = at.dot(x, w)
fun = aesara.function( fun = aesara.function(
[x], y, [x], y,
mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
) )
While using the aesara function ``fun``, it will monitor the values of each While using the Aesara function ``fun``, it will monitor the values of each
input and output variable of each node. When abnormal values are input and output variable of each node. When abnormal values are
detected, it raises an error to indicate which node yields the NaNs. For detected, it raises an error to indicate which node yields the NaNs. For
example, if we pass the following values to ``fun``: example, if we pass the following values to ``fun``:
.. testcode:: .. testcode::
infa = numpy.tile( infa = np.tile((np.asarray(100.) ** 1000000).astype(aesara.config.floatX), (3, 5))
(numpy.asarray(100.) ** 1000000).astype(aesara.config.floatX), (3, 5))
fun(infa) fun(infa)
.. testoutput:: .. testoutput::
...@@ -55,17 +54,17 @@ example, if we pass the following values to ``fun``: ...@@ -55,17 +54,17 @@ example, if we pass the following values to ``fun``:
... ...
AssertionError: ... AssertionError: ...
It will raise an AssertionError indicating that Inf value is detected while It will raise an `AssertionError` indicating that Inf value is detected while
executing the function. executing the function.
You can also set the three parameters in ``NanGuardMode()`` to indicate which You can also set the three parameters in `NanGuardMode` to indicate which
kind of abnormal values to monitor. ``nan_is_error`` and ``inf_is_error`` has kind of abnormal values to monitor. ``nan_is_error`` and ``inf_is_error`` have
no default values, so they need to be set explicitly, but ``big_is_error`` is no default values, so they need to be set explicitly, but ``big_is_error`` is
set to be ``True`` by default. set to be ``True`` by default.
.. note:: .. note::
NanGuardMode significantly slows down computations; only `NanGuardMode` significantly slows down computations; only
enable as needed. enable as needed.
Reference Reference
......
...@@ -797,8 +797,7 @@ import ``aesara`` and print the config variable, as in: ...@@ -797,8 +797,7 @@ import ``aesara`` and print the config variable, as in:
Aesara will execute the graph using constants and/or shared variables Aesara will execute the graph using constants and/or shared variables
provided by the user. Purely symbolic variables (e.g. ``x = provided by the user. Purely symbolic variables (e.g. ``x =
aesara.tensor.dmatrix()``) can be augmented with test values, by writing to aesara.tensor.dmatrix()``) can be augmented with test values, by writing to
their ``tag.test_value`` attribute (e.g. ``x.tag.test_value = their ``.tag.test_value`` attributes (e.g. ``x.tag.test_value = np.ones((5, 4))``).
numpy.random.rand(5, 4)``).
When not ``'off'``, the value of this option dictates what happens when When not ``'off'``, the value of this option dictates what happens when
an :class:`Op`'s inputs do not provide appropriate test values: an :class:`Op`'s inputs do not provide appropriate test values:
......
...@@ -65,8 +65,8 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY? ...@@ -65,8 +65,8 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY?
up_fn, app_fn = build_logistic_regression_model(n_in=10, n_out=3, l2_coef=30.0) up_fn, app_fn = build_logistic_regression_model(n_in=10, n_out=3, l2_coef=30.0)
x_data = numpy.random.randn(100, 10) x_data = numpy.random.standard_normal((100, 10))
y_data = numpy.random.randn(100, 3) y_data = numpy.random.standard_normal((100, 3))
y_data = _asarray(y_data == numpy.max(y_data, axis=1), dtype='int64') y_data = _asarray(y_data == numpy.max(y_data, axis=1), dtype='int64')
print "Model Training ..." print "Model Training ..."
......
...@@ -11,31 +11,36 @@ Note that you want SciPy >= 0.7.2 ...@@ -11,31 +11,36 @@ Note that you want SciPy >= 0.7.2
.. warning:: .. warning::
In SciPy 0.6, ``scipy.csc_matrix.dot`` has a bug with singleton In SciPy 0.6, `scipy.csc_matrix.dot` has a bug with singleton
dimensions. There may be more bugs. It also has inconsistent dimensions. There may be more bugs. It also has inconsistent
implementation of sparse matrices. implementation of sparse matrices.
We do not test against SciPy versions below 0.7.2. We do not test against SciPy versions below 0.7.2.
We describe the details of the compressed sparse matrix types. We describe the details of the compressed sparse matrix types.
``scipy.sparse.csc_matrix`` `scipy.sparse.csc_matrix`
should be used if there are more rows than column (shape[0] > shape[1]). should be used if there are more rows than columns (``shape[0] > shape[1]``).
``scipy.sparse.csr_matrix`` `scipy.sparse.csr_matrix`
should be used if there are more columns than rows (shape[0] < shape[1]). should be used if there are more columns than rows (``shape[0] < shape[1]``).
``scipy.sparse.lil_matrix`` `scipy.sparse.lil_matrix`
is faster if we are modifying the array. After initial inserts, is faster if we are modifying the array. After initial inserts,
we can then convert to the appropriate sparse matrix format. we can then convert to the appropriate sparse matrix format.
The following types also exist: The following types also exist:
``dok_matrix`` `dok_matrix`
Dictionary of Keys format. From their doc: This is an efficient structure for constructing sparse matrices incrementally. Dictionary of Keys format. From their doc: This is an efficient structure for constructing sparse matrices incrementally.
``coo_matrix`` `coo_matrix`
Coordinate format. From their lil doc: consider using the COO format when constructing large matrices. Coordinate format. From their lil doc: consider using the COO format when constructing large matrices.
There seems to be a new format planned for scipy 0.7.x: There seems to be a new format planned for SciPy 0.7.x:
``bsr_matrix`` `bsr_matrix`
Block Compressed Row (BSR). From their doc: The Block Compressed Row (BSR) format is very similar to the Compressed Sparse Row (CSR) format. BSR is appropriate for sparse matrices with dense sub matrices like the last example below. Block matrices often arise in vector-valued finite element discretizations. In such cases, BSR is considerably more efficient than CSR and CSC for many sparse arithmetic operations. Block Compressed Row (BSR). From their doc: The Block Compressed Row
``dia_matrix`` (BSR) format is very similar to the Compressed Sparse Row (CSR)
format. BSR is appropriate for sparse matrices with dense sub matrices
like the last example below. Block matrices often arise in vector-valued
finite element discretizations. In such cases, BSR is considerably more
efficient than CSR and CSC for many sparse arithmetic operations.
`dia_matrix`
Sparse matrix with DIAgonal storage Sparse matrix with DIAgonal storage
There are four member variables that comprise a compressed matrix ``sp`` (for at least csc, csr and bsr): There are four member variables that comprise a compressed matrix ``sp`` (for at least csc, csr and bsr):
...@@ -52,9 +57,9 @@ There are four member variables that comprise a compressed matrix ``sp`` (for at ...@@ -52,9 +57,9 @@ There are four member variables that comprise a compressed matrix ``sp`` (for at
row location. row location.
``sp.indptr`` ``sp.indptr``
gives the other location of the non-zero entry. For CSC, there are gives the other location of the non-zero entry. For CSC, there are
as many values of indptr as there are columns + 1 in the matrix. as many values of indptr as there are ``columns + 1`` in the matrix.
``sp.indptr[k] = x`` and ``indptr[k+1] = y`` means that column ``sp.indptr[k] = x`` and ``indptr[k+1] = y`` means that column
k contains sp.data[x:y], i.e. the xth through the y-1th non-zero values. ``k`` contains ``sp.data[x:y]``, i.e. the ``x``-th through the ``y-1``-th non-zero values.
See the example below for details. See the example below for details.
...@@ -63,7 +68,7 @@ See the example below for details. ...@@ -63,7 +68,7 @@ See the example below for details.
>>> import scipy.sparse >>> import scipy.sparse
>>> sp = scipy.sparse.csc_matrix((5, 10)) >>> sp = scipy.sparse.csc_matrix((5, 10))
>>> sp[4, 0] = 20 >>> sp[4, 0] = 20
/u/lisa/local/byhost/test_maggie46.iro.umontreal.ca/lib64/python2.5/site-packages/scipy/sparse/compressed.py:494: SparseEfficiencyWarning: changing the sparsity structure of a csc_matrix is expensive. lil_matrix is more efficient. SparseEfficiencyWarning: changing the sparsity structure of a csc_matrix is expensive. lil_matrix is more efficient.
SparseEfficiencyWarning) SparseEfficiencyWarning)
>>> sp[0, 0] = 10 >>> sp[0, 0] = 10
>>> sp[2, 3] = 30 >>> sp[2, 3] = 30
...@@ -91,13 +96,13 @@ Several things should be learned from the above example: ...@@ -91,13 +96,13 @@ Several things should be learned from the above example:
* We actually use the wrong sparse matrix type. In fact, it is the * We actually use the wrong sparse matrix type. In fact, it is the
*rows* that are sparse, not the columns. So, it would have been *rows* that are sparse, not the columns. So, it would have been
better to use ``sp = scipy.sparse.csr_matrix((5, 10))``. better to use ``sp = scipy.sparse.csr_matrix((5, 10))``.
* We should have actually created the matrix as a ``lil_matrix``, * We should have actually created the matrix as a `lil_matrix`,
which is more efficient for inserts. Afterwards, we should convert which is more efficient for inserts. Afterwards, we should convert
to the appropriate compressed format. to the appropriate compressed format.
* `sp.indptr[0] = 0` and `sp.indptr[1] = 2`, which means that * ``sp.indptr[0] = 0`` and ``sp.indptr[1] = 2``, which means that
column 0 contains sp.data[0:2], i.e. the first two non-zero values. column 0 contains ``sp.data[0:2]``, i.e. the first two non-zero values.
* `sp.indptr[3] = 2` and `sp.indptr[4] = 3`, which means that column * ``sp.indptr[3] = 2`` and ``sp.indptr[4] = 3``, which means that column
3 contains sp.data[2:3], i.e. the third non-zero value. three contains ``sp.data[2:3]``, i.e. the third non-zero value.
TODO: Rewrite this documentation to do things in a smarter way. TODO: Rewrite this documentation to do things in a smarter way.
...@@ -112,7 +117,7 @@ For faster sparse code: ...@@ -112,7 +117,7 @@ For faster sparse code:
Misc Misc
---- ----
The sparse equivalent of dmatrix is csc_matrix and csr_matrix. The sparse equivalent of `dmatrix` is `csc_matrix` and `csr_matrix`.
:class:`~aesara.sparse.basic.Dot` vs. :class:`~aesara.sparse.basic.StructuredDot` :class:`~aesara.sparse.basic.Dot` vs. :class:`~aesara.sparse.basic.StructuredDot`
--------------------------------------------------------------------------------- ---------------------------------------------------------------------------------
...@@ -121,22 +126,22 @@ Often when you use a sparse matrix it is because there is a meaning to the ...@@ -121,22 +126,22 @@ Often when you use a sparse matrix it is because there is a meaning to the
structure of non-zeros. The gradient on terms outside that structure structure of non-zeros. The gradient on terms outside that structure
has no meaning, so it is computationally efficient not to compute them. has no meaning, so it is computationally efficient not to compute them.
StructuredDot is when you want the gradient to have zeroes corresponding to `StructuredDot` is when you want the gradient to have zeroes corresponding to
the sparse entries in the matrix. the sparse entries in the matrix.
TrueDot and Structured dot have different gradients `TrueDot` and `StructuredDot` have different gradients
but their perform functions should be the same. but their perform functions should be the same.
The gradient of TrueDot can have non-zeros where the sparse matrix had zeros. The gradient of `TrueDot` can have non-zeros where the sparse matrix had zeros.
The gradient of StructuredDot can't. The gradient of `StructuredDot` can't.
Suppose you have ``dot(x,w)`` where ``x`` and ``w`` are square matrices. Suppose you have ``dot(x,w)`` where ``x`` and ``w`` are square matrices.
If ``w`` is dense, like ``randn((5,5))`` and ``x`` is of full rank (though If ``w`` is dense, like ``standard_normal((5,5))`` and ``x`` is of full rank (though
potentially sparse, like a diagonal matrix of 1s) then the output will potentially sparse, like a diagonal matrix of ones) then the output will
be dense too. (But i guess the density of the output is a red herring.) be dense too.
What's important is the density of the gradient on the output. What's important is the density of the gradient on the output.
If the gradient on the output is dense, and ``w`` is dense (as we said it was) If the gradient on the output is dense, and ``w`` is dense (as we said it was)
then the True gradient on ``x`` will be dense. then the ``True`` gradient on ``x`` will be dense.
If our dot is a TrueDot, then it will say that the gradient on ``x`` is dense. If our dot is a `TrueDot`, then it will say that the gradient on ``x`` is dense.
If our dot is a StructuredDot, then it will say the gradient on ``x`` is only If our dot is a `StructuredDot`, then it will say the gradient on ``x`` is only
defined on the diagonal and ignore the gradients on the off-diagonal. defined on the diagonal and ignore the gradients on the off-diagonal.
差异被折叠。
差异被折叠。
...@@ -9,11 +9,11 @@ Configuration Settings and Compiling Modes ...@@ -9,11 +9,11 @@ Configuration Settings and Compiling Modes
Configuration Configuration
============= =============
The ``config`` module contains several *attributes* that modify Aesara's behavior. Many of these The :mod:`aesara.config` module contains several *attributes* that modify Aesara's behavior. Many of these
attributes are examined during the import of the ``aesara`` module and several are assumed to be attributes are examined during the import of the :mod:`aesara` module and several are assumed to be
read-only. read-only.
*As a rule, the attributes in the* ``config`` *module should not be modified inside the user code.* *As a rule, the attributes in the* :mod:`aesara.config` *module should not be modified inside the user code.*
Aesara's code comes with default values for these attributes, but you can Aesara's code comes with default values for these attributes, but you can
override them from your ``.aesararc`` file, and override those values in turn by override them from your ``.aesararc`` file, and override those values in turn by
...@@ -21,12 +21,12 @@ the :envvar:`AESARA_FLAGS` environment variable. ...@@ -21,12 +21,12 @@ the :envvar:`AESARA_FLAGS` environment variable.
The order of precedence is: The order of precedence is:
1. an assignment to aesara.config.<property> 1. an assignment to ``aesara.config.<property>``
2. an assignment in :envvar:`AESARA_FLAGS` 2. an assignment in :envvar:`AESARA_FLAGS`
3. an assignment in the .aesararc file (or the file indicated in :envvar:`AESARARC`) 3. an assignment in the ``.aesararc`` file (or the file indicated in :envvar:`AESARARC`)
You can display the current/effective configuration at any time by printing You can display the current/effective configuration at any time by printing
aesara.config. For example, to see a list of all active configuration `aesara.config`. For example, to see a list of all active configuration
variables, type this from the command-line: variables, type this from the command-line:
.. code-block:: bash .. code-block:: bash
...@@ -45,22 +45,24 @@ Consider the logistic regression: ...@@ -45,22 +45,24 @@ Consider the logistic regression:
.. testcode:: .. testcode::
import numpy import numpy as np
import aesara import aesara
import aesara.tensor as at import aesara.tensor as at
rng = numpy.random
rng = np.random.default_rng(2498)
N = 400 N = 400
feats = 784 feats = 784
D = (rng.randn(N, feats).astype(aesara.config.floatX), D = (rng.standard_normal((N, feats)).astype(aesara.config.floatX),
rng.randint(size=N,low=0, high=2).astype(aesara.config.floatX)) rng.integers(size=N,low=0, high=2).astype(aesara.config.floatX))
training_steps = 10000 training_steps = 10000
# Declare Aesara symbolic variables # Declare Aesara symbolic variables
x = at.matrix("x") x = at.matrix("x")
y = at.vector("y") y = at.vector("y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w") w = aesara.shared(rng.standard_normal(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(numpy.asarray(0., dtype=aesara.config.floatX), name="b") b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0] x.tag.test_value = D[0]
y.tag.test_value = D[1] y.tag.test_value = D[1]
...@@ -73,15 +75,18 @@ Consider the logistic regression: ...@@ -73,15 +75,18 @@ Consider the logistic regression:
# Compile expressions to functions # Compile expressions to functions
train = aesara.function( train = aesara.function(
inputs=[x,y], inputs=[x,y],
outputs=[prediction, xent], outputs=[prediction, xent],
updates=[(w, w-0.01*gw), (b, b-0.01*gb)], updates=[(w, w-0.01*gw), (b, b-0.01*gb)],
name = "train") name = "train"
predict = aesara.function(inputs=[x], outputs=prediction, )
name = "predict") predict = aesara.function(
inputs=[x], outputs=prediction,
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in name = "predict"
train.maker.fgraph.toposort()]): )
if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm']
for x in train.maker.fgraph.toposort()]):
print('Used the cpu') print('Used the cpu')
else: else:
print('ERROR, not able to tell if aesara used the cpu or another device') print('ERROR, not able to tell if aesara used the cpu or another device')
...@@ -106,7 +111,7 @@ Consider the logistic regression: ...@@ -106,7 +111,7 @@ Consider the logistic regression:
prediction on D prediction on D
... ...
Modify and execute this example to run on CPU (the default) with floatX=float32 and Modify and execute this example to run on CPU (the default) with ``floatX=float32`` and
time the execution using the command line ``time python file.py``. Save your code time the execution using the command line ``time python file.py``. Save your code
as it will be useful later on. as it will be useful later on.
...@@ -114,10 +119,10 @@ as it will be useful later on. ...@@ -114,10 +119,10 @@ as it will be useful later on.
* Apply the Aesara flag ``floatX=float32`` (through ``aesara.config.floatX``) in your code. * Apply the Aesara flag ``floatX=float32`` (through ``aesara.config.floatX``) in your code.
* Cast inputs before storing them into a shared variable. * Cast inputs before storing them into a shared variable.
* Circumvent the automatic cast of *int32* with *float32* to *float64*: * Circumvent the automatic cast of int32 with float32 to float64:
* Insert manual cast in your code or use *[u]int{8,16}*. * Insert manual cast in your code or use [u]int{8,16}.
* Insert manual cast around the mean operator (this involves division by length, which is an *int64*). * Insert manual cast around the mean operator (this involves division by length, which is an int64).
* Note that a new casting mechanism is being developed. * Note that a new casting mechanism is being developed.
:download:`Solution<modes_solution_1.py>` :download:`Solution<modes_solution_1.py>`
...@@ -156,7 +161,7 @@ short name Full constructor ...@@ -156,7 +161,7 @@ short name Full constructor
.. Note:: .. Note::
For debugging purpose, there also exists a ``MonitorMode`` (which has no For debugging purpose, there also exists a :class:`MonitorMode` (which has no
short name). It can be used to step through the execution of a function: short name). It can be used to step through the execution of a function:
see :ref:`the debugging FAQ<faq_monitormode>` for details. see :ref:`the debugging FAQ<faq_monitormode>` for details.
...@@ -165,8 +170,8 @@ Linkers ...@@ -165,8 +170,8 @@ Linkers
======= =======
A mode is composed of 2 things: an optimizer and a linker. Some modes, A mode is composed of 2 things: an optimizer and a linker. Some modes,
like ``NanGuardMode`` and ``DebugMode``, add logic around the like `NanGuardMode` and `DebugMode`, add logic around the
optimizer and linker. ``DebugMode`` uses its own linker. optimizer and linker. `DebugMode` uses its own linker.
You can select which linker to use with the Aesara flag :attr:`config.linker`. You can select which linker to use with the Aesara flag :attr:`config.linker`.
Here is a table to compare the different linkers. Here is a table to compare the different linkers.
...@@ -233,8 +238,8 @@ Using DebugMode ...@@ -233,8 +238,8 @@ Using DebugMode
While normally you should use the ``FAST_RUN`` or ``FAST_COMPILE`` mode, While normally you should use the ``FAST_RUN`` or ``FAST_COMPILE`` mode,
it is useful at first (especially when you are defining new kinds of it is useful at first (especially when you are defining new kinds of
expressions or new optimizations) to run your code using the DebugMode expressions or new optimizations) to run your code using the `DebugMode`
(available via ``mode='DebugMode``). The DebugMode is designed to (available via ``mode='DebugMode``). The `DebugMode` is designed to
run several self-checks and assertions that can help diagnose run several self-checks and assertions that can help diagnose
possible programming errors leading to incorrect output. Note that possible programming errors leading to incorrect output. Note that
``DebugMode`` is much slower than ``FAST_RUN`` or ``FAST_COMPILE`` so ``DebugMode`` is much slower than ``FAST_RUN`` or ``FAST_COMPILE`` so
...@@ -245,7 +250,7 @@ cluster!). ...@@ -245,7 +250,7 @@ cluster!).
.. If you modify this code, also change : .. If you modify this code, also change :
.. tests/test_tutorial.py:T_modes.test_modes_1 .. tests/test_tutorial.py:T_modes.test_modes_1
DebugMode is used as follows: `DebugMode` is used as follows:
.. testcode:: .. testcode::
...@@ -258,21 +263,21 @@ DebugMode is used as follows: ...@@ -258,21 +263,21 @@ DebugMode is used as follows:
f([7]) f([7])
If any problem is detected, DebugMode will raise an exception according to If any problem is detected, `DebugMode` will raise an exception according to
what went wrong, either at call time (*f(5)*) or compile time ( what went wrong, either at call time (e.g. ``f(5)``) or compile time (
``f = aesara.function(x, 10 * x, mode='DebugMode')``). These exceptions ``f = aesara.function(x, 10 * x, mode='DebugMode')``). These exceptions
should *not* be ignored; talk to your local Aesara guru or email the should *not* be ignored; talk to your local Aesara guru or email the
users list if you cannot make the exception go away. users list if you cannot make the exception go away.
Some kinds of errors can only be detected for certain input value combinations. Some kinds of errors can only be detected for certain input value combinations.
In the example above, there is no way to guarantee that a future call to, say In the example above, there is no way to guarantee that a future call to, say
*f(-1)*, won't cause a problem. DebugMode is not a silver bullet. ``f(-1)``, won't cause a problem. `DebugMode` is not a silver bullet.
.. TODO: repair the following link .. TODO: repair the following link
If you instantiate DebugMode using the constructor (see :class:`DebugMode`) If you instantiate `DebugMode` using the constructor (see :class:`DebugMode`)
rather than the keyword ``DebugMode`` you can configure its behaviour via rather than the keyword `DebugMode` you can configure its behaviour via
constructor arguments. The keyword version of DebugMode (which you get by using ``mode='DebugMode'``) constructor arguments. The keyword version of `DebugMode` (which you get by using ``mode='DebugMode'``)
is quite strict. is quite strict.
For more detail, see :ref:`DebugMode<debugmode>` in the library. For more detail, see :ref:`DebugMode<debugmode>` in the library.
...@@ -2,59 +2,62 @@ ...@@ -2,59 +2,62 @@
# Aesara tutorial # Aesara tutorial
# Solution to Exercise in section 'Configuration Settings and Compiling Modes' # Solution to Exercise in section 'Configuration Settings and Compiling Modes'
import numpy as np import numpy as np
import aesara import aesara
import aesara.tensor as at import aesara.tensor as at
aesara.config.floatX = 'float32'
rng = np.random aesara.config.floatX = "float32"
rng = np.random.default_rng(428)
N = 400 N = 400
feats = 784 feats = 784
D = (rng.randn(N, feats).astype(aesara.config.floatX), D = (
rng.randint(size=N, low=0, high=2).astype(aesara.config.floatX)) rng.standard_normal((N, feats)).astype(aesara.config.floatX),
rng.integers(size=N, low=0, high=2).astype(aesara.config.floatX),
)
training_steps = 10000 training_steps = 10000
# Declare Aesara symbolic variables # Declare Aesara symbolic variables
x = at.matrix("x") x = at.matrix("x")
y = at.vector("y") y = at.vector("y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w") w = aesara.shared(rng.standard_normal(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b") b = aesara.shared(np.asarray(0.0, dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0] x.tag.test_value = D[0]
y.tag.test_value = D[1] y.tag.test_value = D[1]
#print "Initial model:" # print "Initial model:"
#print w.get_value(), b.get_value() # print w.get_value(), b.get_value()
# Construct Aesara expression graph # Construct Aesara expression graph
p_1 = 1 / (1 + at.exp(-at.dot(x, w) - b)) # Probability of having a one p_1 = 1 / (1 + at.exp(-at.dot(x, w) - b)) # Probability of having a one
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1 prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
xent = -y * at.log(p_1) - (1 - y) * at.log(1 - p_1) # Cross-entropy xent = -y * at.log(p_1) - (1 - y) * at.log(1 - p_1) # Cross-entropy
cost = at.cast(xent.mean(), 'float32') + \ cost = at.cast(xent.mean(), "float32") + 0.01 * (w**2).sum() # The cost to optimize
0.01 * (w ** 2).sum() # The cost to optimize
gw, gb = at.grad(cost, [w, b]) gw, gb = at.grad(cost, [w, b])
# Compile expressions to functions # Compile expressions to functions
train = aesara.function( train = aesara.function(
inputs=[x, y], inputs=[x, y],
outputs=[prediction, xent], outputs=[prediction, xent],
updates={w: w - 0.01 * gw, b: b - 0.01 * gb}, updates={w: w - 0.01 * gw, b: b - 0.01 * gb},
name="train") name="train",
predict = aesara.function(inputs=[x], outputs=prediction, )
name="predict") predict = aesara.function(inputs=[x], outputs=prediction, name="predict")
if any(x.op.__class__.__name__ in ('Gemv', 'CGemv', 'Gemm', 'CGemm') for x in if any(
train.maker.fgraph.toposort()): x.op.__class__.__name__ in ("Gemv", "CGemv", "Gemm", "CGemm")
print('Used the cpu') for x in train.maker.fgraph.toposort()
):
print("Used the cpu")
else: else:
print('ERROR, not able to tell if aesara used the cpu or another device') print("ERROR, not able to tell if aesara used the cpu or another device")
print(train.maker.fgraph.toposort()) print(train.maker.fgraph.toposort())
for i in range(training_steps): for i in range(training_steps):
pred, err = train(D[0], D[1]) pred, err = train(D[0], D[1])
#print "Final model:" # print "Final model:"
#print w.get_value(), b.get_value() # print w.get_value(), b.get_value()
print("target values for D") print("target values for D")
print(D[1]) print(D[1])
......
...@@ -25,20 +25,20 @@ that creates an image of the function. You can read about them in ...@@ -25,20 +25,20 @@ that creates an image of the function. You can read about them in
Consider again the logistic regression example: Consider again the logistic regression example:
>>> import numpy >>> import numpy as np
>>> import aesara >>> import aesara
>>> import aesara.tensor as at >>> import aesara.tensor as at
>>> rng = numpy.random >>> rng = np.random.default_rng(2382)
>>> # Training data >>> # Training data
>>> N = 400 >>> N = 400
>>> feats = 784 >>> feats = 784
>>> D = (rng.randn(N, feats).astype(aesara.config.floatX), rng.randint(size=N,low=0, high=2).astype(aesara.config.floatX)) >>> D = (rng.standard_normal((N, feats)).astype(aesara.config.floatX), rng.integers(size=N,low=0, high=2).astype(aesara.config.floatX))
>>> training_steps = 10000 >>> training_steps = 10000
>>> # Declare Aesara symbolic variables >>> # Declare Aesara symbolic variables
>>> x = at.matrix("x") >>> x = at.matrix("x")
>>> y = at.vector("y") >>> y = at.vector("y")
>>> w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w") >>> w = aesara.shared(rng.standard_normal(feats).astype(aesara.config.floatX), name="w")
>>> b = aesara.shared(numpy.asarray(0., dtype=aesara.config.floatX), name="b") >>> b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b")
>>> x.tag.test_value = D[0] >>> x.tag.test_value = D[0]
>>> y.tag.test_value = D[1] >>> y.tag.test_value = D[1]
>>> # Construct Aesara expression graph >>> # Construct Aesara expression graph
......
import numpy as np import numpy as np
import aesara import aesara
x, y, z = aesara.tensor.vectors('xyz') x, y, z = aesara.tensor.vectors("xyz")
f = aesara.function([x, y, z], [(x + y + z) * 2]) f = aesara.function([x, y, z], [(x + y + z) * 2])
xv = np.random.rand(10).astype(aesara.config.floatX) xv = np.random.random((10,)).astype(aesara.config.floatX)
yv = np.random.rand(10).astype(aesara.config.floatX) yv = np.random.random((10,)).astype(aesara.config.floatX)
zv = np.random.rand(10).astype(aesara.config.floatX) zv = np.random.random((10,)).astype(aesara.config.floatX)
f(xv, yv, zv) f(xv, yv, zv)
...@@ -49,7 +49,7 @@ upgrade. Here is the current state of what can be done: ...@@ -49,7 +49,7 @@ upgrade. Here is the current state of what can be done:
aesara.tensor.nnet.conv2d(..., image_shape=(7, 3, 5, 5), filter_shape=(2, 3, 4, 4)) aesara.tensor.nnet.conv2d(..., image_shape=(7, 3, 5, 5), filter_shape=(2, 3, 4, 4))
- You can use the ``SpecifyShape`` op to add shape information anywhere in the - You can use the :class:`SpecifyShape`\ :class:`Op` to add shape information anywhere in the
graph. This allows to perform some optimizations. In the following example, graph. This allows to perform some optimizations. In the following example,
this makes it possible to precompute the Aesara function to a constant. this makes it possible to precompute the Aesara function to a constant.
...@@ -67,13 +67,13 @@ Problems with Shape inference ...@@ -67,13 +67,13 @@ Problems with Shape inference
Sometimes this can lead to errors. Consider this example: Sometimes this can lead to errors. Consider this example:
>>> import numpy >>> import numpy as np
>>> import aesara >>> import aesara
>>> x = aesara.tensor.matrix('x') >>> x = aesara.tensor.matrix('x')
>>> y = aesara.tensor.matrix('y') >>> y = aesara.tensor.matrix('y')
>>> z = aesara.tensor.join(0, x, y) >>> z = aesara.tensor.join(0, x, y)
>>> xv = numpy.random.rand(5, 4) >>> xv = np.random.random((5, 4))
>>> yv = numpy.random.rand(3, 3) >>> yv = np.random.random((3, 3))
>>> f = aesara.function([x, y], z.shape) >>> f = aesara.function([x, y], z.shape)
>>> aesara.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE >>> aesara.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
...@@ -109,7 +109,7 @@ This makes the computation of the shape faster, but it can also hide errors. In ...@@ -109,7 +109,7 @@ This makes the computation of the shape faster, but it can also hide errors. In
this example, the computation of the shape of the output of ``join`` is done only this example, the computation of the shape of the output of ``join`` is done only
based on the first input Aesara variable, which leads to an error. based on the first input Aesara variable, which leads to an error.
This might happen with other ops such as ``elemwise`` and ``dot``, for example. This might happen with other `Op`\s such as :class:`Elemwise` and :class:`Dot`, for example.
Indeed, to perform some optimizations (for speed or stability, for instance), Indeed, to perform some optimizations (for speed or stability, for instance),
Aesara assumes that the computation is correct and consistent Aesara assumes that the computation is correct and consistent
in the first place, as it does here. in the first place, as it does here.
...@@ -118,5 +118,5 @@ You can detect those problems by running the code without this ...@@ -118,5 +118,5 @@ You can detect those problems by running the code without this
optimization, using the Aesara flag optimization, using the Aesara flag
``optimizer_excluding=local_shape_to_shape_i``. You can also obtain the ``optimizer_excluding=local_shape_to_shape_i``. You can also obtain the
same effect by running in the modes ``FAST_COMPILE`` (it will not apply this same effect by running in the modes ``FAST_COMPILE`` (it will not apply this
optimization, nor most other optimizations) or ``DebugMode`` (it will test optimization, nor most other optimizations) or :class:`DebugMode` (it will test
before and after all optimizations (much slower)). before and after all optimizations).
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论