提交 c058326d authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Brandon T. Willard

Replace use of T with aet

上级 4a8ccb6d
...@@ -25,9 +25,10 @@ class Assert(COp): ...@@ -25,9 +25,10 @@ class Assert(COp):
-------- --------
>>> import aesara >>> import aesara
>>> import aesara.tensor as aet >>> import aesara.tensor as aet
>>> x = aet.vector('x') >>> from aesara.assert_op import Assert
>>> assert_op = aet.opt.Assert() >>> x = aet.vector("x")
>>> func = aesara.function([x], assert_op(x, x.size<2)) >>> assert_op = Assert("This assert failed")
>>> func = aesara.function([x], assert_op(x, x.size < 2))
""" """
......
...@@ -3379,10 +3379,10 @@ def dnn_batch_normalization_train( ...@@ -3379,10 +3379,10 @@ def dnn_batch_normalization_train(
axes = 0 if mode == 'per-activation' else (0, 2, 3) axes = 0 if mode == 'per-activation' else (0, 2, 3)
mean = inputs.mean(axes, keepdims=True) mean = inputs.mean(axes, keepdims=True)
var = inputs.var(axes, keepdims=True) var = inputs.var(axes, keepdims=True)
invstd = T.inv(T.sqrt(var + epsilon)) invstd = aet.inv(aet.sqrt(var + epsilon))
out = (inputs - mean) * gamma * invstd + beta out = (inputs - mean) * gamma * invstd + beta
m = T.cast(T.prod(inputs.shape) / T.prod(mean.shape), 'float32') m = aet.cast(aet.prod(inputs.shape) / aet.prod(mean.shape), 'float32')
running_mean = running_mean * (1 - running_average_factor) + \\ running_mean = running_mean * (1 - running_average_factor) + \\
mean * running_average_factor mean * running_average_factor
running_var = running_var * (1 - running_average_factor) + \\ running_var = running_var * (1 - running_average_factor) + \\
...@@ -3511,9 +3511,9 @@ def dnn_batch_normalization_test( ...@@ -3511,9 +3511,9 @@ def dnn_batch_normalization_test(
.. code-block:: python .. code-block:: python
axes = (0,) if mode == 'per-activation' else (0, 2, 3) axes = (0,) if mode == 'per-activation' else (0, 2, 3)
gamma, beta, mean, var = (T.addbroadcast(t, *axes) gamma, beta, mean, var = (aet.addbroadcast(t, *axes)
for t in (gamma, beta, mean, var)) for t in (gamma, beta, mean, var))
out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta out = (inputs - mean) * gamma / aet.sqrt(var + epsilon) + beta
For 5d tensors, the axes would be (0, 2, 3, 4). For 5d tensors, the axes would be (0, 2, 3, 4).
""" """
......
...@@ -3420,7 +3420,7 @@ class _nd_grid: ...@@ -3420,7 +3420,7 @@ class _nd_grid:
Examples Examples
-------- --------
>>> a = T.mgrid[0:5, 0:3] >>> a = aet.mgrid[0:5, 0:3]
>>> a[0].eval() >>> a[0].eval()
array([[0, 0, 0], array([[0, 0, 0],
[1, 1, 1], [1, 1, 1],
...@@ -3433,7 +3433,7 @@ class _nd_grid: ...@@ -3433,7 +3433,7 @@ class _nd_grid:
[0, 1, 2], [0, 1, 2],
[0, 1, 2], [0, 1, 2],
[0, 1, 2]], dtype=int8) [0, 1, 2]], dtype=int8)
>>> b = T.ogrid[0:5, 0:3] >>> b = aet.ogrid[0:5, 0:3]
>>> b[0].eval() >>> b[0].eval()
array([[0], array([[0],
[1], [1],
...@@ -3853,45 +3853,28 @@ def diagonal(a, offset=0, axis1=0, axis2=1): ...@@ -3853,45 +3853,28 @@ def diagonal(a, offset=0, axis1=0, axis2=1):
class AllocDiag(Op): class AllocDiag(Op):
""" """An `Op` that copies a vector to the diagonal of an empty matrix.
An op that copies a vector to the diagonal of an empty matrix. It does the
inverse of ExtractDiag.
Usage: T.AllocDiag()(x)
`x` should be a tensor vector. The parenthesis in the front should indicate
which main diagonal the vector value goes into. By default it is set to
`0`, which corresponds to setting the values of x to the main diagonal in
the returned matrix.
Parameters
----------
axis1: Axis to be used as the first axis of the 2-D
sub-arrays to which the diagonals will be allocated.
Defaults to first axis (0).
axis2: Axis to be used as the second axis of the 2-D
sub-arrays to which the diagonals will be allocated.
Defaults to second axis (1).
offset: Offset of the diagonal from the main diagonal defined by `axis1`
and `axis2`.
Can be positive or negative.
Defaults to main diagonal (0).
x: symbolic vector
A tensor vector consists of diagonal values.
Returns
-------
tensor : symbolic tenstor
A tensor with passed tensor values at their corresponding diagonals.
It does the inverse of `ExtractDiag`.
""" """
__props__ = ("offset", "axis1", "axis2") __props__ = ("offset", "axis1", "axis2")
def __init__(self, offset=0, axis1=0, axis2=1): def __init__(self, offset=0, axis1=0, axis2=1):
"""
Parameters
----------
offset: int
Offset of the diagonal from the main diagonal defined by `axis1`
and `axis2`. Can be positive or negative. Defaults to main
diagonal (i.e. 0).
axis1: int
Axis to be used as the first axis of the 2-D sub-arrays to which
the diagonals will be allocated. Defaults to first axis (i.e. 0).
axis2: int
Axis to be used as the second axis of the 2-D sub-arrays to which
the diagonals will be allocated. Defaults to second axis (i.e. 1).
"""
self.offset = offset self.offset = offset
self.axis1 = axis1 self.axis1 = axis1
self.axis2 = axis2 self.axis2 = axis2
......
...@@ -810,7 +810,7 @@ class ShapeFeature(toolbox.Feature): ...@@ -810,7 +810,7 @@ class ShapeFeature(toolbox.Feature):
2. to infer the shape of every node in the graph in terms of the 2. to infer the shape of every node in the graph in terms of the
input shapes. input shapes.
3. remove all fills (T.second, T.fill) from the graph 3. remove all fills ``(aet.second, aet.fill)`` from the graph
Lifting shapes as close to the inputs as possible is important for Lifting shapes as close to the inputs as possible is important for
canonicalization because it is very bad form to have to compute canonicalization because it is very bad form to have to compute
...@@ -2236,12 +2236,12 @@ def local_alloc_unary(fgraph, node): ...@@ -2236,12 +2236,12 @@ def local_alloc_unary(fgraph, node):
x = a.owner.inputs[0] x = a.owner.inputs[0]
shp = a.owner.inputs[1:] shp = a.owner.inputs[1:]
v = node.op(x) v = node.op(x)
# T.alloc does not preserve the stacktrace of v, # aet.alloc does not preserve the stacktrace of v,
# so we need to copy it over from x. # so we need to copy it over from x.
copy_stack_trace(node.outputs[0], v) copy_stack_trace(node.outputs[0], v)
ret = alloc(cast(v, node.outputs[0].dtype), *shp) ret = alloc(cast(v, node.outputs[0].dtype), *shp)
# T.cast does not preserve the stacktrace of x, # aet.cast does not preserve the stacktrace of x,
# so we need to copy it over to the output. # so we need to copy it over to the output.
copy_stack_trace([node.outputs[0], a], ret) copy_stack_trace([node.outputs[0], a], ret)
return [ret] return [ret]
...@@ -3132,14 +3132,11 @@ def local_subtensor_of_alloc(fgraph, node): ...@@ -3132,14 +3132,11 @@ def local_subtensor_of_alloc(fgraph, node):
@register_specialize @register_specialize
@local_optimizer([Subtensor]) @local_optimizer([Subtensor])
def local_subtensor_of_dot(fgraph, node): def local_subtensor_of_dot(fgraph, node):
""" """Rewrite ``aet.dot(A, B)[idxs]`` into ``aet.dot(A[idxs_a], B[idxs_b])``.
This optimization translates T.dot(A, B)[idxs] into T.dot(A[idxs_a], B[idxs_b]),
where idxs_a and idxs_b are defined appropriately.
idxs_a is the first A.ndim-1 entries of idxs, ``idxs_a`` is the first ``A.ndim-1`` entries of ``idxs``, and ``idxs_b`` is
and idxs_b is the remaining entries of idxs (if any), the remaining entries of ``idxs`` (if any), modified to skip the
modified to skip the second-to-last dimension of B second-to-last dimension of ``B`` (because dot sums over this dimension).
(because dot sums over this dimension).
""" """
if not isinstance(node.op, Subtensor): if not isinstance(node.op, Subtensor):
...@@ -3535,7 +3532,7 @@ def local_useless_inc_subtensor_alloc(fgraph, node): ...@@ -3535,7 +3532,7 @@ def local_useless_inc_subtensor_alloc(fgraph, node):
i = node.inputs[2:] i = node.inputs[2:]
if y.owner is not None and isinstance(y.owner.op, Alloc): if y.owner is not None and isinstance(y.owner.op, Alloc):
# `z` is the input of the Alloc op, i.e. T.alloc(z, <shape>) # `z` is the input of the Alloc op, i.e. aet.alloc(z, <shape>)
z = y.owner.inputs[0] z = y.owner.inputs[0]
try: try:
...@@ -3803,7 +3800,7 @@ def local_join_empty(fgraph, node): ...@@ -3803,7 +3800,7 @@ def local_join_empty(fgraph, node):
new_inputs.append(inp) new_inputs.append(inp)
if len(new_inputs) < len(node.inputs) - 1: if len(new_inputs) < len(node.inputs) - 1:
if len(new_inputs) == 0: if len(new_inputs) == 0:
# T.join do not work in that case. # aet.join do not work in that case.
# constant folding will take care of this case. # constant folding will take care of this case.
return return
ret = join(node.inputs[0], *new_inputs) ret = join(node.inputs[0], *new_inputs)
...@@ -3880,12 +3877,16 @@ def local_join_make_vector(fgraph, node): ...@@ -3880,12 +3877,16 @@ def local_join_make_vector(fgraph, node):
def local_useless_switch(fgraph, node): def local_useless_switch(fgraph, node):
""" """
This optimization makes the following changes in the graph: This optimization makes the following changes in the graph:
T.switch(cond,left,right) -->
if cond is constant and cond == 0: right
if cond is constant and cond != 0: left
if left is right -> left
T.switch(le(shape_i{id}(X), 0), 0, shape_i{id}(X)) -> shape_i{id}(X) ``aet.switch(cond, left, right)`` ->
``if cond is constant and cond == 0``: right
``if cond is constant and cond != 0``: left
``if left is right`` -> ``left``
and
``aet.switch(le(shape_i{id}(X), 0), 0, shape_i{id}(X))`` -> ``shape_i{id}(X)``
""" """
if isinstance(node.op, Elemwise) and isinstance(node.op.scalar_op, aes.Switch): if isinstance(node.op, Elemwise) and isinstance(node.op.scalar_op, aes.Switch):
......
...@@ -1111,7 +1111,7 @@ def res_is_a(fgraph, var, op, maxclients=None): ...@@ -1111,7 +1111,7 @@ def res_is_a(fgraph, var, op, maxclients=None):
def _as_scalar(res, dtype=None): def _as_scalar(res, dtype=None):
"""Return None or a TensorVariable whose type is in T.float_scalar_types""" """Return ``None`` or a `TensorVariable` whose type is in `float_scalar_types`"""
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
if np.all(res.type.broadcastable): if np.all(res.type.broadcastable):
......
...@@ -2490,14 +2490,14 @@ class Prod(CAReduceDtype): ...@@ -2490,14 +2490,14 @@ class Prod(CAReduceDtype):
Implementing that case-by-case logic is not as trivial, so a bunch of Implementing that case-by-case logic is not as trivial, so a bunch of
hacks are piled down here to do it. Notably, for the "only one zero" hacks are piled down here to do it. Notably, for the "only one zero"
case, there's a special Op that computes the product of the elements case, there's a special Op that computes the product of the elements
in the group, minus the zero (see ProdWithoutZero). The trick is then in the group, minus the zero (see `ProdWithoutZeros`). The trick is then
to use the division trick for groups with no zero, to use the to use the division trick for groups with no zero, to use the
ProdWithoutZeros op where there's only one zero, and to output a `ProdWithoutZeros` op where there's only one zero, and to output a
derivative of zero for any element part of a group with more than derivative of zero for any element part of a group with more than
one zero. one zero.
I do this by first counting the number of zeros in each group (see I do this by first counting the number of zeros in each group (see the
the "T.eq()" bits), then taking this or that behavior (see T.switch) `aet.eq` bits), then taking this or that behavior (see `aet.switch`)
based on the result of this count. based on the result of this count.
""" """
...@@ -2532,7 +2532,7 @@ class Prod(CAReduceDtype): ...@@ -2532,7 +2532,7 @@ class Prod(CAReduceDtype):
gz = gz.dimshuffle(new_dims) gz = gz.dimshuffle(new_dims)
# division trick if we don't have zeros. This will contain # division trick if we don't have zeros. This will contain
# NaNs to be eliminated in the T.switch if we do have zeros. # NaNs to be eliminated in the `aet.switch` if we do have zeros.
grad_case_without_zeros = gz * prod_out / prod_in grad_case_without_zeros = gz * prod_out / prod_in
if self.no_zeros_in_input: if self.no_zeros_in_input:
......
...@@ -148,8 +148,7 @@ def local_0_dot_x(fgraph, node): ...@@ -148,8 +148,7 @@ def local_0_dot_x(fgraph, node):
@register_canonicalize @register_canonicalize
@local_optimizer([DimShuffle]) @local_optimizer([DimShuffle])
def local_lift_transpose_through_dot(fgraph, node): def local_lift_transpose_through_dot(fgraph, node):
""" """Perform the rewrite ``dot(x,y).T -> dot(y.T, x.T)``
dot(x,y).T -> dot(y.T, x.T)
These optimizations "lift" (propagate towards the inputs) DimShuffle These optimizations "lift" (propagate towards the inputs) DimShuffle
through dot product. It allows to put the graph in a more standard shape, through dot product. It allows to put the graph in a more standard shape,
...@@ -231,8 +230,9 @@ def local_func_inv(fgraph, node): ...@@ -231,8 +230,9 @@ def local_func_inv(fgraph, node):
@local_optimizer([Sum]) @local_optimizer([Sum])
def local_sumsqr2dot(fgraph, node): def local_sumsqr2dot(fgraph, node):
""" """
This optimization detects T.sqr( W.dimshuffle('x',0,1) * G.dimshuffle(0,'x',1) ).sum(axis=(1,2)) This optimization detects
and converts this to T.dot(T.sqr(G), T.sqr(W).sum(axis=0)). ``aet.sqr(W.dimshuffle("x", 0, 1) * G.dimshuffle(0, "x", 1) ).sum(axis=(1, 2))``
and converts it to ``aet.dot(aet.sqr(G), aet.sqr(W).sum(axis=0))``.
""" """
if ( if (
isinstance(node.op, Sum) isinstance(node.op, Sum)
...@@ -305,24 +305,30 @@ def local_expm1(fgraph, node): ...@@ -305,24 +305,30 @@ def local_expm1(fgraph, node):
def local_mul_switch_sink(fgraph, node): def local_mul_switch_sink(fgraph, node):
""" """
This optimization makes the following changes in the graph: This optimization makes the following changes in the graph:
T.mul(A,T.switch(cond,0,iff),B) --> T.switch(cond,0,T.mul(A,B,iff)) ``aet.mul(A, aet.switch(cond, 0, iff), B)`` -> ``aet.switch(cond, 0, aet.mul(A, B, iff))``
T.mul(A,T.switch(cond,ift,0),B) --> T.switch(cond,T.mul(A,B,ift),0) ``aet.mul(A, aet.switch(cond, ift, 0), B)`` -> ``aet.switch(cond, aet.mul(A, B, ift), 0)``
A and B being several (or none) symbolic variables. ``A`` and ``B`` being several (or none) symbolic variables.
This is useful because A and B may not be numerically stable and give This is useful because ``A`` and ``B`` may not be numerically stable and give
NaN or inf values for cases where the switch returns 0. NaN or inf values for cases where the switch returns 0.
With this optimization T.grad(T.switch(...)) has the right behavior. With this optimization ``aet.grad(aet.switch(...))`` has the right behavior.
Examples Examples
-------- --------
x -> f(x) x -> f(x)
x -> g(x) x -> g(x)
y = T.switch(cond,f(x),g(x)) y = aet.switch(cond, f(x), g(x))
**without the optimization
T.grad(y,x) -> grad(f(x),x) * grad(y,f(x)) + grad(g(x),x) * grad(y,g(x)) without the optimization:
**with the optimization
T.grad(y,x) -> switch(cond,grad(f(x),x), 0) + switch(cond,0,grad(g(x),x)) aet.grad(y, x) -> grad(f(x), x) * grad(y, f(x)) + grad(g(x), x) * grad(y, g(x))
This will be particularly useful for the lazyif because we skip
an entire part of the graph. with the optimization
aet.grad(y, x) -> switch(cond, grad(f(x), x), 0) + switch(cond, 0, grad(g(x), x))
This will be particularly useful for the lazy ``if`` because we skip an entire
part of the graph.
""" """
if node.op != mul: if node.op != mul:
...@@ -393,13 +399,16 @@ def local_mul_switch_sink(fgraph, node): ...@@ -393,13 +399,16 @@ def local_mul_switch_sink(fgraph, node):
def local_div_switch_sink(fgraph, node): def local_div_switch_sink(fgraph, node):
""" """
This optimization makes the following changes in the graph: This optimization makes the following changes in the graph:
T.div(T.switch(cond,0,iff),A) --> T.switch(cond,0,T.div(iff,A))
T.div(T.switch(cond,ift,0),A) --> T.switch(cond,T.div(ift,A),0)
A being a symbolic variable. ``aet.div(aet.switch(cond, 0, iff), A)`` -> ``aet.switch(cond, 0, aet.div(iff, A))``
This is useful because A may not be numerically stable and give ``aet.div(aet.switch(cond, ift, 0), A)`` -> ``aet.switch(cond, aet.div(ift, A), 0)``
NaN or inf values for cases where the switch returns 0.
See local_mul_switch_sink for more details. where ``A`` is a symbolic variable.
This is useful because ``A`` may not be numerically stable and give
``nan`` or ``inf`` values for cases where the switch returns 0.
See `local_mul_switch_sink` for more details.
""" """
if node.op != true_div and node.op != int_div: if node.op != true_div and node.op != int_div:
...@@ -1027,9 +1036,8 @@ def local_sum_prod_mul_by_scalar(fgraph, node): ...@@ -1027,9 +1036,8 @@ def local_sum_prod_mul_by_scalar(fgraph, node):
# for same reason as above. # for same reason as above.
copy_stack_trace(node.outputs, new_op_output) copy_stack_trace(node.outputs, new_op_output)
# If node.op is a T.elemwise.Prod, then the scalars need to be # If `node.op` is a `Prod`, then the scalars need to be raised to
# raised to the power of the number of elements in the input # the power of the number of elements in the input to the `Prod`
# to the Prod
if isinstance(node.op, Prod) and new_op_input_nb_elements != 1: if isinstance(node.op, Prod) and new_op_input_nb_elements != 1:
scalars = [s ** new_op_input_nb_elements for s in scalars] scalars = [s ** new_op_input_nb_elements for s in scalars]
......
...@@ -17,6 +17,7 @@ import warnings ...@@ -17,6 +17,7 @@ import warnings
import numpy as np import numpy as np
import aesara import aesara
from aesara import tensor as aet
from aesara.assert_op import Assert from aesara.assert_op import Assert
from aesara.configdefaults import config from aesara.configdefaults import config
from aesara.graph.basic import Apply, Variable from aesara.graph.basic import Apply, Variable
...@@ -560,12 +561,12 @@ def assert_conv_shape(shape): ...@@ -560,12 +561,12 @@ def assert_conv_shape(shape):
assert_shp = Assert( assert_shp = Assert(
f"The convolution would produce an invalid shape (dim[{int(i)}] < 0)." f"The convolution would produce an invalid shape (dim[{int(i)}] < 0)."
) )
out_shape.append(assert_shp(n, aesara.tensor.ge(n, 0))) out_shape.append(assert_shp(n, aet.ge(n, 0)))
else: else:
assert_shp = Assert( assert_shp = Assert(
f"The convolution would produce an invalid shape (dim[{int(i)}] < 0)." f"The convolution would produce an invalid shape (dim[{int(i)}] < 0)."
) )
out_shape.append(assert_shp(n, aesara.tensor.gt(n, 0))) out_shape.append(assert_shp(n, aet.gt(n, 0)))
return tuple(out_shape) return tuple(out_shape)
...@@ -597,7 +598,7 @@ def assert_shape(x, expected_shape, msg="Unexpected shape."): ...@@ -597,7 +598,7 @@ def assert_shape(x, expected_shape, msg="Unexpected shape."):
tests = [] tests = []
for i in range(x.ndim): for i in range(x.ndim):
if expected_shape[i] is not None: if expected_shape[i] is not None:
tests.append(aesara.tensor.eq(shape[i], expected_shape[i])) tests.append(aet.eq(shape[i], expected_shape[i]))
if tests: if tests:
return Assert(msg)(x, *tests) return Assert(msg)(x, *tests)
else: else:
...@@ -1862,13 +1863,11 @@ def bilinear_kernel_1D(ratio, normalize=True): ...@@ -1862,13 +1863,11 @@ def bilinear_kernel_1D(ratio, normalize=True):
by the indicated ratio using bilinear interpolation in one dimension. by the indicated ratio using bilinear interpolation in one dimension.
""" """
half_kern = aet.arange(1, ratio + 1, dtype=config.floatX)
T = aesara.tensor kern = aet.concatenate([half_kern, half_kern[-2::-1]])
half_kern = T.arange(1, ratio + 1, dtype=config.floatX)
kern = T.concatenate([half_kern, half_kern[-2::-1]])
if normalize: if normalize:
kern /= T.cast(ratio, config.floatX) kern /= aet.cast(ratio, config.floatX)
return kern return kern
...@@ -1903,7 +1902,6 @@ def frac_bilinear_upsampling(input, frac_ratio): ...@@ -1903,7 +1902,6 @@ def frac_bilinear_upsampling(input, frac_ratio):
sides. This does not happen when it is odd. sides. This does not happen when it is odd.
""" """
T = aesara.tensor
row, col = input.shape[2:] row, col = input.shape[2:]
up_input = input.reshape((-1, 1, row, col)) up_input = input.reshape((-1, 1, row, col))
...@@ -1928,15 +1926,15 @@ def frac_bilinear_upsampling(input, frac_ratio): ...@@ -1928,15 +1926,15 @@ def frac_bilinear_upsampling(input, frac_ratio):
subsample = (frac_ratio[1], frac_ratio[1]) subsample = (frac_ratio[1], frac_ratio[1])
# duplicate borders of the input # duplicate borders of the input
concat_mat = T.concatenate( concat_mat = aet.concatenate(
(up_input[:, :, :1, :], up_input, up_input[:, :, -1:, :]), axis=2 (up_input[:, :, :1, :], up_input, up_input[:, :, -1:, :]), axis=2
) )
concat_mat = T.concatenate( concat_mat = aet.concatenate(
(concat_mat[:, :, :, :1], concat_mat, concat_mat[:, :, :, -1:]), axis=3 (concat_mat[:, :, :, :1], concat_mat, concat_mat[:, :, :, -1:]), axis=3
) )
# add padding for the pyramidal kernel # add padding for the pyramidal kernel
double_pad = (2 * T.as_tensor([row, col]) - 1) * np.array(ratio) + 1 double_pad = (2 * aet.as_tensor([row, col]) - 1) * np.array(ratio) + 1
pad = double_pad // 2 pad = double_pad // 2
# build pyramidal kernel # build pyramidal kernel
...@@ -1945,25 +1943,25 @@ def frac_bilinear_upsampling(input, frac_ratio): ...@@ -1945,25 +1943,25 @@ def frac_bilinear_upsampling(input, frac_ratio):
) )
# add corresponding padding # add corresponding padding
pad_kern = T.concatenate( pad_kern = aet.concatenate(
( (
T.zeros( aet.zeros(
tuple(kern.shape[:2]) + (pad[0], kern.shape[-1]), tuple(kern.shape[:2]) + (pad[0], kern.shape[-1]),
dtype=config.floatX, dtype=config.floatX,
), ),
kern, kern,
T.zeros( aet.zeros(
tuple(kern.shape[:2]) + (double_pad[0] - pad[0], kern.shape[-1]), tuple(kern.shape[:2]) + (double_pad[0] - pad[0], kern.shape[-1]),
dtype=config.floatX, dtype=config.floatX,
), ),
), ),
axis=2, axis=2,
) )
pad_kern = T.concatenate( pad_kern = aet.concatenate(
( (
T.zeros(tuple(pad_kern.shape[:3]) + (pad[1],), dtype=config.floatX), aet.zeros(tuple(pad_kern.shape[:3]) + (pad[1],), dtype=config.floatX),
pad_kern, pad_kern,
T.zeros( aet.zeros(
tuple(pad_kern.shape[:3]) + (double_pad[1] - pad[1],), tuple(pad_kern.shape[:3]) + (double_pad[1] - pad[1],),
dtype=config.floatX, dtype=config.floatX,
), ),
...@@ -1972,7 +1970,7 @@ def frac_bilinear_upsampling(input, frac_ratio): ...@@ -1972,7 +1970,7 @@ def frac_bilinear_upsampling(input, frac_ratio):
) )
# upsample the input by passing it as kernel of conv and using filter_dilation # upsample the input by passing it as kernel of conv and using filter_dilation
upsamp = T.nnet.conv2d( upsamp = conv2d(
pad_kern, pad_kern,
concat_mat, concat_mat,
border_mode="valid", border_mode="valid",
...@@ -2048,7 +2046,6 @@ def bilinear_upsampling( ...@@ -2048,7 +2046,6 @@ def bilinear_upsampling(
return frac_bilinear_upsampling(input, frac_ratio=frac_ratio) return frac_bilinear_upsampling(input, frac_ratio=frac_ratio)
# the remaining case if integer ratio with use_1D_kernel # the remaining case if integer ratio with use_1D_kernel
T = aesara.tensor
try: try:
up_bs = batch_size * num_input_channels up_bs = batch_size * num_input_channels
except TypeError: except TypeError:
...@@ -2058,11 +2055,11 @@ def bilinear_upsampling( ...@@ -2058,11 +2055,11 @@ def bilinear_upsampling(
# concatenating the first and last row and column # concatenating the first and last row and column
# first and last row # first and last row
concat_mat = T.concatenate( concat_mat = aet.concatenate(
(up_input[:, :, :1, :], up_input, up_input[:, :, -1:, :]), axis=2 (up_input[:, :, :1, :], up_input, up_input[:, :, -1:, :]), axis=2
) )
# first and last col # first and last col
concat_mat = T.concatenate( concat_mat = aet.concatenate(
(concat_mat[:, :, :, :1], concat_mat, concat_mat[:, :, :, -1:]), axis=3 (concat_mat[:, :, :, :1], concat_mat, concat_mat[:, :, :, -1:]), axis=3
) )
concat_col = col + 2 concat_col = col + 2
......
...@@ -2081,7 +2081,7 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(fgraph, node): ...@@ -2081,7 +2081,7 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(fgraph, node):
assert dy.ndim == 1 assert dy.ndim == 1
if dy.owner is not None and isinstance(dy.owner.op, aet.Alloc): if dy.owner is not None and isinstance(dy.owner.op, aet.Alloc):
# dz is the input of the Alloc op, i.e. T.alloc(dz, <shape>) # dz is the input of the Alloc op, i.e. aet.alloc(dz, <shape>)
dz = dy.owner.inputs[0] dz = dy.owner.inputs[0]
try: try:
......
...@@ -185,10 +185,10 @@ def batch_normalization_train( ...@@ -185,10 +185,10 @@ def batch_normalization_train(
axes = (0,) + tuple(range(2, inputs.ndim)) axes = (0,) + tuple(range(2, inputs.ndim))
mean = inputs.mean(axes, keepdims=True) mean = inputs.mean(axes, keepdims=True)
var = inputs.var(axes, keepdims=True) var = inputs.var(axes, keepdims=True)
invstd = T.inv(T.sqrt(var + epsilon)) invstd = aet.inv(aet.sqrt(var + epsilon))
out = (inputs - mean) * gamma * invstd + beta out = (inputs - mean) * gamma * invstd + beta
    m = T.cast(T.prod(inputs.shape) / T.prod(mean.shape), 'float32')                          m = aet.cast(aet.prod(inputs.shape) / aet.prod(mean.shape), 'float32')
running_mean = running_mean * (1 - running_average_factor) + \\ running_mean = running_mean * (1 - running_average_factor) + \\
mean * running_average_factor mean * running_average_factor
running_var = running_var * (1 - running_average_factor) + \\ running_var = running_var * (1 - running_average_factor) + \\
...@@ -332,9 +332,9 @@ def batch_normalization_test( ...@@ -332,9 +332,9 @@ def batch_normalization_test(
axes = (0,) axes = (0,)
# for spatial normalization # for spatial normalization
axes = (0,) + tuple(range(2, inputs.ndim)) axes = (0,) + tuple(range(2, inputs.ndim))
gamma, beta, mean, var = (T.addbroadcast(t, *axes) gamma, beta, mean, var = (aet.addbroadcast(t, *axes)
for t in (gamma, beta, mean, var)) for t in (gamma, beta, mean, var))
out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta out = (inputs - mean) * gamma / aet.sqrt(var + epsilon) + beta
""" """
ndim = inputs.ndim ndim = inputs.ndim
axes, non_bc_axes = _prepare_batch_normalization_axes(axes, ndim) axes, non_bc_axes = _prepare_batch_normalization_axes(axes, ndim)
......
...@@ -1920,8 +1920,8 @@ class TestConv2dGrads: ...@@ -1920,8 +1920,8 @@ class TestConv2dGrads:
def test_conv2d_grad_wrt_inputs(self): def test_conv2d_grad_wrt_inputs(self):
# Compares calculated abstract grads wrt inputs with the fwd grads # Compares calculated abstract grads wrt inputs with the fwd grads
# This method checks the outputs of conv2_grad_wrt_inputs against # This method checks the outputs of `conv2_grad_wrt_inputs` against
# the outputs of T.nnet.conv forward grads to make sure the # the outputs of `aesara.tensor.nnet.conv` forward grads to make sure the
# results are the same. # results are the same.
for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes): for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes):
...@@ -1986,8 +1986,8 @@ class TestConv2dGrads: ...@@ -1986,8 +1986,8 @@ class TestConv2dGrads:
def test_conv2d_grad_wrt_weights(self): def test_conv2d_grad_wrt_weights(self):
# Compares calculated abstract grads wrt weights with the fwd grads # Compares calculated abstract grads wrt weights with the fwd grads
# This method checks the outputs of conv2_grad_wrt_weights against # This method checks the outputs of `conv2_grad_wrt_weights` against
# the outputs of T.nnet.conv forward grads to make sure the # the outputs of `aesara.tensor.nnet.conv` forward grads to make sure the
# results are the same. # results are the same.
for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes): for (in_shape, fltr_shape) in zip(self.inputs_shapes, self.filters_shapes):
......
...@@ -906,7 +906,7 @@ def test_gemm_nested(): ...@@ -906,7 +906,7 @@ def test_gemm_nested():
def test_gemm_opt_wishlist(): def test_gemm_opt_wishlist():
X, Y, Z, a, b = matrix(), matrix(), matrix(), scalar(), scalar() X, Y, Z, a, b = matrix(), matrix(), matrix(), scalar(), scalar()
# with >2 additions of the same T.dot(X,Y term # with >2 additions of the same ``aet.dot(X, Y)`` term
just_gemm([X, Y, Z, a, b], [(b * b) * Z * a + (a * a) * dot(X, Y) + b * dot(X, Y)]) just_gemm([X, Y, Z, a, b], [(b * b) * Z * a + (a * a) * dot(X, Y) + b * dot(X, Y)])
just_gemm([X, Y, Z, a, b], [Z + dot(X, Y) + dot(X, Y)]) just_gemm([X, Y, Z, a, b], [Z + dot(X, Y) + dot(X, Y)])
......
...@@ -842,7 +842,7 @@ class TestAlgebraicCanonize: ...@@ -842,7 +842,7 @@ class TestAlgebraicCanonize:
# 4 * x / abs(2*x) it get simplifier during canonicalisation. # 4 * x / abs(2*x) it get simplifier during canonicalisation.
x = dscalar() x = dscalar()
# a = T.abs_(x) # a = aet.abs_(x)
if config.mode == "FAST_COMPILE": if config.mode == "FAST_COMPILE":
mode = get_mode("FAST_RUN").excluding("local_elemwise_fusion") mode = get_mode("FAST_RUN").excluding("local_elemwise_fusion")
...@@ -2366,7 +2366,6 @@ def test_local_pow_specialize(): ...@@ -2366,7 +2366,6 @@ def test_local_pow_specialize():
assert len(nodes) == 2 assert len(nodes) == 2
assert nodes[0] == sqr assert nodes[0] == sqr
assert isinstance(nodes[1].scalar_op, aes.basic.Inv) assert isinstance(nodes[1].scalar_op, aes.basic.Inv)
# assert nodes == [T.sqr,T.inv]#Why this don't work?
utt.assert_allclose(f(val_no0), val_no0 ** (-2)) utt.assert_allclose(f(val_no0), val_no0 ** (-2))
f = function([v], v ** (0.5), mode=mode) f = function([v], v ** (0.5), mode=mode)
...@@ -2379,7 +2378,6 @@ def test_local_pow_specialize(): ...@@ -2379,7 +2378,6 @@ def test_local_pow_specialize():
assert len(nodes) == 2 assert len(nodes) == 2
assert nodes[0] == sqrt assert nodes[0] == sqrt
assert isinstance(nodes[1].scalar_op, aes.basic.Inv) assert isinstance(nodes[1].scalar_op, aes.basic.Inv)
# assert nodes == [T.sqrt,T.inv]#Why this don't work?
utt.assert_allclose(f(val_no0), val_no0 ** (-0.5)) utt.assert_allclose(f(val_no0), val_no0 ** (-0.5))
......
...@@ -127,10 +127,10 @@ class LogisticRegression: ...@@ -127,10 +127,10 @@ class LogisticRegression:
the learning rate is less dependent on the batch size the learning rate is less dependent on the batch size
""" """
# y.shape[0] is (symbolically) the number of rows in y, i.e., number of examples (call it n) in the minibatch # y.shape[0] is (symbolically) the number of rows in y, i.e., number of examples (call it n) in the minibatch
# T.arange(y.shape[0]) is a symbolic vector which will contain [0,1,2,... n-1] # aet.arange(y.shape[0]) is a symbolic vector which will contain [0,1,2,... n-1]
# T.log(self.p_y_given_x) is a matrix of Log-Probabilities (call it LP) with one row per example and one column per class # aet.log(self.p_y_given_x) is a matrix of Log-Probabilities (call it LP) with one row per example and one column per class
# LP[T.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]] # LP[aet.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]]
# and T.mean(LP[T.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v, # and aet.mean(LP[aet.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v,
# i.e., the mean log-likelihood across the minibatch. # i.e., the mean log-likelihood across the minibatch.
return log(self.p_y_given_x[aet.arange(y.shape[0]), y]) return log(self.p_y_given_x[aet.arange(y.shape[0]), y])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论