Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
c058326d
提交
c058326d
authored
2月 13, 2021
作者:
Brandon T. Willard
提交者:
Brandon T. Willard
2月 13, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Replace use of T with aet
上级
4a8ccb6d
隐藏空白字符变更
内嵌
并排
正在显示
14 个修改的文件
包含
119 行增加
和
131 行删除
+119
-131
assert_op.py
aesara/assert_op.py
+4
-3
dnn.py
aesara/gpuarray/dnn.py
+4
-4
basic.py
aesara/tensor/basic.py
+18
-35
basic_opt.py
aesara/tensor/basic_opt.py
+18
-17
blas.py
aesara/tensor/blas.py
+1
-1
math.py
aesara/tensor/math.py
+5
-5
math_opt.py
aesara/tensor/math_opt.py
+35
-27
abstract_conv.py
aesara/tensor/nnet/abstract_conv.py
+19
-22
basic.py
aesara/tensor/nnet/basic.py
+1
-1
batchnorm.py
aesara/tensor/nnet/batchnorm.py
+4
-4
test_abstract_conv.py
tests/tensor/nnet/test_abstract_conv.py
+4
-4
test_blas.py
tests/tensor/test_blas.py
+1
-1
test_math_opt.py
tests/tensor/test_math_opt.py
+1
-3
test_mlp.py
tests/tensor/test_mlp.py
+4
-4
没有找到文件。
aesara/assert_op.py
浏览文件 @
c058326d
...
...
@@ -25,9 +25,10 @@ class Assert(COp):
--------
>>> import aesara
>>> import aesara.tensor as aet
>>> x = aet.vector('x')
>>> assert_op = aet.opt.Assert()
>>> func = aesara.function([x], assert_op(x, x.size<2))
>>> from aesara.assert_op import Assert
>>> x = aet.vector("x")
>>> assert_op = Assert("This assert failed")
>>> func = aesara.function([x], assert_op(x, x.size < 2))
"""
...
...
aesara/gpuarray/dnn.py
浏览文件 @
c058326d
...
...
@@ -3379,10 +3379,10 @@ def dnn_batch_normalization_train(
axes = 0 if mode == 'per-activation' else (0, 2, 3)
mean = inputs.mean(axes, keepdims=True)
var = inputs.var(axes, keepdims=True)
invstd =
T.inv(T
.sqrt(var + epsilon))
invstd =
aet.inv(aet
.sqrt(var + epsilon))
out = (inputs - mean) * gamma * invstd + beta
m =
T.cast(T.prod(inputs.shape) / T
.prod(mean.shape), 'float32')
m =
aet.cast(aet.prod(inputs.shape) / aet
.prod(mean.shape), 'float32')
running_mean = running_mean * (1 - running_average_factor) +
\\
mean * running_average_factor
running_var = running_var * (1 - running_average_factor) +
\\
...
...
@@ -3511,9 +3511,9 @@ def dnn_batch_normalization_test(
.. code-block:: python
axes = (0,) if mode == 'per-activation' else (0, 2, 3)
gamma, beta, mean, var = (
T
.addbroadcast(t, *axes)
gamma, beta, mean, var = (
aet
.addbroadcast(t, *axes)
for t in (gamma, beta, mean, var))
out = (inputs - mean) * gamma /
T
.sqrt(var + epsilon) + beta
out = (inputs - mean) * gamma /
aet
.sqrt(var + epsilon) + beta
For 5d tensors, the axes would be (0, 2, 3, 4).
"""
...
...
aesara/tensor/basic.py
浏览文件 @
c058326d
...
...
@@ -3420,7 +3420,7 @@ class _nd_grid:
Examples
--------
>>> a =
T
.mgrid[0:5, 0:3]
>>> a =
aet
.mgrid[0:5, 0:3]
>>> a[0].eval()
array([[0, 0, 0],
[1, 1, 1],
...
...
@@ -3433,7 +3433,7 @@ class _nd_grid:
[0, 1, 2],
[0, 1, 2],
[0, 1, 2]], dtype=int8)
>>> b =
T
.ogrid[0:5, 0:3]
>>> b =
aet
.ogrid[0:5, 0:3]
>>> b[0].eval()
array([[0],
[1],
...
...
@@ -3853,45 +3853,28 @@ def diagonal(a, offset=0, axis1=0, axis2=1):
class
AllocDiag
(
Op
):
"""
An op that copies a vector to the diagonal of an empty matrix. It does the
inverse of ExtractDiag.
Usage: T.AllocDiag()(x)
`x` should be a tensor vector. The parenthesis in the front should indicate
which main diagonal the vector value goes into. By default it is set to
`0`, which corresponds to setting the values of x to the main diagonal in
the returned matrix.
Parameters
----------
axis1: Axis to be used as the first axis of the 2-D
sub-arrays to which the diagonals will be allocated.
Defaults to first axis (0).
axis2: Axis to be used as the second axis of the 2-D
sub-arrays to which the diagonals will be allocated.
Defaults to second axis (1).
offset: Offset of the diagonal from the main diagonal defined by `axis1`
and `axis2`.
Can be positive or negative.
Defaults to main diagonal (0).
x: symbolic vector
A tensor vector consists of diagonal values.
Returns
-------
tensor : symbolic tenstor
A tensor with passed tensor values at their corresponding diagonals.
"""An `Op` that copies a vector to the diagonal of an empty matrix.
It does the inverse of `ExtractDiag`.
"""
__props__
=
(
"offset"
,
"axis1"
,
"axis2"
)
def
__init__
(
self
,
offset
=
0
,
axis1
=
0
,
axis2
=
1
):
"""
Parameters
----------
offset: int
Offset of the diagonal from the main diagonal defined by `axis1`
and `axis2`. Can be positive or negative. Defaults to main
diagonal (i.e. 0).
axis1: int
Axis to be used as the first axis of the 2-D sub-arrays to which
the diagonals will be allocated. Defaults to first axis (i.e. 0).
axis2: int
Axis to be used as the second axis of the 2-D sub-arrays to which
the diagonals will be allocated. Defaults to second axis (i.e. 1).
"""
self
.
offset
=
offset
self
.
axis1
=
axis1
self
.
axis2
=
axis2
...
...
aesara/tensor/basic_opt.py
浏览文件 @
c058326d
...
...
@@ -810,7 +810,7 @@ class ShapeFeature(toolbox.Feature):
2. to infer the shape of every node in the graph in terms of the
input shapes.
3. remove all fills
(T.second, T.fill)
from the graph
3. remove all fills
``(aet.second, aet.fill)``
from the graph
Lifting shapes as close to the inputs as possible is important for
canonicalization because it is very bad form to have to compute
...
...
@@ -2236,12 +2236,12 @@ def local_alloc_unary(fgraph, node):
x
=
a
.
owner
.
inputs
[
0
]
shp
=
a
.
owner
.
inputs
[
1
:]
v
=
node
.
op
(
x
)
#
T
.alloc does not preserve the stacktrace of v,
#
aet
.alloc does not preserve the stacktrace of v,
# so we need to copy it over from x.
copy_stack_trace
(
node
.
outputs
[
0
],
v
)
ret
=
alloc
(
cast
(
v
,
node
.
outputs
[
0
]
.
dtype
),
*
shp
)
#
T
.cast does not preserve the stacktrace of x,
#
aet
.cast does not preserve the stacktrace of x,
# so we need to copy it over to the output.
copy_stack_trace
([
node
.
outputs
[
0
],
a
],
ret
)
return
[
ret
]
...
...
@@ -3132,14 +3132,11 @@ def local_subtensor_of_alloc(fgraph, node):
@register_specialize
@local_optimizer
([
Subtensor
])
def
local_subtensor_of_dot
(
fgraph
,
node
):
"""
This optimization translates T.dot(A, B)[idxs] into T.dot(A[idxs_a], B[idxs_b]),
where idxs_a and idxs_b are defined appropriately.
"""Rewrite ``aet.dot(A, B)[idxs]`` into ``aet.dot(A[idxs_a], B[idxs_b])``.
idxs_a is the first A.ndim-1 entries of idxs,
and idxs_b is the remaining entries of idxs (if any),
modified to skip the second-to-last dimension of B
(because dot sums over this dimension).
``idxs_a`` is the first ``A.ndim-1`` entries of ``idxs``, and ``idxs_b`` is
the remaining entries of ``idxs`` (if any), modified to skip the
second-to-last dimension of ``B`` (because dot sums over this dimension).
"""
if
not
isinstance
(
node
.
op
,
Subtensor
):
...
...
@@ -3535,7 +3532,7 @@ def local_useless_inc_subtensor_alloc(fgraph, node):
i
=
node
.
inputs
[
2
:]
if
y
.
owner
is
not
None
and
isinstance
(
y
.
owner
.
op
,
Alloc
):
# `z` is the input of the Alloc op, i.e.
T
.alloc(z, <shape>)
# `z` is the input of the Alloc op, i.e.
aet
.alloc(z, <shape>)
z
=
y
.
owner
.
inputs
[
0
]
try
:
...
...
@@ -3803,7 +3800,7 @@ def local_join_empty(fgraph, node):
new_inputs
.
append
(
inp
)
if
len
(
new_inputs
)
<
len
(
node
.
inputs
)
-
1
:
if
len
(
new_inputs
)
==
0
:
#
T
.join do not work in that case.
#
aet
.join does not work in that case.
# constant folding will take care of this case.
return
ret
=
join
(
node
.
inputs
[
0
],
*
new_inputs
)
...
...
@@ -3880,12 +3877,16 @@ def local_join_make_vector(fgraph, node):
def
local_useless_switch
(
fgraph
,
node
):
"""
This optimization makes the following changes in the graph:
T.switch(cond,left,right) -->
if cond is constant and cond == 0: right
if cond is constant and cond != 0: left
if left is right -> left
T.switch(le(shape_i{id}(X), 0), 0, shape_i{id}(X)) -> shape_i{id}(X)
``aet.switch(cond, left, right)`` ->
``if cond is constant and cond == 0``: right
``if cond is constant and cond != 0``: left
``if left is right`` -> ``left``
and
``aet.switch(le(shape_i{id}(X), 0), 0, shape_i{id}(X))`` -> ``shape_i{id}(X)``
"""
if
isinstance
(
node
.
op
,
Elemwise
)
and
isinstance
(
node
.
op
.
scalar_op
,
aes
.
Switch
):
...
...
aesara/tensor/blas.py
浏览文件 @
c058326d
...
...
@@ -1111,7 +1111,7 @@ def res_is_a(fgraph, var, op, maxclients=None):
def
_as_scalar
(
res
,
dtype
=
None
):
"""Return
None or a TensorVariable whose type is in T.float_scalar_types
"""
"""Return
``None`` or a `TensorVariable` whose type is in `float_scalar_types`
"""
if
dtype
is
None
:
dtype
=
config
.
floatX
if
np
.
all
(
res
.
type
.
broadcastable
):
...
...
aesara/tensor/math.py
浏览文件 @
c058326d
...
...
@@ -2490,14 +2490,14 @@ class Prod(CAReduceDtype):
Implementing that case-by-case logic is not as trivial, so a bunch of
hacks are piled down here to do it. Notably, for the "only one zero"
case, there's a special Op that computes the product of the elements
in the group, minus the zero (see
ProdWithoutZero
). The trick is then
in the group, minus the zero (see
`ProdWithoutZeros`
). The trick is then
to use the division trick for groups with no zero, to use the
ProdWithoutZeros
op where there's only one zero, and to output a
`ProdWithoutZeros`
op where there's only one zero, and to output a
derivative of zero for any element part of a group with more than
one zero.
I do this by first counting the number of zeros in each group (see
the "T.eq()" bits), then taking this or that behavior (see T.switch
)
I do this by first counting the number of zeros in each group (see
the
`aet.eq` bits), then taking this or that behavior (see `aet.switch`
)
based on the result of this count.
"""
...
...
@@ -2532,7 +2532,7 @@ class Prod(CAReduceDtype):
gz
=
gz
.
dimshuffle
(
new_dims
)
# division trick if we don't have zeros. This will contain
# NaNs to be eliminated in the
T.switch
if we do have zeros.
# NaNs to be eliminated in the
`aet.switch`
if we do have zeros.
grad_case_without_zeros
=
gz
*
prod_out
/
prod_in
if
self
.
no_zeros_in_input
:
...
...
aesara/tensor/math_opt.py
浏览文件 @
c058326d
...
...
@@ -148,8 +148,7 @@ def local_0_dot_x(fgraph, node):
@register_canonicalize
@local_optimizer
([
DimShuffle
])
def
local_lift_transpose_through_dot
(
fgraph
,
node
):
"""
dot(x,y).T -> dot(y.T, x.T)
"""Perform the rewrite ``dot(x,y).T -> dot(y.T, x.T)``
These optimizations "lift" (propagate towards the inputs) DimShuffle
through dot product. It allows to put the graph in a more standard shape,
...
...
@@ -231,8 +230,9 @@ def local_func_inv(fgraph, node):
@local_optimizer
([
Sum
])
def
local_sumsqr2dot
(
fgraph
,
node
):
"""
This optimization detects T.sqr( W.dimshuffle('x',0,1) * G.dimshuffle(0,'x',1) ).sum(axis=(1,2))
and converts this to T.dot(T.sqr(G), T.sqr(W).sum(axis=0)).
This optimization detects
``aet.sqr(W.dimshuffle("x", 0, 1) * G.dimshuffle(0, "x", 1) ).sum(axis=(1, 2))``
and converts it to ``aet.dot(aet.sqr(G), aet.sqr(W).sum(axis=0))``.
"""
if
(
isinstance
(
node
.
op
,
Sum
)
...
...
@@ -305,24 +305,30 @@ def local_expm1(fgraph, node):
def
local_mul_switch_sink
(
fgraph
,
node
):
"""
This optimization makes the following changes in the graph:
T.mul(A,T.switch(cond,0,iff),B) --> T.switch(cond,0,T.mul(A,B,iff))
T.mul(A,T.switch(cond,ift,0),B) --> T.switch(cond,T.mul(A,B,ift),0)
A and B
being several (or none) symbolic variables.
This is useful because
A and B
may not be numerically stable and give
``aet.mul(A, aet.switch(cond, 0, iff), B)`` -> ``aet.switch(cond, 0, aet.mul(A, B, iff))``
``aet.mul(A, aet.switch(cond, ift, 0), B)`` -> ``aet.switch(cond, aet.mul(A, B, ift), 0)``
``A`` and ``B``
being several (or none) symbolic variables.
This is useful because
``A`` and ``B``
may not be numerically stable and give
NaN or inf values for cases where the switch returns 0.
With this optimization
T.grad(T.switch(...))
has the right behavior.
With this optimization
``aet.grad(aet.switch(...))``
has the right behavior.
Examples
--------
x -> f(x)
x -> g(x)
y = T.switch(cond,f(x),g(x))
**without the optimization
T.grad(y,x) -> grad(f(x),x) * grad(y,f(x)) + grad(g(x),x) * grad(y,g(x))
**with the optimization
T.grad(y,x) -> switch(cond,grad(f(x),x), 0) + switch(cond,0,grad(g(x),x))
This will be particularly useful for the lazyif because we skip
an entire part of the graph.
x -> f(x)
x -> g(x)
y = aet.switch(cond, f(x), g(x))
without the optimization:
aet.grad(y, x) -> grad(f(x), x) * grad(y, f(x)) + grad(g(x), x) * grad(y, g(x))
with the optimization
aet.grad(y, x) -> switch(cond, grad(f(x), x), 0) + switch(cond, 0, grad(g(x), x))
This will be particularly useful for the lazy ``if`` because we skip an entire
part of the graph.
"""
if
node
.
op
!=
mul
:
...
...
@@ -393,13 +399,16 @@ def local_mul_switch_sink(fgraph, node):
def
local_div_switch_sink
(
fgraph
,
node
):
"""
This optimization makes the following changes in the graph:
T.div(T.switch(cond,0,iff),A) --> T.switch(cond,0,T.div(iff,A))
T.div(T.switch(cond,ift,0),A) --> T.switch(cond,T.div(ift,A),0)
A being a symbolic variable.
This is useful because A may not be numerically stable and give
NaN or inf values for cases where the switch returns 0.
See local_mul_switch_sink for more details.
``aet.div(aet.switch(cond, 0, iff), A)`` -> ``aet.switch(cond, 0, aet.div(iff, A))``
``aet.div(aet.switch(cond, ift, 0), A)`` -> ``aet.switch(cond, aet.div(ift, A), 0)``
where ``A`` is a symbolic variable.
This is useful because ``A`` may not be numerically stable and give
``nan`` or ``inf`` values for cases where the switch returns 0.
See `local_mul_switch_sink` for more details.
"""
if
node
.
op
!=
true_div
and
node
.
op
!=
int_div
:
...
...
@@ -1027,9 +1036,8 @@ def local_sum_prod_mul_by_scalar(fgraph, node):
# for same reason as above.
copy_stack_trace
(
node
.
outputs
,
new_op_output
)
# If node.op is a T.elemwise.Prod, then the scalars need to be
# raised to the power of the number of elements in the input
# to the Prod
# If `node.op` is a `Prod`, then the scalars need to be raised to
# the power of the number of elements in the input to the `Prod`
if
isinstance
(
node
.
op
,
Prod
)
and
new_op_input_nb_elements
!=
1
:
scalars
=
[
s
**
new_op_input_nb_elements
for
s
in
scalars
]
...
...
aesara/tensor/nnet/abstract_conv.py
浏览文件 @
c058326d
...
...
@@ -17,6 +17,7 @@ import warnings
import
numpy
as
np
import
aesara
from
aesara
import
tensor
as
aet
from
aesara.assert_op
import
Assert
from
aesara.configdefaults
import
config
from
aesara.graph.basic
import
Apply
,
Variable
...
...
@@ -560,12 +561,12 @@ def assert_conv_shape(shape):
assert_shp
=
Assert
(
f
"The convolution would produce an invalid shape (dim[{int(i)}] < 0)."
)
out_shape
.
append
(
assert_shp
(
n
,
ae
sara
.
tensor
.
ge
(
n
,
0
)))
out_shape
.
append
(
assert_shp
(
n
,
ae
t
.
ge
(
n
,
0
)))
else
:
assert_shp
=
Assert
(
f
"The convolution would produce an invalid shape (dim[{int(i)}] < 0)."
)
out_shape
.
append
(
assert_shp
(
n
,
ae
sara
.
tensor
.
gt
(
n
,
0
)))
out_shape
.
append
(
assert_shp
(
n
,
ae
t
.
gt
(
n
,
0
)))
return
tuple
(
out_shape
)
...
...
@@ -597,7 +598,7 @@ def assert_shape(x, expected_shape, msg="Unexpected shape."):
tests
=
[]
for
i
in
range
(
x
.
ndim
):
if
expected_shape
[
i
]
is
not
None
:
tests
.
append
(
ae
sara
.
tensor
.
eq
(
shape
[
i
],
expected_shape
[
i
]))
tests
.
append
(
ae
t
.
eq
(
shape
[
i
],
expected_shape
[
i
]))
if
tests
:
return
Assert
(
msg
)(
x
,
*
tests
)
else
:
...
...
@@ -1862,13 +1863,11 @@ def bilinear_kernel_1D(ratio, normalize=True):
by the indicated ratio using bilinear interpolation in one dimension.
"""
T
=
aesara
.
tensor
half_kern
=
T
.
arange
(
1
,
ratio
+
1
,
dtype
=
config
.
floatX
)
kern
=
T
.
concatenate
([
half_kern
,
half_kern
[
-
2
::
-
1
]])
half_kern
=
aet
.
arange
(
1
,
ratio
+
1
,
dtype
=
config
.
floatX
)
kern
=
aet
.
concatenate
([
half_kern
,
half_kern
[
-
2
::
-
1
]])
if
normalize
:
kern
/=
T
.
cast
(
ratio
,
config
.
floatX
)
kern
/=
aet
.
cast
(
ratio
,
config
.
floatX
)
return
kern
...
...
@@ -1903,7 +1902,6 @@ def frac_bilinear_upsampling(input, frac_ratio):
sides. This does not happen when it is odd.
"""
T
=
aesara
.
tensor
row
,
col
=
input
.
shape
[
2
:]
up_input
=
input
.
reshape
((
-
1
,
1
,
row
,
col
))
...
...
@@ -1928,15 +1926,15 @@ def frac_bilinear_upsampling(input, frac_ratio):
subsample
=
(
frac_ratio
[
1
],
frac_ratio
[
1
])
# duplicate borders of the input
concat_mat
=
T
.
concatenate
(
concat_mat
=
aet
.
concatenate
(
(
up_input
[:,
:,
:
1
,
:],
up_input
,
up_input
[:,
:,
-
1
:,
:]),
axis
=
2
)
concat_mat
=
T
.
concatenate
(
concat_mat
=
aet
.
concatenate
(
(
concat_mat
[:,
:,
:,
:
1
],
concat_mat
,
concat_mat
[:,
:,
:,
-
1
:]),
axis
=
3
)
# add padding for the pyramidal kernel
double_pad
=
(
2
*
T
.
as_tensor
([
row
,
col
])
-
1
)
*
np
.
array
(
ratio
)
+
1
double_pad
=
(
2
*
aet
.
as_tensor
([
row
,
col
])
-
1
)
*
np
.
array
(
ratio
)
+
1
pad
=
double_pad
//
2
# build pyramidal kernel
...
...
@@ -1945,25 +1943,25 @@ def frac_bilinear_upsampling(input, frac_ratio):
)
# add corresponding padding
pad_kern
=
T
.
concatenate
(
pad_kern
=
aet
.
concatenate
(
(
T
.
zeros
(
aet
.
zeros
(
tuple
(
kern
.
shape
[:
2
])
+
(
pad
[
0
],
kern
.
shape
[
-
1
]),
dtype
=
config
.
floatX
,
),
kern
,
T
.
zeros
(
aet
.
zeros
(
tuple
(
kern
.
shape
[:
2
])
+
(
double_pad
[
0
]
-
pad
[
0
],
kern
.
shape
[
-
1
]),
dtype
=
config
.
floatX
,
),
),
axis
=
2
,
)
pad_kern
=
T
.
concatenate
(
pad_kern
=
aet
.
concatenate
(
(
T
.
zeros
(
tuple
(
pad_kern
.
shape
[:
3
])
+
(
pad
[
1
],),
dtype
=
config
.
floatX
),
aet
.
zeros
(
tuple
(
pad_kern
.
shape
[:
3
])
+
(
pad
[
1
],),
dtype
=
config
.
floatX
),
pad_kern
,
T
.
zeros
(
aet
.
zeros
(
tuple
(
pad_kern
.
shape
[:
3
])
+
(
double_pad
[
1
]
-
pad
[
1
],),
dtype
=
config
.
floatX
,
),
...
...
@@ -1972,7 +1970,7 @@ def frac_bilinear_upsampling(input, frac_ratio):
)
# upsample the input by passing it as kernel of conv and using filter_dilation
upsamp
=
T
.
nnet
.
conv2d
(
upsamp
=
conv2d
(
pad_kern
,
concat_mat
,
border_mode
=
"valid"
,
...
...
@@ -2048,7 +2046,6 @@ def bilinear_upsampling(
return
frac_bilinear_upsampling
(
input
,
frac_ratio
=
frac_ratio
)
# the remaining case is an integer ratio with use_1D_kernel
T
=
aesara
.
tensor
try
:
up_bs
=
batch_size
*
num_input_channels
except
TypeError
:
...
...
@@ -2058,11 +2055,11 @@ def bilinear_upsampling(
# concatenating the first and last row and column
# first and last row
concat_mat
=
T
.
concatenate
(
concat_mat
=
aet
.
concatenate
(
(
up_input
[:,
:,
:
1
,
:],
up_input
,
up_input
[:,
:,
-
1
:,
:]),
axis
=
2
)
# first and last col
concat_mat
=
T
.
concatenate
(
concat_mat
=
aet
.
concatenate
(
(
concat_mat
[:,
:,
:,
:
1
],
concat_mat
,
concat_mat
[:,
:,
:,
-
1
:]),
axis
=
3
)
concat_col
=
col
+
2
...
...
aesara/tensor/nnet/basic.py
浏览文件 @
c058326d
...
...
@@ -2081,7 +2081,7 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(fgraph, node):
assert
dy
.
ndim
==
1
if
dy
.
owner
is
not
None
and
isinstance
(
dy
.
owner
.
op
,
aet
.
Alloc
):
# dz is the input of the Alloc op, i.e.
T
.alloc(dz, <shape>)
# dz is the input of the Alloc op, i.e.
aet
.alloc(dz, <shape>)
dz
=
dy
.
owner
.
inputs
[
0
]
try
:
...
...
aesara/tensor/nnet/batchnorm.py
浏览文件 @
c058326d
...
...
@@ -185,10 +185,10 @@ def batch_normalization_train(
axes = (0,) + tuple(range(2, inputs.ndim))
mean = inputs.mean(axes, keepdims=True)
var = inputs.var(axes, keepdims=True)
invstd =
T.inv(T
.sqrt(var + epsilon))
invstd =
aet.inv(aet
.sqrt(var + epsilon))
out = (inputs - mean) * gamma * invstd + beta
m =
T.cast(T.prod(inputs.shape) / T
.prod(mean.shape), 'float32')
m =
aet.cast(aet.prod(inputs.shape) / aet
.prod(mean.shape), 'float32')
running_mean = running_mean * (1 - running_average_factor) +
\\
mean * running_average_factor
running_var = running_var * (1 - running_average_factor) +
\\
...
...
@@ -332,9 +332,9 @@ def batch_normalization_test(
axes = (0,)
# for spatial normalization
axes = (0,) + tuple(range(2, inputs.ndim))
gamma, beta, mean, var = (
T
.addbroadcast(t, *axes)
gamma, beta, mean, var = (
aet
.addbroadcast(t, *axes)
for t in (gamma, beta, mean, var))
out = (inputs - mean) * gamma /
T
.sqrt(var + epsilon) + beta
out = (inputs - mean) * gamma /
aet
.sqrt(var + epsilon) + beta
"""
ndim
=
inputs
.
ndim
axes
,
non_bc_axes
=
_prepare_batch_normalization_axes
(
axes
,
ndim
)
...
...
tests/tensor/nnet/test_abstract_conv.py
浏览文件 @
c058326d
...
...
@@ -1920,8 +1920,8 @@ class TestConv2dGrads:
def
test_conv2d_grad_wrt_inputs
(
self
):
# Compares calculated abstract grads wrt inputs with the fwd grads
# This method checks the outputs of
conv2_grad_wrt_inputs
against
# the outputs of
T.nnet.conv
forward grads to make sure the
# This method checks the outputs of
`conv2d_grad_wrt_inputs`
against
# the outputs of
`aesara.tensor.nnet.conv`
forward grads to make sure the
# results are the same.
for
(
in_shape
,
fltr_shape
)
in
zip
(
self
.
inputs_shapes
,
self
.
filters_shapes
):
...
...
@@ -1986,8 +1986,8 @@ class TestConv2dGrads:
def
test_conv2d_grad_wrt_weights
(
self
):
# Compares calculated abstract grads wrt weights with the fwd grads
# This method checks the outputs of
conv2_grad_wrt_weights
against
# the outputs of
T.nnet.conv
forward grads to make sure the
# This method checks the outputs of
`conv2d_grad_wrt_weights`
against
# the outputs of
`aesara.tensor.nnet.conv`
forward grads to make sure the
# results are the same.
for
(
in_shape
,
fltr_shape
)
in
zip
(
self
.
inputs_shapes
,
self
.
filters_shapes
):
...
...
tests/tensor/test_blas.py
浏览文件 @
c058326d
...
...
@@ -906,7 +906,7 @@ def test_gemm_nested():
def
test_gemm_opt_wishlist
():
X
,
Y
,
Z
,
a
,
b
=
matrix
(),
matrix
(),
matrix
(),
scalar
(),
scalar
()
# with >2 additions of the same
T.dot(X,Y
term
# with >2 additions of the same
``aet.dot(X, Y)``
term
just_gemm
([
X
,
Y
,
Z
,
a
,
b
],
[(
b
*
b
)
*
Z
*
a
+
(
a
*
a
)
*
dot
(
X
,
Y
)
+
b
*
dot
(
X
,
Y
)])
just_gemm
([
X
,
Y
,
Z
,
a
,
b
],
[
Z
+
dot
(
X
,
Y
)
+
dot
(
X
,
Y
)])
...
...
tests/tensor/test_math_opt.py
浏览文件 @
c058326d
...
...
@@ -842,7 +842,7 @@ class TestAlgebraicCanonize:
# 4 * x / abs(2*x) it gets simplified during canonicalisation.
x
=
dscalar
()
# a =
T
.abs_(x)
# a =
aet
.abs_(x)
if
config
.
mode
==
"FAST_COMPILE"
:
mode
=
get_mode
(
"FAST_RUN"
)
.
excluding
(
"local_elemwise_fusion"
)
...
...
@@ -2366,7 +2366,6 @@ def test_local_pow_specialize():
assert
len
(
nodes
)
==
2
assert
nodes
[
0
]
==
sqr
assert
isinstance
(
nodes
[
1
]
.
scalar_op
,
aes
.
basic
.
Inv
)
# assert nodes == [T.sqr,T.inv]#Why this don't work?
utt
.
assert_allclose
(
f
(
val_no0
),
val_no0
**
(
-
2
))
f
=
function
([
v
],
v
**
(
0.5
),
mode
=
mode
)
...
...
@@ -2379,7 +2378,6 @@ def test_local_pow_specialize():
assert
len
(
nodes
)
==
2
assert
nodes
[
0
]
==
sqrt
assert
isinstance
(
nodes
[
1
]
.
scalar_op
,
aes
.
basic
.
Inv
)
# assert nodes == [T.sqrt,T.inv]#Why this don't work?
utt
.
assert_allclose
(
f
(
val_no0
),
val_no0
**
(
-
0.5
))
...
...
tests/tensor/test_mlp.py
浏览文件 @
c058326d
...
...
@@ -127,10 +127,10 @@ class LogisticRegression:
the learning rate is less dependent on the batch size
"""
# y.shape[0] is (symbolically) the number of rows in y, i.e., number of examples (call it n) in the minibatch
#
T
.arange(y.shape[0]) is a symbolic vector which will contain [0,1,2,... n-1]
#
T
.log(self.p_y_given_x) is a matrix of Log-Probabilities (call it LP) with one row per example and one column per class
# LP[
T
.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]]
# and
T.mean(LP[T
.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v,
#
aet
.arange(y.shape[0]) is a symbolic vector which will contain [0,1,2,... n-1]
#
aet
.log(self.p_y_given_x) is a matrix of Log-Probabilities (call it LP) with one row per example and one column per class
# LP[
aet
.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]]
# and
aet.mean(LP[aet
.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v,
# i.e., the mean log-likelihood across the minibatch.
return
log
(
self
.
p_y_given_x
[
aet
.
arange
(
y
.
shape
[
0
]),
y
])
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论