Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
58cb5c30
提交
58cb5c30
authored
11月 24, 2021
作者:
Ricardo
提交者:
Brandon T. Willard
12月 13, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add axis to Softmax and SoftmaxGrad Ops
上级
c6c85acb
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
565 行增加
和
507 行删除
+565
-507
dispatch.py
aesara/link/jax/dispatch.py
+3
-1
elemwise.py
aesara/link/numba/dispatch/elemwise.py
+13
-6
__init__.py
aesara/tensor/nnet/__init__.py
+2
-2
basic.py
aesara/tensor/nnet/basic.py
+438
-255
test_dnn.py
tests/gpuarray/test_dnn.py
+3
-3
test_nnet.py
tests/gpuarray/test_nnet.py
+3
-3
test_jax.py
tests/link/test_jax.py
+7
-2
test_numba.py
tests/link/test_numba.py
+10
-3
test_basic.py
tests/tensor/nnet/test_basic.py
+85
-230
test_rop.py
tests/test_rop.py
+1
-2
没有找到文件。
aesara/link/jax/dispatch.py
浏览文件 @
58cb5c30
...
...
@@ -198,8 +198,10 @@ def jax_funcify_Identity(op, **kwargs):
@jax_funcify.register
(
Softmax
)
def
jax_funcify_Softmax
(
op
,
**
kwargs
):
axis
=
op
.
axis
def
softmax
(
x
):
return
jax
.
nn
.
softmax
(
x
)
return
jax
.
nn
.
softmax
(
x
,
axis
=
axis
)
return
softmax
...
...
aesara/link/numba/dispatch/elemwise.py
浏览文件 @
58cb5c30
...
...
@@ -400,17 +400,24 @@ def numba_funcify_Softmax(op, node, **kwargs):
x_at
=
node
.
inputs
[
0
]
x_dtype
=
x_at
.
type
.
numpy_dtype
x_dtype
=
numba
.
np
.
numpy_support
.
from_dtype
(
x_dtype
)
axis
=
op
.
axis
# np.max(x, axis=1)
reduce_max
=
create_axis_reducer
(
np
.
maximum
,
-
np
.
inf
,
1
,
x_at
.
ndim
,
x_dtype
)
# np.sum(x, axis=1)
reduce_sum
=
create_axis_reducer
(
np
.
add
,
0.0
,
1
,
x_at
.
ndim
,
x_dtype
)
if
axis
is
not
None
:
reduce_max
=
create_axis_reducer
(
np
.
maximum
,
-
np
.
inf
,
axis
,
x_at
.
ndim
,
x_dtype
,
keepdims
=
True
)
reduce_sum
=
create_axis_reducer
(
np
.
add
,
0.0
,
axis
,
x_at
.
ndim
,
x_dtype
,
keepdims
=
True
)
else
:
reduce_max
=
np
.
max
reduce_sum
=
np
.
sum
@numba.njit
def
softmax
(
x
):
z
=
np
.
expand_dims
(
reduce_max
(
x
),
-
1
)
z
=
reduce_max
(
x
)
e_x
=
np
.
exp
(
x
-
z
)
w
=
np
.
expand_dims
(
reduce_sum
(
e_x
),
-
1
)
w
=
reduce_sum
(
e_x
)
sm
=
e_x
/
w
return
sm
...
...
aesara/tensor/nnet/__init__.py
浏览文件 @
58cb5c30
...
...
@@ -35,9 +35,9 @@ from aesara.tensor.nnet.basic import (
selu
,
sigmoid_binary_crossentropy
,
softmax
,
softmax_grad
,
softmax_grad
_legacy
,
softmax_graph
,
softmax_
op
,
softmax_
legacy
,
softmax_simplifier
,
softmax_with_bias
,
softsign
,
...
...
aesara/tensor/nnet/basic.py
浏览文件 @
58cb5c30
...
...
@@ -14,8 +14,10 @@ revisited later when all the intermediate part are on the GPU.
"""
import
warnings
from
textwrap
import
dedent
import
numpy
as
np
import
scipy.special
import
aesara
from
aesara
import
scalar
as
aes
...
...
@@ -140,7 +142,7 @@ class SoftmaxWithBias(COp):
if
isinstance
(
g_sm
.
type
,
DisconnectedType
):
return
[
DisconnectedType
()(),
DisconnectedType
()()]
dx
=
softmax_grad
(
g_sm
,
outputs
[
0
])
dx
=
softmax_grad
_legacy
(
g_sm
,
outputs
[
0
])
db
=
aet_sum
(
dx
,
axis
=
0
)
return
dx
,
db
...
...
@@ -339,36 +341,39 @@ class SoftmaxGrad(COp):
nin
=
2
nout
=
1
__props__
=
()
__props__
=
(
"axis"
,)
def
__init__
(
self
,
axis
):
if
axis
is
not
None
and
not
isinstance
(
axis
,
int
):
raise
TypeError
(
"axis must be an integer or `None`"
)
self
.
axis
=
axis
def
make_node
(
self
,
dy
,
sm
):
dy
=
aet
.
as_tensor_variable
(
dy
)
sm
=
aet
.
as_tensor_variable
(
sm
)
if
dy
.
type
.
ndim
not
in
(
1
,
2
)
or
dy
.
type
.
dtype
not
in
float_dtypes
:
raise
ValueError
(
"dy must be 1-d or 2-d tensor of floats. Got "
,
dy
.
type
)
if
dy
.
ndim
==
1
:
dy
=
shape_padleft
(
dy
,
n_ones
=
1
)
if
sm
.
ndim
==
1
:
sm
=
shape_padleft
(
sm
,
n_ones
=
1
)
if
self
.
axis
is
not
None
and
(
self
.
axis
>=
sm
.
ndim
or
self
.
axis
<
-
sm
.
ndim
):
raise
ValueError
(
f
"SoftmaxGrad axis(={self.axis}) out of bounds for {sm.ndim}D array {sm}"
)
return
Apply
(
self
,
[
dy
,
sm
],
[
sm
.
type
()])
def
perform
(
self
,
node
,
input_storage
,
output_storage
):
dy
,
sm
=
input_storage
dx
=
np
.
zeros_like
(
sm
)
# dx[i,j] = - (\sum_k dy[i,k] sm[i,k]) sm[i,j] + dy[i,j] sm[i,j]
for
i
in
range
(
sm
.
shape
[
0
]):
dy_times_sm_i
=
dy
[
i
]
*
sm
[
i
]
dx
[
i
]
=
dy_times_sm_i
-
sum
(
dy_times_sm_i
)
*
sm
[
i
]
dy_times_sm
=
dy
*
sm
dx
=
dy_times_sm
-
np
.
sum
(
dy_times_sm
,
axis
=
self
.
axis
,
keepdims
=
True
)
*
sm
output_storage
[
0
][
0
]
=
dx
def
grad
(
self
,
inp
,
grads
):
dy
,
sm
=
inp
(
g
,)
=
grads
tmp
=
g
+
neg
(
aet_sum
(
g
*
sm
,
axis
=
1
)
.
dimshuffle
((
0
,
"x"
)
))
tmp
=
g
+
neg
(
aet_sum
(
g
*
sm
,
axis
=
self
.
axis
,
keepdims
=
True
))
g_dy
=
tmp
*
sm
tmp2
=
aet_sum
(
dy
*
sm
,
axis
=
1
)
.
dimshuffle
((
0
,
"x"
)
)
tmp2
=
aet_sum
(
dy
*
sm
,
axis
=
self
.
axis
,
keepdims
=
True
)
g_sm
=
tmp
*
dy
-
g
*
tmp2
return
g_dy
,
g_sm
...
...
@@ -377,79 +382,184 @@ class SoftmaxGrad(COp):
return
[
shape
[
1
]]
def
c_code_cache_version
(
self
):
return
(
3
,)
return
(
4
,)
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
dy
,
sm
=
inp
(
dx
,)
=
out
return
"""
if ((PyArray_TYPE(
%(dy)
s) != NPY_DOUBLE) &&
(PyArray_TYPE(
%(dy)
s) != NPY_FLOAT))
{
PyErr_SetString(PyExc_TypeError,
"types should be float or float64");
%(fail)
s;
}
if ((PyArray_TYPE(
%(sm)
s) != NPY_DOUBLE) &&
(PyArray_TYPE(
%(sm)
s) != NPY_FLOAT))
{
PyErr_SetString(PyExc_TypeError,
"types should be float or float64");
%(fail)
s;
}
if ((PyArray_NDIM(
%(dy)
s) != 2)
|| (PyArray_NDIM(
%(sm)
s) != 2))
{
PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)
s;
}
if (PyArray_DIMS(
%(dy)
s)[0] != PyArray_DIMS(
%(sm)
s)[0])
{
PyErr_SetString(PyExc_ValueError, "dy.shape[0] != sm.shape[0]");
%(fail)
s;
}
if ((NULL ==
%(dx)
s)
|| (PyArray_DIMS(
%(dx)
s)[0] != PyArray_DIMS(
%(sm)
s)[0])
|| (PyArray_DIMS(
%(dx)
s)[1] != PyArray_DIMS(
%(sm)
s)[1]))
{
Py_XDECREF(
%(dx)
s);
%(dx)
s = (PyArrayObject*) PyArray_SimpleNew(2,
PyArray_DIMS(
%(sm)
s),
PyArray_TYPE(
%(sm)
s));
if (!
%(dx)
s)
{
PyErr_SetString(PyExc_MemoryError,
"failed to alloc dx output");
%(fail)
s;
}
}
for (size_t i = 0; i < PyArray_DIMS(
%(dx)
s)[0]; ++i)
{
const dtype_
%(dy)
s* __restrict__ dy_i = (dtype_
%(dy)
s*) (PyArray_BYTES(
%(dy)
s) + PyArray_STRIDES(
%(dy)
s)[0] * i);
npy_intp Sdy = PyArray_STRIDES(
%(dy)
s)[1]/sizeof(dtype_
%(dy)
s);
const dtype_
%(sm)
s* __restrict__ sm_i = (dtype_
%(sm)
s*) (PyArray_BYTES(
%(sm)
s) + PyArray_STRIDES(
%(sm)
s)[0] * i);
npy_intp Ssm = PyArray_STRIDES(
%(sm)
s)[1]/sizeof(dtype_
%(sm)
s);
dtype_
%(dx)
s* __restrict__ dx_i = (dtype_
%(dx)
s*) (PyArray_BYTES(
%(dx)
s) + PyArray_STRIDES(
%(dx)
s)[0] * i);
npy_intp Sdx = PyArray_STRIDES(
%(dx)
s)[1]/sizeof(dtype_
%(dx)
s);
double sum_dy_times_sm = 0.;
for (size_t j = 0; j < PyArray_DIMS(
%(dx)
s)[1]; ++j)
{
dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm];
sum_dy_times_sm += dx_i[j * Sdx];
}
for (size_t j = 0; j < PyArray_DIMS(
%(dx)
s)[1]; ++j)
{
dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm];
}
}
"""
%
dict
(
locals
(),
**
sub
axis
=
self
.
axis
if
self
.
axis
is
not
None
else
np
.
MAXDIMS
fail
=
sub
[
"fail"
]
return
dedent
(
f
"""
PyArrayObject* op[3];
npy_uint32 op_flags[3];
npy_uint32 iter_flags;
NpyIter* iter;
NpyIter_IterNextFunc* get_next;
char** data_ptr;
int sm_ndim = PyArray_NDIM({sm});
int axis = {axis};
int iterate_axis = !(axis == NPY_MAXDIMS || sm_ndim == 1);
// Validate inputs
if ((PyArray_TYPE({dy}) != NPY_DOUBLE) &&
(PyArray_TYPE({dy}) != NPY_FLOAT))
{{
PyErr_SetString(PyExc_TypeError, "types should be float or float64");
{fail};
}}
if ((PyArray_TYPE({sm}) != NPY_DOUBLE) &&
(PyArray_TYPE({sm}) != NPY_FLOAT))
{{
PyErr_SetString(PyExc_TypeError, "types should be float or float64");
{fail};
}}
if (axis < 0) axis = sm_ndim + axis;
if ((axis < 0) || (iterate_axis && (axis > sm_ndim)))
{{
PyErr_SetString(PyExc_ValueError, "invalid axis in SoftmaxGrad");
{fail};
}}
if (({dx} == NULL)
|| !(PyArray_CompareLists(PyArray_DIMS({dx}), PyArray_DIMS({sm}), sm_ndim)))
{{
Py_XDECREF({dx});
{dx} = (PyArrayObject*)PyArray_SimpleNew(sm_ndim,
PyArray_DIMS({sm}),
PyArray_TYPE({sm}));
if (!{dx})
{{
PyErr_SetString(PyExc_MemoryError, "failed to alloc SoftMaxGrad dx output");
{fail};
}}
}}
// Create numpy iterator
op[0] = {dy};
op[1] = {sm};
op[2] = {dx};
op_flags[0] = NPY_ITER_READONLY;
op_flags[1] = NPY_ITER_READONLY;
op_flags[2] = NPY_ITER_READWRITE;
iter_flags = (iterate_axis)? NPY_ITER_MULTI_INDEX : 0;
iter = NpyIter_MultiNew(
3,
op,
iter_flags,
NPY_KEEPORDER,
NPY_NO_CASTING,
op_flags,
NULL
);
if (iter == NULL)
{{
PyErr_SetString(PyExc_MemoryError, "failed to create softmax iterator");
{fail};
}}
// SoftmaxGrad is applied across the entire array
if (!iterate_axis)
{{
get_next = NpyIter_GetIterNext(iter, NULL);
if (get_next == NULL)
{{
NpyIter_Deallocate(iter);
PyErr_SetString(PyExc_RuntimeError, "Failed to obtain SoftMaxGrad GetIterNext");
{fail};
}}
data_ptr = NpyIter_GetDataPtrArray(iter);
// Compute and accumulate dy * sm
dtype_{dx} sum_dy_times_sm = 0.0;
do
{{
dtype_{dy}* dy_ptr = (dtype_{dy}*)data_ptr[0];
dtype_{sm}* sm_ptr = (dtype_{sm}*)data_ptr[1];
dtype_{dx}* dx_ptr = (dtype_{dx}*)data_ptr[2];
*dx_ptr = (dtype_{dx})((*dy_ptr) * (*sm_ptr));
sum_dy_times_sm += *dx_ptr;
}} while(get_next(iter));
// Reset Iterator
if (NpyIter_GotoIterIndex(iter, 0) == NPY_FAIL)
{{
PyErr_SetString(PyExc_RuntimeError, "Failed to reset softmax iterator");
{fail};
}}
// Subtract sum(dy*sm) * sm
do
{{
dtype_{sm}* sm_ptr = (dtype_{sm}*)data_ptr[1];
dtype_{dx}* dx_ptr = (dtype_{dx}*)data_ptr[2];
*dx_ptr -= sum_dy_times_sm * ((dtype_{dx})(*sm_ptr));
}} while(get_next(iter));
}}
// SoftmaxGrad is applied across a specific axis
else {{
// Collect axis strides and remove it from iteration
npy_intp axis_size = PyArray_DIM({sm}, axis);
npy_intp* axis_stride = NpyIter_GetAxisStrideArray(iter, axis);
if (axis_stride == NULL)
{{
PyErr_SetString(PyExc_RuntimeError, "Failed to obtain softmax axis strides");
{fail};
}}
npy_intp dy_axis_stride = axis_stride[0] / sizeof(dtype_{dy});
npy_intp sm_axis_stride = axis_stride[1] / sizeof(dtype_{sm});
npy_intp dx_axis_stride = axis_stride[2] / sizeof(dtype_{dx});
if (NpyIter_RemoveAxis(iter, axis) == NPY_FAIL)
{{
PyErr_SetString(PyExc_RuntimeError, "Failed to remove SoftmaxGrad axis from iterator");
{fail};
}}
// Iterate over remaining axes
get_next = NpyIter_GetIterNext(iter, NULL);
if (get_next == NULL)
{{
NpyIter_Deallocate(iter);
PyErr_SetString(PyExc_RuntimeError, "Failed to obtain SoftamGrad GetIterNext");
{fail};
}}
data_ptr = NpyIter_GetDataPtrArray(iter);
do
{{
dtype_{dy}* dy_axis = (dtype_{dy}*)data_ptr[0];
dtype_{sm}* sm_axis = (dtype_{sm}*)data_ptr[1];
dtype_{dx}* dx_axis = (dtype_{dx}*)data_ptr[2];
// Compute and accumulate dy * sm
dtype_{dx} sum_dy_times_sm = 0.0;
for (npy_intp i = 0; i < axis_size; i++)
{{
dx_axis[i * dx_axis_stride] = (dtype_{dx})(dy_axis[i * dy_axis_stride] * sm_axis[i * sm_axis_stride]);
sum_dy_times_sm += dx_axis[i * dx_axis_stride];
}}
// Subtract sum(dy*sm) * sm
for (npy_intp i = 0; i < axis_size; i++)
{{
dx_axis[i * dx_axis_stride] -= sum_dy_times_sm * (dtype_{dx})(sm_axis[i * sm_axis_stride]);
}}
}} while(get_next(iter));
}}
NpyIter_Deallocate(iter);
"""
)
softmax_grad
=
SoftmaxGrad
(
)
softmax_grad
_legacy
=
SoftmaxGrad
(
axis
=-
1
)
class
Softmax
(
COp
):
...
...
@@ -464,34 +574,32 @@ class Softmax(COp):
nin
=
1
nout
=
1
__props__
=
()
__props__
=
(
"axis"
,)
def
__init__
(
self
,
axis
):
if
axis
is
not
None
and
not
isinstance
(
axis
,
int
):
raise
TypeError
(
"axis must be an integer or `None`"
)
self
.
axis
=
axis
def
make_node
(
self
,
x
):
x
=
aet
.
as_tensor_variable
(
x
)
if
x
.
type
.
ndim
not
in
(
1
,
2
)
or
x
.
type
.
dtype
not
in
float_dtypes
:
raise
ValueError
(
f
"x must be 1-d or 2-d tensor of floats. Got {x.type}"
)
if
x
.
ndim
==
1
:
warnings
.
warn
(
"If x is a vector, Softmax will not automatically pad x "
"anymore in next releases. If you need it, please do it manually. The "
"vector case is gonna be supported soon and the output will be a vector."
,
category
=
PendingDeprecationWarning
,
stacklevel
=
4
,
if
self
.
axis
is
not
None
and
(
self
.
axis
>=
x
.
ndim
or
self
.
axis
<
-
x
.
ndim
):
raise
ValueError
(
f
"Softmax axis(={self.axis}) out of bounds for {x.ndim}D array {x}"
)
x
=
shape_padleft
(
x
,
n_ones
=
1
)
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
perform
(
self
,
node
,
input_storage
,
output_storage
):
(
x
,)
=
input_storage
e_x
=
np
.
exp
(
x
-
x
.
max
(
axis
=
1
)[:,
None
])
sm
=
e_x
/
e_x
.
sum
(
axis
=
1
)[:,
None
]
output_storage
[
0
][
0
]
=
sm
(
z
,)
=
output_storage
z
[
0
]
=
scipy
.
special
.
softmax
(
x
,
axis
=
self
.
axis
)
def
L_op
(
self
,
inp
,
outputs
,
grads
):
(
x
,)
=
inp
(
g_sm
,)
=
grads
return
[
softmax_grad
(
g_sm
,
outputs
[
0
])]
return
[
SoftmaxGrad
(
axis
=
self
.
axis
)
(
g_sm
,
outputs
[
0
])]
def
R_op
(
self
,
inputs
,
eval_points
):
# I think the Jacobian is symmetric so the R_op
...
...
@@ -506,151 +614,202 @@ class Softmax(COp):
def
c_headers
(
self
,
**
kwargs
):
return
[
"<iostream>"
,
"<cmath>"
]
@staticmethod
def
c_code_template
(
dtype
):
# this implementation was lifted from
# /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
(
x
,)
=
inp
(
sm
,)
=
out
axis
=
self
.
axis
if
self
.
axis
is
not
None
else
np
.
MAXDIMS
fail
=
sub
[
"fail"
]
# dtype = node.inputs[0].type.dtype_specs()[1]
# TODO: put this into a templated function, in the support code
# TODO: declare the max of each row as an Op output
# TODO: set error messages for failures in this code
# TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
init_decl
=
"""
npy_intp* Nx = PyArray_DIMS(
%(x)
s);
npy_intp Sx1 = 0;
npy_intp Ssm1 = 0;
if (PyArray_NDIM(
%(x)
s) != 2)
{
PyErr_SetString(PyExc_ValueError, "not a 2d tensor");
%(fail)
s;
}
if ((PyArray_TYPE(
%(x)
s) != NPY_DOUBLE) &&
(PyArray_TYPE(
%(x)
s) != NPY_FLOAT))
{
PyErr_SetString(PyExc_TypeError, "not a float");
%(fail)
s;
}
if ((NULL ==
%(sm)
s)
|| (PyArray_DIMS(
%(sm)
s)[0] != PyArray_DIMS(
%(x)
s)[0])
|| (PyArray_DIMS(
%(sm)
s)[1] != PyArray_DIMS(
%(x)
s)[1]))
{
Py_XDECREF(
%(sm)
s);
%(sm)
s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(
%(x)
s),
PyArray_TYPE(
%(x)
s));
if(!
%(sm)
s) {
PyErr_SetString(PyExc_MemoryError,
"failed to alloc sm output");
%(fail)
s
}
}
Sx1 = PyArray_STRIDES(
%(x)
s)[1]/sizeof(dtype_
%(x)
s);
Ssm1 = PyArray_STRIDES(
%(sm)
s)[1]/sizeof(dtype_
%(sm)
s);
"""
begin_row_loop
=
"""
for (size_t i = 0; i < Nx[0]; ++i)
{
size_t j;
double sum = 0.0;
const dtype_
%(x)
s* __restrict__ x_i = (dtype_
%(x)
s*)(PyArray_BYTES(
%(x)
s) + PyArray_STRIDES(
%(x)
s)[0] * i);
dtype_
%(sm)
s* __restrict__ sm_i = (dtype_
%(sm)
s*)(PyArray_BYTES(
%(sm)
s) + PyArray_STRIDES(
%(sm)
s)[0] * i);
dtype_
%(sm)
s row_max = x_i[0];
//std::cout << "0 " << row_max << "
\\
n";
// Get the maximum value of the row
for (j = 1; j < Nx[1]; ++j)
{
dtype_
%(sm)
s row_ij = x_i[j * Sx1] ;
//std::cout << "1 " << row_ij << "
\\
n";
row_max = (row_ij > row_max) ? row_ij : row_max;
}
"""
inside_row_loop
=
"""
for (j = 0; j < Nx[1]; ++j)
{
dtype_
%(sm)
s row_ij = x_i[j * Sx1] ;
//std::cout << "2 " << j << " " << row_ij << " " << row_max << "
\\
n";
dtype_
%(sm)
s sm_ij = exp(row_ij - row_max);
//std::cout << "3 " << j << " " << sm_ij << "
\\
n";
sum += sm_ij;
sm_i[j * Ssm1] = sm_ij;
}
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum;
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm1] *= sum_inv;
}
"""
# Get the vectorized version of exp if it exist
try
:
vec_exp
=
aesara
.
scalar
.
exp
.
c_code_contiguous_raw
(
dtype
,
"Nx[1]"
,
"sm_i"
,
"sm_i"
)
inside_row_loop_contig
=
(
"""
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm1] = x_i[j * Sx1] - row_max;
}
%(vec_exp)
s;
for (j = 0; j < Nx[1]; ++j)
{
sum += sm_i[j * Ssm1];
}
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum;
for (j = 0; j < Nx[1]; ++j)
{
sm_i[j * Ssm1] *= sum_inv;
}
"""
%
locals
()
)
inside_row_loop
=
(
"""
if(Ssm1 == 1){
%(inside_row_loop_contig)
s
}else{
%(inside_row_loop)
s
}
return
dedent
(
f
"""
PyArrayObject* op[2];
npy_uint32 op_flags[2];
npy_uint32 iter_flags;
NpyIter* iter;
NpyIter_IterNextFunc* get_next;
char** data_ptr;
int x_ndim = PyArray_NDIM({x});
int axis = {axis};
int iterate_axis = !(axis == NPY_MAXDIMS || x_ndim == 1);
// Validate inputs
if ((PyArray_TYPE({x}) != NPY_DOUBLE) &&
(PyArray_TYPE({x}) != NPY_FLOAT))
{{
PyErr_SetString(PyExc_TypeError, "not a float");
{fail}
}}
if (axis < 0) axis = x_ndim + axis;
if ((axis < 0) || (iterate_axis && (axis > x_ndim)))
{{
PyErr_SetString(PyExc_ValueError, "invalid axis in Softmax");
{fail}
}}
// Allocate Output Array
if (({sm}) == NULL || !(PyArray_CompareLists(PyArray_DIMS({sm}), PyArray_DIMS({x}), x_ndim)))
{{
Py_XDECREF({sm});
{sm} = (PyArrayObject*)PyArray_SimpleNew(x_ndim, PyArray_DIMS({x}), PyArray_TYPE({x}));
if(!{sm}) {{
PyErr_SetString(PyExc_MemoryError, "failed to alloc Softmax output");
{fail}
}}
}}
// Create numpy iterator
op[0] = {x};
op[1] = {sm};
op_flags[0] = NPY_ITER_READONLY;
op_flags[1] = NPY_ITER_READWRITE;
iter_flags = (iterate_axis)? NPY_ITER_MULTI_INDEX : 0;
iter = NpyIter_MultiNew(
2,
op,
iter_flags,
NPY_KEEPORDER,
NPY_NO_CASTING,
op_flags,
NULL
);
if (iter == NULL)
{{
PyErr_SetString(PyExc_MemoryError, "failed to create Softmax iterator");
{fail}
}}
// Softmax is applied across the entire array
if (!iterate_axis)
{{
get_next = NpyIter_GetIterNext(iter, NULL);
if (get_next == NULL)
{{
NpyIter_Deallocate(iter);
PyErr_SetString(PyExc_RuntimeError, "Failed to obtain Softmax GetIterNext");
{fail}
}}
data_ptr = NpyIter_GetDataPtrArray(iter);
// Find axis max
dtype_{x}* x_ptr = (dtype_{x}*)data_ptr[0];
dtype_{x} max = *x_ptr;
if (get_next(iter))
{{
do
{{
dtype_{x}* x_ptr = (dtype_{x}*)data_ptr[0];
max = (*x_ptr > max)? *x_ptr : max;
}} while(get_next(iter));
}}
// Reset Iterator
if (NpyIter_GotoIterIndex(iter, 0) == NPY_FAIL)
{{
PyErr_SetString(PyExc_RuntimeError, "Failed to reset Softmax iterator");
{fail}
}}
// Compute and accumulate exp(x-max(x)) exponent
double sum_exp_dev = 0.0;
do
{{
dtype_{x}* x_ptr = (dtype_{x}*)data_ptr[0];
dtype_{sm}* sm_ptr = (dtype_{sm}*)data_ptr[1];
*sm_ptr = (dtype_{sm}) exp(*x_ptr - max);
sum_exp_dev += *sm_ptr;
}} while(get_next(iter));
// Reset Iterator
if (NpyIter_GotoIterIndex(iter, 0) == NPY_FAIL)
{{
PyErr_SetString(PyExc_RuntimeError, "Failed to reset Softmax iterator");
{fail}
}}
// Divide by sum(exp(x-max(x)))
double inv_sum_exp_dev = 1.0 / sum_exp_dev;
do
{{
dtype_{sm}* sm_ptr = (dtype_{sm}*)data_ptr[1];
*sm_ptr *= inv_sum_exp_dev;
}} while(get_next(iter));
}}
// Softmax is applied across a specific axis
else {{
// Collect axis strides and remove it from iteration
npy_intp axis_size = PyArray_DIM({x}, axis);
npy_intp* axis_stride = NpyIter_GetAxisStrideArray(iter, axis);
if (axis_stride == NULL)
{{
PyErr_SetString(PyExc_RuntimeError, "Failed to obtain Softmax axis strides");
{fail}
}}
npy_intp x_axis_stride = axis_stride[0] / sizeof(dtype_{x});
npy_intp sm_axis_stride = axis_stride[1] / sizeof(dtype_{sm});
if (NpyIter_RemoveAxis(iter, axis) == NPY_FAIL)
{{
PyErr_SetString(PyExc_RuntimeError, "Failed to remove softmax axis from iterator");
{fail}
}}
// Iterate over remaining axes
get_next = NpyIter_GetIterNext(iter, NULL);
if (get_next == NULL)
{{
NpyIter_Deallocate(iter);
PyErr_SetString(PyExc_RuntimeError, "Failed to obtain softmax GetIterNext");
{fail}
}}
data_ptr = NpyIter_GetDataPtrArray(iter);
do
{{
dtype_{x}* x_axis = (dtype_{x}*)data_ptr[0];
dtype_{sm}* sm_axis = (dtype_{sm}*)data_ptr[1];
// Find axis max
dtype_{x} max = x_axis[0];
for (npy_intp i = 1; i < axis_size; i++)
{{
dtype_{x} x_val = x_axis[i * x_axis_stride];
max = (x_val > max)? x_val : max;
}}
// Compute and accumulate exp(x-max(x)) exponent
dtype_{sm} sum_exp_dev = 0.0;
for (npy_intp i = 0; i < axis_size; i++)
{{
sm_axis[i * sm_axis_stride] = (dtype_{sm}) exp(x_axis[i * x_axis_stride] - max);
sum_exp_dev += sm_axis[i * sm_axis_stride];
}}
// Divide by sum(exp(x-max(x)))
dtype_{sm} inv_sum_exp_dev = 1.0 / sum_exp_dev;
for (npy_intp i = 0; i < axis_size; i++)
{{
sm_axis[i * sm_axis_stride] *= inv_sum_exp_dev;
}}
}} while(get_next(iter));
}}
NpyIter_Deallocate(iter);
"""
%
locals
()
)
except
aesara
.
graph
.
utils
.
MethodNotDefined
:
pass
end_row_loop
=
"""
}
"""
return
(
init_decl
,
begin_row_loop
,
inside_row_loop
,
end_row_loop
)
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
(
x
,)
=
inp
(
sm
,)
=
out
code_template
=
""
.
join
(
self
.
c_code_template
(
node
.
inputs
[
0
]
.
type
.
dtype_specs
()[
1
])
)
return
code_template
%
dict
(
locals
(),
**
sub
)
@staticmethod
def
c_code_cache_version
():
return
(
3
,)
return
(
4
,)
softmax_
op
=
Softmax
(
)
softmax_
legacy
=
Softmax
(
axis
=-
1
)
class
LogSoftmax
(
COp
):
...
...
@@ -689,7 +848,7 @@ class LogSoftmax(COp):
def
grad
(
self
,
inp
,
grads
):
(
x
,)
=
inp
sm
=
softmax_
op
(
x
)
sm
=
softmax_
legacy
(
x
)
return
[
grads
[
0
]
-
aet_sum
(
grads
[
0
],
axis
=
1
,
keepdims
=
True
)
*
sm
]
def
R_op
(
self
,
inputs
,
eval_points
):
...
...
@@ -816,7 +975,8 @@ def local_logsoftmax(fgraph, node):
and
isinstance
(
node
.
op
.
scalar_op
,
aes
.
Log
)
and
len
(
node
.
inputs
)
==
1
and
node
.
inputs
[
0
]
.
owner
is
not
None
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
Softmax
)
and
node
.
inputs
[
0
]
.
owner
.
op
==
softmax_legacy
and
node
.
inputs
[
0
]
.
ndim
==
2
):
inVars
=
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
]
new_op
=
LogSoftmax
()
...
...
@@ -843,7 +1003,8 @@ def local_logsoftmax_grad(fgraph, node):
and
node
.
inputs
[
0
]
.
owner
.
op
==
true_div
and
len
(
node
.
inputs
[
0
]
.
owner
.
inputs
)
>=
2
and
node
.
inputs
[
0
]
.
owner
.
inputs
[
1
]
.
owner
is
not
None
and
node
.
inputs
[
0
]
.
owner
.
inputs
[
1
]
.
owner
.
op
==
softmax_op
and
node
.
inputs
[
0
]
.
owner
.
inputs
[
1
]
.
owner
.
op
==
softmax_legacy
and
node
.
inputs
[
0
]
.
owner
.
inputs
[
1
]
.
ndim
==
2
and
node
.
inputs
[
1
]
==
node
.
inputs
[
0
]
.
owner
.
inputs
[
1
]
and
not
(
# skip if it will be optimized by
...
...
@@ -871,13 +1032,26 @@ def softmax_graph(c):
return
exp
(
c
)
/
exp
(
c
)
.
sum
(
axis
=-
1
,
keepdims
=
True
)
def
softmax
(
c
):
UNSET_AXIS
=
object
()
def
softmax
(
c
,
axis
=
UNSET_AXIS
):
if
axis
is
UNSET_AXIS
:
warnings
.
warn
(
"Softmax now accepts an axis argument. For backwards-compatibility it defaults to -1 when not specified, "
"but in the future the default will be `None`.
\n
To suppress this warning specify axis explicitly."
,
FutureWarning
,
)
axis
=
-
1
c
=
as_tensor_variable
(
c
)
if
c
.
broadcastable
[
-
1
]:
if
c
.
ndim
==
1
:
# TODO: Create Specific warning type that can be suppressed?
warnings
.
warn
(
"The softmax is applied on a dimension of shape 1, which does not have a semantic meaning."
"Softmax no longer converts a vector to a row matrix."
,
UserWarning
,
)
return
softmax_op
(
c
)
return
Softmax
(
axis
=
axis
)
(
c
)
def
logsoftmax
(
c
):
...
...
@@ -885,13 +1059,13 @@ def logsoftmax(c):
@register_specialize
(
"fast_compile_gpu"
)
@local_optimizer
([
softmax_
op
])
@local_optimizer
([
softmax_
legacy
])
def
local_softmax_with_bias
(
fgraph
,
node
):
"""
Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias).
"""
if
node
.
op
==
softmax_
op
:
if
node
.
op
==
softmax_
legacy
and
node
.
outputs
[
0
]
.
ndim
==
2
:
(
x
,)
=
node
.
inputs
if
x
.
owner
and
x
.
owner
.
op
==
add
:
vectors
=
[]
...
...
@@ -980,7 +1154,7 @@ def softmax_simplifier(numerators, denominators):
matching_denom
=
denominator
break
if
matching_denom
:
softmax
=
softmax_
op
(
x
)
softmax
=
softmax_
legacy
(
x
)
copy_stack_trace
(
numerator
,
softmax
)
numerators
.
remove
(
numerator
)
denominators
.
remove
(
matching_denom
)
...
...
@@ -1611,7 +1785,7 @@ def crossentropy_to_crossentropy_with_softmax(fgraph):
if
node
.
op
==
crossentropy_categorical_1hot
:
(
nll
,)
=
node
.
outputs
sm
,
one_of_n
=
node
.
inputs
if
sm
.
owner
and
sm
.
owner
.
op
==
softmax_
op
:
if
sm
.
owner
and
sm
.
owner
.
op
==
softmax_
legacy
and
sm
.
ndim
==
2
:
(
x
,)
=
sm
.
owner
.
inputs
(
new_nll
,
...
...
@@ -1658,9 +1832,9 @@ optdb.register(
@register_specialize
(
"fast_compile_gpu"
,
"local_crossentropy_to_crossentropy_with_softmax_grad"
)
# old name
@local_optimizer
([
softmax_grad
])
@local_optimizer
([
softmax_grad
_legacy
])
def
local_softmax_grad_to_crossentropy_with_softmax_grad
(
fgraph
,
node
):
if
node
.
op
==
softmax_grad
:
if
node
.
op
==
softmax_grad
_legacy
and
node
.
inputs
[
1
]
.
ndim
==
2
:
g_coding_dist
,
coding_dist
=
node
.
inputs
if
(
g_coding_dist
.
owner
...
...
@@ -1686,13 +1860,16 @@ def local_argmax_pushdown(fgraph, node):
x
=
node
.
inputs
[
0
]
axis
=
node
.
op
.
get_params
(
node
)
# TODO: Make a list/set of monotonic ops...
if
x
.
owner
and
x
.
owner
.
op
in
(
softmax_op
,
softplus
,
exp
,
log
,
tanh
,
sigmoid
,
if
x
.
owner
and
(
x
.
owner
.
op
in
(
softplus
,
exp
,
log
,
tanh
,
sigmoid
,
)
or
isinstance
(
x
.
owner
.
op
,
Softmax
)
):
(
pre_x
,)
=
x
.
owner
.
inputs
ret
=
max_and_argmax
(
pre_x
,
axis
)
...
...
@@ -1786,7 +1963,12 @@ def local_advanced_indexing_crossentropy_onehot(fgraph, node):
except
Exception
:
pass
if
sm
is
not
None
and
sm
.
owner
and
sm
.
owner
.
op
in
(
softmax_op
,
softmax_with_bias
):
if
(
sm
is
not
None
and
sm
.
owner
and
sm
.
owner
.
op
in
(
softmax_legacy
,
softmax_with_bias
)
and
sm
.
ndim
==
2
):
sm_w_bias
=
local_softmax_with_bias
.
transform
(
fgraph
,
sm
.
owner
)
if
sm_w_bias
:
assert
sm_w_bias
[
0
]
.
owner
.
op
==
softmax_with_bias
...
...
@@ -1807,9 +1989,9 @@ def local_advanced_indexing_crossentropy_onehot(fgraph, node):
@register_specialize
(
"fast_compile_gpu"
)
@local_optimizer
([
softmax_grad
])
@local_optimizer
([
softmax_grad
_legacy
])
def
local_advanced_indexing_crossentropy_onehot_grad
(
fgraph
,
node
):
if
not
(
node
.
op
==
softmax_grad
):
if
not
(
node
.
op
==
softmax_grad
_legacy
and
node
.
inputs
[
1
]
.
ndim
==
2
):
return
sm
=
None
...
...
@@ -1821,7 +2003,8 @@ def local_advanced_indexing_crossentropy_onehot_grad(fgraph, node):
if
(
(
sm
is
not
None
)
and
sm
.
owner
and
(
sm
.
owner
.
op
in
(
softmax_op
,
softmax_with_bias
))
and
(
sm
.
owner
.
op
in
(
softmax_legacy
,
softmax_with_bias
))
and
sm
.
ndim
==
2
):
sm_w_bias
=
local_softmax_with_bias
.
transform
(
fgraph
,
sm
.
owner
)
if
sm_w_bias
:
...
...
tests/gpuarray/test_dnn.py
浏览文件 @
58cb5c30
...
...
@@ -32,7 +32,7 @@ from aesara.tensor.math import (
sqrt
,
)
from
aesara.tensor.math
import
sum
as
aet_sum
from
aesara.tensor.nnet
import
batchnorm
,
conv2d
,
softmax
,
softmax_
op
from
aesara.tensor.nnet
import
batchnorm
,
conv2d
,
softmax
,
softmax_
legacy
from
aesara.tensor.nnet.abstract_conv
import
(
get_conv_gradinputs_shape
,
get_conv_output_shape
,
...
...
@@ -1456,7 +1456,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
def
test_softmax_f16
(
self
):
x
=
matrix
(
"x"
,
"float16"
)
x_gpu
=
tensor4
(
"x_gpu"
,
"float16"
)
f_z
=
softmax_
op
f_z
=
softmax_
legacy
f_gpu
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)
def
cmp
(
n
,
m
,
f
,
f_gpu
):
...
...
@@ -1480,7 +1480,7 @@ class TestSoftMax(test_nnet.TestSoftMax):
x
=
matrix
(
"x"
)
x_gpu
=
tensor4
(
"x_gpu"
)
f_z
=
softmax_
op
f_z
=
softmax_
legacy
f_gpu
=
dnn
.
GpuDnnSoftmax
(
"accurate"
,
"channel"
)
# Verify the grad operation
...
...
tests/gpuarray/test_nnet.py
浏览文件 @
58cb5c30
...
...
@@ -210,7 +210,7 @@ def softmax_unittest_template(dtypeInput):
z
=
aesara
.
tensor
.
nnet
.
softmax
(
x
)
f
=
aesara
.
function
([
x
],
z
,
mode
=
mode_without_gpu
)
f_gpu
=
aesara
.
function
([
x
],
z
,
mode
=
mode_wo_cudnn
)
assert
f
.
maker
.
fgraph
.
toposort
()[
-
1
]
.
op
==
aesara
.
tensor
.
nnet
.
softmax_
op
assert
f
.
maker
.
fgraph
.
toposort
()[
-
1
]
.
op
==
aesara
.
tensor
.
nnet
.
softmax_
legacy
assert
isinstance
(
f_gpu
.
maker
.
fgraph
.
toposort
()[
-
2
]
.
op
,
GpuSoftmax
)
def
cmp
(
n
,
m
):
...
...
@@ -300,7 +300,7 @@ class TestSoftMax:
def
test_softmax
(
self
):
x
=
fmatrix
(
"x"
)
z
=
aesara
.
tensor
.
nnet
.
softmax_
op
z
=
aesara
.
tensor
.
nnet
.
softmax_
legacy
f
,
f_gpu
=
self
.
_test_softmax
(
x
,
x
,
z
,
z
,
self
.
_cmp
)
...
...
@@ -308,7 +308,7 @@ class TestSoftMax:
def
test_softmax_shape_0
(
self
):
x
=
fmatrix
(
"x"
)
z
=
aesara
.
tensor
.
nnet
.
softmax_
op
z
=
aesara
.
tensor
.
nnet
.
softmax_
legacy
f
,
f_gpu
=
self
.
_test_softmax
(
x
,
x
,
z
,
z
,
self
.
_cmp
)
# Aesara can handle that case, but cudnn can't
...
...
tests/link/test_jax.py
浏览文件 @
58cb5c30
...
...
@@ -969,11 +969,16 @@ def test_nnet():
fgraph
=
FunctionGraph
([
x
],
[
out
])
compare_jax_and_py
(
fgraph
,
[
get_test_value
(
i
)
for
i
in
fgraph
.
inputs
])
out
=
aet_nnet
.
softmax
(
x
)
out
=
aet_nnet
.
log
softmax
(
x
)
fgraph
=
FunctionGraph
([
x
],
[
out
])
compare_jax_and_py
(
fgraph
,
[
get_test_value
(
i
)
for
i
in
fgraph
.
inputs
])
out
=
aet_nnet
.
logsoftmax
(
x
)
@pytest.mark.parametrize
(
"axis"
,
[
None
,
0
,
1
])
def
test_softmax
(
axis
):
x
=
matrix
(
"x"
)
x
.
tag
.
test_value
=
np
.
arange
(
6
,
dtype
=
config
.
floatX
)
.
reshape
(
2
,
3
)
out
=
aet_nnet
.
softmax
(
x
,
axis
=
axis
)
fgraph
=
FunctionGraph
([
x
],
[
out
])
compare_jax_and_py
(
fgraph
,
[
get_test_value
(
i
)
for
i
in
fgraph
.
inputs
])
...
...
tests/link/test_numba.py
浏览文件 @
58cb5c30
...
...
@@ -1894,20 +1894,27 @@ def test_Dot(x, y, exc):
@pytest.mark.parametrize
(
"x, exc"
,
"x,
axis,
exc"
,
[
(
set_test_value
(
aet
.
vector
(),
rng
.
random
(
size
=
(
2
,))
.
astype
(
config
.
floatX
)),
None
,
None
,
),
(
set_test_value
(
aet
.
matrix
(),
rng
.
random
(
size
=
(
2
,
3
))
.
astype
(
config
.
floatX
)),
None
,
None
,
),
(
set_test_value
(
aet
.
matrix
(),
rng
.
random
(
size
=
(
2
,
3
))
.
astype
(
config
.
floatX
)),
0
,
None
,
),
],
)
def
test_Softmax
(
x
,
exc
):
g
=
nnetb
.
Softmax
()(
x
)
def
test_Softmax
(
x
,
axis
,
exc
):
g
=
nnetb
.
Softmax
(
axis
=
axis
)(
x
)
g_fg
=
FunctionGraph
(
outputs
=
[
g
])
cm
=
contextlib
.
suppress
()
if
exc
is
None
else
pytest
.
warns
(
exc
)
...
...
tests/tensor/nnet/test_basic.py
浏览文件 @
58cb5c30
from
contextlib
import
ExitStack
as
does_not_raise
import
numpy
as
np
import
pytest
import
scipy.special
as
sp
import
aesara
import
aesara.tensor
as
aet
...
...
@@ -49,13 +52,13 @@ from aesara.tensor.nnet.basic import (
selu
,
sigmoid_binary_crossentropy
,
softmax
,
softmax_grad
,
softmax_grad
_legacy
,
softmax_graph
,
softmax_
op
,
softmax_
legacy
,
softmax_with_bias
,
softsign
,
)
from
aesara.tensor.shape
import
shape_padleft
,
specify_shape
from
aesara.tensor.shape
import
shape_padleft
from
aesara.tensor.subtensor
import
AdvancedSubtensor
from
aesara.tensor.type
import
(
dmatrix
,
...
...
@@ -67,6 +70,7 @@ from aesara.tensor.type import (
matrices
,
matrix
,
scalar
,
tensor3
,
tensor4
,
vector
,
vectors
,
...
...
@@ -80,46 +84,64 @@ from tests.tensor.utils import (
)
class
TestSoftmax
(
utt
.
InferShapeTester
):
def
test_basic
(
self
):
def
f
(
a
):
return
softmax_op
(
a
)[:,
0
]
def
valid_axis_tester
(
Op
):
with
pytest
.
raises
(
TypeError
):
Op
(
1.5
)
utt
.
verify_grad
(
f
,
[
np
.
random
.
random
((
3
,
4
))])
x
=
[
tensor3
()]
*
Op
.
nin
with
does_not_raise
():
Op
(
2
)(
*
x
)
def
f
(
a
):
return
softmax_op
(
a
)[:,
1
]
with
pytest
.
raises
(
ValueError
):
Op
(
3
)(
*
x
)
utt
.
verify_grad
(
f
,
[
np
.
random
.
random
((
3
,
4
))])
with
does_not_raise
():
Op
(
-
3
)(
*
x
)
def
f
(
a
):
return
softmax_op
(
a
)[:,
2
]
with
pytest
.
raises
(
ValueError
):
Op
(
-
4
)(
*
x
)
utt
.
verify_grad
(
f
,
[
np
.
random
.
random
((
3
,
4
))])
class
TestSoftmax
(
utt
.
InferShapeTester
):
@pytest.mark.parametrize
(
"axis"
,
[
None
,
0
,
1
,
2
,
3
,
-
1
,
-
2
])
def
test_perform
(
self
,
axis
):
x
=
tensor4
(
"x"
)
xv
=
np
.
random
.
randn
(
2
,
3
,
4
,
5
)
.
astype
(
config
.
floatX
)
f
=
aesara
.
function
([
x
],
softmax
(
x
,
axis
=
axis
))
assert
np
.
allclose
(
f
(
xv
),
sp
.
softmax
(
xv
,
axis
=
axis
))
@pytest.mark.parametrize
(
"column"
,
[
0
,
1
,
2
,
3
])
@pytest.mark.parametrize
(
"axis"
,
[
None
,
0
,
1
])
def
test_grad
(
self
,
axis
,
column
):
def
f
(
a
):
return
softmax
_op
(
a
)[:,
3
]
return
softmax
(
a
,
axis
=
axis
)[:,
column
]
utt
.
verify_grad
(
f
,
[
np
.
random
.
random
((
3
,
4
))])
utt
.
verify_grad
(
f
,
[
np
.
random
.
random
((
3
,
4
,
2
))])
def
test_infer_shape
(
self
):
admat
=
matrix
()
admat_val
=
np
.
random
.
random
((
3
,
4
))
.
astype
(
config
.
floatX
)
self
.
_compile_and_check
([
admat
],
[
Softmax
()(
admat
)],
[
admat_val
],
Softmax
)
self
.
_compile_and_check
(
[
admat
],
[
Softmax
(
axis
=-
1
)(
admat
)],
[
admat_val
],
Softmax
)
def
test_vector
(
self
):
def
test_vector
_perform
(
self
):
x
=
vector
()
f
=
aesara
.
function
([
x
],
softmax
_op
(
x
))
f
=
aesara
.
function
([
x
],
softmax
(
x
,
axis
=
None
))
xv
=
np
.
random
.
randn
(
6
)
.
astype
(
config
.
floatX
)
assert
np
.
allclose
(
f
(
xv
),
np
.
exp
(
xv
)
/
np
.
exp
(
xv
)
.
sum
(
))
assert
np
.
allclose
(
f
(
xv
),
sp
.
softmax
(
xv
))
def
test_vector_grad
(
self
):
def
f
(
a
):
return
softmax
_op
(
a
)
return
softmax
(
a
,
axis
=
None
)
utt
.
verify_grad
(
f
,
[
np
.
random
.
random
((
4
))])
def
test_valid_axis
(
self
):
valid_axis_tester
(
Softmax
)
class
TestSoftmaxWithBias
(
utt
.
InferShapeTester
):
def
test_basic
(
self
):
...
...
@@ -154,10 +176,10 @@ class TestSoftmaxWithBias(utt.InferShapeTester):
W
=
aesara
.
shared
(
value
=
initial_W
,
name
=
"W"
)
vbias
=
aesara
.
shared
(
value
=
0.1
,
name
=
"vbias"
)
# 0.01
hid
=
vector
(
"hid"
)
f
=
aesara
.
function
([
hid
],
softmax_
op
(
dot
(
hid
,
W
.
T
)
+
vbias
))
f
=
aesara
.
function
([
hid
],
softmax_
legacy
(
dot
(
hid
,
W
.
T
)
+
vbias
))
ops
=
[
node
.
op
for
node
in
f
.
maker
.
fgraph
.
toposort
()]
assert
softmax_with_bias
not
in
ops
assert
softmax_
op
in
ops
assert
softmax_
legacy
in
ops
f
([
0
,
1
,
0
])
# print f.maker.fgraph.toposort()
...
...
@@ -315,17 +337,19 @@ class TestLogSoftmax(utt.InferShapeTester):
g
=
grad
(
y
.
sum
(),
x
)
softmax_grad_node
=
g
.
owner
assert
softmax_grad_node
.
op
==
softmax_grad
assert
softmax_grad_node
.
op
==
softmax_grad
_legacy
true_div_node
=
softmax_grad_node
.
inputs
[
0
]
.
owner
assert
true_div_node
.
op
==
true_div
# We replace the elemwise true_div op by an elemwise add.
new_g
=
softmax_grad
(
add
(
*
true_div_node
.
inputs
),
softmax_grad_node
.
inputs
[
1
])
new_g
=
softmax_grad_legacy
(
add
(
*
true_div_node
.
inputs
),
softmax_grad_node
.
inputs
[
1
]
)
fgraph
=
FunctionGraph
([
x
],
[
new_g
])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
assert
softmax_grad
in
[
n
.
op
for
n
in
fgraph
.
toposort
()]
assert
softmax_grad
_legacy
in
[
n
.
op
for
n
in
fgraph
.
toposort
()]
class
TestSoftmaxGrad
(
utt
.
InferShapeTester
):
...
...
@@ -336,11 +360,14 @@ class TestSoftmaxGrad(utt.InferShapeTester):
bdmat_val
=
np
.
random
.
random
((
3
,
4
))
.
astype
(
config
.
floatX
)
self
.
_compile_and_check
(
[
admat
,
bdmat
],
[
SoftmaxGrad
()(
admat
,
bdmat
)],
[
SoftmaxGrad
(
axis
=-
1
)(
admat
,
bdmat
)],
[
admat_val
,
bdmat_val
],
SoftmaxGrad
,
)
def
test_valid_axis
(
self
):
valid_axis_tester
(
SoftmaxGrad
)
class
TestCrossEntropySoftmax1Hot
:
def
test_basic
(
self
):
...
...
@@ -611,17 +638,7 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
op
=
crossentropy_categorical_1hot
# xe = op(x, one_of_n)
fgraph
=
FunctionGraph
([
x
,
one_of_n
],
[
op
(
softmax_op
(
x
),
one_of_n
)])
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
def
test_softmax_optimizations_vector
(
self
):
x
=
vector
(
"x"
)
one_of_n
=
lvector
(
"one_of_n"
)
op
=
crossentropy_categorical_1hot
fgraph
=
FunctionGraph
([
x
,
one_of_n
],
[
op
(
softmax_op
(
x
),
one_of_n
)])
fgraph
=
FunctionGraph
([
x
,
one_of_n
],
[
op
(
softmax_legacy
(
x
),
one_of_n
)])
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
...
...
@@ -633,7 +650,7 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
one_of_n
=
lvector
(
"one_of_n"
)
op
=
crossentropy_categorical_1hot
fgraph
=
FunctionGraph
([
x
,
b
,
one_of_n
],
[
op
(
softmax_
op
(
x
+
b
),
one_of_n
)])
fgraph
=
FunctionGraph
([
x
,
b
,
one_of_n
],
[
op
(
softmax_
legacy
(
x
+
b
),
one_of_n
)])
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
...
...
@@ -649,7 +666,7 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
op
=
crossentropy_categorical_1hot
fgraph
=
FunctionGraph
(
[
x
,
b
,
c
,
one_of_n
],
[
op
(
softmax_
op
(
add
(
x
,
b
,
c
)),
one_of_n
)]
[
x
,
b
,
c
,
one_of_n
],
[
op
(
softmax_
legacy
(
add
(
x
,
b
,
c
)),
one_of_n
)]
)
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
...
...
@@ -658,29 +675,17 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
assert
len
(
fgraph
.
toposort
())
==
2
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
def
test_softmax_optimizations_w_bias_vector
(
self
):
x
=
vector
(
"x"
)
b
=
vector
(
"b"
)
one_of_n
=
lvector
(
"one_of_n"
)
op
=
crossentropy_categorical_1hot
fgraph
=
FunctionGraph
([
x
,
b
,
one_of_n
],
[
op
(
softmax_op
(
x
+
b
),
one_of_n
)])
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
op
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
assert
len
(
fgraph
.
toposort
())
==
2
assert
fgraph
.
outputs
[
0
]
.
owner
.
op
==
crossentropy_softmax_argmax_1hot_with_bias
def
test_softmax_grad_optimizations
(
self
):
x
=
matrix
(
"x"
)
one_of_n
=
lvector
(
"one_of_n"
)
op
=
crossentropy_categorical_1hot
xe
=
op
(
softmax_
op
(
x
),
one_of_n
)
xe
=
op
(
softmax_
legacy
(
x
),
one_of_n
)
sum_xe
=
aet_sum
(
xe
)
g_x
=
grad
(
sum_xe
,
x
)
fgraph
=
FunctionGraph
([
x
,
one_of_n
],
[
g_x
])
assert
check_stack_trace
(
fgraph
,
ops_to_check
=
[
crossentropy_softmax_1hot_with_bias_dx
,
softmax_op
]
fgraph
,
ops_to_check
=
[
crossentropy_softmax_1hot_with_bias_dx
,
softmax_legacy
],
)
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
...
...
@@ -688,25 +693,8 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
ops
=
{
node
.
op
for
node
in
fgraph
.
toposort
()}
assert
crossentropy_softmax_argmax_1hot_with_bias
not
in
ops
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_op
in
ops
assert
softmax_grad
not
in
ops
def
test_softmax_grad_optimizations_vector
(
self
):
x
=
vector
(
"x"
)
one_of_n
=
lvector
(
"one_of_n"
)
op
=
crossentropy_categorical_1hot
xe
=
op
(
softmax_op
(
x
),
one_of_n
)
sum_xe
=
aet_sum
(
xe
)
g_x
=
grad
(
sum_xe
,
x
)
fgraph
=
FunctionGraph
([
x
,
one_of_n
],
[
g_x
])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
ops
=
{
node
.
op
for
node
in
fgraph
.
toposort
()}
assert
crossentropy_softmax_argmax_1hot_with_bias
not
in
ops
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_op
in
ops
assert
softmax_grad
not
in
ops
assert
softmax_legacy
in
ops
assert
softmax_grad_legacy
not
in
ops
def
test_get_rid_of_advanced_indexing_version_of_xent
(
self
):
x
=
matrix
(
"x"
)
...
...
@@ -737,8 +725,8 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
assert
len
(
ops
)
==
2
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_
op
in
ops
assert
softmax_grad
not
in
ops
assert
softmax_
legacy
in
ops
assert
softmax_grad
_legacy
not
in
ops
# Test that a biased softmax is optimized correctly
bias_expressions
=
[
...
...
@@ -763,7 +751,7 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
assert
len
(
ops
)
==
2
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_with_bias
in
ops
assert
softmax_grad
not
in
ops
assert
softmax_grad
_legacy
not
in
ops
# Test that using "mean" instead of sum works, too
mean_expressions
=
[
...
...
@@ -791,8 +779,8 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
# there's an extra dimshuffle in there
# but I can't think of a good rule to get rid of it
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_
op
in
ops
assert
softmax_grad
not
in
ops
assert
softmax_
legacy
in
ops
assert
softmax_grad
_legacy
not
in
ops
mean_bias_expressions
=
[
mean
(
-
log
(
softmax
(
x
+
b
)[
aet
.
arange
(
y
.
shape
[
0
]),
y
])),
...
...
@@ -818,7 +806,7 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
assert
len
(
ops
)
==
5
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_with_bias
in
ops
assert
softmax_grad
not
in
ops
assert
softmax_grad
_legacy
not
in
ops
def
test_xent_thing_int32
(
self
):
x
=
matrix
(
"x"
)
...
...
@@ -847,141 +835,8 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
assert
len
(
ops
)
==
3
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_op
in
ops
assert
softmax_grad
not
in
ops
def
test_optimize_xent_vector
(
self
):
x
=
vector
(
"x"
)
y
=
lvector
(
"y"
)
# Test that a biased softmax is optimized correctly
bias_expressions
=
[
aet_sum
(
-
log
(
softmax
(
x
)[
aet
.
arange
(
y
.
shape
[
0
]),
y
])),
-
aet_sum
(
log
(
softmax
(
x
)[
aet
.
arange
(
y
.
shape
[
0
]),
y
])),
]
for
expr
in
bias_expressions
:
fgraph
=
FunctionGraph
([
x
,
y
],
[
expr
])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
assert
len
(
ops
)
==
5
assert
crossentropy_softmax_argmax_1hot_with_bias
in
ops
assert
not
[
1
for
o
in
ops
if
isinstance
(
o
,
AdvancedSubtensor
)]
fgraph
=
FunctionGraph
([
x
,
y
],
[
grad
(
expr
,
x
)])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
assert
len
(
ops
)
==
4
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_op
in
ops
assert
softmax_grad
not
in
ops
def
test_optimize_xent_vector2
(
self
):
x
=
vector
(
"x"
)
b
=
vector
(
"b"
)
y
=
lvector
(
"y"
)
# Test that a biased softmax is optimized correctly
bias_expressions
=
[
aet_sum
(
-
log
(
softmax
(
x
+
b
)[
aet
.
arange
(
y
.
shape
[
0
]),
y
])),
-
aet_sum
(
log
(
softmax
(
b
+
x
)[
aet
.
arange
(
y
.
shape
[
0
]),
y
])),
-
aet_sum
(
log
(
softmax
(
x
+
b
))[
aet
.
arange
(
y
.
shape
[
0
]),
y
]),
aet_sum
(
-
log
(
softmax
(
b
+
x
))[
aet
.
arange
(
y
.
shape
[
0
]),
y
]),
]
for
expr
in
bias_expressions
:
fgraph
=
FunctionGraph
([
x
,
b
,
y
],
[
expr
])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
# [big_op, sum, dim_shuffle]
assert
len
(
ops
)
==
3
assert
crossentropy_softmax_argmax_1hot_with_bias
in
ops
assert
not
[
1
for
o
in
ops
if
isinstance
(
o
,
AdvancedSubtensor
)]
fgraph
=
FunctionGraph
([
x
,
b
,
y
],
[
grad
(
expr
,
x
)])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
assert
len
(
ops
)
<=
6
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_with_bias
in
ops
assert
softmax_grad
not
in
ops
def
test_optimize_xent_vector3
(
self
):
# Same as test_optimize_xent_vector2, but y is the result of
# a "flatten", and it used to make the constant-folding
# of arange(y.shape[0]) happen before the xent optimization
x
=
vector
(
"x"
)
b
=
vector
(
"b"
)
y_
=
lvector
(
"y_"
)
y
=
y_
.
flatten
()
# Test that a biased softmax is optimized correctly
bias_expressions
=
[
aet_sum
(
-
log
(
softmax
(
x
+
b
)[
aet
.
arange
(
y
.
shape
[
0
]),
y
])),
-
aet_sum
(
log
(
softmax
(
b
+
x
)[
aet
.
arange
(
y
.
shape
[
0
]),
y
])),
-
aet_sum
(
log
(
softmax
(
x
+
b
))[
aet
.
arange
(
y
.
shape
[
0
]),
y
]),
aet_sum
(
-
log
(
softmax
(
b
+
x
))[
aet
.
arange
(
y
.
shape
[
0
]),
y
]),
]
for
expr
in
bias_expressions
:
fgraph
=
FunctionGraph
([
x
,
b
,
y_
],
[
expr
])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
# [big_op, sum, dim_shuffle, flatten]
assert
len
(
ops
)
<=
4
assert
crossentropy_softmax_argmax_1hot_with_bias
in
ops
assert
not
[
1
for
o
in
ops
if
isinstance
(
o
,
AdvancedSubtensor
)]
fgraph
=
FunctionGraph
([
x
,
b
,
y
],
[
grad
(
expr
,
x
)])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
assert
len
(
ops
)
<=
6
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_with_bias
in
ops
assert
softmax_grad
not
in
ops
def
test_optimize_xent_vector4
(
self
):
# Same as test_optimize_xent_vector2, but y is the result of a
# "specify_shape" that indicates its length is 1, so the
# constant-folding of arange(y.shape[0]) happen before the xent
# optimization
x
=
vector
(
"x"
)
b
=
vector
(
"b"
)
y_
=
lvector
(
"y_"
)
y
=
specify_shape
(
y_
,
(
1
,))
# Test that a biased softmax is optimized correctly
bias_expressions
=
[
aet_sum
(
-
log
(
softmax
(
x
+
b
)[
aet
.
arange
(
y
.
shape
[
0
]),
y
])),
-
aet_sum
(
log
(
softmax
(
b
+
x
)[
aet
.
arange
(
y
.
shape
[
0
]),
y
])),
-
aet_sum
(
log
(
softmax
(
x
+
b
))[
aet
.
arange
(
y
.
shape
[
0
]),
y
]),
aet_sum
(
-
log
(
softmax
(
b
+
x
))[
aet
.
arange
(
y
.
shape
[
0
]),
y
]),
]
for
expr
in
bias_expressions
:
fgraph
=
FunctionGraph
([
x
,
b
,
y_
],
[
expr
])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
# [big_op, sum, dim_shuffle, specify_shape]
assert
len
(
ops
)
<=
4
assert
crossentropy_softmax_argmax_1hot_with_bias
in
ops
assert
not
[
1
for
o
in
ops
if
isinstance
(
o
,
AdvancedSubtensor
)]
fgraph
=
FunctionGraph
([
x
,
b
,
y
],
[
grad
(
expr
,
x
)])
optdb
.
query
(
OPT_FAST_RUN
)
.
optimize
(
fgraph
)
ops
=
[
node
.
op
for
node
in
fgraph
.
toposort
()]
assert
len
(
ops
)
<=
6
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_with_bias
in
ops
assert
softmax_grad
not
in
ops
assert
softmax_legacy
in
ops
assert
softmax_grad_legacy
not
in
ops
def
test_crossentropy_softmax_1hot_with_bias_dxcale_cost
(
self
):
x
=
matrix
(
"x"
)
...
...
@@ -996,9 +851,9 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
for
node
in
func
.
maker
.
fgraph
.
toposort
():
if
node
.
op
==
crossentropy_softmax_1hot_with_bias_dx
:
has_cx1hotdx
=
True
if
node
.
op
==
softmax_
op
:
if
node
.
op
==
softmax_
legacy
:
has_softmax
=
True
if
node
.
op
==
softmax_grad
:
if
node
.
op
==
softmax_grad
_legacy
:
has_softmaxdx
=
True
assert
has_cx1hotdx
...
...
@@ -1033,7 +888,7 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
ops
=
{
node
.
op
for
node
in
fgraph
.
toposort
()}
assert
crossentropy_softmax_argmax_1hot_with_bias
in
ops
assert
softmax_
op
not
in
ops
assert
softmax_
legacy
not
in
ops
# Verify the gradient wrt x
fgraph
=
FunctionGraph
([
x
,
y
,
a
],
[
grad
(
expr
,
x
)])
...
...
@@ -1043,8 +898,8 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
ops
=
{
node
.
op
for
node
in
fgraph
.
toposort
()}
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_
op
in
ops
assert
softmax_grad
not
in
ops
assert
softmax_
legacy
in
ops
assert
softmax_grad
_legacy
not
in
ops
# Verify the gradient when providing output gradient
fgraph
=
FunctionGraph
(
...
...
@@ -1056,13 +911,13 @@ class TestCrossEntropyCategorical1Hot(utt.InferShapeTester):
ops
=
{
node
.
op
for
node
in
fgraph
.
toposort
()}
assert
crossentropy_softmax_1hot_with_bias_dx
in
ops
assert
softmax_
op
in
ops
assert
softmax_grad
not
in
ops
assert
softmax_
legacy
in
ops
assert
softmax_grad
_legacy
not
in
ops
def
test_argmax_pushdown
():
x
=
matrix
()
for
sm
in
[
softmax_graph
,
softmax_
op
]:
for
sm
in
[
softmax_graph
,
softmax_
legacy
]:
# test that the max_and_argmax is pushed down if the max is not used
out
=
max_and_argmax
(
sm
(
exp
(
tanh
(
sigmoid
(
x
)))),
axis
=-
1
)[
1
]
fgraph
=
FunctionGraph
([
x
],
[
out
])
...
...
@@ -1188,12 +1043,12 @@ class TestSoftmaxOpt:
# test that function contains softmax and no div.
f
=
aesara
.
function
([
c
],
p_y
,
mode
=
self
.
mode
)
assert
check_stack_trace
(
f
,
ops_to_check
=
softmax_
op
)
assert
check_stack_trace
(
f
,
ops_to_check
=
softmax_
legacy
)
f_ops
=
[
n
.
op
for
n
in
f
.
maker
.
fgraph
.
toposort
()]
assert
len
(
f_ops
)
==
1
assert
softmax_
op
in
f_ops
assert
softmax_
legacy
in
f_ops
f
(
self
.
rng
.
random
((
3
,
4
))
.
astype
(
config
.
floatX
))
...
...
@@ -1204,12 +1059,12 @@ class TestSoftmaxOpt:
# test that function contains softmax and no div.
f
=
aesara
.
function
([
c
],
p_y
,
mode
=
self
.
mode
)
assert
check_stack_trace
(
f
,
ops_to_check
=
softmax_
op
)
assert
check_stack_trace
(
f
,
ops_to_check
=
softmax_
legacy
)
f_ops
=
[
n
.
op
for
n
in
f
.
maker
.
fgraph
.
toposort
()]
assert
len
(
f_ops
)
==
1
assert
softmax_
op
in
f_ops
assert
softmax_
legacy
in
f_ops
f
(
self
.
rng
.
random
((
3
,
4
))
.
astype
(
config
.
floatX
))
...
...
@@ -1226,8 +1081,8 @@ class TestSoftmaxOpt:
g_ops
=
[
n
.
op
for
n
in
g
.
maker
.
fgraph
.
toposort
()]
assert
len
(
g_ops
)
==
2
assert
softmax_
op
in
g_ops
assert
softmax_grad
in
g_ops
assert
softmax_
legacy
in
g_ops
assert
softmax_grad
_legacy
in
g_ops
g
(
self
.
rng
.
random
((
3
,
4
)),
self
.
rng
.
uniform
(
0.5
,
1
,
(
3
,
4
)))
...
...
@@ -1272,7 +1127,7 @@ def test_grad_softmax_grad():
x
=
aesara
.
shared
(
rng
.
normal
(
size
=
(
3
,
4
)))
def
f
(
inputs
):
y
=
softmax_
op
(
x
)
y
=
softmax_
legacy
(
x
)
return
aesara
.
grad
(
None
,
x
,
known_grads
=
{
y
:
inputs
})
utt
.
verify_grad
(
f
,
[
rng
.
random
((
3
,
4
))])
...
...
tests/test_rop.py
浏览文件 @
58cb5c30
...
...
@@ -384,8 +384,7 @@ class TestRopLop(RopLopChecker):
self
.
check_mat_rop_lop
(
self
.
mx
.
sum
(
axis
=
1
),
(
self
.
mat_in_shape
[
0
],))
def
test_softmax
(
self
):
# Softmax adds an extra dimnesion !
self
.
check_rop_lop
(
aesara
.
tensor
.
nnet
.
softmax
(
self
.
x
)[
0
],
self
.
in_shape
[
0
])
self
.
check_rop_lop
(
aesara
.
tensor
.
nnet
.
softmax
(
self
.
x
),
self
.
in_shape
)
def
test_alloc
(
self
):
# Alloc of the sum of x into a vector
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论